Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/socket.h>
22 #include <sys/wait.h>
23 #include <sys/syslimits.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include <sha1.h>
32 #include <zlib.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <imsg.h>
36 #include <time.h>
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
43 #include "got_lib_sha1.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_pack.h"
46 #include "got_lib_path.h"
47 #include "got_lib_inflate.h"
48 #include "got_lib_object.h"
49 #include "got_lib_privsep.h"
50 #include "got_lib_object_parse.h"
51 #include "got_lib_repository.h"
/*
 * Smaller of two values. Defined only if no MIN is already in scope.
 * Note that each argument may be evaluated more than once.
 */
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
57 const struct got_error *
58 got_object_id_str(char **outbuf, struct got_object_id *id)
59 {
60 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
62 *outbuf = malloc(len);
63 if (*outbuf == NULL)
64 return got_error_from_errno();
66 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
67 free(*outbuf);
68 *outbuf = NULL;
69 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
70 }
72 return NULL;
73 }
75 int
76 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
77 {
78 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
79 }
81 struct got_object_id *
82 got_object_id_dup(struct got_object_id *id1)
83 {
84 struct got_object_id *id2;
86 id2 = malloc(sizeof(*id2));
87 if (id2 == NULL)
88 return NULL;
89 memcpy(id2, id1, sizeof(*id2));
90 return id2;
91 }
93 struct got_object_id *
94 got_object_get_id(struct got_object *obj)
95 {
96 return got_object_id_dup(&obj->id);
97 }
99 const struct got_error *
100 got_object_get_id_str(char **outbuf, struct got_object *obj)
102 return got_object_id_str(outbuf, &obj->id);
105 int
106 got_object_get_type(struct got_object *obj)
108 switch (obj->type) {
109 case GOT_OBJ_TYPE_COMMIT:
110 case GOT_OBJ_TYPE_TREE:
111 case GOT_OBJ_TYPE_BLOB:
112 case GOT_OBJ_TYPE_TAG:
113 return obj->type;
114 default:
115 abort();
116 break;
119 /* not reached */
120 return 0;
123 static const struct got_error *
124 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
126 const struct got_error *err = NULL;
127 char *hex = NULL;
128 char *path_objects = got_repo_get_path_objects(repo);
130 *path = NULL;
132 if (path_objects == NULL)
133 return got_error_from_errno();
135 err = got_object_id_str(&hex, id);
136 if (err)
137 goto done;
139 if (asprintf(path, "%s/%.2x/%s", path_objects,
140 id->sha1[0], hex + 2) == -1)
141 err = got_error_from_errno();
143 done:
144 free(hex);
145 free(path_objects);
146 return err;
149 static const struct got_error *
150 open_loose_object(int *fd, struct got_object *obj, struct got_repository *repo)
152 const struct got_error *err = NULL;
153 char *path;
155 err = object_path(&path, &obj->id, repo);
156 if (err)
157 return err;
158 *fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
159 if (*fd == -1) {
160 err = got_error_from_errno();
161 goto done;
163 done:
164 free(path);
165 return err;
168 static const struct got_error *
169 get_packfile_path(char **path_packfile, struct got_packidx *packidx)
171 size_t size;
173 /* Packfile path contains ".pack" instead of ".idx", so add one byte. */
174 size = strlen(packidx->path_packidx) + 2;
175 if (size < GOT_PACKFILE_NAMELEN + 1)
176 return got_error(GOT_ERR_BAD_PATH);
178 *path_packfile = calloc(size, sizeof(**path_packfile));
179 if (*path_packfile == NULL)
180 return got_error_from_errno();
182 /* Copy up to and excluding ".idx". */
183 if (strlcpy(*path_packfile, packidx->path_packidx,
184 size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
185 return got_error(GOT_ERR_NO_SPACE);
187 if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
188 return got_error(GOT_ERR_NO_SPACE);
190 return NULL;
193 static const struct got_error *
194 open_packed_object(struct got_object **obj, struct got_object_id *id,
195 struct got_repository *repo)
197 const struct got_error *err = NULL;
198 struct got_pack *pack = NULL;
199 struct got_packidx *packidx = NULL;
200 int idx;
201 char *path_packfile;
203 err = got_repo_search_packidx(&packidx, &idx, repo, id);
204 if (err)
205 return err;
207 err = get_packfile_path(&path_packfile, packidx);
208 if (err)
209 return err;
211 pack = got_repo_get_cached_pack(repo, path_packfile);
212 if (pack == NULL) {
213 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
214 if (err)
215 goto done;
218 err = got_object_packed_read_privsep(obj, repo, pack, packidx, idx, id);
219 if (err)
220 goto done;
222 err = got_repo_cache_pack(NULL, repo, (*obj)->path_packfile, packidx);
223 done:
224 free(path_packfile);
225 return err;
228 const struct got_error *
229 got_object_open(struct got_object **obj, struct got_repository *repo,
230 struct got_object_id *id)
232 const struct got_error *err = NULL;
233 char *path;
234 int fd;
236 *obj = got_repo_get_cached_object(repo, id);
237 if (*obj != NULL) {
238 (*obj)->refcnt++;
239 return NULL;
242 err = object_path(&path, id, repo);
243 if (err)
244 return err;
246 fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
247 if (fd == -1) {
248 if (errno != ENOENT) {
249 err = got_error_from_errno();
250 goto done;
252 err = open_packed_object(obj, id, repo);
253 if (err)
254 goto done;
255 if (*obj == NULL)
256 err = got_error(GOT_ERR_NO_OBJ);
257 } else {
258 err = got_object_read_header_privsep(obj, repo, fd);
259 if (err)
260 goto done;
261 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
264 if (err == NULL) {
265 (*obj)->refcnt++;
266 err = got_repo_cache_object(repo, id, *obj);
268 done:
269 free(path);
270 if (fd != -1)
271 close(fd);
272 return err;
276 const struct got_error *
277 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
278 const char *id_str)
280 struct got_object_id id;
282 if (!got_parse_sha1_digest(id.sha1, id_str))
283 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
285 return got_object_open(obj, repo, &id);
288 const struct got_error *
289 got_object_open_as_commit(struct got_commit_object **commit,
290 struct got_repository *repo, struct got_object_id *id)
292 const struct got_error *err;
293 struct got_object *obj;
295 *commit = NULL;
297 err = got_object_open(&obj, repo, id);
298 if (err)
299 return err;
300 if (got_object_get_type(obj) != GOT_OBJ_TYPE_COMMIT) {
301 err = got_error(GOT_ERR_OBJ_TYPE);
302 goto done;
305 err = got_object_commit_open(commit, repo, obj);
306 done:
307 got_object_close(obj);
308 return err;
311 const struct got_error *
312 got_object_qid_alloc(struct got_object_qid **qid, struct got_object_id *id)
314 const struct got_error *err = NULL;
316 *qid = calloc(1, sizeof(**qid));
317 if (*qid == NULL)
318 return got_error_from_errno();
320 (*qid)->id = got_object_id_dup(id);
321 if ((*qid)->id == NULL) {
322 err = got_error_from_errno();
323 got_object_qid_free(*qid);
324 *qid = NULL;
325 return err;
328 return NULL;
331 static const struct got_error *
332 extract_packed_object_to_mem(uint8_t **buf, size_t *len,
333 struct got_object *obj, struct got_repository *repo)
335 const struct got_error *err = NULL;
336 struct got_pack *pack;
338 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
339 return got_error(GOT_ERR_OBJ_NOT_PACKED);
341 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
342 if (pack == NULL) {
343 err = got_repo_cache_pack(&pack, repo,
344 obj->path_packfile, NULL);
345 if (err)
346 return err;
349 return got_packfile_extract_object_to_mem(buf, len, obj, pack);
352 const struct got_error *
353 got_object_commit_open(struct got_commit_object **commit,
354 struct got_repository *repo, struct got_object *obj)
356 const struct got_error *err = NULL;
358 *commit = got_repo_get_cached_commit(repo, &obj->id);
359 if (*commit != NULL) {
360 (*commit)->refcnt++;
361 return NULL;
364 if (obj->type != GOT_OBJ_TYPE_COMMIT)
365 return got_error(GOT_ERR_OBJ_TYPE);
367 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
368 struct got_pack *pack;
369 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
370 if (pack == NULL) {
371 err = got_repo_cache_pack(&pack, repo,
372 obj->path_packfile, NULL);
373 if (err)
374 return err;
376 err = got_object_read_packed_commit_privsep(commit, obj, pack);
377 } else {
378 int fd;
379 err = open_loose_object(&fd, obj, repo);
380 if (err)
381 return err;
382 err = got_object_read_commit_privsep(commit, obj, fd, repo);
383 close(fd);
386 if (err == NULL) {
387 (*commit)->refcnt++;
388 err = got_repo_cache_commit(repo, &obj->id, *commit);
391 return err;
394 const struct got_error *
395 got_object_tree_open(struct got_tree_object **tree,
396 struct got_repository *repo, struct got_object *obj)
398 const struct got_error *err = NULL;
400 *tree = got_repo_get_cached_tree(repo, &obj->id);
401 if (*tree != NULL) {
402 (*tree)->refcnt++;
403 return NULL;
406 if (obj->type != GOT_OBJ_TYPE_TREE)
407 return got_error(GOT_ERR_OBJ_TYPE);
409 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
410 uint8_t *buf;
411 size_t len;
412 err = extract_packed_object_to_mem(&buf, &len, obj, repo);
413 if (err)
414 return err;
415 obj->size = len;
416 err = got_object_parse_tree(tree, buf, len);
417 free(buf);
418 } else {
419 int fd;
420 err = open_loose_object(&fd, obj, repo);
421 if (err)
422 return err;
423 err = got_object_read_tree_privsep(tree, obj, fd, repo);
424 close(fd);
427 if (err == NULL) {
428 (*tree)->refcnt++;
429 err = got_repo_cache_tree(repo, &obj->id, *tree);
432 return err;
435 const struct got_error *
436 got_object_open_as_tree(struct got_tree_object **tree,
437 struct got_repository *repo, struct got_object_id *id)
439 const struct got_error *err;
440 struct got_object *obj;
442 *tree = NULL;
444 err = got_object_open(&obj, repo, id);
445 if (err)
446 return err;
447 if (got_object_get_type(obj) != GOT_OBJ_TYPE_TREE) {
448 err = got_error(GOT_ERR_OBJ_TYPE);
449 goto done;
452 err = got_object_tree_open(tree, repo, obj);
453 done:
454 got_object_close(obj);
455 return err;
458 const struct got_tree_entries *
459 got_object_tree_get_entries(struct got_tree_object *tree)
461 return &tree->entries;
464 static const struct got_error *
465 extract_packed_object(FILE **f, struct got_object *obj,
466 struct got_repository *repo)
468 const struct got_error *err = NULL;
469 struct got_pack *pack;
470 int fd;
472 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
473 return got_error(GOT_ERR_OBJ_NOT_PACKED);
475 fd = got_opentempfd();
476 if (fd == -1)
477 return got_error_from_errno();
479 *f = fdopen(fd, "w+");
480 if (*f == NULL) {
481 err = got_error_from_errno();
482 goto done;
485 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
486 if (pack == NULL) {
487 err = got_repo_cache_pack(&pack, repo,
488 obj->path_packfile, NULL);
489 if (err)
490 goto done;
493 err = got_packfile_extract_object(pack, obj, *f);
494 done:
495 if (err) {
496 if (*f == NULL)
497 close(fd);
498 else
499 fclose(*f);
500 *f = NULL;
502 return err;
505 const struct got_error *
506 got_object_blob_open(struct got_blob_object **blob,
507 struct got_repository *repo, struct got_object *obj, size_t blocksize)
509 const struct got_error *err = NULL;
511 if (obj->type != GOT_OBJ_TYPE_BLOB)
512 return got_error(GOT_ERR_OBJ_TYPE);
514 if (blocksize < obj->hdrlen)
515 return got_error(GOT_ERR_NO_SPACE);
517 *blob = calloc(1, sizeof(**blob));
518 if (*blob == NULL)
519 return got_error_from_errno();
521 (*blob)->read_buf = malloc(blocksize);
522 if ((*blob)->read_buf == NULL) {
523 err = got_error_from_errno();
524 goto done;
526 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
527 err = extract_packed_object(&((*blob)->f), obj, repo);
528 if (err)
529 goto done;
530 } else {
531 int infd, outfd;
532 size_t size;
533 struct stat sb;
535 err = open_loose_object(&infd, obj, repo);
536 if (err)
537 goto done;
540 outfd = got_opentempfd();
541 if (outfd == -1) {
542 err = got_error_from_errno();
543 close(infd);
544 goto done;
547 err = got_object_read_blob_privsep(&size, outfd, infd, repo);
548 close(infd);
549 if (err)
550 goto done;
552 if (size != obj->hdrlen + obj->size) {
553 err = got_error(GOT_ERR_PRIVSEP_LEN);
554 close(outfd);
555 goto done;
558 if (fstat(outfd, &sb) == -1) {
559 err = got_error_from_errno();
560 close(outfd);
561 goto done;
564 if (sb.st_size != size) {
565 err = got_error(GOT_ERR_PRIVSEP_LEN);
566 close(outfd);
567 goto done;
570 (*blob)->f = fdopen(outfd, "rb");
571 if ((*blob)->f == NULL) {
572 err = got_error_from_errno();
573 close(outfd);
574 goto done;
578 (*blob)->hdrlen = obj->hdrlen;
579 (*blob)->blocksize = blocksize;
580 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
582 done:
583 if (err && *blob) {
584 if ((*blob)->f)
585 fclose((*blob)->f);
586 free((*blob)->read_buf);
587 free(*blob);
588 *blob = NULL;
590 return err;
593 const struct got_error *
594 got_object_open_as_blob(struct got_blob_object **blob,
595 struct got_repository *repo, struct got_object_id *id,
596 size_t blocksize)
598 const struct got_error *err;
599 struct got_object *obj;
601 *blob = NULL;
603 err = got_object_open(&obj, repo, id);
604 if (err)
605 return err;
606 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
607 err = got_error(GOT_ERR_OBJ_TYPE);
608 goto done;
611 err = got_object_blob_open(blob, repo, obj, blocksize);
612 done:
613 got_object_close(obj);
614 return err;
617 void
618 got_object_blob_close(struct got_blob_object *blob)
620 free(blob->read_buf);
621 fclose(blob->f);
622 free(blob);
625 char *
626 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
628 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
631 size_t
632 got_object_blob_get_hdrlen(struct got_blob_object *blob)
634 return blob->hdrlen;
637 const uint8_t *
638 got_object_blob_get_read_buf(struct got_blob_object *blob)
640 return blob->read_buf;
643 const struct got_error *
644 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
646 size_t n;
648 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
649 if (n == 0 && ferror(blob->f))
650 return got_ferror(blob->f, GOT_ERR_IO);
651 *outlenp = n;
652 return NULL;
655 const struct got_error *
656 got_object_blob_dump_to_file(size_t *total_len, size_t *nlines,
657 FILE *outfile, struct got_blob_object *blob)
659 const struct got_error *err = NULL;
660 size_t len, hdrlen;
661 const uint8_t *buf;
662 int i;
664 if (total_len)
665 *total_len = 0;
666 if (nlines)
667 *nlines = 0;
669 hdrlen = got_object_blob_get_hdrlen(blob);
670 do {
671 err = got_object_blob_read_block(&len, blob);
672 if (err)
673 return err;
674 if (len == 0)
675 break;
676 if (total_len)
677 *total_len += len;
678 buf = got_object_blob_get_read_buf(blob);
679 if (nlines) {
680 for (i = 0; i < len; i++) {
681 if (buf[i] == '\n')
682 (*nlines)++;
685 /* Skip blob object header first time around. */
686 fwrite(buf + hdrlen, len - hdrlen, 1, outfile);
687 hdrlen = 0;
688 } while (len != 0);
690 fflush(outfile);
691 rewind(outfile);
693 return NULL;
696 static struct got_tree_entry *
697 find_entry_by_name(struct got_tree_object *tree, const char *name)
699 struct got_tree_entry *te;
701 SIMPLEQ_FOREACH(te, &tree->entries.head, entry) {
702 if (strcmp(te->name, name) == 0)
703 return te;
705 return NULL;
708 const struct got_error *
709 got_object_open_by_path(struct got_object **obj, struct got_repository *repo,
710 struct got_object_id *commit_id, const char *path)
712 const struct got_error *err = NULL;
713 struct got_commit_object *commit = NULL;
714 struct got_tree_object *tree = NULL;
715 struct got_tree_entry *te = NULL;
716 char *seg, *s, *s0 = NULL;
717 size_t len = strlen(path);
719 *obj = NULL;
721 /* We are expecting an absolute in-repository path. */
722 if (path[0] != '/')
723 return got_error(GOT_ERR_NOT_ABSPATH);
725 err = got_object_open_as_commit(&commit, repo, commit_id);
726 if (err)
727 goto done;
729 /* Handle opening of root of commit's tree. */
730 if (path[1] == '\0') {
731 err = got_object_open(obj, repo, commit->tree_id);
732 goto done;
735 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
736 if (err)
737 goto done;
739 s0 = strdup(path);
740 if (s0 == NULL) {
741 err = got_error_from_errno();
742 goto done;
744 err = got_canonpath(path, s0, len + 1);
745 if (err)
746 goto done;
748 s = s0;
749 s++; /* skip leading '/' */
750 len--;
751 seg = s;
752 while (len > 0) {
753 struct got_tree_object *next_tree;
755 if (*s != '/') {
756 s++;
757 len--;
758 if (*s)
759 continue;
762 /* end of path segment */
763 *s = '\0';
765 te = find_entry_by_name(tree, seg);
766 if (te == NULL) {
767 err = got_error(GOT_ERR_NO_OBJ);
768 goto done;
771 if (len == 0)
772 break;
774 seg = s + 1;
775 s++;
776 len--;
777 if (*s) {
778 err = got_object_open_as_tree(&next_tree, repo,
779 te->id);
780 te = NULL;
781 if (err)
782 goto done;
783 got_object_tree_close(tree);
784 tree = next_tree;
788 if (te)
789 err = got_object_open(obj, repo, te->id);
790 else
791 err = got_error(GOT_ERR_NO_OBJ);
792 done:
793 free(s0);
794 if (commit)
795 got_object_commit_close(commit);
796 if (tree)
797 got_object_tree_close(tree);
798 return err;