Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/socket.h>
22 #include <sys/wait.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdint.h>
30 #include <sha1.h>
31 #include <zlib.h>
32 #include <ctype.h>
33 #include <limits.h>
34 #include <imsg.h>
35 #include <time.h>
37 #include "got_error.h"
38 #include "got_object.h"
39 #include "got_repository.h"
40 #include "got_opentemp.h"
42 #include "got_lib_sha1.h"
43 #include "got_lib_delta.h"
44 #include "got_lib_pack.h"
45 #include "got_lib_path.h"
46 #include "got_lib_inflate.h"
47 #include "got_lib_object.h"
48 #include "got_lib_privsep.h"
49 #include "got_lib_object_parse.h"
50 #include "got_lib_repository.h"
/*
 * Fallback definition of MIN(), used only if no earlier header defined it.
 * Note that each argument appears twice in the expansion, so arguments
 * with side effects will be evaluated more than once.
 */
52 #ifndef MIN
53 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
54 #endif
56 const struct got_error *
57 got_object_id_str(char **outbuf, struct got_object_id *id)
58 {
59 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
61 *outbuf = malloc(len);
62 if (*outbuf == NULL)
63 return got_error_from_errno();
65 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
66 free(*outbuf);
67 *outbuf = NULL;
68 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
69 }
71 return NULL;
72 }
74 int
75 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
76 {
77 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
78 }
80 struct got_object_id *
81 got_object_id_dup(struct got_object_id *id1)
82 {
83 struct got_object_id *id2;
85 id2 = malloc(sizeof(*id2));
86 if (id2 == NULL)
87 return NULL;
88 memcpy(id2, id1, sizeof(*id2));
89 return id2;
90 }
92 struct got_object_id *
93 got_object_get_id(struct got_object *obj)
94 {
95 return got_object_id_dup(&obj->id);
96 }
98 const struct got_error *
99 got_object_get_id_str(char **outbuf, struct got_object *obj)
101 return got_object_id_str(outbuf, &obj->id);
104 int
105 got_object_get_type(struct got_object *obj)
107 switch (obj->type) {
108 case GOT_OBJ_TYPE_COMMIT:
109 case GOT_OBJ_TYPE_TREE:
110 case GOT_OBJ_TYPE_BLOB:
111 case GOT_OBJ_TYPE_TAG:
112 return obj->type;
113 default:
114 abort();
115 break;
118 /* not reached */
119 return 0;
122 static const struct got_error *
123 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
125 const struct got_error *err = NULL;
126 char *hex = NULL;
127 char *path_objects = got_repo_get_path_objects(repo);
129 *path = NULL;
131 if (path_objects == NULL)
132 return got_error_from_errno();
134 err = got_object_id_str(&hex, id);
135 if (err)
136 goto done;
138 if (asprintf(path, "%s/%.2x/%s", path_objects,
139 id->sha1[0], hex + 2) == -1)
140 err = got_error_from_errno();
142 done:
143 free(hex);
144 free(path_objects);
145 return err;
148 static const struct got_error *
149 open_loose_object(int *fd, struct got_object *obj, struct got_repository *repo)
151 const struct got_error *err = NULL;
152 char *path;
154 err = object_path(&path, &obj->id, repo);
155 if (err)
156 return err;
157 *fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
158 if (*fd == -1) {
159 err = got_error_from_errno();
160 goto done;
162 done:
163 free(path);
164 return err;
167 static const struct got_error *
168 get_packfile_path(char **path_packfile, struct got_packidx *packidx)
170 size_t size;
172 /* Packfile path contains ".pack" instead of ".idx", so add one byte. */
173 size = strlen(packidx->path_packidx) + 2;
174 if (size < GOT_PACKFILE_NAMELEN + 1)
175 return got_error(GOT_ERR_BAD_PATH);
177 *path_packfile = calloc(size, sizeof(**path_packfile));
178 if (*path_packfile == NULL)
179 return got_error_from_errno();
181 /* Copy up to and excluding ".idx". */
182 if (strlcpy(*path_packfile, packidx->path_packidx,
183 size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
184 return got_error(GOT_ERR_NO_SPACE);
186 if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
187 return got_error(GOT_ERR_NO_SPACE);
189 return NULL;
192 static const struct got_error *
193 open_packed_object(struct got_object **obj, struct got_object_id *id,
194 struct got_repository *repo)
196 const struct got_error *err = NULL;
197 struct got_pack *pack = NULL;
198 struct got_packidx *packidx = NULL;
199 int idx;
200 char *path_packfile;
202 err = got_repo_search_packidx(&packidx, &idx, repo, id);
203 if (err)
204 return err;
206 err = get_packfile_path(&path_packfile, packidx);
207 if (err)
208 return err;
210 pack = got_repo_get_cached_pack(repo, path_packfile);
211 if (pack == NULL) {
212 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
213 if (err)
214 goto done;
217 err = got_packfile_open_object(obj, pack, packidx, idx, id);
218 if (err)
219 goto done;
221 err = got_repo_cache_pack(NULL, repo, (*obj)->path_packfile, packidx);
222 done:
223 free(path_packfile);
224 return err;
227 const struct got_error *
228 got_object_open(struct got_object **obj, struct got_repository *repo,
229 struct got_object_id *id)
231 const struct got_error *err = NULL;
232 char *path;
233 int fd;
235 *obj = got_repo_get_cached_object(repo, id);
236 if (*obj != NULL) {
237 (*obj)->refcnt++;
238 return NULL;
241 err = object_path(&path, id, repo);
242 if (err)
243 return err;
245 fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
246 if (fd == -1) {
247 if (errno != ENOENT) {
248 err = got_error_from_errno();
249 goto done;
251 err = open_packed_object(obj, id, repo);
252 if (err)
253 goto done;
254 if (*obj == NULL)
255 err = got_error(GOT_ERR_NO_OBJ);
256 } else {
257 err = got_object_read_header_privsep(obj, repo, fd);
258 if (err)
259 goto done;
260 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
263 if (err == NULL) {
264 (*obj)->refcnt++;
265 err = got_repo_cache_object(repo, id, *obj);
267 done:
268 free(path);
269 if (fd != -1)
270 close(fd);
271 return err;
275 const struct got_error *
276 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
277 const char *id_str)
279 struct got_object_id id;
281 if (!got_parse_sha1_digest(id.sha1, id_str))
282 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
284 return got_object_open(obj, repo, &id);
287 const struct got_error *
288 got_object_open_as_commit(struct got_commit_object **commit,
289 struct got_repository *repo, struct got_object_id *id)
291 const struct got_error *err;
292 struct got_object *obj;
294 *commit = NULL;
296 err = got_object_open(&obj, repo, id);
297 if (err)
298 return err;
299 if (got_object_get_type(obj) != GOT_OBJ_TYPE_COMMIT) {
300 err = got_error(GOT_ERR_OBJ_TYPE);
301 goto done;
304 err = got_object_commit_open(commit, repo, obj);
305 done:
306 got_object_close(obj);
307 return err;
310 const struct got_error *
311 got_object_qid_alloc(struct got_object_qid **qid, struct got_object_id *id)
313 const struct got_error *err = NULL;
315 *qid = calloc(1, sizeof(**qid));
316 if (*qid == NULL)
317 return got_error_from_errno();
319 (*qid)->id = got_object_id_dup(id);
320 if ((*qid)->id == NULL) {
321 err = got_error_from_errno();
322 got_object_qid_free(*qid);
323 *qid = NULL;
324 return err;
327 return NULL;
330 static const struct got_error *
331 extract_packed_object_to_mem(uint8_t **buf, size_t *len,
332 struct got_object *obj, struct got_repository *repo)
334 const struct got_error *err = NULL;
335 struct got_pack *pack;
337 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
338 return got_error(GOT_ERR_OBJ_NOT_PACKED);
340 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
341 if (pack == NULL) {
342 err = got_repo_cache_pack(&pack, repo,
343 obj->path_packfile, NULL);
344 if (err)
345 return err;
348 return got_packfile_extract_object_to_mem(buf, len, obj, pack);
351 const struct got_error *
352 got_object_commit_open(struct got_commit_object **commit,
353 struct got_repository *repo, struct got_object *obj)
355 const struct got_error *err = NULL;
357 *commit = got_repo_get_cached_commit(repo, &obj->id);
358 if (*commit != NULL) {
359 (*commit)->refcnt++;
360 return NULL;
363 if (obj->type != GOT_OBJ_TYPE_COMMIT)
364 return got_error(GOT_ERR_OBJ_TYPE);
366 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
367 uint8_t *buf;
368 size_t len;
369 err = extract_packed_object_to_mem(&buf, &len, obj, repo);
370 if (err)
371 return err;
372 obj->size = len;
373 err = got_object_parse_commit(commit, buf, len);
374 free(buf);
375 } else {
376 int fd;
377 err = open_loose_object(&fd, obj, repo);
378 if (err)
379 return err;
380 err = got_object_read_commit_privsep(commit, obj, fd, repo);
381 close(fd);
384 if (err == NULL) {
385 (*commit)->refcnt++;
386 err = got_repo_cache_commit(repo, &obj->id, *commit);
389 return err;
392 const struct got_error *
393 got_object_tree_open(struct got_tree_object **tree,
394 struct got_repository *repo, struct got_object *obj)
396 const struct got_error *err = NULL;
398 *tree = got_repo_get_cached_tree(repo, &obj->id);
399 if (*tree != NULL) {
400 (*tree)->refcnt++;
401 return NULL;
404 if (obj->type != GOT_OBJ_TYPE_TREE)
405 return got_error(GOT_ERR_OBJ_TYPE);
407 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
408 uint8_t *buf;
409 size_t len;
410 err = extract_packed_object_to_mem(&buf, &len, obj, repo);
411 if (err)
412 return err;
413 obj->size = len;
414 err = got_object_parse_tree(tree, buf, len);
415 free(buf);
416 } else {
417 int fd;
418 err = open_loose_object(&fd, obj, repo);
419 if (err)
420 return err;
421 err = got_object_read_tree_privsep(tree, obj, fd, repo);
422 close(fd);
425 if (err == NULL) {
426 (*tree)->refcnt++;
427 err = got_repo_cache_tree(repo, &obj->id, *tree);
430 return err;
433 const struct got_error *
434 got_object_open_as_tree(struct got_tree_object **tree,
435 struct got_repository *repo, struct got_object_id *id)
437 const struct got_error *err;
438 struct got_object *obj;
440 *tree = NULL;
442 err = got_object_open(&obj, repo, id);
443 if (err)
444 return err;
445 if (got_object_get_type(obj) != GOT_OBJ_TYPE_TREE) {
446 err = got_error(GOT_ERR_OBJ_TYPE);
447 goto done;
450 err = got_object_tree_open(tree, repo, obj);
451 done:
452 got_object_close(obj);
453 return err;
456 const struct got_tree_entries *
457 got_object_tree_get_entries(struct got_tree_object *tree)
459 return &tree->entries;
462 static const struct got_error *
463 extract_packed_object(FILE **f, struct got_object *obj,
464 struct got_repository *repo)
466 const struct got_error *err = NULL;
467 struct got_pack *pack;
468 int fd;
470 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
471 return got_error(GOT_ERR_OBJ_NOT_PACKED);
473 fd = got_opentempfd();
474 if (fd == -1)
475 return got_error_from_errno();
477 *f = fdopen(fd, "w+");
478 if (*f == NULL) {
479 err = got_error_from_errno();
480 goto done;
483 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
484 if (pack == NULL) {
485 err = got_repo_cache_pack(&pack, repo,
486 obj->path_packfile, NULL);
487 if (err)
488 goto done;
491 err = got_packfile_extract_object(pack, obj, *f);
492 done:
493 if (err) {
494 if (*f == NULL)
495 close(fd);
496 else
497 fclose(*f);
498 *f = NULL;
500 return err;
503 const struct got_error *
504 got_object_blob_open(struct got_blob_object **blob,
505 struct got_repository *repo, struct got_object *obj, size_t blocksize)
507 const struct got_error *err = NULL;
509 if (obj->type != GOT_OBJ_TYPE_BLOB)
510 return got_error(GOT_ERR_OBJ_TYPE);
512 if (blocksize < obj->hdrlen)
513 return got_error(GOT_ERR_NO_SPACE);
515 *blob = calloc(1, sizeof(**blob));
516 if (*blob == NULL)
517 return got_error_from_errno();
519 (*blob)->read_buf = malloc(blocksize);
520 if ((*blob)->read_buf == NULL) {
521 err = got_error_from_errno();
522 goto done;
524 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
525 err = extract_packed_object(&((*blob)->f), obj, repo);
526 if (err)
527 goto done;
528 } else {
529 int infd, outfd;
530 size_t size;
531 struct stat sb;
533 err = open_loose_object(&infd, obj, repo);
534 if (err)
535 goto done;
538 outfd = got_opentempfd();
539 if (outfd == -1) {
540 err = got_error_from_errno();
541 close(infd);
542 goto done;
545 err = got_object_read_blob_privsep(&size, outfd, infd, repo);
546 close(infd);
547 if (err)
548 goto done;
550 if (size != obj->hdrlen + obj->size) {
551 err = got_error(GOT_ERR_PRIVSEP_LEN);
552 close(outfd);
553 goto done;
556 if (fstat(outfd, &sb) == -1) {
557 err = got_error_from_errno();
558 close(outfd);
559 goto done;
562 if (sb.st_size != size) {
563 err = got_error(GOT_ERR_PRIVSEP_LEN);
564 close(outfd);
565 goto done;
568 (*blob)->f = fdopen(outfd, "rb");
569 if ((*blob)->f == NULL) {
570 err = got_error_from_errno();
571 close(outfd);
572 goto done;
576 (*blob)->hdrlen = obj->hdrlen;
577 (*blob)->blocksize = blocksize;
578 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
580 done:
581 if (err && *blob) {
582 if ((*blob)->f)
583 fclose((*blob)->f);
584 free((*blob)->read_buf);
585 free(*blob);
586 *blob = NULL;
588 return err;
591 const struct got_error *
592 got_object_open_as_blob(struct got_blob_object **blob,
593 struct got_repository *repo, struct got_object_id *id,
594 size_t blocksize)
596 const struct got_error *err;
597 struct got_object *obj;
599 *blob = NULL;
601 err = got_object_open(&obj, repo, id);
602 if (err)
603 return err;
604 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
605 err = got_error(GOT_ERR_OBJ_TYPE);
606 goto done;
609 err = got_object_blob_open(blob, repo, obj, blocksize);
610 done:
611 got_object_close(obj);
612 return err;
615 void
616 got_object_blob_close(struct got_blob_object *blob)
618 free(blob->read_buf);
619 fclose(blob->f);
620 free(blob);
623 char *
624 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
626 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
629 size_t
630 got_object_blob_get_hdrlen(struct got_blob_object *blob)
632 return blob->hdrlen;
635 const uint8_t *
636 got_object_blob_get_read_buf(struct got_blob_object *blob)
638 return blob->read_buf;
641 const struct got_error *
642 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
644 size_t n;
646 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
647 if (n == 0 && ferror(blob->f))
648 return got_ferror(blob->f, GOT_ERR_IO);
649 *outlenp = n;
650 return NULL;
653 const struct got_error *
654 got_object_blob_dump_to_file(size_t *total_len, size_t *nlines,
655 FILE *outfile, struct got_blob_object *blob)
657 const struct got_error *err = NULL;
658 size_t len, hdrlen;
659 const uint8_t *buf;
660 int i;
662 if (total_len)
663 *total_len = 0;
664 if (nlines)
665 *nlines = 0;
667 hdrlen = got_object_blob_get_hdrlen(blob);
668 do {
669 err = got_object_blob_read_block(&len, blob);
670 if (err)
671 return err;
672 if (len == 0)
673 break;
674 if (total_len)
675 *total_len += len;
676 buf = got_object_blob_get_read_buf(blob);
677 if (nlines) {
678 for (i = 0; i < len; i++) {
679 if (buf[i] == '\n')
680 (*nlines)++;
683 /* Skip blob object header first time around. */
684 fwrite(buf + hdrlen, len - hdrlen, 1, outfile);
685 hdrlen = 0;
686 } while (len != 0);
688 fflush(outfile);
689 rewind(outfile);
691 return NULL;
694 static struct got_tree_entry *
695 find_entry_by_name(struct got_tree_object *tree, const char *name)
697 struct got_tree_entry *te;
699 SIMPLEQ_FOREACH(te, &tree->entries.head, entry) {
700 if (strcmp(te->name, name) == 0)
701 return te;
703 return NULL;
706 const struct got_error *
707 got_object_open_by_path(struct got_object **obj, struct got_repository *repo,
708 struct got_object_id *commit_id, const char *path)
710 const struct got_error *err = NULL;
711 struct got_commit_object *commit = NULL;
712 struct got_tree_object *tree = NULL;
713 struct got_tree_entry *te = NULL;
714 char *seg, *s, *s0 = NULL;
715 size_t len = strlen(path);
717 *obj = NULL;
719 /* We are expecting an absolute in-repository path. */
720 if (path[0] != '/')
721 return got_error(GOT_ERR_NOT_ABSPATH);
723 err = got_object_open_as_commit(&commit, repo, commit_id);
724 if (err)
725 goto done;
727 /* Handle opening of root of commit's tree. */
728 if (path[1] == '\0') {
729 err = got_object_open(obj, repo, commit->tree_id);
730 goto done;
733 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
734 if (err)
735 goto done;
737 s0 = strdup(path);
738 if (s0 == NULL) {
739 err = got_error_from_errno();
740 goto done;
742 err = got_canonpath(path, s0, len + 1);
743 if (err)
744 goto done;
746 s = s0;
747 s++; /* skip leading '/' */
748 len--;
749 seg = s;
750 while (len > 0) {
751 struct got_tree_object *next_tree;
753 if (*s != '/') {
754 s++;
755 len--;
756 if (*s)
757 continue;
760 /* end of path segment */
761 *s = '\0';
763 te = find_entry_by_name(tree, seg);
764 if (te == NULL) {
765 err = got_error(GOT_ERR_NO_OBJ);
766 goto done;
769 if (len == 0)
770 break;
772 seg = s + 1;
773 s++;
774 len--;
775 if (*s) {
776 err = got_object_open_as_tree(&next_tree, repo,
777 te->id);
778 te = NULL;
779 if (err)
780 goto done;
781 got_object_tree_close(tree);
782 tree = next_tree;
786 if (te)
787 err = got_object_open(obj, repo, te->id);
788 else
789 err = got_error(GOT_ERR_NO_OBJ);
790 done:
791 free(s0);
792 if (commit)
793 got_object_commit_close(commit);
794 if (tree)
795 got_object_tree_close(tree);
796 return err;