Blob


/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/socket.h>
22 #include <sys/wait.h>
23 #include <sys/syslimits.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include <sha1.h>
32 #include <zlib.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <imsg.h>
36 #include <time.h>
38 #include "got_error.h"
39 #include "got_object.h"
40 #include "got_repository.h"
41 #include "got_opentemp.h"
43 #include "got_lib_sha1.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_pack.h"
46 #include "got_lib_path.h"
47 #include "got_lib_inflate.h"
48 #include "got_lib_object.h"
49 #include "got_lib_privsep.h"
50 #include "got_lib_object_idcache.h"
51 #include "got_lib_object_cache.h"
52 #include "got_lib_object_parse.h"
53 #include "got_lib_repository.h"
/*
 * Smaller of two values. Only defined here if no header provided MIN.
 * Each argument may be evaluated twice, so avoid side effects in arguments.
 */
#if !defined(MIN)
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
59 int
60 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
61 {
62 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
63 }
65 struct got_object_id *
66 got_object_id_dup(struct got_object_id *id1)
67 {
68 struct got_object_id *id2;
70 id2 = malloc(sizeof(*id2));
71 if (id2 == NULL)
72 return NULL;
73 memcpy(id2, id1, sizeof(*id2));
74 return id2;
75 }
77 struct got_object_id *
78 got_object_get_id(struct got_object *obj)
79 {
80 return &obj->id;
81 }
83 const struct got_error *
84 got_object_get_id_str(char **outbuf, struct got_object *obj)
85 {
86 return got_object_id_str(outbuf, &obj->id);
87 }
89 int
90 got_object_get_type(struct got_object *obj)
91 {
92 switch (obj->type) {
93 case GOT_OBJ_TYPE_COMMIT:
94 case GOT_OBJ_TYPE_TREE:
95 case GOT_OBJ_TYPE_BLOB:
96 case GOT_OBJ_TYPE_TAG:
97 return obj->type;
98 default:
99 abort();
100 break;
103 /* not reached */
104 return 0;
107 static const struct got_error *
108 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
110 const struct got_error *err = NULL;
111 char *hex = NULL;
112 char *path_objects = got_repo_get_path_objects(repo);
114 *path = NULL;
116 if (path_objects == NULL)
117 return got_error_from_errno();
119 err = got_object_id_str(&hex, id);
120 if (err)
121 goto done;
123 if (asprintf(path, "%s/%.2x/%s", path_objects,
124 id->sha1[0], hex + 2) == -1)
125 err = got_error_from_errno();
127 done:
128 free(hex);
129 free(path_objects);
130 return err;
133 static const struct got_error *
134 open_loose_object(int *fd, struct got_object *obj, struct got_repository *repo)
136 const struct got_error *err = NULL;
137 char *path;
139 err = object_path(&path, &obj->id, repo);
140 if (err)
141 return err;
142 *fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
143 if (*fd == -1) {
144 err = got_error_from_errno();
145 goto done;
147 done:
148 free(path);
149 return err;
152 static const struct got_error *
153 get_packfile_path(char **path_packfile, struct got_packidx *packidx)
155 size_t size;
157 /* Packfile path contains ".pack" instead of ".idx", so add one byte. */
158 size = strlen(packidx->path_packidx) + 2;
159 if (size < GOT_PACKFILE_NAMELEN + 1)
160 return got_error(GOT_ERR_BAD_PATH);
162 *path_packfile = calloc(size, sizeof(**path_packfile));
163 if (*path_packfile == NULL)
164 return got_error_from_errno();
166 /* Copy up to and excluding ".idx". */
167 if (strlcpy(*path_packfile, packidx->path_packidx,
168 size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
169 return got_error(GOT_ERR_NO_SPACE);
171 if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
172 return got_error(GOT_ERR_NO_SPACE);
174 return NULL;
177 static const struct got_error *
178 open_packed_object(struct got_object **obj, struct got_object_id *id,
179 struct got_repository *repo)
181 const struct got_error *err = NULL;
182 struct got_pack *pack = NULL;
183 struct got_packidx *packidx = NULL;
184 int idx;
185 char *path_packfile;
187 err = got_repo_search_packidx(&packidx, &idx, repo, id);
188 if (err)
189 return err;
191 err = get_packfile_path(&path_packfile, packidx);
192 if (err)
193 return err;
195 pack = got_repo_get_cached_pack(repo, path_packfile);
196 if (pack == NULL) {
197 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
198 if (err)
199 goto done;
202 err = got_object_packed_read_privsep(obj, repo, pack, packidx, idx, id);
203 if (err)
204 goto done;
206 err = got_repo_cache_pack(NULL, repo, (*obj)->path_packfile, packidx);
207 done:
208 free(path_packfile);
209 return err;
212 const struct got_error *
213 got_object_open(struct got_object **obj, struct got_repository *repo,
214 struct got_object_id *id)
216 const struct got_error *err = NULL;
217 char *path;
218 int fd;
220 *obj = got_repo_get_cached_object(repo, id);
221 if (*obj != NULL) {
222 (*obj)->refcnt++;
223 return NULL;
226 err = open_packed_object(obj, id, repo);
227 if (err && err->code != GOT_ERR_NO_OBJ)
228 return err;
229 if (*obj) {
230 (*obj)->refcnt++;
231 return got_repo_cache_object(repo, id, *obj);
234 err = object_path(&path, id, repo);
235 if (err)
236 return err;
238 fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
239 if (fd == -1) {
240 if (errno == ENOENT)
241 err = got_error(GOT_ERR_NO_OBJ);
242 else
243 err = got_error_from_errno();
244 goto done;
245 } else {
246 err = got_object_read_header_privsep(obj, repo, fd);
247 if (err)
248 goto done;
249 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
252 (*obj)->refcnt++;
253 err = got_repo_cache_object(repo, id, *obj);
254 done:
255 free(path);
256 if (fd != -1)
257 close(fd);
258 return err;
262 const struct got_error *
263 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
264 const char *id_str)
266 struct got_object_id id;
268 if (!got_parse_sha1_digest(id.sha1, id_str))
269 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
271 return got_object_open(obj, repo, &id);
274 const struct got_error *
275 got_object_open_as_commit(struct got_commit_object **commit,
276 struct got_repository *repo, struct got_object_id *id)
278 const struct got_error *err;
279 struct got_object *obj;
281 *commit = got_repo_get_cached_commit(repo, id);
282 if (*commit != NULL) {
283 (*commit)->refcnt++;
284 return NULL;
287 err = got_object_open(&obj, repo, id);
288 if (err)
289 return err;
290 if (got_object_get_type(obj) != GOT_OBJ_TYPE_COMMIT) {
291 err = got_error(GOT_ERR_OBJ_TYPE);
292 goto done;
295 err = got_object_commit_open(commit, repo, obj);
296 done:
297 got_object_close(obj);
298 return err;
301 const struct got_error *
302 got_object_qid_alloc(struct got_object_qid **qid, struct got_object_id *id)
304 const struct got_error *err = NULL;
306 *qid = calloc(1, sizeof(**qid));
307 if (*qid == NULL)
308 return got_error_from_errno();
310 (*qid)->id = got_object_id_dup(id);
311 if ((*qid)->id == NULL) {
312 err = got_error_from_errno();
313 got_object_qid_free(*qid);
314 *qid = NULL;
315 return err;
318 return NULL;
321 const struct got_error *
322 got_object_commit_open(struct got_commit_object **commit,
323 struct got_repository *repo, struct got_object *obj)
325 const struct got_error *err = NULL;
327 *commit = got_repo_get_cached_commit(repo, &obj->id);
328 if (*commit != NULL) {
329 (*commit)->refcnt++;
330 return NULL;
333 if (obj->type != GOT_OBJ_TYPE_COMMIT)
334 return got_error(GOT_ERR_OBJ_TYPE);
336 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
337 struct got_pack *pack;
338 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
339 if (pack == NULL) {
340 err = got_repo_cache_pack(&pack, repo,
341 obj->path_packfile, NULL);
342 if (err)
343 return err;
345 err = got_object_read_packed_commit_privsep(commit, obj, pack);
346 } else {
347 int fd;
348 err = open_loose_object(&fd, obj, repo);
349 if (err)
350 return err;
351 err = got_object_read_commit_privsep(commit, obj, fd, repo);
352 close(fd);
355 if (err == NULL) {
356 (*commit)->refcnt++;
357 err = got_repo_cache_commit(repo, &obj->id, *commit);
360 return err;
363 const struct got_error *
364 got_object_tree_open(struct got_tree_object **tree,
365 struct got_repository *repo, struct got_object *obj)
367 const struct got_error *err = NULL;
369 *tree = got_repo_get_cached_tree(repo, &obj->id);
370 if (*tree != NULL) {
371 (*tree)->refcnt++;
372 return NULL;
375 if (obj->type != GOT_OBJ_TYPE_TREE)
376 return got_error(GOT_ERR_OBJ_TYPE);
378 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
379 struct got_pack *pack;
380 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
381 if (pack == NULL) {
382 err = got_repo_cache_pack(&pack, repo,
383 obj->path_packfile, NULL);
384 if (err)
385 return err;
387 err = got_object_read_packed_tree_privsep(tree, obj, pack);
388 } else {
389 int fd;
390 err = open_loose_object(&fd, obj, repo);
391 if (err)
392 return err;
393 err = got_object_read_tree_privsep(tree, obj, fd, repo);
394 close(fd);
397 if (err == NULL) {
398 (*tree)->refcnt++;
399 err = got_repo_cache_tree(repo, &obj->id, *tree);
402 return err;
405 const struct got_error *
406 got_object_open_as_tree(struct got_tree_object **tree,
407 struct got_repository *repo, struct got_object_id *id)
409 const struct got_error *err;
410 struct got_object *obj;
412 *tree = got_repo_get_cached_tree(repo, id);
413 if (*tree != NULL) {
414 (*tree)->refcnt++;
415 return NULL;
418 err = got_object_open(&obj, repo, id);
419 if (err)
420 return err;
421 if (got_object_get_type(obj) != GOT_OBJ_TYPE_TREE) {
422 err = got_error(GOT_ERR_OBJ_TYPE);
423 goto done;
426 err = got_object_tree_open(tree, repo, obj);
427 done:
428 got_object_close(obj);
429 return err;
432 const struct got_tree_entries *
433 got_object_tree_get_entries(struct got_tree_object *tree)
435 return &tree->entries;
438 static const struct got_error *
439 read_packed_blob_privsep(size_t *size, int outfd, struct got_object *obj,
440 struct got_pack *pack)
442 const struct got_error *err = NULL;
443 int outfd_child;
444 int basefd, accumfd; /* temporary files for delta application */
446 basefd = got_opentempfd();
447 if (basefd == -1)
448 return got_error_from_errno();
449 accumfd = got_opentempfd();
450 if (accumfd == -1)
451 return got_error_from_errno();
453 outfd_child = dup(outfd);
454 if (outfd_child == -1)
455 return got_error_from_errno();
457 err = got_privsep_send_obj_req(pack->privsep_child->ibuf, -1, obj);
458 if (err)
459 return err;
461 err = got_privsep_send_blob_outfd(pack->privsep_child->ibuf,
462 outfd_child);
463 if (err) {
464 close(outfd_child);
465 return err;
467 err = got_privsep_send_tmpfd(pack->privsep_child->ibuf,
468 basefd);
469 if (err) {
470 close(basefd);
471 close(accumfd);
472 close(outfd_child);
473 return err;
476 err = got_privsep_send_tmpfd(pack->privsep_child->ibuf,
477 accumfd);
478 if (err) {
479 close(accumfd);
480 close(outfd_child);
481 return err;
484 err = got_privsep_recv_blob(size, pack->privsep_child->ibuf);
485 if (err)
486 return err;
488 if (lseek(outfd, SEEK_SET, 0) == -1)
489 err = got_error_from_errno();
491 return err;
494 const struct got_error *
495 got_object_blob_open(struct got_blob_object **blob,
496 struct got_repository *repo, struct got_object *obj, size_t blocksize)
498 const struct got_error *err = NULL;
499 int outfd;
500 size_t size;
501 struct stat sb;
503 if (obj->type != GOT_OBJ_TYPE_BLOB)
504 return got_error(GOT_ERR_OBJ_TYPE);
506 if (blocksize < obj->hdrlen)
507 return got_error(GOT_ERR_NO_SPACE);
509 *blob = calloc(1, sizeof(**blob));
510 if (*blob == NULL)
511 return got_error_from_errno();
513 outfd = got_opentempfd();
514 if (outfd == -1)
515 return got_error_from_errno();
517 (*blob)->read_buf = malloc(blocksize);
518 if ((*blob)->read_buf == NULL) {
519 err = got_error_from_errno();
520 goto done;
522 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
523 struct got_pack *pack;
524 pack = got_repo_get_cached_pack(repo, obj->path_packfile);
525 if (pack == NULL) {
526 err = got_repo_cache_pack(&pack, repo,
527 obj->path_packfile, NULL);
528 if (err)
529 goto done;
531 err = read_packed_blob_privsep(&size, outfd, obj, pack);
532 if (err)
533 goto done;
534 obj->size = size;
535 } else {
536 int infd;
538 err = open_loose_object(&infd, obj, repo);
539 if (err)
540 goto done;
542 err = got_object_read_blob_privsep(&size, outfd, infd, repo);
543 close(infd);
544 if (err)
545 goto done;
547 if (size != obj->hdrlen + obj->size) {
548 err = got_error(GOT_ERR_PRIVSEP_LEN);
549 goto done;
553 if (fstat(outfd, &sb) == -1) {
554 err = got_error_from_errno();
555 goto done;
558 if (sb.st_size != obj->hdrlen + obj->size) {
559 err = got_error(GOT_ERR_PRIVSEP_LEN);
560 goto done;
563 (*blob)->f = fdopen(outfd, "rb");
564 if ((*blob)->f == NULL) {
565 err = got_error_from_errno();
566 close(outfd);
567 goto done;
570 (*blob)->hdrlen = obj->hdrlen;
571 (*blob)->blocksize = blocksize;
572 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
574 done:
575 if (err) {
576 if (*blob) {
577 if ((*blob)->f)
578 fclose((*blob)->f);
579 free((*blob)->read_buf);
580 free(*blob);
581 *blob = NULL;
582 } else if (outfd != -1)
583 close(outfd);
585 return err;
588 const struct got_error *
589 got_object_open_as_blob(struct got_blob_object **blob,
590 struct got_repository *repo, struct got_object_id *id,
591 size_t blocksize)
593 const struct got_error *err;
594 struct got_object *obj;
596 *blob = NULL;
598 err = got_object_open(&obj, repo, id);
599 if (err)
600 return err;
601 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
602 err = got_error(GOT_ERR_OBJ_TYPE);
603 goto done;
606 err = got_object_blob_open(blob, repo, obj, blocksize);
607 done:
608 got_object_close(obj);
609 return err;
612 void
613 got_object_blob_close(struct got_blob_object *blob)
615 free(blob->read_buf);
616 fclose(blob->f);
617 free(blob);
620 char *
621 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
623 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
626 size_t
627 got_object_blob_get_hdrlen(struct got_blob_object *blob)
629 return blob->hdrlen;
632 const uint8_t *
633 got_object_blob_get_read_buf(struct got_blob_object *blob)
635 return blob->read_buf;
638 const struct got_error *
639 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
641 size_t n;
643 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
644 if (n == 0 && ferror(blob->f))
645 return got_ferror(blob->f, GOT_ERR_IO);
646 *outlenp = n;
647 return NULL;
650 const struct got_error *
651 got_object_blob_dump_to_file(size_t *total_len, size_t *nlines,
652 FILE *outfile, struct got_blob_object *blob)
654 const struct got_error *err = NULL;
655 size_t len, hdrlen;
656 const uint8_t *buf;
657 int i;
659 if (total_len)
660 *total_len = 0;
661 if (nlines)
662 *nlines = 0;
664 hdrlen = got_object_blob_get_hdrlen(blob);
665 do {
666 err = got_object_blob_read_block(&len, blob);
667 if (err)
668 return err;
669 if (len == 0)
670 break;
671 if (total_len)
672 *total_len += len;
673 buf = got_object_blob_get_read_buf(blob);
674 if (nlines) {
675 for (i = 0; i < len; i++) {
676 if (buf[i] == '\n')
677 (*nlines)++;
680 /* Skip blob object header first time around. */
681 fwrite(buf + hdrlen, len - hdrlen, 1, outfile);
682 hdrlen = 0;
683 } while (len != 0);
685 fflush(outfile);
686 rewind(outfile);
688 return NULL;
691 static struct got_tree_entry *
692 find_entry_by_name(struct got_tree_object *tree, const char *name, size_t len)
694 struct got_tree_entry *te;
696 SIMPLEQ_FOREACH(te, &tree->entries.head, entry) {
697 if (strncmp(te->name, name, len) == 0)
698 return te;
700 return NULL;
703 const struct got_error *
704 got_object_id_by_path(struct got_object_id **id, struct got_repository *repo,
705 struct got_object_id *commit_id, const char *path)
707 const struct got_error *err = NULL;
708 struct got_commit_object *commit = NULL;
709 struct got_tree_object *tree = NULL;
710 struct got_tree_entry *te = NULL;
711 const char *seg, *s;
712 size_t seglen, len = strlen(path);
714 *id = NULL;
716 /* We are expecting an absolute in-repository path. */
717 if (path[0] != '/')
718 return got_error(GOT_ERR_NOT_ABSPATH);
720 err = got_object_open_as_commit(&commit, repo, commit_id);
721 if (err)
722 goto done;
724 /* Handle opening of root of commit's tree. */
725 if (path[1] == '\0') {
726 *id = got_object_id_dup(commit->tree_id);
727 if (*id == NULL)
728 err = got_error_from_errno();
729 goto done;
732 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
733 if (err)
734 goto done;
736 s = path;
737 s++; /* skip leading '/' */
738 len--;
739 seg = s;
740 seglen = 0;
741 while (len > 0) {
742 struct got_tree_object *next_tree;
744 if (*s != '/') {
745 s++;
746 len--;
747 seglen++;
748 if (*s)
749 continue;
752 te = find_entry_by_name(tree, seg, seglen);
753 if (te == NULL) {
754 err = got_error(GOT_ERR_NO_OBJ);
755 goto done;
758 if (len == 0)
759 break;
761 seg = s + 1;
762 seglen = 0;
763 s++;
764 len--;
765 if (*s) {
766 err = got_object_open_as_tree(&next_tree, repo,
767 te->id);
768 te = NULL;
769 if (err)
770 goto done;
771 got_object_tree_close(tree);
772 tree = next_tree;
776 if (te) {
777 *id = got_object_id_dup(te->id);
778 if (*id == NULL)
779 return got_error_from_errno();
780 } else
781 err = got_error(GOT_ERR_NO_OBJ);
782 done:
783 if (commit)
784 got_object_commit_close(commit);
785 if (tree)
786 got_object_tree_close(tree);
787 return err;
790 const struct got_error *
791 got_object_tree_path_changed(int *changed,
792 struct got_tree_object *tree01, struct got_tree_object *tree02,
793 const char *path, struct got_repository *repo)
795 const struct got_error *err = NULL;
796 struct got_tree_object *tree1 = NULL, *tree2 = NULL;
797 struct got_tree_entry *te1 = NULL, *te2 = NULL;
798 const char *seg, *s;
799 size_t seglen, len = strlen(path);
801 *changed = 0;
803 /* We are expecting an absolute in-repository path. */
804 if (path[0] != '/')
805 return got_error(GOT_ERR_NOT_ABSPATH);
807 /* We not do support comparing the root path. */
808 if (path[1] == '\0')
809 return got_error(GOT_ERR_BAD_PATH);
811 tree1 = tree01;
812 tree2 = tree02;
813 s = path;
814 s++; /* skip leading '/' */
815 len--;
816 seg = s;
817 seglen = 0;
818 while (len > 0) {
819 struct got_tree_object *next_tree1, *next_tree2;
821 if (*s != '/') {
822 s++;
823 len--;
824 seglen++;
825 if (*s)
826 continue;
829 te1 = find_entry_by_name(tree1, seg, seglen);
830 if (te1 == NULL) {
831 err = got_error(GOT_ERR_NO_OBJ);
832 goto done;
835 te2 = find_entry_by_name(tree2, seg, seglen);
836 if (te2 == NULL) {
837 *changed = 1;
838 goto done;
841 if (te1->mode != te2->mode) {
842 *changed = 1;
843 goto done;
846 if (got_object_id_cmp(te1->id, te2->id) == 0) {
847 *changed = 0;
848 goto done;
851 if (len == 0) { /* final path element */
852 *changed = 1;
853 goto done;
856 seg = s + 1;
857 s++;
858 len--;
859 seglen = 0;
860 if (*s) {
861 err = got_object_open_as_tree(&next_tree1, repo,
862 te1->id);
863 te1 = NULL;
864 if (err)
865 goto done;
866 if (tree1 != tree01)
867 got_object_tree_close(tree1);
868 tree1 = next_tree1;
870 err = got_object_open_as_tree(&next_tree2, repo,
871 te2->id);
872 te2 = NULL;
873 if (err)
874 goto done;
875 if (tree2 != tree02)
876 got_object_tree_close(tree2);
877 tree2 = next_tree2;
880 done:
881 if (tree1 && tree1 != tree01)
882 got_object_tree_close(tree1);
883 if (tree2 && tree2 != tree02)
884 got_object_tree_close(tree2);
885 return err;