Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <errno.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sha1.h>
25 #include <zlib.h>
26 #include <ctype.h>
27 #include <limits.h>
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_repository.h"
32 #include "got_sha1.h"
33 #include "pack.h"
/* Smaller of two values. Either argument may be evaluated twice. */
#ifndef MIN
#define MIN(_x, _y)	((_x) < (_y) ? (_x) : (_y))
#endif

/* Element count of a true array (must not be used on a pointer). */
#ifndef nitems
#define nitems(_arr)	(sizeof(_arr) / sizeof((_arr)[0]))
#endif

/* Type tags matched at the start of an object header. */
#define GOT_OBJ_TAG_COMMIT	"commit"
#define GOT_OBJ_TAG_TREE	"tree"
#define GOT_OBJ_TAG_BLOB	"blob"

/* Field tags (trailing space included) matched in commit object data. */
#define GOT_COMMIT_TAG_TREE		"tree "
#define GOT_COMMIT_TAG_PARENT		"parent "
#define GOT_COMMIT_TAG_AUTHOR		"author "
#define GOT_COMMIT_TAG_COMMITTER	"committer "
52 char *
53 got_object_id_str(struct got_object_id *id, char *buf, size_t size)
54 {
55 return got_sha1_digest_to_str(id->sha1, buf, size);
56 }
58 int
59 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
60 {
61 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
62 }
64 const char *
65 got_object_get_type_tag(int type)
66 {
67 switch (type) {
68 case GOT_OBJ_TYPE_COMMIT:
69 return GOT_OBJ_TAG_COMMIT;
70 case GOT_OBJ_TYPE_TREE:
71 return GOT_OBJ_TAG_TREE;
72 case GOT_OBJ_TYPE_BLOB:
73 return GOT_OBJ_TAG_BLOB;
74 }
76 return NULL;
77 }
79 static void
80 inflate_end(struct got_zstream_buf *zb)
81 {
82 free(zb->inbuf);
83 free(zb->outbuf);
84 inflateEnd(&zb->z);
85 }
87 static const struct got_error *
88 inflate_init(struct got_zstream_buf *zb, size_t bufsize)
89 {
90 const struct got_error *err = NULL;
92 memset(zb, 0, sizeof(*zb));
94 zb->z.zalloc = Z_NULL;
95 zb->z.zfree = Z_NULL;
96 if (inflateInit(&zb->z) != Z_OK) {
97 err = got_error(GOT_ERR_IO);
98 goto done;
99 }
101 zb->inlen = zb->outlen = bufsize;
103 zb->inbuf = calloc(1, zb->inlen);
104 if (zb->inbuf == NULL) {
105 err = got_error(GOT_ERR_NO_MEM);
106 goto done;
109 zb->outbuf = calloc(1, zb->outlen);
110 if (zb->outbuf == NULL) {
111 err = got_error(GOT_ERR_NO_MEM);
112 goto done;
115 done:
116 if (err)
117 inflate_end(zb);
118 return err;
121 static const struct got_error *
122 inflate_read(struct got_zstream_buf *zb, FILE *f, size_t *outlenp)
124 size_t last_total_out = zb->z.total_out;
125 z_stream *z = &zb->z;
126 int n, ret;
128 z->next_out = zb->outbuf;
129 z->avail_out = zb->outlen;
131 do {
132 if (z->avail_in == 0) {
133 int i;
134 n = fread(zb->inbuf, 1, zb->inlen, f);
135 if (n == 0) {
136 if (ferror(f))
137 return got_error(GOT_ERR_IO);
138 *outlenp = 0;
139 return NULL;
141 z->next_in = zb->inbuf;
142 z->avail_in = n;
144 ret = inflate(z, Z_SYNC_FLUSH);
145 } while (ret == Z_OK && z->avail_out > 0);
147 if (ret != Z_OK) {
148 if (ret != Z_STREAM_END)
149 return got_error(GOT_ERR_DECOMPRESSION);
150 zb->flags |= GOT_ZSTREAM_F_HAVE_MORE;
153 *outlenp = z->total_out - last_total_out;
154 return NULL;
157 static const struct got_error *
158 parse_object_header(struct got_object **obj, char *buf, size_t len)
160 const char *obj_tags[] = {
161 GOT_OBJ_TAG_COMMIT,
162 GOT_OBJ_TAG_TREE,
163 GOT_OBJ_TAG_BLOB
164 };
165 const int obj_types[] = {
166 GOT_OBJ_TYPE_COMMIT,
167 GOT_OBJ_TYPE_TREE,
168 GOT_OBJ_TYPE_BLOB,
169 };
170 int type = 0;
171 size_t size = 0, hdrlen = 0;
172 int i;
173 char *p = strchr(buf, '\0');
175 if (p == NULL)
176 return got_error(GOT_ERR_BAD_OBJ_HDR);
178 hdrlen = strlen(buf) + 1 /* '\0' */;
180 for (i = 0; i < nitems(obj_tags); i++) {
181 const char *tag = obj_tags[i];
182 size_t tlen = strlen(tag);
183 const char *errstr;
185 if (strncmp(buf, tag, tlen) != 0)
186 continue;
188 type = obj_types[i];
189 if (len <= tlen)
190 return got_error(GOT_ERR_BAD_OBJ_HDR);
191 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
192 if (errstr != NULL)
193 return got_error(GOT_ERR_BAD_OBJ_HDR);
194 break;
197 if (type == 0)
198 return got_error(GOT_ERR_BAD_OBJ_HDR);
200 *obj = calloc(1, sizeof(**obj));
201 (*obj)->type = type;
202 (*obj)->hdrlen = hdrlen;
203 (*obj)->size = size;
204 return NULL;
207 static const struct got_error *
208 read_object_header(struct got_object **obj, struct got_repository *repo,
209 FILE *f)
211 const struct got_error *err;
212 struct got_zstream_buf zb;
213 char *buf;
214 size_t len;
215 const size_t zbsize = 64;
216 size_t outlen, totlen;
217 int i, ret;
219 buf = calloc(zbsize, sizeof(char));
220 if (buf == NULL)
221 return got_error(GOT_ERR_NO_MEM);
223 err = inflate_init(&zb, zbsize);
224 if (err)
225 return err;
227 i = 0;
228 totlen = 0;
229 do {
230 err = inflate_read(&zb, f, &outlen);
231 if (err)
232 goto done;
233 if (strchr(zb.outbuf, '\0') == NULL) {
234 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
235 if (buf == NULL) {
236 err = got_error(GOT_ERR_NO_MEM);
237 goto done;
240 memcpy(buf + totlen, zb.outbuf, outlen);
241 totlen += outlen;
242 i++;
243 } while (strchr(zb.outbuf, '\0') == NULL);
245 err = parse_object_header(obj, buf, totlen);
246 done:
247 inflate_end(&zb);
248 return err;
251 static const struct got_error *
252 object_path(char **path, struct got_object_id *id,
253 struct got_repository *repo)
255 const struct got_error *err = NULL;
256 char hex[SHA1_DIGEST_STRING_LENGTH];
257 char *path_objects = got_repo_get_path_objects(repo);
259 if (path_objects == NULL)
260 return got_error(GOT_ERR_NO_MEM);
262 got_object_id_str(id, hex, sizeof(hex));
264 if (asprintf(path, "%s/%.2x/%s", path_objects,
265 id->sha1[0], hex + 2) == -1)
266 err = got_error(GOT_ERR_NO_MEM);
268 free(path_objects);
269 return err;
272 const struct got_error *
273 got_object_open(struct got_object **obj, struct got_repository *repo,
274 struct got_object_id *id)
276 const struct got_error *err = NULL;
277 char *path;
278 FILE *f = NULL;
280 err = object_path(&path, id, repo);
281 if (err)
282 return err;
284 f = fopen(path, "rb");
285 if (f == NULL) {
286 if (errno != ENOENT) {
287 err = got_error_from_errno();
288 goto done;
290 err = got_packfile_extract_object(&f, id, repo);
291 if (err)
292 goto done;
293 if (f == NULL) {
294 err = got_error(GOT_ERR_NO_OBJ);
295 goto done;
299 err = read_object_header(obj, repo, f);
300 if (err == NULL)
301 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
302 done:
303 free(path);
304 if (f != NULL)
305 fclose(f);
306 return err;
/*
 * Dispose of an object obtained from got_object_open().
 * The object is a single allocation, so one free() releases it.
 */
void
got_object_close(struct got_object *obj)
{
	free(obj);
}
315 static int
316 commit_object_valid(struct got_commit_object *commit)
318 int i;
319 int n;
321 if (commit == NULL)
322 return 0;
324 n = 0;
325 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
326 if (commit->tree_id.sha1[i] == 0)
327 n++;
329 if (n == SHA1_DIGEST_LENGTH)
330 return 0;
332 return 1;
335 static const struct got_error *
336 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
338 const struct got_error *err = NULL;
339 char *s = buf;
340 size_t tlen;
341 ssize_t remain = (ssize_t)len;
343 *commit = calloc(1, sizeof(**commit));
344 if (*commit == NULL)
345 return got_error(GOT_ERR_NO_MEM);
347 SIMPLEQ_INIT(&(*commit)->parent_ids);
349 tlen = strlen(GOT_COMMIT_TAG_TREE);
350 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
351 remain -= tlen;
352 if (remain < SHA1_DIGEST_STRING_LENGTH) {
353 err = got_error(GOT_ERR_BAD_OBJ_DATA);
354 goto done;
356 s += tlen;
357 if (!got_parse_sha1_digest((*commit)->tree_id.sha1, s)) {
358 err = got_error(GOT_ERR_BAD_OBJ_DATA);
359 goto done;
361 remain -= SHA1_DIGEST_STRING_LENGTH;
362 s += SHA1_DIGEST_STRING_LENGTH;
363 } else {
364 err = got_error(GOT_ERR_BAD_OBJ_DATA);
365 goto done;
368 tlen = strlen(GOT_COMMIT_TAG_PARENT);
369 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
370 struct got_parent_id *pid;
372 remain -= tlen;
373 if (remain < SHA1_DIGEST_STRING_LENGTH) {
374 err = got_error(GOT_ERR_BAD_OBJ_DATA);
375 goto done;
378 pid = calloc(1, sizeof(*pid));
379 if (pid == NULL) {
380 err = got_error(GOT_ERR_NO_MEM);
381 goto done;
383 s += tlen;
384 if (!got_parse_sha1_digest(pid->id.sha1, s)) {
385 err = got_error(GOT_ERR_BAD_OBJ_DATA);
386 goto done;
388 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
389 (*commit)->nparents++;
391 s += SHA1_DIGEST_STRING_LENGTH;
394 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
395 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
396 char *p;
398 remain -= tlen;
399 if (remain <= 0) {
400 err = got_error(GOT_ERR_BAD_OBJ_DATA);
401 goto done;
403 s += tlen;
404 p = strchr(s, '\n');
405 if (p == NULL) {
406 err = got_error(GOT_ERR_BAD_OBJ_DATA);
407 goto done;
409 *p = '\0';
410 (*commit)->author = strdup(s);
411 if ((*commit)->author == NULL) {
412 err = got_error(GOT_ERR_NO_MEM);
413 goto done;
415 s += strlen((*commit)->author) + 1;
418 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
419 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
420 char *p;
422 remain -= tlen;
423 if (remain <= 0) {
424 err = got_error(GOT_ERR_BAD_OBJ_DATA);
425 goto done;
427 s += tlen;
428 p = strchr(s, '\n');
429 if (p == NULL) {
430 err = got_error(GOT_ERR_BAD_OBJ_DATA);
431 goto done;
433 *p = '\0';
434 (*commit)->committer = strdup(s);
435 if ((*commit)->committer == NULL) {
436 err = got_error(GOT_ERR_NO_MEM);
437 goto done;
439 s += strlen((*commit)->committer) + 1;
442 (*commit)->logmsg = strdup(s);
443 done:
444 if (err)
445 got_object_commit_close(*commit);
446 return err;
449 static void
450 tree_entry_close(struct got_tree_entry *te)
452 free(te->name);
453 free(te);
456 static const struct got_error *
457 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
458 size_t maxlen)
460 char *p = buf, *space;
461 const struct got_error *err = NULL;
462 char hex[SHA1_DIGEST_STRING_LENGTH];
464 *te = calloc(1, sizeof(**te));
465 if (*te == NULL)
466 return got_error(GOT_ERR_NO_MEM);
468 *elen = strlen(buf) + 1;
469 if (*elen > maxlen) {
470 free(*te);
471 return got_error(GOT_ERR_BAD_OBJ_DATA);
474 space = strchr(buf, ' ');
475 if (space == NULL) {
476 free(*te);
477 return got_error(GOT_ERR_BAD_OBJ_DATA);
479 while (*p != ' ') {
480 if (*p < '0' && *p > '7') {
481 err = got_error(GOT_ERR_BAD_OBJ_DATA);
482 goto done;
484 (*te)->mode <<= 3;
485 (*te)->mode |= *p - '0';
486 p++;
489 (*te)->name = strdup(space + 1);
490 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
491 err = got_error(GOT_ERR_BAD_OBJ_DATA);
492 goto done;
494 buf += strlen(buf) + 1;
495 memcpy((*te)->id.sha1, buf, SHA1_DIGEST_LENGTH);
496 *elen += SHA1_DIGEST_LENGTH;
497 done:
498 if (err)
499 tree_entry_close(*te);
500 return err;
503 static const struct got_error *
504 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
505 char *buf, size_t len)
507 size_t remain = len;
508 int nentries;
510 *tree = calloc(1, sizeof(**tree));
511 if (*tree == NULL)
512 return got_error(GOT_ERR_NO_MEM);
514 SIMPLEQ_INIT(&(*tree)->entries);
516 while (remain > 0) {
517 struct got_tree_entry *te;
518 size_t elen;
520 parse_tree_entry(&te, &elen, buf, remain);
521 (*tree)->nentries++;
522 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
523 buf += elen;
524 remain -= elen;
527 if (remain != 0) {
528 got_object_tree_close(*tree);
529 return got_error(GOT_ERR_BAD_OBJ_DATA);
532 return NULL;
535 static const struct got_error *
536 read_commit_object(struct got_commit_object **commit,
537 struct got_repository *repo, struct got_object *obj, const char *path)
539 const struct got_error *err = NULL;
540 FILE *f;
541 struct got_zstream_buf zb;
542 size_t len;
543 char *p;
544 int i, ret;
546 f = fopen(path, "rb");
547 if (f == NULL)
548 return got_error(GOT_ERR_BAD_PATH);
550 err = inflate_init(&zb, 8192);
551 if (err) {
552 fclose(f);
553 return err;
556 do {
557 err = inflate_read(&zb, f, &len);
558 if (err || len == 0)
559 break;
560 } while (len < obj->hdrlen + obj->size);
562 if (len < obj->hdrlen + obj->size) {
563 err = got_error(GOT_ERR_BAD_OBJ_DATA);
564 goto done;
567 /* Skip object header. */
568 len -= obj->hdrlen;
569 err = parse_commit_object(commit, zb.outbuf + obj->hdrlen, len);
570 done:
571 inflate_end(&zb);
572 fclose(f);
573 return err;
576 const struct got_error *
577 got_object_commit_open(struct got_commit_object **commit,
578 struct got_repository *repo, struct got_object *obj)
580 const struct got_error *err = NULL;
581 char *path = NULL;
583 if (obj->type != GOT_OBJ_TYPE_COMMIT)
584 return got_error(GOT_ERR_OBJ_TYPE);
586 err = object_path(&path, &obj->id, repo);
587 if (err)
588 return err;
590 err = read_commit_object(commit, repo, obj, path);
591 free(path);
592 return err;
595 void
596 got_object_commit_close(struct got_commit_object *commit)
598 struct got_parent_id *pid;
600 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
601 pid = SIMPLEQ_FIRST(&commit->parent_ids);
602 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
603 free(pid);
606 free(commit->author);
607 free(commit->committer);
608 free(commit->logmsg);
609 free(commit);
612 static const struct got_error *
613 read_tree_object(struct got_tree_object **tree,
614 struct got_repository *repo, struct got_object *obj, const char *path)
616 const struct got_error *err = NULL;
617 FILE *f;
618 struct got_zstream_buf zb;
619 size_t len;
620 char *p;
621 int i, ret;
623 f = fopen(path, "rb");
624 if (f == NULL)
625 return got_error(GOT_ERR_BAD_PATH);
627 err = inflate_init(&zb, 8192);
628 if (err) {
629 fclose(f);
630 return err;
633 do {
634 err = inflate_read(&zb, f, &len);
635 if (err || len == 0)
636 break;
637 } while (len < obj->hdrlen + obj->size);
639 if (len < obj->hdrlen + obj->size) {
640 err = got_error(GOT_ERR_BAD_OBJ_DATA);
641 goto done;
644 /* Skip object header. */
645 len -= obj->hdrlen;
646 err = parse_tree_object(tree, repo, zb.outbuf + obj->hdrlen, len);
647 done:
648 inflate_end(&zb);
649 fclose(f);
650 return err;
653 const struct got_error *
654 got_object_tree_open(struct got_tree_object **tree,
655 struct got_repository *repo, struct got_object *obj)
657 const struct got_error *err = NULL;
658 char *path = NULL;
660 if (obj->type != GOT_OBJ_TYPE_TREE)
661 return got_error(GOT_ERR_OBJ_TYPE);
663 err = object_path(&path, &obj->id, repo);
664 if (err)
665 return err;
667 err = read_tree_object(tree, repo, obj, path);
668 free(path);
669 return err;
672 void
673 got_object_tree_close(struct got_tree_object *tree)
675 struct got_tree_entry *te;
677 while (!SIMPLEQ_EMPTY(&tree->entries)) {
678 te = SIMPLEQ_FIRST(&tree->entries);
679 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
680 tree_entry_close(te);
683 free(tree);
686 const struct got_error *
687 got_object_blob_open(struct got_blob_object **blob,
688 struct got_repository *repo, struct got_object *obj, size_t blocksize)
690 const struct got_error *err = NULL;
691 char *path;
693 if (obj->type != GOT_OBJ_TYPE_BLOB)
694 return got_error(GOT_ERR_OBJ_TYPE);
696 if (blocksize < obj->hdrlen)
697 return got_error(GOT_ERR_NO_SPACE);
699 err = object_path(&path, &obj->id, repo);
700 if (err)
701 return err;
703 *blob = calloc(1, sizeof(**blob));
704 if (*blob == NULL) {
705 free(path);
706 return got_error(GOT_ERR_NO_MEM);
709 (*blob)->f = fopen(path, "rb");
710 if ((*blob)->f == NULL) {
711 free(*blob);
712 free(path);
713 return got_error(GOT_ERR_BAD_PATH);
716 err = inflate_init(&(*blob)->zb, blocksize);
717 if (err != NULL) {
718 fclose((*blob)->f);
719 free(*blob);
720 free(path);
721 return err;
724 (*blob)->hdrlen = obj->hdrlen;
725 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
727 free(path);
728 return err;
731 void
732 got_object_blob_close(struct got_blob_object *blob)
734 inflate_end(&blob->zb);
735 fclose(blob->f);
736 free(blob);
739 const struct got_error *
740 got_object_blob_read_block(struct got_blob_object *blob, size_t *outlenp)
742 return inflate_read(&blob->zb, blob->f, outlenp);