Blob


1 /*
2 * Copyright (c) 2017 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
#include <sys/queue.h>
#include <sys/stat.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <time.h>
#include <sha1.h>
#include <zlib.h>

#include "got_repository.h"
#include "got_object.h"
#include "got_error.h"
#include "got_diff.h"
#include "got_opentemp.h"

#include "got_lib_diff.h"
#include "got_lib_path.h"
#include "got_lib_delta.h"
#include "got_lib_inflate.h"
#include "got_lib_object.h"
/*
 * Format *time as a UTC timestamp in asctime(3) layout, for example
 * "Thu Jan  1 00:00:00 1970", with the trailing newline removed.
 *
 * datebuf is handed to asctime_r(3) and must therefore provide room for
 * at least 26 bytes.  Returns a pointer to the formatted string, or NULL
 * if the time value could not be converted.
 */
static char *
get_datestr(time_t *time, char *datebuf)
{
	struct tm tm_utc;
	char *nl, *s;

	/*
	 * get_label() appends " UTC" to this string, so the conversion
	 * must not depend on the local time zone.
	 */
	if (gmtime_r(time, &tm_utc) == NULL)
		return NULL;

	s = asctime_r(&tm_utc, datebuf);
	if (s == NULL)
		return NULL;

	/* asctime_r() terminates its output with a newline; strip it. */
	nl = strchr(s, '\n');
	if (nl != NULL)
		*nl = '\0';
	return s;
}
/*
 * Build a heap-allocated label for one side of a diff.
 *
 * Without a path the label is a copy of fallback.  Otherwise it is the
 * path, followed by a tab, the date string and " UTC" when time is
 * non-zero and get_datestr() produced a string.
 *
 * Returns NULL if the allocation fails.  The caller owns the result and
 * releases it with free(3).
 */
static char *
get_label(const char *path, time_t time, const char *fallback)
{
	char datebuf[26];	/* output buffer for get_datestr() */
	const char *sep = "";
	const char *stamp = "";
	const char *zone = "";
	char *label;

	if (path == NULL)
		return strdup(fallback);

	if (time) {
		char *datestr = get_datestr(&time, datebuf);

		if (datestr) {
			sep = "\t";
			stamp = datestr;
			zone = " UTC";
		}
	}

	if (asprintf(&label, "%s%s%s%s", path, sep, stamp, zone) == -1)
		return NULL;

	return label;
}
69 static const struct got_error *
70 diff_blobs(struct got_blob_object *blob1, struct got_blob_object *blob2,
71 const char *label1, const char *label2, time_t time1, time_t time2,
72 int diff_context, FILE *outfile, struct got_diff_changes *changes)
73 {
74 struct got_diff_state ds;
75 struct got_diff_args args;
76 const struct got_error *err = NULL;
77 FILE *f1 = NULL, *f2 = NULL;
78 char hex1[SHA1_DIGEST_STRING_LENGTH];
79 char hex2[SHA1_DIGEST_STRING_LENGTH];
80 char *idstr1 = NULL, *idstr2 = NULL;
81 char *l1 = NULL, *l2 = NULL;
82 size_t size1, size2;
83 int res, flags = 0;
85 memset(&args, 0, sizeof(args));
87 if (blob1) {
88 f1 = got_opentemp();
89 if (f1 == NULL)
90 return got_error(GOT_ERR_FILE_OPEN);
91 } else
92 flags |= D_EMPTY1;
94 if (blob2) {
95 f2 = got_opentemp();
96 if (f2 == NULL) {
97 fclose(f1);
98 return got_error(GOT_ERR_FILE_OPEN);
99 }
100 } else
101 flags |= D_EMPTY2;
103 size1 = 0;
104 if (blob1) {
105 idstr1 = got_object_blob_id_str(blob1, hex1, sizeof(hex1));
106 err = got_object_blob_dump_to_file(&size1, NULL, f1, blob1);
107 if (err)
108 goto done;
109 } else
110 idstr1 = "/dev/null";
112 size2 = 0;
113 if (blob2) {
114 idstr2 = got_object_blob_id_str(blob2, hex2, sizeof(hex2));
115 err = got_object_blob_dump_to_file(&size2, NULL, f2, blob2);
116 if (err)
117 goto done;
118 } else
119 idstr2 = "/dev/null";
121 memset(&ds, 0, sizeof(ds));
122 /* XXX should stat buffers be passed in args instead of ds? */
123 ds.stb1.st_mode = S_IFREG;
124 if (blob1)
125 ds.stb1.st_size = size1;
126 ds.stb1.st_mtime = 0; /* XXX */
128 ds.stb2.st_mode = S_IFREG;
129 if (blob2)
130 ds.stb2.st_size = size2;
131 ds.stb2.st_mtime = 0; /* XXX */
133 args.diff_format = D_UNIFIED;
134 l1 = get_label(label1, time1, idstr1);
135 if (l1 == NULL) {
136 err = got_error_from_errno();
137 goto done;
139 args.label[0] = l1;
140 l2 = get_label(label2, time2, idstr2);
141 if (l2 == NULL) {
142 err = got_error_from_errno();
143 goto done;
145 args.label[1] = l2;
146 args.diff_context = diff_context;
147 flags |= D_PROTOTYPE;
149 fprintf(outfile, "blob - %s\n", idstr1);
150 fprintf(outfile, "blob + %s\n", idstr2);
151 err = got_diffreg(&res, f1, f2, flags, &args, &ds, outfile, changes);
152 done:
153 free(l1);
154 free(l2);
155 if (f1)
156 fclose(f1);
157 if (f2)
158 fclose(f2);
159 return err;
/*
 * Diff two blobs and write the result to outfile.
 *
 * Forwards all arguments to diff_blobs() and passes NULL for its
 * change-list argument.  Returns whatever diff_blobs() returns.
 */
const struct got_error *
got_diff_blob(struct got_blob_object *blob1, struct got_blob_object *blob2,
    const char *label1, const char *label2, time_t time1, time_t time2,
    int diff_context, FILE *outfile)
{
	struct got_diff_changes *no_changes = NULL;

	return diff_blobs(blob1, blob2, label1, label2, time1, time2,
	    diff_context, outfile, no_changes);
}
171 const struct got_error *
172 got_diff_blob_lines_changed(struct got_diff_changes **changes,
173 struct got_blob_object *blob1, struct got_blob_object *blob2)
175 const struct got_error *err = NULL;
177 *changes = calloc(1, sizeof(**changes));
178 if (*changes == NULL)
179 return got_error_from_errno();
180 SIMPLEQ_INIT(&(*changes)->entries);
182 err = diff_blobs(blob1, blob2, NULL, NULL, 0, 0, 3, NULL, *changes);
183 if (err) {
184 got_diff_free_changes(*changes);
185 *changes = NULL;
187 return err;
190 void
191 got_diff_free_changes(struct got_diff_changes *changes)
193 struct got_diff_change *change;
194 while (!SIMPLEQ_EMPTY(&changes->entries)) {
195 change = SIMPLEQ_FIRST(&changes->entries);
196 SIMPLEQ_REMOVE_HEAD(&changes->entries, entry);
197 free(change);
199 free(changes);
202 struct got_tree_entry *
203 match_entry_by_name(struct got_tree_entry *te1, struct got_tree_object *tree2)
205 struct got_tree_entry *te2;
206 const struct got_tree_entries *entries2;
208 entries2 = got_object_tree_get_entries(tree2);
209 SIMPLEQ_FOREACH(te2, &entries2->head, entry) {
210 if (strcmp(te1->name, te2->name) == 0)
211 return te2;
213 return NULL;
/*
 * Emit the diff for a blob that exists only on the new side: the blob
 * identified by id is diffed against nothing via got_diff_blob().
 * label and time describe the new side.
 */
static const struct got_error *
diff_added_blob(struct got_object_id *id, const char *label, time_t time,
    int diff_context, struct got_repository *repo, FILE *outfile)
{
	const struct got_error *err;
	struct got_blob_object *blob = NULL;
	struct got_object *obj = NULL;

	err = got_object_open(&obj, repo, id);
	if (err)
		return err;

	err = got_object_blob_open(&blob, repo, obj, 8192);
	if (err == NULL) {
		err = got_diff_blob(NULL, blob, NULL, label, 0, time,
		    diff_context, outfile);
	}

	got_object_close(obj);
	if (blob)
		got_object_blob_close(blob);
	return err;
}
240 static const struct got_error *
241 diff_modified_blob(struct got_object_id *id1, struct got_object_id *id2,
242 const char *label1, const char *label2, time_t time1, time_t time2,
243 int diff_context, struct got_repository *repo, FILE *outfile)
245 const struct got_error *err;
246 struct got_object *obj1 = NULL;
247 struct got_object *obj2 = NULL;
248 struct got_blob_object *blob1 = NULL;
249 struct got_blob_object *blob2 = NULL;
251 err = got_object_open(&obj1, repo, id1);
252 if (err)
253 return err;
254 if (obj1->type != GOT_OBJ_TYPE_BLOB) {
255 err = got_error(GOT_ERR_OBJ_TYPE);
256 goto done;
259 err = got_object_open(&obj2, repo, id2);
260 if (err)
261 goto done;
262 if (obj2->type != GOT_OBJ_TYPE_BLOB) {
263 err = got_error(GOT_ERR_BAD_OBJ_DATA);
264 goto done;
267 err = got_object_blob_open(&blob1, repo, obj1, 8192);
268 if (err)
269 goto done;
271 err = got_object_blob_open(&blob2, repo, obj2, 8192);
272 if (err)
273 goto done;
275 err = got_diff_blob(blob1, blob2, label1, label2, time1, time2,
276 diff_context, outfile);
278 done:
279 if (obj1)
280 got_object_close(obj1);
281 if (obj2)
282 got_object_close(obj2);
283 if (blob1)
284 got_object_blob_close(blob1);
285 if (blob2)
286 got_object_blob_close(blob2);
287 return err;
/*
 * Emit the diff for a blob that exists only on the old side: the blob
 * identified by id is diffed against nothing via got_diff_blob().
 * label and time describe the old side.
 */
static const struct got_error *
diff_deleted_blob(struct got_object_id *id, const char *label,
    time_t time, int diff_context, struct got_repository *repo, FILE *outfile)
{
	const struct got_error *err;
	struct got_blob_object *blob = NULL;
	struct got_object *obj = NULL;

	err = got_object_open(&obj, repo, id);
	if (err)
		return err;

	err = got_object_blob_open(&blob, repo, obj, 8192);
	if (err == NULL) {
		err = got_diff_blob(blob, NULL, label, NULL, time, 0,
		    diff_context, outfile);
	}

	got_object_close(obj);
	if (blob)
		got_object_blob_close(blob);
	return err;
}
314 static const struct got_error *
315 diff_added_tree(struct got_object_id *id, const char *label, time_t time,
316 int diff_context, struct got_repository *repo, FILE *outfile)
318 const struct got_error *err = NULL;
319 struct got_object *treeobj = NULL;
320 struct got_tree_object *tree = NULL;
322 err = got_object_open(&treeobj, repo, id);
323 if (err)
324 goto done;
326 if (treeobj->type != GOT_OBJ_TYPE_TREE) {
327 err = got_error(GOT_ERR_OBJ_TYPE);
328 goto done;
331 err = got_object_tree_open(&tree, repo, treeobj);
332 if (err)
333 goto done;
335 err = got_diff_tree(NULL, tree, NULL, label, 0, time,
336 diff_context, repo, outfile);
338 done:
339 if (tree)
340 got_object_tree_close(tree);
341 if (treeobj)
342 got_object_close(treeobj);
343 return err;
346 static const struct got_error *
347 diff_modified_tree(struct got_object_id *id1, struct got_object_id *id2,
348 const char *label1, const char *label2, time_t time1, time_t time2,
349 int diff_context, struct got_repository *repo, FILE *outfile)
351 const struct got_error *err;
352 struct got_object *treeobj1 = NULL;
353 struct got_object *treeobj2 = NULL;
354 struct got_tree_object *tree1 = NULL;
355 struct got_tree_object *tree2 = NULL;
357 err = got_object_open(&treeobj1, repo, id1);
358 if (err)
359 goto done;
361 if (treeobj1->type != GOT_OBJ_TYPE_TREE) {
362 err = got_error(GOT_ERR_OBJ_TYPE);
363 goto done;
366 err = got_object_open(&treeobj2, repo, id2);
367 if (err)
368 goto done;
370 if (treeobj2->type != GOT_OBJ_TYPE_TREE) {
371 err = got_error(GOT_ERR_OBJ_TYPE);
372 goto done;
375 err = got_object_tree_open(&tree1, repo, treeobj1);
376 if (err)
377 goto done;
379 err = got_object_tree_open(&tree2, repo, treeobj2);
380 if (err)
381 goto done;
383 err = got_diff_tree(tree1, tree2, label1, label2, time1, time2,
384 diff_context, repo, outfile);
386 done:
387 if (tree1)
388 got_object_tree_close(tree1);
389 if (tree2)
390 got_object_tree_close(tree2);
391 if (treeobj1)
392 got_object_close(treeobj1);
393 if (treeobj2)
394 got_object_close(treeobj2);
395 return err;
398 static const struct got_error *
399 diff_deleted_tree(struct got_object_id *id, const char *label, time_t time,
400 int diff_context, struct got_repository *repo, FILE *outfile)
402 const struct got_error *err;
403 struct got_object *treeobj = NULL;
404 struct got_tree_object *tree = NULL;
406 err = got_object_open(&treeobj, repo, id);
407 if (err)
408 goto done;
410 if (treeobj->type != GOT_OBJ_TYPE_TREE) {
411 err = got_error(GOT_ERR_OBJ_TYPE);
412 goto done;
415 err = got_object_tree_open(&tree, repo, treeobj);
416 if (err)
417 goto done;
419 err = got_diff_tree(tree, NULL, label, NULL, time, 0, diff_context,
420 repo, outfile);
421 done:
422 if (tree)
423 got_object_tree_close(tree);
424 if (treeobj)
425 got_object_close(treeobj);
426 return err;
/*
 * Placeholder for entries whose kind differs between the two trees.
 * Nothing is written and no error is reported.
 */
static const struct got_error *
diff_kind_mismatch(struct got_object_id *id1, struct got_object_id *id2,
    const char *label1, const char *label2, FILE *outfile)
{
	/* XXX TODO */
	(void)id1;
	(void)id2;
	(void)label1;
	(void)label2;
	(void)outfile;

	return NULL;
}
437 static const struct got_error *
438 diff_entry_old_new(struct got_tree_entry *te1, struct got_tree_entry *te2,
439 const char *label1, const char *label2, time_t time1, time_t time2,
440 int diff_context, struct got_repository *repo, FILE *outfile)
442 const struct got_error *err = NULL;
443 int id_match;
445 if (te2 == NULL) {
446 if (S_ISDIR(te1->mode))
447 err = diff_deleted_tree(te1->id, label1, time1,
448 diff_context, repo, outfile);
449 else
450 err = diff_deleted_blob(te1->id, label1, time1,
451 diff_context, repo, outfile);
452 return err;
455 id_match = (got_object_id_cmp(te1->id, te2->id) == 0);
456 if (S_ISDIR(te1->mode) && S_ISDIR(te2->mode)) {
457 if (!id_match)
458 return diff_modified_tree(te1->id, te2->id,
459 label1, label2, time1, time2, diff_context,
460 repo, outfile);
461 } else if (S_ISREG(te1->mode) && S_ISREG(te2->mode)) {
462 if (!id_match)
463 return diff_modified_blob(te1->id, te2->id,
464 label1, label2, time1, time2, diff_context,
465 repo, outfile);
468 if (id_match)
469 return NULL;
471 return diff_kind_mismatch(te1->id, te2->id, label1, label2, outfile);
474 static const struct got_error *
475 diff_entry_new_old(struct got_tree_entry *te2, struct got_tree_entry *te1,
476 const char *label2, time_t time2, int diff_context,
477 struct got_repository *repo, FILE *outfile)
479 if (te1 != NULL) /* handled by diff_entry_old_new() */
480 return NULL;
482 if (S_ISDIR(te2->mode))
483 return diff_added_tree(te2->id, label2, time2, diff_context,
484 repo, outfile);
486 return diff_added_blob(te2->id, label2, time2, diff_context, repo,
487 outfile);
490 const struct got_error *
491 got_diff_tree(struct got_tree_object *tree1, struct got_tree_object *tree2,
492 const char *label1, const char *label2, time_t time1, time_t time2,
493 int diff_context, struct got_repository *repo, FILE *outfile)
495 const struct got_error *err = NULL;
496 struct got_tree_entry *te1 = NULL;
497 struct got_tree_entry *te2 = NULL;
498 char *l1 = NULL, *l2 = NULL;
500 if (tree1) {
501 const struct got_tree_entries *entries;
502 entries = got_object_tree_get_entries(tree1);
503 te1 = SIMPLEQ_FIRST(&entries->head);
504 if (te1 && asprintf(&l1, "%s%s%s", label1, label1[0] ? "/" : "",
505 te1->name) == -1)
506 return got_error_from_errno();
508 if (tree2) {
509 const struct got_tree_entries *entries;
510 entries = got_object_tree_get_entries(tree2);
511 te2 = SIMPLEQ_FIRST(&entries->head);
512 if (te2 && asprintf(&l2, "%s%s%s", label2, label2[0] ? "/" : "",
513 te2->name) == -1)
514 return got_error_from_errno();
517 do {
518 if (te1) {
519 struct got_tree_entry *te = NULL;
520 if (tree2)
521 te = match_entry_by_name(te1, tree2);
522 if (te) {
523 free(l2);
524 l2 = NULL;
525 if (te && asprintf(&l2, "%s%s%s", label2,
526 label2[0] ? "/" : "", te->name) == -1)
527 return got_error_from_errno();
529 err = diff_entry_old_new(te1, te, l1, l2, time1, time2,
530 diff_context, repo, outfile);
531 if (err)
532 break;
535 if (te2) {
536 struct got_tree_entry *te = NULL;
537 if (tree1)
538 te = match_entry_by_name(te2, tree1);
539 free(l2);
540 if (te) {
541 if (asprintf(&l2, "%s%s%s", label2,
542 label2[0] ? "/" : "", te->name) == -1)
543 return got_error_from_errno();
544 } else {
545 if (asprintf(&l2, "%s%s%s", label2,
546 label2[0] ? "/" : "", te2->name) == -1)
547 return got_error_from_errno();
549 err = diff_entry_new_old(te2, te, l2, time2,
550 diff_context, repo, outfile);
551 if (err)
552 break;
555 free(l1);
556 l1 = NULL;
557 if (te1) {
558 te1 = SIMPLEQ_NEXT(te1, entry);
559 if (te1 &&
560 asprintf(&l1, "%s%s%s", label1,
561 label1[0] ? "/" : "", te1->name) == -1)
562 return got_error_from_errno();
564 free(l2);
565 l2 = NULL;
566 if (te2) {
567 te2 = SIMPLEQ_NEXT(te2, entry);
568 if (te2 &&
569 asprintf(&l2, "%s%s%s", label2,
570 label2[0] ? "/" : "", te2->name) == -1)
571 return got_error_from_errno();
573 } while (te1 || te2);
575 return err;
578 const struct got_error *
579 got_diff_objects_as_blobs(struct got_object_id *id1, struct got_object_id *id2,
580 const char *label1, const char *label2, time_t time1, time_t time2,
581 int diff_context, struct got_repository *repo, FILE *outfile)
583 const struct got_error *err;
584 struct got_blob_object *blob1 = NULL, *blob2 = NULL;
586 if (id1 == NULL && id2 == NULL)
587 return got_error(GOT_ERR_NO_OBJ);
589 if (id1) {
590 err = got_object_open_as_blob(&blob1, repo, id1, 8192);
591 if (err)
592 goto done;
594 if (id2) {
595 err = got_object_open_as_blob(&blob2, repo, id2, 8192);
596 if (err)
597 goto done;
599 err = got_diff_blob(blob1, blob2, label1, label2, time1, time2,
600 diff_context, outfile);
601 done:
602 if (blob1)
603 got_object_blob_close(blob1);
604 if (blob2)
605 got_object_blob_close(blob2);
606 return err;
609 const struct got_error *
610 got_diff_objects_as_trees(struct got_object_id *id1, struct got_object_id *id2,
611 char *label1, char *label2, time_t time1, time_t time2,
612 int diff_context, struct got_repository *repo, FILE *outfile)
614 const struct got_error *err;
615 struct got_tree_object *tree1 = NULL, *tree2 = NULL;
617 if (id1 == NULL && id2 == NULL)
618 return got_error(GOT_ERR_NO_OBJ);
620 if (id1) {
621 err = got_object_open_as_tree(&tree1, repo, id1);
622 if (err)
623 goto done;
625 if (id2) {
626 err = got_object_open_as_tree(&tree2, repo, id2);
627 if (err)
628 goto done;
630 err = got_diff_tree(tree1, tree2, label1, label2, time1, time2,
631 diff_context, repo, outfile);
632 done:
633 if (tree1)
634 got_object_tree_close(tree1);
635 if (tree2)
636 got_object_tree_close(tree2);
637 return err;
640 const struct got_error *
641 got_diff_objects_as_commits(struct got_object_id *id1,
642 struct got_object_id *id2, int diff_context,
643 struct got_repository *repo, FILE *outfile)
645 const struct got_error *err;
646 struct got_commit_object *commit1 = NULL, *commit2 = NULL;
648 if (id2 == NULL)
649 return got_error(GOT_ERR_NO_OBJ);
651 if (id1) {
652 err = got_object_open_as_commit(&commit1, repo, id1);
653 if (err)
654 goto done;
657 err = got_object_open_as_commit(&commit2, repo, id2);
658 if (err)
659 goto done;
661 err = got_diff_objects_as_trees(
662 commit1 ? got_object_commit_get_tree_id(commit1) : NULL,
663 got_object_commit_get_tree_id(commit2), "", "",
664 commit1 ? got_object_commit_get_committer_time(commit1) : 0,
665 got_object_commit_get_committer_time(commit2),
666 diff_context,
667 repo, outfile);
668 done:
669 if (commit1)
670 got_object_commit_close(commit1);
671 if (commit2)
672 got_object_commit_close(commit2);
673 return err;