Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/stdint.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sha1.h>
27 #include <zlib.h>
28 #include <ctype.h>
30 #include "got_error.h"
31 #include "got_object.h"
32 #include "got_cancel.h"
33 #include "got_commit_graph.h"
34 #include "got_path.h"
36 #include "got_lib_delta.h"
37 #include "got_lib_inflate.h"
38 #include "got_lib_object.h"
39 #include "got_lib_object_idset.h"
41 struct got_commit_graph_node {
42 struct got_object_id id;
43 time_t timestamp;
45 /* Used during graph iteration. */
46 TAILQ_ENTRY(got_commit_graph_node) entry;
47 };
49 TAILQ_HEAD(got_commit_graph_iter_list, got_commit_graph_node);
51 struct got_commit_graph_branch_tip {
52 struct got_object_id *commit_id;
53 struct got_commit_object *commit;
54 struct got_commit_graph_node *new_node;
55 int changed;
56 int branch_done;
57 };
59 struct got_commit_graph {
60 /* The set of all commits we have traversed. */
61 struct got_object_idset *node_ids;
63 int flags;
64 #define GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL 0x01
66 /*
67 * A set of object IDs of known parent commits which we have not yet
68 * traversed. Each commit ID in this set represents a branch in commit
69 * history: Either the first-parent branch of the head node, or another
70 * branch corresponding to a traversed merge commit for which we have
71 * not traversed a branch point commit yet.
72 *
73 * Whenever we add a commit with a matching ID to the graph, we remove
74 * its corresponding element from this set, and add new elements for
75 * each of that commit's parent commits which were not traversed yet.
76 *
77 * When API users ask us to fetch more commits, we fetch commits from
78 * all currently open branches. This allows API users to process
79 * commits in linear order even though the history contains branches.
80 */
81 struct got_object_idset *open_branches;
83 /* Array of branch tips for fetch_commits_from_open_branches(). */
84 struct got_commit_graph_branch_tip *tips;
85 int ntips;
87 /* Path of tree entry of interest to the API user. */
88 char *path;
90 /* The next commit to return when the API user asks for one. */
91 struct got_commit_graph_node *iter_node;
93 /* The graph iteration list contains all nodes in sorted order. */
94 struct got_commit_graph_iter_list iter_list;
95 };
97 static const struct got_error *
98 detect_changed_path(int *changed, struct got_commit_object *commit,
99 struct got_object_id *commit_id, const char *path,
100 struct got_repository *repo)
102 const struct got_error *err = NULL;
103 struct got_commit_object *pcommit = NULL;
104 struct got_tree_object *tree = NULL, *ptree = NULL;
105 struct got_object_qid *pid;
107 if (got_path_is_root_dir(path)) {
108 *changed = 1;
109 return NULL;
112 *changed = 0;
114 pid = SIMPLEQ_FIRST(&commit->parent_ids);
115 if (pid == NULL) {
116 struct got_object_id *obj_id;
117 err = got_object_id_by_path(&obj_id, repo, commit_id, path);
118 if (err) {
119 if (err->code == GOT_ERR_NO_TREE_ENTRY)
120 err = NULL;
121 } else
122 *changed = 1; /* The path was created in this commit. */
123 free(obj_id);
124 return err;
127 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
128 if (err)
129 return err;
131 err = got_object_open_as_commit(&pcommit, repo, pid->id);
132 if (err)
133 goto done;
135 err = got_object_open_as_tree(&ptree, repo, pcommit->tree_id);
136 if (err)
137 goto done;
139 err = got_object_tree_path_changed(changed, tree, ptree, path, repo);
140 done:
141 if (tree)
142 got_object_tree_close(tree);
143 if (ptree)
144 got_object_tree_close(ptree);
145 if (pcommit)
146 got_object_commit_close(pcommit);
147 return err;
150 static void
151 add_node_to_iter_list(struct got_commit_graph *graph,
152 struct got_commit_graph_node *node,
153 struct got_commit_graph_node *child_node)
155 struct got_commit_graph_node *n, *next;
157 if (TAILQ_EMPTY(&graph->iter_list)) {
158 TAILQ_INSERT_HEAD(&graph->iter_list, node, entry);
159 graph->iter_node = node;
160 return;
163 n = graph->iter_node;
164 /* Ensure that an iteration in progress will see this new commit. */
165 while (n) {
166 next = TAILQ_NEXT(n, entry);
167 if (next && node->timestamp >= next->timestamp) {
168 TAILQ_INSERT_BEFORE(next, node, entry);
169 return;
171 n = next;
173 TAILQ_INSERT_TAIL(&graph->iter_list, node, entry);
176 static const struct got_error *
177 close_branch(struct got_commit_graph *graph, struct got_object_id *commit_id)
179 const struct got_error *err;
181 err = got_object_idset_remove(NULL, graph->open_branches, commit_id);
182 if (err && err->code != GOT_ERR_NO_OBJ)
183 return err;
184 return NULL;
187 static const struct got_error *
188 advance_branch(struct got_commit_graph *graph,
189 struct got_commit_graph_node *node,
190 struct got_object_id *commit_id, struct got_commit_object *commit,
191 struct got_repository *repo)
193 const struct got_error *err;
194 struct got_object_qid *qid;
196 err = close_branch(graph, commit_id);
197 if (err)
198 return err;
200 if (graph->flags & GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL) {
201 qid = SIMPLEQ_FIRST(&commit->parent_ids);
202 if (qid == NULL ||
203 got_object_idset_get(graph->open_branches, qid->id))
204 return NULL;
205 return got_object_idset_add(graph->open_branches,
206 qid->id, node);
209 /*
210 * If we are graphing commits for a specific path, skip branches
211 * which do not contribute any content to this path.
212 */
213 if (commit->nparents > 1 && !got_path_is_root_dir(graph->path)) {
214 struct got_object_id *merged_id, *prev_id = NULL;
215 int branches_differ = 0;
217 err = got_object_id_by_path(&merged_id, repo, commit_id,
218 graph->path);
219 if (err)
220 return err;
222 SIMPLEQ_FOREACH(qid, &commit->parent_ids, entry) {
223 struct got_object_id *id;
225 if (got_object_idset_get(graph->open_branches, qid->id))
226 continue;
228 err = got_object_id_by_path(&id, repo, qid->id,
229 graph->path);
230 if (err) {
231 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
232 branches_differ = 1;
233 continue;
235 free(merged_id);
236 free(prev_id);
237 return err;
240 if (prev_id) {
241 if (!branches_differ &&
242 got_object_id_cmp(id, prev_id) != 0)
243 branches_differ = 1;
244 free(prev_id);
246 prev_id = id;
248 /*
249 * If a branch has created the merged content we can
250 * skip any other branches.
251 */
252 if (got_object_id_cmp(merged_id, id) == 0) {
253 err = got_object_idset_add(graph->open_branches,
254 qid->id, node);
255 free(merged_id);
256 free(id);
257 return err;
261 free(prev_id);
262 prev_id = NULL;
263 free(merged_id);
264 merged_id = NULL;
266 /*
267 * If the path's content is the same on all branches,
268 * follow the first parent only.
269 */
270 if (!branches_differ) {
271 qid = SIMPLEQ_FIRST(&commit->parent_ids);
272 if (qid == NULL)
273 return NULL;
274 if (got_object_idset_get(graph->open_branches, qid->id))
275 return NULL;
276 if (got_object_idset_get(graph->node_ids, qid->id))
277 return NULL; /* parent already traversed */
278 return got_object_idset_add(graph->open_branches,
279 qid->id, node);
283 SIMPLEQ_FOREACH(qid, &commit->parent_ids, entry) {
284 if (got_object_idset_get(graph->open_branches, qid->id))
285 continue;
286 if (got_object_idset_get(graph->node_ids, qid->id))
287 continue; /* parent already traversed */
288 err = got_object_idset_add(graph->open_branches, qid->id, node);
289 if (err)
290 return err;
293 return NULL;
296 static const struct got_error *
297 add_node(struct got_commit_graph_node **new_node, int *changed,
298 int *branch_done, struct got_commit_graph *graph,
299 struct got_object_id *commit_id, struct got_commit_object *commit,
300 struct got_commit_graph_node *child_node, struct got_repository *repo)
302 const struct got_error *err = NULL;
303 struct got_commit_graph_node *node;
305 *new_node = NULL;
306 *changed = 0;
307 *branch_done = 0;
309 node = calloc(1, sizeof(*node));
310 if (node == NULL)
311 return got_error_from_errno("calloc");
313 memcpy(&node->id, commit_id, sizeof(node->id));
314 node->timestamp = commit->committer_time;
316 err = got_object_idset_add(graph->node_ids, &node->id, node);
317 if (err) {
318 free(node);
319 return err;
322 err = detect_changed_path(changed, commit, commit_id, graph->path,
323 repo);
324 if (err) {
325 if (err->code == GOT_ERR_NO_OBJ) {
326 /*
327 * History of the path stops here on the current
328 * branch. Keep going on other branches.
329 */
330 err = NULL;
331 *branch_done = 1;
332 } else {
333 got_object_idset_remove(NULL, graph->node_ids,
334 &node->id);
335 free(node);
336 return err;
340 if (*changed)
341 add_node_to_iter_list(graph, node, child_node);
342 *new_node = node;
343 return NULL;
346 const struct got_error *
347 got_commit_graph_open(struct got_commit_graph **graph,
348 const char *path, int first_parent_traversal)
350 const struct got_error *err = NULL;
352 *graph = calloc(1, sizeof(**graph));
353 if (*graph == NULL)
354 return got_error_from_errno("calloc");
356 (*graph)->path = strdup(path);
357 if ((*graph)->path == NULL) {
358 err = got_error_from_errno("strdup");
359 goto done;
362 (*graph)->node_ids = got_object_idset_alloc();
363 if ((*graph)->node_ids == NULL) {
364 err = got_error_from_errno("got_object_idset_alloc");
365 goto done;
368 (*graph)->open_branches = got_object_idset_alloc();
369 if ((*graph)->open_branches == NULL) {
370 err = got_error_from_errno("got_object_idset_alloc");
371 goto done;
374 TAILQ_INIT(&(*graph)->iter_list);
375 if (first_parent_traversal)
376 (*graph)->flags |= GOT_COMMIT_GRAPH_FIRST_PARENT_TRAVERSAL;
377 done:
378 if (err) {
379 got_commit_graph_close(*graph);
380 *graph = NULL;
382 return err;
385 struct add_branch_tip_arg {
386 struct got_commit_graph_branch_tip *tips;
387 int ntips;
388 struct got_repository *repo;
389 struct got_commit_graph *graph;
390 };
392 static const struct got_error *
393 add_branch_tip(struct got_object_id *commit_id, void *data, void *arg)
395 const struct got_error *err;
396 struct got_commit_graph_node *child_node = data;
397 struct add_branch_tip_arg *a = arg;
398 struct got_commit_graph_node *new_node;
399 struct got_commit_object *commit;
400 int changed, branch_done;
402 err = got_object_open_as_commit(&commit, a->repo, commit_id);
403 if (err)
404 return err;
406 err = add_node(&new_node, &changed, &branch_done, a->graph,
407 commit_id, commit, child_node, a->repo);
408 if (err)
409 return err;
411 a->tips[a->ntips].commit_id = new_node ? &new_node->id : NULL;
412 a->tips[a->ntips].commit = commit;
413 a->tips[a->ntips].new_node = new_node;
414 a->tips[a->ntips].changed = changed;
415 a->tips[a->ntips].branch_done = branch_done;
416 a->ntips++;
418 return NULL;
421 static const struct got_error *
422 fetch_commits_from_open_branches(int *nfetched,
423 struct got_object_id **changed_id, struct got_commit_graph *graph,
424 struct got_repository *repo, got_cancel_cb cancel_cb, void *cancel_arg)
426 const struct got_error *err;
427 struct add_branch_tip_arg arg;
428 int i, ntips;
430 *nfetched = 0;
431 if (changed_id)
432 *changed_id = NULL;
434 ntips = got_object_idset_num_elements(graph->open_branches);
435 if (ntips == 0)
436 return NULL;
438 /* (Re-)allocate branch tips array if necessary. */
439 if (graph->ntips < ntips) {
440 struct got_commit_graph_branch_tip *tips;
441 tips = recallocarray(graph->tips, graph->ntips, ntips,
442 sizeof(*tips));
443 if (tips == NULL)
444 return got_error_from_errno("recallocarray");
445 graph->tips = tips;
446 graph->ntips = ntips;
448 arg.tips = graph->tips;
449 arg.ntips = 0; /* add_branch_tip() will increment */
450 arg.repo = repo;
451 arg.graph = graph;
452 err = got_object_idset_for_each(graph->open_branches, add_branch_tip,
453 &arg);
454 if (err)
455 goto done;
457 for (i = 0; i < arg.ntips; i++) {
458 struct got_object_id *commit_id;
459 struct got_commit_object *commit;
460 struct got_commit_graph_node *new_node;
461 int branch_done, changed;
463 if (cancel_cb) {
464 err = (*cancel_cb)(cancel_arg);
465 if (err)
466 break;
469 commit_id = arg.tips[i].commit_id;
470 commit = arg.tips[i].commit;
471 new_node = arg.tips[i].new_node;
472 branch_done = arg.tips[i].branch_done;
473 changed = arg.tips[i].changed;
475 if (branch_done)
476 err = close_branch(graph, commit_id);
477 else
478 err = advance_branch(graph, new_node, commit_id,
479 commit, repo);
480 if (err)
481 break;
482 if (changed && changed_id && *changed_id == NULL)
483 *changed_id = commit_id;
485 done:
486 for (i = 0; i < arg.ntips; i++)
487 got_object_commit_close(arg.tips[i].commit);
488 (*nfetched) = arg.ntips;
489 return err;
492 static const struct got_error *
493 free_node_iter(struct got_object_id *id, void *data, void *arg)
495 struct got_commit_graph_node *node = data;
496 free(node);
497 return NULL;
500 void
501 got_commit_graph_close(struct got_commit_graph *graph)
503 if (graph->open_branches)
504 got_object_idset_free(graph->open_branches);
505 got_object_idset_for_each(graph->node_ids, free_node_iter, NULL);
506 if (graph->node_ids)
507 got_object_idset_free(graph->node_ids);
508 free(graph->tips);
509 free(graph->path);
510 free(graph);
513 const struct got_error *
514 got_commit_graph_iter_start(struct got_commit_graph *graph,
515 struct got_object_id *id, struct got_repository *repo,
516 got_cancel_cb cancel_cb, void *cancel_arg)
518 const struct got_error *err = NULL;
519 struct got_commit_graph_node *start_node;
520 struct got_commit_object *commit;
521 int changed, branch_done;
523 if (!TAILQ_EMPTY(&graph->iter_list))
524 return got_error(GOT_ERR_ITER_BUSY);
526 err = got_object_open_as_commit(&commit, repo, id);
527 if (err)
528 return err;
530 err = add_node(&start_node, &changed, &branch_done, graph, id,
531 commit, NULL, repo);
532 if (err)
533 goto done;
535 err = advance_branch(graph, start_node, id, commit, repo);
536 if (err)
537 goto done;
539 if (!changed) {
540 /* Locate first commit which changed graph->path. */
541 struct got_object_id *changed_id = NULL;
542 while (changed_id == NULL) {
543 int ncommits;
544 err = fetch_commits_from_open_branches(&ncommits,
545 &changed_id, graph, repo, cancel_cb, cancel_arg);
546 if (err) {
547 got_object_commit_close(commit);
548 return err;
551 start_node = got_object_idset_get(graph->node_ids, changed_id);
553 done:
554 got_object_commit_close(commit);
555 if (err == NULL)
556 graph->iter_node = start_node;
557 return err;
560 const struct got_error *
561 got_commit_graph_iter_next(struct got_object_id **id,
562 struct got_commit_graph *graph, struct got_repository *repo,
563 got_cancel_cb cancel_cb, void *cancel_arg)
565 const struct got_error *err = NULL;
567 *id = NULL;
569 if (graph->iter_node == NULL) {
570 /* We are done iterating, or iteration was not started. */
571 return got_error(GOT_ERR_ITER_COMPLETED);
574 if (graph->iter_node ==
575 TAILQ_LAST(&graph->iter_list, got_commit_graph_iter_list) &&
576 got_object_idset_num_elements(graph->open_branches) == 0) {
577 /* We are done iterating. */
578 *id = &graph->iter_node->id;
579 graph->iter_node = NULL;
580 return NULL;
583 while (TAILQ_NEXT(graph->iter_node, entry) == NULL &&
584 got_object_idset_num_elements(graph->open_branches) > 0) {
585 int ncommits;
586 struct got_object_id *changed_id;
587 err = fetch_commits_from_open_branches(&ncommits,
588 &changed_id, graph, repo, cancel_cb, cancel_arg);
589 if (err)
590 return err;
593 *id = &graph->iter_node->id;
594 graph->iter_node = TAILQ_NEXT(graph->iter_node, entry);
595 return NULL;
598 const struct got_error *
599 got_commit_graph_find_youngest_common_ancestor(struct got_object_id **yca_id,
600 struct got_object_id *commit_id, struct got_object_id *commit_id2,
601 struct got_repository *repo, got_cancel_cb cancel_cb, void *cancel_arg)
603 const struct got_error *err = NULL;
604 struct got_commit_graph *graph = NULL, *graph2 = NULL;
605 int completed = 0, completed2 = 0;
606 struct got_object_idset *commit_ids;
608 *yca_id = NULL;
610 commit_ids = got_object_idset_alloc();
611 if (commit_ids == NULL)
612 return got_error_from_errno("got_object_idset_alloc");
614 err = got_commit_graph_open(&graph, "/", 1);
615 if (err)
616 goto done;
618 err = got_commit_graph_open(&graph2, "/", 1);
619 if (err)
620 goto done;
622 err = got_commit_graph_iter_start(graph, commit_id, repo,
623 cancel_cb, cancel_arg);
624 if (err)
625 goto done;
627 err = got_commit_graph_iter_start(graph2, commit_id2, repo,
628 cancel_cb, cancel_arg);
629 if (err)
630 goto done;
632 for (;;) {
633 struct got_object_id *id = NULL, *id2 = NULL;
635 if (cancel_cb) {
636 err = (*cancel_cb)(cancel_arg);
637 if (err)
638 break;
641 if (!completed) {
642 err = got_commit_graph_iter_next(&id, graph, repo,
643 cancel_cb, cancel_arg);
644 if (err) {
645 if (err->code != GOT_ERR_ITER_COMPLETED)
646 break;
647 err = NULL;
648 completed = 1;
652 if (!completed2) {
653 err = got_commit_graph_iter_next(&id2, graph2, repo,
654 cancel_cb, cancel_arg);
655 if (err) {
656 if (err->code != GOT_ERR_ITER_COMPLETED)
657 break;
658 err = NULL;
659 completed2 = 1;
663 if (id) {
664 if (got_object_idset_get(commit_ids, id)) {
665 *yca_id = got_object_id_dup(id);
666 if (*yca_id)
667 break;
668 err = got_error_from_errno("got_object_id_dup");
669 break;
672 err = got_object_idset_add(commit_ids, id, id);
673 if (err)
674 break;
676 if (id2) {
677 if (got_object_idset_get(commit_ids, id2)) {
678 *yca_id = got_object_id_dup(id2);
679 if (*yca_id)
680 break;
681 err = got_error_from_errno("got_object_id_dup");
682 break;
685 err = got_object_idset_add(commit_ids, id2, id2);
686 if (err)
687 break;
690 if (completed && completed2) {
691 err = got_error(GOT_ERR_ANCESTRY);
692 break;
696 done:
697 got_object_idset_free(commit_ids);
698 if (graph)
699 got_commit_graph_close(graph);
700 if (graph2)
701 got_commit_graph_close(graph2);
702 return err;