Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
24 #include <stdint.h>
25 #include <imsg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sha1.h>
30 #include <limits.h>
31 #include <zlib.h>
33 #include "got_error.h"
34 #include "got_cancel.h"
35 #include "got_object.h"
36 #include "got_path.h"
37 #include "got_reference.h"
38 #include "got_repository_admin.h"
39 #include "got_opentemp.h"
41 #include "got_lib_deltify.h"
42 #include "got_lib_delta.h"
43 #include "got_lib_object.h"
44 #include "got_lib_object_idset.h"
45 #include "got_lib_object_cache.h"
46 #include "got_lib_deflate.h"
47 #include "got_lib_pack.h"
48 #include "got_lib_privsep.h"
49 #include "got_lib_repository.h"
/* Larger of two values; both arguments may be evaluated more than once. */
#if !defined(MAX)
#define MAX(_x, _y)	((_x) > (_y) ? (_x) : (_y))
#endif
55 struct got_pack_meta {
56 struct got_object_id id;
57 char *path;
58 int obj_type;
59 off_t size;
60 time_t mtime;
62 /* The best delta we picked */
63 struct got_pack_meta *head;
64 struct got_pack_meta *prev;
65 struct got_delta_instruction *deltas;
66 int ndeltas;
67 int nchain;
69 /* Only used for delta window */
70 struct got_delta_table *dtab;
72 /* Only used for writing offset deltas */
73 off_t off;
74 };
/*
 * Growable array of pack meta entries.
 */
struct got_pack_metavec {
	struct got_pack_meta **meta;	/* the entries */
	int nmeta;			/* number of slots in use */
	int metasz;			/* number of slots allocated */
};
82 static const struct got_error *
83 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
84 const char *path, int obj_type, time_t mtime)
85 {
86 const struct got_error *err = NULL;
87 struct got_pack_meta *m;
89 *new = NULL;
91 m = calloc(1, sizeof(*m));
92 if (m == NULL)
93 return got_error_from_errno("calloc");
95 memcpy(&m->id, id, sizeof(m->id));
97 m->path = strdup(path);
98 if (m->path == NULL) {
99 err = got_error_from_errno("strdup");
100 free(m);
101 return err;
104 m->obj_type = obj_type;
105 m->mtime = mtime;
106 *new = m;
107 return NULL;
110 static void
111 clear_meta(struct got_pack_meta *meta)
113 if (meta == NULL)
114 return;
115 free(meta->deltas);
116 meta->deltas = NULL;
117 free(meta->path);
118 meta->path = NULL;
/*
 * Free an array of 'nmeta' pack meta entries, including the entries
 * themselves and the data they own.
 *
 * Entries are allocated individually with calloc() by alloc_meta(), so
 * each one must be freed in addition to its fields; freeing only the
 * array would leak every entry.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int i;

	for (i = 0; i < nmeta; i++) {
		clear_meta(meta[i]);
		free(meta[i]);
	}
	free(meta);
}
131 static int
132 delta_order_cmp(const void *pa, const void *pb)
134 struct got_pack_meta *a, *b;
135 int cmp;
137 a = *(struct got_pack_meta **)pa;
138 b = *(struct got_pack_meta **)pb;
140 if (a->obj_type != b->obj_type)
141 return a->obj_type - b->obj_type;
142 cmp = strcmp(a->path, b->path);
143 if (cmp != 0)
144 return cmp;
145 if (a->mtime != b->mtime)
146 return a->mtime - b->mtime;
147 return got_object_id_cmp(&a->id, &b->id);
150 static int
151 delta_size(struct got_delta_instruction *deltas, int ndeltas)
153 int i, size = 32;
154 for (i = 0; i < ndeltas; i++) {
155 if (deltas[i].copy)
156 size += GOT_DELTA_SIZE_SHIFT;
157 else
158 size += deltas[i].len + 1;
160 return size;
164 static const struct got_error *
165 pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
166 struct got_repository *repo,
167 got_pack_progress_cb progress_cb, void *progress_arg,
168 got_cancel_cb cancel_cb, void *cancel_arg)
170 const struct got_error *err = NULL;
171 struct got_pack_meta *m = NULL, *base = NULL;
172 struct got_raw_object *raw = NULL, *base_raw = NULL;
173 struct got_delta_instruction *deltas;
174 int i, j, size, ndeltas, best;
175 const int max_base_candidates = 10;
177 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
178 for (i = 0; i < nmeta; i++) {
179 if (cancel_cb) {
180 err = (*cancel_cb)(cancel_arg);
181 if (err)
182 break;
184 if (progress_cb) {
185 err = progress_cb(progress_arg, 0L, nours, nmeta, i, 0);
186 if (err)
187 goto done;
189 m = meta[i];
190 m->deltas = NULL;
191 m->ndeltas = 0;
193 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
194 m->obj_type == GOT_OBJ_TYPE_TAG)
195 continue;
197 err = got_object_raw_open(&raw, repo, &m->id, 8192);
198 if (err)
199 goto done;
200 m->size = raw->size;
202 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
203 raw->size + raw->hdrlen);
204 if (err)
205 goto done;
207 if (i > max_base_candidates) {
208 struct got_pack_meta *n = NULL;
209 n = meta[i - (max_base_candidates + 1)];
210 got_deltify_free(n->dtab);
211 n->dtab = NULL;
214 best = raw->size;
215 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
216 if (cancel_cb) {
217 err = (*cancel_cb)(cancel_arg);
218 if (err)
219 goto done;
221 base = meta[j];
222 /* long chains make unpacking slow, avoid such bases */
223 if (base->nchain >= 128 ||
224 base->obj_type != m->obj_type)
225 continue;
227 err = got_object_raw_open(&base_raw, repo, &base->id,
228 8192);
229 if (err)
230 goto done;
231 err = got_deltify(&deltas, &ndeltas,
232 raw->f, raw->hdrlen, raw->size + raw->hdrlen,
233 base->dtab, base_raw->f, base_raw->hdrlen,
234 base_raw->size + base_raw->hdrlen);
235 got_object_raw_close(base_raw);
236 base_raw = NULL;
237 if (err)
238 goto done;
240 size = delta_size(deltas, ndeltas);
241 if (size + 32 < best){
242 /*
243 * if we already picked a best delta,
244 * replace it.
245 */
246 free(m->deltas);
247 best = size;
248 m->deltas = deltas;
249 m->ndeltas = ndeltas;
250 m->nchain = base->nchain + 1;
251 m->prev = base;
252 m->head = base->head;
253 if (m->head == NULL)
254 m->head = base;
255 } else {
256 free(deltas);
257 deltas = NULL;
258 ndeltas = 0;
262 got_object_raw_close(raw);
263 raw = NULL;
265 done:
266 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
267 got_deltify_free(meta[i]->dtab);
268 meta[i]->dtab = NULL;
270 if (raw)
271 got_object_raw_close(raw);
272 if (base_raw)
273 got_object_raw_close(base_raw);
274 return err;
277 static const struct got_error *
278 search_packidx(int *found, struct got_object_id *id,
279 struct got_repository *repo)
281 const struct got_error *err = NULL;
282 struct got_packidx *packidx = NULL;
283 int idx;
285 *found = 0;
287 err = got_repo_search_packidx(&packidx, &idx, repo, id);
288 if (err == NULL)
289 *found = 1; /* object is already packed */
290 else if (err->code == GOT_ERR_NO_OBJ)
291 err = NULL;
292 return err;
295 static const int obj_types[] = {
296 GOT_OBJ_TYPE_ANY,
297 GOT_OBJ_TYPE_COMMIT,
298 GOT_OBJ_TYPE_TREE,
299 GOT_OBJ_TYPE_BLOB,
300 GOT_OBJ_TYPE_TAG,
301 GOT_OBJ_TYPE_OFFSET_DELTA,
302 GOT_OBJ_TYPE_REF_DELTA
303 };
305 static const struct got_error *
306 add_meta(struct got_pack_metavec *v, struct got_object_idset *idset,
307 struct got_object_id *id, const char *path, int obj_type,
308 time_t mtime, int loose_obj_only, struct got_repository *repo)
310 const struct got_error *err;
311 struct got_pack_meta *m;
313 if (loose_obj_only) {
314 int is_packed;
315 err = search_packidx(&is_packed, id, repo);
316 if (err)
317 return err;
318 if (is_packed)
319 return NULL;
322 err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]);
323 if (err)
324 return err;
326 if (v == NULL)
327 return NULL;
329 err = alloc_meta(&m, id, path, obj_type, mtime);
330 if (err)
331 goto done;
333 if (v->nmeta == v->metasz){
334 size_t newsize = 2 * v->metasz;
335 struct got_pack_meta **new;
336 new = reallocarray(v->meta, newsize, sizeof(*new));
337 if (new == NULL) {
338 err = got_error_from_errno("reallocarray");
339 goto done;
341 v->meta = new;
342 v->metasz = newsize;
344 done:
345 if (err) {
346 clear_meta(m);
347 free(m);
348 } else
349 v->meta[v->nmeta++] = m;
351 return err;
354 static const struct got_error *
355 load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v,
356 struct got_object_idset *idset, struct got_object_id *tree_id,
357 const char *dpath, time_t mtime, struct got_repository *repo,
358 int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg)
360 const struct got_error *err;
361 struct got_tree_object *tree;
362 char *p = NULL;
363 int i;
365 err = got_object_open_as_tree(&tree, repo, tree_id);
366 if (err)
367 return err;
369 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
370 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
371 struct got_object_id *id = got_tree_entry_get_id(e);
372 mode_t mode = got_tree_entry_get_mode(e);
374 if (cancel_cb) {
375 err = (*cancel_cb)(cancel_arg);
376 if (err)
377 break;
380 if (got_object_tree_entry_is_submodule(e) ||
381 got_object_idset_contains(idset, id))
382 continue;
384 if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
385 got_tree_entry_get_name(e)) == -1) {
386 err = got_error_from_errno("asprintf");
387 break;
390 if (S_ISDIR(mode)) {
391 struct got_object_qid *qid;
392 err = got_object_qid_alloc(&qid, id);
393 if (err)
394 break;
395 STAILQ_INSERT_TAIL(ids, qid, entry);
396 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
397 err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB,
398 mtime, loose_obj_only, repo);
399 if (err)
400 break;
402 free(p);
403 p = NULL;
406 got_object_tree_close(tree);
407 free(p);
408 return err;
411 static const struct got_error *
412 load_tree(struct got_pack_metavec *v, struct got_object_idset *idset,
413 struct got_object_id *tree_id, const char *dpath, time_t mtime,
414 int loose_obj_only, struct got_repository *repo,
415 got_cancel_cb cancel_cb, void *cancel_arg)
417 const struct got_error *err = NULL;
418 struct got_object_id_queue tree_ids;
419 struct got_object_qid *qid;
421 if (got_object_idset_contains(idset, tree_id))
422 return NULL;
424 err = got_object_qid_alloc(&qid, tree_id);
425 if (err)
426 return err;
428 STAILQ_INIT(&tree_ids);
429 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
431 while (!STAILQ_EMPTY(&tree_ids)) {
432 if (cancel_cb) {
433 err = (*cancel_cb)(cancel_arg);
434 if (err)
435 break;
438 qid = STAILQ_FIRST(&tree_ids);
439 STAILQ_REMOVE_HEAD(&tree_ids, entry);
441 if (got_object_idset_contains(idset, qid->id)) {
442 got_object_qid_free(qid);
443 continue;
446 err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE,
447 mtime, loose_obj_only, repo);
448 if (err) {
449 got_object_qid_free(qid);
450 break;
453 err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath,
454 mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
455 got_object_qid_free(qid);
456 if (err)
457 break;
460 got_object_id_queue_free(&tree_ids);
461 return err;
464 static const struct got_error *
465 load_commit(struct got_pack_metavec *v, struct got_object_idset *idset,
466 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
467 got_cancel_cb cancel_cb, void *cancel_arg)
469 const struct got_error *err;
470 struct got_commit_object *commit;
472 if (got_object_idset_contains(idset, id))
473 return NULL;
475 if (loose_obj_only) {
476 int is_packed;
477 err = search_packidx(&is_packed, id, repo);
478 if (err)
479 return err;
480 if (is_packed)
481 return NULL;
484 err = got_object_open_as_commit(&commit, repo, id);
485 if (err)
486 return err;
488 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT,
489 got_object_commit_get_committer_time(commit),
490 loose_obj_only, repo);
491 if (err)
492 goto done;
494 err = load_tree(v, idset, got_object_commit_get_tree_id(commit),
495 "", got_object_commit_get_committer_time(commit),
496 loose_obj_only, repo, cancel_cb, cancel_arg);
497 done:
498 got_object_commit_close(commit);
499 return err;
502 static const struct got_error *
503 load_tag(struct got_pack_metavec *v, struct got_object_idset *idset,
504 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
505 got_cancel_cb cancel_cb, void *cancel_arg)
507 const struct got_error *err;
508 struct got_tag_object *tag = NULL;
510 if (got_object_idset_contains(idset, id))
511 return NULL;
513 if (loose_obj_only) {
514 int is_packed;
515 err = search_packidx(&is_packed, id, repo);
516 if (err)
517 return err;
518 if (is_packed)
519 return NULL;
522 err = got_object_open_as_tag(&tag, repo, id);
523 if (err)
524 return err;
526 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG,
527 got_object_tag_get_tagger_time(tag),
528 loose_obj_only, repo);
529 if (err)
530 goto done;
532 switch (got_object_tag_get_object_type(tag)) {
533 case GOT_OBJ_TYPE_COMMIT:
534 err = load_commit(v, idset,
535 got_object_tag_get_object_id(tag), repo,
536 loose_obj_only, cancel_cb, cancel_arg);
537 break;
538 case GOT_OBJ_TYPE_TREE:
539 err = load_tree(v, idset, got_object_tag_get_object_id(tag),
540 "", got_object_tag_get_tagger_time(tag),
541 loose_obj_only, repo, cancel_cb, cancel_arg);
542 break;
543 default:
544 break;
547 done:
548 got_object_tag_close(tag);
549 return err;
/* Colors assigned to commits while findtwixt() walks the history. */
enum findtwixt_color {
	COLOR_KEEP = 0,
	COLOR_DROP,
	COLOR_BLANK,
};

/*
 * Table mapping each color to itself. Queue entries point into this
 * table in order to carry a color along.
 */
static const int findtwixt_colors[] = {
	[COLOR_KEEP] = COLOR_KEEP,
	[COLOR_DROP] = COLOR_DROP,
	[COLOR_BLANK] = COLOR_BLANK
};
563 static const struct got_error *
564 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
565 int color, struct got_repository *repo)
567 const struct got_error *err;
568 struct got_object_qid *qid;
570 err = got_object_qid_alloc(&qid, id);
571 if (err)
572 return err;
574 STAILQ_INSERT_TAIL(ids, qid, entry);
575 qid->data = (void *)&findtwixt_colors[color];
576 return NULL;
579 static const struct got_error *
580 drop_commit(struct got_object_idset *keep, struct got_object_idset *drop,
581 struct got_object_id *id, struct got_repository *repo,
582 got_cancel_cb cancel_cb, void *cancel_arg)
584 const struct got_error *err = NULL;
585 struct got_commit_object *commit;
586 const struct got_object_id_queue *parents;
587 struct got_object_id_queue ids;
588 struct got_object_qid *qid;
590 STAILQ_INIT(&ids);
592 err = got_object_qid_alloc(&qid, id);
593 if (err)
594 return err;
595 STAILQ_INSERT_HEAD(&ids, qid, entry);
597 while (!STAILQ_EMPTY(&ids)) {
598 if (cancel_cb) {
599 err = (*cancel_cb)(cancel_arg);
600 if (err)
601 break;
604 qid = STAILQ_FIRST(&ids);
605 STAILQ_REMOVE_HEAD(&ids, entry);
607 if (got_object_idset_contains(drop, qid->id)) {
608 got_object_qid_free(qid);
609 continue;
612 err = got_object_idset_add(drop, qid->id,
613 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
614 if (err) {
615 got_object_qid_free(qid);
616 break;
619 if (!got_object_idset_contains(keep, qid->id)) {
620 got_object_qid_free(qid);
621 continue;
624 err = got_object_open_as_commit(&commit, repo, qid->id);
625 got_object_qid_free(qid);
626 if (err)
627 break;
629 parents = got_object_commit_get_parent_ids(commit);
630 if (parents) {
631 err = got_object_id_queue_copy(parents, &ids);
632 if (err) {
633 got_object_commit_close(commit);
634 break;
637 got_object_commit_close(commit);
640 got_object_id_queue_free(&ids);
641 return err;
/* State handed to append_id() while collecting IDs into an array. */
struct append_id_arg {
	struct got_object_id **array;	/* destination array */
	int idx;			/* next free slot in array */
};
649 static const struct got_error *
650 append_id(struct got_object_id *id, void *data, void *arg)
652 struct append_id_arg *a = arg;
654 a->array[a->idx] = got_object_id_dup(id);
655 if (a->array[a->idx] == NULL)
656 return got_error_from_errno("got_object_id_dup");
658 a->idx++;
659 return NULL;
662 static const struct got_error *
663 findtwixt(struct got_object_id ***res, int *nres,
664 struct got_object_id **head, int nhead,
665 struct got_object_id **tail, int ntail,
666 struct got_repository *repo,
667 got_cancel_cb cancel_cb, void *cancel_arg)
669 const struct got_error *err = NULL;
670 struct got_object_id_queue ids;
671 struct got_object_idset *keep, *drop;
672 struct got_object_qid *qid;
673 int i, ncolor, nkeep, obj_type;
675 STAILQ_INIT(&ids);
676 *res = NULL;
677 *nres = 0;
679 keep = got_object_idset_alloc();
680 if (keep == NULL)
681 return got_error_from_errno("got_object_idset_alloc");
683 drop = got_object_idset_alloc();
684 if (drop == NULL) {
685 err = got_error_from_errno("got_object_idset_alloc");
686 goto done;
689 for (i = 0; i < nhead; i++) {
690 struct got_object_id *id = head[i];
691 if (id == NULL)
692 continue;
693 err = got_object_get_type(&obj_type, repo, id);
694 if (err)
695 return err;
696 if (obj_type != GOT_OBJ_TYPE_COMMIT)
697 continue;
698 err = queue_commit_id(&ids, id, COLOR_KEEP, repo);
699 if (err)
700 goto done;
702 for (i = 0; i < ntail; i++) {
703 struct got_object_id *id = tail[i];
704 if (id == NULL)
705 continue;
706 err = got_object_get_type(&obj_type, repo, id);
707 if (err)
708 return err;
709 if (obj_type != GOT_OBJ_TYPE_COMMIT)
710 continue;
711 err = queue_commit_id(&ids, id, COLOR_DROP, repo);
712 if (err)
713 goto done;
716 while (!STAILQ_EMPTY(&ids)) {
717 int qcolor;
718 qid = STAILQ_FIRST(&ids);
719 qcolor = *((int *)qid->data);
721 if (got_object_idset_contains(drop, qid->id))
722 ncolor = COLOR_DROP;
723 else if (got_object_idset_contains(keep, qid->id))
724 ncolor = COLOR_KEEP;
725 else
726 ncolor = COLOR_BLANK;
728 if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP &&
729 qcolor == COLOR_KEEP)) {
730 STAILQ_REMOVE_HEAD(&ids, entry);
731 got_object_qid_free(qid);
732 continue;
735 if (ncolor == COLOR_KEEP && qcolor == COLOR_DROP) {
736 err = drop_commit(keep, drop, qid->id, repo,
737 cancel_cb, cancel_arg);
738 if (err)
739 goto done;
740 } else if (ncolor == COLOR_BLANK) {
741 struct got_commit_object *commit;
742 struct got_object_id *id;
743 const struct got_object_id_queue *parents;
744 struct got_object_qid *pid;
746 id = got_object_id_dup(qid->id);
747 if (id == NULL) {
748 err = got_error_from_errno("got_object_id_dup");
749 goto done;
751 if (qcolor == COLOR_KEEP)
752 err = got_object_idset_add(keep, id,
753 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
754 else
755 err = got_object_idset_add(drop, id,
756 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
757 if (err) {
758 free(id);
759 goto done;
762 err = got_object_open_as_commit(&commit, repo, id);
763 if (err) {
764 free(id);
765 goto done;
767 parents = got_object_commit_get_parent_ids(commit);
768 if (parents) {
769 STAILQ_FOREACH(pid, parents, entry) {
770 err = queue_commit_id(&ids, pid->id,
771 qcolor, repo);
772 if (err) {
773 free(id);
774 goto done;
778 got_object_commit_close(commit);
779 commit = NULL;
780 } else {
781 /* should not happen */
782 err = got_error_fmt(GOT_ERR_NOT_IMPL,
783 "%s ncolor=%d qcolor=%d", __func__, ncolor, qcolor);
784 goto done;
787 STAILQ_REMOVE_HEAD(&ids, entry);
788 got_object_qid_free(qid);
791 nkeep = got_object_idset_num_elements(keep);
792 if (nkeep > 0) {
793 struct append_id_arg arg;
794 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
795 if (arg.array == NULL) {
796 err = got_error_from_errno("calloc");
797 goto done;
799 arg.idx = 0;
800 err = got_object_idset_for_each(keep, append_id, &arg);
801 if (err) {
802 free(arg.array);
803 goto done;
805 *res = arg.array;
806 *nres = nkeep;
808 done:
809 got_object_idset_free(keep);
810 got_object_idset_free(drop);
811 got_object_id_queue_free(&ids);
812 return err;
815 static const struct got_error *
816 read_meta(struct got_pack_meta ***meta, int *nmeta,
817 struct got_object_id **theirs, int ntheirs,
818 struct got_object_id **ours, int nours, struct got_repository *repo,
819 int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
820 got_cancel_cb cancel_cb, void *cancel_arg)
822 const struct got_error *err = NULL;
823 struct got_object_id **ids = NULL;
824 struct got_object_idset *idset;
825 int i, nobj = 0, obj_type;
826 struct got_pack_metavec v;
828 *meta = NULL;
829 *nmeta = 0;
831 idset = got_object_idset_alloc();
832 if (idset == NULL)
833 return got_error_from_errno("got_object_idset_alloc");
835 v.nmeta = 0;
836 v.metasz = 64;
837 v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *));
838 if (v.meta == NULL) {
839 err = got_error_from_errno("calloc");
840 goto done;
843 err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo,
844 cancel_cb, cancel_arg);
845 if (err || nobj == 0)
846 goto done;
848 for (i = 0; i < ntheirs; i++) {
849 struct got_object_id *id = theirs[i];
850 if (id == NULL)
851 continue;
852 err = got_object_get_type(&obj_type, repo, id);
853 if (err)
854 return err;
855 if (obj_type != GOT_OBJ_TYPE_COMMIT)
856 continue;
857 err = load_commit(NULL, idset, id, repo,
858 loose_obj_only, cancel_cb, cancel_arg);
859 if (err)
860 goto done;
861 if (progress_cb) {
862 err = progress_cb(progress_arg, 0L, nours,
863 v.nmeta, 0, 0);
864 if (err)
865 goto done;
869 for (i = 0; i < ntheirs; i++) {
870 struct got_object_id *id = theirs[i];
871 int *cached_type;
872 if (id == NULL)
873 continue;
874 cached_type = got_object_idset_get(idset, id);
875 if (cached_type == NULL) {
876 err = got_object_get_type(&obj_type, repo, id);
877 if (err)
878 goto done;
879 } else
880 obj_type = *cached_type;
881 if (obj_type != GOT_OBJ_TYPE_TAG)
882 continue;
883 err = load_tag(NULL, idset, id, repo,
884 loose_obj_only, cancel_cb, cancel_arg);
885 if (err)
886 goto done;
887 if (progress_cb) {
888 err = progress_cb(progress_arg, 0L, nours,
889 v.nmeta, 0, 0);
890 if (err)
891 goto done;
895 for (i = 0; i < nobj; i++) {
896 err = load_commit(&v, idset, ids[i], repo,
897 loose_obj_only, cancel_cb, cancel_arg);
898 if (err)
899 goto done;
900 if (progress_cb) {
901 err = progress_cb(progress_arg, 0L, nours,
902 v.nmeta, 0, 0);
903 if (err)
904 goto done;
908 for (i = 0; i < nours; i++) {
909 struct got_object_id *id = ours[i];
910 int *cached_type;
911 if (id == NULL)
912 continue;
913 cached_type = got_object_idset_get(idset, id);
914 if (cached_type == NULL) {
915 err = got_object_get_type(&obj_type, repo, id);
916 if (err)
917 goto done;
918 } else
919 obj_type = *cached_type;
920 if (obj_type != GOT_OBJ_TYPE_TAG)
921 continue;
922 err = load_tag(&v, idset, id, repo,
923 loose_obj_only, cancel_cb, cancel_arg);
924 if (err)
925 goto done;
926 if (progress_cb) {
927 err = progress_cb(progress_arg, 0L, nours,
928 v.nmeta, 0, 0);
929 if (err)
930 goto done;
934 done:
935 for (i = 0; i < nobj; i++) {
936 free(ids[i]);
938 free(ids);
939 got_object_idset_free(idset);
940 if (err == NULL) {
941 *meta = v.meta;
942 *nmeta = v.nmeta;
943 } else
944 free(v.meta);
946 return err;
949 const struct got_error *
950 hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx)
952 size_t n;
954 SHA1Update(ctx, buf, len);
955 n = fwrite(buf, 1, len, f);
956 if (n != len)
957 return got_ferror(f, GOT_ERR_IO);
958 return NULL;
/*
 * Store the 32-bit value 'n' in the four bytes at 'b' in big-endian
 * byte order (most significant byte first).
 */
static void
putbe32(char *b, uint32_t n)
{
	int k;

	for (k = 0; k < 4; k++)
		b[k] = n >> (24 - 8 * k);
}
970 static int
971 write_order_cmp(const void *pa, const void *pb)
973 struct got_pack_meta *a, *b, *ahd, *bhd;
975 a = *(struct got_pack_meta **)pa;
976 b = *(struct got_pack_meta **)pb;
977 ahd = (a->head == NULL) ? a : a->head;
978 bhd = (b->head == NULL) ? b : b->head;
979 if (ahd->mtime != bhd->mtime)
980 return bhd->mtime - ahd->mtime;
981 if (ahd != bhd)
982 return (uintptr_t)bhd - (uintptr_t)ahd;
983 if (a->nchain != b->nchain)
984 return a->nchain - b->nchain;
985 return a->mtime - b->mtime;
988 static const struct got_error *
989 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
991 size_t i;
993 *hdrlen = 0;
995 hdr[0] = obj_type << 4;
996 hdr[0] |= len & 0xf;
997 len >>= 4;
998 for (i = 1; len != 0; i++){
999 if (i >= bufsize)
1000 return got_error(GOT_ERR_NO_SPACE);
1001 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1002 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1003 len >>= GOT_DELTA_SIZE_SHIFT;
1006 *hdrlen = i;
1007 return NULL;
1010 static const struct got_error *
1011 encodedelta(struct got_pack_meta *m, struct got_raw_object *o,
1012 off_t base_size, FILE *f)
1014 unsigned char buf[16], *bp;
1015 int i, j;
1016 off_t n;
1017 size_t w;
1018 struct got_delta_instruction *d;
1020 /* base object size */
1021 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
1022 n = base_size >> GOT_DELTA_SIZE_SHIFT;
1023 for (i = 1; n > 0; i++) {
1024 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1025 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1026 n >>= GOT_DELTA_SIZE_SHIFT;
1028 w = fwrite(buf, 1, i, f);
1029 if (w != i)
1030 return got_ferror(f, GOT_ERR_IO);
1032 /* target object size */
1033 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
1034 n = o->size >> GOT_DELTA_SIZE_SHIFT;
1035 for (i = 1; n > 0; i++) {
1036 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1037 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1038 n >>= GOT_DELTA_SIZE_SHIFT;
1040 w = fwrite(buf, 1, i, f);
1041 if (w != i)
1042 return got_ferror(f, GOT_ERR_IO);
1044 for (j = 0; j < m->ndeltas; j++) {
1045 d = &m->deltas[j];
1046 if (d->copy) {
1047 n = d->offset;
1048 bp = &buf[1];
1049 buf[0] = GOT_DELTA_BASE_COPY;
1050 for (i = 0; i < 4; i++) {
1051 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
1052 buf[0] |= 1 << i;
1053 *bp++ = n & 0xff;
1054 n >>= 8;
1055 if (n == 0)
1056 break;
1059 n = d->len;
1060 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
1061 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
1062 for (i = 0; i < 3 && n > 0; i++) {
1063 buf[0] |= 1 << (i + 4);
1064 *bp++ = n & 0xff;
1065 n >>= 8;
1068 w = fwrite(buf, 1, bp - buf, f);
1069 if (w != bp - buf)
1070 return got_ferror(f, GOT_ERR_IO);
1071 } else {
1072 char content[128];
1073 size_t r;
1074 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
1075 return got_error_from_errno("fseeko");
1076 n = 0;
1077 while (n != d->len) {
1078 buf[0] = (d->len - n < 127) ? d->len - n : 127;
1079 w = fwrite(buf, 1, 1, f);
1080 if (w != 1)
1081 return got_ferror(f, GOT_ERR_IO);
1082 r = fread(content, 1, buf[0], o->f);
1083 if (r != buf[0])
1084 return got_ferror(o->f, GOT_ERR_IO);
1085 w = fwrite(content, 1, buf[0], f);
1086 if (w != buf[0])
1087 return got_ferror(f, GOT_ERR_IO);
1088 n += buf[0];
1093 return NULL;
1096 static int
1097 packoff(char *hdr, off_t off)
1099 int i, j;
1100 char rbuf[8];
1102 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1103 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1104 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1105 GOT_DELTA_SIZE_MORE;
1108 j = 0;
1109 while (i > 0)
1110 hdr[j++] = rbuf[--i];
1111 return j;
1114 static const struct got_error *
1115 genpack(uint8_t *pack_sha1, FILE *packfile,
1116 struct got_pack_meta **meta, int nmeta, int nours,
1117 int use_offset_deltas, struct got_repository *repo,
1118 got_pack_progress_cb progress_cb, void *progress_arg,
1119 got_cancel_cb cancel_cb, void *cancel_arg)
1121 const struct got_error *err = NULL;
1122 int i, nh;
1123 off_t nd;
1124 SHA1_CTX ctx;
1125 struct got_pack_meta *m;
1126 struct got_raw_object *raw = NULL;
1127 FILE *delta_file = NULL;
1128 char buf[32];
1129 size_t outlen, n;
1130 struct got_deflate_checksum csum;
1131 off_t packfile_size = 0;
1133 SHA1Init(&ctx);
1134 csum.output_sha1 = &ctx;
1135 csum.output_crc = NULL;
1137 err = hwrite(packfile, "PACK", 4, &ctx);
1138 if (err)
1139 return err;
1140 putbe32(buf, GOT_PACKFILE_VERSION);
1141 err = hwrite(packfile, buf, 4, &ctx);
1142 if (err)
1143 goto done;
1144 putbe32(buf, nmeta);
1145 err = hwrite(packfile, buf, 4, &ctx);
1146 if (err)
1147 goto done;
1148 qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
1149 for (i = 0; i < nmeta; i++) {
1150 if (progress_cb) {
1151 err = progress_cb(progress_arg, packfile_size, nours,
1152 nmeta, nmeta, i);
1153 if (err)
1154 goto done;
1156 m = meta[i];
1157 m->off = ftello(packfile);
1158 err = got_object_raw_open(&raw, repo, &m->id, 8192);
1159 if (err)
1160 goto done;
1161 if (m->deltas == NULL) {
1162 err = packhdr(&nh, buf, sizeof(buf),
1163 m->obj_type, raw->size);
1164 if (err)
1165 goto done;
1166 err = hwrite(packfile, buf, nh, &ctx);
1167 if (err)
1168 goto done;
1169 packfile_size += nh;
1170 if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
1171 err = got_error_from_errno("fseeko");
1172 goto done;
1174 err = got_deflate_to_file(&outlen, raw->f, packfile,
1175 &csum);
1176 if (err)
1177 goto done;
1178 packfile_size += outlen;
1179 } else {
1180 if (delta_file == NULL) {
1181 delta_file = got_opentemp();
1182 if (delta_file == NULL) {
1183 err = got_error_from_errno(
1184 "got_opentemp");
1185 goto done;
1188 if (ftruncate(fileno(delta_file), 0L) == -1) {
1189 err = got_error_from_errno("ftruncate");
1190 goto done;
1192 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1193 err = got_error_from_errno("fseeko");
1194 goto done;
1196 err = encodedelta(m, raw, m->prev->size, delta_file);
1197 if (err)
1198 goto done;
1199 nd = ftello(delta_file);
1200 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1201 err = got_error_from_errno("fseeko");
1202 goto done;
1204 if (use_offset_deltas && m->prev->off != 0) {
1205 err = packhdr(&nh, buf, sizeof(buf),
1206 GOT_OBJ_TYPE_OFFSET_DELTA, nd);
1207 if (err)
1208 goto done;
1209 nh += packoff(buf + nh,
1210 m->off - m->prev->off);
1211 err = hwrite(packfile, buf, nh, &ctx);
1212 if (err)
1213 goto done;
1214 packfile_size += nh;
1215 } else {
1216 err = packhdr(&nh, buf, sizeof(buf),
1217 GOT_OBJ_TYPE_REF_DELTA, nd);
1218 err = hwrite(packfile, buf, nh, &ctx);
1219 if (err)
1220 goto done;
1221 packfile_size += nh;
1222 err = hwrite(packfile, m->prev->id.sha1,
1223 sizeof(m->prev->id.sha1), &ctx);
1224 packfile_size += sizeof(m->prev->id.sha1);
1225 if (err)
1226 goto done;
1228 err = got_deflate_to_file(&outlen, delta_file,
1229 packfile, &csum);
1230 if (err)
1231 goto done;
1232 packfile_size += outlen;
1234 got_object_raw_close(raw);
1235 raw = NULL;
1237 SHA1Final(pack_sha1, &ctx);
1238 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
1239 if (n != SHA1_DIGEST_LENGTH)
1240 err = got_ferror(packfile, GOT_ERR_IO);
1241 packfile_size += SHA1_DIGEST_LENGTH;
1242 packfile_size += sizeof(struct got_packfile_hdr);
1243 err = progress_cb(progress_arg, packfile_size, nours,
1244 nmeta, nmeta, nmeta);
1245 if (err)
1246 goto done;
1247 done:
1248 if (delta_file && fclose(delta_file) == EOF && err == NULL)
1249 err = got_error_from_errno("fclose");
1250 if (raw)
1251 got_object_raw_close(raw);
1252 return err;
1255 const struct got_error *
1256 got_pack_create(uint8_t *packsha1, FILE *packfile,
1257 struct got_object_id **theirs, int ntheirs,
1258 struct got_object_id **ours, int nours,
1259 struct got_repository *repo, int loose_obj_only, int allow_empty,
1260 got_pack_progress_cb progress_cb, void *progress_arg,
1261 got_cancel_cb cancel_cb, void *cancel_arg)
1263 const struct got_error *err;
1264 struct got_pack_meta **meta;
1265 int nmeta;
1267 err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
1268 loose_obj_only, progress_cb, progress_arg, cancel_cb, cancel_arg);
1269 if (err)
1270 return err;
1272 if (nmeta == 0 && !allow_empty) {
1273 err = got_error(GOT_ERR_CANNOT_PACK);
1274 goto done;
1276 if (nmeta > 0) {
1277 err = pick_deltas(meta, nmeta, nours, repo,
1278 progress_cb, progress_arg, cancel_cb, cancel_arg);
1279 if (err)
1280 goto done;
1283 err = genpack(packsha1, packfile, meta, nmeta, nours, 1, repo,
1284 progress_cb, progress_arg, cancel_cb, cancel_arg);
1285 if (err)
1286 goto done;
1287 done:
1288 free_nmeta(meta, nmeta);
1289 return err;