Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
24 #include <stdint.h>
25 #include <imsg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sha1.h>
30 #include <limits.h>
31 #include <zlib.h>
33 #include "got_error.h"
34 #include "got_cancel.h"
35 #include "got_object.h"
36 #include "got_path.h"
37 #include "got_reference.h"
38 #include "got_repository_admin.h"
39 #include "got_opentemp.h"
41 #include "got_lib_deltify.h"
42 #include "got_lib_delta.h"
43 #include "got_lib_object.h"
44 #include "got_lib_object_idset.h"
45 #include "got_lib_object_cache.h"
46 #include "got_lib_deflate.h"
47 #include "got_lib_pack.h"
48 #include "got_lib_privsep.h"
49 #include "got_lib_repository.h"
/* Smaller of two values. Each argument may be evaluated more than once. */
#ifndef MIN
#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
#endif

/* Larger of two values. Each argument may be evaluated more than once. */
#ifndef MAX
#define MAX(x, y)	(((x) > (y)) ? (x) : (y))
#endif
59 struct got_pack_meta {
60 struct got_object_id id;
61 char *path;
62 int obj_type;
63 off_t size;
64 time_t mtime;
66 /* The best delta we picked */
67 struct got_pack_meta *head;
68 struct got_pack_meta *prev;
69 off_t delta_offset; /* offset in delta cache file */
70 off_t delta_len; /* length in delta cache file */
71 int nchain;
73 /* Only used for delta window */
74 struct got_delta_table *dtab;
76 /* Only used for writing offset deltas */
77 off_t off;
78 };
/* Growable array of pack meta entries; see add_meta() for how it grows. */
struct got_pack_metavec {
	struct got_pack_meta **meta;	/* the entries */
	int	nmeta;			/* number of slots in use */
	int	metasz;			/* number of slots allocated */
};
86 static const struct got_error *
87 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
88 const char *path, int obj_type, time_t mtime)
89 {
90 const struct got_error *err = NULL;
91 struct got_pack_meta *m;
93 *new = NULL;
95 m = calloc(1, sizeof(*m));
96 if (m == NULL)
97 return got_error_from_errno("calloc");
99 memcpy(&m->id, id, sizeof(m->id));
101 m->path = strdup(path);
102 if (m->path == NULL) {
103 err = got_error_from_errno("strdup");
104 free(m);
105 return err;
108 m->obj_type = obj_type;
109 m->mtime = mtime;
110 *new = m;
111 return NULL;
114 static void
115 clear_meta(struct got_pack_meta *meta)
117 if (meta == NULL)
118 return;
119 free(meta->path);
120 meta->path = NULL;
/*
 * Free an array of 'nmeta' pack meta entries, including the entries
 * themselves and the array.
 *
 * Each entry was allocated separately by alloc_meta(), and clear_meta()
 * only releases the path it owns, so the entry must be freed here as well.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int i;

	for (i = 0; i < nmeta; i++) {
		clear_meta(meta[i]);
		free(meta[i]);
	}
	free(meta);
}
133 static int
134 delta_order_cmp(const void *pa, const void *pb)
136 struct got_pack_meta *a, *b;
137 int cmp;
139 a = *(struct got_pack_meta **)pa;
140 b = *(struct got_pack_meta **)pb;
142 if (a->obj_type != b->obj_type)
143 return a->obj_type - b->obj_type;
144 cmp = strcmp(a->path, b->path);
145 if (cmp != 0)
146 return cmp;
147 if (a->mtime != b->mtime)
148 return a->mtime - b->mtime;
149 return got_object_id_cmp(&a->id, &b->id);
152 static int
153 delta_size(struct got_delta_instruction *deltas, int ndeltas)
155 int i, size = 32;
156 for (i = 0; i < ndeltas; i++) {
157 if (deltas[i].copy)
158 size += GOT_DELTA_SIZE_SHIFT;
159 else
160 size += deltas[i].len + 1;
162 return size;
165 static const struct got_error *
166 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
167 struct got_delta_instruction *deltas, int ndeltas,
168 off_t base_size, FILE *f);
170 static const struct got_error *
171 pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
172 FILE *delta_cache, struct got_repository *repo,
173 got_pack_progress_cb progress_cb, void *progress_arg,
174 got_cancel_cb cancel_cb, void *cancel_arg)
176 const struct got_error *err = NULL;
177 struct got_pack_meta *m = NULL, *base = NULL;
178 struct got_raw_object *raw = NULL, *base_raw = NULL;
179 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
180 int i, j, size, best_size, ndeltas, best_ndeltas;
181 const int max_base_candidates = 10;
182 int outfd = -1;
184 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
185 for (i = 0; i < nmeta; i++) {
186 if (cancel_cb) {
187 err = (*cancel_cb)(cancel_arg);
188 if (err)
189 break;
191 if (progress_cb) {
192 err = progress_cb(progress_arg, 0L, nours, nmeta, i, 0);
193 if (err)
194 goto done;
196 m = meta[i];
198 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
199 m->obj_type == GOT_OBJ_TYPE_TAG)
200 continue;
202 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
203 if (err)
204 goto done;
205 m->size = raw->size;
207 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
208 raw->size + raw->hdrlen);
209 if (err)
210 goto done;
212 if (i > max_base_candidates) {
213 struct got_pack_meta *n = NULL;
214 n = meta[i - (max_base_candidates + 1)];
215 got_deltify_free(n->dtab);
216 n->dtab = NULL;
219 best_size = raw->size;
220 best_ndeltas = 0;
221 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
222 if (cancel_cb) {
223 err = (*cancel_cb)(cancel_arg);
224 if (err)
225 goto done;
227 base = meta[j];
228 /* long chains make unpacking slow, avoid such bases */
229 if (base->nchain >= 32 ||
230 base->obj_type != m->obj_type)
231 continue;
233 err = got_object_raw_open(&base_raw, &outfd, repo,
234 &base->id);
235 if (err)
236 goto done;
237 err = got_deltify(&deltas, &ndeltas,
238 raw->f, raw->hdrlen, raw->size + raw->hdrlen,
239 base->dtab, base_raw->f, base_raw->hdrlen,
240 base_raw->size + base_raw->hdrlen);
241 got_object_raw_close(base_raw);
242 base_raw = NULL;
243 if (err)
244 goto done;
246 size = delta_size(deltas, ndeltas);
247 if (size + 32 < best_size){
248 /*
249 * if we already picked a best delta,
250 * replace it.
251 */
252 best_size = size;
253 free(best_deltas);
254 best_deltas = deltas;
255 best_ndeltas = ndeltas;
256 deltas = NULL;
257 m->nchain = base->nchain + 1;
258 m->prev = base;
259 m->head = base->head;
260 if (m->head == NULL)
261 m->head = base;
262 } else {
263 free(deltas);
264 deltas = NULL;
265 ndeltas = 0;
269 if (best_ndeltas > 0) {
270 m->delta_offset = ftello(delta_cache);
271 err = encode_delta(m, raw, best_deltas,
272 best_ndeltas, m->prev->size, delta_cache);
273 free(best_deltas);
274 best_deltas = NULL;
275 best_ndeltas = 0;
276 if (err)
277 goto done;
278 m->delta_len = ftello(delta_cache) - m->delta_offset;
281 got_object_raw_close(raw);
282 raw = NULL;
284 done:
285 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
286 got_deltify_free(meta[i]->dtab);
287 meta[i]->dtab = NULL;
289 if (raw)
290 got_object_raw_close(raw);
291 if (base_raw)
292 got_object_raw_close(base_raw);
293 if (outfd != -1 && close(outfd) == -1 && err == NULL)
294 err = got_error_from_errno("close");
295 free(deltas);
296 free(best_deltas);
297 return err;
300 static const struct got_error *
301 search_packidx(int *found, struct got_object_id *id,
302 struct got_repository *repo)
304 const struct got_error *err = NULL;
305 struct got_packidx *packidx = NULL;
306 int idx;
308 *found = 0;
310 err = got_repo_search_packidx(&packidx, &idx, repo, id);
311 if (err == NULL)
312 *found = 1; /* object is already packed */
313 else if (err->code == GOT_ERR_NO_OBJ)
314 err = NULL;
315 return err;
318 static const int obj_types[] = {
319 GOT_OBJ_TYPE_ANY,
320 GOT_OBJ_TYPE_COMMIT,
321 GOT_OBJ_TYPE_TREE,
322 GOT_OBJ_TYPE_BLOB,
323 GOT_OBJ_TYPE_TAG,
324 GOT_OBJ_TYPE_OFFSET_DELTA,
325 GOT_OBJ_TYPE_REF_DELTA
326 };
328 static const struct got_error *
329 add_meta(struct got_pack_metavec *v, struct got_object_idset *idset,
330 struct got_object_id *id, const char *path, int obj_type,
331 time_t mtime, int loose_obj_only, struct got_repository *repo)
333 const struct got_error *err;
334 struct got_pack_meta *m;
336 if (loose_obj_only) {
337 int is_packed;
338 err = search_packidx(&is_packed, id, repo);
339 if (err)
340 return err;
341 if (is_packed)
342 return NULL;
345 err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]);
346 if (err)
347 return err;
349 if (v == NULL)
350 return NULL;
352 err = alloc_meta(&m, id, path, obj_type, mtime);
353 if (err)
354 goto done;
356 if (v->nmeta == v->metasz){
357 size_t newsize = 2 * v->metasz;
358 struct got_pack_meta **new;
359 new = reallocarray(v->meta, newsize, sizeof(*new));
360 if (new == NULL) {
361 err = got_error_from_errno("reallocarray");
362 goto done;
364 v->meta = new;
365 v->metasz = newsize;
367 done:
368 if (err) {
369 clear_meta(m);
370 free(m);
371 } else
372 v->meta[v->nmeta++] = m;
374 return err;
377 static const struct got_error *
378 load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v,
379 struct got_object_idset *idset, struct got_object_id *tree_id,
380 const char *dpath, time_t mtime, struct got_repository *repo,
381 int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg)
383 const struct got_error *err;
384 struct got_tree_object *tree;
385 char *p = NULL;
386 int i;
388 err = got_object_open_as_tree(&tree, repo, tree_id);
389 if (err)
390 return err;
392 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
393 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
394 struct got_object_id *id = got_tree_entry_get_id(e);
395 mode_t mode = got_tree_entry_get_mode(e);
397 if (cancel_cb) {
398 err = (*cancel_cb)(cancel_arg);
399 if (err)
400 break;
403 if (got_object_tree_entry_is_submodule(e) ||
404 got_object_idset_contains(idset, id))
405 continue;
407 if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
408 got_tree_entry_get_name(e)) == -1) {
409 err = got_error_from_errno("asprintf");
410 break;
413 if (S_ISDIR(mode)) {
414 struct got_object_qid *qid;
415 err = got_object_qid_alloc(&qid, id);
416 if (err)
417 break;
418 STAILQ_INSERT_TAIL(ids, qid, entry);
419 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
420 err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB,
421 mtime, loose_obj_only, repo);
422 if (err)
423 break;
425 free(p);
426 p = NULL;
429 got_object_tree_close(tree);
430 free(p);
431 return err;
434 static const struct got_error *
435 load_tree(struct got_pack_metavec *v, struct got_object_idset *idset,
436 struct got_object_id *tree_id, const char *dpath, time_t mtime,
437 int loose_obj_only, struct got_repository *repo,
438 got_cancel_cb cancel_cb, void *cancel_arg)
440 const struct got_error *err = NULL;
441 struct got_object_id_queue tree_ids;
442 struct got_object_qid *qid;
444 if (got_object_idset_contains(idset, tree_id))
445 return NULL;
447 err = got_object_qid_alloc(&qid, tree_id);
448 if (err)
449 return err;
451 STAILQ_INIT(&tree_ids);
452 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
454 while (!STAILQ_EMPTY(&tree_ids)) {
455 if (cancel_cb) {
456 err = (*cancel_cb)(cancel_arg);
457 if (err)
458 break;
461 qid = STAILQ_FIRST(&tree_ids);
462 STAILQ_REMOVE_HEAD(&tree_ids, entry);
464 if (got_object_idset_contains(idset, qid->id)) {
465 got_object_qid_free(qid);
466 continue;
469 err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE,
470 mtime, loose_obj_only, repo);
471 if (err) {
472 got_object_qid_free(qid);
473 break;
476 err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath,
477 mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
478 got_object_qid_free(qid);
479 if (err)
480 break;
483 got_object_id_queue_free(&tree_ids);
484 return err;
487 static const struct got_error *
488 load_commit(struct got_pack_metavec *v, struct got_object_idset *idset,
489 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
490 got_cancel_cb cancel_cb, void *cancel_arg)
492 const struct got_error *err;
493 struct got_commit_object *commit;
495 if (got_object_idset_contains(idset, id))
496 return NULL;
498 if (loose_obj_only) {
499 int is_packed;
500 err = search_packidx(&is_packed, id, repo);
501 if (err)
502 return err;
503 if (is_packed)
504 return NULL;
507 err = got_object_open_as_commit(&commit, repo, id);
508 if (err)
509 return err;
511 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT,
512 got_object_commit_get_committer_time(commit),
513 loose_obj_only, repo);
514 if (err)
515 goto done;
517 err = load_tree(v, idset, got_object_commit_get_tree_id(commit),
518 "", got_object_commit_get_committer_time(commit),
519 loose_obj_only, repo, cancel_cb, cancel_arg);
520 done:
521 got_object_commit_close(commit);
522 return err;
525 static const struct got_error *
526 load_tag(struct got_pack_metavec *v, struct got_object_idset *idset,
527 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
528 got_cancel_cb cancel_cb, void *cancel_arg)
530 const struct got_error *err;
531 struct got_tag_object *tag = NULL;
533 if (got_object_idset_contains(idset, id))
534 return NULL;
536 if (loose_obj_only) {
537 int is_packed;
538 err = search_packidx(&is_packed, id, repo);
539 if (err)
540 return err;
541 if (is_packed)
542 return NULL;
545 err = got_object_open_as_tag(&tag, repo, id);
546 if (err)
547 return err;
549 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG,
550 got_object_tag_get_tagger_time(tag),
551 loose_obj_only, repo);
552 if (err)
553 goto done;
555 switch (got_object_tag_get_object_type(tag)) {
556 case GOT_OBJ_TYPE_COMMIT:
557 err = load_commit(v, idset,
558 got_object_tag_get_object_id(tag), repo,
559 loose_obj_only, cancel_cb, cancel_arg);
560 break;
561 case GOT_OBJ_TYPE_TREE:
562 err = load_tree(v, idset, got_object_tag_get_object_id(tag),
563 "", got_object_tag_get_tagger_time(tag),
564 loose_obj_only, repo, cancel_cb, cancel_arg);
565 break;
566 default:
567 break;
570 done:
571 got_object_tag_close(tag);
572 return err;
/* Commit colors used by findtwixt(). */
enum findtwixt_color {
	COLOR_KEEP = 0,		/* reached from a head */
	COLOR_DROP,		/* reached from a tail */
	COLOR_BLANK,		/* not in the keep or drop set yet */
};

/*
 * The same values with static storage, indexed by color, so that the
 * address of an element can be attached to a queue entry as its data.
 */
static const int findtwixt_colors[] = {
	[COLOR_KEEP] = COLOR_KEEP,
	[COLOR_DROP] = COLOR_DROP,
	[COLOR_BLANK] = COLOR_BLANK
};
586 static const struct got_error *
587 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
588 int color, struct got_repository *repo)
590 const struct got_error *err;
591 struct got_object_qid *qid;
593 err = got_object_qid_alloc(&qid, id);
594 if (err)
595 return err;
597 STAILQ_INSERT_TAIL(ids, qid, entry);
598 qid->data = (void *)&findtwixt_colors[color];
599 return NULL;
602 static const struct got_error *
603 drop_commit(struct got_object_idset *keep, struct got_object_idset *drop,
604 struct got_object_id *id, struct got_repository *repo,
605 got_cancel_cb cancel_cb, void *cancel_arg)
607 const struct got_error *err = NULL;
608 struct got_commit_object *commit;
609 const struct got_object_id_queue *parents;
610 struct got_object_id_queue ids;
611 struct got_object_qid *qid;
613 STAILQ_INIT(&ids);
615 err = got_object_qid_alloc(&qid, id);
616 if (err)
617 return err;
618 STAILQ_INSERT_HEAD(&ids, qid, entry);
620 while (!STAILQ_EMPTY(&ids)) {
621 if (cancel_cb) {
622 err = (*cancel_cb)(cancel_arg);
623 if (err)
624 break;
627 qid = STAILQ_FIRST(&ids);
628 STAILQ_REMOVE_HEAD(&ids, entry);
630 if (got_object_idset_contains(drop, qid->id)) {
631 got_object_qid_free(qid);
632 continue;
635 err = got_object_idset_add(drop, qid->id,
636 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
637 if (err) {
638 got_object_qid_free(qid);
639 break;
642 if (!got_object_idset_contains(keep, qid->id)) {
643 got_object_qid_free(qid);
644 continue;
647 err = got_object_open_as_commit(&commit, repo, qid->id);
648 got_object_qid_free(qid);
649 if (err)
650 break;
652 parents = got_object_commit_get_parent_ids(commit);
653 if (parents) {
654 err = got_object_id_queue_copy(parents, &ids);
655 if (err) {
656 got_object_commit_close(commit);
657 break;
660 got_object_commit_close(commit);
663 got_object_id_queue_free(&ids);
664 return err;
/* Cursor passed to append_id() while an ID set is copied into an array. */
struct append_id_arg {
	struct got_object_id **array;	/* destination array */
	int	idx;			/* next slot to fill */
};
672 static const struct got_error *
673 append_id(struct got_object_id *id, void *data, void *arg)
675 struct append_id_arg *a = arg;
677 a->array[a->idx] = got_object_id_dup(id);
678 if (a->array[a->idx] == NULL)
679 return got_error_from_errno("got_object_id_dup");
681 a->idx++;
682 return NULL;
685 static const struct got_error *
686 findtwixt(struct got_object_id ***res, int *nres,
687 struct got_object_id **head, int nhead,
688 struct got_object_id **tail, int ntail,
689 struct got_repository *repo,
690 got_cancel_cb cancel_cb, void *cancel_arg)
692 const struct got_error *err = NULL;
693 struct got_object_id_queue ids;
694 struct got_object_idset *keep, *drop;
695 struct got_object_qid *qid;
696 int i, ncolor, nkeep, obj_type;
698 STAILQ_INIT(&ids);
699 *res = NULL;
700 *nres = 0;
702 keep = got_object_idset_alloc();
703 if (keep == NULL)
704 return got_error_from_errno("got_object_idset_alloc");
706 drop = got_object_idset_alloc();
707 if (drop == NULL) {
708 err = got_error_from_errno("got_object_idset_alloc");
709 goto done;
712 for (i = 0; i < nhead; i++) {
713 struct got_object_id *id = head[i];
714 if (id == NULL)
715 continue;
716 err = got_object_get_type(&obj_type, repo, id);
717 if (err)
718 return err;
719 if (obj_type != GOT_OBJ_TYPE_COMMIT)
720 continue;
721 err = queue_commit_id(&ids, id, COLOR_KEEP, repo);
722 if (err)
723 goto done;
725 for (i = 0; i < ntail; i++) {
726 struct got_object_id *id = tail[i];
727 if (id == NULL)
728 continue;
729 err = got_object_get_type(&obj_type, repo, id);
730 if (err)
731 return err;
732 if (obj_type != GOT_OBJ_TYPE_COMMIT)
733 continue;
734 err = queue_commit_id(&ids, id, COLOR_DROP, repo);
735 if (err)
736 goto done;
739 while (!STAILQ_EMPTY(&ids)) {
740 int qcolor;
741 qid = STAILQ_FIRST(&ids);
742 qcolor = *((int *)qid->data);
744 if (got_object_idset_contains(drop, qid->id))
745 ncolor = COLOR_DROP;
746 else if (got_object_idset_contains(keep, qid->id))
747 ncolor = COLOR_KEEP;
748 else
749 ncolor = COLOR_BLANK;
751 if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP &&
752 qcolor == COLOR_KEEP)) {
753 STAILQ_REMOVE_HEAD(&ids, entry);
754 got_object_qid_free(qid);
755 continue;
758 if (ncolor == COLOR_KEEP && qcolor == COLOR_DROP) {
759 err = drop_commit(keep, drop, qid->id, repo,
760 cancel_cb, cancel_arg);
761 if (err)
762 goto done;
763 } else if (ncolor == COLOR_BLANK) {
764 struct got_commit_object *commit;
765 struct got_object_id *id;
766 const struct got_object_id_queue *parents;
767 struct got_object_qid *pid;
769 id = got_object_id_dup(qid->id);
770 if (id == NULL) {
771 err = got_error_from_errno("got_object_id_dup");
772 goto done;
774 if (qcolor == COLOR_KEEP)
775 err = got_object_idset_add(keep, id,
776 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
777 else
778 err = got_object_idset_add(drop, id,
779 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
780 if (err) {
781 free(id);
782 goto done;
785 err = got_object_open_as_commit(&commit, repo, id);
786 if (err) {
787 free(id);
788 goto done;
790 parents = got_object_commit_get_parent_ids(commit);
791 if (parents) {
792 STAILQ_FOREACH(pid, parents, entry) {
793 err = queue_commit_id(&ids, pid->id,
794 qcolor, repo);
795 if (err) {
796 free(id);
797 goto done;
801 got_object_commit_close(commit);
802 commit = NULL;
803 } else {
804 /* should not happen */
805 err = got_error_fmt(GOT_ERR_NOT_IMPL,
806 "%s ncolor=%d qcolor=%d", __func__, ncolor, qcolor);
807 goto done;
810 STAILQ_REMOVE_HEAD(&ids, entry);
811 got_object_qid_free(qid);
814 nkeep = got_object_idset_num_elements(keep);
815 if (nkeep > 0) {
816 struct append_id_arg arg;
817 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
818 if (arg.array == NULL) {
819 err = got_error_from_errno("calloc");
820 goto done;
822 arg.idx = 0;
823 err = got_object_idset_for_each(keep, append_id, &arg);
824 if (err) {
825 free(arg.array);
826 goto done;
828 *res = arg.array;
829 *nres = nkeep;
831 done:
832 got_object_idset_free(keep);
833 got_object_idset_free(drop);
834 got_object_id_queue_free(&ids);
835 return err;
838 static const struct got_error *
839 read_meta(struct got_pack_meta ***meta, int *nmeta,
840 struct got_object_id **theirs, int ntheirs,
841 struct got_object_id **ours, int nours, struct got_repository *repo,
842 int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
843 got_cancel_cb cancel_cb, void *cancel_arg)
845 const struct got_error *err = NULL;
846 struct got_object_id **ids = NULL;
847 struct got_object_idset *idset;
848 int i, nobj = 0, obj_type;
849 struct got_pack_metavec v;
851 *meta = NULL;
852 *nmeta = 0;
854 idset = got_object_idset_alloc();
855 if (idset == NULL)
856 return got_error_from_errno("got_object_idset_alloc");
858 v.nmeta = 0;
859 v.metasz = 64;
860 v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *));
861 if (v.meta == NULL) {
862 err = got_error_from_errno("calloc");
863 goto done;
866 err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo,
867 cancel_cb, cancel_arg);
868 if (err || nobj == 0)
869 goto done;
871 for (i = 0; i < ntheirs; i++) {
872 struct got_object_id *id = theirs[i];
873 if (id == NULL)
874 continue;
875 err = got_object_get_type(&obj_type, repo, id);
876 if (err)
877 return err;
878 if (obj_type != GOT_OBJ_TYPE_COMMIT)
879 continue;
880 err = load_commit(NULL, idset, id, repo,
881 loose_obj_only, cancel_cb, cancel_arg);
882 if (err)
883 goto done;
884 if (progress_cb) {
885 err = progress_cb(progress_arg, 0L, nours,
886 v.nmeta, 0, 0);
887 if (err)
888 goto done;
892 for (i = 0; i < ntheirs; i++) {
893 struct got_object_id *id = theirs[i];
894 int *cached_type;
895 if (id == NULL)
896 continue;
897 cached_type = got_object_idset_get(idset, id);
898 if (cached_type == NULL) {
899 err = got_object_get_type(&obj_type, repo, id);
900 if (err)
901 goto done;
902 } else
903 obj_type = *cached_type;
904 if (obj_type != GOT_OBJ_TYPE_TAG)
905 continue;
906 err = load_tag(NULL, idset, id, repo,
907 loose_obj_only, cancel_cb, cancel_arg);
908 if (err)
909 goto done;
910 if (progress_cb) {
911 err = progress_cb(progress_arg, 0L, nours,
912 v.nmeta, 0, 0);
913 if (err)
914 goto done;
918 for (i = 0; i < nobj; i++) {
919 err = load_commit(&v, idset, ids[i], repo,
920 loose_obj_only, cancel_cb, cancel_arg);
921 if (err)
922 goto done;
923 if (progress_cb) {
924 err = progress_cb(progress_arg, 0L, nours,
925 v.nmeta, 0, 0);
926 if (err)
927 goto done;
931 for (i = 0; i < nours; i++) {
932 struct got_object_id *id = ours[i];
933 int *cached_type;
934 if (id == NULL)
935 continue;
936 cached_type = got_object_idset_get(idset, id);
937 if (cached_type == NULL) {
938 err = got_object_get_type(&obj_type, repo, id);
939 if (err)
940 goto done;
941 } else
942 obj_type = *cached_type;
943 if (obj_type != GOT_OBJ_TYPE_TAG)
944 continue;
945 err = load_tag(&v, idset, id, repo,
946 loose_obj_only, cancel_cb, cancel_arg);
947 if (err)
948 goto done;
949 if (progress_cb) {
950 err = progress_cb(progress_arg, 0L, nours,
951 v.nmeta, 0, 0);
952 if (err)
953 goto done;
957 done:
958 for (i = 0; i < nobj; i++) {
959 free(ids[i]);
961 free(ids);
962 got_object_idset_free(idset);
963 if (err == NULL) {
964 *meta = v.meta;
965 *nmeta = v.nmeta;
966 } else
967 free(v.meta);
969 return err;
972 const struct got_error *
973 hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx)
975 size_t n;
977 SHA1Update(ctx, buf, len);
978 n = fwrite(buf, 1, len, f);
979 if (n != len)
980 return got_ferror(f, GOT_ERR_IO);
981 return NULL;
/* Store 'n' in the four bytes at 'b', most significant byte first. */
static void
putbe32(char *b, uint32_t n)
{
	int shift, pos = 0;

	for (shift = 24; shift >= 0; shift -= 8)
		b[pos++] = n >> shift;
}
993 static int
994 write_order_cmp(const void *pa, const void *pb)
996 struct got_pack_meta *a, *b, *ahd, *bhd;
998 a = *(struct got_pack_meta **)pa;
999 b = *(struct got_pack_meta **)pb;
1000 ahd = (a->head == NULL) ? a : a->head;
1001 bhd = (b->head == NULL) ? b : b->head;
1002 if (ahd->mtime != bhd->mtime)
1003 return bhd->mtime - ahd->mtime;
1004 if (ahd != bhd)
1005 return (uintptr_t)bhd - (uintptr_t)ahd;
1006 if (a->nchain != b->nchain)
1007 return a->nchain - b->nchain;
1008 return a->mtime - b->mtime;
1011 static const struct got_error *
1012 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
1014 size_t i;
1016 *hdrlen = 0;
1018 hdr[0] = obj_type << 4;
1019 hdr[0] |= len & 0xf;
1020 len >>= 4;
1021 for (i = 1; len != 0; i++){
1022 if (i >= bufsize)
1023 return got_error(GOT_ERR_NO_SPACE);
1024 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1025 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1026 len >>= GOT_DELTA_SIZE_SHIFT;
1029 *hdrlen = i;
1030 return NULL;
1033 static const struct got_error *
1034 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
1035 struct got_delta_instruction *deltas, int ndeltas,
1036 off_t base_size, FILE *f)
1038 unsigned char buf[16], *bp;
1039 int i, j;
1040 off_t n;
1041 size_t w;
1042 struct got_delta_instruction *d;
1044 /* base object size */
1045 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
1046 n = base_size >> GOT_DELTA_SIZE_SHIFT;
1047 for (i = 1; n > 0; i++) {
1048 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1049 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1050 n >>= GOT_DELTA_SIZE_SHIFT;
1052 w = fwrite(buf, 1, i, f);
1053 if (w != i)
1054 return got_ferror(f, GOT_ERR_IO);
1056 /* target object size */
1057 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
1058 n = o->size >> GOT_DELTA_SIZE_SHIFT;
1059 for (i = 1; n > 0; i++) {
1060 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
1061 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
1062 n >>= GOT_DELTA_SIZE_SHIFT;
1064 w = fwrite(buf, 1, i, f);
1065 if (w != i)
1066 return got_ferror(f, GOT_ERR_IO);
1068 for (j = 0; j < ndeltas; j++) {
1069 d = &deltas[j];
1070 if (d->copy) {
1071 n = d->offset;
1072 bp = &buf[1];
1073 buf[0] = GOT_DELTA_BASE_COPY;
1074 for (i = 0; i < 4; i++) {
1075 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
1076 buf[0] |= 1 << i;
1077 *bp++ = n & 0xff;
1078 n >>= 8;
1079 if (n == 0)
1080 break;
1083 n = d->len;
1084 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
1085 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
1086 for (i = 0; i < 3 && n > 0; i++) {
1087 buf[0] |= 1 << (i + 4);
1088 *bp++ = n & 0xff;
1089 n >>= 8;
1092 w = fwrite(buf, 1, bp - buf, f);
1093 if (w != bp - buf)
1094 return got_ferror(f, GOT_ERR_IO);
1095 } else {
1096 char content[128];
1097 size_t r;
1098 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
1099 return got_error_from_errno("fseeko");
1100 n = 0;
1101 while (n != d->len) {
1102 buf[0] = (d->len - n < 127) ? d->len - n : 127;
1103 w = fwrite(buf, 1, 1, f);
1104 if (w != 1)
1105 return got_ferror(f, GOT_ERR_IO);
1106 r = fread(content, 1, buf[0], o->f);
1107 if (r != buf[0])
1108 return got_ferror(o->f, GOT_ERR_IO);
1109 w = fwrite(content, 1, buf[0], f);
1110 if (w != buf[0])
1111 return got_ferror(f, GOT_ERR_IO);
1112 n += buf[0];
1117 return NULL;
1120 static int
1121 packoff(char *hdr, off_t off)
1123 int i, j;
1124 char rbuf[8];
1126 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1127 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1128 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1129 GOT_DELTA_SIZE_MORE;
1132 j = 0;
1133 while (i > 0)
1134 hdr[j++] = rbuf[--i];
1135 return j;
1138 static const struct got_error *
1139 genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache,
1140 struct got_pack_meta **meta, int nmeta, int nours,
1141 int use_offset_deltas, struct got_repository *repo,
1142 got_pack_progress_cb progress_cb, void *progress_arg,
1143 got_cancel_cb cancel_cb, void *cancel_arg)
1145 const struct got_error *err = NULL;
1146 int i, nh;
1147 SHA1_CTX ctx;
1148 struct got_pack_meta *m;
1149 struct got_raw_object *raw = NULL;
1150 FILE *delta_file = NULL;
1151 char buf[32];
1152 size_t outlen, n;
1153 struct got_deflate_checksum csum;
1154 off_t packfile_size = 0;
1155 int outfd = -1;
1157 SHA1Init(&ctx);
1158 csum.output_sha1 = &ctx;
1159 csum.output_crc = NULL;
1161 err = hwrite(packfile, "PACK", 4, &ctx);
1162 if (err)
1163 return err;
1164 putbe32(buf, GOT_PACKFILE_VERSION);
1165 err = hwrite(packfile, buf, 4, &ctx);
1166 if (err)
1167 goto done;
1168 putbe32(buf, nmeta);
1169 err = hwrite(packfile, buf, 4, &ctx);
1170 if (err)
1171 goto done;
1172 qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
1173 for (i = 0; i < nmeta; i++) {
1174 if (progress_cb) {
1175 err = progress_cb(progress_arg, packfile_size, nours,
1176 nmeta, nmeta, i);
1177 if (err)
1178 goto done;
1180 m = meta[i];
1181 m->off = ftello(packfile);
1182 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
1183 if (err)
1184 goto done;
1185 if (m->delta_len == 0) {
1186 err = packhdr(&nh, buf, sizeof(buf),
1187 m->obj_type, raw->size);
1188 if (err)
1189 goto done;
1190 err = hwrite(packfile, buf, nh, &ctx);
1191 if (err)
1192 goto done;
1193 packfile_size += nh;
1194 if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
1195 err = got_error_from_errno("fseeko");
1196 goto done;
1198 err = got_deflate_to_file(&outlen, raw->f, packfile,
1199 &csum);
1200 if (err)
1201 goto done;
1202 packfile_size += outlen;
1203 } else {
1204 off_t remain;
1205 if (delta_file == NULL) {
1206 delta_file = got_opentemp();
1207 if (delta_file == NULL) {
1208 err = got_error_from_errno(
1209 "got_opentemp");
1210 goto done;
1213 if (ftruncate(fileno(delta_file), 0L) == -1) {
1214 err = got_error_from_errno("ftruncate");
1215 goto done;
1217 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1218 err = got_error_from_errno("fseeko");
1219 goto done;
1221 if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
1222 == -1) {
1223 err = got_error_from_errno("fseeko");
1224 goto done;
1226 remain = m->delta_len;
1227 while (remain > 0) {
1228 char delta_buf[8192];
1229 size_t r, w, n;
1230 n = MIN(remain, sizeof(delta_buf));
1231 r = fread(delta_buf, 1, n, delta_cache);
1232 if (r != n) {
1233 err = got_ferror(delta_cache,
1234 GOT_ERR_IO);
1235 goto done;
1237 w = fwrite(delta_buf, 1, n, delta_file);
1238 if (w != n) {
1239 err = got_ferror(delta_file,
1240 GOT_ERR_IO);
1241 goto done;
1243 remain -= n;
1245 if (use_offset_deltas && m->prev->off != 0) {
1246 err = packhdr(&nh, buf, sizeof(buf),
1247 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
1248 if (err)
1249 goto done;
1250 nh += packoff(buf + nh,
1251 m->off - m->prev->off);
1252 err = hwrite(packfile, buf, nh, &ctx);
1253 if (err)
1254 goto done;
1255 packfile_size += nh;
1256 } else {
1257 err = packhdr(&nh, buf, sizeof(buf),
1258 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
1259 err = hwrite(packfile, buf, nh, &ctx);
1260 if (err)
1261 goto done;
1262 packfile_size += nh;
1263 err = hwrite(packfile, m->prev->id.sha1,
1264 sizeof(m->prev->id.sha1), &ctx);
1265 packfile_size += sizeof(m->prev->id.sha1);
1266 if (err)
1267 goto done;
1269 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1270 err = got_error_from_errno("fseeko");
1271 goto done;
1273 err = got_deflate_to_file(&outlen, delta_file,
1274 packfile, &csum);
1275 if (err)
1276 goto done;
1277 packfile_size += outlen;
1279 got_object_raw_close(raw);
1280 raw = NULL;
1282 SHA1Final(pack_sha1, &ctx);
1283 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
1284 if (n != SHA1_DIGEST_LENGTH)
1285 err = got_ferror(packfile, GOT_ERR_IO);
1286 packfile_size += SHA1_DIGEST_LENGTH;
1287 packfile_size += sizeof(struct got_packfile_hdr);
1288 err = progress_cb(progress_arg, packfile_size, nours,
1289 nmeta, nmeta, nmeta);
1290 if (err)
1291 goto done;
1292 done:
1293 if (delta_file && fclose(delta_file) == EOF && err == NULL)
1294 err = got_error_from_errno("fclose");
1295 if (raw)
1296 got_object_raw_close(raw);
1297 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1298 err = got_error_from_errno("close");
1299 return err;
1302 const struct got_error *
1303 got_pack_create(uint8_t *packsha1, FILE *packfile,
1304 struct got_object_id **theirs, int ntheirs,
1305 struct got_object_id **ours, int nours,
1306 struct got_repository *repo, int loose_obj_only, int allow_empty,
1307 got_pack_progress_cb progress_cb, void *progress_arg,
1308 got_cancel_cb cancel_cb, void *cancel_arg)
1310 const struct got_error *err;
1311 struct got_pack_meta **meta;
1312 int nmeta;
1313 FILE *delta_cache = NULL;
1315 err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
1316 loose_obj_only, progress_cb, progress_arg, cancel_cb, cancel_arg);
1317 if (err)
1318 return err;
1320 if (nmeta == 0 && !allow_empty) {
1321 err = got_error(GOT_ERR_CANNOT_PACK);
1322 goto done;
1325 delta_cache = got_opentemp();
1326 if (delta_cache == NULL) {
1327 err = got_error_from_errno("got_opentemp");
1328 goto done;
1331 if (nmeta > 0) {
1332 err = pick_deltas(meta, nmeta, nours, delta_cache, repo,
1333 progress_cb, progress_arg, cancel_cb, cancel_arg);
1334 if (err)
1335 goto done;
1336 if (fseeko(delta_cache, 0L, SEEK_SET) == -1) {
1337 err = got_error_from_errno("fseeko");
1338 goto done;
1342 err = genpack(packsha1, packfile, delta_cache, meta, nmeta, nours, 1,
1343 repo, progress_cb, progress_arg, cancel_cb, cancel_arg);
1344 if (err)
1345 goto done;
1346 done:
1347 free_nmeta(meta, nmeta);
1348 if (delta_cache && fclose(delta_cache) == EOF && err == NULL)
1349 err = got_error_from_errno("fclose");
1350 return err;