Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
23 #include <sys/time.h>
25 #include <endian.h>
26 #include <stdint.h>
27 #include <imsg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <sha1.h>
32 #include <time.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <zlib.h>
37 #include "got_error.h"
38 #include "got_cancel.h"
39 #include "got_object.h"
40 #include "got_path.h"
41 #include "got_reference.h"
42 #include "got_repository_admin.h"
43 #include "got_opentemp.h"
45 #include "got_lib_deltify.h"
46 #include "got_lib_delta.h"
47 #include "got_lib_object.h"
48 #include "got_lib_object_idset.h"
49 #include "got_lib_object_cache.h"
50 #include "got_lib_deflate.h"
51 #include "got_lib_pack.h"
52 #include "got_lib_privsep.h"
53 #include "got_lib_repository.h"
54 #include "got_lib_ratelimit.h"
56 #ifndef MIN
57 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
58 #endif
60 #ifndef MAX
61 #define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
62 #endif
64 #ifndef nitems
65 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
66 #endif
68 struct got_pack_meta {
69 struct got_object_id id;
70 char *path;
71 int obj_type;
72 off_t size;
73 time_t mtime;
75 /* The best delta we picked */
76 struct got_pack_meta *head;
77 struct got_pack_meta *prev;
78 unsigned char *delta_buf; /* if not encoded in delta cache file */
79 off_t delta_offset; /* offset in delta cache file */
80 off_t delta_len; /* encoded delta length */
81 int nchain;
83 int have_reused_delta;
84 off_t reused_delta_offset; /* offset of delta in reused pack file */
85 struct got_object_id *base_obj_id;
87 /* Only used for delta window */
88 struct got_delta_table *dtab;
90 /* Only used for writing offset deltas */
91 off_t off;
92 };
94 struct got_pack_metavec {
95 struct got_pack_meta **meta;
96 int nmeta;
97 int metasz;
98 };
100 static const struct got_error *
101 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
102 const char *path, int obj_type, time_t mtime)
104 const struct got_error *err = NULL;
105 struct got_pack_meta *m;
107 *new = NULL;
109 m = calloc(1, sizeof(*m));
110 if (m == NULL)
111 return got_error_from_errno("calloc");
113 memcpy(&m->id, id, sizeof(m->id));
115 m->path = strdup(path);
116 if (m->path == NULL) {
117 err = got_error_from_errno("strdup");
118 free(m);
119 return err;
122 m->obj_type = obj_type;
123 m->mtime = mtime;
124 *new = m;
125 return NULL;
128 static void
129 clear_meta(struct got_pack_meta *meta)
131 if (meta == NULL)
132 return;
133 free(meta->path);
134 meta->path = NULL;
135 free(meta->delta_buf);
136 meta->delta_buf = NULL;
137 free(meta->base_obj_id);
138 meta->base_obj_id = NULL;
141 static void
142 free_nmeta(struct got_pack_meta **meta, int nmeta)
144 int i;
146 for (i = 0; i < nmeta; i++)
147 clear_meta(meta[i]);
148 free(meta);
151 static int
152 delta_order_cmp(const void *pa, const void *pb)
154 struct got_pack_meta *a, *b;
155 int cmp;
157 a = *(struct got_pack_meta **)pa;
158 b = *(struct got_pack_meta **)pb;
160 if (a->obj_type != b->obj_type)
161 return a->obj_type - b->obj_type;
162 cmp = strcmp(a->path, b->path);
163 if (cmp != 0)
164 return cmp;
165 if (a->mtime != b->mtime)
166 return a->mtime - b->mtime;
167 return got_object_id_cmp(&a->id, &b->id);
170 static off_t
171 delta_size(struct got_delta_instruction *deltas, int ndeltas)
173 int i;
174 off_t size = 32;
175 for (i = 0; i < ndeltas; i++) {
176 if (deltas[i].copy)
177 size += GOT_DELTA_SIZE_SHIFT;
178 else
179 size += deltas[i].len + 1;
181 return size;
184 static const struct got_error *
185 append(unsigned char **p, size_t *len, off_t *sz, void *seg, int nseg)
187 char *n;
189 if (*len + nseg >= *sz) {
190 while (*len + nseg >= *sz)
191 *sz += *sz / 2;
192 n = realloc(*p, *sz);
193 if (n == NULL)
194 return got_error_from_errno("realloc");
195 *p = n;
197 memcpy(*p + *len, seg, nseg);
198 *len += nseg;
199 return NULL;
202 static const struct got_error *
203 encode_delta_in_mem(struct got_pack_meta *m, struct got_raw_object *o,
204 struct got_delta_instruction *deltas, int ndeltas,
205 off_t delta_size, off_t base_size)
207 const struct got_error *err;
208 unsigned char buf[16], *bp;
209 int i, j;
210 size_t len = 0;
211 off_t n;
212 struct got_delta_instruction *d;
214 m->delta_buf = malloc(delta_size);
215 if (m->delta_buf == NULL)
216 return got_error_from_errno("calloc");
218 /* base object size */
219 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
220 n = base_size >> GOT_DELTA_SIZE_SHIFT;
221 for (i = 1; n > 0; i++) {
222 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
223 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
224 n >>= GOT_DELTA_SIZE_SHIFT;
226 err = append(&m->delta_buf, &len, &delta_size, buf, i);
227 if (err)
228 return err;
230 /* target object size */
231 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
232 n = o->size >> GOT_DELTA_SIZE_SHIFT;
233 for (i = 1; n > 0; i++) {
234 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
235 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
236 n >>= GOT_DELTA_SIZE_SHIFT;
238 err = append(&m->delta_buf, &len, &delta_size, buf, i);
239 if (err)
240 return err;
242 for (j = 0; j < ndeltas; j++) {
243 d = &deltas[j];
244 if (d->copy) {
245 n = d->offset;
246 bp = &buf[1];
247 buf[0] = GOT_DELTA_BASE_COPY;
248 for (i = 0; i < 4; i++) {
249 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
250 buf[0] |= 1 << i;
251 *bp++ = n & 0xff;
252 n >>= 8;
253 if (n == 0)
254 break;
257 n = d->len;
258 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
259 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
260 for (i = 0; i < 3 && n > 0; i++) {
261 buf[0] |= 1 << (i + 4);
262 *bp++ = n & 0xff;
263 n >>= 8;
266 err = append(&m->delta_buf, &len, &delta_size,
267 buf, bp - buf);
268 if (err)
269 return err;
270 } else if (o->f == NULL) {
271 n = 0;
272 while (n != d->len) {
273 buf[0] = (d->len - n < 127) ? d->len - n : 127;
274 err = append(&m->delta_buf, &len, &delta_size,
275 buf, 1);
276 if (err)
277 return err;
278 err = append(&m->delta_buf, &len, &delta_size,
279 o->data + o->hdrlen + d->offset + n,
280 buf[0]);
281 if (err)
282 return err;
283 n += buf[0];
285 } else {
286 char content[128];
287 size_t r;
288 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
289 return got_error_from_errno("fseeko");
290 n = 0;
291 while (n != d->len) {
292 buf[0] = (d->len - n < 127) ? d->len - n : 127;
293 err = append(&m->delta_buf, &len, &delta_size,
294 buf, 1);
295 if (err)
296 return err;
297 r = fread(content, 1, buf[0], o->f);
298 if (r != buf[0])
299 return got_ferror(o->f, GOT_ERR_IO);
300 err = append(&m->delta_buf, &len, &delta_size,
301 content, buf[0]);
302 if (err)
303 return err;
304 n += buf[0];
309 m->delta_len = len;
310 return NULL;
313 static const struct got_error *
314 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
315 struct got_delta_instruction *deltas, int ndeltas,
316 off_t base_size, FILE *f)
318 unsigned char buf[16], *bp;
319 int i, j;
320 off_t n;
321 size_t w;
322 struct got_delta_instruction *d;
324 /* base object size */
325 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
326 n = base_size >> GOT_DELTA_SIZE_SHIFT;
327 for (i = 1; n > 0; i++) {
328 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
329 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
330 n >>= GOT_DELTA_SIZE_SHIFT;
332 w = fwrite(buf, 1, i, f);
333 if (w != i)
334 return got_ferror(f, GOT_ERR_IO);
336 /* target object size */
337 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
338 n = o->size >> GOT_DELTA_SIZE_SHIFT;
339 for (i = 1; n > 0; i++) {
340 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
341 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
342 n >>= GOT_DELTA_SIZE_SHIFT;
344 w = fwrite(buf, 1, i, f);
345 if (w != i)
346 return got_ferror(f, GOT_ERR_IO);
348 for (j = 0; j < ndeltas; j++) {
349 d = &deltas[j];
350 if (d->copy) {
351 n = d->offset;
352 bp = &buf[1];
353 buf[0] = GOT_DELTA_BASE_COPY;
354 for (i = 0; i < 4; i++) {
355 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
356 buf[0] |= 1 << i;
357 *bp++ = n & 0xff;
358 n >>= 8;
359 if (n == 0)
360 break;
363 n = d->len;
364 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
365 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
366 for (i = 0; i < 3 && n > 0; i++) {
367 buf[0] |= 1 << (i + 4);
368 *bp++ = n & 0xff;
369 n >>= 8;
372 w = fwrite(buf, 1, bp - buf, f);
373 if (w != bp - buf)
374 return got_ferror(f, GOT_ERR_IO);
375 } else if (o->f == NULL) {
376 n = 0;
377 while (n != d->len) {
378 buf[0] = (d->len - n < 127) ? d->len - n : 127;
379 w = fwrite(buf, 1, 1, f);
380 if (w != 1)
381 return got_ferror(f, GOT_ERR_IO);
382 w = fwrite(o->data + o->hdrlen + d->offset + n,
383 1, buf[0], f);
384 if (w != buf[0])
385 return got_ferror(f, GOT_ERR_IO);
386 n += buf[0];
388 } else {
389 char content[128];
390 size_t r;
391 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
392 return got_error_from_errno("fseeko");
393 n = 0;
394 while (n != d->len) {
395 buf[0] = (d->len - n < 127) ? d->len - n : 127;
396 w = fwrite(buf, 1, 1, f);
397 if (w != 1)
398 return got_ferror(f, GOT_ERR_IO);
399 r = fread(content, 1, buf[0], o->f);
400 if (r != buf[0])
401 return got_ferror(o->f, GOT_ERR_IO);
402 w = fwrite(content, 1, buf[0], f);
403 if (w != buf[0])
404 return got_ferror(f, GOT_ERR_IO);
405 n += buf[0];
410 m->delta_len = ftello(f) - m->delta_offset;
411 return NULL;
414 static const struct got_error *
415 report_progress(got_pack_progress_cb progress_cb, void *progress_arg,
416 struct got_ratelimit *rl, int ncolored, int nfound, int ntrees,
417 off_t packfile_size, int ncommits, int nobj_total, int obj_deltify,
418 int nobj_written)
420 const struct got_error *err;
421 int elapsed;
423 if (progress_cb == NULL)
424 return NULL;
426 err = got_ratelimit_check(&elapsed, rl);
427 if (err || !elapsed)
428 return err;
430 return progress_cb(progress_arg, ncolored, nfound, ntrees,
431 packfile_size, ncommits, nobj_total, obj_deltify, nobj_written);
434 static const struct got_error *
435 add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
437 if (v->nmeta == v->metasz){
438 size_t newsize = 2 * v->metasz;
439 struct got_pack_meta **new;
440 new = reallocarray(v->meta, newsize, sizeof(*new));
441 if (new == NULL)
442 return got_error_from_errno("reallocarray");
443 v->meta = new;
444 v->metasz = newsize;
447 v->meta[v->nmeta++] = m;
448 return NULL;
451 static const struct got_error *
452 reuse_delta(int idx, struct got_pack_meta *m, struct got_pack_metavec *v,
453 struct got_object_idset *idset, struct got_pack *pack,
454 struct got_packidx *packidx, int delta_cache_fd,
455 struct got_repository *repo)
457 const struct got_error *err = NULL;
458 struct got_pack_meta *base = NULL;
459 struct got_object_id *base_obj_id = NULL;
460 off_t delta_len = 0, delta_offset = 0, delta_cache_offset = 0;
461 uint64_t base_size, result_size;
463 if (m->have_reused_delta)
464 return NULL;
466 err = got_object_read_raw_delta(&base_size, &result_size, &delta_len,
467 &delta_offset, &delta_cache_offset, &base_obj_id, delta_cache_fd,
468 packidx, idx, &m->id, repo);
469 if (err)
470 return err;
472 if (delta_offset + delta_len < delta_offset)
473 return got_error(GOT_ERR_BAD_PACKFILE);
475 base = got_object_idset_get(idset, base_obj_id);
476 if (base == NULL)
477 goto done;
479 m->delta_len = delta_len;
480 m->delta_offset = delta_cache_offset;
481 m->prev = base;
482 m->size = result_size;
483 m->have_reused_delta = 1;
484 m->reused_delta_offset = delta_offset;
485 m->base_obj_id = base_obj_id;
486 base_obj_id = NULL;
487 err = add_meta(m, v);
488 done:
489 free(base_obj_id);
490 return err;
493 static const struct got_error *
494 find_pack_for_reuse(struct got_packidx **best_packidx,
495 struct got_repository *repo)
497 const struct got_error *err = NULL;
498 struct got_pathlist_entry *pe;
499 const char *best_packidx_path = NULL;
500 int nobj_max = 0;
502 *best_packidx = NULL;
504 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
505 const char *path_packidx = pe->path;
506 struct got_packidx *packidx;
507 int nobj;
509 err = got_repo_get_packidx(&packidx, path_packidx, repo);
510 if (err)
511 break;
513 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
514 if (nobj > nobj_max) {
515 best_packidx_path = path_packidx;
516 nobj_max = nobj;
520 if (best_packidx_path) {
521 err = got_repo_get_packidx(best_packidx, best_packidx_path,
522 repo);
525 return err;
528 struct search_deltas_arg {
529 struct got_packidx *packidx;
530 struct got_pack *pack;
531 struct got_object_idset *idset;
532 struct got_pack_metavec *v;
533 int delta_cache_fd;
534 struct got_repository *repo;
535 got_pack_progress_cb progress_cb;
536 void *progress_arg;
537 struct got_ratelimit *rl;
538 got_cancel_cb cancel_cb;
539 void *cancel_arg;
540 int ncolored;
541 int nfound;
542 int ntrees;
543 int ncommits;
544 };
546 static const struct got_error *
547 search_delta_for_object(struct got_object_id *id, void *data, void *arg)
549 const struct got_error *err;
550 struct got_pack_meta *m = data;
551 struct search_deltas_arg *a = arg;
552 int obj_idx;
553 struct got_object *obj = NULL;
555 if (a->cancel_cb) {
556 err = (*a->cancel_cb)(a->cancel_arg);
557 if (err)
558 return err;
561 if (!got_repo_check_packidx_bloom_filter(a->repo,
562 a->packidx->path_packidx, id))
563 return NULL;
565 obj_idx = got_packidx_get_object_idx(a->packidx, id);
566 if (obj_idx == -1)
567 return NULL;
569 /* TODO:
570 * Opening and closing an object just to check its flags
571 * is a bit expensive. We could have an imsg which requests
572 * plain type/size information for an object without doing
573 * work such as traversing the object's entire delta chain
574 * to find the base object type, and other such info which
575 * we don't really need here.
576 */
577 err = got_object_open_from_packfile(&obj, &m->id, a->pack,
578 a->packidx, obj_idx, a->repo);
579 if (err)
580 return err;
582 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
583 reuse_delta(obj_idx, m, a->v, a->idset, a->pack, a->packidx,
584 a->delta_cache_fd, a->repo);
585 if (err)
586 goto done;
587 err = report_progress(a->progress_cb, a->progress_arg, a->rl,
588 a->ncolored, a->nfound, a->ntrees, 0L, a->ncommits,
589 got_object_idset_num_elements(a->idset), a->v->nmeta, 0);
591 done:
592 got_object_close(obj);
593 return err;
596 static const struct got_error *
597 search_deltas(struct got_pack_metavec *v, struct got_object_idset *idset,
598 int delta_cache_fd, int ncolored, int nfound, int ntrees, int ncommits,
599 struct got_repository *repo,
600 got_pack_progress_cb progress_cb, void *progress_arg,
601 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
603 const struct got_error *err = NULL;
604 char *path_packfile = NULL;
605 struct got_packidx *packidx;
606 struct got_pack *pack;
607 struct search_deltas_arg sda;
609 err = find_pack_for_reuse(&packidx, repo);
610 if (err)
611 return err;
613 if (packidx == NULL)
614 return NULL;
616 err = got_packidx_get_packfile_path(&path_packfile,
617 packidx->path_packidx);
618 if (err)
619 return err;
621 pack = got_repo_get_cached_pack(repo, path_packfile);
622 if (pack == NULL) {
623 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
624 if (err)
625 goto done;
628 sda.packidx = packidx;
629 sda.pack = pack;
630 sda.idset = idset;
631 sda.v = v;
632 sda.delta_cache_fd = delta_cache_fd;
633 sda.repo = repo;
634 sda.progress_cb = progress_cb;
635 sda.progress_arg = progress_arg;
636 sda.rl = rl;
637 sda.cancel_cb = cancel_cb;
638 sda.cancel_arg = cancel_arg;
639 sda.ncolored = ncolored;
640 sda.nfound = nfound;
641 sda.ntrees = ntrees;
642 sda.ncommits = ncommits;
643 err = got_object_idset_for_each(idset, search_delta_for_object, &sda);
644 done:
645 free(path_packfile);
646 return err;
649 static const struct got_error *
650 pick_deltas(struct got_pack_meta **meta, int nmeta, int ncolored,
651 int nfound, int ntrees, int ncommits, int nreused, FILE *delta_cache,
652 struct got_repository *repo,
653 got_pack_progress_cb progress_cb, void *progress_arg,
654 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
656 const struct got_error *err = NULL;
657 struct got_pack_meta *m = NULL, *base = NULL;
658 struct got_raw_object *raw = NULL, *base_raw = NULL;
659 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
660 int i, j, ndeltas, best_ndeltas;
661 off_t size, best_size;
662 const int max_base_candidates = 3;
663 size_t delta_memsize = 0;
664 const size_t max_delta_memsize = 25 * GOT_DELTA_RESULT_SIZE_CACHED_MAX;
665 int outfd = -1;
667 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
668 for (i = 0; i < nmeta; i++) {
669 if (cancel_cb) {
670 err = (*cancel_cb)(cancel_arg);
671 if (err)
672 break;
674 err = report_progress(progress_cb, progress_arg, rl,
675 ncolored, nfound, ntrees, 0L, ncommits, nreused + nmeta,
676 nreused + i, 0);
677 if (err)
678 goto done;
679 m = meta[i];
681 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
682 m->obj_type == GOT_OBJ_TYPE_TAG)
683 continue;
685 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
686 if (err)
687 goto done;
688 m->size = raw->size;
690 if (raw->f == NULL) {
691 err = got_deltify_init_mem(&m->dtab, raw->data,
692 raw->hdrlen, raw->size + raw->hdrlen);
693 } else {
694 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
695 raw->size + raw->hdrlen);
697 if (err)
698 goto done;
700 if (i > max_base_candidates) {
701 struct got_pack_meta *n = NULL;
702 n = meta[i - (max_base_candidates + 1)];
703 got_deltify_free(n->dtab);
704 n->dtab = NULL;
707 best_size = raw->size;
708 best_ndeltas = 0;
709 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
710 if (cancel_cb) {
711 err = (*cancel_cb)(cancel_arg);
712 if (err)
713 goto done;
715 base = meta[j];
716 /* long chains make unpacking slow, avoid such bases */
717 if (base->nchain >= 128 ||
718 base->obj_type != m->obj_type)
719 continue;
721 err = got_object_raw_open(&base_raw, &outfd, repo,
722 &base->id);
723 if (err)
724 goto done;
726 if (raw->f == NULL && base_raw->f == NULL) {
727 err = got_deltify_mem_mem(&deltas, &ndeltas,
728 raw->data, raw->hdrlen,
729 raw->size + raw->hdrlen,
730 base->dtab, base_raw->data,
731 base_raw->hdrlen,
732 base_raw->size + base_raw->hdrlen);
733 } else if (raw->f == NULL) {
734 err = got_deltify_mem_file(&deltas, &ndeltas,
735 raw->data, raw->hdrlen,
736 raw->size + raw->hdrlen,
737 base->dtab, base_raw->f,
738 base_raw->hdrlen,
739 base_raw->size + base_raw->hdrlen);
740 } else if (base_raw->f == NULL) {
741 err = got_deltify_file_mem(&deltas, &ndeltas,
742 raw->f, raw->hdrlen,
743 raw->size + raw->hdrlen,
744 base->dtab, base_raw->data,
745 base_raw->hdrlen,
746 base_raw->size + base_raw->hdrlen);
747 } else {
748 err = got_deltify(&deltas, &ndeltas,
749 raw->f, raw->hdrlen,
750 raw->size + raw->hdrlen,
751 base->dtab, base_raw->f, base_raw->hdrlen,
752 base_raw->size + base_raw->hdrlen);
754 got_object_raw_close(base_raw);
755 base_raw = NULL;
756 if (err)
757 goto done;
759 size = delta_size(deltas, ndeltas);
760 if (size + 32 < best_size){
761 /*
762 * if we already picked a best delta,
763 * replace it.
764 */
765 best_size = size;
766 free(best_deltas);
767 best_deltas = deltas;
768 best_ndeltas = ndeltas;
769 deltas = NULL;
770 m->nchain = base->nchain + 1;
771 m->prev = base;
772 m->head = base->head;
773 if (m->head == NULL)
774 m->head = base;
775 } else {
776 free(deltas);
777 deltas = NULL;
778 ndeltas = 0;
782 if (best_ndeltas > 0) {
783 if (best_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX &&
784 delta_memsize + best_size <= max_delta_memsize) {
785 delta_memsize += best_size;
786 err = encode_delta_in_mem(m, raw, best_deltas,
787 best_ndeltas, best_size, m->prev->size);
788 } else {
789 m->delta_offset = ftello(delta_cache);
790 /*
791 * TODO:
792 * Storing compressed delta data in the delta
793 * cache file would probably be more efficient
794 * than writing uncompressed delta data here
795 * and compressing it while writing the pack
796 * file. This would also allow for reusing
797 * deltas in their compressed form.
798 */
799 err = encode_delta(m, raw, best_deltas,
800 best_ndeltas, m->prev->size, delta_cache);
802 free(best_deltas);
803 best_deltas = NULL;
804 best_ndeltas = 0;
805 if (err)
806 goto done;
809 got_object_raw_close(raw);
810 raw = NULL;
812 done:
813 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
814 got_deltify_free(meta[i]->dtab);
815 meta[i]->dtab = NULL;
817 if (raw)
818 got_object_raw_close(raw);
819 if (base_raw)
820 got_object_raw_close(base_raw);
821 if (outfd != -1 && close(outfd) == -1 && err == NULL)
822 err = got_error_from_errno("close");
823 free(deltas);
824 free(best_deltas);
825 return err;
828 static const struct got_error *
829 search_packidx(int *found, struct got_object_id *id,
830 struct got_repository *repo)
832 const struct got_error *err = NULL;
833 struct got_packidx *packidx = NULL;
834 int idx;
836 *found = 0;
838 err = got_repo_search_packidx(&packidx, &idx, repo, id);
839 if (err == NULL)
840 *found = 1; /* object is already packed */
841 else if (err->code == GOT_ERR_NO_OBJ)
842 err = NULL;
843 return err;
846 static const struct got_error *
847 add_object(int want_meta, struct got_object_idset *idset,
848 struct got_object_id *id, const char *path, int obj_type,
849 time_t mtime, int loose_obj_only, struct got_repository *repo,
850 int *ncolored, int *nfound, int *ntrees,
851 got_pack_progress_cb progress_cb, void *progress_arg,
852 struct got_ratelimit *rl)
854 const struct got_error *err;
855 struct got_pack_meta *m = NULL;
857 if (loose_obj_only) {
858 int is_packed;
859 err = search_packidx(&is_packed, id, repo);
860 if (err)
861 return err;
862 if (is_packed && want_meta)
863 return NULL;
866 if (want_meta) {
867 err = alloc_meta(&m, id, path, obj_type, mtime);
868 if (err)
869 return err;
871 (*nfound)++;
872 err = report_progress(progress_cb, progress_arg, rl,
873 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
874 if (err) {
875 clear_meta(m);
876 free(m);
877 return err;
881 err = got_object_idset_add(idset, id, m);
882 if (err) {
883 clear_meta(m);
884 free(m);
886 return err;
889 static const struct got_error *
890 load_tree_entries(struct got_object_id_queue *ids, int want_meta,
891 struct got_object_idset *idset, struct got_object_id *tree_id,
892 const char *dpath, time_t mtime, struct got_repository *repo,
893 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
894 got_pack_progress_cb progress_cb, void *progress_arg,
895 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
897 const struct got_error *err;
898 struct got_tree_object *tree;
899 char *p = NULL;
900 int i;
902 err = got_object_open_as_tree(&tree, repo, tree_id);
903 if (err)
904 return err;
906 (*ntrees)++;
907 err = report_progress(progress_cb, progress_arg, rl,
908 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
909 if (err)
910 return err;
912 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
913 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
914 struct got_object_id *id = got_tree_entry_get_id(e);
915 mode_t mode = got_tree_entry_get_mode(e);
917 if (cancel_cb) {
918 err = (*cancel_cb)(cancel_arg);
919 if (err)
920 break;
923 if (got_object_tree_entry_is_submodule(e) ||
924 got_object_idset_contains(idset, id))
925 continue;
927 if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
928 got_tree_entry_get_name(e)) == -1) {
929 err = got_error_from_errno("asprintf");
930 break;
933 if (S_ISDIR(mode)) {
934 struct got_object_qid *qid;
935 err = got_object_qid_alloc(&qid, id);
936 if (err)
937 break;
938 STAILQ_INSERT_TAIL(ids, qid, entry);
939 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
940 err = add_object(want_meta, idset, id, p,
941 GOT_OBJ_TYPE_BLOB, mtime, loose_obj_only, repo,
942 ncolored, nfound, ntrees,
943 progress_cb, progress_arg, rl);
944 if (err)
945 break;
947 free(p);
948 p = NULL;
951 got_object_tree_close(tree);
952 free(p);
953 return err;
956 static const struct got_error *
957 load_tree(int want_meta, struct got_object_idset *idset,
958 struct got_object_id *tree_id, const char *dpath, time_t mtime,
959 struct got_repository *repo, int loose_obj_only,
960 int *ncolored, int *nfound, int *ntrees,
961 got_pack_progress_cb progress_cb, void *progress_arg,
962 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
964 const struct got_error *err = NULL;
965 struct got_object_id_queue tree_ids;
966 struct got_object_qid *qid;
968 if (got_object_idset_contains(idset, tree_id))
969 return NULL;
971 err = got_object_qid_alloc(&qid, tree_id);
972 if (err)
973 return err;
975 STAILQ_INIT(&tree_ids);
976 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
978 while (!STAILQ_EMPTY(&tree_ids)) {
979 if (cancel_cb) {
980 err = (*cancel_cb)(cancel_arg);
981 if (err)
982 break;
985 qid = STAILQ_FIRST(&tree_ids);
986 STAILQ_REMOVE_HEAD(&tree_ids, entry);
988 if (got_object_idset_contains(idset, &qid->id)) {
989 got_object_qid_free(qid);
990 continue;
993 err = add_object(want_meta, idset, &qid->id, dpath,
994 GOT_OBJ_TYPE_TREE, mtime, loose_obj_only, repo,
995 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
996 if (err) {
997 got_object_qid_free(qid);
998 break;
1001 err = load_tree_entries(&tree_ids, want_meta, idset, &qid->id,
1002 dpath, mtime, repo, loose_obj_only, ncolored, nfound,
1003 ntrees, progress_cb, progress_arg, rl,
1004 cancel_cb, cancel_arg);
1005 got_object_qid_free(qid);
1006 if (err)
1007 break;
1010 got_object_id_queue_free(&tree_ids);
1011 return err;
1014 static const struct got_error *
1015 load_commit(int want_meta, struct got_object_idset *idset,
1016 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
1017 int *ncolored, int *nfound, int *ntrees,
1018 got_pack_progress_cb progress_cb, void *progress_arg,
1019 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1021 const struct got_error *err;
1022 struct got_commit_object *commit;
1024 if (got_object_idset_contains(idset, id))
1025 return NULL;
1027 if (loose_obj_only) {
1028 int is_packed;
1029 err = search_packidx(&is_packed, id, repo);
1030 if (err)
1031 return err;
1032 if (is_packed && want_meta)
1033 return NULL;
1036 err = got_object_open_as_commit(&commit, repo, id);
1037 if (err)
1038 return err;
1040 err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_COMMIT,
1041 got_object_commit_get_committer_time(commit),
1042 loose_obj_only, repo,
1043 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1044 if (err)
1045 goto done;
1047 err = load_tree(want_meta, idset, got_object_commit_get_tree_id(commit),
1048 "", got_object_commit_get_committer_time(commit),
1049 repo, loose_obj_only, ncolored, nfound, ntrees,
1050 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1051 done:
1052 got_object_commit_close(commit);
1053 return err;
1056 static const struct got_error *
1057 load_tag(int want_meta, struct got_object_idset *idset,
1058 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
1059 int *ncolored, int *nfound, int *ntrees,
1060 got_pack_progress_cb progress_cb, void *progress_arg,
1061 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1063 const struct got_error *err;
1064 struct got_tag_object *tag = NULL;
1066 if (got_object_idset_contains(idset, id))
1067 return NULL;
1069 if (loose_obj_only) {
1070 int is_packed;
1071 err = search_packidx(&is_packed, id, repo);
1072 if (err)
1073 return err;
1074 if (is_packed && want_meta)
1075 return NULL;
1078 err = got_object_open_as_tag(&tag, repo, id);
1079 if (err)
1080 return err;
1082 err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_TAG,
1083 got_object_tag_get_tagger_time(tag), loose_obj_only, repo,
1084 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1085 if (err)
1086 goto done;
1088 switch (got_object_tag_get_object_type(tag)) {
1089 case GOT_OBJ_TYPE_COMMIT:
1090 err = load_commit(want_meta, idset,
1091 got_object_tag_get_object_id(tag), repo, loose_obj_only,
1092 ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
1093 cancel_cb, cancel_arg);
1094 break;
1095 case GOT_OBJ_TYPE_TREE:
1096 err = load_tree(want_meta, idset,
1097 got_object_tag_get_object_id(tag), "",
1098 got_object_tag_get_tagger_time(tag), repo, loose_obj_only,
1099 ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
1100 cancel_cb, cancel_arg);
1101 break;
1102 default:
1103 break;
1106 done:
1107 got_object_tag_close(tag);
1108 return err;
1111 enum findtwixt_color {
1112 COLOR_KEEP = 0,
1113 COLOR_DROP,
1114 COLOR_BLANK,
1115 COLOR_SKIP,
1118 static const int findtwixt_colors[] = {
1119 COLOR_KEEP,
1120 COLOR_DROP,
1121 COLOR_BLANK,
1122 COLOR_SKIP,
1125 static const struct got_error *
1126 paint_commit(struct got_object_qid *qid, int color)
1128 if (color < 0 || color >= nitems(findtwixt_colors))
1129 return got_error(GOT_ERR_RANGE);
1131 qid->data = (void *)&findtwixt_colors[color];
1132 return NULL;
1135 static const struct got_error *
1136 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
1137 int color, struct got_repository *repo)
1139 const struct got_error *err;
1140 struct got_object_qid *qid;
1142 err = got_object_qid_alloc(&qid, id);
1143 if (err)
1144 return err;
1146 STAILQ_INSERT_TAIL(ids, qid, entry);
1147 return paint_commit(qid, color);
1150 struct append_id_arg {
1151 struct got_object_id **array;
1152 int idx;
1153 struct got_object_idset *drop;
1154 struct got_object_idset *skip;
1157 static const struct got_error *
1158 append_id(struct got_object_id *id, void *data, void *arg)
1160 struct append_id_arg *a = arg;
1162 if (got_object_idset_contains(a->skip, id) ||
1163 got_object_idset_contains(a->drop, id))
1164 return NULL;
1166 a->array[++a->idx] = got_object_id_dup(id);
1167 if (a->array[a->idx] == NULL)
1168 return got_error_from_errno("got_object_id_dup");
1170 return NULL;
1173 static const struct got_error *
1174 queue_commit_or_tag_id(struct got_object_id *id, int color,
1175 struct got_object_id_queue *ids, struct got_repository *repo)
1177 const struct got_error *err;
1178 struct got_tag_object *tag = NULL;
1179 int obj_type;
1181 err = got_object_get_type(&obj_type, repo, id);
1182 if (err)
1183 return err;
1185 if (obj_type == GOT_OBJ_TYPE_TAG) {
1186 err = got_object_open_as_tag(&tag, repo, id);
1187 if (err)
1188 return err;
1189 obj_type = got_object_tag_get_object_type(tag);
1190 id = got_object_tag_get_object_id(tag);
1193 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1194 err = queue_commit_id(ids, id, color, repo);
1195 if (err)
1196 goto done;
1198 done:
1199 if (tag)
1200 got_object_tag_close(tag);
1201 return err;
1204 static const struct got_error *
1205 paint_commits(int *ncolored, struct got_object_id_queue *ids, int nids,
1206 struct got_object_idset *keep, struct got_object_idset *drop,
1207 struct got_object_idset *skip, struct got_repository *repo,
1208 got_pack_progress_cb progress_cb, void *progress_arg,
1209 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1211 const struct got_error *err = NULL;
1212 struct got_commit_object *commit = NULL;
1213 const struct got_object_id_queue *parents;
1214 struct got_object_qid *qid;
1215 int nqueued = nids, nskip = 0;
1217 while (!STAILQ_EMPTY(ids) && nskip != nqueued) {
1218 int color;
1220 if (cancel_cb) {
1221 err = cancel_cb(cancel_arg);
1222 if (err)
1223 break;
1226 qid = STAILQ_FIRST(ids);
1227 STAILQ_REMOVE_HEAD(ids, entry);
1228 nqueued--;
1229 color = *((int *)qid->data);
1230 if (color == COLOR_SKIP)
1231 nskip--;
1233 if (got_object_idset_contains(skip, &qid->id)) {
1234 got_object_qid_free(qid);
1235 continue;
1238 switch (color) {
1239 case COLOR_KEEP:
1240 if (got_object_idset_contains(keep, &qid->id)) {
1241 got_object_qid_free(qid);
1242 continue;
1244 if (got_object_idset_contains(drop, &qid->id)) {
1245 err = paint_commit(qid, COLOR_SKIP);
1246 if (err)
1247 goto done;
1248 nskip++;
1249 } else
1250 (*ncolored)++;
1251 err = got_object_idset_add(keep, &qid->id, NULL);
1252 if (err)
1253 goto done;
1254 break;
1255 case COLOR_DROP:
1256 if (got_object_idset_contains(drop, &qid->id)) {
1257 got_object_qid_free(qid);
1258 continue;
1260 if (got_object_idset_contains(keep, &qid->id)) {
1261 err = paint_commit(qid, COLOR_SKIP);
1262 if (err)
1263 goto done;
1264 nskip++;
1265 } else
1266 (*ncolored)++;
1267 err = got_object_idset_add(drop, &qid->id, NULL);
1268 if (err)
1269 goto done;
1270 break;
1271 case COLOR_SKIP:
1272 if (!got_object_idset_contains(skip, &qid->id)) {
1273 err = got_object_idset_add(skip, &qid->id,
1274 NULL);
1275 if (err)
1276 goto done;
1278 break;
1279 default:
1280 /* should not happen */
1281 err = got_error_fmt(GOT_ERR_NOT_IMPL,
1282 "%s invalid commit color %d", __func__, color);
1283 goto done;
1286 err = report_progress(progress_cb, progress_arg, rl,
1287 *ncolored, 0, 0, 0L, 0, 0, 0, 0);
1288 if (err)
1289 break;
1292 err = got_object_open_as_commit(&commit, repo, &qid->id);
1293 if (err)
1294 break;
1296 parents = got_object_commit_get_parent_ids(commit);
1297 if (parents) {
1298 struct got_object_qid *pid;
1299 color = *((int *)qid->data);
1300 STAILQ_FOREACH(pid, parents, entry) {
1301 err = queue_commit_id(ids, &pid->id, color,
1302 repo);
1303 if (err)
1304 break;
1305 nqueued++;
1306 if (color == COLOR_SKIP)
1307 nskip++;
1311 got_object_commit_close(commit);
1312 commit = NULL;
1313 got_object_qid_free(qid);
1315 done:
1316 if (commit)
1317 got_object_commit_close(commit);
1318 return err;
1321 static const struct got_error *
1322 findtwixt(struct got_object_id ***res, int *nres, int *ncolored,
1323 struct got_object_id **head, int nhead,
1324 struct got_object_id **tail, int ntail,
1325 struct got_repository *repo,
1326 got_pack_progress_cb progress_cb, void *progress_arg,
1327 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1329 const struct got_error *err = NULL;
1330 struct got_object_id_queue ids;
1331 struct got_object_idset *keep, *drop, *skip = NULL;
1332 int i, nkeep;
1334 STAILQ_INIT(&ids);
1335 *res = NULL;
1336 *nres = 0;
1337 *ncolored = 0;
1339 keep = got_object_idset_alloc();
1340 if (keep == NULL)
1341 return got_error_from_errno("got_object_idset_alloc");
1343 drop = got_object_idset_alloc();
1344 if (drop == NULL) {
1345 err = got_error_from_errno("got_object_idset_alloc");
1346 goto done;
1349 skip = got_object_idset_alloc();
1350 if (skip == NULL) {
1351 err = got_error_from_errno("got_object_idset_alloc");
1352 goto done;
1355 for (i = 0; i < nhead; i++) {
1356 struct got_object_id *id = head[i];
1357 if (id == NULL)
1358 continue;
1359 err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo);
1360 if (err)
1361 goto done;
1364 for (i = 0; i < ntail; i++) {
1365 struct got_object_id *id = tail[i];
1366 if (id == NULL)
1367 continue;
1368 err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo);
1369 if (err)
1370 goto done;
1373 err = paint_commits(ncolored, &ids, nhead + ntail,
1374 keep, drop, skip, repo, progress_cb, progress_arg, rl,
1375 cancel_cb, cancel_arg);
1376 if (err)
1377 goto done;
1379 nkeep = got_object_idset_num_elements(keep);
1380 if (nkeep > 0) {
1381 struct append_id_arg arg;
1382 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
1383 if (arg.array == NULL) {
1384 err = got_error_from_errno("calloc");
1385 goto done;
1387 arg.idx = -1;
1388 arg.skip = skip;
1389 arg.drop = drop;
1390 err = got_object_idset_for_each(keep, append_id, &arg);
1391 if (err) {
1392 free(arg.array);
1393 goto done;
1395 *res = arg.array;
1396 *nres = arg.idx + 1;
1398 done:
1399 got_object_idset_free(keep);
1400 got_object_idset_free(drop);
1401 if (skip)
1402 got_object_idset_free(skip);
1403 got_object_id_queue_free(&ids);
1404 return err;
1407 static const struct got_error *
1408 load_object_ids(int *ncolored, int *nfound, int *ntrees,
1409 struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
1410 struct got_object_id **ours, int nours, struct got_repository *repo,
1411 int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
1412 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1414 const struct got_error *err = NULL;
1415 struct got_object_id **ids = NULL;
1416 int i, nobj = 0, obj_type;
1418 *ncolored = 0;
1419 *nfound = 0;
1420 *ntrees = 0;
1422 err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs,
1423 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1424 if (err || nobj == 0)
1425 goto done;
1427 for (i = 0; i < ntheirs; i++) {
1428 struct got_object_id *id = theirs[i];
1429 if (id == NULL)
1430 continue;
1431 err = got_object_get_type(&obj_type, repo, id);
1432 if (err)
1433 return err;
1434 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1435 err = load_commit(0, idset, id, repo,
1436 loose_obj_only, ncolored, nfound, ntrees,
1437 progress_cb, progress_arg, rl,
1438 cancel_cb, cancel_arg);
1439 if (err)
1440 goto done;
1441 } else if (obj_type == GOT_OBJ_TYPE_TAG) {
1442 err = load_tag(0, idset, id, repo,
1443 loose_obj_only, ncolored, nfound, ntrees,
1444 progress_cb, progress_arg, rl,
1445 cancel_cb, cancel_arg);
1446 if (err)
1447 goto done;
1451 for (i = 0; i < nobj; i++) {
1452 err = load_commit(1, idset, ids[i], repo, loose_obj_only,
1453 ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
1454 cancel_cb, cancel_arg);
1455 if (err)
1456 goto done;
1459 for (i = 0; i < nours; i++) {
1460 struct got_object_id *id = ours[i];
1461 struct got_pack_meta *m;
1462 if (id == NULL)
1463 continue;
1464 m = got_object_idset_get(idset, id);
1465 if (m == NULL) {
1466 err = got_object_get_type(&obj_type, repo, id);
1467 if (err)
1468 goto done;
1469 } else
1470 obj_type = m->obj_type;
1471 if (obj_type != GOT_OBJ_TYPE_TAG)
1472 continue;
1473 err = load_tag(1, idset, id, repo, loose_obj_only,
1474 ncolored, nfound, ntrees, progress_cb, progress_arg, rl,
1475 cancel_cb, cancel_arg);
1476 if (err)
1477 goto done;
1479 done:
1480 for (i = 0; i < nobj; i++) {
1481 free(ids[i]);
1483 free(ids);
1484 return err;
1487 const struct got_error *
1488 hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx)
1490 size_t n;
1492 SHA1Update(ctx, buf, len);
1493 n = fwrite(buf, 1, len, f);
1494 if (n != len)
1495 return got_ferror(f, GOT_ERR_IO);
1496 return NULL;
1499 static void
1500 putbe32(char *b, uint32_t n)
1502 b[0] = n >> 24;
1503 b[1] = n >> 16;
1504 b[2] = n >> 8;
1505 b[3] = n >> 0;
1508 static int
1509 write_order_cmp(const void *pa, const void *pb)
1511 struct got_pack_meta *a, *b, *ahd, *bhd;
1513 a = *(struct got_pack_meta **)pa;
1514 b = *(struct got_pack_meta **)pb;
1515 ahd = (a->head == NULL) ? a : a->head;
1516 bhd = (b->head == NULL) ? b : b->head;
1517 if (ahd->mtime != bhd->mtime)
1518 return bhd->mtime - ahd->mtime;
1519 if (ahd != bhd)
1520 return (uintptr_t)bhd - (uintptr_t)ahd;
1521 if (a->nchain != b->nchain)
1522 return a->nchain - b->nchain;
1523 return a->mtime - b->mtime;
1526 static int
1527 reuse_write_order_cmp(const void *pa, const void *pb)
1529 struct got_pack_meta *a, *b;
1531 a = *(struct got_pack_meta **)pa;
1532 b = *(struct got_pack_meta **)pb;
1534 if (a->reused_delta_offset < b->reused_delta_offset)
1535 return -1;
1536 if (a->reused_delta_offset > b->reused_delta_offset)
1537 return 1;
1538 return 0;
1541 static const struct got_error *
1542 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
1544 size_t i;
1546 *hdrlen = 0;
1548 hdr[0] = obj_type << 4;
1549 hdr[0] |= len & 0xf;
1550 len >>= 4;
1551 for (i = 1; len != 0; i++){
1552 if (i >= bufsize)
1553 return got_error(GOT_ERR_NO_SPACE);
1554 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1555 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1556 len >>= GOT_DELTA_SIZE_SHIFT;
1559 *hdrlen = i;
1560 return NULL;
1563 static int
1564 packoff(char *hdr, off_t off)
1566 int i, j;
1567 char rbuf[8];
1569 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1570 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1571 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1572 GOT_DELTA_SIZE_MORE;
1575 j = 0;
1576 while (i > 0)
1577 hdr[j++] = rbuf[--i];
1578 return j;
1581 static const struct got_error *
1582 deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *packfile,
1583 struct got_pack_meta *m)
1585 const struct got_error *err;
1586 char buf[32];
1587 int nh;
1589 if (m->prev->off != 0) {
1590 err = packhdr(&nh, buf, sizeof(buf),
1591 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
1592 if (err)
1593 return err;
1594 nh += packoff(buf + nh, m->off - m->prev->off);
1595 err = hwrite(packfile, buf, nh, ctx);
1596 if (err)
1597 return err;
1598 *packfile_size += nh;
1599 } else {
1600 err = packhdr(&nh, buf, sizeof(buf),
1601 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
1602 if (err)
1603 return err;
1604 err = hwrite(packfile, buf, nh, ctx);
1605 if (err)
1606 return err;
1607 *packfile_size += nh;
1608 err = hwrite(packfile, m->prev->id.sha1,
1609 sizeof(m->prev->id.sha1), ctx);
1610 if (err)
1611 return err;
1612 *packfile_size += sizeof(m->prev->id.sha1);
1615 return NULL;
1618 static const struct got_error *
1619 write_packed_object(off_t *packfile_size, FILE *packfile,
1620 FILE *delta_cache, struct got_pack_meta *m, int *outfd,
1621 SHA1_CTX *ctx, struct got_repository *repo)
1623 const struct got_error *err = NULL;
1624 struct got_deflate_checksum csum;
1625 char buf[32];
1626 int nh;
1627 struct got_raw_object *raw = NULL;
1628 off_t outlen;
1630 csum.output_sha1 = ctx;
1631 csum.output_crc = NULL;
1633 m->off = ftello(packfile);
1634 if (m->delta_len == 0) {
1635 err = got_object_raw_open(&raw, outfd, repo, &m->id);
1636 if (err)
1637 goto done;
1638 err = packhdr(&nh, buf, sizeof(buf),
1639 m->obj_type, raw->size);
1640 if (err)
1641 goto done;
1642 err = hwrite(packfile, buf, nh, ctx);
1643 if (err)
1644 goto done;
1645 *packfile_size += nh;
1646 if (raw->f == NULL) {
1647 err = got_deflate_to_file_mmap(&outlen,
1648 raw->data + raw->hdrlen, 0, raw->size,
1649 packfile, &csum);
1650 if (err)
1651 goto done;
1652 } else {
1653 if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
1654 == -1) {
1655 err = got_error_from_errno("fseeko");
1656 goto done;
1658 err = got_deflate_to_file(&outlen, raw->f,
1659 raw->size, packfile, &csum);
1660 if (err)
1661 goto done;
1663 *packfile_size += outlen;
1664 got_object_raw_close(raw);
1665 raw = NULL;
1666 } else if (m->delta_buf) {
1667 err = deltahdr(packfile_size, ctx, packfile, m);
1668 if (err)
1669 goto done;
1670 err = got_deflate_to_file_mmap(&outlen,
1671 m->delta_buf, 0, m->delta_len, packfile, &csum);
1672 if (err)
1673 goto done;
1674 *packfile_size += outlen;
1675 free(m->delta_buf);
1676 m->delta_buf = NULL;
1677 } else {
1678 if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
1679 == -1) {
1680 err = got_error_from_errno("fseeko");
1681 goto done;
1683 err = deltahdr(packfile_size, ctx, packfile, m);
1684 if (err)
1685 goto done;
1686 err = got_deflate_to_file(&outlen, delta_cache,
1687 m->delta_len, packfile, &csum);
1688 if (err)
1689 goto done;
1690 *packfile_size += outlen;
1692 done:
1693 if (raw)
1694 got_object_raw_close(raw);
1695 return err;
1698 static const struct got_error *
1699 genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache,
1700 struct got_pack_meta **deltify, int ndeltify,
1701 struct got_pack_meta **reuse, int nreuse,
1702 int ncolored, int nfound, int ntrees, int nours,
1703 struct got_repository *repo,
1704 got_pack_progress_cb progress_cb, void *progress_arg,
1705 struct got_ratelimit *rl,
1706 got_cancel_cb cancel_cb, void *cancel_arg)
1708 const struct got_error *err = NULL;
1709 int i;
1710 SHA1_CTX ctx;
1711 struct got_pack_meta *m;
1712 char buf[32];
1713 size_t n;
1714 off_t packfile_size = 0;
1715 int outfd = -1;
1717 SHA1Init(&ctx);
1719 err = hwrite(packfile, "PACK", 4, &ctx);
1720 if (err)
1721 return err;
1722 putbe32(buf, GOT_PACKFILE_VERSION);
1723 err = hwrite(packfile, buf, 4, &ctx);
1724 if (err)
1725 goto done;
1726 putbe32(buf, ndeltify + nreuse);
1727 err = hwrite(packfile, buf, 4, &ctx);
1728 if (err)
1729 goto done;
1731 qsort(deltify, ndeltify, sizeof(struct got_pack_meta *),
1732 write_order_cmp);
1733 for (i = 0; i < ndeltify; i++) {
1734 err = report_progress(progress_cb, progress_arg, rl,
1735 ncolored, nfound, ntrees, packfile_size, nours,
1736 ndeltify + nreuse, ndeltify + nreuse, i);
1737 if (err)
1738 goto done;
1739 m = deltify[i];
1740 err = write_packed_object(&packfile_size, packfile,
1741 delta_cache, m, &outfd, &ctx, repo);
1742 if (err)
1743 goto done;
1746 qsort(reuse, nreuse, sizeof(struct got_pack_meta *),
1747 reuse_write_order_cmp);
1748 for (i = 0; i < nreuse; i++) {
1749 err = report_progress(progress_cb, progress_arg, rl,
1750 ncolored, nfound, ntrees, packfile_size, nours,
1751 ndeltify + nreuse, ndeltify + nreuse, ndeltify + i);
1752 if (err)
1753 goto done;
1754 m = reuse[i];
1755 err = write_packed_object(&packfile_size, packfile,
1756 delta_cache, m, &outfd, &ctx, repo);
1757 if (err)
1758 goto done;
1761 SHA1Final(pack_sha1, &ctx);
1762 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
1763 if (n != SHA1_DIGEST_LENGTH)
1764 err = got_ferror(packfile, GOT_ERR_IO);
1765 packfile_size += SHA1_DIGEST_LENGTH;
1766 packfile_size += sizeof(struct got_packfile_hdr);
1767 if (progress_cb) {
1768 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1769 packfile_size, nours, ndeltify + nreuse,
1770 ndeltify + nreuse, ndeltify + nreuse);
1771 if (err)
1772 goto done;
1774 done:
1775 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1776 err = got_error_from_errno("close");
1777 return err;
1780 static const struct got_error *
1781 remove_unused_object(struct got_object_id *id, void *data, void *arg)
1783 struct got_object_idset *idset = arg;
1785 if (data == NULL)
1786 got_object_idset_remove(NULL, idset, id);
1788 return NULL;
1791 static const struct got_error *
1792 remove_reused_object(struct got_object_id *id, void *data, void *arg)
1794 struct got_object_idset *idset = arg;
1795 struct got_pack_meta *m = data;
1797 if (m->have_reused_delta)
1798 got_object_idset_remove(NULL, idset, id);
1800 return NULL;
1803 static const struct got_error *
1804 add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
1806 struct got_pack_meta *m = data;
1807 struct got_pack_metavec *v = arg;
1809 return add_meta(m, v);
1812 const struct got_error *
1813 got_pack_create(uint8_t *packsha1, FILE *packfile,
1814 struct got_object_id **theirs, int ntheirs,
1815 struct got_object_id **ours, int nours,
1816 struct got_repository *repo, int loose_obj_only, int allow_empty,
1817 got_pack_progress_cb progress_cb, void *progress_arg,
1818 got_cancel_cb cancel_cb, void *cancel_arg)
1820 const struct got_error *err;
1821 int delta_cache_fd = -1;
1822 FILE *delta_cache = NULL;
1823 struct got_object_idset *idset;
1824 struct got_ratelimit rl;
1825 struct got_pack_metavec deltify, reuse;
1826 int ncolored = 0, nfound = 0, ntrees = 0;
1828 memset(&deltify, 0, sizeof(deltify));
1829 memset(&reuse, 0, sizeof(reuse));
1831 got_ratelimit_init(&rl, 0, 500);
1833 idset = got_object_idset_alloc();
1834 if (idset == NULL)
1835 return got_error_from_errno("got_object_idset_alloc");
1837 err = load_object_ids(&ncolored, &nfound, &ntrees, idset, theirs,
1838 ntheirs, ours, nours, repo, loose_obj_only,
1839 progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
1840 if (err)
1841 return err;
1843 err = got_object_idset_for_each(idset, remove_unused_object, idset);
1844 if (err)
1845 goto done;
1847 if (progress_cb) {
1848 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1849 0L, nours, got_object_idset_num_elements(idset), 0, 0);
1850 if (err)
1851 goto done;
1854 if (got_object_idset_num_elements(idset) == 0 && !allow_empty) {
1855 err = got_error(GOT_ERR_CANNOT_PACK);
1856 goto done;
1859 delta_cache_fd = got_opentempfd();
1860 if (delta_cache_fd == -1) {
1861 err = got_error_from_errno("got_opentemp");
1862 goto done;
1865 reuse.metasz = 64;
1866 reuse.meta = calloc(reuse.metasz,
1867 sizeof(struct got_pack_meta *));
1868 if (reuse.meta == NULL) {
1869 err = got_error_from_errno("calloc");
1870 goto done;
1873 err = search_deltas(&reuse, idset, delta_cache_fd, ncolored, nfound,
1874 ntrees, nours, repo, progress_cb, progress_arg, &rl,
1875 cancel_cb, cancel_arg);
1876 if (err)
1877 goto done;
1878 if (reuse.nmeta > 0) {
1879 err = got_object_idset_for_each(idset,
1880 remove_reused_object, idset);
1881 if (err)
1882 goto done;
1885 delta_cache = fdopen(delta_cache_fd, "a+");
1886 if (delta_cache == NULL) {
1887 err = got_error_from_errno("fdopen");
1888 goto done;
1890 delta_cache_fd = -1;
1892 if (fseeko(delta_cache, 0L, SEEK_END) == -1) {
1893 err = got_error_from_errno("fseeko");
1894 goto done;
1897 deltify.meta = calloc(got_object_idset_num_elements(idset),
1898 sizeof(struct got_pack_meta *));
1899 if (deltify.meta == NULL) {
1900 err = got_error_from_errno("calloc");
1901 goto done;
1903 deltify.metasz = got_object_idset_num_elements(idset);
1905 err = got_object_idset_for_each(idset, add_meta_idset_cb, &deltify);
1906 if (err)
1907 goto done;
1908 if (deltify.nmeta > 0) {
1909 err = pick_deltas(deltify.meta, deltify.nmeta, ncolored,
1910 nfound, ntrees, nours, reuse.nmeta, delta_cache, repo,
1911 progress_cb, progress_arg, &rl, cancel_cb, cancel_arg);
1912 if (err)
1913 goto done;
1914 if (fseeko(delta_cache, 0L, SEEK_SET) == -1) {
1915 err = got_error_from_errno("fseeko");
1916 goto done;
1920 err = genpack(packsha1, packfile, delta_cache, deltify.meta,
1921 deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees,
1922 nours, repo, progress_cb, progress_arg, &rl,
1923 cancel_cb, cancel_arg);
1924 if (err)
1925 goto done;
1926 done:
1927 free_nmeta(deltify.meta, deltify.nmeta);
1928 free_nmeta(reuse.meta, reuse.nmeta);
1929 got_object_idset_free(idset);
1930 if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
1931 err = got_error_from_errno("close");
1932 if (delta_cache && fclose(delta_cache) == EOF && err == NULL)
1933 err = got_error_from_errno("fclose");
1934 return err;