Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
39 struct got_blame_line {
40 int annotated;
41 struct got_object_id id;
42 };
/*
 * Line offset information recorded for one blamed commit.
 * Entries are linked into a singly-linked list, newest entry first.
 */
struct got_blame_diff_offsets {
	/* Offset chunks created by got_diffoffset_alloc(). */
	struct got_diffoffset_chunks *chunks;
	/* Heap-allocated copy of the commit ID; freed with the entry. */
	struct got_object_id *commit_id;
	/* Linkage for got_blame_diff_offsets_list. */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* List head type for chains of struct got_blame_diff_offsets. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
52 struct got_blame {
53 FILE *f;
54 size_t nlines;
55 struct got_blame_line *lines; /* one per line */
56 int ncommits;
57 struct got_blame_diff_offsets_list diff_offsets_list;
58 };
60 static void
61 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
62 {
63 if (diff_offsets->chunks)
64 got_diffoffset_free(diff_offsets->chunks);
65 free(diff_offsets->commit_id);
66 free(diff_offsets);
67 }
69 static const struct got_error *
70 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
71 struct got_object_id *commit_id)
72 {
73 const struct got_error *err = NULL;
75 *diff_offsets = calloc(1, sizeof(**diff_offsets));
76 if (*diff_offsets == NULL)
77 return got_error_from_errno();
79 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
80 if ((*diff_offsets)->commit_id == NULL) {
81 err = got_error_from_errno();
82 free_diff_offsets(*diff_offsets);
83 *diff_offsets = NULL;
84 return err;
85 }
87 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
88 if (err) {
89 free_diff_offsets(*diff_offsets);
90 return err;
91 }
93 return NULL;
94 }
96 static const struct got_error *
97 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
98 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
99 void *arg)
101 const struct got_error *err = NULL;
102 struct got_blame_line *line;
104 if (lineno < 1 || lineno > blame->nlines)
105 return got_error(GOT_ERR_RANGE);
107 line = &blame->lines[lineno - 1];
108 if (line->annotated)
109 return NULL;
111 memcpy(&line->id, id, sizeof(line->id));
112 line->annotated = 1;
113 if (cb)
114 err = cb(arg, blame->nlines, lineno, id);
115 return err;
118 static int
119 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
120 int lineno)
122 struct got_blame_diff_offsets *diff_offsets;
124 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
125 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
127 return lineno;
130 static const struct got_error *
131 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
132 struct got_object_id *commit_id,
133 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
134 void *arg)
136 const struct got_error *err = NULL;
137 struct got_diff_change *change;
138 struct got_blame_diff_offsets *diff_offsets;
140 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
141 int c = change->cv.c;
142 int d = change->cv.d;
143 int new_lineno = c;
144 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
145 int ln;
147 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
148 err = annotate_line(blame,
149 get_blamed_line(&blame->diff_offsets_list, ln),
150 commit_id, cb, arg);
151 if (err)
152 return err;
156 err = alloc_diff_offsets(&diff_offsets, commit_id);
157 if (err)
158 return err;
159 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
160 int a = change->cv.a;
161 int b = change->cv.b;
162 int c = change->cv.c;
163 int d = change->cv.d;
164 int old_lineno = a;
165 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
166 int new_lineno = c;
167 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
169 err = got_diffoffset_add(diff_offsets->chunks,
170 old_lineno, old_length, new_lineno, new_length);
171 if (err) {
172 free_diff_offsets(diff_offsets);
173 return err;
176 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
178 return NULL;
181 static const struct got_error *
182 blame_commit(struct got_blame *blame, struct got_object_id *id,
183 struct got_object_id *pid, const char *path, struct got_repository *repo,
184 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
185 void *arg)
187 const struct got_error *err = NULL;
188 struct got_object *obj = NULL, *pobj = NULL;
189 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
190 struct got_blob_object *blob = NULL, *pblob = NULL;
191 struct got_diff_changes *changes = NULL;
193 err = got_object_id_by_path(&obj_id, repo, id, path);
194 if (err)
195 goto done;
197 err = got_object_open(&obj, repo, obj_id);
198 if (err)
199 goto done;
201 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
202 err = got_error(GOT_ERR_OBJ_TYPE);
203 goto done;
206 err = got_object_id_by_path(&pobj_id, repo, pid, path);
207 if (err) {
208 if (err->code == GOT_ERR_NO_OBJ) {
209 /* Blob's history began in previous commit. */
210 err = got_error(GOT_ERR_ITER_COMPLETED);
212 goto done;
215 /* If IDs match then don't bother with diffing. */
216 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
217 if (cb)
218 err = cb(arg, blame->nlines, -1, id);
219 goto done;
222 err = got_object_open(&pobj, repo, pobj_id);
223 if (err)
224 goto done;
226 if (got_object_get_type(pobj) != GOT_OBJ_TYPE_BLOB) {
227 /*
228 * Encountered a non-blob at the path (probably a tree).
229 * Blob's history began in previous commit.
230 */
231 err = got_error(GOT_ERR_ITER_COMPLETED);
232 goto done;
235 err = got_object_blob_open(&blob, repo, obj, 8192);
236 if (err)
237 goto done;
239 err = got_object_blob_open(&pblob, repo, pobj, 8192);
240 if (err)
241 goto done;
243 err = got_diff_blob_lines_changed(&changes, pblob, blob);
244 if (err)
245 goto done;
247 if (changes) {
248 err = blame_changes(blame, changes, id, cb, arg);
249 got_diff_free_changes(changes);
250 } else if (cb)
251 err = cb(arg, blame->nlines, -1, id);
252 done:
253 free(obj_id);
254 free(pobj_id);
255 if (obj)
256 got_object_close(obj);
257 if (pobj)
258 got_object_close(pobj);
259 if (blob)
260 got_object_blob_close(blob);
261 if (pblob)
262 got_object_blob_close(pblob);
263 return err;
266 static void
267 blame_close(struct got_blame *blame)
269 struct got_blame_diff_offsets *diff_offsets;
271 if (blame->f)
272 fclose(blame->f);
273 free(blame->lines);
274 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
275 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
276 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
277 free_diff_offsets(diff_offsets);
279 free(blame);
282 static const struct got_error *
283 blame_open(struct got_blame **blamep, const char *path,
284 struct got_object_id *start_commit_id, struct got_repository *repo,
285 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
286 void *arg)
288 const struct got_error *err = NULL;
289 struct got_object *obj = NULL;
290 struct got_object_id *obj_id = NULL;
291 struct got_blob_object *blob = NULL;
292 struct got_blame *blame = NULL;
293 struct got_commit_object *commit = NULL;
294 struct got_object_id *id = NULL;
295 int lineno;
297 *blamep = NULL;
299 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
300 if (err)
301 return err;
303 err = got_object_open(&obj, repo, obj_id);
304 if (err)
305 goto done;
307 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
308 err = got_error(GOT_ERR_OBJ_TYPE);
309 goto done;
312 err = got_object_blob_open(&blob, repo, obj, 8192);
313 if (err)
314 goto done;
316 blame = calloc(1, sizeof(*blame));
317 if (blame == NULL)
318 return got_error_from_errno();
320 blame->f = got_opentemp();
321 if (blame->f == NULL) {
322 err = got_error_from_errno();
323 goto done;
325 err = got_object_blob_dump_to_file(NULL, &blame->nlines, blame->f,
326 blob);
327 if (err)
328 goto done;
330 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
331 if (blame->lines == NULL) {
332 err = got_error_from_errno();
333 goto done;
336 /* Loop over first-parent history and try to blame commits. */
337 /* TODO: Iterate commits via commit graph instead. */
338 err = got_object_open_as_commit(&commit, repo, start_commit_id);
339 if (err)
340 goto done;
341 id = got_object_id_dup(start_commit_id);
342 if (id == NULL) {
343 err = got_error_from_errno();
344 goto done;
346 while (1) {
347 struct got_object_qid *pid;
349 pid = SIMPLEQ_FIRST(&commit->parent_ids);
350 if (pid == NULL)
351 break;
353 err = blame_commit(blame, id, pid->id, path, repo, cb, arg);
354 if (err) {
355 if (err->code == GOT_ERR_ITER_COMPLETED)
356 err = NULL;
357 break;
360 free(id);
361 id = got_object_id_dup(pid->id);
362 if (id == NULL) {
363 err = got_error_from_errno();
364 goto done;
366 got_object_commit_close(commit);
367 err = got_object_open_as_commit(&commit, repo, id);
368 if (err)
369 goto done;
372 /* Annotate remaining non-annotated lines with last commit. */
373 for (lineno = 1; lineno <= blame->nlines; lineno++) {
374 err = annotate_line(blame, lineno, id, cb, arg);
375 if (err)
376 goto done;
379 done:
380 free(obj_id);
381 if (obj)
382 got_object_close(obj);
383 if (blob)
384 got_object_blob_close(blob);
385 if (commit)
386 got_object_commit_close(commit);
387 if (err) {
388 if (blame)
389 blame_close(blame);
390 } else
391 *blamep = blame;
393 return err;
396 static const struct got_error *
397 blame_line(struct got_object_id **id, struct got_blame *blame, int lineno)
399 if (lineno < 1 || lineno > blame->nlines)
400 return got_error(GOT_ERR_RANGE);
401 *id = &blame->lines[lineno - 1].id;
402 return NULL;
/*
 * Read the next line from 'f' using fparseln(3) with the escape,
 * continuation and comment characters all set to NUL and no flags.
 *
 * Returns the line as returned by fparseln(), which the caller must
 * free(), or NULL if fparseln() returned NULL. If 'len' is not NULL it
 * receives the length reported by fparseln(), or 0 if fparseln() did not
 * report one.
 */
static char *
parse_next_line(FILE *f, size_t *len)
{
	char *line;
	/*
	 * Initialized so that *len is well-defined even if fparseln()
	 * fails without storing a length.
	 */
	size_t linelen = 0;
	size_t lineno = 0;
	const char delim[3] = { '\0', '\0', '\0'};

	line = fparseln(f, &linelen, &lineno, delim, 0);
	if (len)
		*len = linelen;
	return line;
}
419 const struct got_error *
420 got_blame(const char *path, struct got_object_id *start_commit_id,
421 struct got_repository *repo, FILE *outfile)
423 const struct got_error *err = NULL;
424 struct got_blame *blame;
425 int lineno;
426 char *abspath;
428 if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1)
429 return got_error_from_errno();
431 err = blame_open(&blame, abspath, start_commit_id, repo, NULL, NULL);
432 if (err) {
433 free(abspath);
434 return err;
437 for (lineno = 1; lineno <= blame->nlines; lineno++) {
438 struct got_object_id *id;
439 char *line, *id_str;
441 line = parse_next_line(blame->f, NULL);
442 if (line == NULL)
443 break;
445 err = blame_line(&id, blame, lineno);
446 if (err) {
447 free(line);
448 break;
451 err = got_object_id_str(&id_str, id);
452 /* Do not free id; It points into blame->lines. */
453 if (err) {
454 free(line);
455 break;
458 fprintf(outfile, "%.8s %s\n", id_str, line);
459 free(line);
460 free(id_str);
463 blame_close(blame);
464 free(abspath);
465 return err;
/*
 * Blame the file at 'path' as of 'commit_id', reporting results only
 * through the callback 'cb', which is invoked with 'arg' while
 * annotations are computed. No blame context is handed back to the
 * caller; it is destroyed before returning.
 *
 * 'path' is made absolute by prepending "/" if it does not already start
 * with one. Returns the result of blame_open().
 */
const struct got_error *
got_blame_incremental(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err;
	struct got_blame *blame = NULL;
	char *abspath = NULL;
	const char *prefix = (path[0] == '/') ? "" : "/";

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno();

	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	if (blame != NULL)
		blame_close(blame);

	return err;
}