Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 /*
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
178 */
179 struct context_vec {
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
184 };
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void range(FILE *, int, int, char *);
190 static void uni_range(FILE *, int, int);
191 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
192 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
193 static void prune(struct got_diff_state *);
194 static void equiv(struct line *, int, struct line *, int, int *);
195 static void unravel(struct got_diff_state *, int);
196 static int unsort(struct line *, int, int *);
197 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
198 static void sort(struct line *, int);
199 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
200 static int asciifile(FILE *);
201 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
202 static int newcand(struct got_diff_state *, int, int, int, int *);
203 static int search(struct got_diff_state *, int *, int, int);
204 static int skipline(FILE *);
205 static int isqrt(int);
206 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
207 static int readhash(struct got_diff_state *, FILE *, int);
208 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
209 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
211 /*
212 * chrtran points to one of 2 translation tables: cup2low if folding upper to
213 * lower case clow2low if not folding case
214 */
215 u_char clow2low[256] = {
216 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
217 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
218 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
219 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
220 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
221 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
222 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
223 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
224 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
225 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
226 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
227 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
228 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
229 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
230 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
231 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
232 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
233 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
234 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
235 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
236 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
237 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
238 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
239 0xfd, 0xfe, 0xff
240 };
242 u_char cup2low[256] = {
243 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
244 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
245 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
246 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
247 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
248 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
249 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
250 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
251 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
252 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
253 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
254 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
255 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
256 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
257 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
258 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
259 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
260 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
261 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
262 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
263 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
264 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
265 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
266 0xfd, 0xfe, 0xff
267 };
269 static void
270 diff_output(FILE *outfile, const char *fmt, ...)
272 va_list ap;
274 va_start(ap, fmt);
275 vfprintf(outfile, fmt, ap);
276 va_end(ap);
279 const struct got_error *
280 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
281 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
283 const struct got_error *err = NULL;
284 int i, *p;
285 long *lp;
287 *rval = D_SAME;
288 ds->anychange = 0;
289 ds->lastline = 0;
290 ds->lastmatchline = 0;
291 ds->context_vec_ptr = ds->context_vec_start - 1;
292 ds->max_context = 64;
293 if (flags & D_IGNORECASE)
294 ds->chrtran = cup2low;
295 else
296 ds->chrtran = clow2low;
297 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
298 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
299 return NULL;
301 if (flags & D_EMPTY1) {
302 f1 = fopen(_PATH_DEVNULL, "r");
303 if (f1 == NULL) {
304 err = got_error_from_errno();
305 goto closem;
308 else if (f1 == NULL) {
309 args->status |= 2;
310 goto closem;
313 if (flags & D_EMPTY2) {
314 f2 = fopen(_PATH_DEVNULL, "r");
315 if (f2 == NULL) {
316 err = got_error_from_errno();
317 goto closem;
319 } else if (f2 == NULL) {
320 args->status |= 2;
321 goto closem;
324 switch (files_differ(ds, f1, f2, flags)) {
325 case 0:
326 goto closem;
327 case 1:
328 break;
329 default:
330 /* error */
331 args->status |= 2;
332 goto closem;
335 if ((flags & D_FORCEASCII) == 0 &&
336 (!asciifile(f1) || !asciifile(f2))) {
337 *rval = D_BINARY;
338 args->status |= 1;
339 goto closem;
341 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
342 err = got_error_from_errno();
343 goto closem;
345 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
346 err = got_error_from_errno();
347 goto closem;
350 prune(ds);
351 sort(ds->sfile[0], ds->slen[0]);
352 sort(ds->sfile[1], ds->slen[1]);
354 ds->member = (int *)ds->file[1];
355 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
356 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
357 if (p == NULL) {
358 err = got_error_from_errno();
359 goto closem;
361 ds->member = p;
363 ds->class = (int *)ds->file[0];
364 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
365 err = got_error_from_errno();
366 goto closem;
368 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
369 if (p == NULL) {
370 err = got_error_from_errno();
371 goto closem;
373 ds->class = p;
375 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
376 if (ds->klist == NULL) {
377 err = got_error_from_errno();
378 goto closem;
380 ds->clen = 0;
381 ds->clistlen = 100;
382 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
383 if (ds->clist == NULL) {
384 err = got_error_from_errno();
385 goto closem;
387 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
388 if (i < 0) {
389 err = got_error_from_errno();
390 goto closem;
393 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
394 if (p == NULL) {
395 err = got_error_from_errno();
396 goto closem;
398 ds->J = p;
399 unravel(ds, ds->klist[i]);
401 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
402 if (lp == NULL) {
403 err = got_error_from_errno();
404 goto closem;
406 ds->ixold = lp;
407 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
408 if (lp == NULL) {
409 err = got_error_from_errno();
410 goto closem;
412 ds->ixnew = lp;
413 check(ds, f1, f2, flags);
414 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
415 flags))
416 err = got_error_from_errno();
417 closem:
418 free(ds->J);
419 free(ds->member);
420 free(ds->class);
421 free(ds->clist);
422 free(ds->klist);
423 free(ds->ixold);
424 free(ds->ixnew);
425 if (ds->anychange) {
426 args->status |= 1;
427 if (*rval == D_SAME)
428 *rval = D_DIFFER;
430 if (f1 != NULL)
431 fclose(f1);
432 if (f2 != NULL)
433 fclose(f2);
435 return (err);
438 /*
439 * Check to see if the given files differ.
440 * Returns 0 if they are the same, 1 if different, and -1 on error.
441 * XXX - could use code from cmp(1) [faster]
442 */
443 static int
444 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
446 char buf1[BUFSIZ], buf2[BUFSIZ];
447 size_t i, j;
449 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
450 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
451 return (1);
452 for (;;) {
453 i = fread(buf1, 1, sizeof(buf1), f1);
454 j = fread(buf2, 1, sizeof(buf2), f2);
455 if ((!i && ferror(f1)) || (!j && ferror(f2)))
456 return (-1);
457 if (i != j)
458 return (1);
459 if (i == 0)
460 return (0);
461 if (memcmp(buf1, buf2, i) != 0)
462 return (1);
466 static int
467 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
469 struct line *p, *q;
470 int j, h;
471 size_t sz;
473 rewind(fd);
475 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
476 if (sz < 100)
477 sz = 100;
479 p = calloc(sz + 3, sizeof(*p));
480 if (p == NULL)
481 return (-1);
482 for (j = 0; (h = readhash(ds, fd, flags));) {
483 if (j == sz) {
484 sz = sz * 3 / 2;
485 q = reallocarray(p, sz + 3, sizeof(*p));
486 if (q == NULL) {
487 free(p);
488 return (-1);
490 p = q;
492 p[++j].value = h;
494 ds->len[i] = j;
495 ds->file[i] = p;
497 return (0);
500 static void
501 prune(struct got_diff_state *ds)
503 int i, j;
505 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
506 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
507 ds->pref++)
509 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
510 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
511 ds->suff++)
513 for (j = 0; j < 2; j++) {
514 ds->sfile[j] = ds->file[j] + ds->pref;
515 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
516 for (i = 0; i <= ds->slen[j]; i++)
517 ds->sfile[j][i].serial = i;
521 static void
522 equiv(struct line *a, int n, struct line *b, int m, int *c)
524 int i, j;
526 i = j = 1;
527 while (i <= n && j <= m) {
528 if (a[i].value < b[j].value)
529 a[i++].value = 0;
530 else if (a[i].value == b[j].value)
531 a[i++].value = j;
532 else
533 j++;
535 while (i <= n)
536 a[i++].value = 0;
537 b[m + 1].value = 0;
538 j = 0;
539 while (++j <= m) {
540 c[j] = -b[j].serial;
541 while (b[j + 1].value == b[j].value) {
542 j++;
543 c[j] = b[j].serial;
546 c[j] = -1;
549 /* Code taken from ping.c */
550 static int
551 isqrt(int n)
553 int y, x = 1;
555 if (n == 0)
556 return (0);
558 do { /* newton was a stinker */
559 y = x;
560 x = n / x;
561 x += y;
562 x /= 2;
563 } while ((x - y) > 1 || (x - y) < -1);
565 return (x);
568 static int
569 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
571 int i, k, y, j, l;
572 int oldc, tc, oldl, sq;
573 u_int numtries, bound;
574 int error;
576 if (flags & D_MINIMAL)
577 bound = UINT_MAX;
578 else {
579 sq = isqrt(n);
580 bound = MAXIMUM(256, sq);
583 k = 0;
584 c[0] = newcand(ds, 0, 0, 0, &error);
585 if (error)
586 return -1;
587 for (i = 1; i <= n; i++) {
588 j = a[i];
589 if (j == 0)
590 continue;
591 y = -b[j];
592 oldl = 0;
593 oldc = c[0];
594 numtries = 0;
595 do {
596 if (y <= ds->clist[oldc].y)
597 continue;
598 l = search(ds, c, k, y);
599 if (l != oldl + 1)
600 oldc = c[l - 1];
601 if (l <= k) {
602 if (ds->clist[c[l]].y <= y)
603 continue;
604 tc = c[l];
605 c[l] = newcand(ds, i, y, oldc, &error);
606 if (error)
607 return -1;
608 oldc = tc;
609 oldl = l;
610 numtries++;
611 } else {
612 c[l] = newcand(ds, i, y, oldc, &error);
613 if (error)
614 return -1;
615 k++;
616 break;
618 } while ((y = b[++j]) > 0 && numtries < bound);
620 return (k);
623 static int
624 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
626 struct cand *q;
628 if (ds->clen == ds->clistlen) {
629 ds->clistlen = ds->clistlen * 11 / 10;
630 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
631 if (q == NULL) {
632 *errorp = -1;
633 free(ds->clist);
634 ds->clist = NULL;
635 return 0;
637 ds->clist = q;
639 q = ds->clist + ds->clen;
640 q->x = x;
641 q->y = y;
642 q->pred = pred;
643 *errorp = 0;
644 return (ds->clen++);
647 static int
648 search(struct got_diff_state *ds, int *c, int k, int y)
650 int i, j, l, t;
652 if (ds->clist[c[k]].y < y) /* quick look for typical case */
653 return (k + 1);
654 i = 0;
655 j = k + 1;
656 for (;;) {
657 l = (i + j) / 2;
658 if (l <= i)
659 break;
660 t = ds->clist[c[l]].y;
661 if (t > y)
662 j = l;
663 else if (t < y)
664 i = l;
665 else
666 return (l);
668 return (l + 1);
671 static void
672 unravel(struct got_diff_state *ds, int p)
674 struct cand *q;
675 int i;
677 for (i = 0; i <= ds->len[0]; i++)
678 ds->J[i] = i <= ds->pref ? i :
679 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
680 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
681 ds->J[q->x + ds->pref] = q->y + ds->pref;
684 /*
685 * Check does double duty:
686 * 1. ferret out any fortuitous correspondences due
687 * to confounding by hashing (which result in "jackpot")
688 * 2. collect random access indexes to the two files
689 */
690 static void
691 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
693 int i, j, jackpot, c, d;
694 long ctold, ctnew;
696 rewind(f1);
697 rewind(f2);
698 j = 1;
699 ds->ixold[0] = ds->ixnew[0] = 0;
700 jackpot = 0;
701 ctold = ctnew = 0;
702 for (i = 1; i <= ds->len[0]; i++) {
703 if (ds->J[i] == 0) {
704 ds->ixold[i] = ctold += skipline(f1);
705 continue;
707 while (j < ds->J[i]) {
708 ds->ixnew[j] = ctnew += skipline(f2);
709 j++;
711 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
712 for (;;) {
713 c = getc(f1);
714 d = getc(f2);
715 /*
716 * GNU diff ignores a missing newline
717 * in one file for -b or -w.
718 */
719 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
720 if (c == EOF && d == '\n') {
721 ctnew++;
722 break;
723 } else if (c == '\n' && d == EOF) {
724 ctold++;
725 break;
728 ctold++;
729 ctnew++;
730 if ((flags & D_FOLDBLANKS) && isspace(c) &&
731 isspace(d)) {
732 do {
733 if (c == '\n')
734 break;
735 ctold++;
736 } while (isspace(c = getc(f1)));
737 do {
738 if (d == '\n')
739 break;
740 ctnew++;
741 } while (isspace(d = getc(f2)));
742 } else if ((flags & D_IGNOREBLANKS)) {
743 while (isspace(c) && c != '\n') {
744 c = getc(f1);
745 ctold++;
747 while (isspace(d) && d != '\n') {
748 d = getc(f2);
749 ctnew++;
752 if (ds->chrtran[c] != ds->chrtran[d]) {
753 jackpot++;
754 ds->J[i] = 0;
755 if (c != '\n' && c != EOF)
756 ctold += skipline(f1);
757 if (d != '\n' && c != EOF)
758 ctnew += skipline(f2);
759 break;
761 if (c == '\n' || c == EOF)
762 break;
764 } else {
765 for (;;) {
766 ctold++;
767 ctnew++;
768 if ((c = getc(f1)) != (d = getc(f2))) {
769 /* jackpot++; */
770 ds->J[i] = 0;
771 if (c != '\n' && c != EOF)
772 ctold += skipline(f1);
773 if (d != '\n' && c != EOF)
774 ctnew += skipline(f2);
775 break;
777 if (c == '\n' || c == EOF)
778 break;
781 ds->ixold[i] = ctold;
782 ds->ixnew[j] = ctnew;
783 j++;
785 for (; j <= ds->len[1]; j++)
786 ds->ixnew[j] = ctnew += skipline(f2);
787 /*
788 * if (jackpot)
789 * fprintf(stderr, "jackpot\n");
790 */
793 /* shellsort CACM #201 */
794 static void
795 sort(struct line *a, int n)
797 struct line *ai, *aim, w;
798 int j, m = 0, k;
800 if (n == 0)
801 return;
802 for (j = 1; j <= n; j *= 2)
803 m = 2 * j - 1;
804 for (m /= 2; m != 0; m /= 2) {
805 k = n - m;
806 for (j = 1; j <= k; j++) {
807 for (ai = &a[j]; ai > a; ai -= m) {
808 aim = &ai[m];
809 if (aim < ai)
810 break; /* wraparound */
811 if (aim->value > ai[0].value ||
812 (aim->value == ai[0].value &&
813 aim->serial > ai[0].serial))
814 break;
815 w.value = ai[0].value;
816 ai[0].value = aim->value;
817 aim->value = w.value;
818 w.serial = ai[0].serial;
819 ai[0].serial = aim->serial;
820 aim->serial = w.serial;
826 static int
827 unsort(struct line *f, int l, int *b)
829 int *a, i;
831 a = calloc(l + 1, sizeof(*a));
832 if (a == NULL)
833 return (-1);
834 for (i = 1; i <= l; i++)
835 a[f[i].serial] = f[i].value;
836 for (i = 1; i <= l; i++)
837 b[i] = a[i];
838 free(a);
840 return (0);
843 static int
844 skipline(FILE *f)
846 int i, c;
848 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 continue;
850 return (i);
853 static int
854 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
855 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
857 int m, i0, i1, j0, j1;
858 int error = 0;
860 rewind(f1);
861 rewind(f2);
862 m = ds->len[0];
863 ds->J[0] = 0;
864 ds->J[m + 1] = ds->len[1] + 1;
865 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
866 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
867 i0++;
868 j0 = ds->J[i0 - 1] + 1;
869 i1 = i0 - 1;
870 while (i1 < m && ds->J[i1 + 1] == 0)
871 i1++;
872 j1 = ds->J[i1 + 1] - 1;
873 ds->J[i1] = j1;
874 error = change(outfile, ds, args, file1, f1, file2, f2,
875 i0, i1, j0, j1, &flags);
876 if (error)
877 return (error);
879 if (m == 0) {
880 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
881 1, ds->len[1], &flags);
882 if (error)
883 return (error);
885 if (args->diff_format == D_IFDEF) {
886 for (;;) {
887 #define c i0
888 if ((c = getc(f1)) == EOF)
889 return (0);
890 diff_output(outfile, "%c", c);
892 #undef c
894 if (ds->anychange != 0)
895 dump_unified_vec(outfile, ds, args, f1, f2, flags);
897 return (0);
900 static void
901 range(FILE *outfile, int a, int b, char *separator)
903 diff_output(outfile, "%d", a > b ? b : a);
904 if (a < b)
905 diff_output(outfile, "%s%d", separator, b);
908 static void
909 uni_range(FILE *outfile, int a, int b)
911 if (a < b)
912 diff_output(outfile, "%d,%d", a, b - a + 1);
913 else if (a == b)
914 diff_output(outfile, "%d", b);
915 else
916 diff_output(outfile, "%d,0", b);
919 /*
920 * Indicate that there is a difference between lines a and b of the from file
921 * to get to lines c to d of the to file. If a is greater then b then there
922 * are no lines in the from file involved and this means that there were
923 * lines appended (beginning at b). If c is greater than d then there are
924 * lines missing from the to file.
925 */
926 static int
927 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
928 const char *file1, FILE *f1, const char *file2, FILE *f2,
929 int a, int b, int c, int d, int *pflags)
931 int i;
933 if (args->diff_format != D_IFDEF && a > b && c > d)
934 return (0);
936 if (*pflags & D_HEADER) {
937 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
938 *pflags &= ~D_HEADER;
940 if (args->diff_format == D_UNIFIED) {
941 /*
942 * Allocate change records as needed.
943 */
944 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
945 struct context_vec *cvp;
946 ptrdiff_t offset;
947 offset = ds->context_vec_ptr - ds->context_vec_start;
948 ds->max_context <<= 1;
949 ds->context_vec_start =
950 cvp = reallocarray(ds->context_vec_start,
951 ds->max_context, sizeof(*ds->context_vec_start));
952 if (cvp == NULL) {
953 free(ds->context_vec_start);
954 return (-1);
956 ds->context_vec_start = cvp;
957 ds->context_vec_end = ds->context_vec_start +
958 ds->max_context;
959 ds->context_vec_ptr = ds->context_vec_start + offset;
961 if (ds->anychange == 0) {
962 /*
963 * Print the context/unidiff header first time through.
964 */
965 print_header(outfile, ds, args, file1, file2);
966 ds->anychange = 1;
967 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
968 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
969 /*
970 * If this change is more than 'diff_context' lines from the
971 * previous change, dump the record and reset it.
972 */
973 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
975 ds->context_vec_ptr++;
976 ds->context_vec_ptr->a = a;
977 ds->context_vec_ptr->b = b;
978 ds->context_vec_ptr->c = c;
979 ds->context_vec_ptr->d = d;
980 return (0);
982 if (ds->anychange == 0)
983 ds->anychange = 1;
984 switch (args->diff_format) {
985 case D_BRIEF:
986 return (0);
987 case D_REVERSE:
988 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
989 range(outfile, a, b, " ");
990 diff_output(outfile, "\n");
991 break;
992 case D_NREVERSE:
993 if (a > b)
994 diff_output(outfile, "a%d %d\n", b, d - c + 1);
995 else {
996 diff_output(outfile, "d%d %d\n", a, b - a + 1);
997 if (!(c > d))
998 /* add changed lines */
999 diff_output(outfile, "a%d %d\n", b, d - c + 1);
1001 break;
1003 if (args->diff_format == D_IFDEF)
1004 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1005 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
1006 if (args->diff_format == D_REVERSE && c <= d)
1007 diff_output(outfile, ".\n");
1008 if (ds->inifdef) {
1009 diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1010 ds->inifdef = 0;
1013 return (0);
1016 static int
1017 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1018 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1020 int i, j, c, lastc, col, nc;
1023 * When doing #ifdef's, copy down to current line
1024 * if this is the first file, so that stuff makes it to output.
1026 if (args->diff_format == D_IFDEF && oldfile) {
1027 long curpos = ftell(lb);
1028 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1029 nc = f[a > b ? b : a - 1] - curpos;
1030 for (i = 0; i < nc; i++)
1031 diff_output(outfile, "%c", getc(lb));
1033 if (a > b)
1034 return (0);
1035 if (args->diff_format == D_IFDEF) {
1036 if (ds->inifdef) {
1037 diff_output(outfile, "#else /* %s%s */\n",
1038 oldfile == 1 ? "!" : "", args->ifdefname);
1039 } else {
1040 if (oldfile)
1041 diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1042 else
1043 diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1045 ds->inifdef = 1 + oldfile;
1047 for (i = a; i <= b; i++) {
1048 fseek(lb, f[i - 1], SEEK_SET);
1049 nc = f[i] - f[i - 1];
1050 if (args->diff_format != D_IFDEF && ch != '\0') {
1051 diff_output(outfile, "%c", ch);
1052 if (args->Tflag && args->diff_format == D_UNIFIED)
1053 diff_output(outfile, "\t");
1054 else if (args->diff_format != D_UNIFIED)
1055 diff_output(outfile, " ");
1057 col = 0;
1058 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1059 if ((c = getc(lb)) == EOF) {
1060 if (args->diff_format == D_REVERSE ||
1061 args->diff_format == D_NREVERSE)
1062 warnx("No newline at end of file");
1063 else
1064 diff_output(outfile, "\n\\ No newline at end of "
1065 "file\n");
1066 return (0);
1068 if (c == '\t' && (flags & D_EXPANDTABS)) {
1069 do {
1070 diff_output(outfile, " ");
1071 } while (++col & 7);
1072 } else {
1073 diff_output(outfile, "%c", c);
1074 col++;
1078 return (0);
1082 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1084 static int
1085 readhash(struct got_diff_state *ds, FILE *f, int flags)
1087 int i, t, space;
1088 int sum;
1090 sum = 1;
1091 space = 0;
1092 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1093 if (flags & D_IGNORECASE)
1094 for (i = 0; (t = getc(f)) != '\n'; i++) {
1095 if (t == EOF) {
1096 if (i == 0)
1097 return (0);
1098 break;
1100 sum = sum * 127 + ds->chrtran[t];
1102 else
1103 for (i = 0; (t = getc(f)) != '\n'; i++) {
1104 if (t == EOF) {
1105 if (i == 0)
1106 return (0);
1107 break;
1109 sum = sum * 127 + t;
1111 } else {
1112 for (i = 0;;) {
1113 switch (t = getc(f)) {
1114 case '\t':
1115 case '\r':
1116 case '\v':
1117 case '\f':
1118 case ' ':
1119 space++;
1120 continue;
1121 default:
1122 if (space && (flags & D_IGNOREBLANKS) == 0) {
1123 i++;
1124 space = 0;
1126 sum = sum * 127 + ds->chrtran[t];
1127 i++;
1128 continue;
1129 case EOF:
1130 if (i == 0)
1131 return (0);
1132 /* FALLTHROUGH */
1133 case '\n':
1134 break;
1136 break;
1140 * There is a remote possibility that we end up with a zero sum.
1141 * Zero is used as an EOF marker, so return 1 instead.
1143 return (sum == 0 ? 1 : sum);
1146 static int
1147 asciifile(FILE *f)
1149 unsigned char buf[BUFSIZ];
1150 size_t cnt;
1152 if (f == NULL)
1153 return (1);
1155 rewind(f);
1156 cnt = fread(buf, 1, sizeof(buf), f);
1157 return (memchr(buf, '\0', cnt) == NULL);
1160 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1162 static char *
1163 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1165 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1166 size_t nc;
1167 int last = ds->lastline;
1168 char *state = NULL;
1170 ds->lastline = pos;
1171 while (pos > last) {
1172 fseek(fp, f[pos - 1], SEEK_SET);
1173 nc = f[pos] - f[pos - 1];
1174 if (nc >= sizeof(buf))
1175 nc = sizeof(buf) - 1;
1176 nc = fread(buf, 1, nc, fp);
1177 if (nc > 0) {
1178 buf[nc] = '\0';
1179 buf[strcspn(buf, "\n")] = '\0';
1180 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1181 if (begins_with(buf, "private:")) {
1182 if (!state)
1183 state = " (private)";
1184 } else if (begins_with(buf, "protected:")) {
1185 if (!state)
1186 state = " (protected)";
1187 } else if (begins_with(buf, "public:")) {
1188 if (!state)
1189 state = " (public)";
1190 } else {
1191 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1192 if (state)
1193 strlcat(ds->lastbuf, state,
1194 sizeof ds->lastbuf);
1195 ds->lastmatchline = pos;
1196 return ds->lastbuf;
1200 pos--;
1202 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1205 /* dump accumulated "unified" diff changes */
1206 static void
1207 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1208 FILE *f1, FILE *f2, int flags)
1210 struct context_vec *cvp = ds->context_vec_start;
1211 int lowa, upb, lowc, upd;
1212 int a, b, c, d;
1213 char ch, *f;
1215 if (ds->context_vec_start > ds->context_vec_ptr)
1216 return;
1218 b = d = 0; /* gcc */
1219 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1220 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1221 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1222 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1224 diff_output(outfile, "@@ -");
1225 uni_range(outfile, lowa, upb);
1226 diff_output(outfile, " +");
1227 uni_range(outfile, lowc, upd);
1228 diff_output(outfile, " @@");
1229 if ((flags & D_PROTOTYPE)) {
1230 f = match_function(ds, ds->ixold, lowa-1, f1);
1231 if (f != NULL)
1232 diff_output(outfile, " %s", f);
1234 diff_output(outfile, "\n");
1237 * Output changes in "unified" diff format--the old and new lines
1238 * are printed together.
1240 for (; cvp <= ds->context_vec_ptr; cvp++) {
1241 a = cvp->a;
1242 b = cvp->b;
1243 c = cvp->c;
1244 d = cvp->d;
1247 * c: both new and old changes
1248 * d: only changes in the old file
1249 * a: only changes in the new file
1251 if (a <= b && c <= d)
1252 ch = 'c';
1253 else
1254 ch = (a <= b) ? 'd' : 'a';
1256 switch (ch) {
1257 case 'c':
1258 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1259 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1260 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1261 break;
1262 case 'd':
1263 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1264 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1265 break;
1266 case 'a':
1267 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1268 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1269 break;
1271 lowa = b + 1;
1272 lowc = d + 1;
1274 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1276 ds->context_vec_ptr = ds->context_vec_start - 1;
1279 static void
1280 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1281 const char *file1, const char *file2)
1283 if (args->label[0] != NULL)
1284 diff_output(outfile, "--- %s\n", args->label[0]);
1285 else
1286 diff_output(outfile, "--- %s\t%s", file1,
1287 ctime(&ds->stb1.st_mtime));
1288 if (args->label[1] != NULL)
1289 diff_output(outfile, "+++ %s\n", args->label[1]);
1290 else
1291 diff_output(outfile, "+++ %s\t%s", file2,
1292 ctime(&ds->stb2.st_mtime));