1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
86 #include "got_error.h"
87 #include "got_object.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 #define diff_output printf
187 static FILE *opentemp(const char *);
188 static void output(char *, FILE *, char *, FILE *, int);
189 static void check(FILE *, FILE *, int);
190 static void range(int, int, char *);
191 static void uni_range(int, int);
192 static void dump_context_vec(FILE *, FILE *, int);
193 static void dump_unified_vec(FILE *, FILE *, int);
194 static void prepare(int, FILE *, off_t, int);
195 static void prune(void);
196 static void equiv(struct line *, int, struct line *, int, int *);
197 static void unravel(int);
198 static void unsort(struct line *, int, int *);
199 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void sort(struct line *, int);
201 static void print_header(const char *, const char *);
202 static int ignoreline(char *);
203 static int asciifile(FILE *);
204 static int fetch(long *, int, int, FILE *, int, int, int);
205 static int newcand(int, int, int);
206 static int search(int *, int, int);
207 static int skipline(FILE *);
208 static int isqrt(int);
209 static int stone(int *, int, int *, int *, int);
210 static int readhash(FILE *, int);
211 static int files_differ(FILE *, FILE *, int);
212 static char *match_function(const long *, int, FILE *);
213 static char *preadline(int, size_t, off_t);
215 static int *J; /* will be overlaid on class */
216 static int *class; /* will be overlaid on file[0] */
217 static int *klist; /* will be overlaid on file[0] after class */
218 static int *member; /* will be overlaid on file[1] */
220 static int inifdef; /* whether or not we are in a #ifdef block */
222 static int pref, suff; /* length of prefix and suffix */
224 static int anychange;
225 static long *ixnew; /* will be overlaid on file[1] */
226 static long *ixold; /* will be overlaid on klist */
227 static struct cand *clist; /* merely a free storage pot for candidates */
228 static int clistlen; /* the length of clist */
229 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
230 static u_char *chrtran; /* translation table for case-folding */
231 static struct context_vec *context_vec_start;
232 static struct context_vec *context_vec_end;
233 static struct context_vec *context_vec_ptr;
235 #define FUNCTION_CONTEXT_SIZE 55
236 static char lastbuf[FUNCTION_CONTEXT_SIZE];
238 static int lastmatchline;
240 int Nflag, Pflag, rflag, sflag, Tflag;
241 int diff_format, diff_context, status;
242 char *start, *ifdefname, *diffargs, *label[2], *ignore_pats;
243 struct stat stb1, stb2;
247 * chrtran points to one of 2 translation tables: cup2low if folding upper to
248 * lower case clow2low if not folding case
250 u_char clow2low[256] = {
251 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
252 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
253 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
254 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
255 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
256 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
257 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
258 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
259 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
260 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
261 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
262 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
263 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
264 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
265 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
266 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
267 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
268 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
269 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
270 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
271 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
272 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
273 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
277 u_char cup2low[256] = {
278 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
279 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
280 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
281 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
282 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
283 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
284 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
285 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
286 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
287 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
288 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
289 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
290 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
291 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
292 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
293 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
294 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
295 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
296 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
297 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
298 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
299 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
300 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
304 const struct got_error *
305 got_diffreg(int *rval, char *file1, char *file2, int flags)
307 const struct got_error *err = NULL;
311 diff_format = D_UNIFIED;
313 if (strcmp(file1, "-") == 0)
314 fstat(STDIN_FILENO, &stb1);
315 else if (stat(file1, &stb1) != 0)
316 return got_error(GOT_ERR_BAD_PATH);
318 if (strcmp(file2, "-") == 0)
319 fstat(STDIN_FILENO, &stb2);
320 else if (stat(file2, &stb2) != 0)
321 return got_error(GOT_ERR_BAD_PATH);
328 context_vec_ptr = context_vec_start - 1;
329 if (flags & D_IGNORECASE)
333 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) {
334 *rval = (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
337 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
340 if (flags & D_EMPTY1)
341 f1 = fopen(_PATH_DEVNULL, "r");
343 if (!S_ISREG(stb1.st_mode)) {
344 if ((f1 = opentemp(file1)) == NULL ||
345 fstat(fileno(f1), &stb1) < 0) {
349 } else if (strcmp(file1, "-") == 0)
352 f1 = fopen(file1, "r");
359 if (flags & D_EMPTY2)
360 f2 = fopen(_PATH_DEVNULL, "r");
362 if (!S_ISREG(stb2.st_mode)) {
363 if ((f2 = opentemp(file2)) == NULL ||
364 fstat(fileno(f2), &stb2) < 0) {
368 } else if (strcmp(file2, "-") == 0)
371 f2 = fopen(file2, "r");
378 switch (files_differ(f1, f2, flags)) {
389 if ((flags & D_FORCEASCII) == 0 &&
390 (!asciifile(f1) || !asciifile(f2))) {
395 prepare(0, f1, stb1.st_size, flags);
396 prepare(1, f2, stb2.st_size, flags);
399 sort(sfile[0], slen[0]);
400 sort(sfile[1], slen[1]);
402 member = (int *)file[1];
403 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
404 member = reallocarray(member, slen[1] + 2, sizeof(*member));
405 if (member == NULL) {
406 err = got_error(GOT_ERR_NO_MEM);
410 class = (int *)file[0];
411 unsort(sfile[0], slen[0], class);
412 class = reallocarray(class, slen[0] + 2, sizeof(*class));
414 err = got_error(GOT_ERR_NO_MEM);
418 klist = calloc(slen[0] + 2, sizeof(*klist));
420 err = got_error(GOT_ERR_NO_MEM);
425 clist = calloc(clistlen, sizeof(*clist));
427 err = got_error(GOT_ERR_NO_MEM);
430 i = stone(class, slen[0], member, klist, flags);
434 J = reallocarray(J, len[0] + 2, sizeof(*J));
436 err = got_error(GOT_ERR_NO_MEM);
445 ixold = reallocarray(ixold, len[0] + 2, sizeof(*ixold));
447 err = got_error(GOT_ERR_NO_MEM);
450 ixnew = reallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
452 err = got_error(GOT_ERR_NO_MEM);
455 check(f1, f2, flags);
456 output(file1, f1, file2, f2, flags);
472 * Check to see if the given files differ.
473 * Returns 0 if they are the same, 1 if different, and -1 on error.
474 * XXX - could use code from cmp(1) [faster]
477 files_differ(FILE *f1, FILE *f2, int flags)
479 char buf1[BUFSIZ], buf2[BUFSIZ];
482 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
486 i = fread(buf1, 1, sizeof(buf1), f1);
487 j = fread(buf2, 1, sizeof(buf2), f2);
488 if ((!i && ferror(f1)) || (!j && ferror(f2)))
494 if (memcmp(buf1, buf2, i) != 0)
500 opentemp(const char *file)
502 char buf[BUFSIZ], tempfile[PATH_MAX];
506 if (strcmp(file, "-") == 0)
508 else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
511 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
513 if ((ofd = mkstemp(tempfile)) < 0) {
518 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
519 if (write(ofd, buf, nread) != nread) {
526 lseek(ofd, (off_t)0, SEEK_SET);
527 return (fdopen(ofd, "r"));
531 splice(char *dir, char *file)
536 dirlen = strlen(dir);
537 while (dirlen != 0 && dir[dirlen - 1] == '/')
539 if ((tail = strrchr(file, '/')) == NULL)
543 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
548 prepare(int i, FILE *fd, off_t filesize, int flags)
556 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
560 p = xcalloc(sz + 3, sizeof(*p));
561 for (j = 0; (h = readhash(fd, flags));) {
564 p = xreallocarray(p, sz + 3, sizeof(*p));
577 for (pref = 0; pref < len[0] && pref < len[1] &&
578 file[0][pref + 1].value == file[1][pref + 1].value;
581 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
582 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
585 for (j = 0; j < 2; j++) {
586 sfile[j] = file[j] + pref;
587 slen[j] = len[j] - pref - suff;
588 for (i = 0; i <= slen[j]; i++)
589 sfile[j][i].serial = i;
594 equiv(struct line *a, int n, struct line *b, int m, int *c)
599 while (i <= n && j <= m) {
600 if (a[i].value < b[j].value)
602 else if (a[i].value == b[j].value)
613 while (b[j + 1].value == b[j].value) {
621 /* Code taken from ping.c */
630 do { /* newton was a stinker */
635 } while ((x - y) > 1 || (x - y) < -1);
641 stone(int *a, int n, int *b, int *c, int flags)
644 int oldc, tc, oldl, sq;
645 u_int numtries, bound;
647 if (flags & D_MINIMAL)
651 bound = MAXIMUM(256, sq);
655 c[0] = newcand(0, 0, 0);
656 for (i = 1; i <= n; i++) {
665 if (y <= clist[oldc].y)
671 if (clist[c[l]].y <= y)
674 c[l] = newcand(i, y, oldc);
679 c[l] = newcand(i, y, oldc);
683 } while ((y = b[++j]) > 0 && numtries < bound);
689 newcand(int x, int y, int pred)
693 if (clen == clistlen) {
694 clistlen = clistlen * 11 / 10;
695 clist = xreallocarray(clist, clistlen, sizeof(*clist));
705 search(int *c, int k, int y)
709 if (clist[c[k]].y < y) /* quick look for typical case */
734 for (i = 0; i <= len[0]; i++)
735 J[i] = i <= pref ? i :
736 i > len[0] - suff ? i + len[1] - len[0] : 0;
737 for (q = clist + p; q->y != 0; q = clist + q->pred)
738 J[q->x + pref] = q->y + pref;
742 * Check does double duty:
743 * 1. ferret out any fortuitous correspondences due
744 * to confounding by hashing (which result in "jackpot")
745 * 2. collect random access indexes to the two files
748 check(FILE *f1, FILE *f2, int flags)
750 int i, j, jackpot, c, d;
756 ixold[0] = ixnew[0] = 0;
759 for (i = 1; i <= len[0]; i++) {
761 ixold[i] = ctold += skipline(f1);
765 ixnew[j] = ctnew += skipline(f2);
768 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
773 * GNU diff ignores a missing newline
774 * in one file for -b or -w.
776 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
777 if (c == EOF && d == '\n') {
780 } else if (c == '\n' && d == EOF) {
787 if ((flags & D_FOLDBLANKS) && isspace(c) &&
793 } while (isspace(c = getc(f1)));
798 } while (isspace(d = getc(f2)));
799 } else if ((flags & D_IGNOREBLANKS)) {
800 while (isspace(c) && c != '\n') {
804 while (isspace(d) && d != '\n') {
809 if (chrtran[c] != chrtran[d]) {
812 if (c != '\n' && c != EOF)
813 ctold += skipline(f1);
814 if (d != '\n' && c != EOF)
815 ctnew += skipline(f2);
818 if (c == '\n' || c == EOF)
825 if ((c = getc(f1)) != (d = getc(f2))) {
828 if (c != '\n' && c != EOF)
829 ctold += skipline(f1);
830 if (d != '\n' && c != EOF)
831 ctnew += skipline(f2);
834 if (c == '\n' || c == EOF)
842 for (; j <= len[1]; j++)
843 ixnew[j] = ctnew += skipline(f2);
846 * fprintf(stderr, "jackpot\n");
850 /* shellsort CACM #201 */
852 sort(struct line *a, int n)
854 struct line *ai, *aim, w;
859 for (j = 1; j <= n; j *= 2)
861 for (m /= 2; m != 0; m /= 2) {
863 for (j = 1; j <= k; j++) {
864 for (ai = &a[j]; ai > a; ai -= m) {
867 break; /* wraparound */
868 if (aim->value > ai[0].value ||
869 (aim->value == ai[0].value &&
870 aim->serial > ai[0].serial))
872 w.value = ai[0].value;
873 ai[0].value = aim->value;
874 aim->value = w.value;
875 w.serial = ai[0].serial;
876 ai[0].serial = aim->serial;
877 aim->serial = w.serial;
884 unsort(struct line *f, int l, int *b)
888 a = xcalloc(l + 1, sizeof(*a));
889 for (i = 1; i <= l; i++)
890 a[f[i].serial] = f[i].value;
891 for (i = 1; i <= l; i++)
901 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
907 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
909 int m, i0, i1, j0, j1;
915 J[m + 1] = len[1] + 1;
916 if (diff_format != D_EDIT) {
917 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
918 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
922 while (i1 < m && J[i1 + 1] == 0)
926 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
929 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
930 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
934 while (i1 > 1 && J[i1 - 1] == 0)
938 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
942 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
943 if (diff_format == D_IFDEF) {
946 if ((c = getc(f1)) == EOF)
948 diff_output("%c", c);
952 if (anychange != 0) {
953 if (diff_format == D_CONTEXT)
954 dump_context_vec(f1, f2, flags);
955 else if (diff_format == D_UNIFIED)
956 dump_unified_vec(f1, f2, flags);
961 range(int a, int b, char *separator)
963 diff_output("%d", a > b ? b : a);
965 diff_output("%s%d", separator, b);
969 uni_range(int a, int b)
972 diff_output("%d,%d", a, b - a + 1);
974 diff_output("%d", b);
976 diff_output("%d,0", b);
980 preadline(int fd, size_t rlen, off_t off)
985 line = xmalloc(rlen + 1);
986 if ((nr = pread(fd, line, rlen, off)) < 0)
988 if (nr > 0 && line[nr-1] == '\n')
995 ignoreline(char *line)
997 return 0; /* do not ignore any lines */
1001 * Indicate that there is a difference between lines a and b of the from file
1002 * to get to lines c to d of the to file. If a is greater then b then there
1003 * are no lines in the from file involved and this means that there were
1004 * lines appended (beginning at b). If c is greater than d then there are
1005 * lines missing from the to file.
1008 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1011 static size_t max_context = 64;
1015 if (diff_format != D_IFDEF && a > b && c > d)
1017 if (ignore_pats != NULL) {
1020 * All lines in the change, insert, or delete must
1021 * match an ignore pattern for the change to be
1024 if (a <= b) { /* Changes and deletes. */
1025 for (i = a; i <= b; i++) {
1026 line = preadline(fileno(f1),
1027 ixold[i] - ixold[i - 1], ixold[i - 1]);
1028 if (!ignoreline(line))
1032 if (a > b || c <= d) { /* Changes and inserts. */
1033 for (i = c; i <= d; i++) {
1034 line = preadline(fileno(f2),
1035 ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1036 if (!ignoreline(line))
1043 if (*pflags & D_HEADER) {
1044 diff_output("%s %s %s\n", diffargs, file1, file2);
1045 *pflags &= ~D_HEADER;
1047 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1049 * Allocate change records as needed.
1051 if (context_vec_ptr == context_vec_end - 1) {
1052 ptrdiff_t offset = context_vec_ptr - context_vec_start;
1054 context_vec_start = xreallocarray(context_vec_start,
1055 max_context, sizeof(*context_vec_start));
1056 context_vec_end = context_vec_start + max_context;
1057 context_vec_ptr = context_vec_start + offset;
1059 if (anychange == 0) {
1061 * Print the context/unidiff header first time through.
1063 print_header(file1, file2);
1065 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1066 c > context_vec_ptr->d + (2 * diff_context) + 1) {
1068 * If this change is more than 'diff_context' lines from the
1069 * previous change, dump the record and reset it.
1071 if (diff_format == D_CONTEXT)
1072 dump_context_vec(f1, f2, *pflags);
1074 dump_unified_vec(f1, f2, *pflags);
1077 context_vec_ptr->a = a;
1078 context_vec_ptr->b = b;
1079 context_vec_ptr->c = c;
1080 context_vec_ptr->d = d;
1085 switch (diff_format) {
1091 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1092 if (diff_format == D_NORMAL)
1097 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1103 diff_output("a%d %d\n", b, d - c + 1);
1105 diff_output("d%d %d\n", a, b - a + 1);
1107 /* add changed lines */
1108 diff_output("a%d %d\n", b, d - c + 1);
1112 if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1113 fetch(ixold, a, b, f1, '<', 1, *pflags);
1114 if (a <= b && c <= d && diff_format == D_NORMAL)
1115 diff_output("---\n");
1117 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1118 if (i != 0 && diff_format == D_EDIT) {
1120 * A non-zero return value for D_EDIT indicates that the
1121 * last line printed was a bare dot (".") that has been
1122 * escaped as ".." to prevent ed(1) from misinterpreting
1123 * it. We have to add a substitute command to change this
1124 * back and restart where we left off.
1127 diff_output("%ds/.//\n", a + i - 1);
1133 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1136 diff_output("#endif /* %s */\n", ifdefname);
1142 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1144 int i, j, c, lastc, col, nc;
1147 * When doing #ifdef's, copy down to current line
1148 * if this is the first file, so that stuff makes it to output.
1150 if (diff_format == D_IFDEF && oldfile) {
1151 long curpos = ftell(lb);
1152 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1153 nc = f[a > b ? b : a - 1] - curpos;
1154 for (i = 0; i < nc; i++)
1155 diff_output("%c", getc(lb));
1159 if (diff_format == D_IFDEF) {
1161 diff_output("#else /* %s%s */\n",
1162 oldfile == 1 ? "!" : "", ifdefname);
1165 diff_output("#ifndef %s\n", ifdefname);
1167 diff_output("#ifdef %s\n", ifdefname);
1169 inifdef = 1 + oldfile;
1171 for (i = a; i <= b; i++) {
1172 fseek(lb, f[i - 1], SEEK_SET);
1173 nc = f[i] - f[i - 1];
1174 if (diff_format != D_IFDEF && ch != '\0') {
1175 diff_output("%c", ch);
1176 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1177 || diff_format == D_UNIFIED))
1179 else if (diff_format != D_UNIFIED)
1183 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1184 if ((c = getc(lb)) == EOF) {
1185 if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1186 diff_format == D_NREVERSE)
1187 warnx("No newline at end of file");
1189 diff_output("\n\\ No newline at end of "
1193 if (c == '\t' && (flags & D_EXPANDTABS)) {
1196 } while (++col & 7);
1198 if (diff_format == D_EDIT && j == 1 && c == '\n'
1201 * Don't print a bare "." line
1202 * since that will confuse ed(1).
1203 * Print ".." instead and return,
1204 * giving the caller an offset
1205 * from which to restart.
1210 diff_output("%c", c);
1219 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1222 readhash(FILE *f, int flags)
1229 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1230 if (flags & D_IGNORECASE)
1231 for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 sum = sum * 127 + chrtran[t];
1240 for (i = 0; (t = getc(f)) != '\n'; i++) {
1246 sum = sum * 127 + t;
1250 switch (t = getc(f)) {
1259 if (space && (flags & D_IGNOREBLANKS) == 0) {
1263 sum = sum * 127 + chrtran[t];
1277 * There is a remote possibility that we end up with a zero sum.
1278 * Zero is used as an EOF marker, so return 1 instead.
1280 return (sum == 0 ? 1 : sum);
1286 unsigned char buf[BUFSIZ];
1293 cnt = fread(buf, 1, sizeof(buf), f);
1294 return (memchr(buf, '\0', cnt) == NULL);
1297 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1300 match_function(const long *f, int pos, FILE *fp)
1302 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1304 int last = lastline;
1308 while (pos > last) {
1309 fseek(fp, f[pos - 1], SEEK_SET);
1310 nc = f[pos] - f[pos - 1];
1311 if (nc >= sizeof(buf))
1312 nc = sizeof(buf) - 1;
1313 nc = fread(buf, 1, nc, fp);
1316 buf[strcspn(buf, "\n")] = '\0';
1317 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1318 if (begins_with(buf, "private:")) {
1320 state = " (private)";
1321 } else if (begins_with(buf, "protected:")) {
1323 state = " (protected)";
1324 } else if (begins_with(buf, "public:")) {
1326 state = " (public)";
1328 strlcpy(lastbuf, buf, sizeof lastbuf);
1330 strlcat(lastbuf, state,
1332 lastmatchline = pos;
1339 return lastmatchline > 0 ? lastbuf : NULL;
1342 /* dump accumulated "context" diff changes */
1344 dump_context_vec(FILE *f1, FILE *f2, int flags)
1346 struct context_vec *cvp = context_vec_start;
1347 int lowa, upb, lowc, upd, do_output;
1351 if (context_vec_start > context_vec_ptr)
1354 b = d = 0; /* gcc */
1355 lowa = MAXIMUM(1, cvp->a - diff_context);
1356 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1357 lowc = MAXIMUM(1, cvp->c - diff_context);
1358 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1360 diff_output("***************");
1361 if ((flags & D_PROTOTYPE)) {
1362 f = match_function(ixold, lowa-1, f1);
1364 diff_output(" %s", f);
1366 diff_output("\n*** ");
1367 range(lowa, upb, ",");
1368 diff_output(" ****\n");
1371 * Output changes to the "old" file. The first loop suppresses
1372 * output if there were no changes to the "old" file (we'll see
1373 * the "old" lines as context in the "new" list).
1376 for (; cvp <= context_vec_ptr; cvp++)
1377 if (cvp->a <= cvp->b) {
1378 cvp = context_vec_start;
1383 while (cvp <= context_vec_ptr) {
1389 if (a <= b && c <= d)
1392 ch = (a <= b) ? 'd' : 'a';
1395 fetch(ixold, lowa, b, f1, ' ', 0, flags);
1397 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1398 fetch(ixold, a, b, f1,
1399 ch == 'c' ? '!' : '-', 0, flags);
1404 fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1406 /* output changes to the "new" file */
1407 diff_output("--- ");
1408 range(lowc, upd, ",");
1409 diff_output(" ----\n");
1412 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1413 if (cvp->c <= cvp->d) {
1414 cvp = context_vec_start;
1419 while (cvp <= context_vec_ptr) {
1425 if (a <= b && c <= d)
1428 ch = (a <= b) ? 'd' : 'a';
1431 fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1433 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1434 fetch(ixnew, c, d, f2,
1435 ch == 'c' ? '!' : '+', 0, flags);
1440 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1442 context_vec_ptr = context_vec_start - 1;
1445 /* dump accumulated "unified" diff changes */
1447 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1449 struct context_vec *cvp = context_vec_start;
1450 int lowa, upb, lowc, upd;
1454 if (context_vec_start > context_vec_ptr)
1457 b = d = 0; /* gcc */
1458 lowa = MAXIMUM(1, cvp->a - diff_context);
1459 upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1460 lowc = MAXIMUM(1, cvp->c - diff_context);
1461 upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1463 diff_output("@@ -");
1464 uni_range(lowa, upb);
1466 uni_range(lowc, upd);
1468 if ((flags & D_PROTOTYPE)) {
1469 f = match_function(ixold, lowa-1, f1);
1471 diff_output(" %s", f);
1476 * Output changes in "unified" diff format--the old and new lines
1477 * are printed together.
1479 for (; cvp <= context_vec_ptr; cvp++) {
1486 * c: both new and old changes
1487 * d: only changes in the old file
1488 * a: only changes in the new file
1490 if (a <= b && c <= d)
1493 ch = (a <= b) ? 'd' : 'a';
1497 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1498 fetch(ixold, a, b, f1, '-', 0, flags);
1499 fetch(ixnew, c, d, f2, '+', 0, flags);
1502 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1503 fetch(ixold, a, b, f1, '-', 0, flags);
1506 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1507 fetch(ixnew, c, d, f2, '+', 0, flags);
1513 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1515 context_vec_ptr = context_vec_start - 1;
1519 print_header(const char *file1, const char *file2)
1521 if (label[0] != NULL)
1522 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1525 diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1526 file1, ctime(&stb1.st_mtime));
1527 if (label[1] != NULL)
1528 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1531 diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1532 file2, ctime(&stb2.st_mtime));