Got Mirror

Blob

Date:: Wed Nov 29 16:11:11 2017 UTC
Message:: add and test basic blob diffing functionality
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stddef.h>
77 #include <stdint.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 #include <limits.h>
83 #include <sha1.h>
84 #include <zlib.h>
85 
86 #include "got_error.h"
87 #include "got_object.h"
88 #include "got_diff.h"
89 
90 #include "diff.h"
91 #include "xmalloc.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 } *file[2];
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 #define	diff_output	printf
187 static FILE	*opentemp(const char *);
188 static void	 output(char *, FILE *, char *, FILE *, int);
189 static void	 check(FILE *, FILE *, int);
190 static void	 range(int, int, char *);
191 static void	 uni_range(int, int);
192 static void	 dump_context_vec(FILE *, FILE *, int);
193 static void	 dump_unified_vec(FILE *, FILE *, int);
194 static void	 prepare(int, FILE *, off_t, int);
195 static void	 prune(void);
196 static void	 equiv(struct line *, int, struct line *, int, int *);
197 static void	 unravel(int);
198 static void	 unsort(struct line *, int, int *);
199 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void	 sort(struct line *, int);
201 static void	 print_header(const char *, const char *);
202 static int	 ignoreline(char *);
203 static int	 asciifile(FILE *);
204 static int	 fetch(long *, int, int, FILE *, int, int, int);
205 static int	 newcand(int, int, int);
206 static int	 search(int *, int, int);
207 static int	 skipline(FILE *);
208 static int	 isqrt(int);
209 static int	 stone(int *, int, int *, int *, int);
210 static int	 readhash(FILE *, int);
211 static int	 files_differ(FILE *, FILE *, int);
212 static char	*match_function(const long *, int, FILE *);
213 static char	*preadline(int, size_t, off_t);
214 
215 static int  *J;			/* will be overlaid on class */
216 static int  *class;		/* will be overlaid on file[0] */
217 static int  *klist;		/* will be overlaid on file[0] after class */
218 static int  *member;		/* will be overlaid on file[1] */
219 static int   clen;
220 static int   inifdef;		/* whether or not we are in a #ifdef block */
221 static int   len[2];
222 static int   pref, suff;	/* length of prefix and suffix */
223 static int   slen[2];
224 static int   anychange;
225 static long *ixnew;		/* will be overlaid on file[1] */
226 static long *ixold;		/* will be overlaid on klist */
227 static struct cand *clist;	/* merely a free storage pot for candidates */
228 static int   clistlen;		/* the length of clist */
229 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
230 static u_char *chrtran;		/* translation table for case-folding */
231 static struct context_vec *context_vec_start;
232 static struct context_vec *context_vec_end;
233 static struct context_vec *context_vec_ptr;
234 
235 #define FUNCTION_CONTEXT_SIZE	55
236 static char lastbuf[FUNCTION_CONTEXT_SIZE];
237 static int lastline;
238 static int lastmatchline;
239 
240 int	 Nflag, Pflag, rflag, sflag, Tflag;
241 int	 diff_format, diff_context, status;
242 char	*start, *ifdefname, *diffargs, *label[2], *ignore_pats;
243 struct stat stb1, stb2;
244 
245 
246 /*
247  * chrtran points to one of 2 translation tables: cup2low if folding upper to
248  * lower case clow2low if not folding case
249  */
250 u_char clow2low[256] = {
251 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
252 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
253 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
254 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
255 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
256 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
257 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
258 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
259 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
260 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
261 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
262 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
263 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
264 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
265 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
266 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
267 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
268 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
269 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
270 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
271 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
272 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
273 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
274 	0xfd, 0xfe, 0xff
275 };
276 
277 u_char cup2low[256] = {
278 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
279 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
280 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
281 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
282 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
283 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
284 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
285 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
286 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
287 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
288 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
289 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
290 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
291 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
292 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
293 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
294 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
295 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
296 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
297 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
298 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
299 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
300 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
301 	0xfd, 0xfe, 0xff
302 };
303 
304 const struct got_error *
305 got_diffreg(int *rval, char *file1, char *file2, int flags)
306 {
307 	const struct got_error *err = NULL;
308 	FILE *f1, *f2;
309 	int i;
310 
311 	diff_format = D_UNIFIED;
312 
313 	if (strcmp(file1, "-") == 0)
314 		fstat(STDIN_FILENO, &stb1);
315 	else if (stat(file1, &stb1) != 0)
316 		return got_error(GOT_ERR_BAD_PATH);
317 
318 	if (strcmp(file2, "-") == 0)
319 		fstat(STDIN_FILENO, &stb2);
320 	else if (stat(file2, &stb2) != 0)
321 		return got_error(GOT_ERR_BAD_PATH);
322 
323 	f1 = f2 = NULL;
324 	*rval = D_SAME;
325 	anychange = 0;
326 	lastline = 0;
327 	lastmatchline = 0;
328 	context_vec_ptr = context_vec_start - 1;
329 	if (flags & D_IGNORECASE)
330 		chrtran = cup2low;
331 	else
332 		chrtran = clow2low;
333 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) {
334 		*rval = (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
335 		return NULL;
336 	}
337 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
338 		goto closem;
339 
340 	if (flags & D_EMPTY1)
341 		f1 = fopen(_PATH_DEVNULL, "r");
342 	else {
343 		if (!S_ISREG(stb1.st_mode)) {
344 			if ((f1 = opentemp(file1)) == NULL ||
345 			    fstat(fileno(f1), &stb1) < 0) {
346 				status |= 2;
347 				goto closem;
348 			}
349 		} else if (strcmp(file1, "-") == 0)
350 			f1 = stdin;
351 		else
352 			f1 = fopen(file1, "r");
353 	}
354 	if (f1 == NULL) {
355 		status |= 2;
356 		goto closem;
357 	}
358 
359 	if (flags & D_EMPTY2)
360 		f2 = fopen(_PATH_DEVNULL, "r");
361 	else {
362 		if (!S_ISREG(stb2.st_mode)) {
363 			if ((f2 = opentemp(file2)) == NULL ||
364 			    fstat(fileno(f2), &stb2) < 0) {
365 				status |= 2;
366 				goto closem;
367 			}
368 		} else if (strcmp(file2, "-") == 0)
369 			f2 = stdin;
370 		else
371 			f2 = fopen(file2, "r");
372 	}
373 	if (f2 == NULL) {
374 		status |= 2;
375 		goto closem;
376 	}
377 
378 	switch (files_differ(f1, f2, flags)) {
379 	case 0:
380 		goto closem;
381 	case 1:
382 		break;
383 	default:
384 		/* error */
385 		status |= 2;
386 		goto closem;
387 	}
388 
389 	if ((flags & D_FORCEASCII) == 0 &&
390 	    (!asciifile(f1) || !asciifile(f2))) {
391 		*rval = D_BINARY;
392 		status |= 1;
393 		goto closem;
394 	}
395 	prepare(0, f1, stb1.st_size, flags);
396 	prepare(1, f2, stb2.st_size, flags);
397 
398 	prune();
399 	sort(sfile[0], slen[0]);
400 	sort(sfile[1], slen[1]);
401 
402 	member = (int *)file[1];
403 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
404 	member = reallocarray(member, slen[1] + 2, sizeof(*member));
405 	if (member == NULL) {
406 		err = got_error(GOT_ERR_NO_MEM);
407 		goto closem;
408 	}
409 
410 	class = (int *)file[0];
411 	unsort(sfile[0], slen[0], class);
412 	class = reallocarray(class, slen[0] + 2, sizeof(*class));
413 	if (class == NULL) {
414 		err = got_error(GOT_ERR_NO_MEM);
415 		goto closem;
416 	}
417 
418 	klist = calloc(slen[0] + 2, sizeof(*klist));
419 	if (klist == NULL) {
420 		err = got_error(GOT_ERR_NO_MEM);
421 		goto closem;
422 	}
423 	clen = 0;
424 	clistlen = 100;
425 	clist = calloc(clistlen, sizeof(*clist));
426 	if (clist == NULL) {
427 		err = got_error(GOT_ERR_NO_MEM);
428 		goto closem;
429 	}
430 	i = stone(class, slen[0], member, klist, flags);
431 	free(member);
432 	free(class);
433 
434 	J = reallocarray(J, len[0] + 2, sizeof(*J));
435 	if (J == NULL) {
436 		err = got_error(GOT_ERR_NO_MEM);
437 		goto closem;
438 	}
439 	unravel(klist[i]);
440 	free(clist);
441 	clist = NULL;
442 	free(klist);
443 	klist = NULL;
444 
445 	ixold = reallocarray(ixold, len[0] + 2, sizeof(*ixold));
446 	if (ixold == NULL) {
447 		err = got_error(GOT_ERR_NO_MEM);
448 		goto closem;
449 	}
450 	ixnew = reallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
451 	if (ixnew == NULL) {
452 		err = got_error(GOT_ERR_NO_MEM);
453 		goto closem;
454 	}
455 	check(f1, f2, flags);
456 	output(file1, f1, file2, f2, flags);
457 closem:
458 	if (anychange) {
459 		status |= 1;
460 		if (*rval == D_SAME)
461 			*rval = D_DIFFER;
462 	}
463 	if (f1 != NULL)
464 		fclose(f1);
465 	if (f2 != NULL)
466 		fclose(f2);
467 
468 	return (err);
469 }
470 
471 /*
472  * Check to see if the given files differ.
473  * Returns 0 if they are the same, 1 if different, and -1 on error.
474  * XXX - could use code from cmp(1) [faster]
475  */
476 static int
477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 	char buf1[BUFSIZ], buf2[BUFSIZ];
480 	size_t i, j;
481 
482 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 		return (1);
485 	for (;;) {
486 		i = fread(buf1, 1, sizeof(buf1), f1);
487 		j = fread(buf2, 1, sizeof(buf2), f2);
488 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
489 			return (-1);
490 		if (i != j)
491 			return (1);
492 		if (i == 0)
493 			return (0);
494 		if (memcmp(buf1, buf2, i) != 0)
495 			return (1);
496 	}
497 }
498 
499 static FILE *
500 opentemp(const char *file)
501 {
502 	char buf[BUFSIZ], tempfile[PATH_MAX];
503 	ssize_t nread;
504 	int ifd, ofd;
505 
506 	if (strcmp(file, "-") == 0)
507 		ifd = STDIN_FILENO;
508 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
509 		return (NULL);
510 
511 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
512 
513 	if ((ofd = mkstemp(tempfile)) < 0) {
514 		close(ifd);
515 		return (NULL);
516 	}
517 	unlink(tempfile);
518 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
519 		if (write(ofd, buf, nread) != nread) {
520 			close(ifd);
521 			close(ofd);
522 			return (NULL);
523 		}
524 	}
525 	close(ifd);
526 	lseek(ofd, (off_t)0, SEEK_SET);
527 	return (fdopen(ofd, "r"));
528 }
529 
530 char *
531 splice(char *dir, char *file)
532 {
533 	char *tail, *buf;
534 	size_t dirlen;
535 
536 	dirlen = strlen(dir);
537 	while (dirlen != 0 && dir[dirlen - 1] == '/')
538 	    dirlen--;
539 	if ((tail = strrchr(file, '/')) == NULL)
540 		tail = file;
541 	else
542 		tail++;
543 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
544 	return (buf);
545 }
546 
547 static void
548 prepare(int i, FILE *fd, off_t filesize, int flags)
549 {
550 	struct line *p;
551 	int j, h;
552 	size_t sz;
553 
554 	rewind(fd);
555 
556 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
557 	if (sz < 100)
558 		sz = 100;
559 
560 	p = xcalloc(sz + 3, sizeof(*p));
561 	for (j = 0; (h = readhash(fd, flags));) {
562 		if (j == sz) {
563 			sz = sz * 3 / 2;
564 			p = xreallocarray(p, sz + 3, sizeof(*p));
565 		}
566 		p[++j].value = h;
567 	}
568 	len[i] = j;
569 	file[i] = p;
570 }
571 
572 static void
573 prune(void)
574 {
575 	int i, j;
576 
577 	for (pref = 0; pref < len[0] && pref < len[1] &&
578 	    file[0][pref + 1].value == file[1][pref + 1].value;
579 	    pref++)
580 		;
581 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
582 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
583 	    suff++)
584 		;
585 	for (j = 0; j < 2; j++) {
586 		sfile[j] = file[j] + pref;
587 		slen[j] = len[j] - pref - suff;
588 		for (i = 0; i <= slen[j]; i++)
589 			sfile[j][i].serial = i;
590 	}
591 }
592 
593 static void
594 equiv(struct line *a, int n, struct line *b, int m, int *c)
595 {
596 	int i, j;
597 
598 	i = j = 1;
599 	while (i <= n && j <= m) {
600 		if (a[i].value < b[j].value)
601 			a[i++].value = 0;
602 		else if (a[i].value == b[j].value)
603 			a[i++].value = j;
604 		else
605 			j++;
606 	}
607 	while (i <= n)
608 		a[i++].value = 0;
609 	b[m + 1].value = 0;
610 	j = 0;
611 	while (++j <= m) {
612 		c[j] = -b[j].serial;
613 		while (b[j + 1].value == b[j].value) {
614 			j++;
615 			c[j] = b[j].serial;
616 		}
617 	}
618 	c[j] = -1;
619 }
620 
621 /* Code taken from ping.c */
622 static int
623 isqrt(int n)
624 {
625 	int y, x = 1;
626 
627 	if (n == 0)
628 		return (0);
629 
630 	do { /* newton was a stinker */
631 		y = x;
632 		x = n / x;
633 		x += y;
634 		x /= 2;
635 	} while ((x - y) > 1 || (x - y) < -1);
636 
637 	return (x);
638 }
639 
640 static int
641 stone(int *a, int n, int *b, int *c, int flags)
642 {
643 	int i, k, y, j, l;
644 	int oldc, tc, oldl, sq;
645 	u_int numtries, bound;
646 
647 	if (flags & D_MINIMAL)
648 		bound = UINT_MAX;
649 	else {
650 		sq = isqrt(n);
651 		bound = MAXIMUM(256, sq);
652 	}
653 
654 	k = 0;
655 	c[0] = newcand(0, 0, 0);
656 	for (i = 1; i <= n; i++) {
657 		j = a[i];
658 		if (j == 0)
659 			continue;
660 		y = -b[j];
661 		oldl = 0;
662 		oldc = c[0];
663 		numtries = 0;
664 		do {
665 			if (y <= clist[oldc].y)
666 				continue;
667 			l = search(c, k, y);
668 			if (l != oldl + 1)
669 				oldc = c[l - 1];
670 			if (l <= k) {
671 				if (clist[c[l]].y <= y)
672 					continue;
673 				tc = c[l];
674 				c[l] = newcand(i, y, oldc);
675 				oldc = tc;
676 				oldl = l;
677 				numtries++;
678 			} else {
679 				c[l] = newcand(i, y, oldc);
680 				k++;
681 				break;
682 			}
683 		} while ((y = b[++j]) > 0 && numtries < bound);
684 	}
685 	return (k);
686 }
687 
688 static int
689 newcand(int x, int y, int pred)
690 {
691 	struct cand *q;
692 
693 	if (clen == clistlen) {
694 		clistlen = clistlen * 11 / 10;
695 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
696 	}
697 	q = clist + clen;
698 	q->x = x;
699 	q->y = y;
700 	q->pred = pred;
701 	return (clen++);
702 }
703 
704 static int
705 search(int *c, int k, int y)
706 {
707 	int i, j, l, t;
708 
709 	if (clist[c[k]].y < y)	/* quick look for typical case */
710 		return (k + 1);
711 	i = 0;
712 	j = k + 1;
713 	for (;;) {
714 		l = (i + j) / 2;
715 		if (l <= i)
716 			break;
717 		t = clist[c[l]].y;
718 		if (t > y)
719 			j = l;
720 		else if (t < y)
721 			i = l;
722 		else
723 			return (l);
724 	}
725 	return (l + 1);
726 }
727 
728 static void
729 unravel(int p)
730 {
731 	struct cand *q;
732 	int i;
733 
734 	for (i = 0; i <= len[0]; i++)
735 		J[i] = i <= pref ? i :
736 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
737 	for (q = clist + p; q->y != 0; q = clist + q->pred)
738 		J[q->x + pref] = q->y + pref;
739 }
740 
741 /*
742  * Check does double duty:
743  *  1.	ferret out any fortuitous correspondences due
744  *	to confounding by hashing (which result in "jackpot")
745  *  2.  collect random access indexes to the two files
746  */
747 static void
748 check(FILE *f1, FILE *f2, int flags)
749 {
750 	int i, j, jackpot, c, d;
751 	long ctold, ctnew;
752 
753 	rewind(f1);
754 	rewind(f2);
755 	j = 1;
756 	ixold[0] = ixnew[0] = 0;
757 	jackpot = 0;
758 	ctold = ctnew = 0;
759 	for (i = 1; i <= len[0]; i++) {
760 		if (J[i] == 0) {
761 			ixold[i] = ctold += skipline(f1);
762 			continue;
763 		}
764 		while (j < J[i]) {
765 			ixnew[j] = ctnew += skipline(f2);
766 			j++;
767 		}
768 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
769 			for (;;) {
770 				c = getc(f1);
771 				d = getc(f2);
772 				/*
773 				 * GNU diff ignores a missing newline
774 				 * in one file for -b or -w.
775 				 */
776 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
777 					if (c == EOF && d == '\n') {
778 						ctnew++;
779 						break;
780 					} else if (c == '\n' && d == EOF) {
781 						ctold++;
782 						break;
783 					}
784 				}
785 				ctold++;
786 				ctnew++;
787 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
788 				    isspace(d)) {
789 					do {
790 						if (c == '\n')
791 							break;
792 						ctold++;
793 					} while (isspace(c = getc(f1)));
794 					do {
795 						if (d == '\n')
796 							break;
797 						ctnew++;
798 					} while (isspace(d = getc(f2)));
799 				} else if ((flags & D_IGNOREBLANKS)) {
800 					while (isspace(c) && c != '\n') {
801 						c = getc(f1);
802 						ctold++;
803 					}
804 					while (isspace(d) && d != '\n') {
805 						d = getc(f2);
806 						ctnew++;
807 					}
808 				}
809 				if (chrtran[c] != chrtran[d]) {
810 					jackpot++;
811 					J[i] = 0;
812 					if (c != '\n' && c != EOF)
813 						ctold += skipline(f1);
814 					if (d != '\n' && c != EOF)
815 						ctnew += skipline(f2);
816 					break;
817 				}
818 				if (c == '\n' || c == EOF)
819 					break;
820 			}
821 		} else {
822 			for (;;) {
823 				ctold++;
824 				ctnew++;
825 				if ((c = getc(f1)) != (d = getc(f2))) {
826 					/* jackpot++; */
827 					J[i] = 0;
828 					if (c != '\n' && c != EOF)
829 						ctold += skipline(f1);
830 					if (d != '\n' && c != EOF)
831 						ctnew += skipline(f2);
832 					break;
833 				}
834 				if (c == '\n' || c == EOF)
835 					break;
836 			}
837 		}
838 		ixold[i] = ctold;
839 		ixnew[j] = ctnew;
840 		j++;
841 	}
842 	for (; j <= len[1]; j++)
843 		ixnew[j] = ctnew += skipline(f2);
844 	/*
845 	 * if (jackpot)
846 	 *	fprintf(stderr, "jackpot\n");
847 	 */
848 }
849 
850 /* shellsort CACM #201 */
851 static void
852 sort(struct line *a, int n)
853 {
854 	struct line *ai, *aim, w;
855 	int j, m = 0, k;
856 
857 	if (n == 0)
858 		return;
859 	for (j = 1; j <= n; j *= 2)
860 		m = 2 * j - 1;
861 	for (m /= 2; m != 0; m /= 2) {
862 		k = n - m;
863 		for (j = 1; j <= k; j++) {
864 			for (ai = &a[j]; ai > a; ai -= m) {
865 				aim = &ai[m];
866 				if (aim < ai)
867 					break;	/* wraparound */
868 				if (aim->value > ai[0].value ||
869 				    (aim->value == ai[0].value &&
870 					aim->serial > ai[0].serial))
871 					break;
872 				w.value = ai[0].value;
873 				ai[0].value = aim->value;
874 				aim->value = w.value;
875 				w.serial = ai[0].serial;
876 				ai[0].serial = aim->serial;
877 				aim->serial = w.serial;
878 			}
879 		}
880 	}
881 }
882 
883 static void
884 unsort(struct line *f, int l, int *b)
885 {
886 	int *a, i;
887 
888 	a = xcalloc(l + 1, sizeof(*a));
889 	for (i = 1; i <= l; i++)
890 		a[f[i].serial] = f[i].value;
891 	for (i = 1; i <= l; i++)
892 		b[i] = a[i];
893 	free(a);
894 }
895 
896 static int
897 skipline(FILE *f)
898 {
899 	int i, c;
900 
901 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
902 		continue;
903 	return (i);
904 }
905 
906 static void
907 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
908 {
909 	int m, i0, i1, j0, j1;
910 
911 	rewind(f1);
912 	rewind(f2);
913 	m = len[0];
914 	J[0] = 0;
915 	J[m + 1] = len[1] + 1;
916 	if (diff_format != D_EDIT) {
917 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
918 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
919 				i0++;
920 			j0 = J[i0 - 1] + 1;
921 			i1 = i0 - 1;
922 			while (i1 < m && J[i1 + 1] == 0)
923 				i1++;
924 			j1 = J[i1 + 1] - 1;
925 			J[i1] = j1;
926 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
927 		}
928 	} else {
929 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
930 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
931 				i0--;
932 			j0 = J[i0 + 1] - 1;
933 			i1 = i0 + 1;
934 			while (i1 > 1 && J[i1 - 1] == 0)
935 				i1--;
936 			j1 = J[i1 - 1] + 1;
937 			J[i1] = j1;
938 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
939 		}
940 	}
941 	if (m == 0)
942 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
943 	if (diff_format == D_IFDEF) {
944 		for (;;) {
945 #define	c i0
946 			if ((c = getc(f1)) == EOF)
947 				return;
948 			diff_output("%c", c);
949 		}
950 #undef c
951 	}
952 	if (anychange != 0) {
953 		if (diff_format == D_CONTEXT)
954 			dump_context_vec(f1, f2, flags);
955 		else if (diff_format == D_UNIFIED)
956 			dump_unified_vec(f1, f2, flags);
957 	}
958 }
959 
960 static void
961 range(int a, int b, char *separator)
962 {
963 	diff_output("%d", a > b ? b : a);
964 	if (a < b)
965 		diff_output("%s%d", separator, b);
966 }
967 
968 static void
969 uni_range(int a, int b)
970 {
971 	if (a < b)
972 		diff_output("%d,%d", a, b - a + 1);
973 	else if (a == b)
974 		diff_output("%d", b);
975 	else
976 		diff_output("%d,0", b);
977 }
978 
979 static char *
980 preadline(int fd, size_t rlen, off_t off)
981 {
982 	char *line;
983 	ssize_t nr;
984 
985 	line = xmalloc(rlen + 1);
986 	if ((nr = pread(fd, line, rlen, off)) < 0)
987 		err(2, "preadline");
988 	if (nr > 0 && line[nr-1] == '\n')
989 		nr--;
990 	line[nr] = '\0';
991 	return (line);
992 }
993 
994 static int
995 ignoreline(char *line)
996 {
997 	return 0; /* do not ignore any lines */
998 }
999 
1000 /*
1001  * Indicate that there is a difference between lines a and b of the from file
1002  * to get to lines c to d of the to file.  If a is greater then b then there
1003  * are no lines in the from file involved and this means that there were
1004  * lines appended (beginning at b).  If c is greater than d then there are
1005  * lines missing from the to file.
1006  */
1007 static void
1008 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1009     int *pflags)
1010 {
1011 	static size_t max_context = 64;
1012 	int i;
1013 
1014 restart:
1015 	if (diff_format != D_IFDEF && a > b && c > d)
1016 		return;
1017 	if (ignore_pats != NULL) {
1018 		char *line;
1019 		/*
1020 		 * All lines in the change, insert, or delete must
1021 		 * match an ignore pattern for the change to be
1022 		 * ignored.
1023 		 */
1024 		if (a <= b) {		/* Changes and deletes. */
1025 			for (i = a; i <= b; i++) {
1026 				line = preadline(fileno(f1),
1027 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1028 				if (!ignoreline(line))
1029 					goto proceed;
1030 			}
1031 		}
1032 		if (a > b || c <= d) {	/* Changes and inserts. */
1033 			for (i = c; i <= d; i++) {
1034 				line = preadline(fileno(f2),
1035 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1036 				if (!ignoreline(line))
1037 					goto proceed;
1038 			}
1039 		}
1040 		return;
1041 	}
1042 proceed:
1043 	if (*pflags & D_HEADER) {
1044 		diff_output("%s %s %s\n", diffargs, file1, file2);
1045 		*pflags &= ~D_HEADER;
1046 	}
1047 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1048 		/*
1049 		 * Allocate change records as needed.
1050 		 */
1051 		if (context_vec_ptr == context_vec_end - 1) {
1052 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1053 			max_context <<= 1;
1054 			context_vec_start = xreallocarray(context_vec_start,
1055 			    max_context, sizeof(*context_vec_start));
1056 			context_vec_end = context_vec_start + max_context;
1057 			context_vec_ptr = context_vec_start + offset;
1058 		}
1059 		if (anychange == 0) {
1060 			/*
1061 			 * Print the context/unidiff header first time through.
1062 			 */
1063 			print_header(file1, file2);
1064 			anychange = 1;
1065 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1066 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1067 			/*
1068 			 * If this change is more than 'diff_context' lines from the
1069 			 * previous change, dump the record and reset it.
1070 			 */
1071 			if (diff_format == D_CONTEXT)
1072 				dump_context_vec(f1, f2, *pflags);
1073 			else
1074 				dump_unified_vec(f1, f2, *pflags);
1075 		}
1076 		context_vec_ptr++;
1077 		context_vec_ptr->a = a;
1078 		context_vec_ptr->b = b;
1079 		context_vec_ptr->c = c;
1080 		context_vec_ptr->d = d;
1081 		return;
1082 	}
1083 	if (anychange == 0)
1084 		anychange = 1;
1085 	switch (diff_format) {
1086 	case D_BRIEF:
1087 		return;
1088 	case D_NORMAL:
1089 	case D_EDIT:
1090 		range(a, b, ",");
1091 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1092 		if (diff_format == D_NORMAL)
1093 			range(c, d, ",");
1094 		diff_output("\n");
1095 		break;
1096 	case D_REVERSE:
1097 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1098 		range(a, b, " ");
1099 		diff_output("\n");
1100 		break;
1101 	case D_NREVERSE:
1102 		if (a > b)
1103 			diff_output("a%d %d\n", b, d - c + 1);
1104 		else {
1105 			diff_output("d%d %d\n", a, b - a + 1);
1106 			if (!(c > d))
1107 				/* add changed lines */
1108 				diff_output("a%d %d\n", b, d - c + 1);
1109 		}
1110 		break;
1111 	}
1112 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1113 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1114 		if (a <= b && c <= d && diff_format == D_NORMAL)
1115 			diff_output("---\n");
1116 	}
1117 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1118 	if (i != 0 && diff_format == D_EDIT) {
1119 		/*
1120 		 * A non-zero return value for D_EDIT indicates that the
1121 		 * last line printed was a bare dot (".") that has been
1122 		 * escaped as ".." to prevent ed(1) from misinterpreting
1123 		 * it.  We have to add a substitute command to change this
1124 		 * back and restart where we left off.
1125 		 */
1126 		diff_output(".\n");
1127 		diff_output("%ds/.//\n", a + i - 1);
1128 		b = a + i - 1;
1129 		a = b + 1;
1130 		c += i;
1131 		goto restart;
1132 	}
1133 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1134 		diff_output(".\n");
1135 	if (inifdef) {
1136 		diff_output("#endif /* %s */\n", ifdefname);
1137 		inifdef = 0;
1138 	}
1139 }
1140 
1141 static int
1142 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1143 {
1144 	int i, j, c, lastc, col, nc;
1145 
1146 	/*
1147 	 * When doing #ifdef's, copy down to current line
1148 	 * if this is the first file, so that stuff makes it to output.
1149 	 */
1150 	if (diff_format == D_IFDEF && oldfile) {
1151 		long curpos = ftell(lb);
1152 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1153 		nc = f[a > b ? b : a - 1] - curpos;
1154 		for (i = 0; i < nc; i++)
1155 			diff_output("%c", getc(lb));
1156 	}
1157 	if (a > b)
1158 		return (0);
1159 	if (diff_format == D_IFDEF) {
1160 		if (inifdef) {
1161 			diff_output("#else /* %s%s */\n",
1162 			    oldfile == 1 ? "!" : "", ifdefname);
1163 		} else {
1164 			if (oldfile)
1165 				diff_output("#ifndef %s\n", ifdefname);
1166 			else
1167 				diff_output("#ifdef %s\n", ifdefname);
1168 		}
1169 		inifdef = 1 + oldfile;
1170 	}
1171 	for (i = a; i <= b; i++) {
1172 		fseek(lb, f[i - 1], SEEK_SET);
1173 		nc = f[i] - f[i - 1];
1174 		if (diff_format != D_IFDEF && ch != '\0') {
1175 			diff_output("%c", ch);
1176 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1177 			    || diff_format == D_UNIFIED))
1178 				diff_output("\t");
1179 			else if (diff_format != D_UNIFIED)
1180 				diff_output(" ");
1181 		}
1182 		col = 0;
1183 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1184 			if ((c = getc(lb)) == EOF) {
1185 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1186 				    diff_format == D_NREVERSE)
1187 					warnx("No newline at end of file");
1188 				else
1189 					diff_output("\n\\ No newline at end of "
1190 					    "file\n");
1191 				return (0);
1192 			}
1193 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1194 				do {
1195 					diff_output(" ");
1196 				} while (++col & 7);
1197 			} else {
1198 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1199 				    && lastc == '.') {
1200 					/*
1201 					 * Don't print a bare "." line
1202 					 * since that will confuse ed(1).
1203 					 * Print ".." instead and return,
1204 					 * giving the caller an offset
1205 					 * from which to restart.
1206 					 */
1207 					diff_output(".\n");
1208 					return (i - a + 1);
1209 				}
1210 				diff_output("%c", c);
1211 				col++;
1212 			}
1213 		}
1214 	}
1215 	return (0);
1216 }
1217 
1218 /*
1219  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1220  */
1221 static int
1222 readhash(FILE *f, int flags)
1223 {
1224 	int i, t, space;
1225 	int sum;
1226 
1227 	sum = 1;
1228 	space = 0;
1229 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1230 		if (flags & D_IGNORECASE)
1231 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1232 				if (t == EOF) {
1233 					if (i == 0)
1234 						return (0);
1235 					break;
1236 				}
1237 				sum = sum * 127 + chrtran[t];
1238 			}
1239 		else
1240 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1241 				if (t == EOF) {
1242 					if (i == 0)
1243 						return (0);
1244 					break;
1245 				}
1246 				sum = sum * 127 + t;
1247 			}
1248 	} else {
1249 		for (i = 0;;) {
1250 			switch (t = getc(f)) {
1251 			case '\t':
1252 			case '\r':
1253 			case '\v':
1254 			case '\f':
1255 			case ' ':
1256 				space++;
1257 				continue;
1258 			default:
1259 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1260 					i++;
1261 					space = 0;
1262 				}
1263 				sum = sum * 127 + chrtran[t];
1264 				i++;
1265 				continue;
1266 			case EOF:
1267 				if (i == 0)
1268 					return (0);
1269 				/* FALLTHROUGH */
1270 			case '\n':
1271 				break;
1272 			}
1273 			break;
1274 		}
1275 	}
1276 	/*
1277 	 * There is a remote possibility that we end up with a zero sum.
1278 	 * Zero is used as an EOF marker, so return 1 instead.
1279 	 */
1280 	return (sum == 0 ? 1 : sum);
1281 }
1282 
1283 static int
1284 asciifile(FILE *f)
1285 {
1286 	unsigned char buf[BUFSIZ];
1287 	size_t cnt;
1288 
1289 	if (f == NULL)
1290 		return (1);
1291 
1292 	rewind(f);
1293 	cnt = fread(buf, 1, sizeof(buf), f);
1294 	return (memchr(buf, '\0', cnt) == NULL);
1295 }
1296 
1297 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1298 
1299 static char *
1300 match_function(const long *f, int pos, FILE *fp)
1301 {
1302 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1303 	size_t nc;
1304 	int last = lastline;
1305 	char *state = NULL;
1306 
1307 	lastline = pos;
1308 	while (pos > last) {
1309 		fseek(fp, f[pos - 1], SEEK_SET);
1310 		nc = f[pos] - f[pos - 1];
1311 		if (nc >= sizeof(buf))
1312 			nc = sizeof(buf) - 1;
1313 		nc = fread(buf, 1, nc, fp);
1314 		if (nc > 0) {
1315 			buf[nc] = '\0';
1316 			buf[strcspn(buf, "\n")] = '\0';
1317 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1318 				if (begins_with(buf, "private:")) {
1319 					if (!state)
1320 						state = " (private)";
1321 				} else if (begins_with(buf, "protected:")) {
1322 					if (!state)
1323 						state = " (protected)";
1324 				} else if (begins_with(buf, "public:")) {
1325 					if (!state)
1326 						state = " (public)";
1327 				} else {
1328 					strlcpy(lastbuf, buf, sizeof lastbuf);
1329 					if (state)
1330 						strlcat(lastbuf, state,
1331 						    sizeof lastbuf);
1332 					lastmatchline = pos;
1333 					return lastbuf;
1334 				}
1335 			}
1336 		}
1337 		pos--;
1338 	}
1339 	return lastmatchline > 0 ? lastbuf : NULL;
1340 }
1341 
1342 /* dump accumulated "context" diff changes */
1343 static void
1344 dump_context_vec(FILE *f1, FILE *f2, int flags)
1345 {
1346 	struct context_vec *cvp = context_vec_start;
1347 	int lowa, upb, lowc, upd, do_output;
1348 	int a, b, c, d;
1349 	char ch, *f;
1350 
1351 	if (context_vec_start > context_vec_ptr)
1352 		return;
1353 
1354 	b = d = 0;		/* gcc */
1355 	lowa = MAXIMUM(1, cvp->a - diff_context);
1356 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1357 	lowc = MAXIMUM(1, cvp->c - diff_context);
1358 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1359 
1360 	diff_output("***************");
1361 	if ((flags & D_PROTOTYPE)) {
1362 		f = match_function(ixold, lowa-1, f1);
1363 		if (f != NULL)
1364 			diff_output(" %s", f);
1365 	}
1366 	diff_output("\n*** ");
1367 	range(lowa, upb, ",");
1368 	diff_output(" ****\n");
1369 
1370 	/*
1371 	 * Output changes to the "old" file.  The first loop suppresses
1372 	 * output if there were no changes to the "old" file (we'll see
1373 	 * the "old" lines as context in the "new" list).
1374 	 */
1375 	do_output = 0;
1376 	for (; cvp <= context_vec_ptr; cvp++)
1377 		if (cvp->a <= cvp->b) {
1378 			cvp = context_vec_start;
1379 			do_output++;
1380 			break;
1381 		}
1382 	if (do_output) {
1383 		while (cvp <= context_vec_ptr) {
1384 			a = cvp->a;
1385 			b = cvp->b;
1386 			c = cvp->c;
1387 			d = cvp->d;
1388 
1389 			if (a <= b && c <= d)
1390 				ch = 'c';
1391 			else
1392 				ch = (a <= b) ? 'd' : 'a';
1393 
1394 			if (ch == 'a')
1395 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1396 			else {
1397 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1398 				fetch(ixold, a, b, f1,
1399 				    ch == 'c' ? '!' : '-', 0, flags);
1400 			}
1401 			lowa = b + 1;
1402 			cvp++;
1403 		}
1404 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1405 	}
1406 	/* output changes to the "new" file */
1407 	diff_output("--- ");
1408 	range(lowc, upd, ",");
1409 	diff_output(" ----\n");
1410 
1411 	do_output = 0;
1412 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1413 		if (cvp->c <= cvp->d) {
1414 			cvp = context_vec_start;
1415 			do_output++;
1416 			break;
1417 		}
1418 	if (do_output) {
1419 		while (cvp <= context_vec_ptr) {
1420 			a = cvp->a;
1421 			b = cvp->b;
1422 			c = cvp->c;
1423 			d = cvp->d;
1424 
1425 			if (a <= b && c <= d)
1426 				ch = 'c';
1427 			else
1428 				ch = (a <= b) ? 'd' : 'a';
1429 
1430 			if (ch == 'd')
1431 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1432 			else {
1433 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1434 				fetch(ixnew, c, d, f2,
1435 				    ch == 'c' ? '!' : '+', 0, flags);
1436 			}
1437 			lowc = d + 1;
1438 			cvp++;
1439 		}
1440 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1441 	}
1442 	context_vec_ptr = context_vec_start - 1;
1443 }
1444 
1445 /* dump accumulated "unified" diff changes */
1446 static void
1447 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1448 {
1449 	struct context_vec *cvp = context_vec_start;
1450 	int lowa, upb, lowc, upd;
1451 	int a, b, c, d;
1452 	char ch, *f;
1453 
1454 	if (context_vec_start > context_vec_ptr)
1455 		return;
1456 
1457 	b = d = 0;		/* gcc */
1458 	lowa = MAXIMUM(1, cvp->a - diff_context);
1459 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1460 	lowc = MAXIMUM(1, cvp->c - diff_context);
1461 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1462 
1463 	diff_output("@@ -");
1464 	uni_range(lowa, upb);
1465 	diff_output(" +");
1466 	uni_range(lowc, upd);
1467 	diff_output(" @@");
1468 	if ((flags & D_PROTOTYPE)) {
1469 		f = match_function(ixold, lowa-1, f1);
1470 		if (f != NULL)
1471 			diff_output(" %s", f);
1472 	}
1473 	diff_output("\n");
1474 
1475 	/*
1476 	 * Output changes in "unified" diff format--the old and new lines
1477 	 * are printed together.
1478 	 */
1479 	for (; cvp <= context_vec_ptr; cvp++) {
1480 		a = cvp->a;
1481 		b = cvp->b;
1482 		c = cvp->c;
1483 		d = cvp->d;
1484 
1485 		/*
1486 		 * c: both new and old changes
1487 		 * d: only changes in the old file
1488 		 * a: only changes in the new file
1489 		 */
1490 		if (a <= b && c <= d)
1491 			ch = 'c';
1492 		else
1493 			ch = (a <= b) ? 'd' : 'a';
1494 
1495 		switch (ch) {
1496 		case 'c':
1497 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1498 			fetch(ixold, a, b, f1, '-', 0, flags);
1499 			fetch(ixnew, c, d, f2, '+', 0, flags);
1500 			break;
1501 		case 'd':
1502 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1503 			fetch(ixold, a, b, f1, '-', 0, flags);
1504 			break;
1505 		case 'a':
1506 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1507 			fetch(ixnew, c, d, f2, '+', 0, flags);
1508 			break;
1509 		}
1510 		lowa = b + 1;
1511 		lowc = d + 1;
1512 	}
1513 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1514 
1515 	context_vec_ptr = context_vec_start - 1;
1516 }
1517 
1518 static void
1519 print_header(const char *file1, const char *file2)
1520 {
1521 	if (label[0] != NULL)
1522 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1523 		    label[0]);
1524 	else
1525 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1526 		    file1, ctime(&stb1.st_mtime));
1527 	if (label[1] != NULL)
1528 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1529 		    label[1]);
1530 	else
1531 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1532 		    file2, ctime(&stb2.st_mtime));
1533 }