Blob


1 /* Split source by line breaks, and calculate a simplistic checksum. */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <errno.h>
19 #include <inttypes.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
25 #include <diff/arraylist.h>
26 #include <diff/diff_main.h>
27 #include "diff_internal.h"
28 #include "diff_debug.h"
30 static int
31 diff_data_atomize_text_lines_fd(struct diff_data *d)
32 {
33 off_t pos = 0;
34 const off_t end = pos + d->len;
35 unsigned int array_size_estimate = d->len / 50;
36 unsigned int pow2 = 1;
38 while (array_size_estimate >>= 1)
39 pow2++;
41 ARRAYLIST_INIT(d->atoms, 1 << pow2);
43 if (fseek(d->root->f, 0L, SEEK_SET) == -1)
44 return errno;
46 while (pos < end) {
47 off_t line_end = pos;
48 unsigned int hash = 0;
49 unsigned char buf[512];
50 size_t r, i;
51 struct diff_atom *atom;
52 int eol = 0;
54 while (eol == 0 && line_end < end) {
55 r = fread(buf, sizeof(char), sizeof(buf), d->root->f);
56 if (r == 0 && ferror(d->root->f))
57 return errno;
58 i = 0;
59 while (eol == 0 && i < r) {
60 if (buf[i] != '\r' && buf[i] != '\n') {
61 hash = hash * 23 + buf[i];
62 line_end++;
63 } else
64 eol = buf[i];
65 i++;
66 }
67 }
69 /* When not at the end of data, the line ending char ('\r' or
70 * '\n') must follow */
71 if (line_end < end)
72 line_end++;
73 /* If that was an '\r', also pull in any following '\n' */
74 if (line_end < end && eol == '\r') {
75 if (fseeko(d->root->f, line_end, SEEK_SET) == -1)
76 return errno;
77 r = fread(buf, sizeof(char), sizeof(buf), d->root->f);
78 if (r == 0 && ferror(d->root->f))
79 return errno;
80 if (r == 1 && buf[0] == '\n' )
81 line_end++;
82 }
84 /* Record the found line as diff atom */
85 ARRAYLIST_ADD(atom, d->atoms);
86 if (!atom)
87 return ENOMEM;
89 *atom = (struct diff_atom){
90 .d = d,
91 .pos = pos,
92 .at = NULL, /* atom data is not memory-mapped */
93 .len = line_end - pos,
94 .hash = hash,
95 };
97 /* Starting point for next line: */
98 pos = line_end;
99 if (fseeko(d->root->f, pos, SEEK_SET) == -1)
100 return errno;
103 return DIFF_RC_OK;
106 static int
107 diff_data_atomize_text_lines_mmap(struct diff_data *d)
109 const uint8_t *pos = d->data;
110 const uint8_t *end = pos + d->len;
112 unsigned int array_size_estimate = d->len / 50;
113 unsigned int pow2 = 1;
114 while (array_size_estimate >>= 1)
115 pow2++;
117 ARRAYLIST_INIT(d->atoms, 1 << pow2);
119 while (pos < end) {
120 const uint8_t *line_end = pos;
121 unsigned int hash = 0;
123 while (line_end < end && *line_end != '\r' && *line_end != '\n') {
124 hash = hash * 23 + *line_end;
125 line_end++;
128 /* When not at the end of data, the line ending char ('\r' or
129 * '\n') must follow */
130 if (line_end < end)
131 line_end++;
132 /* If that was an '\r', also pull in any following '\n' */
133 if (line_end[0] == '\r'
134 && line_end < end && line_end[1] == '\n')
135 line_end++;
137 /* Record the found line as diff atom */
138 struct diff_atom *atom;
139 ARRAYLIST_ADD(atom, d->atoms);
140 if (!atom)
141 return ENOMEM;
143 *atom = (struct diff_atom){
144 .d = d,
145 .pos = (off_t)(pos - d->data),
146 .at = pos,
147 .len = line_end - pos,
148 .hash = hash,
149 };
151 /* Starting point for next line: */
152 pos = line_end;
155 return DIFF_RC_OK;
158 static int
159 diff_data_atomize_text_lines(struct diff_data *d)
161 if (d->data == NULL)
162 return diff_data_atomize_text_lines_fd(d);
163 else
164 return diff_data_atomize_text_lines_mmap(d);
167 int
168 diff_atomize_text_by_line(void *func_data, struct diff_data *left,
169 struct diff_data *right)
171 int rc;
172 rc = diff_data_atomize_text_lines(left);
173 if (rc != DIFF_RC_OK)
174 return rc;
175 return diff_data_atomize_text_lines(right);