commit 732e8ee0325715558a17b919a7f6a16bf64d66e3 from: Stefan Sperling date: Sun Sep 20 17:25:03 2020 UTC add support for ignoring whitespace (-w option) Tests grow the ability to call diff with options. Options to use are encoded in the test file name as a single dash followed by a string of option letters. E.g. -wp for 'ignore whitespace' and 'do patience' commit - cd25827e24b8fad0a3f4a89b72fbdba86bc2c5d1 commit + 732e8ee0325715558a17b919a7f6a16bf64d66e3 blob - 7846e5688b5f703b6d8d58917d7050d3174b578a blob + 18dd0286fe53d332f4c991dc64386cf6d2a55003 --- diff/diff.c +++ diff/diff.c @@ -43,32 +43,35 @@ static const char *getprogname() #endif __dead void usage(void); -int diffreg(char *, char *, int); +int diffreg(char *, char *, bool, bool); int openfile(const char *, char **, struct stat *); __dead void usage(void) { fprintf(stderr, - "usage: %s [-p] file1 file2\n" + "usage: %s [-pw] file1 file2\n" "\n" " -p Use Patience Diff (slower but often nicer)\n" + " -w Ignore Whitespace\n" , getprogname()); exit(1); } -static bool do_patience = false; - int main(int argc, char *argv[]) { int ch, rc; + bool do_patience = false, ignore_whitespace = false; - while ((ch = getopt(argc, argv, "p")) != -1) { + while ((ch = getopt(argc, argv, "pw")) != -1) { switch (ch) { case 'p': do_patience = true; break; + case 'w': + ignore_whitespace = true; + break; default: usage(); } @@ -80,7 +83,7 @@ main(int argc, char *argv[]) if (argc != 2) usage(); - rc = diffreg(argv[0], argv[1], 0); + rc = diffreg(argv[0], argv[1], do_patience, ignore_whitespace); if (rc != DIFF_RC_OK) { fprintf(stderr, "diff: %s\n", strerror(rc)); return 1; @@ -132,7 +135,7 @@ const struct diff_config diff_config_patience = { }; int -diffreg(char *file1, char *file2, int flags) +diffreg(char *file1, char *file2, bool do_patience, bool ignore_whitespace) { char *str1, *str2; int fd1, fd2; @@ -150,7 +153,8 @@ diffreg(char *file1, char *file2, int flags) fd1 = openfile(file1, &str1, &st1); fd2 = openfile(file2, &str2, &st2); - result = diff_main(cfg, fd1, str1, st1.st_size, fd2, str2, st2.st_size); + result = diff_main(cfg, fd1, str1, st1.st_size, fd2, str2, st2.st_size, + ignore_whitespace); #if 0 rc = diff_output_plain(stdout, &info, result); #else blob - 6397e0c1d52b5e01b3f3e1df122ace240efe2bf2 blob + 5d87a63c1fcbe06352062094f2bd48b70b86f1d4 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -101,6 +101,8 @@ struct diff_data { ARRAYLIST(struct diff_atom) atoms; struct diff_data *root; + + bool ignore_whitespace; }; void diff_data_free(struct diff_data *diff_data); @@ -356,5 +358,5 @@ struct diff_result *diff_main(const struct diff_config int left_fd, const uint8_t *left_data, off_t left_len, int right_fd, const uint8_t *right_data, - off_t right_len); + off_t right_len, bool ignore_whitespace); void diff_result_free(struct diff_result *result); blob - 84c41f39c99ce5bb1a20384b96c9cdc772b89ed6 blob + 840d4e7718250a86e04d1ac09b88ca3d5f7f6927 --- lib/diff_main.c +++ lib/diff_main.c @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -49,9 +50,48 @@ read_at(int fd, int at_pos, unsigned char *buf, size_t static int buf_cmp(const unsigned char *left, size_t left_len, - const unsigned char *right, size_t right_len) + const unsigned char *right, size_t right_len, + bool ignore_whitespace) { - int cmp = memcmp(left, right, MIN(left_len, right_len)); + int cmp; + + if (ignore_whitespace) { + int il = 0, ir = 0; + while (il < left_len && ir < right_len) { + unsigned char cl = left[il]; + unsigned char cr = right[ir]; + + if (isspace(cl) && il < left_len) { + il++; + continue; + } + if (isspace(cr) && ir < right_len) { + ir++; + continue; + } + + if (cl > cr) + return 1; + if (cr > cl) + return -1; + il++; + ir++; + } + while (il < left_len) { + unsigned char cl = left[il++]; + if (!isspace(cl)) + return 1; + } + while (ir < right_len) { + unsigned char cr = right[ir++]; + if (!isspace(cr)) + return -1; + } + + return 0; + } + + cmp = memcmp(left, right, MIN(left_len, right_len)); if (cmp) return cmp; if (left_len == right_len) @@ -65,22 +105,29 @@ diff_atom_cmp(int *cmp, const struct diff_atom *right) { off_t remain_left, remain_right; + bool ignore_whitespace; + + ignore_whitespace = (left->d->root->ignore_whitespace || + right->d->root->ignore_whitespace); - if (!left->len && !right->len) { - *cmp = 0; - return 0; - } - if (!right->len) { - *cmp = 1; - return 0; - } - if (!left->len) { - *cmp = -1; - return 0; + if (!ignore_whitespace) { + if (!left->len && !right->len) { + *cmp = 0; + return 0; + } + if (!right->len) { + *cmp = 1; + return 0; + } + if (!left->len) { + *cmp = -1; + return 0; + } } if (left->at != NULL && right->at != NULL) { - *cmp = buf_cmp(left->at, left->len, right->at, right->len); + *cmp = buf_cmp(left->at, left->len, right->at, right->len, + ignore_whitespace); return 0; } @@ -131,7 +178,8 @@ diff_atom_cmp(int *cmp, p_right = right->at + (right->len - remain_right); } - r = buf_cmp(p_left, n_left, p_right, n_right); + r = buf_cmp(p_left, n_left, p_right, n_right, + ignore_whitespace); if (r) { *cmp = r; return 0; @@ -243,7 +291,7 @@ chunk_added: void diff_data_init_root(struct diff_data *d, int fd, const uint8_t *data, - unsigned long long len) + unsigned long long len, bool ignore_whitespace) { *d = (struct diff_data){ .fd = fd, @@ -251,6 +299,7 @@ diff_data_init_root(struct diff_data *d, int fd, const .data = data, .len = len, .root = d, + .ignore_whitespace = ignore_whitespace, }; } @@ -411,15 +460,18 @@ return_rc: struct diff_result * diff_main(const struct diff_config *config, int left_fd, const uint8_t *left_data, off_t left_len, - int right_fd, const uint8_t *right_data, off_t right_len) + int right_fd, const uint8_t *right_data, off_t right_len, + bool ignore_whitespace) { struct diff_result *result = malloc(sizeof(struct diff_result)); if (!result) return NULL; *result = (struct diff_result){}; - diff_data_init_root(&result->left, left_fd, left_data, left_len); - diff_data_init_root(&result->right, right_fd, right_data, right_len); + diff_data_init_root(&result->left, left_fd, left_data, left_len, + ignore_whitespace); + diff_data_init_root(&result->right, right_fd, right_data, right_len, + ignore_whitespace); if (!config->atomize_func) { result->rc = EINVAL; blob - /dev/null blob + 139970c971d6d429a4e774640a0735e86fc0a7dd (mode 644) --- /dev/null +++ test/expect013.diff @@ -0,0 +1,10 @@ +--- test013.left-w.txt ++++ test013.right-w.txt +@@ -3,5 +3,5 @@ + C + D + E +-F +-G ++F x ++y G blob - 17f8df091e1bb71385f342f2e783b09347df7d75 blob + f2b8d42de7348bdf55fa8fa3e22691c387e77fd2 --- test/verify_all.sh +++ test/verify_all.sh @@ -8,11 +8,18 @@ verify_diff_script() { orig_left="$1" orig_right="$2" the_diff="$3" + diff_opts="$4" + expected_diff="$5" verify_left="verify.$orig_left" verify_right="verify.$orig_right" - if [ "x$diff_type" = "xunidiff" ]; then + if [ -n "$diff_opts" ]; then + if ! cmp "$got_diff" "$expected_diff" ; then + echo "FAIL: $got_diff != $expected_diff" + return 1 + fi + elif [ "x$diff_type" = "xunidiff" ]; then cp "$orig_left" "$verify_right" patch --quiet -u "$verify_right" "$the_diff" if ! cmp "$orig_right" "$verify_right" ; then @@ -43,14 +50,15 @@ verify_diff_script() { return 0 } -for left in test*.left.* ; do - right="$(echo "$left" | sed 's/\.left\./.right./')" - expected_diff="$(echo "$left" | sed 's/test\([0-9]*\)\..*/expect\1.diff/')" +for left in test*.left* ; do + right="$(echo "$left" | sed 's/\.left/\.right/')" + diff_opts="$(echo "$left" | sed 's/test[0-9]*\.left\([-a-zA-Z]*\).txt/\1/')" + expected_diff="$(echo "$left" | sed 's/test\([-0-9a-zA-Z]*\)\..*/expect\1.diff/')" got_diff="verify.$expected_diff" - "$diff_prog" "$left" "$right" > "$got_diff" + "$diff_prog" $diff_opts "$left" "$right" > "$got_diff" set -e - verify_diff_script "$left" "$right" "$got_diff" + verify_diff_script "$left" "$right" "$got_diff" "$diff_opts" "$expected_diff" set +e done blob - /dev/null blob + 53b3adf33dac5151d56547908cd6496798c80c3a (mode 644) --- /dev/null +++ test/test013.left-w.txt @@ -0,0 +1,7 @@ +A +B +C +D +E +F +G blob - /dev/null blob + 1509b4b4fdfabbb8725d42c243b684c03fd1c1db (mode 644) --- /dev/null +++ test/test013.right-w.txt @@ -0,0 +1,7 @@ +A + B +C + D +E +F x +y G