commit 85ab45596727cfd0254c6d5b6f0c5705b7b6e89e from: Stefan Sperling date: Tue Sep 22 09:47:05 2020 UTC move some definitions from the public diff_main.h to an internal header file commit - f374e91343146fc0584d53f4b767a3ebfe7bc49e commit + 85ab45596727cfd0254c6d5b6f0c5705b7b6e89e blob - 6c36ecdb31c5271095adb923f9de7efe30eb74a9 blob + 701fe6fdba14ea9bae8e9e6b929831a7bc068dc8 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -15,77 +15,18 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifndef MAX -#define MAX(A,B) ((A)>(B)?(A):(B)) -#endif -#ifndef MIN -#define MIN(A,B) ((A)<(B)?(A):(B)) -#endif - struct diff_range { int start; int end; }; -static inline bool -diff_range_empty(const struct diff_range *r) -{ - return r->start == r->end; -} - -static inline bool -diff_ranges_touch(const struct diff_range *a, const struct diff_range *b) -{ - return (a->end >= b->start) && (a->start <= b->end); -} - -static inline void -diff_ranges_merge(struct diff_range *a, const struct diff_range *b) -{ - *a = (struct diff_range){ - .start = MIN(a->start, b->start), - .end = MAX(a->end, b->end), - }; -} - -static inline int -diff_range_len(const struct diff_range *r) -{ - if (!r) - return 0; - return r->end - r->start; -} - /* List of all possible return codes of a diff invocation. */ #define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1 #define DIFF_RC_OK 0 /* Any positive return values are errno values from sys/errno.h */ -struct diff_data; +struct diff_atom; -struct diff_atom { - struct diff_data *d; /* back pointer to associated diff data */ - - off_t pos; /* if not memory-mapped */ - const uint8_t *at; /* if memory-mapped */ - off_t len; - - /* This hash is just a very cheap speed up for finding *mismatching* - * atoms. When hashes match, we still need to compare entire atoms to - * find out whether they are indeed identical or not. */ - unsigned int hash; - - /* State for the Patience Diff algorithm */ - /* TODO: keep a separate array for the patience state */ - struct { - bool unique_here; - bool unique_in_both; - struct diff_atom *pos_in_other; - struct diff_atom *prev_stack; - struct diff_range identical_lines; - } patience; -}; - /* For each file, there is a "root" struct diff_data referencing the entire * file, which the atoms are parsed from. In recursion of diff algorithm, there * may be "child" struct diff_data only referencing a subsection of the file, @@ -107,114 +48,9 @@ struct diff_data { void diff_data_free(struct diff_data *diff_data); -int -diff_atom_cmp(int *cmp, - const struct diff_atom *left, - const struct diff_atom *right); - -/* Indicate whether two given diff atoms match. */ -int -diff_atom_same(bool *same, - const struct diff_atom *left, - const struct diff_atom *right); - -/* The atom's index in the entire file. For atoms divided by lines of text, this - * yields the line number (starting with 0). Also works for diff_data that - * reference only a subsection of a file, always reflecting the global position - * in the file (and not the relative position within the subsection). */ -#define diff_atom_root_idx(DIFF_DATA, ATOM) \ - ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \ - ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \ - : (DIFF_DATA)->root->atoms.len) - -/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this - * yields the line number (starting with 0). */ -#define diff_atom_idx(DIFF_DATA, ATOM) \ - ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \ - ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \ - : (DIFF_DATA)->atoms.len) - -#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \ - for ((ATOM) = (FIRST_ATOM); \ - (ATOM) \ - && ((ATOM) >= (FIRST_ATOM)) \ - && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ - (ATOM)++) - -#define diff_data_foreach_atom(ATOM, DIFF_DATA) \ - foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len) - -#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \ - for ((ATOM) = (FROM); \ - (ATOM) \ - && ((ATOM) >= (DIFF_DATA)->atoms.head) \ - && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ - (ATOM)++) - -/* A diff chunk represents a set of atoms on the left and/or a set of atoms on - * the right. - * - * If solved == false: - * The diff algorithm has divided the source file, and this is a chunk that the - * inner_algo should run on next. - * The lines on the left should be diffed against the lines on the right. - * (If there are no left lines or no right lines, it implies solved == true, - * because there is nothing to diff.) - * - * If solved == true: - * If there are only left atoms, it is a chunk removing atoms from the left ("a - * minus chunk"). - * If there are only right atoms, it is a chunk adding atoms from the right ("a - * plus chunk"). - * If there are both left and right lines, it is a chunk of equal content on - * both sides, and left_count == right_count: - * - * - foo } - * - bar }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3, - * - baz } right_start = NULL, right_count = 0 } - * moo } - * goo }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3, - * zoo } right_start = &right.atoms.head[0], right_count = 3 } - * +loo } - * +roo }-- diff_chunk{ left_start = NULL, left_count = 0, - * +too } right_start = &right.atoms.head[3], right_count = 3 } - * - */ -struct diff_chunk { - bool solved; - struct diff_atom *left_start; - unsigned int left_count; - struct diff_atom *right_start; - unsigned int right_count; -}; - +struct diff_chunk; typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t; -#define DIFF_RESULT_ALLOC_BLOCKSIZE 128 -enum diff_chunk_type { - CHUNK_EMPTY, - CHUNK_PLUS, - CHUNK_MINUS, - CHUNK_SAME, - CHUNK_ERROR, -}; - -static inline enum diff_chunk_type -diff_chunk_type(const struct diff_chunk *chunk) -{ - if (!chunk->left_count && !chunk->right_count) - return CHUNK_EMPTY; - if (!chunk->solved) - return CHUNK_ERROR; - if (!chunk->right_count) - return CHUNK_MINUS; - if (!chunk->left_count) - return CHUNK_PLUS; - if (chunk->left_count != chunk->right_count) - return CHUNK_ERROR; - return CHUNK_SAME; -} - struct diff_result { int rc; struct diff_data left; @@ -222,22 +58,8 @@ struct diff_result { diff_chunk_arraylist_t chunks; }; -struct diff_state { - /* The final result passed to the original diff caller. */ - struct diff_result *result; +struct diff_state; - /* The root diff_data is in result->left,right, these are (possibly) - * subsections of the root data. */ - struct diff_data left; - struct diff_data right; - - unsigned int recursion_depth_left; - - /* Remaining chunks from one diff algorithm pass, if any solved == false - * chunks came up. */ - diff_chunk_arraylist_t temp_result; -}; - struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved, struct diff_atom *left_start, unsigned int left_count, blob - e0bebd8db4f63c98c197cd5b1b64ddd23aeee754 blob + 882f94d7166ec134f12b1d70b4a076a0f17877a7 --- lib/diff_atomize_text.c +++ lib/diff_atomize_text.c @@ -25,6 +25,7 @@ #include #include #include "diff_debug.h" +#include "diff_internal.h" static int diff_data_atomize_text_lines_fd(struct diff_data *d) blob - e47921b35f615024740d645645eb9ebc74225ee1 blob + c31b9e6e33142ba75f981720737dd4b978c52a07 --- lib/diff_main.c +++ lib/diff_main.c @@ -33,6 +33,7 @@ #include #include "diff_debug.h" +#include "diff_internal.h" static int read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len) blob - /dev/null blob + eda757a32c6d6cb95825ee2b4489c06e6bcc620f (mode 644) --- /dev/null +++ lib/diff_internal.h @@ -0,0 +1,211 @@ +/* Generic infrastructure to implement various diff algorithms. */ +/* + * Copyright (c) 2020 Neels Hofmeyr + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef MAX +#define MAX(A,B) ((A)>(B)?(A):(B)) +#endif +#ifndef MIN +#define MIN(A,B) ((A)<(B)?(A):(B)) +#endif + +static inline bool +diff_range_empty(const struct diff_range *r) +{ + return r->start == r->end; +} + +static inline bool +diff_ranges_touch(const struct diff_range *a, const struct diff_range *b) +{ + return (a->end >= b->start) && (a->start <= b->end); +} + +static inline void +diff_ranges_merge(struct diff_range *a, const struct diff_range *b) +{ + *a = (struct diff_range){ + .start = MIN(a->start, b->start), + .end = MAX(a->end, b->end), + }; +} + +static inline int +diff_range_len(const struct diff_range *r) +{ + if (!r) + return 0; + return r->end - r->start; +} + +/* List of all possible return codes of a diff invocation. */ +#define DIFF_RC_USE_DIFF_ALGO_FALLBACK -1 +#define DIFF_RC_OK 0 +/* Any positive return values are errno values from sys/errno.h */ + +struct diff_data; + +struct diff_atom { + struct diff_data *d; /* back pointer to associated diff data */ + + off_t pos; /* if not memory-mapped */ + const uint8_t *at; /* if memory-mapped */ + off_t len; + + /* This hash is just a very cheap speed up for finding *mismatching* + * atoms. When hashes match, we still need to compare entire atoms to + * find out whether they are indeed identical or not. */ + unsigned int hash; + + /* State for the Patience Diff algorithm */ + /* TODO: keep a separate array for the patience state */ + struct { + bool unique_here; + bool unique_in_both; + struct diff_atom *pos_in_other; + struct diff_atom *prev_stack; + struct diff_range identical_lines; + } patience; +}; + +int +diff_atom_cmp(int *cmp, + const struct diff_atom *left, + const struct diff_atom *right); + +/* Indicate whether two given diff atoms match. */ +int +diff_atom_same(bool *same, + const struct diff_atom *left, + const struct diff_atom *right); + +/* The atom's index in the entire file. For atoms divided by lines of text, this + * yields the line number (starting with 0). Also works for diff_data that + * reference only a subsection of a file, always reflecting the global position + * in the file (and not the relative position within the subsection). */ +#define diff_atom_root_idx(DIFF_DATA, ATOM) \ + ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \ + ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \ + : (DIFF_DATA)->root->atoms.len) + +/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this + * yields the line number (starting with 0). */ +#define diff_atom_idx(DIFF_DATA, ATOM) \ + ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \ + : (DIFF_DATA)->atoms.len) + +#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \ + for ((ATOM) = (FIRST_ATOM); \ + (ATOM) \ + && ((ATOM) >= (FIRST_ATOM)) \ + && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ + (ATOM)++) + +#define diff_data_foreach_atom(ATOM, DIFF_DATA) \ + foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len) + +#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \ + for ((ATOM) = (FROM); \ + (ATOM) \ + && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ + (ATOM)++) + +/* A diff chunk represents a set of atoms on the left and/or a set of atoms on + * the right. + * + * If solved == false: + * The diff algorithm has divided the source file, and this is a chunk that the + * inner_algo should run on next. + * The lines on the left should be diffed against the lines on the right. + * (If there are no left lines or no right lines, it implies solved == true, + * because there is nothing to diff.) + * + * If solved == true: + * If there are only left atoms, it is a chunk removing atoms from the left ("a + * minus chunk"). + * If there are only right atoms, it is a chunk adding atoms from the right ("a + * plus chunk"). + * If there are both left and right lines, it is a chunk of equal content on + * both sides, and left_count == right_count: + * + * - foo } + * - bar }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3, + * - baz } right_start = NULL, right_count = 0 } + * moo } + * goo }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3, + * zoo } right_start = &right.atoms.head[0], right_count = 3 } + * +loo } + * +roo }-- diff_chunk{ left_start = NULL, left_count = 0, + * +too } right_start = &right.atoms.head[3], right_count = 3 } + * + */ +struct diff_chunk { + bool solved; + struct diff_atom *left_start; + unsigned int left_count; + struct diff_atom *right_start; + unsigned int right_count; +}; + +#define DIFF_RESULT_ALLOC_BLOCKSIZE 128 + +enum diff_chunk_type { + CHUNK_EMPTY, + CHUNK_PLUS, + CHUNK_MINUS, + CHUNK_SAME, + CHUNK_ERROR, +}; + +static inline enum diff_chunk_type +diff_chunk_type(const struct diff_chunk *chunk) +{ + if (!chunk->left_count && !chunk->right_count) + return CHUNK_EMPTY; + if (!chunk->solved) + return CHUNK_ERROR; + if (!chunk->right_count) + return CHUNK_MINUS; + if (!chunk->left_count) + return CHUNK_PLUS; + if (chunk->left_count != chunk->right_count) + return CHUNK_ERROR; + return CHUNK_SAME; +} + +struct diff_state { + /* The final result passed to the original diff caller. */ + struct diff_result *result; + + /* The root diff_data is in result->left,right, these are (possibly) + * subsections of the root data. */ + struct diff_data left; + struct diff_data right; + + unsigned int recursion_depth_left; + + /* Remaining chunks from one diff algorithm pass, if any solved == false + * chunks came up. */ + diff_chunk_arraylist_t temp_result; +}; + +struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved, + struct diff_atom *left_start, + unsigned int left_count, + struct diff_atom *right_start, + unsigned int right_count); blob - 745a119ea407c711f457c96474762ca5f849ad2b blob + c10e5e5475cce9bedea908a75e628123a864d339 --- lib/diff_myers.c +++ lib/diff_myers.c @@ -28,6 +28,7 @@ #include #include "diff_debug.h" +#include "diff_internal.h" /* Myers' diff algorithm [1] is nicely explained in [2]. * [1] http://www.xmailserver.org/diff2.pdf blob - e9dd89fa1226d66603de9f1e80fd72bca21fefcb blob + 03a4042d7c614b8d5565ce69f13d39b81da33884 --- lib/diff_output.c +++ lib/diff_output.c @@ -26,6 +26,8 @@ #include #include +#include "diff_internal.h" + static int get_atom_byte(int *ch, struct diff_atom *atom, off_t off) { blob - 38318db6a210f599103e1fc40e5d5f989ea6dc0b blob + b79b53f3bb025ac548d288c82aa34658347b35a0 --- lib/diff_output_plain.c +++ lib/diff_output_plain.c @@ -25,6 +25,7 @@ #include #include +#include "diff_internal.h" int diff_output_plain(FILE *dest, const struct diff_input_info *info, blob - 63d712dc89458bd762fad94e1d01c418d4887d19 blob + 189d9d7583c852336b9e70c6a20d6969b4f6726c --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -26,6 +26,7 @@ #include #include "diff_debug.h" +#include "diff_internal.h" static bool chunk_context_empty(const struct diff_chunk_context *cc) blob - ad4072149dfdef088e99354b9c276ddd4fcd24dc blob + 80af37f56e32b7c6d1fbf33b2d1dc07db12346c3 --- lib/diff_patience.c +++ lib/diff_patience.c @@ -27,6 +27,7 @@ #include #include "diff_debug.h" +#include "diff_internal.h" /* Set unique_here = true for all atoms that exist exactly once in this list. */ static int