commit 29916bb6c0c248ca6fa5486cb9e081d92112e86c from: Stefan Sperling date: Wed Nov 18 14:14:06 2020 UTC expose struct diff_atom in public API for external atomizer implementations commit - fe6d58fb52ea8d1041a8eb65e28a04816df67c08 commit + 29916bb6c0c248ca6fa5486cb9e081d92112e86c blob - 40142164742fb5bb3d4316a5bca80a90d132836c blob + bd94a9109c7f29432f6af3217b4a5775f3f0493c --- include/diff_main.h +++ include/diff_main.h @@ -25,8 +25,73 @@ struct diff_range { #define DIFF_RC_OK 0 /* Any positive return values are errno values from sys/errno.h */ -struct diff_atom; +struct diff_atom { + struct diff_data *root; /* back pointer to root diff data */ + off_t pos; /* if not memory-mapped */ + const uint8_t *at; /* if memory-mapped */ + off_t len; + + /* This hash is just a very cheap speed up for finding *mismatching* + * atoms. When hashes match, we still need to compare entire atoms to + * find out whether they are indeed identical or not. + * Calculated over all atom bytes with diff_atom_hash_update(). */ + unsigned int hash; +}; + +/* Mix another atom_byte into the provided hash value and return the result. + * The hash value passed in for the first byte of the atom must be zero. */ +unsigned int +diff_atom_hash_update(unsigned int hash, unsigned char atom_byte); + +/* Compare two atoms for equality. Return 0 on success, or errno on failure. + * Set cmp to -1, 0, or 1, just like strcmp(). */ +int +diff_atom_cmp(int *cmp, + const struct diff_atom *left, + const struct diff_atom *right); + + +/* The atom's index in the entire file. For atoms divided by lines of text, this + * yields the line number (starting with 0). Also works for diff_data that + * reference only a subsection of a file, always reflecting the global position + * in the file (and not the relative position within the subsection). */ +#define diff_atom_root_idx(DIFF_DATA, ATOM) \ + ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \ + ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \ + : (DIFF_DATA)->root->atoms.len) + +/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this + * yields the line number (starting with 0). */ +#define diff_atom_idx(DIFF_DATA, ATOM) \ + ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \ + : (DIFF_DATA)->atoms.len) + +#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \ + for ((ATOM) = (FIRST_ATOM); \ + (ATOM) \ + && ((ATOM) >= (FIRST_ATOM)) \ + && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ + (ATOM)++) + +#define diff_data_foreach_atom(ATOM, DIFF_DATA) \ + foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len) + +#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \ + for ((ATOM) = (FROM); \ + (ATOM) \ + && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ + (ATOM)++) + +#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \ + for ((ATOM) = (FROM); \ + (ATOM) \ + && ((ATOM) >= (DIFF_DATA)->atoms.head) \ + && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \ + (ATOM)--) + /* For each file, there is a "root" struct diff_data referencing the entire * file, which the atoms are parsed from. In recursion of diff algorithm, there * may be "child" struct diff_data only referencing a subsection of the file, blob - 1bdb99777475de80a76425f1e2c3243dbbe9d9a6 blob + 1da34c64d7271fbd198a3a0cfc8d0414641d22d8 --- lib/diff_atomize_text.c +++ lib/diff_atomize_text.c @@ -29,6 +29,12 @@ #include "diff_internal.h" #include "diff_debug.h" +unsigned int +diff_atom_hash_update(unsigned int hash, unsigned char atom_byte) +{ + return hash * 23 + atom_byte; +} + static int diff_data_atomize_text_lines_fd(struct diff_data *d) { @@ -63,7 +69,8 @@ diff_data_atomize_text_lines_fd(struct diff_data *d) if (buf[i] != '\r' && buf[i] != '\n') { if (!ignore_whitespace || !isspace(buf[i])) - hash = hash * 23 + buf[i]; + hash = diff_atom_hash_update( + hash, buf[i]); line_end++; } else eol = buf[i]; blob - 94ef28c472ae1b07dee34bf8618414ded3037d74 blob + 699cdbdee8d7c7fa45ac1a2cf93547d0a2c9fdc8 --- lib/diff_internal.h +++ lib/diff_internal.h @@ -56,72 +56,12 @@ diff_range_len(const struct diff_range *r) #define DIFF_RC_OK 0 /* Any positive return values are errno values from sys/errno.h */ -struct diff_data; - -struct diff_atom { - struct diff_data *root; /* back pointer to root diff data */ - - off_t pos; /* if not memory-mapped */ - const uint8_t *at; /* if memory-mapped */ - off_t len; - - /* This hash is just a very cheap speed up for finding *mismatching* - * atoms. When hashes match, we still need to compare entire atoms to - * find out whether they are indeed identical or not. */ - unsigned int hash; -}; - -int -diff_atom_cmp(int *cmp, - const struct diff_atom *left, - const struct diff_atom *right); - /* Indicate whether two given diff atoms match. */ int diff_atom_same(bool *same, const struct diff_atom *left, const struct diff_atom *right); -/* The atom's index in the entire file. For atoms divided by lines of text, this - * yields the line number (starting with 0). Also works for diff_data that - * reference only a subsection of a file, always reflecting the global position - * in the file (and not the relative position within the subsection). */ -#define diff_atom_root_idx(DIFF_DATA, ATOM) \ - ((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \ - ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \ - : (DIFF_DATA)->root->atoms.len) - -/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this - * yields the line number (starting with 0). */ -#define diff_atom_idx(DIFF_DATA, ATOM) \ - ((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \ - ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \ - : (DIFF_DATA)->atoms.len) - -#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \ - for ((ATOM) = (FIRST_ATOM); \ - (ATOM) \ - && ((ATOM) >= (FIRST_ATOM)) \ - && ((ATOM) - (FIRST_ATOM) < (COUNT)); \ - (ATOM)++) - -#define diff_data_foreach_atom(ATOM, DIFF_DATA) \ - foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len) - -#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \ - for ((ATOM) = (FROM); \ - (ATOM) \ - && ((ATOM) >= (DIFF_DATA)->atoms.head) \ - && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \ - (ATOM)++) - -#define diff_data_foreach_atom_backwards_from(FROM, ATOM, DIFF_DATA) \ - for ((ATOM) = (FROM); \ - (ATOM) \ - && ((ATOM) >= (DIFF_DATA)->atoms.head) \ - && ((ATOM) - (DIFF_DATA)->atoms.head >= 0); \ - (ATOM)--) - /* A diff chunk represents a set of atoms on the left and/or a set of atoms on * the right. *