commit 8546b0450f31c11e9433f5d5c6dc1d79c86107ed from: Neels Hofmeyr date: Sun Sep 20 14:14:11 2020 UTC diff result: ensure sane order of result chunks Ensure that a adjacent chunks of same type are combined, and that a minus block always precedes an adjacent plus block. The upcoming myers-divide optimization is prone to produce weird ordering of plus and minus chunks. commit - 2c8a57df59d1b63727c21ec367fdd2a091392d0c commit + 8546b0450f31c11e9433f5d5c6dc1d79c86107ed blob - c70915a7fff65bfdfe065d3836d6114ffa8a3421 blob + 6397e0c1d52b5e01b3f3e1df122ace240efe2bf2 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -188,6 +188,30 @@ struct diff_chunk { typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t; #define DIFF_RESULT_ALLOC_BLOCKSIZE 128 + +enum diff_chunk_type { + CHUNK_EMPTY, + CHUNK_PLUS, + CHUNK_MINUS, + CHUNK_SAME, + CHUNK_ERROR, +}; + +static inline enum diff_chunk_type +diff_chunk_type(const struct diff_chunk *chunk) +{ + if (!chunk->left_count && !chunk->right_count) + return CHUNK_EMPTY; + if (!chunk->solved) + return CHUNK_ERROR; + if (!chunk->right_count) + return CHUNK_MINUS; + if (!chunk->left_count) + return CHUNK_PLUS; + if (chunk->left_count != chunk->right_count) + return CHUNK_ERROR; + return CHUNK_SAME; +} struct diff_result { int rc; blob - 67e31955bdbcf8b9120e49f0711927a1c52ebcfe blob + 84c41f39c99ce5bb1a20384b96c9cdc772b89ed6 --- lib/diff_main.c +++ lib/diff_main.c @@ -170,31 +170,75 @@ diff_state_add_chunk(struct diff_state *state, bool so struct diff_atom *left_start, unsigned int left_count, struct diff_atom *right_start, unsigned int right_count) { - struct diff_chunk *chunk; - diff_chunk_arraylist_t *result; - - if (solved && !state->temp_result.len) - result = &state->result->chunks; - else - result = &state->temp_result; - - ARRAYLIST_ADD(chunk, *result); - if (!chunk) - return NULL; - *chunk = (struct diff_chunk){ + diff_chunk_arraylist_t *result = NULL; + struct diff_chunk *new_chunk; + struct diff_chunk chunk = { .solved = solved, .left_start = left_start, .left_count = left_count, .right_start = right_start, .right_count = right_count, }; + enum diff_chunk_type last_t; + enum diff_chunk_type prev_last_t; + enum diff_chunk_type new_t; - debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED"); - debug("L\n"); - debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count); + if (!solved || state->temp_result.len) { + /* Append to temp_result */ + result = &state->temp_result; + } else if (!state->result->chunks.len) { + /* Append to final result */ + result = &state->result->chunks; + } + if (result) { + ARRAYLIST_ADD(new_chunk, *result); + if (!new_chunk) + return NULL; + *new_chunk = chunk; + goto chunk_added; + } + + /* Append to solved chunks; make sure that adjacent chunks of same type are combined, and that a minus chunk + * never directly follows a plus chunk. */ + result = &state->result->chunks; + + prev_last_t = result->len > 1 ? + diff_chunk_type(&result->head[result->len - 2]) : CHUNK_EMPTY; + last_t = diff_chunk_type(&result->head[result->len - 1]); + new_t = diff_chunk_type(&chunk); + + if (new_t == last_t) { + new_chunk = &result->head[result->len - 1]; + new_chunk->left_count += chunk.left_count; + new_chunk->right_count += chunk.right_count; + } else if (last_t == CHUNK_PLUS && new_t == CHUNK_MINUS) { + /* If a minus-chunk follows a plus-chunk, place it above the plus-chunk. + * Is the one before that also a minus? combine. */ + if (prev_last_t == CHUNK_MINUS) { + new_chunk = &result->head[result->len - 2]; + new_chunk->left_count += chunk.left_count; + new_chunk->right_count += chunk.right_count; + } else { + ARRAYLIST_INSERT(new_chunk, *result, result->len - 2); + if (!new_chunk) + return NULL; + *new_chunk = chunk; + } + } else { + ARRAYLIST_ADD(new_chunk, *result); + if (!new_chunk) + return NULL; + *new_chunk = chunk; + goto chunk_added; + } + +chunk_added: + debug("Add %s chunk:\n", new_chunk->solved ? "solved" : "UNSOLVED"); + debug("L\n"); + debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count); debug("R\n"); - debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count); - return chunk; + debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count); + return new_chunk; } void blob - 192458c53860d0a67b9316e2f7eeec6e09372cc1 blob + 32d0bb8ed077cff937e57b8c5051638aeff4fd82 --- lib/diff_output_unidiff.c +++ lib/diff_output_unidiff.c @@ -27,30 +27,6 @@ #include "debug.h" -enum chunk_type { - CHUNK_EMPTY, - CHUNK_PLUS, - CHUNK_MINUS, - CHUNK_SAME, - CHUNK_WEIRD, -}; - -static inline enum chunk_type -chunk_type(const struct diff_chunk *chunk) -{ - if (!chunk->left_count && !chunk->right_count) - return CHUNK_EMPTY; - if (!chunk->solved) - return CHUNK_WEIRD; - if (!chunk->right_count) - return CHUNK_MINUS; - if (!chunk->left_count) - return CHUNK_PLUS; - if (chunk->left_count != chunk->right_count) - return CHUNK_WEIRD; - return CHUNK_SAME; -} - struct chunk_context { struct diff_range chunk; struct diff_range left, right; @@ -195,7 +171,7 @@ diff_output_unidiff(FILE *dest, const struct diff_inpu int i; for (i = 0; i < result->chunks.len; i++) { struct diff_chunk *c = &result->chunks.head[i]; - enum chunk_type t = chunk_type(c); + enum diff_chunk_type t = diff_chunk_type(c); struct chunk_context next; if (t != CHUNK_MINUS && t != CHUNK_PLUS)