Commit Diff


commit - c8846eb959c22c43dd5f8d9193b7e0c83c31b8af
commit + 6c8c5d3f0b6b2ba6c23acd67179fd37e8f2af66b
blob - 9dc641cf345f7bda61a2e56012332d68e357bddb
blob + f49a6893b6847f02d5127764222375f1184d051d
--- include/diff_main.h
+++ include/diff_main.h
@@ -42,6 +42,8 @@ struct diff_data {
 
 	ARRAYLIST(struct diff_atom) atoms;
 	struct diff_data *root;
+	struct diff_data *current;
+	void *algo_data;
 
 	int diff_flags;
 
blob - e9088e91a37fcda61b2cf9cd6487f3abe30adee4
blob + 5c8fd1fc7ae45e166e7af7a09a3e9b4cfa0f47b9
--- lib/diff_internal.h
+++ lib/diff_internal.h
@@ -69,15 +69,6 @@ struct diff_atom {
 	 * atoms. When hashes match, we still need to compare entire atoms to
 	 * find out whether they are indeed identical or not. */
 	unsigned int hash;
-
-	/* State for the Patience Diff algorithm */
-	/* TODO: keep a separate array for the patience state */
-	struct {
-		bool unique_in_both;
-		struct diff_atom *pos_in_other;
-		struct diff_atom *prev_stack;
-		struct diff_range identical_lines;
-	} patience;
 };
 
 int
blob - e7f728f1729206b8262ad23227da9f0d7d0dff66
blob + 90e4bb8ef6330a66994c94fc1ecbd104f42a3846
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -29,6 +29,23 @@
 #include "diff_internal.h"
 #include "diff_debug.h"
 
+/* Per-atom state for the Patience Diff algorithm */
+struct atom_patience {
+	bool unique_in_both;
+	struct diff_atom *pos_in_other;
+	struct diff_atom *prev_stack;
+	struct diff_range identical_lines;
+};
+
+/* A diff_atom has a backpointer to the root diff_data. That points to the
+ * current diff_data, a possibly smaller section of the root. That current
+ * diff_data->algo_data is a pointer to an array of struct atom_patience. The
+ * atom's index in current diff_data gives the index in the atom_patience array.
+ */
+#define PATIENCE(ATOM) \
+	(((struct atom_patience*)((ATOM)->root->current->algo_data))\
+	 [diff_atom_idx((ATOM)->root->current, ATOM)])
+
 int diff_atoms_qsort_compar(const void *_a, const void *_b)
 {
 	const struct diff_atom *a = *(struct diff_atom**)_a;
@@ -89,6 +106,8 @@ diff_atoms_mark_unique_in_both_by_qsort(struct diff_da
 	unsigned int i;
 	unsigned int unique_in_both_count = 0;
 	int rc;
+	left->err = 0;
+	right->err = 0;
 	left->root->err = 0;
 	right->root->err = 0;
 	diff_data_foreach_atom(a, left) {
@@ -132,10 +151,10 @@ diff_atoms_mark_unique_in_both_by_qsort(struct diff_da
 		if ((count_first_side == 1)
 		    && (count_other_side == 1)) {
 			b = all_atoms[i+1];
-			a->patience.unique_in_both = true;
-			a->patience.pos_in_other = b;
-			b->patience.unique_in_both = true;
-			b->patience.pos_in_other = a;
+			PATIENCE(a).unique_in_both = true;
+			PATIENCE(a).pos_in_other = b;
+			PATIENCE(b).unique_in_both = true;
+			PATIENCE(b).pos_in_other = a;
 			unique_in_both_count++;
 		}
 	}
@@ -162,14 +181,13 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat
 		next_l_idx = l_idx + 1;
 		struct diff_atom *l = &left->atoms.head[l_idx];
 
-		if (!l->patience.unique_in_both)
+		if (!PATIENCE(l).unique_in_both)
 			continue;
 
 		debug("check identical lines around ");
 		debug_dump_atom(left, right, l);
 
-		unsigned int r_idx = diff_atom_idx(right,
-						   l->patience.pos_in_other);
+		unsigned int r_idx = diff_atom_idx(right, PATIENCE(l).pos_in_other);
 
 		struct diff_range identical_l;
 		struct diff_range identical_r;
@@ -212,23 +230,23 @@ diff_atoms_swallow_identical_neighbors(struct diff_dat
 				break;
 			l_end = &left->atoms.head[identical_l.end];
 			r_end = &right->atoms.head[identical_r.end];
-			if (!l_end->patience.unique_in_both)
+			if (!PATIENCE(l_end).unique_in_both)
 				continue;
 			/* Part of a chunk of identical lines, remove from
 			 * listing of unique_in_both lines */
-			l_end->patience.unique_in_both = false;
-			r_end->patience.unique_in_both = false;
+			PATIENCE(l_end).unique_in_both = false;
+			PATIENCE(r_end).unique_in_both = false;
 			(*unique_in_both_count)--;
 		}
 
-		l->patience.identical_lines = identical_l;
-		l->patience.pos_in_other->patience.identical_lines =
+		PATIENCE(l).identical_lines = identical_l;
+		PATIENCE(PATIENCE(l).pos_in_other).identical_lines =
 			identical_r;
 
 		l_min = identical_l.end;
 		r_min = identical_r.end;
 
-		if (!diff_range_empty(&l->patience.identical_lines)) {
+		if (!diff_range_empty(&PATIENCE(l).identical_lines)) {
 			debug("common-unique line at l=%u r=%u swallowed"
 			      " identical lines l=%u-%u r=%u-%u\n",
 			      l_idx, r_idx,
@@ -251,8 +269,8 @@ find_target_stack(struct diff_atom *atom,
 	while (lo < hi) {
 		unsigned int mid = (lo + hi) >> 1;
 
-		if (patience_stacks[mid]->patience.pos_in_other
-		    < atom->patience.pos_in_other)
+		if (PATIENCE(patience_stacks[mid]).pos_in_other
+		    < PATIENCE(atom).pos_in_other)
 			lo = mid + 1;
 		else
 			hi = mid;
@@ -272,20 +290,28 @@ diff_algo_patience(const struct diff_algo_config *algo
 		   struct diff_state *state)
 {
 	int rc;
-
 	struct diff_data *left = &state->left;
 	struct diff_data *right = &state->right;
-
+	struct atom_patience *atom_patience_left =
+		calloc(left->atoms.len, sizeof(struct atom_patience));
+	struct atom_patience *atom_patience_right =
+		calloc(right->atoms.len, sizeof(struct atom_patience));
 	unsigned int unique_in_both_count;
+	struct diff_atom **lcs = NULL;
 
 	debug("\n** %s\n", __func__);
 
+	left->root->current = left;
+	right->root->current = right;
+	left->algo_data = atom_patience_left;
+	right->algo_data = atom_patience_right;
+
 	/* Find those lines that appear exactly once in 'left' and exactly once
 	 * in 'right'. */
 	rc = diff_atoms_mark_unique_in_both_by_qsort(left, right,
 						     &unique_in_both_count);
 	if (rc)
-		return rc;
+		goto free_and_exit;
 
 	debug("unique_in_both_count %u\n", unique_in_both_count);
 	debug("left:\n");
@@ -296,13 +322,14 @@ diff_algo_patience(const struct diff_algo_config *algo
 	if (!unique_in_both_count) {
 		/* Cannot apply Patience, tell the caller to use fallback_algo
 		 * instead. */
-		return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+		rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+		goto free_and_exit;
 	}
 
 	rc = diff_atoms_swallow_identical_neighbors(left, right,
 						    &unique_in_both_count);
 	if (rc)
-		return rc;
+		goto free_and_exit;
 	debug("After swallowing identical neighbors: unique_in_both = %u\n",
 	      unique_in_both_count);
 
@@ -311,7 +338,6 @@ diff_algo_patience(const struct diff_algo_config *algo
 	/* An array of Longest Common Sequence is the result of the below
 	 * subscope: */
 	unsigned int lcs_count = 0;
-	struct diff_atom **lcs = NULL;
 	struct diff_atom *lcs_tail = NULL;
 
 	{
@@ -338,7 +364,7 @@ diff_algo_patience(const struct diff_algo_config *algo
 		struct diff_atom *atom;
 		struct diff_atom **uniques_end = uniques;
 		diff_data_foreach_atom(atom, left) {
-			if (!atom->patience.unique_in_both)
+			if (!PATIENCE(atom).unique_in_both)
 				continue;
 			*uniques_end = atom;
 			uniques_end++;
@@ -351,7 +377,7 @@ diff_algo_patience(const struct diff_algo_config *algo
 		 * line number is derived from the atom*, which are array items
 		 * and hence reflect the relative position in the source file.
 		 * So we got the common-uniques from 'left' and sort them
-		 * according to atom->patience.pos_in_other. */
+		 * according to PATIENCE(atom).pos_in_other. */
 		unsigned int i;
 		for (i = 0; i < unique_in_both_count; i++) {
 			atom = uniques[i];
@@ -366,7 +392,7 @@ diff_algo_patience(const struct diff_algo_config *algo
 			/* Record a back reference to the next stack on the
 			 * left, which will form the final longest sequence
 			 * later. */
-			atom->patience.prev_stack = target_stack ?
+			PATIENCE(atom).prev_stack = target_stack ?
 				patience_stacks[target_stack - 1] : NULL;
 
 		}
@@ -377,14 +403,14 @@ diff_algo_patience(const struct diff_algo_config *algo
 		lcs_count = patience_stacks_count;
 
 		/* uniques and patience_stacks are no longer needed.
-		 * Backpointers are in atom->patience.prev_stack */
+		 * Backpointers are in PATIENCE(atom).prev_stack */
 		free(atom_pointers);
 	}
 
 	lcs = recallocarray(NULL, 0, lcs_count, sizeof(struct diff_atom*));
 	struct diff_atom **lcs_backtrace_pos = &lcs[lcs_count - 1];
 	struct diff_atom *atom;
-	for (atom = lcs_tail; atom; atom = atom->patience.prev_stack, lcs_backtrace_pos--) {
+	for (atom = lcs_tail; atom; atom = PATIENCE(atom).prev_stack, lcs_backtrace_pos--) {
 		assert(lcs_backtrace_pos >= lcs);
 		*lcs_backtrace_pos = atom;
 	}
@@ -419,14 +445,14 @@ diff_algo_patience(const struct diff_algo_config *algo
 
 		if (i < lcs_count) {
 			atom = lcs[i];
-			atom_r = atom->patience.pos_in_other;
+			atom_r = PATIENCE(atom).pos_in_other;
 			debug("lcs[%u] = left[%u] = right[%u]\n", i,
 			      diff_atom_idx(left, atom), diff_atom_idx(right, atom_r));
-			left_idx = atom->patience.identical_lines.start;
-			right_idx = atom_r->patience.identical_lines.start;
+			left_idx = PATIENCE(atom).identical_lines.start;
+			right_idx = PATIENCE(atom_r).identical_lines.start;
 			debug(" identical lines l %u-%u  r %u-%u\n",
-			      atom->patience.identical_lines.start, atom->patience.identical_lines.end,
-			      atom_r->patience.identical_lines.start, atom_r->patience.identical_lines.end);
+			      PATIENCE(atom).identical_lines.start, PATIENCE(atom).identical_lines.end,
+			      PATIENCE(atom_r).identical_lines.start, PATIENCE(atom_r).identical_lines.end);
 		} else {
 			atom = NULL;
 			atom_r = NULL;
@@ -466,21 +492,21 @@ diff_algo_patience(const struct diff_algo_config *algo
 						  left_atom, left_section_len,
 						  right_atom,
 						  right_section_len))
-				goto return_rc;
+				goto free_and_exit;
 		} else if (left_section_len && !right_section_len) {
 			/* Only left atoms and none on the right, they form a
 			 * "minus" chunk, then. */
 			if (!diff_state_add_chunk(state, true,
 						  left_atom, left_section_len,
 						  right_atom, 0))
-				goto return_rc;
+				goto free_and_exit;
 		} else if (!left_section_len && right_section_len) {
 			/* No left atoms, only atoms on the right, they form a
 			 * "plus" chunk, then. */
 			if (!diff_state_add_chunk(state, true,
 						  left_atom, 0,
 						  right_atom, right_section_len))
-				goto return_rc;
+				goto free_and_exit;
 		}
 		/* else: left_section_len == 0 and right_section_len == 0, i.e.
 		 * nothing here. */
@@ -493,15 +519,15 @@ diff_algo_patience(const struct diff_algo_config *algo
 			void *ok;
 			ok = diff_state_add_chunk(state, true,
 				left->atoms.head
-				    + atom->patience.identical_lines.start,
-				diff_range_len(&atom->patience.identical_lines),
+				    + PATIENCE(atom).identical_lines.start,
+				diff_range_len(&PATIENCE(atom).identical_lines),
 				right->atoms.head
-				    + atom_r->patience.identical_lines.start,
-				diff_range_len(&atom_r->patience.identical_lines));
+				    + PATIENCE(atom_r).identical_lines.start,
+				diff_range_len(&PATIENCE(atom_r).identical_lines));
 			if (!ok)
-				goto return_rc;
-			left_pos = atom->patience.identical_lines.end;
-			right_pos = atom_r->patience.identical_lines.end;
+				goto free_and_exit;
+			left_pos = PATIENCE(atom).identical_lines.end;
+			right_pos = PATIENCE(atom_r).identical_lines.end;
 		} else {
 			left_pos = left_idx + 1;
 			right_pos = right_idx + 1;
@@ -514,7 +540,12 @@ diff_algo_patience(const struct diff_algo_config *algo
 
 	rc = DIFF_RC_OK;
 
-return_rc:
-	free(lcs);
+free_and_exit:
+	left->root->current = NULL;
+	right->root->current = NULL;
+	free(atom_patience_left);
+	free(atom_patience_right);
+	if (lcs)
+		free(lcs);
 	return rc;
 }