commit 3b0f3d6191103b52a0619ed00752f7f5e6fa754c
from: Neels Hofmeyr <neels@hofmeyr.de>
date: Wed Jan 22 03:02:58 2020 UTC

initial commit

commit - /dev/null
commit + 3b0f3d6191103b52a0619ed00752f7f5e6fa754c
blob - /dev/null
blob + 4ce1b320e06779fbf641c890bdb8a0d6dcea3ed9 (mode 644)
--- /dev/null
+++ .gitignore
@@ -0,0 +1,8 @@
+.*.sw?
+diff/diff
+*.o
+*.d
+*.a
+tags
+test/got*.diff
+test/verify.*
blob - /dev/null
blob + d908088b90fc58865c6f9352047c7d52f37c3a48 (mode 644)
--- /dev/null
+++ LICENCE
@@ -0,0 +1,13 @@
+ Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+
+ Permission to use, copy, modify, and distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
blob - /dev/null
blob + 2ff357ceee0eed252b191258de3fea36fa4ae92b (mode 644)
--- /dev/null
+++ README
@@ -0,0 +1,29 @@
+This is a collection of diff algorithms, to test various combinations.
+
+The initial aim was to provide a faster diff implementation for got
+(gameoftrees.org) with a BSD license, at the u2k20 OpenBSD hackathon.
+A side effect could be improving OpenBSD's /usr/bin/diff utility.
+
+At the time of writing, this is little more than a playground / benchmark basis
+/ diff algorithm analysis platform. What could be done:
+- add profiling and test series to rate diff algorithm combinations.
+- interface with / merge into got.
+
+The Myers and Patience Diff algorithm implementations found here are based on
+the explanations found in these blog post series:
+  https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/ ff.
+and
+  https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/ ff.
+-- possibly the single most comprehensive explanations of these algorithms.
+Many thanks for this valuable door opener!
+The source code itself is not based on the code found in those blogs, but
+written from scratch with the knowledge gained.
+
+Compile:
+  OpenBSD:
+    make -C diff
+  Linux:
+    make -f linux_Makefile -C diff
+
+Test:
+  make -C test/
blob - /dev/null
blob + b93701332fb1013c6f09d620e138075dc268925c (mode 644)
--- /dev/null
+++ diff/Makefile
@@ -0,0 +1,38 @@
+.PATH:${.CURDIR}/../lib
+
+.include "../diff-version.mk"
+
+PROG=	diff
+SRCS= \
+	diff.c \
+	diff_atomize_text.c \
+	diff_main.c \
+	diff_myers.c \
+	diff_patience.c \
+	diff_output.c \
+	diff_output_plain.c \
+	diff_output_unidiff.c \
+	${END}
+MAN =	${PROG}.1
+
+CPPFLAGS = -I${.CURDIR}/../include -I${.CURDIR}/../lib
+
+.if defined(PROFILE)
+LDADD = -lutil_p -lz_p -lc_p
+.else
+LDADD = -lutil -lz
+.endif
+
+.if ${DIFF_RELEASE} != "Yes"
+NOMAN = Yes
+.endif
+
+realinstall:
+	${INSTALL} ${INSTALL_COPY} -o ${BINOWN} -g ${BINGRP} \
+	-m ${BINMODE} ${PROG} ${BINDIR}/${PROG}
+
+dist:
+	mkdir ../diff-${DIFF_VERSION}/diff
+	cp ${SRCS} ${MAN} ../diff-${DIFF_VERSION}/diff
+
+.include <bsd.prog.mk>
blob - /dev/null
blob + be68fea20a6bbf1f0e109ccd5ea2042919055f31 (mode 644)
--- /dev/null
+++ diff/diff.c
@@ -0,0 +1,143 @@
+/* Commandline diff utility to test diff implementations. */
+/*
+ * Copyright (c) 2018 Martin Pieuchot
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifdef __linux__
+/* stupid shims to compile and test on linux */
+#define __dead
+
+static const char *getprogname()
+{
+	return "diff";
+}
+#endif
+
+__dead void	 usage(void);
+int		 diffreg(char *, char *, int);
+char		*mmapfile(const char *, struct stat *);
+
+__dead void
+usage(void)
+{
+	fprintf(stderr, "usage: %s file1 file2\n", getprogname());
+	exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+	int ch;
+
+	while ((ch = getopt(argc, argv, "")) != -1) {
+		switch (ch) {
+		default:
+			usage();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 2)
+		usage();
+
+	return diffreg(argv[0], argv[1], 0);
+}
+
+#include <diff/diff_main.h>
+#include <diff/diff_output.h>
+
+const struct diff_algo_config myers, patience, myers_divide;
+
+const struct diff_algo_config myers = (struct diff_algo_config){
+	.impl = diff_algo_myers,
+	.permitted_state_size = 104 * sizeof(int),
+	.fallback_algo = &patience,
+};
+
+const struct diff_algo_config patience = (struct diff_algo_config){
+	.impl = diff_algo_patience,
+	.inner_algo = &patience,	// After subdivision, do Patience again.
+	.fallback_algo = &myers_divide, // If subdivision failed, do Myers Divide et Impera.
+};
+
+const struct diff_algo_config myers_divide = (struct diff_algo_config){
+	.impl = diff_algo_myers_divide,
+	.inner_algo = &myers,		// When division succeeded, start from the top.
+					// (fallback_algo = NULL implies diff_algo_none).
+};
+
+const struct diff_config diff_config = {
+	.atomize_func = diff_atomize_text_by_line,
+	.algo = &myers,
+};
+
+int
+diffreg(char *file1, char *file2, int flags)
+{
+	char *str1, *str2;
+	struct stat st1, st2;
+	struct diff_input_info info = {
+		.left_path = file1,
+		.right_path = file2,
+	};
+
+	str1 = mmapfile(file1, &st1);
+	str2 = mmapfile(file2, &st2);
+
+	diff_plain(stdout, &diff_config, &info, str1, st1.st_size, str2, st2.st_size);
+
+	munmap(str1, st1.st_size);
+	munmap(str2, st2.st_size);
+
+	return 0;
+}
+
+char *
+mmapfile(const char *path, struct stat *st)
+{
+	int			 fd;
+	char			*p;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		err(2, "%s", path);
+
+	if (fstat(fd, st) == -1)
+		err(2, "%s", path);
+
+	if ((uintmax_t)st->st_size > SIZE_MAX)
+		errx(2, "%s: file too big to fit memory", path);
+
+	p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (p == MAP_FAILED)
+		err(2, "mmap");
+
+	close(fd);
+
+	return p;
+}
blob - /dev/null
blob + 62c7103d70ac32f292cc05f62c782305dd8798f6 (mode 644)
--- /dev/null
+++ diff/linux_Makefile
@@ -0,0 +1,13 @@
+CFLAGS = -fsanitize=address -fsanitize=undefined -g -O3
+
+diff: diff.c ../lib/libdiff.a
+	gcc $(CFLAGS) -I../include -o $@ $^
+
+
+../lib/libdiff.a: ../lib/*.[hc] ../include/diff/*.h
+	$(MAKE) -f linux_Makefile -C ../lib
+
+.PHONY: clean
+clean:
+	rm diff
+	$(MAKE) -f linux_Makefile -C ../lib clean
blob - /dev/null
blob + 1d14cd7ecfcc0a39e1935164b1480f4dffbf0b28 (mode 644)
--- /dev/null
+++ diff-version.mk
@@ -0,0 +1,8 @@
+DIFF_RELEASE=No
+DIFF_VERSION_NUMBER=0.1
+
+.if ${DIFF_RELEASE} == Yes
+DIFF_VERSION=${DIFF_VERSION_NUMBER}
+.else
+DIFF_VERSION=${DIFF_VERSION_NUMBER}-current
+.endif
blob - /dev/null
blob + 395468a8c29ce7c6d5ca5283a1a1aa247acb3d87 (mode 644)
--- /dev/null
+++ include/diff/arraylist.h
@@ -0,0 +1,96 @@
+/* Auto-reallocating array for arbitrary member types. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#ifdef __linux__
+/* stupid shims to compile and test on linux */
+#include <strings.h>
+static inline void *reallocarray(void *buf, size_t n, size_t member_size)
+{
+	return realloc(buf, n * member_size);
+}
+static inline void *recallocarray(void *buf, size_t oldn, size_t n, size_t member_size)
+{
+	buf = reallocarray(buf, n, member_size);
+	bzero(((char*)buf) + (oldn * member_size), (n - oldn) * member_size);
+	return buf;
+}
+#endif
+
+
+/* Usage:
+ *
+ * ARRAYLIST(any_type_t) list;
+ * // OR
+ * typedef ARRAYLIST(any_type_t) any_type_list_t;
+ * any_type_list_t list;
+ *
+ * ARRAYLIST_INIT(list, 128); // < number of (at first unused) members to add on each realloc
+ * any_type_t *x;
+ * while (bar) {
+ *         ARRAYLIST_ADD(x, list); // < enlarges the allocated array as needed; list.head may change due to realloc
+ *         if (!x)
+ *                 abort();
+ *         *x = random_foo_value;
+ * }
+ * for (i = 0; i < list.len; i++)
+ *         printf("%s", foo_to_str(list.head[i]));
+ * ARRAYLIST_FREE(list);
+ */
+#define ARRAYLIST(MEMBER_TYPE) \
+	struct { \
+		MEMBER_TYPE *head; \
+		unsigned int len; \
+		unsigned int allocated; \
+		unsigned int alloc_blocksize; \
+	}
+
+#define ARRAYLIST_INIT(ARRAY_LIST, ALLOC_BLOCKSIZE) do { \
+		(ARRAY_LIST).head = NULL; \
+		(ARRAY_LIST).len = 0; \
+		(ARRAY_LIST).allocated = 0; \
+		(ARRAY_LIST).alloc_blocksize = ALLOC_BLOCKSIZE; \
+	} while(0)
+
+#define ARRAYLIST_ADD(NEW_ITEM_P, ARRAY_LIST) do { \
+		if ((ARRAY_LIST).len && !(ARRAY_LIST).allocated) { \
+			NEW_ITEM_P = NULL; \
+			break; \
+		} \
+		if ((ARRAY_LIST).head == NULL || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+			(ARRAY_LIST).allocated += (ARRAY_LIST).alloc_blocksize ? : 8; \
+			(ARRAY_LIST).head = recallocarray((ARRAY_LIST).head, (ARRAY_LIST).len, \
+							  (ARRAY_LIST).allocated, sizeof(*(ARRAY_LIST).head)); \
+		}; \
+		if (!(ARRAY_LIST).head || (ARRAY_LIST).allocated < (ARRAY_LIST).len + 1) { \
+			NEW_ITEM_P = NULL; \
+			break; \
+		} \
+		(NEW_ITEM_P) = &(ARRAY_LIST).head[(ARRAY_LIST).len]; \
+		(ARRAY_LIST).len++; \
+	} while (0)
+
+#define ARRAYLIST_CLEAR(ARRAY_LIST) \
+	(ARRAY_LIST).len = 0
+
+#define ARRAYLIST_FREE(ARRAY_LIST) \
+	do { \
+		if ((ARRAY_LIST).head && (ARRAY_LIST).allocated) \
+			free((ARRAY_LIST).head); \
+		ARRAYLIST_INIT(ARRAY_LIST, (ARRAY_LIST).alloc_blocksize); \
+	} while(0)
blob - /dev/null
blob + 073e0a21c0a312a7c09f13092e23da0decdec43a (mode 644)
--- /dev/null
+++ include/diff/diff_main.h
@@ -0,0 +1,249 @@
+/* Generic infrastructure to implement various diff algorithms. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+
+#include <diff/arraylist.h>
+
+/* List of all possible return codes of a diff invocation. */
+enum diff_rc {
+	DIFF_RC_USE_DIFF_ALGO_FALLBACK = -1,
+	DIFF_RC_OK = 0,
+	DIFF_RC_ENOTSUP = ENOTSUP,
+	DIFF_RC_ENOMEM = ENOMEM,
+	DIFF_RC_EINVAL = EINVAL,
+};
+
+struct diff_atom {
+	const uint8_t *at;
+	size_t len;
+
+	/* This hash is just a very cheap speed up for finding *mismatching* atoms. When hashes match, we still need to
+	 * compare entire atoms to find out whether they are indeed identical or not. */
+	unsigned int hash;
+
+	/* State for the Patience Diff algorithm */
+	/* TODO: keep a separate array for the patience state */
+	struct {
+		bool unique_here;
+		bool unique_in_both;
+		struct diff_atom *pos_in_other;
+		struct diff_atom *prev_stack;
+	} patience;
+};
+
+static inline bool diff_atom_same(const struct diff_atom *left, const struct diff_atom *right)
+{
+	return left->hash == right->hash
+		&& left->len == right->len
+		&& memcmp(left->at, right->at, left->len) == 0;
+}
+
+/* For each file, there is a "root" struct diff_data referencing the entire file, which the atoms are parsed from. In
+ * recursion of diff algorithm, there may be "child" struct diff_data only referencing a subsection of the file,
+ * re-using the atoms parsing. For "root" structs, atoms_allocated will be nonzero, indicating that the array of atoms
+ * is owned by that struct. For "child" structs, atoms_allocated == 0, to indicate that the struct is referencing a
+ * subset of atoms. */
+struct diff_data {
+	const uint8_t *data;
+	size_t len;
+	ARRAYLIST(struct diff_atom) atoms;
+	struct diff_data *root;
+};
+
+void diff_data_free(struct diff_data *diff_data);
+
+/* The atom's index in the entire file. For atoms divided by lines of text, this yields the line number (starting with
+ * 0). Also works for diff_data that reference only a subsection of a file, always reflecting the global position in
+ * the file (and not the relative position within the subsection). */
+#define diff_atom_root_idx(DIFF_DATA, ATOM) \
+	((ATOM) ? (ATOM) - ((DIFF_DATA)->root->atoms.head) : (DIFF_DATA)->root->atoms.len)
+
+/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this yields the line number (starting with
+ * 0). */
+#define diff_atom_idx(DIFF_DATA, ATOM) \
+	((ATOM) ? (ATOM) - ((DIFF_DATA)->atoms.head) : (DIFF_DATA)->atoms.len)
+
+#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
+	for ((ATOM) = (FIRST_ATOM); \
+	     (ATOM) && ((ATOM) >= (FIRST_ATOM)) && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
+	     (ATOM)++)
+
+#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
+	foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
+
+#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
+	for ((ATOM) = (FROM); \
+	     (ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
+	     (ATOM)++)
+
+/* A diff chunk represents a set of atoms on the left and/or a set of atoms on the right.
+ *
+ * If solved == false:
+ * The diff algorithm has divided the source file, and this is a chunk that the inner_algo should run on next.
+ * The lines on the left should be diffed against the lines on the right.
+ * (If there are no left lines or no right lines, it implies solved == true, because there is nothing to diff.)
+ *
+ * If solved == true:
+ * If there are only left atoms, it is a chunk removing atoms from the left ("a minus chunk").
+ * If there are only right atoms, it is a chunk adding atoms from the right ("a plus chunk").
+ * If there are both left and right lines, it is a chunk of equal content on both sides,
+ * and left_count == right_count:
+ *
+ * - foo  }
+ * - bar  }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
+ * - baz  }               right_start = NULL, right_count = 0 }
+ *   moo    }
+ *   goo    }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
+ *   zoo    }               right_start = &right.atoms.head[0], right_count = 3 }
+ *  +loo      }
+ *  +roo      }-- diff_chunk{ left_start = NULL, left_count = 0,
+ *  +too      }               right_start = &right.atoms.head[3], right_count = 3 }
+ *
+ */
+struct diff_chunk {
+	bool solved;
+	struct diff_atom *left_start;
+	unsigned int left_count;
+	struct diff_atom *right_start;
+	unsigned int right_count;
+};
+
+typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
+#define DIFF_RESULT_ALLOC_BLOCKSIZE 128
+
+struct diff_result {
+	enum diff_rc rc;
+	struct diff_data left;
+	struct diff_data right;
+	diff_chunk_arraylist_t chunks;
+};
+
+struct diff_state {
+	/* The final result passed to the original diff caller. */
+	struct diff_result *result;
+
+	/* The root diff_data is in result->left,right, these are (possibly) subsections of the root data. */
+	struct diff_data left;
+	struct diff_data right;
+
+	unsigned int recursion_depth_left;
+
+	/* Remaining chunks from one diff algorithm pass, if any solved == false chunks came up. */
+	diff_chunk_arraylist_t temp_result;
+};
+
+struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
+					struct diff_atom *left_start, unsigned int left_count,
+					struct diff_atom *right_start, unsigned int right_count);
+
+/* Signature of a utility function to divide both source files into diff atoms.
+ * It is possible that a (future) algorithm requires both source files to decide on atom split points, hence this gets
+ * both left and right to atomize at the same time.
+ * An example is diff_atomize_text_by_line() in diff_atomize_text.c.
+ *
+ * func_data: context pointer (free to be used by implementation).
+ * left: struct diff_data with left->data and left->len already set up, and left->atoms to be created.
+ * right: struct diff_data with right->data and right->len already set up, and right->atoms to be created.
+ */
+typedef enum diff_rc (*diff_atomize_func_t)(void *func_data, struct diff_data *left, struct diff_data *right);
+
+extern enum diff_rc diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right);
+
+struct diff_algo_config;
+typedef enum diff_rc (*diff_algo_impl_t)(const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Form a result with all left-side removed and all right-side added, i.e. no actual diff algorithm involved. */
+enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Myers Diff tracing from the start all the way through to the end, requiring quadratic amounts of memory. This can
+ * fail if the required space surpasses algo_config->permitted_state_size. */
+extern enum diff_rc diff_algo_myers(const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Myers "Divide et Impera": tracing forwards from the start and backwards from the end to find a midpoint that divides
+ * the problem into smaller chunks. Requires only linear amounts of memory. */
+extern enum diff_rc diff_algo_myers_divide(const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Patience Diff algorithm, which divides a larger diff into smaller chunks. For very specific scenarios, it may lead to
+ * a complete diff result by itself, but needs a fallback algo to solve chunks that don't have common-unique atoms. */
+extern enum diff_rc diff_algo_patience(const struct diff_algo_config *algo_config, struct diff_state *state);
+
+/* Diff algorithms to use, possibly nested. For example:
+ *
+ * struct diff_algo_config myers, patience, myers_divide;
+ *
+ * myers = (struct diff_algo_config){
+ *         .impl = diff_algo_myers,
+ *         .permitted_state_size = 32 * 1024 * 1024,
+ *         .fallback_algo = &patience, // when too large, do diff_algo_patience
+ * };
+ *
+ * patience = (struct diff_algo_config){
+ *         .impl = diff_algo_patience,
+ *         .inner_algo = &patience,        // After subdivision, do Patience again.
+ *         .fallback_algo = &myers_divide, // If subdivision failed, do Myers Divide et Impera.
+ * };
+ *
+ * myers_divide = (struct diff_algo_config){
+ *         .impl = diff_algo_myers_divide,
+ *         .inner_algo = &myers,           // When division succeeded, start from the top.
+ *                                         // (fallback_algo = NULL implies diff_algo_none).
+ * };
+ *
+ * struct diff_config config = {
+ *         .algo = &myers,
+ *         ...
+ * };
+ * diff_main(&config, ...);
+ */
+struct diff_algo_config {
+	diff_algo_impl_t impl;
+
+	/* Fail this algo if it would use more than this amount of memory, and instead use fallback_algo
+	 * (diff_algo_myers). permitted_state_size == 0 means no limitation. */
+	size_t permitted_state_size;
+
+	/* For algorithms that divide into smaller chunks, use this algorithm to solve the divided chunks. */
+	const struct diff_algo_config *inner_algo;
+
+	/* If the algorithm fails (e.g. diff_algo_myers_if_small needs too large state, or diff_algo_patience can't find
+	 * any common-unique atoms), then use this algorithm instead. */
+	const struct diff_algo_config *fallback_algo;
+};
+
+struct diff_config {
+	diff_atomize_func_t atomize_func;
+	void *atomize_func_data;
+
+	const struct diff_algo_config *algo;
+
+	/* How deep to step into subdivisions of a source file, a paranoia / safety measure to guard against infinite
+	 * loops through diff algorithms. When the maximum recursion is reached, employ diff_algo_none (i.e. remove all
+	 * left atoms and add all right atoms). */
+	unsigned int max_recursion_depth;
+};
+
+struct diff_result *diff_main(const struct diff_config *config,
+			      const uint8_t *left_data, size_t left_len,
+			      const uint8_t *right_data, size_t right_len);
+void diff_result_free(struct diff_result *result);
blob - /dev/null
blob + 5d07e74d868098aab6819c41c9499f0f1b30040c (mode 644)
--- /dev/null
+++ include/diff/diff_output.h
@@ -0,0 +1,43 @@
+/* Diff output generators and invocation shims. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdio.h>
+#include <diff/diff_main.h>
+
+struct diff_input_info {
+	const char *arbitrary_info;
+	const char *left_path;
+	const char *right_path;
+};
+
+enum diff_rc diff_output_plain(FILE *dest, const struct diff_input_info *info,
+			       const struct diff_result *result);
+enum diff_rc diff_plain(FILE *dest, const struct diff_config *diff_config,
+			const struct diff_input_info *info,
+			const char *left, int left_len, const char *right, int right_len);
+
+enum diff_rc diff_output_unidiff(FILE *dest, const struct diff_input_info *info,
+				 const struct diff_result *result, unsigned int context_lines);
+enum diff_rc diff_unidiff(FILE *dest, const struct diff_config *diff_config,
+			  const struct diff_input_info *info,
+			  const char *left, int left_len, const char *right, int right_len,
+			  unsigned int context_lines);
+
+enum diff_rc diff_output_info(FILE *dest, const struct diff_input_info *info);
+void diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, unsigned int count);
blob - /dev/null
blob + 72a597bf05c06f25fd94c1421712e178888a0ca3 (mode 644)
--- /dev/null
+++ lib/debug.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#define print(args...) fprintf(stderr, ##args)
+#define debug print
+#define debug_dump dump
+#define debug_dump_atom dump_atom
+#define debug_dump_atoms dump_atoms
+#define debug_dump_myers_graph dump_myers_graph
+
+static inline void dump_atom(const struct diff_data *left, const struct diff_data *right, const struct diff_atom *atom)
+{
+	if (!atom) {
+		print("NULL atom\n");
+		return;
+	}
+	if (left)
+		print(" %3ld", diff_atom_root_idx(left, atom));
+	if (right && atom->patience.pos_in_other)
+		print(" %3ld", diff_atom_root_idx(right, atom->patience.pos_in_other));
+
+	print(" %s%s '", atom->patience.unique_here ? "u" : " ", atom->patience.unique_in_both ? "c" : " ");
+	const char *s;
+	for (s = atom->at; s < (const char*)(atom->at + atom->len); s++) {
+		if (*s == '\r')
+			print("\\r");
+		else if (*s == '\n')
+			print("\\n");
+		else if (*s < 32 || *s >= 127)
+			print("\\x%02x", *s);
+		else
+			print("%c", *s);
+	}
+	print("'\n");
+}
+
+static inline void dump_atoms(const struct diff_data *d, struct diff_atom *atom, unsigned int count)
+{
+	struct diff_atom *i;
+	foreach_diff_atom(i, atom, count) {
+		dump_atom(d, NULL, i);
+	}
+}
+
+static inline void dump(struct diff_data *d)
+{
+	dump_atoms(d, d->atoms.head, d->atoms.len);
+}
+
+static inline void dump_myers_graph(const struct diff_data *l, const struct diff_data *r,
+				    int *kd)
+{
+	int x;
+	int y;
+	print("  ");
+	for (x = 0; x <= l->atoms.len; x++)
+		print("%2d", x);
+	print("\n");
+
+	for (y = 0; y <= r->atoms.len; y++) {
+		print("%2d ", y);
+		for (x = 0; x <= l->atoms.len; x++) {
+
+			/* print d advancements from kd, if any. */
+			int d = -1;
+			if (kd) {
+				int max = l->atoms.len + r->atoms.len;
+				size_t kd_len = max + 1 + max;
+				int *kd_pos = kd;
+				int di;
+#define xk_to_y(X, K) ((X) - (K))
+				for (di = 0; di < max; di++) {
+					int ki;
+					for (ki = di; ki >= -di; ki -= 2) {
+						if (x == kd_pos[ki]
+						    && y == xk_to_y(x, ki)) {
+							d = di;
+							break;
+						}
+					}
+					if (d >= 0)
+						break;
+					kd_pos += kd_len;
+				}
+			}
+			if (d >= 0)
+				print("%d", d);
+			else
+				print("o");
+			if (x < l->atoms.len && d < 10)
+				print("-");
+		}
+		print("\n");
+		if (y == r->atoms.len)
+			break;
+
+		print("   ");
+		for (x = 0; x < l->atoms.len; x++) {
+			if (diff_atom_same(&l->atoms.head[x], &r->atoms.head[y]))
+				print("|\\");
+			else
+				print("| ");
+		}
+		print("|\n");
+	}
+}
+
+#else
+#define debug(args...)
+#define debug_dump(args...)
+#define debug_dump_atom(args...)
+#define debug_dump_atoms(args...)
+#define debug_dump_myers_graph(args...)
+#endif
blob - /dev/null
blob + 964c1353da1b055ae761ae6961c45ea19e16d032 (mode 644)
--- /dev/null
+++ lib/diff_atomize_text.c
@@ -0,0 +1,75 @@
+/* Split source by line breaks, and calculate a simplistic checksum. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <diff/diff_main.h>
+
+static int diff_data_atomize_text_lines(struct diff_data *d)
+{
+	const uint8_t *pos = d->data;
+	const uint8_t *end = pos + d->len;
+	enum diff_rc rc;
+
+	unsigned int array_size_estimate = d->len / 50;
+	unsigned int pow2 = 1;
+	while (array_size_estimate >>= 1)
+		pow2++;
+
+	ARRAYLIST_INIT(d->atoms, 1 << pow2);
+
+	while (pos < end) {
+		const uint8_t *line_end = pos;
+		unsigned int hash = 0;
+
+		while (line_end < end && *line_end != '\r' && *line_end != '\n') {
+			hash = hash * 23 + *line_end;
+			line_end++;
+		}
+
+		/* When not at the end of data, the line ending char ('\r' or '\n') must follow */
+		if (line_end < end)
+			line_end++;
+		/* If that was an '\r', also pull in any following '\n' */
+		if (line_end[0] == '\r' && line_end < end && line_end[1] == '\n')
+			line_end++;
+
+		/* Record the found line as diff atom */
+		struct diff_atom *atom;
+		ARRAYLIST_ADD(atom, d->atoms);
+		if (!atom)
+			return DIFF_RC_ENOMEM;
+
+		*atom = (struct diff_atom){
+			.at = pos,
+			.len = line_end - pos,
+			.hash = hash,
+		};
+
+		/* Starting point for next line: */
+		pos = line_end;
+	}
+
+	return DIFF_RC_OK;
+}
+
+enum diff_rc diff_atomize_text_by_line(void *func_data, struct diff_data *left, struct diff_data *right)
+{
+	enum diff_rc rc;
+	rc = diff_data_atomize_text_lines(left);
+	if (rc != DIFF_RC_OK)
+		return rc;
+	return diff_data_atomize_text_lines(right);
+}
blob - /dev/null
blob + 285bd40d5941fe84c5d9a9bd437a4a9f981ead13 (mode 644)
--- /dev/null
+++ lib/diff_main.c
@@ -0,0 +1,246 @@
+/* Generic infrastructure to implement various diff algorithms (implementation). */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#include <assert.h>
+
+#include <diff/diff_main.h>
+
+#include "debug.h"
+
+/* Even if a left or right side is empty, diff output may need to know the position in that file.
+ * So left_start or right_start must never be NULL -- pass left_count or right_count as zero to indicate staying at that
+ * position without consuming any lines. */
+struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
+					struct diff_atom *left_start, unsigned int left_count,
+					struct diff_atom *right_start, unsigned int right_count)
+{
+	struct diff_chunk *chunk;
+	diff_chunk_arraylist_t *result;
+
+	if (solved && !state->temp_result.len)
+		result = &state->result->chunks;
+	else
+		result = &state->temp_result;
+
+	ARRAYLIST_ADD(chunk, *result);
+	if (!chunk)
+		return NULL;
+	*chunk = (struct diff_chunk){
+		.solved = solved,
+		.left_start = left_start,
+		.left_count = left_count,
+		.right_start = right_start,
+		.right_count = right_count,
+	};
+
+	debug("Add %s chunk:\n", chunk->solved ? "solved" : "UNSOLVED");
+	debug("L\n");
+	debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
+	debug("R\n");
+	debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
+	return chunk;
+}
+
+void diff_data_init_root(struct diff_data *d, const uint8_t *data, size_t len)
+{
+	*d = (struct diff_data){
+		.data = data,
+		.len = len,
+		.root = d,
+	};
+}
+
+void diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
+			       struct diff_atom *from_atom, unsigned int atoms_count)
+{
+	struct diff_atom *last_atom = from_atom + atoms_count - 1;
+	*d = (struct diff_data){
+		.data = from_atom->at,
+		.len = (last_atom->at + last_atom->len) - from_atom->at,
+		.root = parent->root,
+		.atoms.head = from_atom,
+		.atoms.len = atoms_count,
+	};
+
+	debug("subsection:\n");
+	debug_dump(d);
+}
+
+void diff_data_free(struct diff_data *diff_data)
+{
+	if (!diff_data)
+		return;
+	if (diff_data->atoms.allocated)
+		ARRAYLIST_FREE(diff_data->atoms);
+}
+
+enum diff_rc diff_algo_none(const struct diff_algo_config *algo_config, struct diff_state *state)
+{
+	debug("\n** %s\n", __func__);
+	debug("left:\n");
+	debug_dump(&state->left);
+	debug("right:\n");
+	debug_dump(&state->right);
+	debug_dump_myers_graph(&state->left, &state->right, NULL);
+
+	/* Add a chunk of equal lines, if any */
+	unsigned int equal_atoms = 0;
+	while (equal_atoms < state->left.atoms.len && equal_atoms < state->right.atoms.len
+	       && diff_atom_same(&state->left.atoms.head[equal_atoms], &state->right.atoms.head[equal_atoms]))
+		equal_atoms++;
+	if (equal_atoms) {
+		if (!diff_state_add_chunk(state, true,
+					  &state->left.atoms.head[0], equal_atoms,
+					  &state->right.atoms.head[0], equal_atoms))
+			return DIFF_RC_ENOMEM;
+	}
+
+	/* Add a "minus" chunk with all lines from the left. */
+	if (equal_atoms < state->left.atoms.len) {
+		if (!diff_state_add_chunk(state, true,
+					  &state->left.atoms.head[equal_atoms],
+					  state->left.atoms.len - equal_atoms,
+					  NULL, 0))
+		    return DIFF_RC_ENOMEM;
+	}
+
+	/* Add a "plus" chunk with all lines from the right. */
+	if (equal_atoms < state->right.atoms.len) {
+		if (!diff_state_add_chunk(state, true,
+					  NULL, 0,
+					  &state->right.atoms.head[equal_atoms],
+					  state->right.atoms.len - equal_atoms))
+		return DIFF_RC_ENOMEM;
+	}
+	return DIFF_RC_OK;
+}
+
+enum diff_rc diff_run_algo(const struct diff_algo_config *algo_config, struct diff_state *state)
+{
+	enum diff_rc rc;
+	ARRAYLIST_FREE(state->temp_result);
+
+	if (!algo_config || !algo_config->impl || !state->recursion_depth_left) {
+		debug("MAX RECURSION REACHED, just dumping diff chunks\n");
+		return diff_algo_none(algo_config, state);
+	}
+
+	ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
+	rc = algo_config->impl(algo_config, state);
+	switch (rc) {
+	case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
+		debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n", algo_config->fallback_algo);
+		rc = diff_run_algo(algo_config->fallback_algo, state);
+		goto return_rc;
+
+	case DIFF_RC_OK:
+		/* continue below */
+		break;
+
+	default:
+		/* some error happened */
+		goto return_rc;
+	}
+
+	/* Pick up any diff chunks that are still unsolved and feed to inner_algo.
+	 * inner_algo will solve unsolved chunks and append to result,
+	 * and subsequent solved chunks on this level are then appended to result afterwards. */
+	int i;
+	for (i = 0; i < state->temp_result.len; i++) {
+		struct diff_chunk *c = &state->temp_result.head[i];
+		if (c->solved) {
+			struct diff_chunk *final_c;
+			ARRAYLIST_ADD(final_c, state->result->chunks);
+			if (!final_c) {
+				rc = DIFF_RC_ENOMEM;
+				goto return_rc;
+			}
+			*final_c = *c;
+			continue;
+		}
+
+		/* c is an unsolved chunk, feed to inner_algo */
+		struct diff_state inner_state = {
+			.result = state->result,
+			.recursion_depth_left = state->recursion_depth_left - 1,
+		};
+		diff_data_init_subsection(&inner_state.left, &state->left, c->left_start, c->left_count);
+		diff_data_init_subsection(&inner_state.right, &state->right, c->right_start, c->right_count);
+
+		rc = diff_run_algo(algo_config->inner_algo, &inner_state);
+
+		if (rc != DIFF_RC_OK)
+			goto return_rc;
+	}
+
+	rc = DIFF_RC_OK;
+return_rc:
+	ARRAYLIST_FREE(state->temp_result);
+	return rc;
+}
+
+struct diff_result *diff_main(const struct diff_config *config,
+			      const uint8_t *left_data, size_t left_len,
+			      const uint8_t *right_data, size_t right_len)
+{
+	struct diff_result *result = malloc(sizeof(struct diff_result));
+	if (!result)
+		return NULL;
+
+	*result = (struct diff_result){};
+	diff_data_init_root(&result->left, left_data, left_len);
+	diff_data_init_root(&result->right, right_data, right_len);
+
+	if (!config->atomize_func) {
+		result->rc = DIFF_RC_EINVAL;
+		return result;
+	}
+
+	result->rc = config->atomize_func(config->atomize_func_data, &result->left, &result->right);
+	if (result->rc != DIFF_RC_OK)
+		return result;
+
+	struct diff_state state = {
+		.result = result,
+		.recursion_depth_left = config->max_recursion_depth ? : 1024,
+	};
+	diff_data_init_subsection(&state.left, &result->left, result->left.atoms.head, result->left.atoms.len);
+	diff_data_init_subsection(&state.right, &result->right, result->right.atoms.head, result->right.atoms.len);
+
+	result->rc = diff_run_algo(config->algo, &state);
+
+	return result;
+}
+
+void diff_result_free(struct diff_result *result)
+{
+	if (!result)
+		return;
+	diff_data_free(&result->left);
+	diff_data_free(&result->right);
+	ARRAYLIST_FREE(result->chunks);
+	free(result);
+}
blob - /dev/null
blob + cba927a4a5813ed3a1afa1247227a9637271e176 (mode 644)
--- /dev/null
+++ lib/diff_myers.c
@@ -0,0 +1,1043 @@
+/* Myers diff algorithm implementation, invented by Eugene W. Myers [1].
+ * Implementations of both the Myers Divide Et Impera (using linear space)
+ * and the canonical Myers algorithm (using quadratic space). */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <diff/diff_main.h>
+
+#include "debug.h"
+
+/* Myers' diff algorithm [1] is nicely explained in [2].
+ * [1] http://www.xmailserver.org/diff2.pdf
+ * [2] https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/ ff.
+ *
+ * Myers approaches finding the smallest diff as a graph problem.
+ * The crux is that the original algorithm requires quadratic amount of memory:
+ * both sides' lengths added, and that squared. So if we're diffing lines of text, two files with 1000 lines each would
+ * blow up to a matrix of about 2000 * 2000 ints of state, about 16 Mb of RAM to figure out 2 kb of text.
+ * The solution is using Myers' "divide and conquer" extension algorithm, which does the original traversal from both
+ * ends of the files to reach a middle where these "snakes" touch, hence does not need to backtrace the traversal, and
+ * so gets away with only keeping a single column of that huge state matrix in memory.
+ *
+ * Todo: the divide and conquer requires linear *space*, not necessarily linear *time*. It recurses, apparently doing
+ * multiple Myers passes, and also it apparently favors fragmented diffs in cases where chunks of text were moved to a
+ * different place. Up to a given count of diff atoms (text lines), it might be desirable to accept the quadratic memory
+ * usage, get nicer diffs and less re-iteration of the same data?
+ */
+
+struct diff_box {
+	unsigned int left_start;
+	unsigned int left_end;
+	unsigned int right_start;
+	unsigned int right_end;
+};
+
+#define diff_box_empty(DIFF_SNAKE) ((DIFF_SNAKE)->left_end == 0)
+
+
+/* If the two contents of a file are A B C D E and X B C Y,
+ * the Myers diff graph looks like:
+ *
+ *   k0  k1
+ *    \   \
+ * k-1     0 1 2 3 4 5
+ *   \      A B C D E
+ *     0   o-o-o-o-o-o
+ *      X  | | | | | |
+ *     1   o-o-o-o-o-o
+ *      B  | |\| | | |
+ *     2   o-o-o-o-o-o
+ *      C  | | |\| | |
+ *     3   o-o-o-o-o-o
+ *      Y  | | | | | |\
+ *     4   o-o-o-o-o-o c1
+ *                  \ \
+ *                 c-1 c0
+ *
+ * Moving right means delete an atom from the left-hand-side,
+ * Moving down means add an atom from the right-hand-side.
+ * Diagonals indicate identical atoms on both sides, the challenge is to use as many diagonals as possible.
+ *
+ * The original Myers algorithm walks all the way from the top left to the bottom right, remembers all steps, and then
+ * backtraces to find the shortest path. However, that requires keeping the entire graph in memory, which needs
+ * quadratic space.
+ *
+ * Myers adds a variant that uses linear space -- note, not linear time, only linear space: walk forward and backward,
+ * find a meeting point in the middle, and recurse on the two separate sections. This is called "divide and conquer".
+ *
+ * d: the step number, starting with 0, a.k.a. the distance from the starting point.
+ * k: relative index in the state array for the forward scan, indicating on which diagonal through the diff graph we
+ *    currently are.
+ * c: relative index in the state array for the backward scan, indicating the diagonal number from the bottom up.
+ *
+ * The "divide and conquer" traversal through the Myers graph looks like this:
+ *
+ *      | d=   0   1   2   3      2   1   0
+ *  ----+--------------------------------------------
+ *  k=  |                                      c=
+ *   4  |                                       3
+ *      |
+ *   3  |                 3,0    5,2            2
+ *      |                /          \
+ *   2  |             2,0            5,3        1
+ *      |            /                 \
+ *   1  |         1,0     4,3 >= 4,3    5,4<--  0
+ *      |        /       /          \  /
+ *   0  |  -->0,0     3,3            4,4       -1
+ *      |        \   /              /
+ *  -1  |         0,1     1,2    3,4           -2
+ *      |            \   /
+ *  -2  |             0,2                      -3
+ *      |                \
+ *      |                 0,3
+ *      |  forward->                 <-backward
+ *
+ * x,y pairs here are the coordinates in the Myers graph:
+ * x = atom index in left-side source, y = atom index in the right-side source.
+ *
+ * Only one forward column and one backward column are kept in mem, each need at most left.len + 1 + right.len items.
+ * Note that each d step occupies either the even or the odd items of a column: if e.g. the previous column is in the
+ * odd items, the next column is formed in the even items, without overwriting the previous column's results.
+ *
+ * Also note that from the diagonal index k and the x coordinate, the y coordinate can be derived:
+ *    y = x - k
+ * Hence the state array only needs to keep the x coordinate, i.e. the position in the left-hand file, and the y
+ * coordinate, i.e. position in the right-hand file, is derived from the index in the state array.
+ *
+ * The two traces meet at 4,3, the first step (here found in the forward traversal) where a forward position is on or
+ * past a backward traced position on the same diagonal.
+ *
+ * This divides the problem space into:
+ *
+ *         0 1 2 3 4 5
+ *          A B C D E
+ *     0   o-o-o-o-o
+ *      X  | | | | |
+ *     1   o-o-o-o-o
+ *      B  | |\| | |
+ *     2   o-o-o-o-o
+ *      C  | | |\| |
+ *     3   o-o-o-o-*-o   *: forward and backward meet here
+ *      Y          | |
+ *     4           o-o
+ *
+ * Doing the same on each section lead to:
+ *
+ *         0 1 2 3 4 5
+ *          A B C D E
+ *     0   o-o
+ *      X  | |
+ *     1   o-b    b: backward d=1 first reaches here (sliding up the snake)
+ *      B     \   f: then forward d=2 reaches here (sliding down the snake)
+ *     2       o     As result, the box from b to f is found to be identical;
+ *      C       \    leaving a top box from 0,0 to 1,1 and a bottom trivial tail 3,3 to 4,3.
+ *     3         f-o
+ *
+ *     3           o-*
+ *      Y            |
+ *     4             o   *: forward and backward meet here
+ *
+ * and solving the last top left box gives:
+ *
+ *         0 1 2 3 4 5
+ *          A B C D E           -A
+ *     0   o-o                  +X
+ *      X    |                   B
+ *     1     o                   C
+ *      B     \                 -D
+ *     2       o                -E
+ *      C       \               +Y
+ *     3         o-o-o
+ *      Y            |
+ *     4             o
+ *
+ */
+
+#define xk_to_y(X, K) ((X) - (K))
+#define xc_to_y(X, C, DELTA) ((X) - (C) + (DELTA))
+#define k_to_c(K, DELTA) ((K) + (DELTA))
+#define c_to_k(C, DELTA) ((C) - (DELTA))
+
+/* Do one forwards step in the "divide and conquer" graph traversal.
+ * left: the left side to diff.
+ * right: the right side to diff against.
+ * kd_forward: the traversal state for forwards traversal, modified by this function.
+ *             This is carried over between invocations with increasing d.
+ *             kd_forward points at the center of the state array, allowing negative indexes.
+ * kd_backward: the traversal state for backwards traversal, to find a meeting point.
+ *              Since forwards is done first, kd_backward will be valid for d - 1, not d.
+ *              kd_backward points at the center of the state array, allowing negative indexes.
+ * d: Step or distance counter, indicating for what value of d the kd_forward should be populated.
+ *    For d == 0, kd_forward[0] is initialized, i.e. the first invocation should be for d == 0.
+ * meeting_snake: resulting meeting point, if any.
+ */
+static void diff_divide_myers_forward(struct diff_data *left, struct diff_data *right,
+				      int *kd_forward, int *kd_backward, int d,
+				      struct diff_box *meeting_snake)
+{
+	int delta = (int)right->atoms.len - (int)left->atoms.len;
+	int prev_x;
+	int prev_y;
+	int k;
+	int x;
+
+	debug("-- %s d=%d\n", __func__, d);
+	debug_dump_myers_graph(left, right, NULL);
+
+	for (k = d; k >= -d; k -= 2) {
+		if (k < -(int)right->atoms.len || k > (int)left->atoms.len) {
+			/* This diagonal is completely outside of the Myers graph, don't calculate it. */
+			if (k < -(int)right->atoms.len)
+				debug(" %d k < -(int)right->atoms.len %d\n", k, -(int)right->atoms.len);
+			else
+				debug(" %d k > left->atoms.len %d\n", k, left->atoms.len);
+			if (k < 0) {
+				/* We are traversing negatively, and already below the entire graph, nothing will come
+				 * of this. */
+				debug(" break");
+				break;
+			}
+			debug(" continue");
+			continue;
+		}
+		debug("- k = %d\n", k);
+		if (d == 0) {
+			/* This is the initializing step. There is no prev_k yet, get the initial x from the top left of
+			 * the Myers graph. */
+			x = 0;
+		}
+		/* Favoring "-" lines first means favoring moving rightwards in the Myers graph.
+		 * For this, all k should derive from k - 1, only the bottom most k derive from k + 1:
+		 *
+		 *      | d=   0   1   2
+		 *  ----+----------------
+		 *  k=  |
+		 *   2  |             2,0 <-- from prev_k = 2 - 1 = 1
+		 *      |            /
+		 *   1  |         1,0
+		 *      |        /
+		 *   0  |  -->0,0     3,3
+		 *      |       \\   /
+		 *  -1  |         0,1 <-- bottom most for d=1 from prev_k = -1 + 1 = 0
+		 *      |           \\
+		 *  -2  |             0,2 <-- bottom most for d=2 from prev_k = -2 + 1 = -1
+		 *
+		 * Except when a k + 1 from a previous run already means a further advancement in the graph.
+		 * If k == d, there is no k + 1 and k - 1 is the only option.
+		 * If k < d, use k + 1 in case that yields a larger x. Also use k + 1 if k - 1 is outside the graph.
+		 */
+		else if (k > -d && (k == d
+				    || (k - 1 >= -(int)right->atoms.len
+					&& kd_forward[k - 1] >= kd_forward[k + 1]))) {
+			/* Advance from k - 1.
+			 * From position prev_k, step to the right in the Myers graph: x += 1.
+			 */
+			int prev_k = k - 1;
+			prev_x = kd_forward[prev_k];
+			prev_y = xk_to_y(prev_x, prev_k);
+			x = prev_x + 1;
+		} else {
+			/* The bottom most one.
+			 * From position prev_k, step to the bottom in the Myers graph: y += 1.
+			 * Incrementing y is achieved by decrementing k while keeping the same x.
+			 * (since we're deriving y from y = x - k).
+			 */
+			int prev_k = k + 1;
+			prev_x = kd_forward[prev_k];
+			prev_y = xk_to_y(prev_x, prev_k);
+			x = prev_x;
+		}
+
+		/* Slide down any snake that we might find here. */
+		while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len
+		       && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)]))
+		       x++;
+		kd_forward[k] = x;
+
+		if (DEBUG) {
+			int fi;
+			for (fi = d; fi >= k; fi--) {
+				debug("kd_forward[%d] = (%d, %d)\n", fi, kd_forward[fi], kd_forward[fi] - fi);
+				/*
+				if (kd_forward[fi] >= 0 && kd_forward[fi] < left->atoms.len)
+					debug_dump_atom(left, right, &left->atoms.head[kd_forward[fi]]);
+				else
+					debug("\n");
+				if (kd_forward[fi]-fi >= 0 && kd_forward[fi]-fi < right->atoms.len)
+					debug_dump_atom(right, left, &right->atoms.head[kd_forward[fi]-fi]);
+				else
+					debug("\n");
+				*/
+			}
+		}
+
+		if (x < 0 || x > left->atoms.len
+		    || xk_to_y(x, k) < 0 || xk_to_y(x, k) > right->atoms.len)
+			continue;
+
+		/* Figured out a new forwards traversal, see if this has gone onto or even past a preceding backwards
+		 * traversal.
+		 *
+		 * If the delta in length is odd, then d and backwards_d hit the same state indexes:
+		 *      | d=   0   1   2      1   0
+		 *  ----+----------------    ----------------
+		 *  k=  |                              c=
+		 *   4  |                               3
+		 *      |
+		 *   3  |                               2
+		 *      |                same
+		 *   2  |             2,0====5,3        1
+		 *      |            /          \
+		 *   1  |         1,0            5,4<-- 0
+		 *      |        /              /
+		 *   0  |  -->0,0     3,3====4,4       -1
+		 *      |        \   /
+		 *  -1  |         0,1                  -2
+		 *      |            \
+		 *  -2  |             0,2              -3
+		 *      |
+		 *
+		 * If the delta is even, they end up off-by-one, i.e. on different diagonals:
+		 *
+		 *      | d=   0   1   2    1   0
+		 *  ----+----------------  ----------------
+		 *      |                            c=
+		 *   3  |                             3
+		 *      |
+		 *   2  |             2,0 off         2
+		 *      |            /   \\
+		 *   1  |         1,0      4,3        1
+		 *      |        /       //   \
+		 *   0  |  -->0,0     3,3      4,4<-- 0
+		 *      |        \   /        /
+		 *  -1  |         0,1      3,4       -1
+		 *      |            \   //
+		 *  -2  |             0,2            -2
+		 *      |
+		 *
+		 * So in the forward path, we can only match up diagonals when the delta is odd.
+		 */
+		 /* Forwards is done first, so the backwards one was still at d - 1. Can't do this for d == 0. */
+		int backwards_d = d - 1;
+		if ((delta & 1) && (backwards_d >= 0)) {
+			debug("backwards_d = %d\n", backwards_d);
+
+			/* If both sides have the same length, forward and backward start on the same diagonal, meaning the
+			 * backwards state index c == k.
+			 * As soon as the lengths are not the same, the backwards traversal starts on a different diagonal, and
+			 * c = k shifted by the difference in length.
+			 */
+			int c = k_to_c(k, delta);
+
+			/* When the file sizes are very different, the traversal trees start on far distant diagonals.
+			 * They don't necessarily meet straight on. See whether this forward value is on a diagonal that
+			 * is also valid in kd_backward[], and match them if so. */
+			if (c >= -backwards_d && c <= backwards_d) {
+				/* Current k is on a diagonal that exists in kd_backward[]. If the two x positions have
+				 * met or passed (forward walked onto or past backward), then we've found a midpoint / a
+				 * mid-box.
+				 *
+				 * But we need to avoid matching a situation like this:
+				 *       0  1
+				 *        x y
+				 *   0   o-o-o
+				 *     x |\| |
+				 *   1   o-o-o
+				 *     y | |\|
+				 *   2  (B)o-o  <--(B) backwards traversal reached here
+				 *     a | | |
+				 *   3   o-o-o<-- prev_x, prev_y
+				 *     b | | |
+				 *   4   o-o(F) <--(F) forwards traversal  reached here
+				 *     x |\| |     Now both are on the same diagonal and look like they passed,
+				 *   5   o-o-o     but actually they have sneaked past each other and have not met.
+				 *     y | |\|
+				 *   6   o-o-o
+				 *
+				 * The solution is to notice that prev_x,prev_y were also already past (B).
+				 */
+				int backward_x = kd_backward[c];
+				int backward_y = xc_to_y(backward_x, c, delta);
+				debug(" prev_x,y = (%d,%d)  c%d:backward_x,y = (%d,%d)  k%d:x,y = (%d,%d)\n",
+				      prev_x, prev_y, c, backward_x, backward_y, k, x, xk_to_y(x, k));
+				if (prev_x <= backward_x && prev_y <= backward_y
+				    && x >= backward_x) {
+					*meeting_snake = (struct diff_box){
+						.left_start = backward_x,
+						.left_end = x,
+						.right_start = xc_to_y(backward_x, c, delta),
+						.right_end = xk_to_y(x, k),
+					};
+					debug("HIT x=(%u,%u) - y=(%u,%u)\n",
+					      meeting_snake->left_start,
+					      meeting_snake->right_start,
+					      meeting_snake->left_end,
+					      meeting_snake->right_end);
+					return;
+				}
+			}
+		}
+	}
+}
+
+/* Do one backwards step in the "divide and conquer" graph traversal.
+ * left: the left side to diff.
+ * right: the right side to diff against.
+ * kd_forward: the traversal state for forwards traversal, to find a meeting point.
+ *             Since forwards is done first, after this, both kd_forward and kd_backward will be valid for d.
+ *             kd_forward points at the center of the state array, allowing negative indexes.
+ * kd_backward: the traversal state for backwards traversal, to find a meeting point.
+ *              This is carried over between invocations with increasing d.
+ *              kd_backward points at the center of the state array, allowing negative indexes.
+ * d: Step or distance counter, indicating for what value of d the kd_backward should be populated.
+ *    Before the first invocation, kd_backward[0] shall point at the bottom right of the Myers graph
+ *    (left.len, right.len).
+ *    The first invocation will be for d == 1.
+ * meeting_snake: resulting meeting point, if any.
+ */
+static void diff_divide_myers_backward(struct diff_data *left, struct diff_data *right,
+				       int *kd_forward, int *kd_backward, int d,
+				       struct diff_box *meeting_snake)
+{
+	int delta = (int)right->atoms.len - (int)left->atoms.len;
+	int prev_x;
+	int prev_y;
+	int c;
+	int x;
+
+	debug("-- %s d=%d\n", __func__, d);
+	debug_dump_myers_graph(left, right, NULL);
+
+	for (c = d; c >= -d; c -= 2) {
+		if (c < -(int)left->atoms.len || c > (int)right->atoms.len) {
+			/* This diagonal is completely outside of the Myers graph, don't calculate it. */
+			if (c < -(int)left->atoms.len)
+				debug(" %d c < -(int)left->atoms.len %d\n", c, -(int)left->atoms.len);
+			else
+				debug(" %d c > right->atoms.len %d\n", c, right->atoms.len);
+			if (c < 0) {
+				/* We are traversing negatively, and already below the entire graph, nothing will come
+				 * of this. */
+				debug(" break");
+				break;
+			}
+			debug(" continue");
+			continue;
+		}
+		debug("- c = %d\n", c);
+		if (d == 0) {
+			/* This is the initializing step. There is no prev_c yet, get the initial x from the bottom
+			 * right of the Myers graph. */
+			x = left->atoms.len;
+		}
+		/* Favoring "-" lines first means favoring moving rightwards in the Myers graph.
+		 * For this, all c should derive from c - 1, only the bottom most c derive from c + 1:
+		 *
+		 *                                  2   1   0
+		 *  ---------------------------------------------------
+		 *                                               c=
+		 *                                                3
+		 *
+		 *         from prev_c = c - 1 --> 5,2            2
+		 *                                    \
+		 *                                     5,3        1
+		 *                                        \
+		 *                                 4,3     5,4<-- 0
+		 *                                    \   /
+		 *  bottom most for d=1 from c + 1 --> 4,4       -1
+		 *                                    /
+		 *         bottom most for d=2 --> 3,4           -2
+		 *
+		 * Except when a c + 1 from a previous run already means a further advancement in the graph.
+		 * If c == d, there is no c + 1 and c - 1 is the only option.
+		 * If c < d, use c + 1 in case that yields a larger x. Also use c + 1 if c - 1 is outside the graph.
+		 */
+		else if (c > -d && (c == d
+				    || (c - 1 >= -(int)right->atoms.len
+					&& kd_backward[c - 1] <= kd_backward[c + 1]))) {
+			/* A top one.
+			 * From position prev_c, step upwards in the Myers graph: y -= 1.
+			 * Decrementing y is achieved by incrementing c while keeping the same x.
+			 * (since we're deriving y from y = x - c + delta).
+			 */
+			int prev_c = c - 1;
+			prev_x = kd_backward[prev_c];
+			prev_y = xc_to_y(prev_x, prev_c, delta);
+			x = prev_x;
+		} else {
+			/* The bottom most one.
+			 * From position prev_c, step to the left in the Myers graph: x -= 1.
+			 */
+			int prev_c = c + 1;
+			prev_x = kd_backward[prev_c];
+			prev_y = xc_to_y(prev_x, prev_c, delta);
+			x = prev_x - 1;
+		}
+
+		/* Slide up any snake that we might find here. */
+		debug("c=%d x-1=%d Yb-1=%d-1=%d\n", c, x-1, xc_to_y(x, c, delta), xc_to_y(x, c, delta)-1);
+		if (x > 0) {
+			debug("  l="); debug_dump_atom(left, right, &left->atoms.head[x-1]);
+		}
+		if (xc_to_y(x, c, delta) > 0) {
+			debug("   r="); debug_dump_atom(right, left, &right->atoms.head[xc_to_y(x, c, delta)-1]);
+		}
+		while (x > 0 && xc_to_y(x, c, delta) > 0
+		       && diff_atom_same(&left->atoms.head[x-1], &right->atoms.head[xc_to_y(x, c, delta)-1]))
+		       x--;
+		kd_backward[c] = x;
+
+		if (DEBUG) {
+			int fi;
+			for (fi = d; fi >= c; fi--) {
+				debug("kd_backward[%d] = (%d, %d)\n", fi, kd_backward[fi],
+				      kd_backward[fi] - fi + delta);
+				/*
+				if (kd_backward[fi] >= 0 && kd_backward[fi] < left->atoms.len)
+					debug_dump_atom(left, right, &left->atoms.head[kd_backward[fi]]);
+				else
+					debug("\n");
+				if (kd_backward[fi]-fi+delta >= 0 && kd_backward[fi]-fi+delta < right->atoms.len)
+					debug_dump_atom(right, left, &right->atoms.head[kd_backward[fi]-fi+delta]);
+				else
+					debug("\n");
+				*/
+			}
+		}
+
+		if (x < 0 || x > left->atoms.len
+		    || xc_to_y(x, c, delta) < 0 || xc_to_y(x, c, delta) > right->atoms.len)
+			continue;
+
+		/* Figured out a new backwards traversal, see if this has gone onto or even past a preceding forwards
+		 * traversal.
+		 *
+		 * If the delta in length is even, then d and backwards_d hit the same state indexes -- note how this is
+		 * different from in the forwards traversal, because now both d are the same:
+		 *
+		 *      | d=   0   1   2      2   1   0
+		 *  ----+----------------    --------------------
+		 *  k=  |                                  c=
+		 *   4  |
+		 *      |
+		 *   3  |                                   3
+		 *      |                same
+		 *   2  |             2,0====5,2            2
+		 *      |            /          \
+		 *   1  |         1,0            5,3        1
+		 *      |        /              /  \
+		 *   0  |  -->0,0     3,3====4,3    5,4<--  0
+		 *      |        \   /             /
+		 *  -1  |         0,1            4,4       -1
+		 *      |            \
+		 *  -2  |             0,2                  -2
+		 *      |
+		 *                                      -3
+		 * If the delta is odd, they end up off-by-one, i.e. on different diagonals.
+		 * So in the backward path, we can only match up diagonals when the delta is even.
+		 */
+		if ((delta & 1) == 0) {
+			/* Forwards was done first, now both d are the same. */
+			int forwards_d = d;
+
+			/* As soon as the lengths are not the same, the backwards traversal starts on a different diagonal, and
+			 * c = k shifted by the difference in length.
+			 */
+			int k = c_to_k(c, delta);
+
+			/* When the file sizes are very different, the traversal trees start on far distant diagonals.
+			 * They don't necessarily meet straight on. See whether this backward value is also on a valid
+			 * diagonal in kd_forward[], and match them if so. */
+			if (k >= -forwards_d && k <= forwards_d) {
+				/* Current c is on a diagonal that exists in kd_forward[]. If the two x positions have
+				 * met or passed (backward walked onto or past forward), then we've found a midpoint / a
+				 * mid-box. */
+				int forward_x = kd_forward[k];
+				int forward_y = xk_to_y(forward_x, k);
+				debug("Compare %d to %d  k=%d  (x=%d,y=%d) to (x=%d,y=%d)\n",
+				      forward_x, x, k,
+				      forward_x, xk_to_y(forward_x, k), x, xc_to_y(x, c, delta));
+				if (forward_x <= prev_x && forward_y <= prev_y
+				    && forward_x >= x) {
+					*meeting_snake = (struct diff_box){
+						.left_start = x,
+						.left_end = forward_x,
+						.right_start = xc_to_y(x, c, delta),
+						.right_end = xk_to_y(forward_x, k),
+					};
+					debug("HIT x=%u,%u - y=%u,%u\n",
+					      meeting_snake->left_start,
+					      meeting_snake->right_start,
+					      meeting_snake->left_end,
+					      meeting_snake->right_end);
+					return;
+				}
+			}
+		}
+	}
+}
+
+/* Myers "Divide et Impera": tracing forwards from the start and backwards from the end to find a midpoint that divides
+ * the problem into smaller chunks. Requires only linear amounts of memory. */
+enum diff_rc diff_algo_myers_divide(const struct diff_algo_config *algo_config, struct diff_state *state)
+{
+	enum diff_rc rc = DIFF_RC_ENOMEM;
+	struct diff_data *left = &state->left;
+	struct diff_data *right = &state->right;
+
+	debug("\n** %s\n", __func__);
+	debug("left:\n");
+	debug_dump(left);
+	debug("right:\n");
+	debug_dump(right);
+	debug_dump_myers_graph(left, right, NULL);
+
+	/* Allocate two columns of a Myers graph, one for the forward and one for the backward traversal. */
+	unsigned int max = left->atoms.len + right->atoms.len;
+	size_t kd_len = max + 1;
+	size_t kd_buf_size = kd_len << 1;
+	int *kd_buf = reallocarray(NULL, kd_buf_size, sizeof(int));
+	if (!kd_buf)
+		return DIFF_RC_ENOMEM;
+	int i;
+	for (i = 0; i < kd_buf_size; i++)
+		kd_buf[i] = -1;
+	int *kd_forward = kd_buf;
+	int *kd_backward = kd_buf + kd_len;
+
+	/* The 'k' axis in Myers spans positive and negative indexes, so point the kd to the middle.
+	 * It is then possible to index from -max/2 .. max/2. */
+	kd_forward += max/2;
+	kd_backward += max/2;
+
+	int d;
+	struct diff_box mid_snake = {};
+	for (d = 0; d <= (max/2); d++) {
+		debug("-- d=%d\n", d);
+		diff_divide_myers_forward(left, right, kd_forward, kd_backward, d, &mid_snake);
+		if (!diff_box_empty(&mid_snake))
+			break;
+		diff_divide_myers_backward(left, right, kd_forward, kd_backward, d, &mid_snake);
+		if (!diff_box_empty(&mid_snake))
+			break;
+	}
+
+	if (diff_box_empty(&mid_snake)) {
+		/* Divide and conquer failed to find a meeting point. Use the fallback_algo defined in the algo_config
+		 * (leave this to the caller). This is just paranoia/sanity, we normally should always find a midpoint.
+		 */
+		debug(" no midpoint \n");
+		rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+		goto return_rc;
+	} else {
+		debug(" mid snake L: %u to %u of %u   R: %u to %u of %u\n",
+		      mid_snake.left_start, mid_snake.left_end, left->atoms.len,
+		      mid_snake.right_start, mid_snake.right_end, right->atoms.len);
+
+		/* Section before the mid-snake.  */
+		debug("Section before the mid-snake\n");
+
+		struct diff_atom *left_atom = &left->atoms.head[0];
+		unsigned int left_section_len = mid_snake.left_start;
+		struct diff_atom *right_atom = &right->atoms.head[0];
+		unsigned int right_section_len = mid_snake.right_start;
+
+		if (left_section_len && right_section_len) {
+			/* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */
+			if (!diff_state_add_chunk(state, false,
+						  left_atom, left_section_len,
+						  right_atom, right_section_len))
+				goto return_rc;
+		} else if (left_section_len && !right_section_len) {
+			/* Only left atoms and none on the right, they form a "minus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, left_section_len,
+						  right_atom, 0))
+				goto return_rc;
+		} else if (!left_section_len && right_section_len) {
+			/* No left atoms, only atoms on the right, they form a "plus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, 0,
+						  right_atom, right_section_len))
+				goto return_rc;
+		}
+		/* else: left_section_len == 0 and right_section_len == 0, i.e. nothing before the mid-snake. */
+
+		/* the mid-snake, identical data on both sides: */
+		debug("the mid-snake\n");
+		if (!diff_state_add_chunk(state, true,
+					  &left->atoms.head[mid_snake.left_start],
+					  mid_snake.left_end - mid_snake.left_start,
+					  &right->atoms.head[mid_snake.right_start],
+					  mid_snake.right_end - mid_snake.right_start))
+			goto return_rc;
+
+		/* Section after the mid-snake. */
+		debug("Section after the mid-snake\n");
+		debug("  left_end %u  right_end %u\n", mid_snake.left_end, mid_snake.right_end);
+		debug("  left_count %u  right_count %u\n", left->atoms.len, right->atoms.len);
+		left_atom = &left->atoms.head[mid_snake.left_end];
+		left_section_len = left->atoms.len - mid_snake.left_end;
+		right_atom = &right->atoms.head[mid_snake.right_end];
+		right_section_len = right->atoms.len - mid_snake.right_end;
+
+		if (left_section_len && right_section_len) {
+			/* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */
+			if (!diff_state_add_chunk(state, false,
+						  left_atom, left_section_len,
+						  right_atom, right_section_len))
+				goto return_rc;
+		} else if (left_section_len && !right_section_len) {
+			/* Only left atoms and none on the right, they form a "minus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, left_section_len,
+						  right_atom, 0))
+				goto return_rc;
+		} else if (!left_section_len && right_section_len) {
+			/* No left atoms, only atoms on the right, they form a "plus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, 0,
+						  right_atom, right_section_len))
+				goto return_rc;
+		}
+		/* else: left_section_len == 0 and right_section_len == 0, i.e. nothing after the mid-snake. */
+	}
+
+	rc = DIFF_RC_OK;
+
+return_rc:
+	free(kd_buf);
+	debug("** END %s\n", __func__);
+	return rc;
+}
+
+/* Myers Diff tracing from the start all the way through to the end, requiring quadratic amounts of memory. This can
+ * fail if the required space surpasses algo_config->permitted_state_size. */
+enum diff_rc diff_algo_myers(const struct diff_algo_config *algo_config, struct diff_state *state)
+{
+	/* do a diff_divide_myers_forward() without a _backward(), so that it walks forward across the entire
+	 * files to reach the end. Keep each run's state, and do a final backtrace. */
+	enum diff_rc rc = DIFF_RC_ENOMEM;
+	struct diff_data *left = &state->left;
+	struct diff_data *right = &state->right;
+
+	debug("\n** %s\n", __func__);
+	debug("left:\n");
+	debug_dump(left);
+	debug("right:\n");
+	debug_dump(right);
+	debug_dump_myers_graph(left, right, NULL);
+
+	/* Allocate two columns of a Myers graph, one for the forward and one for the backward traversal. */
+	unsigned int max = left->atoms.len + right->atoms.len;
+	size_t kd_len = max + 1 + max;
+	size_t kd_buf_size = kd_len * kd_len;
+	debug("state size: %zu\n", kd_buf_size);
+	if (kd_buf_size < kd_len /* overflow? */
+	    || kd_buf_size * sizeof(int) > algo_config->permitted_state_size) {
+		debug("state size %zu > permitted_state_size %zu, use fallback_algo\n",
+		      kd_buf_size, algo_config->permitted_state_size);
+		return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+	}
+
+	int *kd_buf = reallocarray(NULL, kd_buf_size, sizeof(int));
+	if (!kd_buf)
+		return DIFF_RC_ENOMEM;
+	int i;
+	for (i = 0; i < kd_buf_size; i++)
+		kd_buf[i] = -1;
+
+	/* The 'k' axis in Myers spans positive and negative indexes, so point the kd to the middle.
+	 * It is then possible to index from -max .. max. */
+	int *kd_origin = kd_buf + max;
+	int *kd_column = kd_origin;
+
+	int d;
+	int backtrack_d = -1;
+	int backtrack_k = 0;
+	int k;
+	int x, y;
+	for (d = 0; d <= max; d++, kd_column += kd_len) {
+		debug("-- d=%d\n", d);
+
+		debug("-- %s d=%d\n", __func__, d);
+
+		for (k = d; k >= -d; k -= 2) {
+			if (k < -(int)right->atoms.len || k > (int)left->atoms.len) {
+				/* This diagonal is completely outside of the Myers graph, don't calculate it. */
+				if (k < -(int)right->atoms.len)
+					debug(" %d k < -(int)right->atoms.len %d\n", k, -(int)right->atoms.len);
+				else
+					debug(" %d k > left->atoms.len %d\n", k, left->atoms.len);
+				if (k < 0) {
+					/* We are traversing negatively, and already below the entire graph, nothing will come
+					 * of this. */
+					debug(" break");
+					break;
+				}
+				debug(" continue");
+				continue;
+			}
+
+			debug("- k = %d\n", k);
+			if (d == 0) {
+				/* This is the initializing step. There is no prev_k yet, get the initial x from the top left of
+				 * the Myers graph. */
+				x = 0;
+			} else {
+				int *kd_prev_column = kd_column - kd_len;
+
+				/* Favoring "-" lines first means favoring moving rightwards in the Myers graph.
+				 * For this, all k should derive from k - 1, only the bottom most k derive from k + 1:
+				 *
+				 *      | d=   0   1   2
+				 *  ----+----------------
+				 *  k=  |
+				 *   2  |             2,0 <-- from prev_k = 2 - 1 = 1
+				 *      |            /
+				 *   1  |         1,0
+				 *      |        /
+				 *   0  |  -->0,0     3,3
+				 *      |       \\   /
+				 *  -1  |         0,1 <-- bottom most for d=1 from prev_k = -1 + 1 = 0
+				 *      |           \\
+				 *  -2  |             0,2 <-- bottom most for d=2 from prev_k = -2 + 1 = -1
+				 *
+				 * Except when a k + 1 from a previous run already means a further advancement in the graph.
+				 * If k == d, there is no k + 1 and k - 1 is the only option.
+				 * If k < d, use k + 1 in case that yields a larger x. Also use k + 1 if k - 1 is outside the graph.
+				 */
+				if (k > -d && (k == d
+					       || (k - 1 >= -(int)right->atoms.len
+						   && kd_prev_column[k - 1] >= kd_prev_column[k + 1]))) {
+					/* Advance from k - 1.
+					 * From position prev_k, step to the right in the Myers graph: x += 1.
+					 */
+					int prev_k = k - 1;
+					int prev_x = kd_prev_column[prev_k];
+					x = prev_x + 1;
+				} else {
+					/* The bottom most one.
+					 * From position prev_k, step to the bottom in the Myers graph: y += 1.
+					 * Incrementing y is achieved by decrementing k while keeping the same x.
+					 * (since we're deriving y from y = x - k).
+					 */
+					int prev_k = k + 1;
+					int prev_x = kd_prev_column[prev_k];
+					x = prev_x;
+				}
+			}
+
+			/* Slide down any snake that we might find here. */
+			while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len
+			       && diff_atom_same(&left->atoms.head[x], &right->atoms.head[xk_to_y(x, k)]))
+			       x++;
+			kd_column[k] = x;
+
+			if (DEBUG) {
+				int fi;
+				for (fi = d; fi >= k; fi-=2) {
+					debug("kd_column[%d] = (%d, %d)\n", fi, kd_column[fi], kd_column[fi] - fi);
+#if 0
+					if (kd_column[fi] >= 0 && kd_column[fi] < left->atoms.len)
+						debug_dump_atom(left, right, &left->atoms.head[kd_column[fi]]);
+					else
+						debug("\n");
+					if (kd_column[fi]-fi >= 0 && kd_column[fi]-fi < right->atoms.len)
+						debug_dump_atom(right, left, &right->atoms.head[kd_column[fi]-fi]);
+					else
+						debug("\n");
+#endif
+				}
+			}
+
+			if (x == left->atoms.len && xk_to_y(x, k) == right->atoms.len) {
+				/* Found a path */
+				backtrack_d = d;
+				backtrack_k = k;
+				debug("Reached the end at d = %d, k = %d\n",
+				      backtrack_d, backtrack_k);
+				break;
+			}
+		}
+
+		if (backtrack_d >= 0)
+			break;
+	}
+
+	debug_dump_myers_graph(left, right, kd_origin);
+
+	/* backtrack. A matrix spanning from start to end of the file is ready:
+	 *
+	 *      | d=   0   1   2   3   4
+	 *  ----+---------------------------------
+	 *  k=  |
+	 *   3  |
+	 *      |
+	 *   2  |             2,0
+	 *      |            /
+	 *   1  |         1,0     4,3
+	 *      |        /       /   \
+	 *   0  |  -->0,0     3,3     4,4 --> backtrack_d = 4, backtrack_k = 0
+	 *      |        \   /   \
+	 *  -1  |         0,1     3,4
+	 *      |            \
+	 *  -2  |             0,2
+	 *      |
+	 *
+	 * From (4,4) backwards, find the previous position that is the largest, and remember it.
+	 *
+	 */
+	for (d = backtrack_d, k = backtrack_k; d >= 0; d--) {
+		x = kd_column[k];
+		y = xk_to_y(x, k);
+
+		/* When the best position is identified, remember it for that kd_column.
+		 * That kd_column is no longer needed otherwise, so just re-purpose kd_column[0] = x and kd_column[1] = y,
+		 * so that there is no need to allocate more memory.
+		 */
+		kd_column[0] = x;
+		kd_column[1] = y;
+		debug("Backtrack d=%d: xy=(%d, %d)\n",
+		      d, kd_column[0], kd_column[1]);
+
+		/* Don't access memory before kd_buf */
+		if (d == 0)
+			break;
+		int *kd_prev_column = kd_column - kd_len;
+
+		/* When y == 0, backtracking downwards (k-1) is the only way.
+		 * When x == 0, backtracking upwards (k+1) is the only way.
+		 *
+		 *      | d=   0   1   2   3   4
+		 *  ----+---------------------------------
+		 *  k=  |
+		 *   3  |
+		 *      |                ..y == 0
+		 *   2  |             2,0
+		 *      |            /
+		 *   1  |         1,0     4,3
+		 *      |        /       /   \
+		 *   0  |  -->0,0     3,3     4,4 --> backtrack_d = 4, backtrack_k = 0
+		 *      |        \   /   \
+		 *  -1  |         0,1     3,4
+		 *      |            \
+		 *  -2  |             0,2__
+		 *      |                  x == 0
+		 */
+		debug("prev[k-1] = %d,%d  prev[k+1] = %d,%d\n",
+		      kd_prev_column[k-1], xk_to_y(kd_prev_column[k-1],k-1),
+		      kd_prev_column[k+1], xk_to_y(kd_prev_column[k+1],k+1));
+		if (y == 0
+		    || (x > 0 && kd_prev_column[k - 1] >= kd_prev_column[k + 1])) {
+			k = k - 1;
+			debug("prev k=k-1=%d x=%d y=%d\n",
+			      k, kd_prev_column[k], xk_to_y(kd_prev_column[k], k));
+		} else {
+			k = k + 1;
+			debug("prev k=k+1=%d x=%d y=%d\n",
+			      k, kd_prev_column[k], xk_to_y(kd_prev_column[k], k));
+		}
+		kd_column = kd_prev_column;
+	}
+
+	/* Forwards again, this time recording the diff chunks.
+	 * Definitely start from 0,0. kd_column[0] may actually point to the bottom of a snake starting at 0,0 */
+	x = 0;
+	y = 0;
+
+	kd_column = kd_origin;
+	for (d = 0; d <= backtrack_d; d++, kd_column += kd_len) {
+		int next_x = kd_column[0];
+		int next_y = kd_column[1];
+		debug("Forward track from xy(%d,%d) to xy(%d,%d)\n",
+		      x, y, next_x, next_y);
+
+		struct diff_atom *left_atom = &left->atoms.head[x];
+		int left_section_len = next_x - x;
+		struct diff_atom *right_atom = &right->atoms.head[y];
+		int right_section_len = next_y - y;
+
+		rc = DIFF_RC_ENOMEM;
+		if (left_section_len && right_section_len) {
+			/* This must be a snake slide.
+			 * Snake slides have a straight line leading into them (except when starting at (0,0)). Find
+			 * out whether the lead-in is horizontal or vertical:
+			 *
+			 *     left
+			 *  ---------->
+			 *  |
+			 * r|   o-o        o
+			 * i|      \       |
+			 * g|       o      o
+			 * h|        \      \
+			 * t|         o      o
+			 *  v
+			 *
+			 * If left_section_len > right_section_len, the lead-in is horizontal, meaning first
+			 * remove one atom from the left before sliding down the snake.
+			 * If right_section_len > left_section_len, the lead-in is vetical, so add one atom from
+			 * the right before sliding down the snake. */
+			if (left_section_len == right_section_len + 1) {
+				if (!diff_state_add_chunk(state, true,
+							  left_atom, 1,
+							  right_atom, 0))
+					goto return_rc;
+				left_atom++;
+				left_section_len--;
+			} else if (right_section_len == left_section_len + 1) {
+				if (!diff_state_add_chunk(state, true,
+							  left_atom, 0,
+							  right_atom, 1))
+					goto return_rc;
+				right_atom++;
+				right_section_len--;
+			} else if (left_section_len != right_section_len) {
+				/* The numbers are making no sense. Should never happen. */
+				rc = DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+				goto return_rc;
+			}
+
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, left_section_len,
+						  right_atom, right_section_len))
+				goto return_rc;
+		} else if (left_section_len && !right_section_len) {
+			/* Only left atoms and none on the right, they form a "minus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, left_section_len,
+						  right_atom, 0))
+				goto return_rc;
+		} else if (!left_section_len && right_section_len) {
+			/* No left atoms, only atoms on the right, they form a "plus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, 0,
+						  right_atom, right_section_len))
+				goto return_rc;
+		}
+
+		x = next_x;
+		y = next_y;
+	}
+
+	rc = DIFF_RC_OK;
+
+return_rc:
+	free(kd_buf);
+	debug("** END %s rc=%d\n", __func__, rc);
+	return rc;
+}
blob - /dev/null
blob + 20d1f9a79ec84281efede279f51bd2292a99466a (mode 644)
--- /dev/null
+++ lib/diff_output.c
@@ -0,0 +1,52 @@
+/* Common parts for printing diff output */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <diff/diff_output.h>
+
+void diff_output_lines(FILE *dest, const char *prefix, struct diff_atom *start_atom, unsigned int count)
+{
+	struct diff_atom *atom;
+	foreach_diff_atom(atom, start_atom, count) {
+		fprintf(dest, "%s", prefix);
+		int i;
+		unsigned int len = atom->len;
+		if (len && atom->at[len - 1] == '\n') {
+			len--;
+			if (len && atom->at[len - 1] == '\r')
+				len--;
+		}
+
+		for (i = 0; i < len; i++) {
+			char c = atom->at[i];
+			if (c < 0x20 || c >= 0x7f)
+				fprintf(dest, "\\x%02x", (unsigned char)c);
+			else
+				fprintf(dest, "%c", c);
+		}
+		fprintf(dest, "\n");
+	}
+}
+
+enum diff_rc diff_output_info(FILE *dest, const struct diff_input_info *info)
+{
+	if (info->arbitrary_info && *info->arbitrary_info)
+		fprintf(dest, "%s", info->arbitrary_info);
+	fprintf(dest, "--- %s\n+++ %s\n",
+		info->left_path ? : "a",
+		info->right_path ? : "b");
+        return DIFF_RC_OK;
+}
blob - /dev/null
blob + 796c3ee69f83bf338c67c8b310b27c2dea7501f1 (mode 644)
--- /dev/null
+++ lib/diff_output_plain.c
@@ -0,0 +1,54 @@
+/* Output all lines of a diff_result. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <diff/diff_output.h>
+
+enum diff_rc diff_output_plain(FILE *dest, const struct diff_input_info *info,
+			       const struct diff_result *result)
+{
+	if (!result)
+		return DIFF_RC_EINVAL;
+	if (result->rc != DIFF_RC_OK)
+		return result->rc;
+
+	diff_output_info(dest, info);
+
+	int i;
+	for (i = 0; i < result->chunks.len; i++) {
+		struct diff_chunk *c = &result->chunks.head[i];
+		if (c->left_count && c->right_count)
+			diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count);
+		else if (c->left_count && !c->right_count)
+			diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count);
+		else if (c->right_count && !c->left_count)
+			diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count);
+	}
+	return DIFF_RC_OK;
+}
+
+enum diff_rc diff_plain(FILE *dest, const struct diff_config *diff_config,
+			const struct diff_input_info *info,
+			const char *left, int left_len, const char *right, int right_len)
+{
+	enum diff_rc rc;
+	left_len = left_len < 0 ? strlen(left) : left_len;
+	right_len = right_len < 0 ? strlen(right) : right_len;
+	struct diff_result *result = diff_main(diff_config, left, left_len, right, right_len);
+	rc = diff_output_plain(dest, info, result);
+	diff_result_free(result);
+	return rc;
+}
blob - /dev/null
blob + d12e4c0b5d67538e6f0dbd89b84c22447ec44d25 (mode 644)
--- /dev/null
+++ lib/diff_output_unidiff.c
@@ -0,0 +1,233 @@
+/* Produce a unidiff output from a diff_result. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <diff/diff_output.h>
+
+#include "debug.h"
+
+enum chunk_type {
+	CHUNK_EMPTY,
+	CHUNK_PLUS,
+	CHUNK_MINUS,
+	CHUNK_SAME,
+	CHUNK_WEIRD,
+};
+
+static inline enum chunk_type chunk_type(const struct diff_chunk *chunk)
+{
+	if (!chunk->left_count && !chunk->right_count)
+		return CHUNK_EMPTY;
+	if (!chunk->solved)
+		return CHUNK_WEIRD;
+	if (!chunk->right_count)
+		return CHUNK_MINUS;
+	if (!chunk->left_count)
+		return CHUNK_PLUS;
+	if (chunk->left_count != chunk->right_count)
+		return CHUNK_WEIRD;
+	return CHUNK_SAME;
+}
+
+#define MAX(A,B) ((A)>(B)?(A):(B))
+#define MIN(A,B) ((A)<(B)?(A):(B))
+
+struct range {
+	int start;
+	int end;
+};
+
+static bool range_empty(const struct range *r)
+{
+	return r->start == r->end;
+}
+
+static bool ranges_touch(const struct range *a, const struct range *b)
+{
+	return (a->end >= b->start) && (a->start <= b->end);
+}
+
+static void ranges_merge(struct range *a, const struct range *b)
+{
+	*a = (struct range){
+		.start = MIN(a->start, b->start),
+		.end = MAX(a->end, b->end),
+	};
+}
+
+struct chunk_context {
+	struct range chunk;
+	struct range left, right;
+};
+
+static bool chunk_context_empty(const struct chunk_context *cc)
+{
+	return range_empty(&cc->chunk);
+}
+
+static void chunk_context_get(struct chunk_context *cc, const struct diff_result *r, int chunk_idx,
+		       int context_lines)
+{
+	const struct diff_chunk *c = &r->chunks.head[chunk_idx];
+	int left_start = diff_atom_root_idx(&r->left, c->left_start);
+	int right_start = diff_atom_root_idx(&r->right, c->right_start);
+
+	*cc = (struct chunk_context){
+		.chunk = {
+			.start = chunk_idx,
+			.end = chunk_idx + 1,
+		},
+		.left = {
+			.start = MAX(0, left_start - context_lines),
+			.end = MIN(r->left.atoms.len, left_start + c->left_count + context_lines),
+		},
+		.right = {
+			.start = MAX(0, right_start - context_lines),
+			.end = MIN(r->right.atoms.len, right_start + c->right_count + context_lines),
+		},
+	};
+}
+
+static bool chunk_contexts_touch(const struct chunk_context *cc, const struct chunk_context *other)
+{
+	return ranges_touch(&cc->chunk, &other->chunk)
+		|| ranges_touch(&cc->left, &other->left)
+		|| ranges_touch(&cc->right, &other->right);
+}
+
+static void chunk_contexts_merge(struct chunk_context *cc, const struct chunk_context *other)
+{
+	ranges_merge(&cc->chunk, &other->chunk);
+	ranges_merge(&cc->left, &other->left);
+	ranges_merge(&cc->right, &other->right);
+}
+
+static void diff_output_unidiff_chunk(FILE *dest, bool *info_printed, const struct diff_input_info *info,
+				      const struct diff_result *result, const struct chunk_context *cc)
+{
+	if (range_empty(&cc->left) && range_empty(&cc->right))
+		return;
+
+	if (!(*info_printed)) {
+		diff_output_info(dest, info);
+		*info_printed = true;
+	}
+
+	fprintf(dest, "@@ -%d,%d +%d,%d @@\n",
+		cc->left.start + 1, cc->left.end - cc->left.start,
+		cc->right.start + 1, cc->right.end - cc->right.start);
+
+	/* Got the absolute line numbers where to start printing, and the index of the interesting (non-context) chunk.
+	 * To print context lines above the interesting chunk, nipping on the previous chunk index may be necessary.
+	 * It is guaranteed to be only context lines where left == right, so it suffices to look on the left. */
+	const struct diff_chunk *first_chunk = &result->chunks.head[cc->chunk.start];
+	int chunk_start_line = diff_atom_root_idx(&result->left, first_chunk->left_start);
+	if (cc->left.start < chunk_start_line)
+		diff_output_lines(dest, " ", &result->left.atoms.head[cc->left.start],
+				  chunk_start_line - cc->left.start);
+
+	/* Now write out all the joined chunks and contexts between them */
+	int c_idx;
+	for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
+		const struct diff_chunk *c = &result->chunks.head[c_idx];
+
+		if (c->left_count && c->right_count)
+			diff_output_lines(dest, c->solved ? " " : "?", c->left_start, c->left_count);
+		else if (c->left_count && !c->right_count)
+			diff_output_lines(dest, c->solved ? "-" : "?", c->left_start, c->left_count);
+		else if (c->right_count && !c->left_count)
+			diff_output_lines(dest, c->solved ? "+" : "?", c->right_start, c->right_count);
+	}
+
+	/* Trailing context? */
+	const struct diff_chunk *last_chunk = &result->chunks.head[cc->chunk.end - 1];
+	int chunk_end_line = diff_atom_root_idx(&result->left, last_chunk->left_start + last_chunk->left_count);
+	if (cc->left.end > chunk_end_line)
+		diff_output_lines(dest, " ", &result->left.atoms.head[chunk_end_line],
+				  cc->left.end - chunk_end_line);
+}
+
+enum diff_rc diff_output_unidiff(FILE *dest, const struct diff_input_info *info,
+				 const struct diff_result *result, unsigned int context_lines)
+{
+	if (!result)
+		return DIFF_RC_EINVAL;
+	if (result->rc != DIFF_RC_OK)
+		return result->rc;
+
+	struct chunk_context cc = {};
+	bool info_printed = false;
+
+	int i;
+	for (i = 0; i < result->chunks.len; i++) {
+		struct diff_chunk *c = &result->chunks.head[i];
+		enum chunk_type t = chunk_type(c);
+
+		if (t == CHUNK_MINUS || t == CHUNK_PLUS) {
+			if (chunk_context_empty(&cc)) {
+				/* These are the first lines being printed.
+				 * Note down the start point, any number of subsequent chunks may be joined up to this
+				 * unidiff chunk by context lines or by being directly adjacent. */
+				chunk_context_get(&cc, result, i, context_lines);
+				debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n",
+				      cc.chunk.start, cc.chunk.end,
+				      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+			} else {
+				/* There already is a previous chunk noted down for being printed.
+				 * Does it join up with this one? */
+				struct chunk_context next;
+				chunk_context_get(&next, result, i, context_lines);
+				debug("new chunk to be printed: chunk %d-%d left %d-%d right %d-%d\n",
+				      next.chunk.start, next.chunk.end,
+				      next.left.start, next.left.end, next.right.start, next.right.end);
+				if (chunk_contexts_touch(&cc, &next)) {
+					/* This next context touches or overlaps the previous one, join. */
+					chunk_contexts_merge(&cc, &next);
+					debug("new chunk to be printed touches previous chunk, now: left %d-%d right %d-%d\n",
+					      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+				} else {
+					/* No touching, so the previous context is complete with a gap between it and
+					 * this next one. Print the previous one and start fresh here. */
+					debug("new chunk to be printed does not touch previous chunk; print left %d-%d right %d-%d\n",
+					      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+					diff_output_unidiff_chunk(dest, &info_printed, info, result, &cc);
+					cc = next;
+					debug("new unprinted chunk is left %d-%d right %d-%d\n",
+					      cc.left.start, cc.left.end, cc.right.start, cc.right.end);
+				}
+			}
+		}
+
+	}
+
+	if (!chunk_context_empty(&cc))
+		diff_output_unidiff_chunk(dest, &info_printed, info, result, &cc);
+	return DIFF_RC_OK;
+}
+
+enum diff_rc diff_unidiff(FILE *dest, const struct diff_config *diff_config,
+			  const struct diff_input_info *info,
+			  const char *left, int left_len, const char *right, int right_len,
+			  unsigned int context_lines)
+{
+	enum diff_rc rc;
+	left_len = left_len < 0 ? strlen(left) : left_len;
+	right_len = right_len < 0 ? strlen(right) : right_len;
+	struct diff_result *result = diff_main(diff_config, left, left_len, right, right_len);
+	rc = diff_output_unidiff(dest, info, result, context_lines);
+	diff_result_free(result);
+	return rc;
+}
blob - /dev/null
blob + 9fcf8196417c1df4fda8effb7cdbb57995974764 (mode 644)
--- /dev/null
+++ lib/diff_patience.c
@@ -0,0 +1,331 @@
+/* Implementation of the Patience Diff algorithm invented by Bram Cohen:
+ * Divide a diff problem into smaller chunks by an LCS of common-unique lines. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <diff/diff_main.h>
+
+#include "debug.h"
+
+/* Set unique_here = true for all atoms that exist exactly once in this list. */
+static void diff_atoms_mark_unique(struct diff_data *d, unsigned int *unique_count)
+{
+	struct diff_atom *i;
+	unsigned int count = 0;
+	diff_data_foreach_atom(i, d) {
+		i->patience.unique_here = true;
+		i->patience.unique_in_both = true;
+		count++;
+	}
+	diff_data_foreach_atom(i, d) {
+		struct diff_atom *j;
+
+		if (!i->patience.unique_here)
+			continue;
+
+		diff_data_foreach_atom_from(i + 1, j, d) {
+			if (diff_atom_same(i, j)) {
+				if (i->patience.unique_here) {
+					i->patience.unique_here = false;
+					i->patience.unique_in_both = false;
+					count--;
+				}
+				j->patience.unique_here = false;
+				j->patience.unique_in_both = false;
+				count--;
+			}
+		}
+	}
+	if (unique_count)
+		*unique_count = count;
+}
+
+/* Mark those lines as atom->patience.unique_in_both = true that appear exactly once in each side. */
+static void diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right,
+					   unsigned int *unique_in_both_count)
+{
+	/* Derive the final unique_in_both count without needing an explicit iteration. So this is just some
+	 * optimiziation to save one iteration in the end. */
+	unsigned int unique_in_both;
+
+	diff_atoms_mark_unique(left, &unique_in_both);
+	diff_atoms_mark_unique(right, NULL);
+
+	debug("unique_in_both %u\n", unique_in_both);
+
+	struct diff_atom *i;
+	diff_data_foreach_atom(i, left) {
+		if (!i->patience.unique_here)
+			continue;
+		struct diff_atom *j;
+		int found_in_b = 0;
+		diff_data_foreach_atom(j, right) {
+			if (!diff_atom_same(i, j))
+				continue;
+			if (!j->patience.unique_here) {
+				found_in_b = 2; /* or more */
+				break;
+			} else {
+				found_in_b = 1;
+				j->patience.pos_in_other = i;
+				i->patience.pos_in_other = j;
+			}
+		}
+
+		if (found_in_b == 0 || found_in_b > 1) {
+			i->patience.unique_in_both = false;
+			unique_in_both--;
+			debug("unique_in_both %u  (%d) ", unique_in_both, found_in_b);
+			debug_dump_atom(left, NULL, i);
+		}
+	}
+
+	/* Still need to unmark right[*]->patience.unique_in_both for atoms that don't exist in left */
+	diff_data_foreach_atom(i, right) {
+		if (!i->patience.unique_here
+		    || !i->patience.unique_in_both)
+			continue;
+		struct diff_atom *j;
+		bool found_in_a = false;
+		diff_data_foreach_atom(j, left) {
+			if (!j->patience.unique_in_both)
+				continue;
+			if (!diff_atom_same(i, j))
+				continue;
+			found_in_a = true;
+			break;
+		}
+
+		if (!found_in_a)
+			i->patience.unique_in_both = false;
+	}
+
+	if (unique_in_both_count)
+		*unique_in_both_count = unique_in_both;
+}
+
+
+/* Among the lines that appear exactly once in each side, find the longest streak that appear in both files in the same
+ * order (with other stuff allowed to interleave). Use patience sort for that, as in the Patience Diff algorithm.
+ * See https://bramcohen.livejournal.com/73318.html and, for a much more detailed explanation,
+ * https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/ */
+enum diff_rc diff_algo_patience(const struct diff_algo_config *algo_config, struct diff_state *state)
+{
+	enum diff_rc rc = DIFF_RC_ENOMEM;
+
+	struct diff_data *left = &state->left;
+	struct diff_data *right = &state->right;
+
+	unsigned int unique_in_both_count;
+
+	debug("\n** %s\n", __func__);
+
+	/* Find those lines that appear exactly once in 'left' and exactly once in 'right'. */
+	diff_atoms_mark_unique_in_both(left, right, &unique_in_both_count);
+
+	debug("unique_in_both_count %u\n", unique_in_both_count);
+	debug("left:\n");
+	debug_dump(left);
+	debug("right:\n");
+	debug_dump(right);
+
+	if (!unique_in_both_count) {
+		/* Cannot apply Patience, tell the caller to use fallback_algo instead. */
+		return DIFF_RC_USE_DIFF_ALGO_FALLBACK;
+	}
+
+	/* An array of Longest Common Sequence is the result of the below subscope: */
+	unsigned int lcs_count = 0;
+	struct diff_atom **lcs = NULL;
+	struct diff_atom *lcs_tail = NULL;
+
+	{
+		/* This subscope marks the lifetime of the atom_pointers allocation */
+
+		/* One chunk of storage for atom pointers */
+		struct diff_atom **atom_pointers = recallocarray(NULL, 0, unique_in_both_count * 2, sizeof(struct diff_atom*));
+
+		/* Half for the list of atoms that still need to be put on stacks */
+		struct diff_atom **uniques = atom_pointers;
+
+		/* Half for the patience sort state's "card stacks" -- we remember only each stack's topmost "card" */
+		struct diff_atom **patience_stacks = atom_pointers + unique_in_both_count;
+		unsigned int patience_stacks_count = 0;
+
+		/* Take all common, unique items from 'left' ... */
+
+		struct diff_atom *atom;
+		struct diff_atom **uniques_end = uniques;
+		diff_data_foreach_atom(atom, left) {
+			if (!atom->patience.unique_in_both)
+				continue;
+			*uniques_end = atom;
+			uniques_end++;
+		}
+
+		/* ...and sort them to the order found in 'right'.
+		 * The idea is to find the leftmost stack that has a higher line number and add it to the stack's top.
+		 * If there is no such stack, open a new one on the right. The line number is derived from the atom*,
+		 * which are array items and hence reflect the relative position in the source file. So we got the
+		 * common-uniques from 'left' and sort them according to atom->patience.pos_in_other. */
+		unsigned int i;
+		for (i = 0; i < unique_in_both_count; i++) {
+			atom = uniques[i];
+			unsigned int target_stack;
+
+			if (!patience_stacks_count)
+				target_stack = 0;
+			else {
+				/* binary search to find the stack to put this atom "card" on. */
+				unsigned int lo = 0;
+				unsigned int hi = patience_stacks_count;
+				while (lo < hi) {
+					unsigned int mid = (lo + hi) >> 1;
+					if (patience_stacks[mid]->patience.pos_in_other < atom->patience.pos_in_other)
+						lo = mid + 1;
+					else
+						hi = mid;
+				}
+
+				target_stack = lo;
+			}
+
+			assert(target_stack <= patience_stacks_count);
+			patience_stacks[target_stack] = atom;
+			if (target_stack == patience_stacks_count)
+				patience_stacks_count++;
+
+			/* Record a back reference to the next stack on the left, which will form the final longest sequence
+			 * later. */
+			atom->patience.prev_stack = target_stack ? patience_stacks[target_stack - 1] : NULL;
+
+		}
+
+		/* backtrace through prev_stack references to form the final longest common sequence */
+		lcs_tail = patience_stacks[patience_stacks_count - 1];
+		lcs_count = patience_stacks_count;
+
+		/* uniques and patience_stacks are no longer needed. Backpointers are in atom->patience.prev_stack */
+		free(atom_pointers);
+	}
+
+	lcs = recallocarray(NULL, 0, lcs_count, sizeof(struct diff_atom*));
+	struct diff_atom **lcs_backtrace_pos = &lcs[lcs_count - 1];
+	struct diff_atom *atom;
+	for (atom = lcs_tail; atom; atom = atom->patience.prev_stack, lcs_backtrace_pos--) {
+		assert(lcs_backtrace_pos >= lcs);
+		*lcs_backtrace_pos = atom;
+	}
+
+	unsigned int i;
+	if (DEBUG) {
+		debug("\npatience LCS:\n");
+		for (i = 0; i < lcs_count; i++) {
+			debug_dump_atom(left, right, lcs[i]);
+		}
+	}
+
+
+	/* TODO: For each common-unique line found (now listed in lcs), swallow lines upwards and downwards that are
+	 * identical on each side. Requires a way to represent atoms being glued to adjacent atoms. */
+
+	debug("\ntraverse LCS, possibly recursing:\n");
+
+	/* Now we have pinned positions in both files at which it makes sense to divide the diff problem into smaller
+	 * chunks. Go into the next round: look at each section in turn, trying to again find common-unique lines in
+	 * those smaller sections. As soon as no more are found, the remaining smaller sections are solved by Myers. */
+	unsigned int left_pos = 0;
+	unsigned int right_pos = 0;
+	for (i = 0; i <= lcs_count; i++) {
+		struct diff_atom *atom;
+		unsigned int left_idx;
+		unsigned int right_idx;
+
+		if (i < lcs_count) {
+			atom = lcs[i];
+			left_idx = diff_atom_idx(left, atom);
+			right_idx = diff_atom_idx(right, atom->patience.pos_in_other);
+		} else {
+			atom = NULL;
+			left_idx = left->atoms.len;
+			right_idx = right->atoms.len;
+		}
+
+		/* 'atom' now marks an atom that matches on both sides according to patience-diff
+		 * (a common-unique identical atom in both files).
+		 * Handle the section before and the atom itself; the section after will be handled by the next loop
+		 * iteration -- note that i loops to last element + 1 ("i <= lcs_count"), so that there will be another
+		 * final iteration to pick up the last remaining items after the last LCS atom.
+		 * The sections before might also be empty on left and/or right.
+		 * left_pos and right_pos mark the indexes of the first atoms that have not yet been handled in the
+		 * previous loop iteration.
+		 * left_idx and right_idx mark the indexes of the matching atom on left and right, respectively. */
+
+		debug("iteration %u  left_pos %u  left_idx %u  right_pos %u  right_idx %u\n",
+		      i, left_pos, left_idx, right_pos, right_idx);
+
+		struct diff_chunk *chunk;
+
+		/* Section before the matching atom */
+		struct diff_atom *left_atom = &left->atoms.head[left_pos];
+		unsigned int left_section_len = left_idx - left_pos;
+
+		struct diff_atom *right_atom = &(right->atoms.head[right_pos]);
+		unsigned int right_section_len = right_idx - right_pos;
+
+		if (left_section_len && right_section_len) {
+			/* Record an unsolved chunk, the caller will apply inner_algo() on this chunk. */
+			if (!diff_state_add_chunk(state, false,
+						  left_atom, left_section_len,
+						  right_atom, right_section_len))
+				goto return_rc;
+		} else if (left_section_len && !right_section_len) {
+			/* Only left atoms and none on the right, they form a "minus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, left_section_len,
+						  right_atom, 0))
+				goto return_rc;
+		} else if (!left_section_len && right_section_len) {
+			/* No left atoms, only atoms on the right, they form a "plus" chunk, then. */
+			if (!diff_state_add_chunk(state, true,
+						  left_atom, 0,
+						  right_atom, right_section_len))
+				goto return_rc;
+		}
+		/* else: left_section_len == 0 and right_section_len == 0, i.e. nothing here. */
+
+		/* The atom found to match on both sides forms a chunk of equals on each side. In the very last
+		 * iteration of this loop, there is no matching atom, we were just cleaning out the remaining lines. */
+		if (atom) {
+			if (!diff_state_add_chunk(state, true,
+						  atom, 1,
+						  atom->patience.pos_in_other, 1))
+				goto return_rc;
+		}
+
+		left_pos = left_idx + 1;
+		right_pos = right_idx + 1;
+	}
+	debug("** END %s\n", __func__);
+
+	rc = DIFF_RC_OK;
+
+return_rc:
+	free(lcs);
+	return rc;
+}
+
blob - /dev/null
blob + e8e14be5621f0f4536f2e0e42b7e9fb59d758cf4 (mode 644)
--- /dev/null
+++ lib/linux_Makefile
@@ -0,0 +1,24 @@
+srcs = \
+	diff_atomize_text.c \
+	diff_main.c \
+	diff_myers.c \
+	diff_patience.c \
+	diff_output.c \
+	diff_output_plain.c \
+	diff_output_unidiff.c \
+	$(END)
+
+objs = $(srcs:.c=.o)
+
+libdiff.a: $(objs)
+	ar rcs $@ $^
+
+CFLAGS = -fsanitize=address -fsanitize=undefined -g
+
+%.o: %.c ./*.h ../include/diff/*.h
+	gcc $(CFLAGS) -I../include -o $@ -c $<
+
+.PHONY: clean
+clean:
+	-rm $(objs)
+	-rm libdiff.a
blob - /dev/null
blob + b4a9acb0cdc71e2d714c76d3f510873470cb8236 (mode 644)
--- /dev/null
+++ man/diff.1
@@ -0,0 +1,47 @@
+.\"	$OpenBSD$
+.\"
+.\" Copyright (c) 2018 Martin Pieuchot
+.\" Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: August 28 2017 $
+.Dt DIFF 1
+.Os
+.Sh NAME
+.Nm diff
+.Nd compare files
+.Sh SYNOPSIS
+.Nm diff
+.Ar file1 file2
+.Sh DESCRIPTION
+The
+.Nm
+utility compares the contents of
+.Ar file1
+and
+.Ar file2
+line by line.
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It 0
+No differences were found.
+.It 1
+Differences were found.
+.It >1
+An error occurred.
+.El
blob - /dev/null
blob + ad6ffbd9d12fc4f81e7e59ea3b4b7801c0b53887 (mode 644)
--- /dev/null
+++ test/Makefile
@@ -0,0 +1,7 @@
+.PHONY: test verify clean
+test: verify clean
+
+verify:
+	./verify_all.sh
+clean:
+	-rm verify.*
blob - /dev/null
blob + 60764f10614644471b9cae1fdf7787246ef1ed92 (mode 644)
--- /dev/null
+++ test/README
@@ -0,0 +1,7 @@
+The test produces a diff, which is successful if it is able to reconstruct the
+original source files from it. It is not tested whether diff output is optimal
+or beautiful.
+
+Since different diff algorithms can produce different diff outputs, the
+expect*.diff files are merely provided for reference and are not part of the
+tests.
blob - /dev/null
blob + 48768c491c8dc456f94d9bd13e9a2f27590bbb0d (mode 644)
--- /dev/null
+++ test/expect001.diff
@@ -0,0 +1,11 @@
+--- test001.left.txt
++++ test001.right.txt
+-A
+-B
+ C
+-A
+ B
++A
+ B
+ A
++C
blob - /dev/null
blob + 5a409a01b7a7269424e11fd6a07a054e997dd3c5 (mode 644)
--- /dev/null
+++ test/expect002.diff
@@ -0,0 +1,15 @@
+--- test002.left.txt
++++ test002.right.txt
+-A
+-B
+ C
+-A
+ B
++A
+ B
+ A
++C
+ X
+-Y
+ Z
++Q
blob - /dev/null
blob + ff1c8b5c8d3c587b15fe9d9ec83ba6ceb516436e (mode 644)
--- /dev/null
+++ test/expect003.diff
@@ -0,0 +1,9 @@
+--- test003.left.txt
++++ test003.right.txt
+-a
++x
+ b
+ c
+-d
+-e
++y
blob - /dev/null
blob + 5989a134be2ab91b93f757600becacc8065b8f94 (mode 644)
--- /dev/null
+++ test/expect004.diff
@@ -0,0 +1,23 @@
+--- test004.left.txt
++++ test004.right.txt
++int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n)
++{
++    if (chunk == NULL) return 0;
++
++    return start <= chunk->length && n <= chunk->length - start;
++}
++
+ void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n)
+ {
+     if (!Chunk_bounds_check(src, src_start, n)) return;
+     if (!Chunk_bounds_check(dst, dst_start, n)) return;
+ 
+     memcpy(dst->data + dst_start, src->data + src_start, n);
+ }
+-
+-int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n)
+-{
+-    if (chunk == NULL) return 0;
+-
+-    return start <= chunk->length && n <= chunk->length - start;
+-}
blob - /dev/null
blob + 25e2e0bc35f05d8383e52fad77872be96de5dca0 (mode 644)
--- /dev/null
+++ test/expect005.diff
@@ -0,0 +1,11 @@
+--- test005.left.txt
++++ test005.right.txt
++The Slits
++Gil Scott Heron
+ David Axelrod
+ Electric Prunes
+-Gil Scott Heron
+-The Slits
+ Faust
+ The Sonics
+ The Sonics
blob - /dev/null
blob + ea0a5d1c20b159e7dad0631318ef5690875da3ba (mode 644)
--- /dev/null
+++ test/expect006.diff
@@ -0,0 +1,29 @@
+--- test006.left.txt
++++ test006.right.txt
+ Below is an example license to be used for new code in OpenBSD,
+ modeled after the ISC license.
+ 
+ It is important to specify the year of the copyright.  Additional years
+ should be separated by a comma, e.g.
+-    Copyright (c) 2003, 2004
++    Copyright (c) 2003, 2004, 2005
+ 
+ If you add extra text to the body of the license, be careful not to
+ add further restrictions.
+ 
+ /*
+  * Copyright (c) CCYY YOUR NAME HERE <user@your.dom.ain>
+  *
+- * Permission to use, copy, modify, and distribute this software for any
+  * purpose with or without fee is hereby granted, provided that the above
+  * copyright notice and this permission notice appear in all copies.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+  */
++An extra line
blob - /dev/null
blob + 996246e7974d7b25bc76ee14c63114a241be4704 (mode 644)
--- /dev/null
+++ test/expect007.diff
@@ -0,0 +1,4 @@
+--- test007.left.txt
++++ test007.right.txt
+-x
++abcdx
blob - /dev/null
blob + c4f9c82c25819151690ffa3bcd44eea1b6e43159 (mode 644)
--- /dev/null
+++ test/expect008.diff
@@ -0,0 +1,8 @@
+--- test008.left.txt
++++ test008.right.txt
+ x
++a
++b
++c
++d
++x
blob - /dev/null
blob + 8f75e2ee8130368c1ef12eb5b35b99b786f49ec1 (mode 644)
--- /dev/null
+++ test/expect009.diff
@@ -0,0 +1,12 @@
+--- test009.left.txt
++++ test009.right.txt
+ x
+ a
+ b
++c
++d
++e
++f
++x
++a
++b
blob - /dev/null
blob + fd113b0f71509c767b9a4e7d4cc2f70f9d41c459 (mode 644)
--- /dev/null
+++ test/test001.left.txt
@@ -0,0 +1,7 @@
+A
+B
+C
+A
+B
+B
+A
blob - /dev/null
blob + 0075e6d23de0d19b7f870a3e82b9b310001b3db9 (mode 644)
--- /dev/null
+++ test/test001.right.txt
@@ -0,0 +1,6 @@
+C
+B
+A
+B
+A
+C
blob - /dev/null
blob + de77f56f2d47f324d97bd0e8968589acd31a54d6 (mode 644)
--- /dev/null
+++ test/test002.left.txt
@@ -0,0 +1,10 @@
+A
+B
+C
+A
+B
+B
+A
+X
+Y
+Z
blob - /dev/null
blob + a9b4b8b851ddd7eba0955da5c7cd1bf316b7da83 (mode 644)
--- /dev/null
+++ test/test002.right.txt
@@ -0,0 +1,9 @@
+C
+B
+A
+B
+A
+C
+X
+Z
+Q
blob - /dev/null
blob + 940532533944dd159bfd11136fac2ee35872de38 (mode 644)
--- /dev/null
+++ test/test003.left.txt
@@ -0,0 +1,5 @@
+a
+b
+c
+d
+e
blob - /dev/null
blob + d9f266e145742427cb8635edb7343b2d7cd33b92 (mode 644)
--- /dev/null
+++ test/test003.right.txt
@@ -0,0 +1,4 @@
+x
+b
+c
+y
blob - /dev/null
blob + 72d95cf6331a2a7f35baa19863e6dd0250112e23 (mode 644)
--- /dev/null
+++ test/test004.left.txt
@@ -0,0 +1,14 @@
+void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n)
+{
+    if (!Chunk_bounds_check(src, src_start, n)) return;
+    if (!Chunk_bounds_check(dst, dst_start, n)) return;
+
+    memcpy(dst->data + dst_start, src->data + src_start, n);
+}
+
+int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n)
+{
+    if (chunk == NULL) return 0;
+
+    return start <= chunk->length && n <= chunk->length - start;
+}
blob - /dev/null
blob + cfe8f2ed6be03213c89877245b9ce3da3a4802d8 (mode 644)
--- /dev/null
+++ test/test004.right.txt
@@ -0,0 +1,14 @@
+int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n)
+{
+    if (chunk == NULL) return 0;
+
+    return start <= chunk->length && n <= chunk->length - start;
+}
+
+void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n)
+{
+    if (!Chunk_bounds_check(src, src_start, n)) return;
+    if (!Chunk_bounds_check(dst, dst_start, n)) return;
+
+    memcpy(dst->data + dst_start, src->data + src_start, n);
+}
blob - /dev/null
blob + b77fc41c4c871564f41f74ebc9f4a07710681b3e (mode 644)
--- /dev/null
+++ test/test005.left.txt
@@ -0,0 +1,7 @@
+David Axelrod
+Electric Prunes
+Gil Scott Heron
+The Slits
+Faust
+The Sonics
+The Sonics
blob - /dev/null
blob + 2480cecfba48d082c6c9c52ba386499542d3e2aa (mode 644)
--- /dev/null
+++ test/test005.right.txt
@@ -0,0 +1,7 @@
+The Slits
+Gil Scott Heron
+David Axelrod
+Electric Prunes
+Faust
+The Sonics
+The Sonics
blob - /dev/null
blob + e01fce04d5ef4776a326a510558cfe85af41a22a (mode 644)
--- /dev/null
+++ test/test006.left.txt
@@ -0,0 +1,25 @@
+Below is an example license to be used for new code in OpenBSD,
+modeled after the ISC license.
+
+It is important to specify the year of the copyright.  Additional years
+should be separated by a comma, e.g.
+    Copyright (c) 2003, 2004
+
+If you add extra text to the body of the license, be careful not to
+add further restrictions.
+
+/*
+ * Copyright (c) CCYY YOUR NAME HERE <user@your.dom.ain>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
blob - /dev/null
blob + 40a0f253dd2e3503d295711fcfe09427f52219b5 (mode 644)
--- /dev/null
+++ test/test006.right.txt
@@ -0,0 +1,25 @@
+Below is an example license to be used for new code in OpenBSD,
+modeled after the ISC license.
+
+It is important to specify the year of the copyright.  Additional years
+should be separated by a comma, e.g.
+    Copyright (c) 2003, 2004, 2005
+
+If you add extra text to the body of the license, be careful not to
+add further restrictions.
+
+/*
+ * Copyright (c) CCYY YOUR NAME HERE <user@your.dom.ain>
+ *
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+An extra line
blob - /dev/null
blob + 587be6b4c3f93f93c489c0111bba5596147a26cb (mode 644)
--- /dev/null
+++ test/test007.left.txt
@@ -0,0 +1 @@
+x
blob - /dev/null
blob + 47b0ccb04734aa02fca7a524d6fbc171756a6a5b (mode 644)
--- /dev/null
+++ test/test007.right.txt
@@ -0,0 +1 @@
+abcdx
blob - /dev/null
blob + 587be6b4c3f93f93c489c0111bba5596147a26cb (mode 644)
--- /dev/null
+++ test/test008.left.txt
@@ -0,0 +1 @@
+x
blob - /dev/null
blob + 70b67eef4500e1d108c59c18653e56159e5c60ad (mode 644)
--- /dev/null
+++ test/test008.right.txt
@@ -0,0 +1,6 @@
+x
+a
+b
+c
+d
+x
blob - /dev/null
blob + e4e5238f8e5d3a5deb0169f1795d18a9a1eebe68 (mode 644)
--- /dev/null
+++ test/test009.left.txt
@@ -0,0 +1,3 @@
+x
+a
+b
blob - /dev/null
blob + 3a4386800dba971814434a508e6de9867ebc7abc (mode 644)
--- /dev/null
+++ test/test009.right.txt
@@ -0,0 +1,10 @@
+x
+a
+b
+c
+d
+e
+f
+x
+a
+b
blob - /dev/null
blob + 4a0ec1a64add5fccad7d01d0856e38be06629717 (mode 755)
--- /dev/null
+++ test/verify_all.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+diff_prog="../diff/diff"
+
+diff_type=plain
+
+verify_diff_script() {
+	orig_left="$1"
+	orig_right="$2"
+	the_diff="$3"
+
+	verify_left="verify.$orig_left"
+	verify_right="verify.$orig_right"
+
+        if [ "x$diff_type" = "xunidiff" ]; then
+                cp "$orig_left" "$verify_right"
+                patch --quiet -u "$verify_right" "$the_diff"
+                if ! cmp "$orig_right" "$verify_right" ; then
+                        echo "FAIL: $orig_right != $verify_right"
+                        return 1
+                fi
+
+                cp "$orig_right" "$verify_left"
+                patch --quiet -u -R "$verify_left" "$the_diff"
+                if ! cmp "$orig_left" "$verify_left" ; then
+                        echo "FAIL: $orig_left != $verify_left"
+                        return 1
+                fi
+        else
+                tail -n +3 "$the_diff" | grep -v "^+" | sed 's/^.//' > "$verify_left"
+                tail -n +3 "$the_diff" | grep -v "^-" | sed 's/^.//' > "$verify_right"
+
+                if ! cmp "$orig_left" "$verify_left" ; then
+                        echo "FAIL: $orig_left != $verify_left"
+                        return 1
+                fi
+                if ! cmp "$orig_right" "$verify_right" ; then
+                        echo "FAIL: $orig_right != $verify_right"
+                        return 1
+                fi
+        fi
+        echo "OK: $diff_prog $orig_left $orig_right"
+        return 0
+}
+
+for left in test*.left.* ; do
+        right="$(echo "$left" | sed 's/\.left\./.right./')"
+        expected_diff="$(echo "$left" | sed 's/test\([0-9]*\)\..*/expect\1.diff/')"
+        got_diff="verify.$expected_diff"
+
+        "$diff_prog" "$left" "$right" > "$got_diff"
+
+        set -e
+	verify_diff_script "$left" "$right" "$got_diff"
+        set +e
+done