commit - 87890bc26c1c6958bd64bb9d46fbc29ba6a92d95
commit + ea5e974da9b1047689411a00ecc0a9c1fb101d73
blob - 80cfc6f803bbf046beb7285ddb64fd7ef3169391
blob + e1dd226bd532d7adaf1e4712e8c13bf3cc29ffba
--- gotd/libexec/got-notify-http/got-notify-http.c
+++ gotd/libexec/got-notify-http/got-notify-http.c
#include "got_version.h"
#include "bufio.h"
+#include "utf8d.h"
#define USERAGENT "got-notify-http/" GOT_VERSION_STR
static void
escape(FILE *fp, const uint8_t *s)
{
- for (; *s; ++s) {
- /*
- * XXX: this is broken for UNICODE: we should leave
- * the multibyte characters as-is.
- */
+ uint32_t codepoint, state;
+ const uint8_t *start = s;
- if (*s >= ' ' && *s <= '~') {
- fputc(*s, fp);
- continue;
- }
-
- switch (*s) {
- case '"':
- case '\\':
- fprintf(fp, "\\%c", *s);
- break;
- case '\b':
- fprintf(fp, "\\b");
- break;
- case '\f':
- fprintf(fp, "\\f");
- break;
- case '\n':
- fprintf(fp, "\\n");
- break;
- case '\r':
- fprintf(fp, "\\r");
- break;
- case '\t':
- fprintf(fp, "\\t");
+ state = 0;
+ for (; *s; ++s) {
+ switch (decode(&state, &codepoint, *s)) {
+ case UTF8_ACCEPT:
+ switch (codepoint) {
+ case '"':
+ case '\\':
+ fprintf(fp, "\\%c", *s);
+ break;
+ case '\b':
+ fprintf(fp, "\\b");
+ break;
+ case '\f':
+ fprintf(fp, "\\f");
+ break;
+ case '\n':
+ fprintf(fp, "\\n");
+ break;
+ case '\r':
+ fprintf(fp, "\\r");
+ break;
+ case '\t':
+ fprintf(fp, "\\t");
+ break;
+ default:
+ /* other control characters */
+ if (codepoint < ' ' || codepoint == 0x7F) {
+ fprintf(fp, "\\u%04x", codepoint);
+ break;
+ }
+ fwrite(start, 1, s - start + 1, fp);
+ break;
+ }
+ start = s + 1;
break;
- default:
- fprintf(fp, "\\u%04X", *s);
+
+ case UTF8_REJECT:
+ /* bad UTF-8 sequence; try to recover */
+ fputs("\\uFFFD", fp);
+ state = UTF8_ACCEPT;
+ start = s + 1;
break;
}
}
blob - /dev/null
blob + 480e9c55ac4a6aa58414ec65e0053dd620b5b01d (mode 644)
--- /dev/null
+++ gotd/libexec/got-notify-http/utf8d.h
+/*
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+/*
+ * Lookup table used by decode().
+ *
+ * Entries 0..255 (the rows labelled 00..ff) hold a class number for each
+ * possible byte value.  The rows labelled s0..s8 that follow are read by
+ * decode() as utf8d[256 + state*16 + class] to obtain the next state.
+ */
+static const uint8_t utf8d[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+/*
+ * Feed one input byte to the UTF-8 decoder.
+ *
+ * state: decoder state carried between calls; start with UTF8_ACCEPT.
+ * codep: code point accumulated so far; rewritten on every call.
+ * byte:  next input byte.  It indexes utf8d[] directly, so it must be
+ *        in the range 0..255.
+ *
+ * Returns the new value of *state.  UTF8_ACCEPT means *codep now holds a
+ * complete code point and UTF8_REJECT means the input is not valid UTF-8;
+ * per the DFA description referenced at the top of this file, any other
+ * value means more bytes are needed.  Every transition out of the
+ * UTF8_REJECT row leads back to UTF8_REJECT, so the caller has to reset
+ * *state to UTF8_ACCEPT itself if it wants to resume decoding.
+ */
+static uint32_t inline
+decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+ uint32_t type = utf8d[byte]; /* class of this byte (first 256 entries) */
+
+ *codep = (*state != UTF8_ACCEPT) ?
+ (byte & 0x3fu) | (*codep << 6) : /* mid-sequence: append the low 6 bits */
+ (0xff >> type) & (byte); /* new sequence: keep the bits left by 0xff >> type */
+
+ *state = utf8d[256 + *state*16 + type]; /* transition rows follow the class entries */
+ return *state;
+}
blob - 6892df7536c1a19134db4827c42f870d903c65a7
blob + 3255fa55f2841d50b5170e35303b4e7622843e04
--- regress/gotd/http_notification.sh
+++ regress/gotd/http_notification.sh
"author":"$GOT_AUTHOR",
"date":"$d",
"message":"make changes\n",
+ "diffstat":{},
+ "changes":{}
+ }]}
+ .
+ ,j
+ w
+ EOF
+
+ cmp -s $testroot/stdout.expected $testroot/stdout
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ diff -u $testroot/stdout.expected $testroot/stdout
+ test_done "$testroot" "$ret"
+ return 1
+ fi
+
+ test_done "$testroot" "$ret"
+}
+
+test_bad_utf8() {
+ local testroot=`test_init bad_utf8 1`
+
+ got clone -a -q ${GOTD_TEST_REPO_URL} $testroot/repo-clone
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo "got clone failed unexpectedly" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ got checkout -q $testroot/repo-clone $testroot/wt >/dev/null
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo "got checkout failed unexpectedly" >&2
+ test_done "$testroot" 1
+ fi
+
+ # invalid UTF-8 sequence (ED A0 80 encodes the surrogate U+D800)
+ commit_msg="make$(printf '\xED\xA0\x80')changes"
+
+ echo "changed" > $testroot/wt/alpha
+ (cd $testroot/wt && got commit -m "$commit_msg" > /dev/null)
+ local commit_id=`git_show_head $testroot/repo-clone`
+ local author_time=`git_show_author_time $testroot/repo-clone`
+
+ timeout 5 ./http-server -p $GOTD_TEST_HTTP_PORT \
+ > $testroot/stdout &
+
+ got send -b main -q -r $testroot/repo-clone
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo "got send failed unexpectedly" >&2
+ test_done "$testroot" "1"
+ return 1
+ fi
+
+ wait %1 # wait for the http "server"
+
+ d=`date -u -r $author_time +"%a %b %e %X %Y UTC"`
+
+ touch "$testroot/stdout.expected"
+ ed -s "$testroot/stdout.expected" <<-EOF
+ a
+ {"notifications":[{
+ "short":false,
+ "id":"$commit_id",
+ "author":"$GOT_AUTHOR",
+ "date":"$d",
+ "message":"make\uFFFD\uFFFDchanges\n",
"diffstat":{},
"changes":{}
}]}
test_parseargs "$@"
run_test test_file_changed
+run_test test_bad_utf8
run_test test_many_commits_not_summarized
run_test test_many_commits_summarized