Author: brane
Date: Thu Aug 8 11:08:33 2013
New Revision: 1511695
URL: http://svn.apache.org/r1511695
Log:
Added a private function that checks if a UTF-8 string is normalized.
* subversion/include/private/svn_utf_private.h (svn_utf__is_normalized): New.
* subversion/libsvn_subr/utf8proc.c (normalize_cstring): New.
(svn_utf__is_normalized): Implement, using normalize_cstring.
* subversion/tests/libsvn_subr/utf-test.c
(test_utf_is_normalized): New test for svn_utf__is_normalized.
(test_funcs): Added test_utf_is_normalized.
Modified:
subversion/trunk/subversion/include/private/svn_utf_private.h
subversion/trunk/subversion/libsvn_subr/utf8proc.c
subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1511695&r1=1511694&r2=1511695&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Thu Aug 8 11:08:33 2013
@@ -112,6 +112,14 @@ svn_utf__normcmp(int *result,
const char *str2, apr_size_t len2,
svn_membuf_t *buf1, svn_membuf_t *buf2);
+/* Return TRUE if STRING is valid UTF-8 that is already in Unicode
+ * Normalization Form C (NFC), and FALSE otherwise. STRING must be
+ * NUL-terminated. Note that a FALSE return value may also indicate
+ * that STRING is not valid UTF-8 at all; the two cases are not
+ * distinguished.
+ *
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+svn_boolean_t
+svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool);
/* Pattern matching similar to the SQLite LIKE and GLOB
* operators. PATTERN, KEY and ESCAPE must all point to UTF-8
Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf8proc.c?rev=1511695&r1=1511694&r2=1511695&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Thu Aug 8 11:08:33 2013
@@ -105,6 +105,34 @@ decompose_normalized(apr_size_t *result_
return SVN_NO_ERROR;
}
+/* Write an NFC-normalized UTF-8 rendering of the UTF-8 STRING into
+ * BUFFER, and store the length that utf8proc reports for it in
+ * *RESULT_LENGTH. When LENGTH is SVN_UTF__UNKNOWN_LENGTH, STRING is
+ * taken to be NUL-terminated; otherwise only its first LENGTH bytes
+ * are examined. On success BUFFER->data points at a NUL-terminated
+ * string of UTF-8 characters.
+ *
+ * A returned error may mean that STRING holds invalid UTF-8 or
+ * invalid Unicode codepoints; its message is the one supplied by
+ * utf8proc. *RESULT_LENGTH is left untouched in that case.
+ */
+static svn_error_t *
+normalize_cstring(apr_size_t *result_length,
+                  const char *string, apr_size_t length,
+                  svn_membuf_t *buffer)
+{
+  /* Pass 1: decompose STRING into BUFFER. A negative status is a
+     utf8proc error code; otherwise it is the element count that is
+     handed on to the re-encoding pass. */
+  ssize_t status = unicode_decomposition(0, string, length, buffer);
+
+  if (status >= 0)
+    {
+      /* Pass 2: recompose and re-encode the contents of BUFFER. The
+         buffer is sized for STATUS 32-bit elements plus one extra
+         byte (presumably room for the terminating NUL). */
+      const apr_size_t needed = status * sizeof(apr_int32_t) + 1;
+
+      svn_membuf__resize(buffer, needed);
+      status = utf8proc_reencode(buffer->data, status,
+                                 UTF8PROC_COMPOSE | UTF8PROC_STABLE);
+    }
+
+  if (status >= 0)
+    {
+      *result_length = status;
+      return SVN_NO_ERROR;
+    }
+
+  /* Either pass failed: wrap utf8proc's own message in an svn error. */
+  return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL,
+                          gettext(utf8proc_errmsg(status)));
+}
+
/* Compare two arrays of UCS-4 codes, BUFA of length LENA and BUFB of
* length LENB. Return 0 if they're equal, a negative value if BUFA is
* less than BUFB, otherwise a positive value.
@@ -305,6 +333,22 @@ svn_utf__glob(svn_boolean_t *match,
return SVN_NO_ERROR;
}
+/* Report whether the NUL-terminated STRING is byte-for-byte equal to
+ * its own NFC normalization. Returns FALSE both when the text differs
+ * from its normalized form and when normalization fails (the error is
+ * cleared, not propagated). Temporary storage comes from SCRATCH_POOL.
+ */
+svn_boolean_t
+svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool)
+{
+  const apr_size_t input_len = strlen(string);
+  apr_size_t normalized_len;
+  svn_membuf_t normalized;
+  svn_error_t *failure;
+
+  /* Initial capacity: one 32-bit element per input byte. */
+  svn_membuf__create(&normalized, input_len * sizeof(apr_int32_t),
+                     scratch_pool);
+
+  failure = normalize_cstring(&normalized_len, string, input_len,
+                              &normalized);
+  if (failure != NULL)
+    {
+      /* The caller only gets a boolean, so the error is dropped. */
+      svn_error_clear(failure);
+      return FALSE;
+    }
+
+  /* A length mismatch already proves the text changed; skip strcmp. */
+  if (input_len != normalized_len)
+    return FALSE;
+
+  return (0 == strcmp(string, normalized.data));
+}
const char *
svn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool)
Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/utf-test.c?rev=1511695&r1=1511694&r2=1511695&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Thu Aug 8 11:08:33 2013
@@ -674,6 +674,69 @@ test_utf_fuzzy_escape(apr_pool_t *pool)
return SVN_NO_ERROR;
}
+static svn_error_t *
+test_utf_is_normalized(apr_pool_t *pool)
+{
+ /* Test svn_utf__is_normalized() against four fixtures built from the
+  * same sequence of accented letters. Only the first fixture is
+  * expected to be reported as normalized.
+  *
+  * Fixture 1 -- normalized (NFC): each letter is a single precomposed
+  * code point. */
+ static const char nfc[] =
+ "\xe1\xb9\xa8" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "\xe1\xb8\x87" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "\xe1\xb8\x9d" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "\xc5\xa1" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "\xe1\xbb\x9d" /* o with horn and grave */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ /* Fixture 2 -- decomposed (NFD): each letter is an ASCII base letter
+  * followed by its combining marks. This is a normal form, but not
+  * NFC, so the check must reject it. */
+ static const char nfd[] =
+ "S\xcc\xa3\xcc\x87" /* S with dot above and below */
+ "u\xcc\x8a" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "v\xcc\x83" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "r\xcc\x8f" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */
+ "o\xcc\x9b\xcc\x80" /* o with horn and grave */
+ "n\xcc\xad"; /* n with circumflex below */
+
+ /* Fixture 3 -- mixed, denormalized: alternates decomposed and
+  * precomposed letters. The combining marks on S and o appear in the
+  * opposite order from the nfd fixture above. */
+ static const char mixup[] =
+ "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "o\xcc\x80\xcc\x9b" /* o with horn and grave */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ /* Fixture 4 -- invalid UTF-8: same as nfc, except that the "o" letter
+  * is replaced by a lone 0xE6 byte (a three-byte lead byte that is not
+  * followed by any continuation byte). */
+ static const char invalid[] =
+ "\xe1\xb9\xa8" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "\xe1\xb8\x87" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "\xe1\xb8\x9d" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "\xc5\xa1" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "\xe6" /* Invalid byte */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ SVN_ERR_ASSERT(svn_utf__is_normalized(nfc, pool)); /* accepted */
+ SVN_ERR_ASSERT(!svn_utf__is_normalized(nfd, pool)); /* rejected */
+ SVN_ERR_ASSERT(!svn_utf__is_normalized(mixup, pool)); /* rejected */
+ SVN_ERR_ASSERT(!svn_utf__is_normalized(invalid, pool)); /* rejected */
+
+ return SVN_NO_ERROR;
+}
+
/* The test table. */
@@ -694,5 +757,7 @@ struct svn_test_descriptor_t test_funcs[
"test svn_utf__glob"),
SVN_TEST_PASS2(test_utf_fuzzy_escape,
"test svn_utf__fuzzy_escape"),
+ SVN_TEST_PASS2(test_utf_is_normalized,
+ "test svn_utf__is_normalized"),
SVN_TEST_NULL
};