Author: brane
Date: Sat May 23 13:02:20 2026
New Revision: 1934533
Log:
Revert part of r1934528 that implemented svn_utf_cstring_utf8_width() as
a wrapper around svn_utf__cstring_utf8_grapheme_breaks().
* subversion/include/private/svn_utf_private.h
(svn_utf__cstring_utf8_grapheme_breaks): Update the return value docstring.
(svn_utf_cstring_utf8_width): Back to the stand-alone implementation, with
an added integer overflow check. Simplify width calculation a bit because
we know that utf8proc_charwidth() won't return negative values.
* subversion/tests/libsvn_subr/utf-test.c
(fat_emojis, mixup, invalid, bom): Extract test data from test_utf8_width().
(test_utf8_width): Extract the grapheme breaks test to a new function.
(test_utf8_grapheme_breaks): New test case.
(test_funcs): Register test_utf8_grapheme_breaks.
Modified:
subversion/trunk/subversion/include/private/svn_utf_private.h
subversion/trunk/subversion/libsvn_subr/utf8proc.c
subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h Sat May
23 12:10:04 2026 (r1934532)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Sat May
23 13:02:20 2026 (r1934533)
@@ -316,7 +316,7 @@ typedef struct svn_utf__utf8_grapheme_t
* If GRAPHEMES is NULL, the list of graphemes will not be allocated
* and POOL may also be NULL.
*
- * If CSTR is not a valid UTF-8 string, the returned value will be negative.
+ * If CSTR is not a valid UTF-8 string, the returned value will be -1.
*/
apr_ssize_t
svn_utf__cstring_utf8_grapheme_breaks(apr_array_header_t **graphemes,
Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c Sat May 23 12:10:04
2026 (r1934532)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Sat May 23 13:02:20
2026 (r1934533)
@@ -692,25 +692,10 @@ svn_utf__cstring_utf8_grapheme_breaks(ap
return total_width;
}
-#if 1
int
svn_utf_cstring_utf8_width(const char *cstr)
{
- const apr_ssize_t width =
- svn_utf__cstring_utf8_grapheme_breaks(NULL, cstr, NULL);
-
- /* Check for return value overflow. It's unfortunate that we chose to use
- 'int' for what is essentially a string length value. */
- if (width > INT_MAX)
- return -1;
-
- return (int)width;
-}
-#else
-int
-svn_utf_cstring_utf8_width(const char *cstr)
-{
- int width = 0;
+ apr_ssize_t width = 0;
if (*cstr == '\0')
return 0;
@@ -721,10 +706,8 @@ svn_utf_cstring_utf8_width(const char *c
while (*cstr)
{
utf8proc_int32_t ucs;
- int w;
-
- utf8proc_ssize_t nbytes = utf8proc_iterate(
- (const utf8proc_uint8_t *)cstr, -1, &ucs);
+ const utf8proc_ssize_t nbytes =
+ utf8proc_iterate((const utf8proc_uint8_t *)cstr, -1, &ucs);
if (nbytes < 0)
return -1;
@@ -732,15 +715,16 @@ svn_utf_cstring_utf8_width(const char *c
cstr += nbytes;
/* Determine the width of this character and add it to the total. */
- w = utf8proc_charwidth(ucs);
- if (w == -1)
- return -1;
- width += w;
+ width += utf8proc_charwidth(ucs);
}
- return width;
+ /* Check for return value overflow. It's unfortunate that we chose
+ to use 'int' for what is essentially a string length value. */
+ if (width > INT_MAX)
+ return -1;
+
+ return (int)width;
}
-#endif
/* Advances CSTR by N printable UTF-8 characters */
static const char *
Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Sat May 23
12:10:04 2026 (r1934532)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Sat May 23
13:02:20 2026 (r1934533)
@@ -1000,28 +1000,28 @@ test_utf_xfrm(apr_pool_t *pool)
return SVN_NO_ERROR;
}
+/* Test data for test_utf8_width and test_utf8_grapheme_breaks */
+static const char *fat_emojis =
+ "\xf0\x9f\xa5\xba" /* three emojis, each two columns wide */
+ "\xf0\x9f\x91\x89"
+ "\xf0\x9f\x91\x88";
+static const char *mixup =
+ "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+static const char *invalid = "a" "\xe6" "bc";
+static const char *bom = "\xEF\xBB\xBF" "abc";
+
static svn_error_t *
test_utf8_width(apr_pool_t *pool)
{
- apr_array_header_t *graphemes;
-
- /* there are three emojis that each have wcwidth of two */
- const char *fat_emojis = "\xf0\x9f\xa5\xba\xf0\x9f\x91\x89\xf0\x9f\x91\x88";
- const char *mixup =
- "S\xcc\x87\xcc\xa3" /* S with dot above and below */
- "\xc5\xaf" /* u with ring */
- "b\xcc\xb1" /* b with macron below */
- "\xe1\xb9\xbd" /* v with tilde */
- "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
- "\xc8\x91" /* r with double grave */
- "s\xcc\x8c" /* s with caron */
- "\xe1\xb8\xaf" /* i with diaeresis and acute */
- "o\xcc\x80\xcc\x9b" /* o with grave and hook */
- "\xe1\xb9\x8b"; /* n with circumflex below */
- const char *invalid = "a" "\xe6" "bc";
- const char *bom = "\xEF\xBB\xBF" "abc";
-
- /* Test the public API */
SVN_TEST_INT_ASSERT(svn_utf_cstring_utf8_width(""), 0);
SVN_TEST_INT_ASSERT(svn_utf_cstring_utf8_width("abc123"), 6);
SVN_TEST_INT_ASSERT(svn_utf_cstring_utf8_width(fat_emojis), 6);
@@ -1029,15 +1029,31 @@ test_utf8_width(apr_pool_t *pool)
SVN_TEST_INT_ASSERT(svn_utf_cstring_utf8_width(invalid), -1);
SVN_TEST_INT_ASSERT(svn_utf_cstring_utf8_width(bom), 3);
- /* Test grapheme breakdown */
- svn_utf__cstring_utf8_grapheme_breaks(&graphemes, "", pool);
+ return SVN_NO_ERROR;
+}
+
+static svn_error_t *
+test_utf8_grapheme_breaks(apr_pool_t *pool)
+{
+ apr_array_header_t *graphemes = (void*)~0;
+
+ SVN_TEST_INT_ASSERT(
+ svn_utf__cstring_utf8_grapheme_breaks(&graphemes, "", pool), 0);
SVN_TEST_ASSERT(graphemes == NULL);
+ SVN_TEST_INT_ASSERT(
+ svn_utf__cstring_utf8_grapheme_breaks(NULL, invalid, NULL), -1);
+
+#define STRING_LENGTH_FROM_GRAPHEMES \
+ APR_ARRAY_IDX(graphemes, graphemes->nelts - 1, svn_utf__utf8_grapheme_t).end
+
svn_utf__cstring_utf8_grapheme_breaks(&graphemes, "abc123", pool);
SVN_TEST_INT_ASSERT(graphemes->nelts, 6);
+ SVN_TEST_INT_ASSERT(STRING_LENGTH_FROM_GRAPHEMES, strlen("abc123"));
svn_utf__cstring_utf8_grapheme_breaks(&graphemes, fat_emojis, pool);
SVN_TEST_INT_ASSERT(graphemes->nelts, 3);
+ SVN_TEST_INT_ASSERT(STRING_LENGTH_FROM_GRAPHEMES, strlen(fat_emojis));
SVN_TEST_INT_ASSERT(
APR_ARRAY_IDX(graphemes, 0, svn_utf__utf8_grapheme_t).width, 2);
SVN_TEST_INT_ASSERT(
@@ -1046,13 +1062,17 @@ test_utf8_width(apr_pool_t *pool)
APR_ARRAY_IDX(graphemes, 2, svn_utf__utf8_grapheme_t).width, 2);
svn_utf__cstring_utf8_grapheme_breaks(&graphemes, mixup, pool);
+ SVN_TEST_INT_ASSERT(STRING_LENGTH_FROM_GRAPHEMES, strlen(mixup));
SVN_TEST_INT_ASSERT(graphemes->nelts, 10);
svn_utf__cstring_utf8_grapheme_breaks(&graphemes, bom, pool);
+ SVN_TEST_INT_ASSERT(STRING_LENGTH_FROM_GRAPHEMES, strlen(bom));
SVN_TEST_INT_ASSERT(graphemes->nelts, 4);
SVN_TEST_INT_ASSERT(
APR_ARRAY_IDX(graphemes, 0, svn_utf__utf8_grapheme_t).width, 0);
+#undef STRING_LENGTH_FROM_GRAPHEMES
+
return SVN_NO_ERROR;
}
@@ -1131,6 +1151,8 @@ static struct svn_test_descriptor_t test
"test svn_utf__xfrm"),
SVN_TEST_PASS2(test_utf8_width,
"test svn_utf_cstring_utf8_width"),
+ SVN_TEST_PASS2(test_utf8_grapheme_breaks,
+ "test utf8 grapheme breaks"),
SVN_TEST_PASS2(test_utf8_align,
"test utf8 alignment"),
SVN_TEST_NULL