Author: rinrab
Date: Wed May 20 08:15:53 2026
New Revision: 1934423

Log:
Implement routines to do alignment of UTF-8 string to the left or to the right
side. We'll use them in the cmdline code to properly format tables where we
want them to handle Unicode strings properly.

* subversion/include/private/svn_utf_private.h
  (svn_utf__cstring_utf8_align_right,
   svn_utf__cstring_utf8_align_left): Declare symbols.
* subversion/libsvn_subr/utf8proc.c
  (utf8_skipn,
   svn_utf__cstring_utf8_align_right,
   svn_utf__cstring_utf8_align_left): New functions.
* subversion/tests/libsvn_subr/utf-test.c
  (test_utf8_align): Add test to check new API.
  (test_funcs): Run the test.

Modified:
   subversion/trunk/subversion/include/private/svn_utf_private.h
   subversion/trunk/subversion/libsvn_subr/utf8proc.c
   subversion/trunk/subversion/tests/libsvn_subr/utf-test.c

Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h       Wed May 
20 06:45:03 2026        (r1934422)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h       Wed May 
20 08:15:53 2026        (r1934423)
@@ -288,6 +288,35 @@ svn_utf__utf32_to_utf8(const svn_string_
                        apr_pool_t *scratch_pool);
 
 
+/* Return a new string, allocated in @a pool, holding a copy of @a cstr
+ * aligned to the right side by padding it with spaces on the left. This
+ * function takes UTF-8 multibyte encoding and wcwidth into account. The new
+ * string is meant to have exactly as many printable columns as @a padding
+ * describes; if @a cstr is wider than that, it is truncated.
+ *
+ * Please note that a character wider than one column may leave a small
+ * artifact: in that case the string won't be perfectly aligned.
+ */
+char *
+svn_utf__cstring_utf8_align_right(const char *cstr,
+                                  int padding,
+                                  apr_pool_t *pool);
+
+/* Return a new string, allocated in @a pool, holding a copy of @a cstr
+ * aligned to the left side by padding it with spaces on the right. This
+ * function takes UTF-8 multibyte encoding and wcwidth into account. The new
+ * string is meant to have exactly as many printable columns as @a padding
+ * describes; if @a cstr is wider than that, it is truncated.
+ *
+ * Please note that a character wider than one column may leave a small
+ * artifact: in that case the string won't be perfectly aligned.
+ *
+ * Similar to svn_utf__cstring_utf8_align_right(), but aligning to the left
+ * side.
+ */
+char *
+svn_utf__cstring_utf8_align_left(const char *cstr,
+                                 int padding,
+                                 apr_pool_t *pool);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */

Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c  Wed May 20 06:45:03 
2026        (r1934422)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c  Wed May 20 08:15:53 
2026        (r1934423)
@@ -638,3 +638,71 @@ svn_utf_cstring_utf8_width(const char *c
 
   return width;
 }
+
+/* Return a pointer into CSTR just past the leading characters that together
+ * occupy at least N display columns, or a pointer to the terminating NUL if
+ * the string ends first.  Column widths come from utf8proc_charwidth();
+ * because only whole characters are consumed, a character wider than one
+ * column can carry the position past N columns.
+ *
+ * Return NULL if utf8proc_iterate() reports an error while decoding. */
+static const char *
+utf8_skipn(const char *cstr, apr_size_t n)
+{
+  const char *pos = cstr;
+  apr_size_t columns;
+
+  for (columns = 0; *pos && columns < n; )
+    {
+      apr_int32_t codepoint;
+      int consumed = utf8proc_iterate((apr_byte_t*)pos, -1, &codepoint);
+
+      if (consumed < 0)
+        return NULL;
+
+      /* Step over the whole encoded character and account for its width. */
+      pos += consumed;
+      columns += utf8proc_charwidth(codepoint);
+    }
+
+  return pos;
+}
+
+/* Right-align CSTR in a field of PADDING display columns; the result is
+ * allocated in POOL.
+ *
+ * If CSTR is not wider than PADDING, spaces are prepended.  If it is wider,
+ * leading characters are dropped, always on a UTF-8 character boundary,
+ * until no further character can be dropped without the result becoming
+ * narrower than PADDING.  A wide character at the cut is therefore kept
+ * whole, and the result may then be wider than requested. */
+char *
+svn_utf__cstring_utf8_align_right(const char *cstr, int padding,
+                                  apr_pool_t *pool)
+{
+  int width = svn_utf_cstring_utf8_width(cstr);
+
+  if (width > padding)
+    {
+      const char *start = cstr;
+      int remaining = width;
+
+      /* Walk forward from the beginning, dropping whole characters while
+         what is left still fills the field.  Measuring from the front,
+         rather than copying a byte count off the tail, keeps multibyte
+         sequences intact when characters differ in encoded length. */
+      while (*start)
+        {
+          apr_int32_t ucs;
+          int nbytes = utf8proc_iterate((apr_byte_t*)start, -1, &ucs);
+          int charwidth;
+
+          /* Stop at undecodable input instead of stepping into it. */
+          if (nbytes <= 0)
+            break;
+
+          charwidth = utf8proc_charwidth(ucs);
+          if (remaining - charwidth < padding)
+            break;
+
+          start += nbytes;
+          remaining -= charwidth;
+        }
+
+      return apr_pstrdup(pool, start);
+    }
+  else
+    {
+      apr_size_t size = strlen(cstr);
+      apr_size_t spaces = padding - width;
+      /* One extra byte for the terminating NUL written below. */
+      char *result = apr_palloc(pool, size + spaces + 1);
+
+      memset(result, ' ', spaces);
+      memcpy(result + spaces, cstr, size);
+      result[size + spaces] = '\0';
+      return result;
+    }
+}
+
+/* Left-align CSTR in a field of PADDING display columns; the result is
+ * allocated in POOL.  A string that is not wider than PADDING gets trailing
+ * spaces; a wider one is cut at the position utf8_skipn() reports for
+ * PADDING columns. */
+char *
+svn_utf__cstring_utf8_align_left(const char *cstr, int padding,
+                                 apr_pool_t *pool)
+{
+  int columns = svn_utf_cstring_utf8_width(cstr);
+  int nbytes;
+  int fill;
+  char *padded;
+
+  if (columns > padding)
+    {
+      /* Too wide: keep only the leading part. */
+      nbytes = utf8_skipn(cstr, padding) - cstr;
+      return apr_pstrmemdup(pool, cstr, nbytes);
+    }
+
+  /* Fits: copy the text, then fill the rest of the field with spaces. */
+  nbytes = strlen(cstr);
+  fill = padding - columns;
+  padded = apr_palloc(pool, nbytes + fill + 1);
+  memcpy(padded, cstr, nbytes);
+  memset(padded + nbytes, ' ', fill);
+  padded[nbytes + fill] = '\0';
+
+  return padded;
+}

Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c    Wed May 20 
06:45:03 2026        (r1934422)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c    Wed May 20 
08:15:53 2026        (r1934423)
@@ -1028,6 +1028,49 @@ test_utf8_width(apr_pool_t *pool)
   return SVN_NO_ERROR;
 }
 
+static svn_error_t *
+test_utf8_align(apr_pool_t *pool)
+{
+  /* Exercise svn_utf__cstring_utf8_align_left() and
+   * svn_utf__cstring_utf8_align_right().
+   *
+   * ASCII: one byte and one column per character.  A field wider than the
+   * string gets space padding; a narrower field truncates the string,
+   * keeping its head for left alignment and its tail for right alignment. */
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_left("abc", 5, pool),
+                         "abc  ");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_left("abc", 2, pool),
+                         "ab");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_right("abc", 5, pool),
+                         "  abc");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_right("abc", 2, pool),
+                         "bc");
+
+  /* Two-byte characters (U+016F, U+0161): each is expected to count as a
+   * single column, so padding and truncation must work on characters rather
+   * than on bytes. */
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_left("\xc5\xaf\xc5\xa1", 4, pool),
+      "\xc5\xaf\xc5\xa1  ");
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_left("\xc5\xaf\xc5\xa1", 1, pool),
+      "\xc5\xaf");
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_right("\xc5\xaf\xc5\xa1", 4, pool),
+      "  \xc5\xaf\xc5\xa1");
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_right("\xc5\xaf\xc5\xa1", 1, pool),
+      "\xc5\xa1");
+
+  /* An emoji (U+1F97A): four bytes that are expected to count as two
+   * columns, so it fills a field of 2 exactly and gets a single space in a
+   * field of 3. */
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 2, pool),
+      "\xf0\x9f\xa5\xba");
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 3, pool),
+      " \xf0\x9f\xa5\xba");
+
+  /* The two-column emoji cannot fit into a one-column field.  The expected
+   * result here is the whole emoji, i.e. one column wider than requested;
+   * this is technically wrong (?) */
+  SVN_TEST_STRING_ASSERT(
+      svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 1, pool),
+      "\xf0\x9f\xa5\xba");
+
+  return SVN_NO_ERROR;
+}
+
 
 /* The test table.  */
 
@@ -1060,6 +1103,8 @@ static struct svn_test_descriptor_t test
                    "test svn_utf__xfrm"),
     SVN_TEST_PASS2(test_utf8_width,
                    "test svn_utf_cstring_utf8_width"),
+    SVN_TEST_PASS2(test_utf8_align,
+                   "test utf8 alignment"),
     SVN_TEST_NULL
   };
 

Reply via email to