comphelper/qa/string/test_string.cxx |   20 ++++++++++++++++++++
 comphelper/source/misc/string.cxx    |    9 +++++++++
 include/comphelper/string.hxx        |   13 +++++++++++++
 include/o3tl/string_view.hxx         |   11 ++---------
 o3tl/qa/test-string_view.cxx         |   16 ++--------------
 5 files changed, 46 insertions(+), 23 deletions(-)

New commits:
commit 28cc0bff10f5dcec0c7b698ae7ba275845b2cad1
Author:     Stephan Bergmann <[email protected]>
AuthorDate: Fri May 5 09:46:52 2023 +0200
Commit:     Stephan Bergmann <[email protected]>
CommitDate: Fri May 5 11:30:12 2023 +0200

    Break comphelper::adjustIndexToStartOfSurrogate out of o3tl::iterateCodePoints
    
    ...as what they do is orthogonal (and it turned out that the use case that
    motivated the addition of o3tl::iterateCodePoints in the first place needs them
    independently, anyway)
    
    Change-Id: Id33901a2f7ac627253654ee6d883305dcf5a456f
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151415
    Tested-by: Jenkins
    Reviewed-by: Stephan Bergmann <[email protected]>

diff --git a/comphelper/qa/string/test_string.cxx b/comphelper/qa/string/test_string.cxx
index 58f9c3f63c16..974673ca2940 100644
--- a/comphelper/qa/string/test_string.cxx
+++ b/comphelper/qa/string/test_string.cxx
@@ -50,6 +50,7 @@ public:
     void testReverseCodePoints();
     void testSplit();
     void testRemoveAny();
+    void testAdjustIndexToStartOfSurrogate();
 
     CPPUNIT_TEST_SUITE(TestString);
     CPPUNIT_TEST(testStripStart);
@@ -63,6 +64,7 @@ public:
     CPPUNIT_TEST(testReverseCodePoints);
     CPPUNIT_TEST(testSplit);
     CPPUNIT_TEST(testRemoveAny);
+    CPPUNIT_TEST(testAdjustIndexToStartOfSurrogate);
     CPPUNIT_TEST_SUITE_END();
 };
 
@@ -237,6 +239,24 @@ void TestString::testRemoveAny()
     CPPUNIT_ASSERT_EQUAL(OUString(), removeAny(in, test7));
 }
 
+void TestString::testAdjustIndexToStartOfSurrogate() { // unit test for comphelper::string::adjustIndexToStartOfSurrogate
+    CPPUNIT_ASSERT_EQUAL(
+        sal_Int32(0), // empty string: index 0 is returned unchanged
+        comphelper::string::adjustIndexToStartOfSurrogate("", 0));
+    CPPUNIT_ASSERT_EQUAL(
+        sal_Int32(0), // index at the start of a surrogate pair is returned unchanged
+        comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 0));
+    CPPUNIT_ASSERT_EQUAL(
+        sal_Int32(0), // index between the two halves of the pair moves back to the pair's start
+        comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 1));
+    CPPUNIT_ASSERT_EQUAL(
+        sal_Int32(2), // index at the end of the string (just past the pair) is returned unchanged
+        comphelper::string::adjustIndexToStartOfSurrogate(u"\U00010000", 2));
+    CPPUNIT_ASSERT_EQUAL(
+        sal_Int32(1), // lone high surrogate with nothing after it: not a pair, index unchanged
+        comphelper::string::adjustIndexToStartOfSurrogate(u"\xD800", 1));
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(TestString);
 
 }
diff --git a/comphelper/source/misc/string.cxx b/comphelper/source/misc/string.cxx
index da5c8b92c05c..0fdd24c83d7e 100644
--- a/comphelper/source/misc/string.cxx
+++ b/comphelper/source/misc/string.cxx
@@ -19,6 +19,7 @@
 
 #include <sal/config.h>
 
+#include <cassert>
 #include <cstddef>
 #include <string_view>
 #include <utility>
@@ -679,6 +680,14 @@ OUString sanitizeStringSurrogates(const OUString& rString)
     return rString;
 }
 
+sal_Int32 adjustIndexToStartOfSurrogate(OUString const & string, sal_Int32 
index) {
+    assert(index >= 0 && index <= string.getLength()); // precondition: 0 <= index <= length (end position allowed)
+    return // yields index - 1 only when index sits between the two halves of a surrogate pair
+        (index > 0 && rtl::isHighSurrogate(string[index - 1]) // the code unit just before index is a high surrogate
+         && index < string.getLength() && rtl::isLowSurrogate(string[index])) // length check comes first, so string[index] is not read at the end
+        ? index - 1 : index;
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/comphelper/string.hxx b/include/comphelper/string.hxx
index 38564bc16673..8144a7e95e28 100644
--- a/include/comphelper/string.hxx
+++ b/include/comphelper/string.hxx
@@ -384,6 +384,19 @@ COMPHELPER_DLLPUBLIC bool isdigitAsciiString(std::u16string_view rString);
  */
 COMPHELPER_DLLPUBLIC OUString sanitizeStringSurrogates(const OUString& 
rString);
 
+/** Adjust an index in case it points into the middle of a surrogate pair.
+
+    @param string  An OUString
+
+    @param index  A valid index into the string or to its end (i.e., must be in the range from zero
+    to the length of string, inclusive)
+
+    @return index - 1 if the original index pointed into the middle of a surrogate pair; otherwise
+    the unchanged index
+ */
+COMPHELPER_DLLPUBLIC sal_Int32 adjustIndexToStartOfSurrogate(
+    OUString const & string, sal_Int32 index);
+
 } // namespace comphelper::string
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/o3tl/string_view.hxx b/include/o3tl/string_view.hxx
index b5b64d5991dd..5d03b2cfba87 100644
--- a/include/o3tl/string_view.hxx
+++ b/include/o3tl/string_view.hxx
@@ -508,11 +508,9 @@ inline double toDouble(std::string_view str)
     return rtl_math_stringToDouble(str.data(), str.data() + str.size(), '.', 
0, nullptr, nullptr);
 }
 
-// Similar to OUString::iterateCodePoints, but for std::string_view.
-// If preAdjustIndex is true: prior to any other operation, *indexUtf16 is adjusted by -1 if it
-// originally pointed into the middle of a surrogate pair.
+// Like OUString::iterateCodePoints, but for std::string_view:
 inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* 
indexUtf16,
-                                    sal_Int32 incrementCodePoints = 1, bool 
preAdjustIndex = false)
+                                    sal_Int32 incrementCodePoints = 1)
 {
     std::size_t n;
     char16_t cu;
@@ -520,11 +518,6 @@ inline sal_uInt32 iterateCodePoints(std::u16string_view string, std::size_t* ind
     assert(indexUtf16 != nullptr);
     n = *indexUtf16;
     assert(n <= string.length());
-    if (preAdjustIndex && n != 0 && rtl::isHighSurrogate(string[n - 1])
-        && rtl::isLowSurrogate(string[n]))
-    {
-        --n;
-    }
     while (incrementCodePoints < 0)
     {
         assert(n > 0);
diff --git a/o3tl/qa/test-string_view.cxx b/o3tl/qa/test-string_view.cxx
index 5378ce9415ed..c658da16b5dc 100644
--- a/o3tl/qa/test-string_view.cxx
+++ b/o3tl/qa/test-string_view.cxx
@@ -737,28 +737,16 @@ private:
     {
         {
             std::size_t i = 1;
-            auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, 
false);
+            auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1);
             CPPUNIT_ASSERT_EQUAL(std::size_t(2), i);
             CPPUNIT_ASSERT_EQUAL(sal_uInt32(0xDC00), c);
         }
-        {
-            std::size_t i = 1;
-            auto const c = o3tl::iterateCodePoints(u"\U00010000", &i, 1, true);
-            CPPUNIT_ASSERT_EQUAL(std::size_t(2), i);
-            CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c);
-        }
         {
             std::size_t i = 2;
-            auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, 
false);
+            auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1);
             CPPUNIT_ASSERT_EQUAL(std::size_t(1), i);
             CPPUNIT_ASSERT_EQUAL(sal_uInt32(0x10000), c);
         }
-        {
-            std::size_t i = 2;
-            auto const c = o3tl::iterateCodePoints(u"a\U00010000", &i, -1, 
true);
-            CPPUNIT_ASSERT_EQUAL(std::size_t(0), i);
-            CPPUNIT_ASSERT_EQUAL(sal_uInt32('a'), c);
-        }
     }
 };
 

Reply via email to