sal/rtl/string.cxx  |    9 ------
 sal/rtl/strtmpl.hxx |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 sal/rtl/ustring.cxx |   41 ++----------------------------
 3 files changed, 76 insertions(+), 45 deletions(-)

New commits:
commit 4e4a01302a140d75a49055821b3197a2eda81db5
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Wed Feb 16 09:45:40 2022 +0100
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Wed Feb 16 18:15:27 2022 +0100

    Related: tdf#147421: optimize O(U)String's replaceAll*
    
    ... and underlying low-level C functions.
    
    Change-Id: I78c2dd75e272e6d3cdd2cfae007ca0a0ec1635e1
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/129942
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index d67246dc159c..a2020561a7c7 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -361,14 +361,7 @@ void rtl_string_newReplaceAll(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    rtl_string_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_string_newReplaceFirst(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 sal_Int32 SAL_CALL rtl_str_getLength(const char* pStr) SAL_THROW_EXTERN_C()
diff --git a/sal/rtl/strtmpl.hxx b/sal/rtl/strtmpl.hxx
index 7a909bb5efe3..4f04cc404ee5 100644
--- a/sal/rtl/strtmpl.hxx
+++ b/sal/rtl/strtmpl.hxx
@@ -35,6 +35,9 @@
 #include <wchar.h>
 #include <sal/log.hxx>
 #include <rtl/character.hxx>
+#include <rtl/strbuf.h>
+#include <rtl/ustrbuf.h>
+#include <rtl/ustring.hxx>
 
 namespace rtl::str
 {
@@ -1808,6 +1811,74 @@ sal_Int32 getToken                                ( IMPL_RTL_STRINGDATA** ppThis
     }
 }
 
+namespace detail
+{
+template <typename CharType1, typename CharType2>
+sal_Int32 indexOf(const CharType1* s, sal_Int32 len, const CharType2* subStr, sal_Int32 subLen)
+{
+    if constexpr (std::is_same_v<CharType1, CharType2>)
+        return indexOfStr_WithLength(s, len, subStr, subLen);
+    else if constexpr (std::is_same_v<CharType1, sal_Unicode> && std::is_same_v<CharType2, char>)
+        return rtl_ustr_indexOfAscii_WithLength(s, len, subStr, subLen);
+}
+
+template <class S, typename CharType1>
+void append(S** s, sal_Int32* capacity, const CharType1* s1, sal_Int32 len)
+{
+    if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, sal_Unicode>)
+        return rtl_uStringbuffer_insert(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_uString> && std::is_same_v<CharType1, char>)
+        return rtl_uStringbuffer_insert_ascii(s, capacity, (*s)->length, s1, len);
+    else if constexpr (std::is_same_v<S, rtl_String> && std::is_same_v<CharType1, char>)
+        return rtl_stringbuffer_insert(s, capacity, (*s)->length, s1, len);
+}
+}
+
+template <class S, typename CharTypeFrom, typename CharTypeTo>
+void newReplaceAllFromIndex(S** s, S* s1, CharTypeFrom const* from, sal_Int32 fromLength,
+                            CharTypeTo const* to, sal_Int32 toLength, sal_Int32 fromIndex)
+{
+    assert(s != nullptr);
+    assert(s1 != nullptr);
+    assert(fromLength >= 0);
+    assert(from != nullptr || fromLength == 0);
+    assert(toLength >= 0);
+    assert(to != nullptr || toLength == 0);
+    assert(fromIndex >= 0 && fromIndex <= s1->length);
+    sal_Int32 i = detail::indexOf(s1->buffer + fromIndex, s1->length - fromIndex, from, fromLength);
+    if (i >= 0)
+    {
+        if (s1->length - fromLength > SAL_MAX_INT32 - toLength)
+            std::abort();
+        acquire(s1); // in case *s == s1
+        sal_Int32 nCapacity = s1->length + (toLength - fromLength);
+        if (fromLength < toLength)
+        {
+            // Pre-allocate up to 16 replacements more
+            const sal_Int32 nMaxMoreFinds = (s1->length - fromIndex - i - fromLength) / fromLength;
+            const sal_Int32 nIncrease = toLength - fromLength;
+            const sal_Int32 nMoreReplacements = std::min(
+                { nMaxMoreFinds, (SAL_MAX_INT32 - nCapacity) / nIncrease, sal_Int32(16) });
+            nCapacity += nMoreReplacements * nIncrease;
+        }
+        new_WithLength(s, nCapacity);
+        i += fromIndex;
+        fromIndex = 0;
+        do
+        {
+            detail::append(s, &nCapacity, s1->buffer + fromIndex, i);
+            detail::append(s, &nCapacity, to, toLength);
+            fromIndex += i + fromLength;
+            i = detail::indexOf(s1->buffer + fromIndex, s1->length - fromIndex, from, fromLength);
+        } while (i >= 0);
+        // the rest
+        detail::append(s, &nCapacity, s1->buffer + fromIndex, s1->length - fromIndex);
+        release(s1);
+    }
+    else
+        assign(s, s1);
+}
+
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index c550c99c419e..70048db58f6e 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -1367,15 +1367,7 @@ void rtl_uString_newReplaceAllAsciiLAsciiL(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstAsciiLAsciiL(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllAsciiLUtf16L(
@@ -1383,15 +1375,7 @@ void rtl_uString_newReplaceAllAsciiLUtf16L(
     sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstAsciiLUtf16L(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllUtf16LAsciiL(
@@ -1399,15 +1383,7 @@ void rtl_uString_newReplaceAllUtf16LAsciiL(
     sal_Int32 fromLength, char const * to, sal_Int32 toLength)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = 0;; i += toLength) {
-        rtl_uString_newReplaceFirstUtf16LAsciiL(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, 0);
 }
 
 void rtl_uString_newReplaceAllUtf16LUtf16L(
@@ -1423,16 +1399,7 @@ void rtl_uString_newReplaceAllFromIndexUtf16LUtf16L(
     sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, sal_Int32 fromIndex)
     SAL_THROW_EXTERN_C()
 {
-    assert(toLength >= 0);
-    assert(fromIndex >= 0 && fromIndex <= str->length);
-    rtl_uString_assign(newStr, str);
-    for (sal_Int32 i = fromIndex;; i += toLength) {
-        rtl_uString_newReplaceFirstUtf16LUtf16L(
-            newStr, *newStr, from, fromLength, to, toLength, &i);
-        if (i == -1 || *newStr == nullptr) {
-            break;
-        }
-    }
+    rtl::str::newReplaceAllFromIndex(newStr, str, from, fromLength, to, toLength, fromIndex);
 }
 
 sal_Int32 SAL_CALL rtl_ustr_getLength(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()

Reply via email to