This is an automated email from the ASF dual-hosted git repository.

damjan pushed a commit to branch icu-c-api
in repository https://gitbox.apache.org/repos/asf/openoffice.git

commit 1a6b597b3e85db881c84da3dd636f3e82110225f
Author: Damjan Jovanovic <[email protected]>
AuthorDate: Sun May 4 08:28:33 2025 +0200

    Fix loading i18npool custom break iterator rules from C.
    
    Patch by: me
---
 .../source/breakiterator/breakiterator_unicode.cxx | 59 ++++++++++------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/main/i18npool/source/breakiterator/breakiterator_unicode.cxx b/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
index fdd28c056b..8885598d7a 100644
--- a/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/main/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -77,46 +77,42 @@ BreakIterator_Unicode::~BreakIterator_Unicode()
         if (line.aBreakIterator) ubrk_close(line.aBreakIterator);
 }
 
-// Hard to support in C:
-// 1. setBreakType() cannot be reached from C.
-// 2. UDataMemory's udata_getLength() is a private API, yet we need the length for ubrk_openBinaryRules().
-#if 0
-/*
-       Wrapper class to provide public access to the RuleBasedBreakIterator's
-       setbreakType method.
-*/
-class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
-       public:
-               inline void publicSetBreakType(int32_t type) {
-                       setBreakType(type);
-               };
-               OOoRuleBasedBreakIterator(UDataMemory* image,
-                               UErrorCode &status) : 
-                       RuleBasedBreakIterator(image, status) { };
-
-};
-
 static UBreakIterator* open_udata_BreakIterator(UDataMemory *udm, UErrorCode 
*status)
 {
     *status = U_ZERO_ERROR;
     UDataInfo info;
     info.size = sizeof(info);
     udata_getInfo(udm, &info);
-    if (  !(info.isBigEndian == U_IS_BIG_ENDIAN &&
+    if (  !(
+#if defined(OSL_LITENDIAN)
+            !info.isBigEndian  &&
+#else
+            info.isBigEndian &&
+#endif
             info.charsetFamily == U_CHARSET_FAMILY &&
             info.dataFormat[0] == 0x42 &&  // dataFormat="Brk "
             info.dataFormat[1] == 0x72 &&
             info.dataFormat[2] == 0x6b &&
-            info.dataFormat[3] == 0x20)
+            info.dataFormat[3] == 0x20
+           )
     ) {
         *status = U_INVALID_FORMAT_ERROR;
         return NULL;
     }
 
     uint8_t *memory = (uint8_t*) udata_getMemory(udm);
-    return ubrk_openBinaryRules(memory, udata_getLength(udm), NULL, 0, status);
+    // FIXME: We have no way to get the real length, without parsing private data out first.
+    // But ubrk_openBinaryRules() calls
+    //     RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
+    //                 uint32_t       ruleLength,
+    //                 UErrorCode     &status)
+    // which, ***AT THE TIME OF WRITING***, only does these checks, and nothing else:
+    // (compiledRules == nullptr || ruleLength < sizeof(RBBIDataHeader))
+    // (data->fLength > ruleLength)
+    // both those must be false for this to work, so we just use an arbitrary large number:
+    uint32_t length = 1000000000;
+    return ubrk_openBinaryRules(memory, length, NULL, 0, status);
 }
-#endif
 
 // loading ICU breakiterator on demand.
 void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const 
com::sun::star::lang::Locale& rLocale,
@@ -144,8 +140,6 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
             ubrk_close(icuBI->aBreakIterator);
             icuBI->aBreakIterator=NULL;
         }
-// Hard to support in C:
-#if 0
         if (rule) {
             uno::Sequence< OUString > breakRules = 
LocaleData().getBreakIteratorRules(rLocale);
 
@@ -176,16 +170,17 @@ void SAL_CALL 
BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
                 }
             }
             if (rbi) {
-                switch (rBreakType) {
-                    case LOAD_CHARACTER_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_CHARACTER); break;
-                    case LOAD_WORD_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_WORD); break;
-                    case LOAD_SENTENCE_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_SENTENCE); break;
-                    case LOAD_LINE_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_LINE); break;
-                }
+                // This was always a bad idea, as it required calling a private, internal ICU API,
+                // and isn't possible since ICU >= 61.1 as per https://unicode-org.atlassian.net/browse/ICU-10688.
+                //switch (rBreakType) {
+                //    case LOAD_CHARACTER_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_CHARACTER); break;
+                //    case LOAD_WORD_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_WORD); break;
+                //    case LOAD_SENTENCE_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_SENTENCE); break;
+                //    case LOAD_LINE_BREAKITERATOR: 
rbi->publicSetBreakType(UBRK_LINE); break;
+                //}
                 icuBI->aBreakIterator = rbi;
             }
         }
-#endif
 
         if (!icuBI->aBreakIterator) {
             ::rtl::OUStringBuffer locale;

Reply via email to