include/unotools/charclass.hxx     |   25 ++++++-----
 linguistic/source/misc.cxx         |    4 -
 unotools/source/i18n/charclass.cxx |   83 ++++++++++++++++++++++++++++---------
 3 files changed, 80 insertions(+), 32 deletions(-)

New commits:
commit ab0adac692b67fe7b63dee665607400c6a7e6c01
Author:     Eike Rathke <er...@redhat.com>
AuthorDate: Mon Sep 12 17:14:24 2022 +0200
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Tue Sep 13 10:25:17 2022 +0200

    Fix everything using XCharacterClassification::getStringType() and don't use it
    
    See note in offapi/com/sun/star/i18n/XCharacterClassification.idl
    
    The brain dead implementation is useless but API ... its use in
    isAlphaNumericType() and similar never returned what would have
    been expected.
    
    Change-Id: I278f2468182dab94c32273ef69cf9634bc002cb4
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139809
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Jenkins

diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index c76c7ae35f0b..7cb35ba2a594 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -31,28 +31,29 @@
 namespace com::sun::star::uno { class XComponentContext; }
 namespace com::sun::star::i18n { class XCharacterClassification; }
 
-const sal_Int32 nCharClassAlphaType =
+inline constexpr sal_Int32 nCharClassAlphaType =
     css::i18n::KCharacterType::UPPER |
     css::i18n::KCharacterType::LOWER |
     css::i18n::KCharacterType::TITLE_CASE;
 
-const sal_Int32 nCharClassAlphaTypeMask =
+inline constexpr sal_Int32 nCharClassAlphaTypeMask =
     nCharClassAlphaType |
+    css::i18n::KCharacterType::LETTER |     // Alpha is also always a LETTER
     css::i18n::KCharacterType::PRINTABLE |
     css::i18n::KCharacterType::BASE_FORM;
 
-const sal_Int32 nCharClassLetterType =
+inline constexpr sal_Int32 nCharClassLetterType =
     nCharClassAlphaType |
     css::i18n::KCharacterType::LETTER;
 
-const sal_Int32 nCharClassLetterTypeMask =
+inline constexpr sal_Int32 nCharClassLetterTypeMask =
     nCharClassAlphaTypeMask |
     css::i18n::KCharacterType::LETTER;
 
-const sal_Int32 nCharClassNumericType =
+inline constexpr sal_Int32 nCharClassNumericType =
     css::i18n::KCharacterType::DIGIT;
 
-const sal_Int32 nCharClassNumericTypeMask =
+inline constexpr sal_Int32 nCharClassNumericTypeMask =
     nCharClassNumericType |
     css::i18n::KCharacterType::PRINTABLE |
     css::i18n::KCharacterType::BASE_FORM;
@@ -86,14 +87,14 @@ public:
     /// isalpha() on ascii values of entire string
     static bool isAsciiAlpha( std::u16string_view rStr );
 
-    /// whether type is pure numeric or not, e.g. return of getStringType
+    /// whether type is pure numeric or not, e.g. return of getCharacterType()
     static bool isNumericType( sal_Int32 nType )
     {
         return ((nType & nCharClassNumericType) != 0) &&
             ((nType & ~nCharClassNumericTypeMask) == 0);
     }
 
-    /// whether type is pure alphanumeric or not, e.g. return of getStringType
+    /// whether type is pure alphanumeric or not, e.g. return of getCharacterType()
     static bool isAlphaNumericType( sal_Int32 nType )
     {
         return ((nType & (nCharClassAlphaType |
@@ -102,14 +103,14 @@ public:
             nCharClassNumericTypeMask)) == 0);
     }
 
-    /// whether type is pure letter or not, e.g. return of getStringType
+    /// whether type is pure letter or not, e.g. return of getCharacterType()
     static bool isLetterType( sal_Int32 nType )
     {
         return ((nType & nCharClassLetterType) != 0) &&
             ((nType & ~nCharClassLetterTypeMask) == 0);
     }
 
-    /// whether type is pure letternumeric or not, e.g. return of getStringType
+    /// whether type is pure letternumeric or not, e.g. return of getCharacterType()
     static bool isLetterNumericType( sal_Int32 nType )
     {
         return ((nType & (nCharClassLetterType |
@@ -141,7 +142,6 @@ public:
     css::i18n::DirectionProperty getCharacterDirection( const OUString& rStr, 
sal_Int32 nPos ) const;
     css::i18n::UnicodeScript getScript( const OUString& rStr, sal_Int32 nPos ) 
const;
     sal_Int32 getCharacterType( const OUString& rStr, sal_Int32 nPos ) const;
-    sal_Int32 getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 
nCount ) const;
 
     css::i18n::ParseResult parseAnyToken(
                                     const OUString& rStr,
@@ -167,10 +167,13 @@ public:
     bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
     bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
     bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
+    bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
     bool isLetter( const OUString& rStr ) const;
     bool isNumeric( const OUString& rStr ) const;
     bool isLetterNumeric( const OUString& rStr ) const;
 
+    bool isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) 
const;
+
 private:
 
     const css::lang::Locale &  getMyLocale() const;
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index dc4c17ce8430..c315ad1270ee 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -558,9 +558,7 @@ uno::Reference< XHyphenatedWord > 
RebuildHyphensAndControlChars(
 bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, 
LanguageType nLanguage )
 {
     CharClass aCC(( LanguageTag( nLanguage ) ));
-    sal_Int32 nFlags = aCC.getStringType( rText, nPos, nLen );
-    return      (nFlags & KCharacterType::UPPER)
-            && !(nFlags & KCharacterType::LOWER);
+    return aCC.isUpper( rText, nPos, nLen );
 }
 
 CapType capitalType(const OUString& aTerm, CharClass const * pCC)
diff --git a/unotools/source/i18n/charclass.cxx 
b/unotools/source/i18n/charclass.cxx
index ace153d03a74..4573687c4d29 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -134,7 +134,14 @@ bool CharClass::isLetter( const OUString& rStr ) const
 {
     try
     {
-        return isLetterType( xCC->getStringType( rStr, 0, rStr.getLength(), 
getMyLocale() ) );
+        sal_Int32 nPos = 0;
+        while (nPos < rStr.getLength())
+        {
+            if (!isLetter( rStr, nPos))
+                return false;
+            rStr.iterateCodePoints( &nPos);
+        }
+        return true;
     }
     catch ( const Exception& )
     {
@@ -165,7 +172,14 @@ bool CharClass::isNumeric( const OUString& rStr ) const
 {
     try
     {
-        return isNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), 
getMyLocale() ) );
+        sal_Int32 nPos = 0;
+        while (nPos < rStr.getLength())
+        {
+            if (!isDigit( rStr, nPos))
+                return false;
+            rStr.iterateCodePoints( &nPos);
+        }
+        return true;
     }
     catch ( const Exception& )
     {
@@ -183,7 +197,7 @@ bool CharClass::isAlphaNumeric( const OUString& rStr, 
sal_Int32 nPos ) const
     try
     {
         return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
-                (nCharClassAlphaType | KCharacterType::DIGIT)) != 0;
+                (nCharClassAlphaType | nCharClassNumericType)) != 0;
     }
     catch ( const Exception& )
     {
@@ -201,7 +215,7 @@ bool CharClass::isLetterNumeric( const OUString& rStr, 
sal_Int32 nPos ) const
     try
     {
         return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
-                (nCharClassLetterType | KCharacterType::DIGIT)) != 0;
+                (nCharClassLetterType | nCharClassNumericType)) != 0;
     }
     catch ( const Exception& )
     {
@@ -214,7 +228,53 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) 
const
 {
     try
     {
-        return isLetterNumericType( xCC->getStringType( rStr, 0, 
rStr.getLength(), getMyLocale() ) );
+        sal_Int32 nPos = 0;
+        while (nPos < rStr.getLength())
+        {
+            if (!isLetterNumeric( rStr, nPos))
+                return false;
+            rStr.iterateCodePoints( &nPos);
+        }
+        return true;
+    }
+    catch ( const Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+    }
+    return false;
+}
+
+bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
+{
+    sal_Unicode c = rStr[nPos];
+    if ( c < 128 )
+        return rtl::isAsciiUpperCase(c);
+
+    try
+    {
+        return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
+                KCharacterType::UPPER) != 0;
+    }
+    catch ( const Exception& )
+    {
+        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
+    }
+    return false;
+}
+
+bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 
nCount ) const
+{
+    try
+    {
+        assert(nPos >= 0 && nCount >= 0);
+        sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength());
+        while (nPos < nLen)
+        {
+            if (!isUpper( rStr, nPos))
+                return false;
+            rStr.iterateCodePoints( &nPos);
+        }
+        return true;
     }
     catch ( const Exception& )
     {
@@ -314,19 +374,6 @@ sal_Int32 CharClass::getCharacterType( const OUString& 
rStr, sal_Int32 nPos ) co
     return 0;
 }
 
-sal_Int32 CharClass::getStringType( const OUString& rStr, sal_Int32 nPos, 
sal_Int32 nCount ) const
-{
-    try
-    {
-        return xCC->getStringType( rStr, nPos, nCount, getMyLocale() );
-    }
-    catch ( const Exception& )
-    {
-        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
-    }
-    return 0;
-}
-
 css::i18n::ParseResult CharClass::parseAnyToken(
             const OUString& rStr,
             sal_Int32 nPos,

Reply via email to