sc/source/core/tool/interpr1.cxx | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-)
New commits: commit c5b2e6926fd7137d95920f1b639b810c61ba3a98 Author: Mike Kaganski <[email protected]> AuthorDate: Fri Mar 6 18:54:29 2026 +0100 Commit: Xisco Fauli <[email protected]> CommitDate: Tue Mar 10 16:38:38 2026 +0100 Improve IsDBCS Introduced in commit 0946de1e2fbf8bd5ad3919429f648359d4464eca (Resolves: #i121120 Support RightB(), LeftB(), LenB() and MidB(), 2013-06-11), it assumed that scriptList (added in the same commit) was strictly sorted. However, that wasn't enforced. This change adds a couple of static asserts, making sure that each pair is strictly ordered ('from', 'to'); and all pairs have 'to' of the previous pair less than 'from' of the next. The complex check in 'is_sorted' is because at least on Windows, simple "l.to < r.from" predicate returns 'sorted' for overlapping ranges, because it only checks that negation of the predicate is false for the pair "next, prev". That discovered an element in the array, that was most definitely a leftover from development: the element with index 2 had 'from' and 'to' in reverse. The interesting thing is, that if reversed back, they form a range completely covering from element index 1 to index 3. It is likely that the element index 2 was intentionally reversed (and put in that position) - to not participate in the search. This change drops that leftover. Two elements - index 2 and index 4 - had overlapping ranges. That meant that they actually formed a single large range; this change merges them. Important to notice, that this range includes blocks with surrogates and PUA; that explains handling of SMP characters (tdf#171165): the two UTF-16 code units, which are surrogates, are checked, instead of the code point. This patch is not expected to change the result from the function. The single place where scriptList was used was simplified. 
Change-Id: I8c94265c1faa21a26f66317eff5627681f511aaf Reviewed-on: https://gerrit.libreoffice.org/c/core/+/201151 Reviewed-by: Mike Kaganski <[email protected]> Tested-by: Jenkins Signed-off-by: Xisco Fauli <[email protected]> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/201277 diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index 88a0899c3315..0a607eddb0eb 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -11769,30 +11769,33 @@ struct UBlockScript { } -const UBlockScript scriptList[] = { +constexpr UBlockScript scriptList[] = { {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO}, - {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES}, - {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS,UBLOCK_CJK_RADICALS_SUPPLEMENT }, - {UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, + {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS}, {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS}, {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT}, {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES} }; +static_assert(std::ranges::all_of(scriptList, [](const auto& r) { return r.from <= r.to; })); +static_assert(std::ranges::is_sorted(scriptList, + [](const auto& l, const auto& r) + { + // avoid interleaving ranges; without the second part, + // this would pass the check: {{0, 10}, {5, 15}} + return (l.to < r.from || l.from < r.to); + })); static bool IsDBCS(sal_Unicode currentChar) { // for the locale of ja-JP, character U+0x005c and U+0x20ac should be ScriptType::Asian if( (currentChar == 0x005c || currentChar == 0x20ac) && (MsLangId::getConfiguredSystemLanguage() == LANGUAGE_JAPANESE) ) return true; - sal_uInt16 i; - bool bRet = false; UBlockCode block = ublock_getCode(currentChar); - for ( i = 0; i < SAL_N_ELEMENTS(scriptList); i++) { - if (block <= 
scriptList[i].to) break; - } - bRet = (i < SAL_N_ELEMENTS(scriptList) && block >= scriptList[i].from); - return bRet; + for (auto [from, to] : scriptList) // scriptList is sorted ascending + if (block <= to) // only the first block with to >= block can contain block + return block >= from; + return false; } static sal_Int32 lcl_getLengthB( std::u16string_view str, sal_Int32 nPos ) {
