sc/qa/unit/data/functions/text/fods/findb.fods | 9 + sc/qa/unit/data/functions/text/fods/leftb.fods | 34 ---- sc/qa/unit/data/functions/text/fods/lenb.fods | 34 ---- sc/qa/unit/data/functions/text/fods/midb.fods | 65 -------- sc/qa/unit/data/functions/text/fods/replaceb.fods | 176 +--------------------- sc/qa/unit/data/functions/text/fods/rightb.fods | 11 - sc/qa/unit/data/functions/text/fods/searchb.fods | 96 +----------- sc/source/core/tool/interpr1.cxx | 112 ++++---------- 8 files changed, 90 insertions(+), 447 deletions(-)
New commits: commit 20549379e8dc62647c513524063c5fae12f1add5 Author: Mike Kaganski <[email protected]> AuthorDate: Fri Mar 6 22:50:27 2026 +0100 Commit: Mike Kaganski <[email protected]> CommitDate: Sat Mar 7 13:23:31 2026 +0100 tdf#171165: fix handling of SMP characters in "byte string" functions This patch removes surrogates and PUA from scriptList, to match how Excel calculates. It also simplifies implementations of lcl_RightB / lcl_LeftB. Change-Id: I4af1069eda790df0dda7f402fe4b5f3c47c42c34 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/201161 Reviewed-by: Mike Kaganski <[email protected]> Tested-by: Jenkins diff --git a/sc/qa/unit/data/functions/text/fods/findb.fods b/sc/qa/unit/data/functions/text/fods/findb.fods index 8d8484a536c1..e32f0c11fd8b 100644 --- a/sc/qa/unit/data/functions/text/fods/findb.fods +++ b/sc/qa/unit/data/functions/text/fods/findb.fods @@ -1482,6 +1482,15 @@ </table:table-cell> <table:table-cell table:number-columns-repeated="11"/> </table:table-row> + <table:table-row> + <table:table-cell table:formula="of:=FINDB("A";"🙁🙂A")"/> + <table:table-cell office:value-type="float" office:value="5"/> + <table:table-cell table:formula="of:=[.A32]=[.B32]"/> + <table:table-cell table:formula="of:=FORMULA([.A32])"/> + <table:table-cell table:number-columns-repeated="2"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell table:number-columns-repeated="8"/> + </table:table-row> <calcext:conditional-formats> <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C50"> <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> diff --git a/sc/qa/unit/data/functions/text/fods/leftb.fods b/sc/qa/unit/data/functions/text/fods/leftb.fods index cce2fcefed4f..28aec8df060f 100644 --- a/sc/qa/unit/data/functions/text/fods/leftb.fods +++ 
b/sc/qa/unit/data/functions/text/fods/leftb.fods @@ -1011,33 +1011,13 @@ <table:table-cell table:style-name="ce17"/> <table:table-cell table:number-columns-repeated="4"/> </table:table-row> - <table:table-row table:style-name="ro5" table:number-rows-repeated="27"> - <table:table-cell table:style-name="ce14"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="4"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:number-columns-repeated="4"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="39"> - <table:table-cell table:style-name="ce23"/> - <table:table-cell table:style-name="ce24"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="9"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="186"> - <table:table-cell table:number-columns-repeated="2"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="9"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="1048313"> - <table:table-cell table:number-columns-repeated="13"/> - </table:table-row> - <table:table-row table:style-name="ro2"> - <table:table-cell table:number-columns-repeated="13"/> + <table:table-row> + <table:table-cell table:formula="of:=LEFTB("🙁🙂A";2)"/> + <table:table-cell office:value-type="string" office:string-value="🙁"/> + <table:table-cell table:formula="of:=[.A11]=[.B11]"/> + <table:table-cell table:formula="of:=FORMULA([.A11])"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell 
table:number-columns-repeated="8"/> </table:table-row> <calcext:conditional-formats> <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C262"> diff --git a/sc/qa/unit/data/functions/text/fods/lenb.fods b/sc/qa/unit/data/functions/text/fods/lenb.fods index 5777d4dc4295..e97dd8e8cdb2 100644 --- a/sc/qa/unit/data/functions/text/fods/lenb.fods +++ b/sc/qa/unit/data/functions/text/fods/lenb.fods @@ -1051,33 +1051,13 @@ <table:table-cell table:style-name="ce17"/> <table:table-cell table:number-columns-repeated="4"/> </table:table-row> - <table:table-row table:style-name="ro5" table:number-rows-repeated="28"> - <table:table-cell table:style-name="ce14"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="4"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:number-columns-repeated="4"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="39"> - <table:table-cell table:style-name="ce23"/> - <table:table-cell table:style-name="ce24"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="9"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="186"> - <table:table-cell table:number-columns-repeated="2"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="9"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="1048313"> - <table:table-cell table:number-columns-repeated="13"/> - </table:table-row> - <table:table-row table:style-name="ro2"> - <table:table-cell table:number-columns-repeated="13"/> + <table:table-row> + <table:table-cell table:formula="of:=LENB("🙁🙂A")"/> + <table:table-cell 
office:value-type="float" office:value="5"/> + <table:table-cell table:formula="of:=[.A10]=[.B10]"/> + <table:table-cell table:formula="of:=FORMULA([.A10])"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell table:number-columns-repeated="8"/> </table:table-row> <calcext:conditional-formats> <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C262"> diff --git a/sc/qa/unit/data/functions/text/fods/midb.fods b/sc/qa/unit/data/functions/text/fods/midb.fods index 50dcaabaf977..d646f1746079 100644 --- a/sc/qa/unit/data/functions/text/fods/midb.fods +++ b/sc/qa/unit/data/functions/text/fods/midb.fods @@ -916,66 +916,13 @@ </table:table-cell> <table:table-cell table:number-columns-repeated="5"/> </table:table-row> - <table:table-row table:style-name="ro5"> - <table:table-cell/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro2"> - <table:table-cell table:style-name="ce9" table:number-columns-repeated="2"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro2"> - <table:table-cell table:number-columns-repeated="2"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro5"> - <table:table-cell table:style-name="ce14"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell 
table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro5"> - <table:table-cell table:style-name="ce16"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="4"/> - <table:table-cell table:style-name="ce17"/> - </table:table-row> - <table:table-row table:style-name="ro5" table:number-rows-repeated="27"> - <table:table-cell table:style-name="ce14"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> + <table:table-row> + <table:table-cell table:formula="of:=MIDB("🙁🙂A";3;2)"/> + <table:table-cell office:value-type="string" office:string-value="🙂"/> + <table:table-cell table:formula="of:=[.A6]=[.B6]"/> + <table:table-cell table:formula="of:=FORMULA([.A6])"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> <table:table-cell table:number-columns-repeated="4"/> - <table:table-cell table:style-name="ce17"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="39"> - <table:table-cell table:style-name="ce23"/> - <table:table-cell table:style-name="ce24"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="186"> - <table:table-cell table:number-columns-repeated="2"/> - <table:table-cell table:style-name="ce27"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="5"/> - </table:table-row> - <table:table-row table:style-name="ro2" table:number-rows-repeated="1048313"> - <table:table-cell table:number-columns-repeated="9"/> - 
</table:table-row> - <table:table-row table:style-name="ro2"> - <table:table-cell table:number-columns-repeated="9"/> </table:table-row> <calcext:conditional-formats> <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C262"> diff --git a/sc/qa/unit/data/functions/text/fods/replaceb.fods b/sc/qa/unit/data/functions/text/fods/replaceb.fods index 7a7ee8bcae01..2678ba03e340 100644 --- a/sc/qa/unit/data/functions/text/fods/replaceb.fods +++ b/sc/qa/unit/data/functions/text/fods/replaceb.fods @@ -1787,173 +1787,17 @@ </table:table-cell> <table:table-cell table:number-columns-repeated="9"/> </table:table-row> + <table:table-row> + <table:table-cell table:formula="of:=REPLACEB("🙁🙂A";3;2;"!")"/> + <table:table-cell office:value-type="string" office:string-value="🙁!A"/> + <table:table-cell table:formula="of:=[.A36]=[.B36]"/> + <table:table-cell table:formula="of:=FORMULA([.A36])"/> + <table:table-cell table:number-columns-repeated="2"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell table:number-columns-repeated="6"/> + </table:table-row> <calcext:conditional-formats> - <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C2"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C3:Sheet2.C3"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" 
calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C4:Sheet2.C4"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C5:Sheet2.C5"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C6:Sheet2.C6"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C7:Sheet2.C7"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" 
calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C8:Sheet2.C8"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C9:Sheet2.C9"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C10:Sheet2.C10"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C11:Sheet2.C11"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format 
calcext:target-range-address="Sheet2.C12:Sheet2.C12"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C13:Sheet2.C13"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C14:Sheet2.C14"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C15:Sheet2.C15"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C16:Sheet2.C16"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" 
calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C17:Sheet2.C17"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C18:Sheet2.C18"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C19:Sheet2.C19"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C20:Sheet2.C20"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" 
calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C21:Sheet2.C21"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C22:Sheet2.C22"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C23:Sheet2.C23"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C24:Sheet2.C24"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" 
calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C25:Sheet2.C25"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C26:Sheet2.C26"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C27:Sheet2.C27"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C28:Sheet2.C28"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format 
calcext:target-range-address="Sheet2.C29:Sheet2.C29"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C30:Sheet2.C30"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C31:Sheet2.C31"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C32:Sheet2.C32"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C33:Sheet2.C33"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" 
calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C34:Sheet2.C34"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C35:Sheet2.C35"> + <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C100"> <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> diff --git a/sc/qa/unit/data/functions/text/fods/rightb.fods b/sc/qa/unit/data/functions/text/fods/rightb.fods index d6276172afd3..e2f645d2bd6c 100644 --- a/sc/qa/unit/data/functions/text/fods/rightb.fods +++ b/sc/qa/unit/data/functions/text/fods/rightb.fods @@ -1082,11 +1082,12 @@ </table:table-cell> </table:table-row> <table:table-row table:style-name="ro5"> - <table:table-cell table:style-name="ce14"/> - <table:table-cell table:style-name="ce17"/> - <table:table-cell table:style-name="ce29"/> - <table:table-cell table:style-name="ce11"/> - <table:table-cell table:number-columns-repeated="4"/> + <table:table-cell table:formula="of:=RIGHTB("🙁🙂A";3)"/> + <table:table-cell office:value-type="string" 
office:string-value="🙂A"/> + <table:table-cell table:formula="of:=[.A11]=[.B11]"/> + <table:table-cell table:formula="of:=FORMULA([.A11])"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell table:number-columns-repeated="3"/> <table:table-cell office:value-type="string" calcext:value-type="string"> <text:p>a</text:p> </table:table-cell> diff --git a/sc/qa/unit/data/functions/text/fods/searchb.fods b/sc/qa/unit/data/functions/text/fods/searchb.fods index f6c351ff67f7..68bf834e2e2d 100644 --- a/sc/qa/unit/data/functions/text/fods/searchb.fods +++ b/sc/qa/unit/data/functions/text/fods/searchb.fods @@ -1517,97 +1517,21 @@ </table:table-cell> <table:table-cell table:number-columns-repeated="11"/> </table:table-row> + <table:table-row> + <table:table-cell table:formula="of:=SEARCHB("a";"🙁🙂A")"/> + <table:table-cell office:value-type="float" office:value="5"/> + <table:table-cell table:formula="of:=[.A33]=[.B33]"/> + <table:table-cell table:formula="of:=FORMULA([.A33])"/> + <table:table-cell table:number-columns-repeated="2"/> + <table:table-cell office:value-type="string" office:string-value="tdf#171165: emoji (and other SMP characters not in specific blocks) take two bytes, as in Excel"/> + <table:table-cell table:number-columns-repeated="8"/> + </table:table-row> <calcext:conditional-formats> - <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C2"> + <calcext:conditional-format calcext:target-range-address="Sheet2.C2:Sheet2.C100"> <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C2"/> <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C2"/> <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C2"/> </calcext:conditional-format> - 
<calcext:conditional-format calcext:target-range-address="Sheet2.C3:Sheet2.C3"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C3"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C3"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C3"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C4:Sheet2.C4"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C4"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C4"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C4"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C6:Sheet2.C6"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C6"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C6"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C6"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C7:Sheet2.C7"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C7"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C7"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C7"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C9:Sheet2.C9"> - <calcext:condition calcext:apply-style-name="Default" 
calcext:value="=""" calcext:base-cell-address="Sheet2.C9"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C9"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C9"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C5:Sheet2.C5"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C5"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C5"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C5"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C8:Sheet2.C8"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C8"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C8"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C8"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C10:Sheet2.C10"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C10"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C10"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C10"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C15:Sheet2.C15"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C15"/> - <calcext:condition calcext:apply-style-name="Untitled1" 
calcext:value="=0" calcext:base-cell-address="Sheet2.C15"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C15"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C11:Sheet2.C11"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C11"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C11"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C11"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C12:Sheet2.C12"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C12"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C12"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C12"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C13:Sheet2.C13"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C13"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C13"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C13"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C14:Sheet2.C14"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C14"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C14"/> - <calcext:condition calcext:apply-style-name="Untitled2" 
calcext:value="=1" calcext:base-cell-address="Sheet2.C14"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C17:Sheet2.C30"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C17"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C17"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C17"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C16:Sheet2.C16"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C16"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C16"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C16"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C31:Sheet2.C31"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C31"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C31"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C31"/> - </calcext:conditional-format> - <calcext:conditional-format calcext:target-range-address="Sheet2.C32:Sheet2.C32"> - <calcext:condition calcext:apply-style-name="Default" calcext:value="=""" calcext:base-cell-address="Sheet2.C32"/> - <calcext:condition calcext:apply-style-name="Untitled1" calcext:value="=0" calcext:base-cell-address="Sheet2.C32"/> - <calcext:condition calcext:apply-style-name="Untitled2" calcext:value="=1" calcext:base-cell-address="Sheet2.C32"/> - </calcext:conditional-format> </calcext:conditional-formats> 
</table:table> <table:named-expressions/> diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index c8d1a0d496f7..49212725c64f 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -11772,7 +11772,8 @@ struct UBlockScript { constexpr UBlockScript scriptList[] = { {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO}, - {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, + {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES}, + {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS}, {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS}, {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT}, @@ -11786,28 +11787,23 @@ static_assert(std::ranges::is_sorted(scriptList, // this would pass the check: {{0, 10}, {5, 15}} return (l.to < r.from || l.from < r.to); })); -static bool IsDBCS(sal_Unicode currentChar) +static sal_Int32 ByteLen(sal_Unicode currentChar) { // for the locale of ja-JP, character U+0x005c and U+0x20ac should be ScriptType::Asian if( (currentChar == 0x005c || currentChar == 0x20ac) && (MsLangId::getConfiguredSystemLanguage() == LANGUAGE_JAPANESE) ) - return true; + return 2; UBlockCode block = ublock_getCode(currentChar); for (auto [from, to] : scriptList) // scriptList is sorted ascending if (block <= to) // only the first block with to >= block can contain block - return block >= from; - return false; + return block >= from ? 
2 : 1; + return 1; } static sal_Int32 getLengthB(std::u16string_view str) { sal_Int32 length = 0; for (size_t index = 0; index < str.size(); ++index) - { - if (IsDBCS(str[index])) - length += 2; - else - length++; - } + length += ByteLen(str[index]); return length; } void ScInterpreter::ScLenB() @@ -11816,31 +11812,15 @@ void ScInterpreter::ScLenB() } static OUString lcl_RightB(const OUString &rStr, sal_Int32 n) { - if( n < getLengthB(rStr) ) + assert(n >= 0); + for (sal_Int32 nPos = rStr.getLength();; --nPos) { - OUStringBuffer aBuf(rStr); - sal_Int32 index = aBuf.getLength(); - while(index-- >= 0) - { - if(0 == n) - { - aBuf.remove( 0, index + 1); - break; - } - if(-1 == n) - { - aBuf.remove( 0, index + 2 ); - aBuf.insert( 0, " "); - break; - } - if(IsDBCS(aBuf[index])) - n -= 2; - else - n--; - } - return aBuf.makeStringAndClear(); + if (n == 0 || nPos == 0) + return rStr.copy(nPos); + n -= ByteLen(rStr[nPos - 1]); + if (n < 0) // only one "byte" of a "doublebyte" character is requested; produce a space + return OUString::Concat(" ") + rStr.subView(nPos); } - return rStr; } void ScInterpreter::ScRightB() { @@ -11865,31 +11845,15 @@ void ScInterpreter::ScRightB() } static OUString lcl_LeftB(const OUString &rStr, sal_Int32 n) { - if( n < getLengthB(rStr) ) + assert(n >= 0); + for (sal_Int32 nPos = 0;; ++nPos) { - OUStringBuffer aBuf(rStr); - sal_Int32 index = -1; - while(index++ < aBuf.getLength()) - { - if(0 == n) - { - aBuf.truncate(index); - break; - } - if(-1 == n) - { - aBuf.truncate( index - 1 ); - aBuf.append(" "); - break; - } - if(IsDBCS(aBuf[index])) - n -= 2; - else - n--; - } - return aBuf.makeStringAndClear(); + if (n == 0 || nPos == rStr.getLength()) + return rStr.copy(0, nPos); + n -= ByteLen(rStr[nPos]); + if (n < 0) // only one "byte" of a "doublebyte" character is requested; produce a space + return rStr.subView(0, nPos) + OUString::Concat(" "); } - return rStr; } void ScInterpreter::ScLeftB() { commit af1af15aff515705919c08847baf2d30b074de1f 
Author: Mike Kaganski <[email protected]> AuthorDate: Fri Mar 6 20:34:45 2026 +0100 Commit: Mike Kaganski <[email protected]> CommitDate: Sat Mar 7 13:23:26 2026 +0100 Simplify getLengthB Merge it with lcl_getLengthB; let its callers pass correct string views. Change-Id: Id577bd18d9cc93e45024052da9f034a8d64e94a5 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/201157 Tested-by: Jenkins Reviewed-by: Mike Kaganski <[email protected]> diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index b6ed2d53f4f0..c8d1a0d496f7 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -11798,27 +11798,18 @@ static bool IsDBCS(sal_Unicode currentChar) return block >= from; return false; } -static sal_Int32 lcl_getLengthB( std::u16string_view str, sal_Int32 nPos ) +static sal_Int32 getLengthB(std::u16string_view str) { - sal_Int32 index = 0; sal_Int32 length = 0; - while ( index < nPos ) + for (size_t index = 0; index < str.size(); ++index) { if (IsDBCS(str[index])) length += 2; else length++; - index++; } return length; } -static sal_Int32 getLengthB(std::u16string_view str) -{ - if(str.empty()) - return 0; - else - return lcl_getLengthB( str, str.size() ); -} void ScInterpreter::ScLenB() { PushDouble( getLengthB(GetString().getString()) ); @@ -11992,7 +11983,7 @@ void ScInterpreter::ScFindB() else { // obtain byte value of nPos - int nBytePos = lcl_getLengthB( aBuf, nPos ); + int nBytePos = getLengthB(aBuf.subView(0, nPos)); PushDouble( nBytePos + nStart ); } } @@ -12037,7 +12028,7 @@ void ScInterpreter::ScSearchB() else { // obtain byte value of nPos - int nBytePos = lcl_getLengthB( aSubStr, nPos ); + int nBytePos = getLengthB(aSubStr.subView(0, nPos)); PushDouble( nBytePos + nStart ); } } commit 53c1a742333c38f4253c98cd3aa0b10329d885fe Author: Mike Kaganski <[email protected]> AuthorDate: Fri Mar 6 18:54:29 2026 +0100 Commit: Mike Kaganski <[email protected]> CommitDate: Sat Mar 7 13:23:16 2026 +0100 Improve 
IsDBCS Introduced in commit 0946de1e2fbf8bd5ad3919429f648359d4464eca (Resolves: #i121120 Support RightB(), LeftB(), LenB() and MidB(), 2013-06-11), it assumed that scriptList (added in the same commit) was strictly sorted. However, that wasn't enforced. This change adds a couple of static asserts, making sure that each pair is strictly ordered ('from', 'to'); and all pairs have 'to' of the previous pair less than 'from' of the next. The complex check in 'is_sorted' is because at least on Windows, simple "l.to < r.from" predicate returns 'sorted' for overlapping ranges, because it only checks that negation of the predicate is false for the pair "next, prev". That discovered an element in the array, that was most definitely a leftover from development: the element with index 2 had 'from' and 'to' in reverse. The interesting thing is, that if reversed back, they form a range completely covering from element index 1 to index 3. It is likely that the element index 2 was intentionally reversed (and put in that position) - to not participate in the search. This change drops that leftover. Two elements - index 2 and index 4 - had overlapping ranges. That meant that they actually formed a single large range; this change merges them. It is important to note that this range includes blocks with surrogates and PUA; that explains handling of SMP characters (tdf#171165): the two UTF-16 code units, which are surrogates, are checked, instead of the code point. This patch is not expected to change the result from the function. The single place where scriptList was used was simplified.
Change-Id: I8c94265c1faa21a26f66317eff5627681f511aaf Reviewed-on: https://gerrit.libreoffice.org/c/core/+/201151 Reviewed-by: Mike Kaganski <[email protected]> Tested-by: Jenkins diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index a5f3e2962f3f..b6ed2d53f4f0 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -11770,30 +11770,33 @@ struct UBlockScript { } -const UBlockScript scriptList[] = { +constexpr UBlockScript scriptList[] = { {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO}, - {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES}, - {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS,UBLOCK_CJK_RADICALS_SUPPLEMENT }, - {UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, + {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS}, {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS}, {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS}, {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT}, {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES} }; +static_assert(std::ranges::all_of(scriptList, [](const auto& r) { return r.from <= r.to; })); +static_assert(std::ranges::is_sorted(scriptList, + [](const auto& l, const auto& r) + { + // avoid interleaving ranges; without the second part, + // this would pass the check: {{0, 10}, {5, 15}} + return (l.to < r.from || l.from < r.to); + })); static bool IsDBCS(sal_Unicode currentChar) { // for the locale of ja-JP, character U+0x005c and U+0x20ac should be ScriptType::Asian if( (currentChar == 0x005c || currentChar == 0x20ac) && (MsLangId::getConfiguredSystemLanguage() == LANGUAGE_JAPANESE) ) return true; - sal_uInt16 i; - bool bRet = false; UBlockCode block = ublock_getCode(currentChar); - for ( i = 0; i < SAL_N_ELEMENTS(scriptList); i++) { - if (block <= scriptList[i].to) break; - } - bRet = (i < SAL_N_ELEMENTS(scriptList) && block >= scriptList[i].from); - return 
bRet; + for (auto [from, to] : scriptList) // scriptList is sorted ascending + if (block <= to) // only the first block with to >= block can contain block + return block >= from; + return false; } static sal_Int32 lcl_getLengthB( std::u16string_view str, sal_Int32 nPos ) {
