sc/inc/table.hxx               |    4 ++
 sc/source/core/data/table3.cxx |   81 +++++++++++++++++++++++++++++++++--------
 2 files changed, 70 insertions(+), 15 deletions(-)

New commits:
commit 7b0aabe71d2455f6f643553a07f1056935cf190f
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Thu Nov 25 00:07:03 2021 +0100
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Thu Nov 25 14:27:36 2021 +0100

    sort, cache and binary search query items if they're many (tdf#136838)
    
    This makes autofilter even with tdf#136838 almost instantaneous.
    
    Change-Id: I94b4b6d6ab6f8e73312d88c8b88c0f393707f117
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/125795
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/sc/inc/table.hxx b/sc/inc/table.hxx
index 332be67ee551..0bb6548d5dd9 100644
--- a/sc/inc/table.hxx
+++ b/sc/inc/table.hxx
@@ -959,6 +959,10 @@ public:
     struct ValidQueryCache
     {
         std::unordered_map<FormulaError, svl::SharedString> 
mCachedSharedErrorStrings;
+        std::vector<double> mCachedSortedItemValues;
+        std::vector<const rtl_uString*> mCachedSortedItemStrings;
+        bool mCachedSortedItemValuesReady = false;
+        bool mCachedSortedItemStringsReady = false;
     };
     bool ValidQuery(
         SCROW nRow, const ScQueryParam& rQueryParam, const ScRefCellValue* 
pCell = nullptr,
diff --git a/sc/source/core/data/table3.cxx b/sc/source/core/data/table3.cxx
index 270c80b9ed73..4da4f23c59b4 100644
--- a/sc/source/core/data/table3.cxx
+++ b/sc/source/core/data/table3.cxx
@@ -2971,7 +2971,7 @@ public:
 
 std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, SCCOL nCol, SCTAB 
nTab, const ScQueryParam& rParam,
     ScRefCellValue& aCell, bool* pbTestEqualCondition, const 
ScInterpreterContext* pContext, QueryEvaluator& aEval,
-    const ScQueryEntry& rEntry )
+    ScTable::ValidQueryCache* pValidQueryCache, const ScQueryEntry& rEntry )
 {
     std::pair<bool,bool> aRes(false, false);
     const ScQueryEntry::QueryItemsType& rItems = rEntry.GetQueryItems();
@@ -3007,14 +3007,35 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
             valid = false;
         if(valid)
         {
-            for (const auto& rItem : rItems)
+            if(rItems.size() >= 100 && pValidQueryCache)
             {
-                // For speed don't bother comparing approximately here, 
usually there either
-                // will be an exact match or it wouldn't match anyway.
-                if (rItem.meType == ScQueryEntry::ByValue
-                    && value == rItem.mfVal)
+                // Sort, cache and binary search for the value in items.
+                // Don't bother comparing approximately.
+                auto& values = pValidQueryCache->mCachedSortedItemValues;
+                if(!pValidQueryCache->mCachedSortedItemValuesReady)
                 {
+                    values.reserve(rItems.size());
+                    for (const auto& rItem : rItems)
+                        if (rItem.meType == ScQueryEntry::ByValue)
+                            values.push_back(rItem.mfVal);
+                    std::sort(values.begin(), values.end());
+                    pValidQueryCache->mCachedSortedItemValuesReady = true;
+                }
+                auto it = std::lower_bound(values.begin(), values.end(), 
value);
+                if( it != values.end() && *it == value )
                     return std::make_pair(true, true);
+            }
+            else
+            {
+                for (const auto& rItem : rItems)
+                {
+                    // For speed don't bother comparing approximately here, 
usually there either
+                    // will be an exact match or it wouldn't match anyway.
+                    if (rItem.meType == ScQueryEntry::ByValue
+                        && value == rItem.mfVal)
+                    {
+                        return std::make_pair(true, true);
+                    }
                 }
             }
         }
@@ -3042,17 +3063,34 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
         // generous as isQueryByString() but it should be enough and better be 
safe.
         if(cellSharedString != nullptr)
         {
-            if (rParam.bCaseSens)
+            if(rItems.size() >= 100 && pValidQueryCache)
             {
-                for (const auto& rItem : rItems)
+                // Sort, cache and binary search for the string in items.
+                // Since each SharedString is identified by pointer value,
+                // sorting by pointer value is enough.
+                auto& values = pValidQueryCache->mCachedSortedItemStrings;
+                if(!pValidQueryCache->mCachedSortedItemStringsReady)
                 {
-                    if ((rItem.meType == ScQueryEntry::ByString
-                            || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
-                        && cellSharedString->getData() == 
rItem.maString.getData())
+                    values.reserve(rItems.size());
+                    for (const auto& rItem : rItems)
                     {
-                        return std::make_pair(true, true);
+                        if (rItem.meType == ScQueryEntry::ByString
+                            || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
+                        {
+                            values.push_back(rParam.bCaseSens
+                                ? rItem.maString.getData()
+                                : rItem.maString.getDataIgnoreCase());
+                        }
                     }
+                    std::sort(values.begin(), values.end());
+                    pValidQueryCache->mCachedSortedItemStringsReady = true;
                 }
+                const rtl_uString* string = rParam.bCaseSens
+                    ? cellSharedString->getData()
+                    : cellSharedString->getDataIgnoreCase();
+                auto it = std::lower_bound(values.begin(), values.end(), 
string);
+                if( it != values.end() && *it == string )
+                    return std::make_pair(true, true);
             }
             else
             {
@@ -3060,7 +3098,9 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
                 {
                     if ((rItem.meType == ScQueryEntry::ByString
                             || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
-                        && cellSharedString->getDataIgnoreCase() == 
rItem.maString.getDataIgnoreCase())
+                        && ( rParam.bCaseSens
+                            ? cellSharedString->getData() == 
rItem.maString.getData()
+                            : cellSharedString->getDataIgnoreCase() == 
rItem.maString.getDataIgnoreCase()))
                     {
                         return std::make_pair(true, true);
                     }
@@ -3176,7 +3216,7 @@ bool ScTable::ValidQuery(
             aCell = GetCellValue(nCol, nRow);
 
         std::pair<bool,bool> aRes = validQueryProcessEntry(nRow, nCol, nTab, 
rParam, aCell,
-            pbTestEqualCondition, pContext, aEval, rEntry);
+            pbTestEqualCondition, pContext, aEval, pValidQueryCache, rEntry);
 
         if (nPos == -1)
         {
commit c41b0bf4c32e1934021d2d607f3f2fe7bc755cc8
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Wed Nov 24 23:35:36 2021 +0100
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Thu Nov 25 14:27:23 2021 +0100

    fast query item searching also for ByValue with string
    
    The tdf#136838 comment #2 document has a MaterialNumber column
    that actually contains strings (even though they look like numbers),
    but autofilter creates ByValue query items (for whatever reason).
    So extend the conditions for fast searching to include this case,
    otherwise it'd be handled by the generic slower code.
    
    Change-Id: I0fe192b99cd2999282db53ba98587b712c42c762
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/125794
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/sc/source/core/data/table3.cxx b/sc/source/core/data/table3.cxx
index 877aeede34d8..270c80b9ed73 100644
--- a/sc/source/core/data/table3.cxx
+++ b/sc/source/core/data/table3.cxx
@@ -2415,11 +2415,16 @@ public:
     }
 
     static bool isQueryByValue(
-        const ScQueryEntry::Item& rItem, ScRefCellValue& rCell)
+        const ScQueryEntry::Item& rItem, const ScRefCellValue& rCell)
     {
         if (rItem.meType == ScQueryEntry::ByString)
             return false;
 
+        return isQueryByValueForCell(rCell);
+    }
+
+    static bool isQueryByValueForCell(const ScRefCellValue& rCell)
+    {
         if (rCell.meType == CELLTYPE_FORMULA && rCell.mpFormula->GetErrCode() 
!= FormulaError::NONE)
             // Error values are compared as string.
             return false;
@@ -3028,6 +3033,10 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
             cellString = aEval.getCellString(aCell, nRow, rEntry, pContext, 
&cellSharedString);
             cellStringSet = true;
         }
+        // Allow also checking ScQueryEntry::ByValue if the cell is not 
numeric,
+        // as in that case isQueryByNumeric() would be false and 
isQueryByString() would
+        // be true because of SC_EQUAL making isTextMatchOp() true.
+        bool compareByValue = !QueryEvaluator::isQueryByValueForCell(aCell);
         // For ScQueryEntry::ByString check that the cell is represented by a 
shared string,
         // which means it's either a string cell or a formula error. This is 
not as
         // generous as isQueryByString() but it should be enough and better be 
safe.
@@ -3037,7 +3046,8 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
             {
                 for (const auto& rItem : rItems)
                 {
-                    if (rItem.meType == ScQueryEntry::ByString
+                    if ((rItem.meType == ScQueryEntry::ByString
+                            || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
                         && cellSharedString->getData() == 
rItem.maString.getData())
                     {
                         return std::make_pair(true, true);
@@ -3048,7 +3058,8 @@ std::pair<bool,bool> validQueryProcessEntry(SCROW nRow, 
SCCOL nCol, SCTAB nTab,
             {
                 for (const auto& rItem : rItems)
                 {
-                    if (rItem.meType == ScQueryEntry::ByString
+                    if ((rItem.meType == ScQueryEntry::ByString
+                            || (compareByValue && rItem.meType == 
ScQueryEntry::ByValue))
                         && cellSharedString->getDataIgnoreCase() == 
rItem.maString.getDataIgnoreCase())
                     {
                         return std::make_pair(true, true);

Reply via email to