sc/qa/unit/ucalc_formula.cxx | 92 ++++++++++++++++++++++++++ sc/source/core/tool/interpr1.cxx | 130 ++++++++++++++++++++++--------------- sc/source/core/tool/rangecache.cxx | 5 - 3 files changed, 175 insertions(+), 52 deletions(-)
New commits: commit 7674399aac661eb503d7badc53b9a4d68bd9839d Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Fri May 27 19:51:40 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Sat May 28 05:44:27 2022 +0200 try to range-reduce even COUNTIFS if not matching empty cells It's possible to do reduce range of COUNTIFS to non-empty data too, since empty cells cannot contribute to the result, as long as the criteria do not require matching empty cells. Without this queries like =COUNTIFS($A:$A,...) can spend most of their time clearing and searching the vConditions vector that's big and not useful for the trailing empty cells in that column. Change-Id: I8d1e7977f172ac9b2cf84af3f982e945be3cb46c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135049 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx index 14d779d2e4f3..6bad4a40127c 100644 --- a/sc/qa/unit/ucalc_formula.cxx +++ b/sc/qa/unit/ucalc_formula.cxx @@ -308,6 +308,7 @@ public: void testFuncRowsHidden(); void testFuncSUMIFS(); void testFuncCOUNTIFEmpty(); + void testFuncCOUNTIFSRangeReduce(); void testFuncRefListArraySUBTOTAL(); void testFuncJumpMatrixArrayIF(); void testFuncJumpMatrixArrayOFFSET(); @@ -427,6 +428,7 @@ public: CPPUNIT_TEST(testFuncRowsHidden); CPPUNIT_TEST(testFuncSUMIFS); CPPUNIT_TEST(testFuncCOUNTIFEmpty); + CPPUNIT_TEST(testFuncCOUNTIFSRangeReduce); CPPUNIT_TEST(testFuncRefListArraySUBTOTAL); CPPUNIT_TEST(testFuncJumpMatrixArrayIF); CPPUNIT_TEST(testFuncJumpMatrixArrayOFFSET); @@ -9356,6 +9358,56 @@ void TestFormula::testFuncCOUNTIFEmpty() m_pDoc->DeleteTab(0); } +// Test that COUNTIFS counts properly empty cells if asked to. +void TestFormula::testFuncCOUNTIFSRangeReduce() +{ + sc::AutoCalcSwitch aACSwitch(*m_pDoc, true); // turn auto calc on. + m_pDoc->InsertTab(0, "Test"); + + // Data in A1:C9. + std::vector<std::vector<const char*>> aData = { + { "" }, + { "a", "1", "1" }, + { "b", "2", "2" }, + { "c", "4", "3" }, + { "d", "8", "4" }, + { "a", "16", "5" }, + { "" }, + { "b", "", "6" }, + { "c", "64", "7" } + }; + + insertRangeData(m_pDoc, ScAddress(0,0,0), aData); + + constexpr SCROW maxRow = 20; // so that the unittest is not slow in dbgutil builds + ScRange aSubRange( ScAddress( 0, 0, 0 ), ScAddress( 2, maxRow, 0 )); + m_pDoc->GetDataAreaSubrange(aSubRange); + // This is the range the data should be reduced to in ScInterpreter::IterateParametersIfs(). + CPPUNIT_ASSERT_EQUAL( SCROW(1), aSubRange.aStart.Row()); + CPPUNIT_ASSERT_EQUAL( SCROW(8), aSubRange.aEnd.Row()); + + m_pDoc->SetFormula( ScAddress(10, 0, 0), "=COUNTIFS($A1:$A" + OUString::number(maxRow+1) + + "; \"\"; $B1:$B" + OUString::number(maxRow+1) + + "; \"\"; $C1:$C" + OUString::number(maxRow+1) +"; \"\")", + formula::FormulaGrammar::GRAM_NATIVE_UI); + // But it should find out that it can't range reduce and must count all the empty rows. + CPPUNIT_ASSERT_EQUAL( double(maxRow + 1 - 7), m_pDoc->GetValue(ScAddress(10, 0, 0))); + + // Check also with criteria set as cell references, the middle one resulting in matching + // empty cells (which should cause ScInterpreter::IterateParametersIfs() to undo + // the range reduction). This should only match the A8-C8 row, but it also shouldn't crash. + // Matching empty cells using a cell reference needs a formula to set the cell to + // an empty string, plain empty cell wouldn't do, so use K2 for that. + m_pDoc->SetFormula( ScAddress(10, 1, 0), "=\"\"", formula::FormulaGrammar::GRAM_NATIVE_UI ); + m_pDoc->SetFormula( ScAddress(10, 0, 0), "=COUNTIFS($A1:$A" + OUString::number(maxRow+1) + + "; A8; $B1:$B" + OUString::number(maxRow+1) + + "; K2; $C1:$C" + OUString::number(maxRow+1) + "; C8)", + formula::FormulaGrammar::GRAM_NATIVE_UI); + CPPUNIT_ASSERT_EQUAL( double(1), m_pDoc->GetValue(ScAddress(10, 0, 0))); + + m_pDoc->DeleteTab(0); +} + // Test SUBTOTAL with reference lists in array context. void TestFormula::testFuncRefListArraySUBTOTAL() { diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index c6bcb29b28bb..0b3592976a71 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -5844,6 +5844,7 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf // matrix formula. vConditions.clear(); + // Range-reduce optimization SCCOL nStartColDiff = 0; SCCOL nEndColDiff = 0; SCROW nStartRowDiff = 0; @@ -5851,43 +5852,39 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf bool bRangeReduce = false; ScRange aMainRange; - // Range-reduce optimization - if (nParamCount % 2) // Not COUNTIFS + bool bHasDoubleRefCriteriaRanges = true; + // Do not attempt main-range reduce if any of the criteria-ranges are not double-refs. + // For COUNTIFS queries it's possible to range-reduce too, if the query is not supposed + // to match empty cells (will be checked and undone later if needed), so simply treat + // the first criteria range as the main range for purposes of detecting if this can be done. + for (sal_uInt16 nParamIdx = 2; nParamIdx < nParamCount; nParamIdx += 2 ) { - bool bHasDoubleRefCriteriaRanges = true; - // Do not attempt main-range reduce if any of the criteria-ranges are not double-refs. - for (sal_uInt16 nParamIdx = 2; nParamIdx < nParamCount; nParamIdx += 2 ) + const formula::FormulaToken* pCriteriaRangeToken = pStack[ sp-nParamIdx ]; + if (pCriteriaRangeToken->GetType() != svDoubleRef ) { - const formula::FormulaToken* pCriteriaRangeToken = pStack[ sp-nParamIdx ]; - if (pCriteriaRangeToken->GetType() != svDoubleRef ) - { - bHasDoubleRefCriteriaRanges = false; - break; - } + bHasDoubleRefCriteriaRanges = false; + break; } + } - // Probe the main range token, and try if we can shrink the range without altering results. - const formula::FormulaToken* pMainRangeToken = pStack[ sp-nParamCount ]; - if (pMainRangeToken->GetType() == svDoubleRef && bHasDoubleRefCriteriaRanges) + // Probe the main range token, and try if we can shrink the range without altering results. + const formula::FormulaToken* pMainRangeToken = pStack[ sp-nParamCount ]; + if (pMainRangeToken->GetType() == svDoubleRef && bHasDoubleRefCriteriaRanges) + { + const ScComplexRefData* pRefData = pMainRangeToken->GetDoubleRef(); + if (!pRefData->IsDeleted()) { - const ScComplexRefData* pRefData = pMainRangeToken->GetDoubleRef(); - if (!pRefData->IsDeleted()) + DoubleRefToRange( *pRefData, aMainRange); + if (aMainRange.aStart.Tab() == aMainRange.aEnd.Tab()) { - DoubleRefToRange( *pRefData, aMainRange); - - if (aMainRange.aStart.Tab() == aMainRange.aEnd.Tab()) - { - // Shrink the range to actual data content. - ScRange aSubRange = aMainRange; - mrDoc.GetDataAreaSubrange(aSubRange); - - nStartColDiff = aSubRange.aStart.Col() - aMainRange.aStart.Col(); - nStartRowDiff = aSubRange.aStart.Row() - aMainRange.aStart.Row(); - - nEndColDiff = aSubRange.aEnd.Col() - aMainRange.aEnd.Col(); - nEndRowDiff = aSubRange.aEnd.Row() - aMainRange.aEnd.Row(); - bRangeReduce = nStartColDiff || nStartRowDiff || nEndColDiff || nEndRowDiff; - } + // Shrink the range to actual data content. + ScRange aSubRange = aMainRange; + mrDoc.GetDataAreaSubrange(aSubRange); + nStartColDiff = aSubRange.aStart.Col() - aMainRange.aStart.Col(); + nStartRowDiff = aSubRange.aStart.Row() - aMainRange.aStart.Row(); + nEndColDiff = aSubRange.aEnd.Col() - aMainRange.aEnd.Col(); + nEndRowDiff = aSubRange.aEnd.Row() - aMainRange.aEnd.Row(); + bRangeReduce = nStartColDiff || nStartRowDiff || nEndColDiff || nEndRowDiff; } } } @@ -6077,6 +6074,56 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf return; } + ScQueryParam rParam; + ScQueryEntry& rEntry = rParam.GetEntry(0); + ScQueryEntry::Item& rItem = rEntry.GetQueryItem(); + rEntry.bDoQuery = true; + if (!bIsString) + { + rItem.meType = ScQueryEntry::ByValue; + rItem.mfVal = fVal; + rEntry.eOp = SC_EQUAL; + } + else + { + rParam.FillInExcelSyntax(mrDoc.GetSharedStringPool(), aString.getString(), 0, pFormatter); + if (rItem.meType == ScQueryEntry::ByString) + rParam.eSearchType = DetectSearchType(rItem.maString.getString(), mrDoc); + } + + // Undo bRangeReduce if asked to match empty cells (which should be rare). + assert(rEntry.GetQueryItems().size() == 1); + if((rEntry.IsQueryByEmpty() || rItem.mbMatchEmpty) && bRangeReduce) + { + bRangeReduce = false; + // All criteria ranges are svDoubleRef's, so only vConditions needs adjusting. + assert(vRefArrayConditions.empty()); + if(!vConditions.empty()) + { + std::vector<sal_uInt8> newConditions; + SCCOL newDimensionCols = nCol2 - nCol1 + 1; + SCROW newDimensionRows = nRow2 - nRow1 + 1; + newConditions.reserve( newDimensionCols * newDimensionRows ); + SCCOL col = nCol1; + for(; col < nCol1 + nStartColDiff; ++col) + newConditions.insert( newConditions.end(), newDimensionRows, 0 ); + for(; col <= nCol2 - nStartColDiff; ++col) + { + newConditions.insert( newConditions.end(), nStartRowDiff, 0 ); + SCCOL oldCol = col - ( nCol1 + nStartColDiff ); + auto it = vConditions.begin() + oldCol * nDimensionRows; + newConditions.insert( newConditions.end(), it, it + nDimensionRows ); + newConditions.insert( newConditions.end(), -nEndRowDiff, 0 ); + } + for(; col <= nCol2; ++col) + newConditions.insert( newConditions.end(), newDimensionRows, 0 ); + assert( newConditions.size() == o3tl::make_unsigned( newDimensionCols * newDimensionRows )); + vConditions = std::move( newConditions ); + nDimensionCols = newDimensionCols; + nDimensionRows = newDimensionRows; + } + } + if (bRangeReduce) { // All reference ranges must be of the same size as the main range. @@ -6115,25 +6162,8 @@ void ScInterpreter::IterateParametersIfs( double(*ResultFunc)( const sc::ParamIf if (vConditions.empty()) vConditions.resize( nDimensionCols * nDimensionRows, 0); - ScQueryParam rParam; - rParam.nRow1 = nRow1; - rParam.nRow2 = nRow2; - - ScQueryEntry& rEntry = rParam.GetEntry(0); - ScQueryEntry::Item& rItem = rEntry.GetQueryItem(); - rEntry.bDoQuery = true; - if (!bIsString) - { - rItem.meType = ScQueryEntry::ByValue; - rItem.mfVal = fVal; - rEntry.eOp = SC_EQUAL; - } - else - { - rParam.FillInExcelSyntax(mrDoc.GetSharedStringPool(), aString.getString(), 0, pFormatter); - if (rItem.meType == ScQueryEntry::ByString) - rParam.eSearchType = DetectSearchType(rItem.maString.getString(), mrDoc); - } + rParam.nRow1 = nRow1; + rParam.nRow2 = nRow2; rParam.nCol1 = nCol1; rParam.nCol2 = nCol2; rEntry.nField = nCol1; commit e3d3adbcde43965b517473387acda81e53e248ed Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Fri May 27 20:55:45 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Sat May 28 05:44:15 2022 +0200 fix COUNTIFS when matching empty cells Change-Id: I8577324c0194d594a6df0b51431729b30ac8165c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135047 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx index 9b26d2cbc4b0..14d779d2e4f3 100644 --- a/sc/qa/unit/ucalc_formula.cxx +++ b/sc/qa/unit/ucalc_formula.cxx @@ -307,6 +307,7 @@ public: void testFormulaErrorPropagation(); void testFuncRowsHidden(); void testFuncSUMIFS(); + void testFuncCOUNTIFEmpty(); void testFuncRefListArraySUBTOTAL(); void testFuncJumpMatrixArrayIF(); void testFuncJumpMatrixArrayOFFSET(); @@ -425,6 +426,7 @@ public: CPPUNIT_TEST(testFormulaErrorPropagation); CPPUNIT_TEST(testFuncRowsHidden); CPPUNIT_TEST(testFuncSUMIFS); + CPPUNIT_TEST(testFuncCOUNTIFEmpty); CPPUNIT_TEST(testFuncRefListArraySUBTOTAL); CPPUNIT_TEST(testFuncJumpMatrixArrayIF); CPPUNIT_TEST(testFuncJumpMatrixArrayOFFSET); @@ -9316,6 +9318,44 @@ void TestFormula::testFuncSUMIFS() m_pDoc->DeleteTab(0); } +// Test that COUNTIF counts properly empty cells if asked to. +void TestFormula::testFuncCOUNTIFEmpty() +{ + sc::AutoCalcSwitch aACSwitch(*m_pDoc, true); // turn auto calc on. + m_pDoc->InsertTab(0, "Test"); + + // Data in A1:A9. + std::vector<std::vector<const char*>> aData = { + { "" }, + { "a" }, + { "b" }, + { "c" }, + { "d" }, + { "a" }, + { "" }, + { "b" }, + { "c" } + }; + + insertRangeData(m_pDoc, ScAddress(0,0,0), aData); + + constexpr SCROW maxRow = 20; // so that the unittest is not slow in dbgutil builds + SCROW startRow = 0; + SCROW endRow = maxRow; + SCCOL startCol = 0; + SCCOL endCol = 0; + // ScSortedRangeCache would normally shrink data range to this. + CPPUNIT_ASSERT(m_pDoc->ShrinkToDataArea(0, startCol, startRow, endCol, endRow)); + CPPUNIT_ASSERT_EQUAL(SCROW(8), endRow); + + // But not if matching empty cells. + m_pDoc->SetFormula( ScAddress(10, 0, 0), "=COUNTIFS($A1:$A" + OUString::number(maxRow + 1) + "; \"\")", + formula::FormulaGrammar::GRAM_NATIVE_UI); + CPPUNIT_ASSERT_EQUAL( double(maxRow + 1 - 7), m_pDoc->GetValue(ScAddress(10, 0, 0))); + + m_pDoc->DeleteTab(0); +} + // Test SUBTOTAL with reference lists in array context. void TestFormula::testFuncRefListArraySUBTOTAL() { diff --git a/sc/source/core/tool/rangecache.cxx b/sc/source/core/tool/rangecache.cxx index 434ba2e6de1b..79bebcfa8fe6 100644 --- a/sc/source/core/tool/rangecache.cxx +++ b/sc/source/core/tool/rangecache.cxx @@ -73,8 +73,9 @@ ScSortedRangeCache::ScSortedRangeCache(ScDocument* pDoc, const ScRange& rRange, SCROW endRow = maRange.aEnd.Row(); SCCOL startCol = maRange.aStart.Col(); SCCOL endCol = maRange.aEnd.Col(); - if (!pDoc->ShrinkToDataArea(nTab, startCol, startRow, endCol, endRow)) - return; + if (!item.mbMatchEmpty) + if (!pDoc->ShrinkToDataArea(nTab, startCol, startRow, endCol, endRow)) + return; if (mValues == ValueType::Values) {