sc/source/filter/excel/xestyle.cxx | 6 - sc/source/filter/excel/xetable.cxx | 130 +++++++++++++++++++++++++------------ sc/source/filter/inc/xestyle.hxx | 3 sc/source/filter/inc/xetable.hxx | 23 ++++-- 4 files changed, 109 insertions(+), 53 deletions(-)
New commits: commit 05259e880db187f6698d6d545504775459d6e96a Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Wed Feb 16 19:18:59 2022 +0100 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Thu Feb 17 12:08:38 2022 +0100 optimize blank cell Excel export (tdf#133749) The code in XclExpRow::Finalize() apparently creates an array of indexes for blank cells, and then uses it to efficiently write many blank cells (or whatever it's all exactly). But especially with huge sheets all this processing is basically repeatedly checking for all the blank cells at the end of the rows. Optimize this by calculating where all the remaining blank cells start and process those more efficiently or sometimes even just plain ignore them. Possibly this could be optimized even more if I understood the code better, but this seems to be good enough. I've also changed some of the loops to use indexing instead of iterators, as libstdc++ debug iterators are horribly slow here (which possibly may even make a difference for Jenkins builds). 
Change-Id: Iafe36ccd2795a85b10b98cf84c041a7427aabc29 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/130046 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/source/filter/excel/xestyle.cxx b/sc/source/filter/excel/xestyle.cxx index 291b51c6c83e..1dd6401a79eb 100644 --- a/sc/source/filter/excel/xestyle.cxx +++ b/sc/source/filter/excel/xestyle.cxx @@ -2027,12 +2027,6 @@ XclExpXFId::XclExpXFId() : { } -XclExpXFId::XclExpXFId( sal_uInt32 nXFId ) : - mnXFId( nXFId ), - mnXFIndex( EXC_XF_DEFAULTCELL ) -{ -} - void XclExpXFId::ConvertXFIndex( const XclExpRoot& rRoot ) { mnXFIndex = rRoot.GetXFBuffer().GetXFIndex( mnXFId ); diff --git a/sc/source/filter/excel/xetable.cxx b/sc/source/filter/excel/xetable.cxx index f8ec4440b74d..2164adcfacea 100644 --- a/sc/source/filter/excel/xetable.cxx +++ b/sc/source/filter/excel/xetable.cxx @@ -560,7 +560,7 @@ void XclExpCellBase::GetBlankXFIndexes( ScfUInt16Vec& /*rXFIndexes*/ ) const // default: do nothing } -void XclExpCellBase::RemoveUnusedBlankCells( const ScfUInt16Vec& /*rXFIndexes*/ ) +void XclExpCellBase::RemoveUnusedBlankCells( const ScfUInt16Vec& /*rXFIndexes*/, size_t /*nStartAllNotFound*/ ) { // default: do nothing } @@ -1277,7 +1277,7 @@ void XclExpMultiCellBase::GetXFIndexes( ScfUInt16Vec& rXFIndexes ) const } } -void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes ) +void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes, size_t nStartAllNotFound ) { // save last column before calling maXFIds.clear() sal_uInt16 nLastXclCol = GetLastXclCol(); @@ -1285,13 +1285,15 @@ void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes // build new XF index vector, containing passed XF indexes maXFIds.clear(); - std::for_each(rXFIndexes.begin() + GetXclCol(), rXFIndexes.begin() + nLastXclCol + 1, - [this](const sal_uInt16& rXFIndex) { - XclExpMultiXFId aXFId( 0 ); - // AppendXFId() tests XclExpXFIndex::mnXFId, set it too - 
aXFId.mnXFId = aXFId.mnXFIndex = rXFIndex; - AppendXFId( aXFId ); - }); + // Process only all that possibly are not EXC_XF_NOTFOUND. + size_t nEnd = std::min<size_t>(nLastXclCol + 1, nStartAllNotFound); + for( size_t i = GetXclCol(); i < nEnd; ++i ) + { + XclExpMultiXFId aXFId( 0 ); + // AppendXFId() tests XclExpXFIndex::mnXFId, set it too + aXFId.mnXFId = aXFId.mnXFIndex = rXFIndexes[ i ]; + AppendXFId( aXFId ); + } // remove leading and trailing unused XF indexes if( !maXFIds.empty() && (maXFIds.front().mnXFIndex == EXC_XF_NOTFOUND) ) @@ -1305,6 +1307,17 @@ void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes // The Save() function will skip all XF indexes equal to EXC_XF_NOTFOUND. } +sal_uInt16 XclExpMultiCellBase::GetStartColAllDefaultCell() const +{ + sal_uInt16 col = GetXclCol(); + for( const auto& rXFId : maXFIds ) + { + if( rXFId.mnXFIndex != EXC_XF_DEFAULTCELL ) + col += rXFId.mnCount; + } + return col; +} + XclExpBlankCell::XclExpBlankCell( const XclAddress& rXclPos, const XclExpMultiXFId& rXFId ) : XclExpMultiCellBase( EXC_ID3_BLANK, EXC_ID_MULBLANK, 0, rXclPos ) { @@ -1333,9 +1346,9 @@ void XclExpBlankCell::GetBlankXFIndexes( ScfUInt16Vec& rXFIndexes ) const GetXFIndexes( rXFIndexes ); } -void XclExpBlankCell::RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes ) +void XclExpBlankCell::RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes, size_t nStartAllNotFound ) { - RemoveUnusedXFIndexes( rXFIndexes ); + RemoveUnusedXFIndexes( rXFIndexes, nStartAllNotFound ); } void XclExpBlankCell::WriteContents( XclExpStream& /*rStrm*/, sal_uInt16 /*nRelCol*/ ) @@ -1887,6 +1900,17 @@ XclExpRow::XclExpRow( const XclExpRoot& rRoot, sal_uInt32 nXclRow, rProgress.Progress(); } +static size_t findFirstAllSameUntilEnd( const ScfUInt16Vec& rIndexes, sal_uInt16 value, + size_t searchStart = std::numeric_limits<size_t>::max()) +{ + for( size_t i = std::min(rIndexes.size(), searchStart); i >= 1; --i ) + { + if( rIndexes[ i - 1 ] != value ) + 
return i; + } + return 0; +} + void XclExpRow::AppendCell( XclExpCellRef const & xCell, bool bIsMergedBase ) { OSL_ENSURE( !mbAlwaysEmpty, "XclExpRow::AppendCell - row is marked to be always empty" ); @@ -1894,7 +1918,7 @@ void XclExpRow::AppendCell( XclExpCellRef const & xCell, bool bIsMergedBase ) InsertCell( xCell, maCellList.GetSize(), bIsMergedBase ); } -void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress ) +void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, size_t nStartColAllDefault, bool bProgress ) { size_t nPos, nSize; @@ -1939,8 +1963,8 @@ void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress ) // insert the cell, InsertCell() may merge it with existing BLANK records InsertCell( xNewCell, nPos, false ); // insert default XF indexes into aXFIndexes - ::std::fill( aXFIndexes.begin() + nFirstFreeXclCol, - aXFIndexes.begin() + nNextUsedXclCol, aXFId.mnXFIndex ); + for( size_t i = nFirstFreeXclCol; i < nNextUsedXclCol; ++i ) + aXFIndexes[ i ] = aXFId.mnXFIndex; // don't step forward with nPos, InsertCell() may remove records } else @@ -1950,28 +1974,41 @@ void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress ) // *** Find default row format *** ---------------------------------------- + // Often there will be many EXC_XF_DEFAULTCELL at the end, optimize by ignoring them. 
+ size_t nStartSearchAllDefault = aXFIndexes.size(); + if( !maCellList.IsEmpty() && dynamic_cast< const XclExpBlankCell* >( maCellList.GetLastRecord())) + { + const XclExpBlankCell* pLastBlank = static_cast< const XclExpBlankCell* >( maCellList.GetLastRecord()); + assert(pLastBlank->GetLastXclCol() == aXFIndexes.size() - 1); + nStartSearchAllDefault = pLastBlank->GetStartColAllDefaultCell(); + } + size_t nStartAllDefault = findFirstAllSameUntilEnd( aXFIndexes, EXC_XF_DEFAULTCELL, nStartSearchAllDefault); + // find most used XF index in the row std::unordered_map< sal_uInt16, size_t > aIndexMap; sal_uInt16 nRowXFIndex = EXC_XF_DEFAULTCELL; - size_t nMaxXFCount = 0; const size_t nHalfIndexes = aXFIndexes.size() / 2; - for( const auto& rXFIndex : aXFIndexes ) + if( nStartAllDefault > nHalfIndexes ) // Otherwise most are EXC_XF_DEFAULTCELL. { - if( rXFIndex != EXC_XF_NOTFOUND ) + size_t nMaxXFCount = 0; + for( const auto& rXFIndex : aXFIndexes ) { - size_t& rnCount = aIndexMap[ rXFIndex ]; - ++rnCount; - if( rnCount > nMaxXFCount ) + if( rXFIndex != EXC_XF_NOTFOUND ) { - nRowXFIndex = rXFIndex; - nMaxXFCount = rnCount; - if (nMaxXFCount > nHalfIndexes) + size_t& rnCount = aIndexMap[ rXFIndex ]; + ++rnCount; + if( rnCount > nMaxXFCount ) { - // No other XF index can have a greater usage count, we - // don't need to loop through the remaining cells. - // Specifically for the tail of unused default - // cells/columns this makes a difference. - break; // for + nRowXFIndex = rXFIndex; + nMaxXFCount = rnCount; + if (nMaxXFCount > nHalfIndexes) + { + // No other XF index can have a greater usage count, we + // don't need to loop through the remaining cells. + // Specifically for the tail of unused default + // cells/columns this makes a difference. 
+ break; // for + } } } } @@ -2004,17 +2041,19 @@ void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress ) // *** Remove unused BLANK cell records *** ------------------------------- + size_t maxStartAllDefault = std::max( nStartAllDefault, nStartColAllDefault ); if( bUseColDefXFs ) { // use column default XF indexes // #i194#: remove cell XF indexes equal to column default XF indexes - ScfUInt16Vec::const_iterator aColIt = rColXFIndexes.begin(); - for( auto& rXFIndex : aXFIndexes ) + for( size_t i = 0; i < maxStartAllDefault; ++i ) { - if( rXFIndex == *aColIt ) - rXFIndex = EXC_XF_NOTFOUND; - ++aColIt; + if( aXFIndexes[ i ] == rColXFIndexes[ i ] ) + aXFIndexes[ i ] = EXC_XF_NOTFOUND; } + // They can differ only up to maxNonDefault, in the rest they are the same. + for( size_t i = maxStartAllDefault; i < aXFIndexes.size(); ++i ) + aXFIndexes[ i ] = EXC_XF_NOTFOUND; } else { @@ -2028,11 +2067,12 @@ void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress ) } // remove unused parts of BLANK/MULBLANK cell records + size_t nStartAllNotFound = findFirstAllSameUntilEnd( aXFIndexes, EXC_XF_NOTFOUND, maxStartAllDefault ); nPos = 0; while( nPos < maCellList.GetSize() ) // do not cache list size, may change in the loop { XclExpCellBase* xCell = maCellList.GetRecord( nPos ); - xCell->RemoveUnusedBlankCells( aXFIndexes ); + xCell->RemoveUnusedBlankCells( aXFIndexes, nStartAllNotFound ); if( xCell->IsEmpty() ) maCellList.RemoveRecord( nPos ); else @@ -2174,26 +2214,32 @@ class RowFinalizeTask : public comphelper::ThreadTask { bool mbProgress; const ScfUInt16Vec& mrColXFIndexes; + size_t mnStartColAllDefault; std::vector< XclExpRow * > maRows; public: RowFinalizeTask( const std::shared_ptr<comphelper::ThreadTaskTag> & pTag, const ScfUInt16Vec& rColXFIndexes, + size_t nStartColAllDefault, bool bProgress ) : comphelper::ThreadTask( pTag ), mbProgress( bProgress ), - mrColXFIndexes( rColXFIndexes ) {} + mrColXFIndexes( rColXFIndexes ), + 
mnStartColAllDefault( nStartColAllDefault ) + {} void push_back( XclExpRow *pRow ) { maRows.push_back( pRow ); } virtual void doWork() override { for (XclExpRow* p : maRows) - p->Finalize( mrColXFIndexes, mbProgress ); + p->Finalize( mrColXFIndexes, mnStartColAllDefault, mbProgress ); } }; } -void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt16Vec& rColXFIndexes ) +void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, + const ScfUInt16Vec& rColXFIndexes, + size_t nStartColAllDefault ) { // *** Finalize all rows *** ---------------------------------------------- @@ -2210,7 +2256,7 @@ void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt if (nThreads == 1) { for (auto& rEntry : maRowMap) - rEntry.second->Finalize( rColXFIndexes, true ); + rEntry.second->Finalize( rColXFIndexes, nStartColAllDefault, true ); } else { @@ -2218,7 +2264,7 @@ void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt std::shared_ptr<comphelper::ThreadTaskTag> pTag = comphelper::ThreadPool::createThreadTaskTag(); std::vector<std::unique_ptr<RowFinalizeTask>> aTasks(nThreads); for ( size_t i = 0; i < nThreads; i++ ) - aTasks[ i ].reset( new RowFinalizeTask( pTag, rColXFIndexes, i == 0 ) ); + aTasks[ i ].reset( new RowFinalizeTask( pTag, rColXFIndexes, nStartColAllDefault, i == 0 ) ); size_t nIdx = 0; for ( const auto& rEntry : maRowMap ) @@ -2688,13 +2734,17 @@ void XclExpCellTable::Finalize(bool bXLS) ScfUInt16Vec aColXFIndexes; maColInfoBfr.Finalize( aColXFIndexes, bXLS ); + // Usually many indexes towards the end will be EXC_XF_DEFAULTCELL, find + // the index that starts all EXC_XF_DEFAULTCELL until the end. + size_t nStartColAllDefault = findFirstAllSameUntilEnd( aColXFIndexes, EXC_XF_DEFAULTCELL ); + /* Finalize row buffer. This calculates all cell XF indexes from the XF identifiers. Then the XF index vector aColXFIndexes (filled above) is used to calculate the row default formats. 
With this, all unneeded blank cell records (equal to row default or column default) will be removed. The function returns the (most used) default row format in aDefRowData. */ XclExpDefaultRowData aDefRowData; - maRowBfr.Finalize( aDefRowData, aColXFIndexes ); + maRowBfr.Finalize( aDefRowData, aColXFIndexes, nStartColAllDefault ); // Initialize the DEFROWHEIGHT record. mxDefrowheight->SetDefaultData( aDefRowData ); diff --git a/sc/source/filter/inc/xestyle.hxx b/sc/source/filter/inc/xestyle.hxx index d922b45399e2..26ba9fcf8e39 100644 --- a/sc/source/filter/inc/xestyle.hxx +++ b/sc/source/filter/inc/xestyle.hxx @@ -414,7 +414,8 @@ struct XclExpXFId sal_uInt16 mnXFIndex; /// Real Excel XF index. explicit XclExpXFId(); - explicit XclExpXFId( sal_uInt32 nXFId ); + explicit XclExpXFId( sal_uInt32 nXFId ) + : mnXFId( nXFId ), mnXFIndex( EXC_XF_DEFAULTCELL ) {} /** Converts the XF identifier in mnXFId to an Excel XF index and stores it in mnXFIndex. */ void ConvertXFIndex( const XclExpRoot& rRoot ); diff --git a/sc/source/filter/inc/xetable.hxx b/sc/source/filter/inc/xetable.hxx index 1386f0de0c58..1e9e7cd45d2c 100644 --- a/sc/source/filter/inc/xetable.hxx +++ b/sc/source/filter/inc/xetable.hxx @@ -297,7 +297,7 @@ public: /** Derived classes for blank cells insert the Excel XF index(es) into the passed vector. */ virtual void GetBlankXFIndexes( ScfUInt16Vec& rXFIndexes ) const; /** Derived classes for blank cells remove unused Excel XF index(es). */ - virtual void RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes ); + virtual void RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes, size_t nStartAllNotFound ); protected: explicit XclExpCellBase( @@ -500,8 +500,13 @@ protected: void GetXFIndexes( ScfUInt16Vec& rXFIndexes ) const; /** Removes unused Excel XF index(es). - @param rXFIndexes Specifies which XF indexes are used. */ - void RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes ); + @param rXFIndexes Specifies which XF indexes are used. 
+ @param nStartAllNotFound Index in rXFIndexes which starts EXC_XF_NOTFOUND until the end. + */ + void RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes, size_t nStartAllNotFound ); + + /** Return starting column at which all indexes until the end are EXC_XF_DEFAULTCELL .*/ + sal_uInt16 GetStartColAllDefaultCell() const; private: /** Derived classes write the remaining contents of the specified cell (without XF index). @@ -532,7 +537,9 @@ public: /** Inserts the Excel XF index(es) into the passed vector. */ virtual void GetBlankXFIndexes( ScfUInt16Vec& rXFIndexes ) const override; /** Tries to remove unused Excel XF index(es). */ - virtual void RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes ) override; + virtual void RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes, size_t nStartAllNotFound ) override; + + using XclExpMultiCellBase::GetStartColAllDefaultCell; private: /** Writes the remaining contents of the specified cell (without XF index). */ @@ -846,6 +853,7 @@ public: /** Converts all XF identifiers into the Excel XF indexes. */ void Finalize( const ScfUInt16Vec& rColXFIndexes, + size_t nStartColAllDefault, bool bUpdateProgress ); /** Returns the column index of the first used cell in this row. @@ -914,8 +922,11 @@ public: /** Converts all XF identifiers into the Excel XF indexes and calculates default formats. @param rDefRowData (out-param) The default row format is returned here. - @param rColXFIndexes The column default XF indexes. */ - void Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt16Vec& rColXFIndexes ); + @param rColXFIndexes The column default XF indexes. + @param nStartColAllDefault Index in rColXFIndexes which starts EXC_XF_DEFAULTCELL until the end. + */ + void Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt16Vec& rColXFIndexes, + size_t nStartColAllDefault ); /** Writes the DIMENSIONS record, all ROW records and all cell records. */ virtual void Save( XclExpStream& rStrm ) override;