sc/source/filter/excel/xestyle.cxx |    6 -
 sc/source/filter/excel/xetable.cxx |  130 +++++++++++++++++++++++++------------
 sc/source/filter/inc/xestyle.hxx   |    3 
 sc/source/filter/inc/xetable.hxx   |   23 ++++--
 4 files changed, 109 insertions(+), 53 deletions(-)

New commits:
commit 05259e880db187f6698d6d545504775459d6e96a
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Wed Feb 16 19:18:59 2022 +0100
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Thu Feb 17 12:08:38 2022 +0100

    optimize blank cell Excel export (tdf#133749)
    
    The code in XclExpRow::Finalize() apparently creates an array
    of indexes for blank cells, and then uses it
    to efficiently write many blank cells (or whatever it's all exactly).
    But especially with huge sheets all this processing is basically
    repeatedly checking for all the blank cells at the end of the rows.
    
    Optimize this by calculating where all the remaining blank cells
    start and process those more efficiently or sometimes even just
    plain ignore them. Possibly this could be optimized even more if
    I understood the code better, but this seems to be good enough.
    
    I've also changed some of the loops to use indexing instead of
    iterators, as libstdc++ debug iterators are horribly slow here
    (which possibly may even make a difference for Jenkins builds).
    
    Change-Id: Iafe36ccd2795a85b10b98cf84c041a7427aabc29
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/130046
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/sc/source/filter/excel/xestyle.cxx 
b/sc/source/filter/excel/xestyle.cxx
index 291b51c6c83e..1dd6401a79eb 100644
--- a/sc/source/filter/excel/xestyle.cxx
+++ b/sc/source/filter/excel/xestyle.cxx
@@ -2027,12 +2027,6 @@ XclExpXFId::XclExpXFId() :
 {
 }
 
-XclExpXFId::XclExpXFId( sal_uInt32 nXFId ) :
-    mnXFId( nXFId ),
-    mnXFIndex( EXC_XF_DEFAULTCELL )
-{
-}
-
 void XclExpXFId::ConvertXFIndex( const XclExpRoot& rRoot )
 {
     mnXFIndex = rRoot.GetXFBuffer().GetXFIndex( mnXFId );
diff --git a/sc/source/filter/excel/xetable.cxx 
b/sc/source/filter/excel/xetable.cxx
index f8ec4440b74d..2164adcfacea 100644
--- a/sc/source/filter/excel/xetable.cxx
+++ b/sc/source/filter/excel/xetable.cxx
@@ -560,7 +560,7 @@ void XclExpCellBase::GetBlankXFIndexes( ScfUInt16Vec& 
/*rXFIndexes*/ ) const
     // default: do nothing
 }
 
-void XclExpCellBase::RemoveUnusedBlankCells( const ScfUInt16Vec& 
/*rXFIndexes*/ )
+void XclExpCellBase::RemoveUnusedBlankCells( const ScfUInt16Vec& 
/*rXFIndexes*/, size_t /*nStartAllNotFound*/ )
 {
     // default: do nothing
 }
@@ -1277,7 +1277,7 @@ void XclExpMultiCellBase::GetXFIndexes( ScfUInt16Vec& 
rXFIndexes ) const
     }
 }
 
-void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& 
rXFIndexes )
+void XclExpMultiCellBase::RemoveUnusedXFIndexes( const ScfUInt16Vec& 
rXFIndexes, size_t nStartAllNotFound )
 {
     // save last column before calling maXFIds.clear()
     sal_uInt16 nLastXclCol = GetLastXclCol();
@@ -1285,13 +1285,15 @@ void XclExpMultiCellBase::RemoveUnusedXFIndexes( const 
ScfUInt16Vec& rXFIndexes
 
     // build new XF index vector, containing passed XF indexes
     maXFIds.clear();
-    std::for_each(rXFIndexes.begin() + GetXclCol(), rXFIndexes.begin() + 
nLastXclCol + 1,
-        [this](const sal_uInt16& rXFIndex) {
-            XclExpMultiXFId aXFId( 0 );
-            // AppendXFId() tests XclExpXFIndex::mnXFId, set it too
-            aXFId.mnXFId = aXFId.mnXFIndex = rXFIndex;
-            AppendXFId( aXFId );
-        });
+    // Process only all that possibly are not EXC_XF_NOTFOUND.
+    size_t nEnd = std::min<size_t>(nLastXclCol + 1, nStartAllNotFound);
+    for( size_t i = GetXclCol(); i < nEnd; ++i )
+    {
+        XclExpMultiXFId aXFId( 0 );
+        // AppendXFId() tests XclExpXFIndex::mnXFId, set it too
+        aXFId.mnXFId = aXFId.mnXFIndex = rXFIndexes[ i ];
+        AppendXFId( aXFId );
+    }
 
     // remove leading and trailing unused XF indexes
     if( !maXFIds.empty() && (maXFIds.front().mnXFIndex == EXC_XF_NOTFOUND) )
@@ -1305,6 +1307,17 @@ void XclExpMultiCellBase::RemoveUnusedXFIndexes( const 
ScfUInt16Vec& rXFIndexes
     // The Save() function will skip all XF indexes equal to EXC_XF_NOTFOUND.
 }
 
+sal_uInt16 XclExpMultiCellBase::GetStartColAllDefaultCell() const
+{
+    sal_uInt16 col = GetXclCol(), nStart = col; // nStart: column just past the last non-default run
+    for( const auto& rXFId : maXFIds )
+    {
+        col += rXFId.mnCount; // every run advances the column, default or not
+        if( rXFId.mnXFIndex != EXC_XF_DEFAULTCELL ) nStart = col;
+    }
+    return nStart; // all XF indexes from here to the last column are EXC_XF_DEFAULTCELL
+}
+
 XclExpBlankCell::XclExpBlankCell( const XclAddress& rXclPos, const 
XclExpMultiXFId& rXFId ) :
     XclExpMultiCellBase( EXC_ID3_BLANK, EXC_ID_MULBLANK, 0, rXclPos )
 {
@@ -1333,9 +1346,9 @@ void XclExpBlankCell::GetBlankXFIndexes( ScfUInt16Vec& 
rXFIndexes ) const
     GetXFIndexes( rXFIndexes );
 }
 
-void XclExpBlankCell::RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes )
+void XclExpBlankCell::RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes, 
size_t nStartAllNotFound )
 {
-    RemoveUnusedXFIndexes( rXFIndexes );
+    RemoveUnusedXFIndexes( rXFIndexes, nStartAllNotFound );
 }
 
 void XclExpBlankCell::WriteContents( XclExpStream& /*rStrm*/, sal_uInt16 
/*nRelCol*/ )
@@ -1887,6 +1900,17 @@ XclExpRow::XclExpRow( const XclExpRoot& rRoot, 
sal_uInt32 nXclRow,
     rProgress.Progress();
 }
 
+// Returns the smallest index i such that every entry in [i, min(size, searchStart)) equals value.
+static size_t findFirstAllSameUntilEnd( const ScfUInt16Vec& rIndexes, sal_uInt16 value,
+    size_t searchStart = std::numeric_limits<size_t>::max())
+{
+    // Scan backwards; entries at or beyond searchStart are not inspected.
+    size_t nPos = std::min(rIndexes.size(), searchStart);
+    while( nPos > 0 && rIndexes[ nPos - 1 ] == value )
+        --nPos;
+    return nPos;
+}
+
 void XclExpRow::AppendCell( XclExpCellRef const & xCell, bool bIsMergedBase )
 {
     OSL_ENSURE( !mbAlwaysEmpty, "XclExpRow::AppendCell - row is marked to be 
always empty" );
@@ -1894,7 +1918,7 @@ void XclExpRow::AppendCell( XclExpCellRef const & xCell, 
bool bIsMergedBase )
     InsertCell( xCell, maCellList.GetSize(), bIsMergedBase );
 }
 
-void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, bool bProgress )
+void XclExpRow::Finalize( const ScfUInt16Vec& rColXFIndexes, size_t 
nStartColAllDefault, bool bProgress )
 {
     size_t nPos, nSize;
 
@@ -1939,8 +1963,8 @@ void XclExpRow::Finalize( const ScfUInt16Vec& 
rColXFIndexes, bool bProgress )
                 // insert the cell, InsertCell() may merge it with existing 
BLANK records
                 InsertCell( xNewCell, nPos, false );
                 // insert default XF indexes into aXFIndexes
-                ::std::fill( aXFIndexes.begin() + nFirstFreeXclCol,
-                    aXFIndexes.begin() + nNextUsedXclCol, aXFId.mnXFIndex );
+                for( size_t i = nFirstFreeXclCol; i < nNextUsedXclCol; ++i )
+                    aXFIndexes[ i ] = aXFId.mnXFIndex;
                 // don't step forward with nPos, InsertCell() may remove 
records
             }
             else
@@ -1950,28 +1974,41 @@ void XclExpRow::Finalize( const ScfUInt16Vec& 
rColXFIndexes, bool bProgress )
 
     // *** Find default row format *** ----------------------------------------
 
+    // Often there will be many EXC_XF_DEFAULTCELL at the end, optimize by 
ignoring them.
+    size_t nStartSearchAllDefault = aXFIndexes.size();
+    if( !maCellList.IsEmpty() && dynamic_cast< const XclExpBlankCell* >( 
maCellList.GetLastRecord()))
+    {
+        const XclExpBlankCell* pLastBlank = static_cast< const 
XclExpBlankCell* >( maCellList.GetLastRecord());
+        assert(pLastBlank->GetLastXclCol() == aXFIndexes.size() - 1);
+        nStartSearchAllDefault = pLastBlank->GetStartColAllDefaultCell();
+    }
+    size_t nStartAllDefault = findFirstAllSameUntilEnd( aXFIndexes, 
EXC_XF_DEFAULTCELL, nStartSearchAllDefault);
+
     // find most used XF index in the row
     std::unordered_map< sal_uInt16, size_t > aIndexMap;
     sal_uInt16 nRowXFIndex = EXC_XF_DEFAULTCELL;
-    size_t nMaxXFCount = 0;
     const size_t nHalfIndexes = aXFIndexes.size() / 2;
-    for( const auto& rXFIndex : aXFIndexes )
+    if( nStartAllDefault > nHalfIndexes ) // Otherwise most are 
EXC_XF_DEFAULTCELL.
     {
-        if( rXFIndex != EXC_XF_NOTFOUND )
+        size_t nMaxXFCount = 0;
+        for( const auto& rXFIndex : aXFIndexes )
         {
-            size_t& rnCount = aIndexMap[ rXFIndex ];
-            ++rnCount;
-            if( rnCount > nMaxXFCount )
+            if( rXFIndex != EXC_XF_NOTFOUND )
             {
-                nRowXFIndex = rXFIndex;
-                nMaxXFCount = rnCount;
-                if (nMaxXFCount > nHalfIndexes)
+                size_t& rnCount = aIndexMap[ rXFIndex ];
+                ++rnCount;
+                if( rnCount > nMaxXFCount )
                 {
-                    // No other XF index can have a greater usage count, we
-                    // don't need to loop through the remaining cells.
-                    // Specifically for the tail of unused default
-                    // cells/columns this makes a difference.
-                    break;  // for
+                    nRowXFIndex = rXFIndex;
+                    nMaxXFCount = rnCount;
+                    if (nMaxXFCount > nHalfIndexes)
+                    {
+                        // No other XF index can have a greater usage count, we
+                        // don't need to loop through the remaining cells.
+                        // Specifically for the tail of unused default
+                        // cells/columns this makes a difference.
+                        break;  // for
+                    }
                 }
             }
         }
@@ -2004,17 +2041,19 @@ void XclExpRow::Finalize( const ScfUInt16Vec& 
rColXFIndexes, bool bProgress )
 
     // *** Remove unused BLANK cell records *** -------------------------------
 
+    size_t maxStartAllDefault = std::max( nStartAllDefault, 
nStartColAllDefault );
     if( bUseColDefXFs )
     {
         // use column default XF indexes
         // #i194#: remove cell XF indexes equal to column default XF indexes
-        ScfUInt16Vec::const_iterator aColIt = rColXFIndexes.begin();
-        for( auto& rXFIndex : aXFIndexes )
+        for( size_t i = 0; i < maxStartAllDefault; ++i )
         {
-            if( rXFIndex == *aColIt )
-                rXFIndex = EXC_XF_NOTFOUND;
-            ++aColIt;
+            if( aXFIndexes[ i ] == rColXFIndexes[ i ] )
+                aXFIndexes[ i ] = EXC_XF_NOTFOUND;
         }
+        // They can differ only up to maxStartAllDefault, in the rest they are the same.
+        for( size_t i = maxStartAllDefault; i < aXFIndexes.size(); ++i )
+            aXFIndexes[ i ] = EXC_XF_NOTFOUND;
     }
     else
     {
@@ -2028,11 +2067,12 @@ void XclExpRow::Finalize( const ScfUInt16Vec& 
rColXFIndexes, bool bProgress )
     }
 
     // remove unused parts of BLANK/MULBLANK cell records
+    size_t nStartAllNotFound = findFirstAllSameUntilEnd( aXFIndexes, 
EXC_XF_NOTFOUND, maxStartAllDefault );
     nPos = 0;
     while( nPos < maCellList.GetSize() )   // do not cache list size, may 
change in the loop
     {
         XclExpCellBase* xCell = maCellList.GetRecord( nPos );
-        xCell->RemoveUnusedBlankCells( aXFIndexes );
+        xCell->RemoveUnusedBlankCells( aXFIndexes, nStartAllNotFound );
         if( xCell->IsEmpty() )
             maCellList.RemoveRecord( nPos );
         else
@@ -2174,26 +2214,32 @@ class RowFinalizeTask : public comphelper::ThreadTask
 {
     bool mbProgress;
     const ScfUInt16Vec& mrColXFIndexes;
+    size_t mnStartColAllDefault;
     std::vector< XclExpRow * > maRows;
 public:
              RowFinalizeTask( const std::shared_ptr<comphelper::ThreadTaskTag> 
& pTag,
                               const ScfUInt16Vec& rColXFIndexes,
+                              size_t nStartColAllDefault,
                               bool bProgress ) :
                  comphelper::ThreadTask( pTag ),
                  mbProgress( bProgress ),
-                 mrColXFIndexes( rColXFIndexes ) {}
+                 mrColXFIndexes( rColXFIndexes ),
+                 mnStartColAllDefault( nStartColAllDefault )
+                 {}
 
     void     push_back( XclExpRow *pRow ) { maRows.push_back( pRow ); }
     virtual void doWork() override
     {
         for (XclExpRow* p : maRows)
-            p->Finalize( mrColXFIndexes, mbProgress );
+            p->Finalize( mrColXFIndexes, mnStartColAllDefault, mbProgress );
     }
 };
 
 }
 
-void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, const 
ScfUInt16Vec& rColXFIndexes )
+void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData,
+                                const ScfUInt16Vec& rColXFIndexes,
+                                size_t nStartColAllDefault )
 {
     // *** Finalize all rows *** ----------------------------------------------
 
@@ -2210,7 +2256,7 @@ void XclExpRowBuffer::Finalize( XclExpDefaultRowData& 
rDefRowData, const ScfUInt
     if (nThreads == 1)
     {
         for (auto& rEntry : maRowMap)
-            rEntry.second->Finalize( rColXFIndexes, true );
+            rEntry.second->Finalize( rColXFIndexes, nStartColAllDefault, true 
);
     }
     else
     {
@@ -2218,7 +2264,7 @@ void XclExpRowBuffer::Finalize( XclExpDefaultRowData& 
rDefRowData, const ScfUInt
         std::shared_ptr<comphelper::ThreadTaskTag> pTag = 
comphelper::ThreadPool::createThreadTaskTag();
         std::vector<std::unique_ptr<RowFinalizeTask>> aTasks(nThreads);
         for ( size_t i = 0; i < nThreads; i++ )
-            aTasks[ i ].reset( new RowFinalizeTask( pTag, rColXFIndexes, i == 
0 ) );
+            aTasks[ i ].reset( new RowFinalizeTask( pTag, rColXFIndexes, 
nStartColAllDefault, i == 0 ) );
 
         size_t nIdx = 0;
         for ( const auto& rEntry : maRowMap )
@@ -2688,13 +2734,17 @@ void XclExpCellTable::Finalize(bool bXLS)
     ScfUInt16Vec aColXFIndexes;
     maColInfoBfr.Finalize( aColXFIndexes, bXLS );
 
+    // Usually many indexes towards the end will be EXC_XF_DEFAULTCELL, find
+    // the index that starts all EXC_XF_DEFAULTCELL until the end.
+    size_t nStartColAllDefault = findFirstAllSameUntilEnd( aColXFIndexes, 
EXC_XF_DEFAULTCELL );
+
     /*  Finalize row buffer. This calculates all cell XF indexes from the XF
         identifiers. Then the XF index vector aColXFIndexes (filled above) is
         used to calculate the row default formats. With this, all unneeded 
blank
         cell records (equal to row default or column default) will be removed.
         The function returns the (most used) default row format in 
aDefRowData. */
     XclExpDefaultRowData aDefRowData;
-    maRowBfr.Finalize( aDefRowData, aColXFIndexes );
+    maRowBfr.Finalize( aDefRowData, aColXFIndexes, nStartColAllDefault );
 
     // Initialize the DEFROWHEIGHT record.
     mxDefrowheight->SetDefaultData( aDefRowData );
diff --git a/sc/source/filter/inc/xestyle.hxx b/sc/source/filter/inc/xestyle.hxx
index d922b45399e2..26ba9fcf8e39 100644
--- a/sc/source/filter/inc/xestyle.hxx
+++ b/sc/source/filter/inc/xestyle.hxx
@@ -414,7 +414,8 @@ struct XclExpXFId
     sal_uInt16          mnXFIndex;      /// Real Excel XF index.
 
     explicit            XclExpXFId();
-    explicit            XclExpXFId( sal_uInt32 nXFId );
+    explicit            XclExpXFId( sal_uInt32 nXFId )
+        : mnXFId( nXFId ), mnXFIndex( EXC_XF_DEFAULTCELL ) {}
 
     /** Converts the XF identifier in mnXFId to an Excel XF index and stores 
it in mnXFIndex. */
     void                ConvertXFIndex( const XclExpRoot& rRoot );
diff --git a/sc/source/filter/inc/xetable.hxx b/sc/source/filter/inc/xetable.hxx
index 1386f0de0c58..1e9e7cd45d2c 100644
--- a/sc/source/filter/inc/xetable.hxx
+++ b/sc/source/filter/inc/xetable.hxx
@@ -297,7 +297,7 @@ public:
     /** Derived classes for blank cells insert the Excel XF index(es) into the 
passed vector. */
     virtual void        GetBlankXFIndexes( ScfUInt16Vec& rXFIndexes ) const;
     /** Derived classes for blank cells remove unused Excel XF index(es). */
-    virtual void        RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes 
);
+    virtual void        RemoveUnusedBlankCells( const ScfUInt16Vec& 
rXFIndexes, size_t nStartAllNotFound );
 
 protected:
     explicit            XclExpCellBase(
@@ -500,8 +500,13 @@ protected:
     void                GetXFIndexes( ScfUInt16Vec& rXFIndexes ) const;
 
     /** Removes unused Excel XF index(es).
-        @param rXFIndexes  Specifies which XF indexes are used. */
-    void                RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes 
);
+        @param rXFIndexes  Specifies which XF indexes are used.
+        @param nStartAllNotFound Index in rXFIndexes which starts 
EXC_XF_NOTFOUND until the end.
+    */
+    void                RemoveUnusedXFIndexes( const ScfUInt16Vec& rXFIndexes, 
size_t nStartAllNotFound );
+
+    /** Return starting column at which all indexes until the end are 
EXC_XF_DEFAULTCELL .*/
+    sal_uInt16          GetStartColAllDefaultCell() const;
 
 private:
     /** Derived classes write the remaining contents of the specified cell 
(without XF index).
@@ -532,7 +537,9 @@ public:
     /** Inserts the Excel XF index(es) into the passed vector. */
     virtual void        GetBlankXFIndexes( ScfUInt16Vec& rXFIndexes ) const 
override;
     /** Tries to remove unused Excel XF index(es). */
-    virtual void        RemoveUnusedBlankCells( const ScfUInt16Vec& rXFIndexes 
) override;
+    virtual void        RemoveUnusedBlankCells( const ScfUInt16Vec& 
rXFIndexes, size_t nStartAllNotFound ) override;
+
+    using               XclExpMultiCellBase::GetStartColAllDefaultCell;
 
 private:
     /** Writes the remaining contents of the specified cell (without XF 
index). */
@@ -846,6 +853,7 @@ public:
 
     /** Converts all XF identifiers into the Excel XF indexes. */
     void                Finalize( const ScfUInt16Vec& rColXFIndexes,
+                                  size_t nStartColAllDefault,
                                   bool bUpdateProgress );
 
     /** Returns the column index of the first used cell in this row.
@@ -914,8 +922,11 @@ public:
 
     /** Converts all XF identifiers into the Excel XF indexes and calculates 
default formats.
         @param rDefRowData  (out-param) The default row format is returned 
here.
-        @param rColXFIndexes  The column default XF indexes. */
-    void                Finalize( XclExpDefaultRowData& rDefRowData, const 
ScfUInt16Vec& rColXFIndexes );
+        @param rColXFIndexes  The column default XF indexes.
+        @param nStartColAllDefault Index in rColXFIndexes which starts 
EXC_XF_DEFAULTCELL until the end.
+    */
+    void                Finalize( XclExpDefaultRowData& rDefRowData, const 
ScfUInt16Vec& rColXFIndexes,
+                                  size_t nStartColAllDefault );
 
     /** Writes the DIMENSIONS record, all ROW records and all cell records. */
     virtual void        Save( XclExpStream& rStrm ) override;

Reply via email to