On Fri, 2012-05-11 at 16:32 +0100, Caolán McNamara wrote: > > So - how about the attached diff, hopefully rather easier to review & > > back-port :-) > > Seems sane, though it probably makes sense to tweak the trigger for > calling buildPageChainCache as the units for nNew there are in bytes > while the define is in "pages". And we might want to avoid calling it in > the case of a small relative jump in pages forward.
So I pushed this to master; I attach a speculative back-port to 3.5 - it gives the rest of the rather substantial speedup we need vs. where we were; I'd like to get it in. I also tested the 'get-position-of-byte-after-the-end' code path which seems to work nicely too. Thanks, Michael. -- michael.me...@suse.com <><, Pseudo Engineer, itinerant idiot
>From 3cbfc50821ce4ec44a2c27c097cc88f64f47e2c2 Mon Sep 17 00:00:00 2001 From: Michael Meeks <michael.me...@suse.com> Date: Fri, 11 May 2012 20:19:21 +0100 Subject: [PATCH] sot: Improve OLE2 offset-to-page computation with a FAT layout vector --- sot/source/sdstor/stgstrms.cxx | 85 ++++++++++++++++++++++++++++++++++------ sot/source/sdstor/stgstrms.hxx | 3 + 2 files changed, 76 insertions(+), 12 deletions(-) diff --git a/sot/source/sdstor/stgstrms.cxx b/sot/source/sdstor/stgstrms.cxx index 5e6c1ea..8d3d758 100644 --- a/sot/source/sdstor/stgstrms.cxx +++ b/sot/source/sdstor/stgstrms.cxx @@ -26,12 +26,12 @@ * ************************************************************************/ +#include <algorithm> #include <string.h> // memcpy() - +#include <sal/log.hxx> #include <osl/file.hxx> #include <tools/tempfile.hxx> -#include <tools/debug.hxx> #include "sot/stg.hxx" #include "stgelem.hxx" @@ -334,6 +334,37 @@ void StgStrm::SetEntry( StgDirEntry& r ) r.SetDirty(); } +/* + * The page chain, is basically a singly linked list of slots each + * point to the next page. Instead of traversing the file structure + * for this each time build a simple flat in-memory vector list + * of pages. + */ +bool StgStrm::buildPageChainCache() +{ + if (nSize > 0) + m_aPagesCache.reserve(nSize/nPageSize); + + sal_Int32 nBgn = nStart; + while (nBgn >= 0) + { + m_aPagesCache.push_back(nBgn); + sal_Int32 nOldBgn = nBgn; + nBgn = pFat->GetNextPage(nBgn); + if (nBgn == nOldBgn) + return false; + } + + return true; +} + +//See fdo#47644 for a .doc with a vast amount of pages where seeking around the +//document takes a colossal amount of time +// +//There's a cost to building a page cache, so only build one if the number of +//pages to seek through hits some sufficiently high value where it's worth it. +#define ARBITRARY_LARGE_AMOUNT_OF_PAGES 8 * 512 + // Compute page number and offset for the given byte position. // If the position is behind the size, set the stream right // behind the EOF. @@ -353,9 +384,35 @@ sal_Bool StgStrm::Pos2Page( sal_Int32 nBytePos ) nPos = nBytePos; if( nOld == nNew ) return sal_True; + + if (m_aPagesCache.empty() && nNew > ARBITRARY_LARGE_AMOUNT_OF_PAGES) + { + SAL_WARN("sot", "kicking off large seek helper\n"); + buildPageChainCache(); + } + + if (!m_aPagesCache.empty()) + { + size_t nIdx = nNew / nPageSize; + + // special case: seek to 1st byte of new, unallocated page + // (in case the file size is a multiple of the page size) + if( nBytePos == nSize && !nOffset && nIdx == m_aPagesCache.size() ) + { + nIdx--; + nOffset = nPageSize; + } + + if (nIdx < m_aPagesCache.size()) + { + nPage = m_aPagesCache[ nIdx ]; + return sal_Bool( nPage >= 0 ); + } + } + if( nNew > nOld ) { - // the new position is behind the current, so an incremental + // the new position is after the current, so an incremental // positioning is OK. Set the page relative position nRel = nNew - nOld; nBgn = nPage; @@ -404,6 +461,8 @@ StgPage* StgStrm::GetPhysPage( sal_Int32 nBytePos, sal_Bool bForce ) sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes ) { + m_aPagesCache.clear(); + sal_Int32 nTo = nStart; sal_Int32 nPgs = ( nBytes + nPageSize - 1 ) / nPageSize; while( nPgs-- ) @@ -430,6 +489,8 @@ sal_Bool StgStrm::Copy( sal_Int32 nFrom, sal_Int32 nBytes ) sal_Bool StgStrm::SetSize( sal_Int32 nBytes ) { + m_aPagesCache.clear(); + // round up to page size sal_Int32 nOld = ( ( nSize + nPageSize - 1 ) / nPageSize ) * nPageSize; sal_Int32 nNew = ( ( nBytes + nPageSize - 1 ) / nPageSize ) * nPageSize; @@ -528,6 +589,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA { if( bMake ) { + m_aPagesCache.clear(); + // create a new master page nFAT = nMaxPage++; pMaster = rIo.Copy( nFAT, STG_FREE ); @@ -582,6 +645,8 @@ sal_Int32 StgFATStrm::GetPage( short nOff, sal_Bool bMake, sal_uInt16 *pnMasterA sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage ) { + m_aPagesCache.clear(); + sal_Bool bRes = sal_True; if( nOff < rIo.aHdr.GetFAT1Size() ) rIo.aHdr.SetFATPage( nOff, nNewPage ); @@ -631,6 +696,8 @@ sal_Bool StgFATStrm::SetPage( short nOff, sal_Int32 nNewPage ) sal_Bool StgFATStrm::SetSize( sal_Int32 nBytes ) { + m_aPagesCache.clear(); + // Set the number of entries to a multiple of the page size short nOld = (short) ( ( nSize + ( nPageSize - 1 ) ) / nPageSize ); short nNew = (short) ( @@ -747,16 +814,10 @@ void StgDataStrm::Init( sal_Int32 nBgn, sal_Int32 nLen ) { // determine the actual size of the stream by scanning // the FAT chain and counting the # of pages allocated - nSize = 0; - sal_Int32 nOldBgn = -1; - while( nBgn >= 0 && nBgn != nOldBgn ) - { - nOldBgn = nBgn; - nBgn = pFat->GetNextPage( nBgn ); - if( nBgn == nOldBgn ) + bool bOk = buildPageChainCache(); + if (!bOk) rIo.SetError( ERRCODE_IO_WRONGFORMAT ); - nSize += nPageSize; - } + nSize = m_aPagesCache.size() * nPageSize; } } diff --git a/sot/source/sdstor/stgstrms.hxx b/sot/source/sdstor/stgstrms.hxx index a6a0ad1..6b1b8c3 100644 --- a/sot/source/sdstor/stgstrms.hxx +++ b/sot/source/sdstor/stgstrms.hxx @@ -30,6 +30,7 @@ #define _STGSTRMS_HXX #include <tools/stream.hxx> +#include <vector> class StgIo; class StgStrm; @@ -77,6 +78,8 @@ protected: sal_Int32 nPage; // current logical page short nOffset; // offset into current page short nPageSize; // logical page size + std::vector<sal_Int32> m_aPagesCache; + bool buildPageChainCache(); sal_Bool Copy( sal_Int32 nFrom, sal_Int32 nBytes ); StgStrm( StgIo& ); public: -- 1.7.7
_______________________________________________ LibreOffice mailing list LibreOffice@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice