src/docrecord.py | 39 ++++++++++++++++++++++++---------------
src/docstream.py | 20 +++++++++++++++-----
2 files changed, 39 insertions(+), 20 deletions(-)
New commits:
commit f569aa25d496c7281b6cfaaa9ef739f880ee8089
Author: Miklos Vajna <[email protected]>
Date: Sun May 12 21:47:40 2013 +0200

    doc: handle the case when we only know if the offset end is compressed or not

diff --git a/src/docstream.py b/src/docstream.py
index cbacfd8..701c50b 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -674,10 +674,9 @@ class WordDocumentStream(DOCDirStream):
                 index = i
         return index

-    def retrieveOffset(self, start, end):
-        """Retrieves text, defined by raw byte offsets."""
-
-        # Is the given offset compressed?
+    def __isOffsetCompressed(self, off):
+        """Is the given offset compressed?"""
+        compressed = None
         plcPcd = self.clx.pcdt.plcPcd
         for i in range(len(plcPcd.aCp)):
             aPcd = plcPcd.aPcd[i]
@@ -686,8 +685,19 @@ class WordDocumentStream(DOCDirStream):
                 offset = fcCompressed.fc/2
             else:
                 offset = fcCompressed.fc
-            if offset <= start:
+            if offset <= off:
                 compressed = fcCompressed.fCompressed
+        return compressed
+
+    def retrieveOffset(self, start, end):
+        """Retrieves text, defined by raw byte offsets."""
+
+        compressed = self.__isOffsetCompressed(start)
+        if compressed == None:
+            compressed = self.__isOffsetCompressed(end)
+
+        if compressed == None:
+            raise Exception("should not happen")

         if compressed:
             return globals.encodeName(self.bytes[start:end])

commit b7ba0cc7b5bc0cb153ee032be54d2274864602c9
Author: Miklos Vajna <[email protected]>
Date: Sun May 12 21:32:52 2013 +0200

    PlcfHdd: handle multiple sections

diff --git a/src/docrecord.py b/src/docrecord.py
index 68ffdd2..59f1555 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -929,13 +929,8 @@ class PlcfHdd(DOCDirStream, PLC):
         self.pos = mainStream.fcPlcfHdd
         self.size = mainStream.lcbPlcfHdd

-    def dump(self):
-        print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        offset = self.mainStream.ccpText + self.mainStream.ccpFtn
-        pos = self.pos
-        for i in range(self.getElements() - 1):
-            start = self.getuInt32(pos = pos)
-            end = self.getuInt32(pos = pos + 4)
+    def getContents(self, i):
+        if i <= 5:
             contentsMap = {
                 0: "Footnote separator",
                 1: "Footnote continuation separator",
@@ -943,15 +938,29 @@ class PlcfHdd(DOCDirStream, PLC):
                 3: "Endnote separator",
                 4: "Endnote continuation separator",
                 5: "Endnote continuation notice",
-
-                6: "Even page header",
-                7: "Odd page header",
-                8: "Even page footer",
-                9: "Odd page footer",
-                10: "First page header",
-                11: "First page footer",
             }
-            print '<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, contentsMap[i], start, end)
+            return contentsMap[i]
+        else:
+            contentsMap = {
+                0: "Even page header",
+                1: "Odd page header",
+                2: "Even page footer",
+                3: "Odd page footer",
+                4: "First page header",
+                5: "First page footer",
+            }
+            sectionIndex = i / 6
+            contentsIndex = i % 6
+            return "%s (section #%s)" % (contentsMap[contentsIndex], sectionIndex)
+
+    def dump(self):
+        print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+        pos = self.pos
+        for i in range(self.getElements() - 1):
+            start = self.getuInt32(pos = pos)
+            end = self.getuInt32(pos = pos + 4)
+            print '<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, self.getContents(i), start, end)
             print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))
             pos += 4
             print '</aCP>'
_______________________________________________
Libreoffice-commits mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
