docstream.py

Miklos Vajna Sun, 12 May 2013 12:48:39 -0700

 src/docrecord.py |   39 ++++++++++++++++++++++++---------------
 src/docstream.py |   20 +++++++++++++++-----
 2 files changed, 39 insertions(+), 20 deletions(-)


New commits:
commit f569aa25d496c7281b6cfaaa9ef739f880ee8089
Author: Miklos Vajna <[email protected]>
Date:   Sun May 12 21:47:40 2013 +0200

    doc: handle the case when we only know if the offset end is compressed or not

diff --git a/src/docstream.py b/src/docstream.py
index cbacfd8..701c50b 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -674,10 +674,9 @@ class WordDocumentStream(DOCDirStream):
                 index = i
         return index
 
-    def retrieveOffset(self, start, end):
-        """Retrieves text, defined by raw byte offsets."""
-
-        # Is the given offset compressed?
+    def __isOffsetCompressed(self, off):
+        """Is the given offset compressed?"""
+        compressed = None
         plcPcd = self.clx.pcdt.plcPcd
         for i in range(len(plcPcd.aCp)):
             aPcd = plcPcd.aPcd[i]
@@ -686,8 +685,19 @@ class WordDocumentStream(DOCDirStream):
                 offset = fcCompressed.fc/2
             else:
                 offset = fcCompressed.fc
-            if offset <= start:
+            if offset <= off:
                 compressed = fcCompressed.fCompressed
+        return compressed
+
+    def retrieveOffset(self, start, end):
+        """Retrieves text, defined by raw byte offsets."""
+
+        compressed = self.__isOffsetCompressed(start)
+        if compressed == None:
+            compressed = self.__isOffsetCompressed(end)
+
+        if compressed == None:
+            raise Exception("should not happen")
 
         if compressed:
             return globals.encodeName(self.bytes[start:end])
commit b7ba0cc7b5bc0cb153ee032be54d2274864602c9
Author: Miklos Vajna <[email protected]>
Date:   Sun May 12 21:32:52 2013 +0200

    PlcfHdd: handle multiple sections

diff --git a/src/docrecord.py b/src/docrecord.py
index 68ffdd2..59f1555 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -929,13 +929,8 @@ class PlcfHdd(DOCDirStream, PLC):
         self.pos = mainStream.fcPlcfHdd
         self.size = mainStream.lcbPlcfHdd
 
-    def dump(self):
-        print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size)
-        offset = self.mainStream.ccpText + self.mainStream.ccpFtn
-        pos = self.pos
-        for i in range(self.getElements() - 1):
-            start = self.getuInt32(pos = pos)
-            end = self.getuInt32(pos = pos + 4)
+    def getContents(self, i):
+        if i <= 5:
             contentsMap = {
                     0: "Footnote separator",
                     1: "Footnote continuation separator",
@@ -943,15 +938,29 @@ class PlcfHdd(DOCDirStream, PLC):
                     3: "Endnote separator",
                     4: "Endnote continuation separator",
                     5: "Endnote continuation notice",
-
-                    6: "Even page header",
-                    7: "Odd page header",
-                    8: "Even page footer",
-                    9: "Odd page footer",
-                    10: "First page header",
-                    11: "First page footer",
                     }
-            print '<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, contentsMap[i], start, end)
+            return contentsMap[i]
+        else:
+            contentsMap = {
+                    0: "Even page header",
+                    1: "Odd page header",
+                    2: "Even page footer",
+                    3: "Odd page footer",
+                    4: "First page header",
+                    5: "First page footer",
+                    }
+            sectionIndex = i / 6
+            contentsIndex = i % 6
+            return "%s (section #%s)" % (contentsMap[contentsIndex], sectionIndex)
+
+    def dump(self):
+        print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % (self.pos, self.size)
+        offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+        pos = self.pos
+        for i in range(self.getElements() - 1):
+            start = self.getuInt32(pos = pos)
+            end = self.getuInt32(pos = pos + 4)
+            print '<aCP index="%d" contents="%s" start="%d" end="%d">' % (i, self.getContents(i), start, end)
             print '<transformed value="%s"/>' % self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))
             pos += 4
             print '</aCP>'
_______________________________________________
Libreoffice-commits mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

[Libreoffice-commits] .: 2 commits - src/docrecord.py src/docstream.py

Reply via email to