[Libreoffice-commits] mso-dumper.git: src/docrecord.py src/docstream.py

2013-06-25 Thread Miklos Vajna
 src/docrecord.py |1 -
 src/docstream.py |3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

New commits:
commit c8a02ce6baad0b4e99c1eeb451ca1246eec9ae2c
Author: Miklos Vajna vmik...@suse.cz
Date:   Tue Jun 25 18:03:46 2013 +0200

Don't crash on abi13064-1.doc

http://bugzilla.abisource.com/show_bug.cgi?id=13064 says it's corrupted
anyway, so just make the code not crash on it.

diff --git a/src/docrecord.py b/src/docrecord.py
index c747d91..87ece9d 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -3194,7 +3194,6 @@ class SttbListNames(DOCDirStream):
 print '<string value="%s"/>' % 
globals.encodeName(self.bytes[self.pos:self.pos+2*cchData].decode('utf-16'), 
lowOnly = True)
 self.pos += 2*cchData
 print '</cchData>'
-assert self.pos == self.mainStream.fcSttbListNames + self.size
 print '</sttbListNames>'
 
 class SttbSavedBy(DOCDirStream):
diff --git a/src/docstream.py b/src/docstream.py
index cc0766b..398207c 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -952,6 +952,9 @@ class WordDocumentStream(DOCDirStream):
 
 def retrieveCPs(self, start, end):
 """Retrieves a range of characters."""
+if not len(self.clx.pcdt.plcPcd.aPcd):
+print '<info what="clx.pcdt.plcPcd.aPcd is empty, probably 
corrupted document"/>'
+return ""
 ret = []
 i = start
 while i < end:
___
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits


[Libreoffice-commits] mso-dumper.git: src/docrecord.py src/docstream.py

2013-05-16 Thread Miklos Vajna
 src/docrecord.py |   47 +++
 src/docstream.py |5 -
 2 files changed, 51 insertions(+), 1 deletion(-)

New commits:
commit 353f15fa0823ae02b8f0d47333a1e1f734322d82
Author: Miklos Vajna vmik...@suse.cz
Date:   Thu May 16 19:09:18 2013 +0200

dump PlcftxbxBkd

diff --git a/src/docrecord.py b/src/docrecord.py
index 04bf1d0..fec80fe 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -2735,6 +2735,53 @@ class PlcftxbxTxt(DOCDirStream, PLC):
 print '</aCP>'
 print '</plcftxbxTxt>'
 
+class Tbkd(DOCDirStream):
+"""The Tbkd structure is used by the PlcftxbxBkd and PlcfTxbxHdrBkd 
structures to associate ranges of
+text from the Textboxes Document and the Header Textboxes Document with 
FTXBXS objects."""
+size = 6 # 2.9.309
+def __init__(self, parent, offset):
+DOCDirStream.__init__(self, parent.bytes)
+self.parent = parent
+self.pos = self.posOrig = offset
+
+def dump(self):
+print '<aTbkd type="Tbkd" offset="%d" size="%d bytes">' % (self.pos, 
Tbkd.size)
+self.printAndSet("itxbxs", self.readuInt16())
+self.printAndSet("dcpDepend", self.readuInt16())
+buf = self.readuInt16()
+self.printAndSet("reserved1", buf & 0x03ff) # 1..10th bits
+self.printAndSet("fMarkDelete", self.getBit(buf, 10))
+self.printAndSet("fUnk", self.getBit(buf, 11))
+self.printAndSet("fTextOverflow", self.getBit(buf, 12))
+self.printAndSet("reserved2", (buf & 0xe000) >> 13) # 14..16th bits
+print '</aTbkd>'
+assert self.posOrig + Tbkd.size == self.pos
+
+class PlcftxbxBkd(DOCDirStream, PLC):
+"""Specifies which ranges of text go inside which textboxes."""
+def __init__(self, mainStream):
+DOCDirStream.__init__(self, 
mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = 
mainStream)
+PLC.__init__(self, mainStream.lcbPlcfTxbxBkd, 6)
+self.pos = mainStream.fcPlcfTxbxBkd
+self.size = mainStream.lcbPlcfTxbxBkd
+
+def dump(self):
+print '<plcftxbxBkd type="PlcftxbxBkd" offset="%d" size="%d bytes">' % 
(self.pos, self.size)
+offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+pos = self.pos
+for i in range(self.getElements()):
+# aCp
+start = self.getuInt32(pos = pos)
+end = self.getuInt32(pos = pos + 4)
+print '<aCP index="%d" start="%d" end="%d">' % (i, start, end)
+pos += 4
+
+# aTbkd
+Tbkd(self, self.getOffset(self.pos, i)).dump()
+print '<transformed value="%s"/>' % 
self.quoteAttr(self.mainStream.retrieveCPs(offset + start, offset + end))
+print '</aCP>'
+print '</plcftxbxBkd>'
+
 class PlcfSpa(DOCDirStream, PLC):
 """The PlcfSpa structure is a PLC structure in which the data elements are
 SPA structures."""
diff --git a/src/docstream.py b/src/docstream.py
index 158748e..849dc19 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -399,7 +399,7 @@ class WordDocumentStream(DOCDirStream):
 ["fcPlfLfo"],
 ["lcbPlfLfo", self.handleLcbPlfLfo],
 ["fcPlcfTxbxBkd"],
-["lcbPlcfTxbxBkd"],
+["lcbPlcfTxbxBkd", self.handleLcbPlcfTxbxBkd],
 ["fcPlcfTxbxHdrBkd"],
 ["lcbPlcfTxbxHdrBkd"],
 ["fcDocUndoWord9"],
@@ -601,6 +601,9 @@ class WordDocumentStream(DOCDirStream):
 def handleLcbSttbfBkmk(self):
 docrecord.SttbfBkmk(self).dump()
 
+def handleLcbPlcfTxbxBkd(self):
+docrecord.PlcftxbxBkd(self).dump()
+
 def dumpFibRgFcLcb97(self, name):
 print '<%s type="FibRgFcLcb97" size="744 bytes">' % name
 self.__dumpFibRgFcLcb97()
___
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits


[Libreoffice-commits] mso-dumper.git: src/docrecord.py src/docstream.py

2013-05-16 Thread Miklos Vajna
 src/docrecord.py |   11 ++-
 src/docstream.py |6 ++
 2 files changed, 12 insertions(+), 5 deletions(-)

New commits:
commit ddc6ca725bc05bdc82a00a30fe52bfea81dd97ad
Author: Miklos Vajna vmik...@suse.cz
Date:   Thu May 16 19:17:36 2013 +0200

factor out duplicated code

diff --git a/src/docrecord.py b/src/docrecord.py
index fec80fe..dac6d83 100644
--- a/src/docrecord.py
+++ b/src/docrecord.py
@@ -1067,7 +1067,7 @@ class PlcfHdd(DOCDirStream, PLC):
 
 def dump(self):
 print '<plcfHdd type="PlcfHdd" offset="%d" size="%d bytes">' % 
(self.pos, self.size)
-offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+offset = self.mainStream.getHeaderOffset()
 pos = self.pos
 for i in range(self.getElements() - 1):
 start = self.getuInt32(pos = pos)
@@ -1088,7 +1088,7 @@ class PlcfandTxt(DOCDirStream, PLC):
 
 def dump(self):
 print '<plcfandTxt type="PlcfandTxt" offset="%d" size="%d bytes">' % 
(self.pos, self.size)
-offset = self.mainStream.ccpText + self.mainStream.ccpFtn + 
self.mainStream.ccpHdd
+offset = self.mainStream.getCommentOffset()
 pos = self.pos
 for i in range(self.getElements() - 1):
 start = self.getuInt32(pos = pos)
@@ -2706,7 +2706,8 @@ class FTXBXS(DOCDirStream):
 self.printAndSet("lid", self.readuInt32())
 self.printAndSet("txidUndo", self.readuInt32())
 print '</aFTXBXS>'
-assert self.posOrig + FTXBXS.size == self.pos
+if not self.fReusable:
+assert self.posOrig + FTXBXS.size == self.pos
 
 class PlcftxbxTxt(DOCDirStream, PLC):
 """Specifies which ranges of text are contained in which textboxes."""
@@ -2718,7 +2719,7 @@ class PlcftxbxTxt(DOCDirStream, PLC):
 
 def dump(self):
 print '<plcftxbxTxt type="PlcftxbxTxt" offset="%d" size="%d bytes">' % 
(self.pos, self.size)
-offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+offset = self.mainStream.getHeaderOffset()
 pos = self.pos
 for i in range(self.getElements() - 1):
 # aCp
@@ -2767,7 +2768,7 @@ class PlcftxbxBkd(DOCDirStream, PLC):
 
 def dump(self):
 print '<plcftxbxBkd type="PlcftxbxBkd" offset="%d" size="%d bytes">' % 
(self.pos, self.size)
-offset = self.mainStream.ccpText + self.mainStream.ccpFtn
+offset = self.mainStream.getHeaderOffset()
 pos = self.pos
 for i in range(self.getElements()):
 # aCp
diff --git a/src/docstream.py b/src/docstream.py
index 849dc19..e888299 100644
--- a/src/docstream.py
+++ b/src/docstream.py
@@ -893,4 +893,10 @@ class WordDocumentStream(DOCDirStream):
 i += 1
 return "".join(ret)
 
+def getHeaderOffset(self):
+return self.ccpText + self.ccpFtn
+
+def getCommentOffset(self):
+return self.getHeaderOffset() + self.ccpHdd
+
 # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
___
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits