src/docdirstream.py | 22 ++++++++++++++++++ src/docrecord.py | 61 ++++++++++++++++++++++++++++------------------------ src/docstream.py | 48 ++++++++++++++++++++-------------------- 3 files changed, 80 insertions(+), 51 deletions(-)
New commits: commit e72cda1d059a30b5fa2e1a1e52568f011399dfc7 Author: Miklos Vajna <vmik...@suse.cz> Date: Thu Nov 8 16:23:44 2012 +0100 add DOCDirStream.getInt*() methods diff --git a/src/docdirstream.py b/src/docdirstream.py index 9f1ccd5..6fa4c17 100755 --- a/src/docdirstream.py +++ b/src/docdirstream.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import globals +import struct class DOCDirStream: """Represents one single word file subdirectory, like e.g. 'WordDocument'.""" @@ -23,6 +24,27 @@ class DOCDirStream: else: print '<%s value="%s">' % (key, value) + def getInt8(self, bytes = None, pos = None): + if not bytes: + bytes = self.bytes + if not pos: + pos = self.pos + return ord(struct.unpack("<c", bytes[pos:pos+1])[0]) + + def getInt16(self, bytes = None, pos = None): + if not bytes: + bytes = self.bytes + if not pos: + pos = self.pos + return struct.unpack("<H", bytes[pos:pos+2])[0] + + def getInt32(self, bytes = None, pos = None): + if not bytes: + bytes = self.bytes + if not pos: + pos = self.pos + return struct.unpack("<I", bytes[pos:pos+4])[0] + def getBit(self, byte, bitNumber): return (byte & (1 << bitNumber)) >> bitNumber diff --git a/src/docrecord.py b/src/docrecord.py index 2141c75..44552bb 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -13,7 +13,7 @@ class FcCompressed(DOCDirStream): def dump(self): print '<fcCompressed type="FcCompressed" offset="%d" size="%d bytes">' % (self.pos, self.size) - buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0] + buf = self.getInt32() self.pos += 4 self.printAndSet("fc", buf & ((2**32-1) >> 2)) # bits 0..29 self.printAndSet("fCompressed", self.getBit(buf, 30)) @@ -35,7 +35,7 @@ class Pcd(DOCDirStream): def dump(self): print '<pcd type="Pcd" offset="%d" size="%d bytes">' % (self.pos, self.size) - buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0] + buf = self.getInt16() self.pos += 2 self.printAndSet("fNoParaLast", self.getBit(buf, 0)) self.printAndSet("fR1", self.getBit(buf, 1)) @@ -75,8 +75,8 @@ class PlcPcd(DOCDirStream, PLC): pos = self.pos for i in range(self.getElements()): # aCp - start = struct.unpack("<I", self.bytes[pos:pos+4])[0] - end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0] + start = self.getInt32(pos = pos) + end = self.getInt32(pos = pos + 4) print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) pos += 4 @@ -105,7 +105,7 @@ class Sprm(DOCDirStream): 7: 3, } - self.sprm = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0] + self.sprm = self.getInt16() self.pos += 2 self.ispmd = (self.sprm & 0x1ff) # 1-9th bits @@ -114,11 +114,11 @@ class Sprm(DOCDirStream): self.spra = (self.sprm & 0xe000) >> 13 # 14-16th bits if self.operandSizeMap[self.spra] == 1: - self.operand = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]) + self.operand = self.getInt8() elif self.operandSizeMap[self.spra] == 2: - self.operand = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0] + self.operand = self.getInt16() elif self.operandSizeMap[self.spra] == 4: - self.operand = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0] # TODO generalize this + self.operand = self.getInt32() else: self.operand = "todo" @@ -163,7 +163,7 @@ class GrpPrlAndIstd(DOCDirStream): def dump(self): print '<grpPrlAndIstd type="GrpPrlAndIstd" offset="%d" size="%d bytes">' % (self.pos, self.size) pos = self.pos - self.printAndSet("istd", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("istd", self.getInt16()) pos += 2 while (self.size - (pos - self.pos)) > 0: prl = Prl(self.bytes, pos) @@ -179,10 +179,10 @@ class PapxInFkp(DOCDirStream): def dump(self): print '<papxInFkp type="PapxInFkp" offset="%d">' % self.pos - self.printAndSet("cb", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) + self.printAndSet("cb", self.getInt8()) self.pos += 1 if self.cb == 0: - self.printAndSet("cb_", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) + self.printAndSet("cb_", self.getInt8()) self.pos += 1 grpPrlAndIstd = GrpPrlAndIstd(self.bytes, self.pos, 2 * self.cb_) grpPrlAndIstd.dump() @@ -199,7 +199,7 @@ class BxPap(DOCDirStream): def dump(self): print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize()) - self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) + self.printAndSet("bOffset", self.getInt8()) papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2) papxInFkp.dump() print '</bxPap>' @@ -217,12 +217,12 @@ class PapxFkp(DOCDirStream): def dump(self): print '<papxFkp type="PapxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size) - self.cpara = ord(struct.unpack("<c", self.bytes[self.pos+self.size-1:self.pos+self.size-1+1])[0]) + self.cpara = self.getInt8(pos = self.pos + self.size - 1) pos = self.pos for i in range(self.cpara): # rgfc - start = struct.unpack("<I", self.bytes[pos:pos+4])[0] - end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0] + start = self.getInt32(pos = pos) + end = self.getInt32(pos = pos + 4) print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end]) pos += 4 @@ -246,7 +246,7 @@ class PnFkpPapx(DOCDirStream): def dump(self): print '<%s type="PnFkpPapx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size) - buf = struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0] + buf = self.getInt32() self.pos += 4 self.printAndSet("pn", buf & (2**22-1)) papxFkp = PapxFkp(self.bytes, self.mainStream, self.pn*512, 512) @@ -266,8 +266,8 @@ class PlcBtePapx(DOCDirStream, PLC): pos = self.pos for i in range(self.getElements()): # aFC - start = struct.unpack("<I", self.bytes[pos:pos+4])[0] - end = struct.unpack("<I", self.bytes[pos+4:pos+8])[0] + start = self.getInt32(pos = pos) + end = self.getInt32(pos = pos + 4) print '<aFC index="%d" start="%d" end="%d">' % (i, start, end) pos += 4 @@ -286,9 +286,9 @@ class Pcdt(DOCDirStream): def dump(self): print '<pcdt type="Pcdt" offset="%d" size="%d bytes">' % (self.pos, self.size) - self.printAndSet("clxt", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) + self.printAndSet("clxt", self.getInt8()) self.pos += 1 - self.printAndSet("lcb", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet("lcb", self.getInt32()) self.pos += 4 PlcPcd(self.bytes, self.mainStream, self.pos, self.lcb).dump() print '</pcdt>' @@ -301,7 +301,7 @@ class Clx(DOCDirStream): def dump(self): print '<clx type="Clx" offset="%d" size="%d bytes">' % (self.pos, self.size) - firstByte = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]) + firstByte = self.getInt8() if firstByte == 0x02: print '<info what="Array of Prc, 0 elements"/>' Pcdt(self.bytes, self.mainStream, self.pos, self.size).dump() diff --git a/src/docstream.py b/src/docstream.py index 338232d..16657df 100755 --- a/src/docstream.py +++ b/src/docstream.py @@ -52,38 +52,38 @@ class WordDocumentStream(DOCDirStream): def dumpFib(self): print '<fib>' self.dumpFibBase("base") - self.printAndSet("csw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("csw", self.getInt16()) self.pos += 2 self.dumpFibRgW97("fibRgW") - self.printAndSet("cslw", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("cslw", self.getInt16()) self.pos += 2 self.dumpFibRgLw97("fibRgLw") - self.printAndSet("cbRgFcLcb", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("cbRgFcLcb", self.getInt16()) self.pos += 2 self.dumpFibRgFcLcb("fibRgFcLcbBlob") - self.printAndSet("cswNew", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("cswNew", self.getInt16()) self.pos += 2 print '</fib>' def dumpFibBase(self, name): print '<%s type="FibBase" size="32 bytes">' % name - self.printAndSet("wIndent", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("wIndent", self.getInt16()) self.pos += 2 - self.printAndSet("nFib", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("nFib", self.getInt16()) self.pos += 2 - self.printAndSet("unused", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("unused", self.getInt16()) self.pos += 2 - self.printAndSet("lid", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("lid", self.getInt16()) self.pos += 2 - self.printAndSet("pnNext", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("pnNext", self.getInt16()) self.pos += 2 - buf = struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0] + buf = self.getInt16() self.pos += 2 self.printAndSet("fDot", self.getBit(buf, 0)) self.printAndSet("fGlsy", self.getBit(buf, 1)) @@ -102,16 +102,16 @@ class WordDocumentStream(DOCDirStream): self.printAndSet("fFarEast", self.getBit(buf, 14)) self.printAndSet("fObfuscated", self.getBit(buf, 15)) - self.printAndSet("nFibBack", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("nFibBack", self.getInt16()) self.pos += 2 - self.printAndSet("lKey", struct.unpack("<I", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet("lKey", self.getInt32()) self.pos += 4 - self.printAndSet("envr", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) + self.printAndSet("envr", self.getInt8()) self.pos += 1 - buf = ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0]) + buf = self.getInt8() self.pos += 1 self.printAndSet("fMac", self.getBit(buf, 0)) @@ -121,13 +121,13 @@ class WordDocumentStream(DOCDirStream): self.printAndSet("reserved2", self.getBit(buf, 4)) self.printAndSet("fSpare0", (buf & (2**3-1))) - self.printAndSet("reserved3", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("reserved3", self.getInt16()) self.pos += 2 - self.printAndSet("reserved4", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("reserved4", self.getInt16()) self.pos += 2 - self.printAndSet("reserved5", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet("reserved5", self.getInt32()) self.pos += 4 - self.printAndSet("reserved6", struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet("reserved6", self.getInt32()) self.pos += 4 print '</%s>' % name @@ -136,9 +136,9 @@ class WordDocumentStream(DOCDirStream): print '<%s type="FibRgW97" size="28 bytes">' % name for i in range(13): - self.printAndSet("reserved%d" % (i + 1), struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("reserved%d" % (i + 1), self.getInt16()) self.pos += 2 - self.printAndSet("lidFE", struct.unpack("<H", self.bytes[self.pos:self.pos+2])[0]) + self.printAndSet("lidFE", self.getInt16()) self.pos += 2 print '</%s>' % name @@ -171,7 +171,7 @@ class WordDocumentStream(DOCDirStream): "reserved14", ] for i in fields: - self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet(i, self.getInt32()) self.pos += 4 print '</%s>' % name @@ -375,7 +375,7 @@ class WordDocumentStream(DOCDirStream): ["lcbSttbfUssr"], ] for i in fields: - self.printAndSet(i[0], struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0], end = len(i) == 1) + self.printAndSet(i[0], self.getInt32(), end = len(i) == 1) self.pos += 4 if len(i) > 1: i[1]() @@ -433,7 +433,7 @@ class WordDocumentStream(DOCDirStream): "lcbBkdEdnOld", ] for i in fields: - self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet(i, self.getInt32()) self.pos += 4 def __dumpFibRgFcLcb2002(self): @@ -497,7 +497,7 @@ class WordDocumentStream(DOCDirStream): "lcbPlcflvcMixedXP", ] for i in fields: - self.printAndSet(i, struct.unpack("<L", self.bytes[self.pos:self.pos+4])[0]) + self.printAndSet(i, self.getInt32()) self.pos += 4 def dumpFibRgFcLcb2002(self, name): commit 128a02e1b20443e9e59fb7d0a17c26801143e5b8 Author: Miklos Vajna <vmik...@suse.cz> Date: Thu Nov 8 16:19:37 2012 +0100 eliminate one more place where PLC elements are counted manually diff --git a/src/docrecord.py b/src/docrecord.py index 884dc10..2141c75 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -56,7 +56,11 @@ class PLC: return (self.totalSize - 4) / (4 + self.structSize) # defined by 2.2.2 def getOffset(self, pos, i): - return pos + (4 * (self.getElements() + 1)) + (self.structSize * i) + return self.getPLCOffset(pos, self.getElements(), self.structSize, i) + + @staticmethod + def getPLCOffset(pos, elements, structSize, i): + return pos + (4 * (elements + 1)) + (structSize * i) class PlcPcd(DOCDirStream, PLC): """The PlcPcd structure is a PLC whose data elements are Pcds (8 bytes each).""" @@ -188,19 +192,22 @@ class PapxInFkp(DOCDirStream): class BxPap(DOCDirStream): """The BxPap structure specifies the offset of a PapxInFkp in PapxFkp.""" - def __init__(self, bytes, mainStream, offset, size, parentoffset): + def __init__(self, bytes, mainStream, offset, parentoffset): DOCDirStream.__init__(self, bytes) self.pos = offset - self.size = size self.parentpos = parentoffset def dump(self): - print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size) + print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize()) self.printAndSet("bOffset", ord(struct.unpack("<c", self.bytes[self.pos:self.pos+1])[0])) papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2) papxInFkp.dump() print '</bxPap>' + @staticmethod + def getSize(): + return 13 # in bytes, see 2.9.23 + class PapxFkp(DOCDirStream): """The PapxFkp structure maps paragraphs, table rows, and table cells to their properties.""" def __init__(self, bytes, mainStream, offset, size): @@ -221,8 +228,8 @@ class PapxFkp(DOCDirStream): pos += 4 # rgbx - offset = self.pos + ( 4 * ( self.cpara + 1 ) ) + ( 13 * i ) # TODO, 13 is hardwired here - bxPap = BxPap(self.bytes, self.mainStream, offset, 13, self.pos) # TODO 13 hardwired + offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.getSize(), i) + bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos) bxPap.dump() print '</rgfc>' _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits