src/docrecord.py | 119 ++++++++++++++++++++++++++++++++++++-------------------
src/docsprm.py | 63 +++++++++++++++++++++++++++++
src/docstream.py | 12 ++++-
3 files changed, 151 insertions(+), 43 deletions(-)
New commits: commit 7f649c24775cc5b306b6f19a5529e05b538ba54d Author: Miklos Vajna <vmik...@suse.cz> Date: Sat Dec 1 16:35:42 2012 +0100 dump Sepx diff --git a/src/docrecord.py b/src/docrecord.py index 9d19602..4232517 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -197,6 +197,22 @@ class PlcPcd(DOCDirStream, PLC): print '</aCP>' print '</plcPcd>' +class Sepx(DOCDirStream): + """The Sepx structure specifies an array of Prl structures and the size of the array.""" + def __init__(self, sed): + DOCDirStream.__init__(self, sed.plcfSed.mainStream.bytes) + self.pos = sed.fcSepx + + def dump(self): + print '<sepx type="Sepx" offset="%d">' % self.pos + self.printAndSet("cb", self.readInt16()) + pos = self.pos + while (self.cb - (pos - self.pos)) > 0: + prl = Prl(self.bytes, pos) + prl.dump() + pos += prl.getSize() + print '</sepx>' + class Sed(DOCDirStream): """The Sed structure specifies the location of the section properties.""" size = 12 # defined by 2.8.26 @@ -209,6 +225,7 @@ class Sed(DOCDirStream): print '<aSed type="Sed" offset="%d" size="%d bytes">' % (self.pos, Sed.size) self.printAndSet("fn", self.readuInt16()) self.printAndSet("fcSepx", self.readuInt32()) + Sepx(self).dump() self.printAndSet("fnMpr", self.readuInt16()) self.printAndSet("fcMpr", self.readuInt32()) print '</aSed>' @@ -286,6 +303,7 @@ class Sprm(DOCDirStream): nameMap = { 1: docsprm.parMap, 2: docsprm.chrMap, + 4: docsprm.secMap, 5: docsprm.tblMap, } print '<sprm value="%s" name="%s" ispmd="%s" fSpec="%s" sgc="%s" spra="%s" operandSize="%s" operand="%s"/>' % ( diff --git a/src/docsprm.py b/src/docsprm.py index 4108809..fcd01de 100644 --- a/src/docsprm.py +++ b/src/docsprm.py @@ -5,6 +5,69 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
# +# see 2.6.4 of the spec +secMap = { + 0x3000: "sprmScnsPgn", + 0x3001: "sprmSiHeadingPgn", + 0xF203: "sprmSDxaColWidth", + 0xF204: "sprmSDxaColSpacing", + 0x3005: "sprmSFEvenlySpaced", + 0x3006: "sprmSFProtected", + 0x5007: "sprmSDmBinFirst", + 0x5008: "sprmSDmBinOther", + 0x3009: "sprmSBkc", + 0x300A: "sprmSFTitlePage", + 0x500B: "sprmSCcolumns", + 0x900C: "sprmSDxaColumns", + 0x300E: "sprmSNfcPgn", + 0x3011: "sprmSFPgnRestart", + 0x3012: "sprmSFEndnote", + 0x3013: "sprmSLnc", + 0x5015: "sprmSNLnnMod", + 0x9016: "sprmSDxaLnn", + 0xB017: "sprmSDyaHdrTop", + 0xB018: "sprmSDyaHdrBottom", + 0x3019: "sprmSLBetween", + 0x301A: "sprmSVjc", + 0x501B: "sprmSLnnMin", + 0x501C: "sprmSPgnStart97", + 0x301D: "sprmSBOrientation", + 0xB01F: "sprmSXaPage", + 0xB020: "sprmSYaPage", + 0xB021: "sprmSDxaLeft", + 0xB022: "sprmSDxaRight", + 0x9023: "sprmSDyaTop", + 0x9024: "sprmSDyaBottom", + 0xB025: "sprmSDzaGutter", + 0x5026: "sprmSDmPaperReq", + 0x3228: "sprmSFBiDi", + 0x322A: "sprmSFRTLGutter", + 0x702B: "sprmSBrcTop80", + 0x702C: "sprmSBrcLeft80", + 0x702D: "sprmSBrcBottom80", + 0x702E: "sprmSBrcRight80", + 0x522F: "sprmSPgbProp", + 0x7030: "sprmSDxtCharSpace", + 0x9031: "sprmSDyaLinePitch", + 0x5032: "sprmSClm", + 0x5033: "sprmSTextFlow", + 0xD234: "sprmSBrcTop", + 0xD235: "sprmSBrcLeft", + 0xD236: "sprmSBrcBottom", + 0xD237: "sprmSBrcRight", + 0x3239: "sprmSWall", + 0x703A: "sprmSRsid", + 0x303B: "sprmSFpc", + 0x303C: "sprmSRncFtn", + 0x303E: "sprmSRncEdn", + 0x503F: "sprmSNFtn", + 0x5040: "sprmSNfcFtnRef", + 0x5041: "sprmSNEdn", + 0x5042: "sprmSNfcEdnRef", + 0xD243: "sprmSPropRMark", + 0x7044: "sprmSPgnStart", + } + # see 2.6.3 of the spec tblMap = { 0x5400: "sprmTJc90", commit 93899d59903ae1253ae5d821914c05bbd03cecdf Author: Miklos Vajna <vmik...@suse.cz> Date: Sat Dec 1 15:09:58 2012 +0100 dump PlcfSed diff --git a/src/docrecord.py b/src/docrecord.py index 30e169f..9d19602 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -197,6 +197,49 @@ class PlcPcd(DOCDirStream, 
PLC): print '</aCP>' print '</plcPcd>' +class Sed(DOCDirStream): + """The Sed structure specifies the location of the section properties.""" + size = 12 # defined by 2.8.26 + def __init__(self, plcfSed, offset): + DOCDirStream.__init__(self, plcfSed.bytes) + self.pos = offset + self.plcfSed = plcfSed + + def dump(self): + print '<aSed type="Sed" offset="%d" size="%d bytes">' % (self.pos, Sed.size) + self.printAndSet("fn", self.readuInt16()) + self.printAndSet("fcSepx", self.readuInt32()) + self.printAndSet("fnMpr", self.readuInt16()) + self.printAndSet("fcMpr", self.readuInt32()) + print '</aSed>' + +class PlcfSed(DOCDirStream, PLC): + """The PlcfSed structure is a PLC structure where the data elements are Sed structures.""" + def __init__(self, mainStream, offset, size): + DOCDirStream.__init__(self, mainStream.doc.getDirectoryStreamByName("1Table").bytes, mainStream = mainStream) + PLC.__init__(self, size, Sed.size) + self.pos = offset + self.size = size + + def dump(self): + print '<plcSed type="PlcSed" offset="%d" size="%d bytes">' % (self.pos, self.size) + offset = self.mainStream.fcMin + pos = self.pos + for i in range(self.getElements()): + # aCp + start = self.getuInt32(pos = pos) + end = self.getuInt32(pos = pos + 4) + print '<aCP index="%d" start="%d" end="%d">' % (i, start, end) + pos += 4 + + # aSed + aSed = Sed(self, self.getOffset(self.pos, i)) + aSed.dump() + + print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.mainStream.bytes, offset + start, offset + end) + print '</aCP>' + print '</plcSed>' + class Sprm(DOCDirStream): """The Sprm structure specifies a modification to a property of a character, paragraph, table, or section.""" def __init__(self, bytes, offset): @@ -1706,32 +1749,6 @@ class LPUpxTapx(DOCDirStream): self.pos = uPXPadding.pos print '</lPUpxTapx>' -class StkCharLpUpxGrLpUpxRM(DOCDirStream): - """The StkCharLPUpxGrLPUpxRM structure specifies revision-marking information and formatting for character styles.""" - 
def __init__(self, stkCharGRLPUPX): - DOCDirStream.__init__(self, stkCharGRLPUPX.bytes) - self.pos = stkCharGRLPUPX.pos - - def dump(self): - print '<stkCharLpUpxGrLpUpxRM type="StkCharLpUpxGrLpUpxRM" offset="%d">' % self.pos - self.printAndSet("cbStkCharUpxGrLpUpxRM", self.getuInt16()) - if self.cbStkCharUpxGrLpUpxRM != 0: - print '<todo what="StkCharLpUpxGrLpUpxRM: cbStkCharUpxGrLpUpxRM != 0 not implemented"/>' - print '</stkCharLpUpxGrLpUpxRM>' - -class StkParaLpUpxGrLpUpxRM(DOCDirStream): - """The StkParaLPUpxGrLPUpxRM structure specifies revision-marking information and formatting for paragraph styles.""" - def __init__(self, stkParaGRLPUPX): - DOCDirStream.__init__(self, stkParaGRLPUPX.bytes) - self.pos = stkParaGRLPUPX.pos - - def dump(self): - print '<stkParaLpUpxGrLpUpxRM type="StkParaLpUpxGrLpUpxRM" offset="%d">' % self.pos - self.printAndSet("cbStkParaUpxGrLpUpxRM", self.getuInt16()) - if self.cbStkParaUpxGrLpUpxRM != 0: - print '<todo what="StkParaLpUpxGrLpUpxRM: cbStkParaUpxGrLpUpxRM != 0 not implemented"/>' - print '</stkParaLpUpxGrLpUpxRM>' - class StkListGRLPUPX(DOCDirStream): """The StkListGRLPUPX structure that specifies the formatting properties for a list style.""" def __init__(self, grLPUpxSw): @@ -1772,15 +1789,16 @@ class StkCharGRLPUPX(DOCDirStream): DOCDirStream.__init__(self, grLPUpxSw.bytes) self.grLPUpxSw = grLPUpxSw self.pos = grLPUpxSw.pos + self.grLPUpxSw = grLPUpxSw def dump(self): print '<stkCharGRLPUPX type="StkCharGRLPUPX" offset="%d">' % self.pos - lpUpxChpx = LPUpxChpx(self) - lpUpxChpx.dump() - self.pos = lpUpxChpx.pos - stkCharLpUpxGrLpUpxRM = StkCharLpUpxGrLpUpxRM(self) - stkCharLpUpxGrLpUpxRM.dump() - self.pos = stkCharLpUpxGrLpUpxRM.pos + if self.grLPUpxSw.std.stdf.stdfBase.cupx == 1: + lpUpxChpx = LPUpxChpx(self) + lpUpxChpx.dump() + self.pos = lpUpxChpx.pos + else: + print '<todo what="StkCharGRLPUPX: cupx != 1"/>' print '</stkCharGRLPUPX>' class StkParaGRLPUPX(DOCDirStream): @@ -1789,18 +1807,19 @@ class 
StkParaGRLPUPX(DOCDirStream): DOCDirStream.__init__(self, grLPUpxSw.bytes) self.grLPUpxSw = grLPUpxSw self.pos = grLPUpxSw.pos + self.grLPUpxSw = grLPUpxSw def dump(self): print '<stkParaGRLPUPX type="StkParaGRLPUPX" offset="%d">' % self.pos - lPUpxPapx = LPUpxPapx(self) - lPUpxPapx.dump() - self.pos = lPUpxPapx.pos - lpUpxChpx = LPUpxChpx(self) - lpUpxChpx.dump() - self.pos = lpUpxChpx.pos - stkParaLpUpxGrLpUpxRM = StkParaLpUpxGrLpUpxRM(self) - stkParaLpUpxGrLpUpxRM.dump() - self.pos = stkParaLpUpxGrLpUpxRM.pos + if self.grLPUpxSw.std.stdf.stdfBase.cupx == 2: + lPUpxPapx = LPUpxPapx(self) + lPUpxPapx.dump() + self.pos = lPUpxPapx.pos + lpUpxChpx = LPUpxChpx(self) + lpUpxChpx.dump() + self.pos = lpUpxChpx.pos + else: + print '<todo what="StkParaGRLPUPX: cupx != 2"/>' print '</stkParaGRLPUPX>' class GrLPUpxSw(DOCDirStream): diff --git a/src/docstream.py b/src/docstream.py index 93b2a2a..23349a2 100644 --- a/src/docstream.py +++ b/src/docstream.py @@ -216,7 +216,7 @@ class WordDocumentStream(DOCDirStream): ["fcPlcfandTxt"], ["lcbPlcfandTxt", self.handleLcbPlcfandTxt], ["fcPlcfSed"], - ["lcbPlcfSed"], + ["lcbPlcfSed", self.handleLcbPlcfSed], ["fcPlcPad"], ["lcbPlcPad"], ["fcPlcfPhe"], @@ -393,8 +393,10 @@ class WordDocumentStream(DOCDirStream): for i in fields: value = self.readInt32() hasHandler = len(i) > 1 + # the spec says these must be ignored + needsIgnoring = ["lcbStshfOrig"] # a member needs handling if it defines the size of a struct and it's non-zero - needsHandling = i[0].startswith("lcb") and value != 0 + needsHandling = i[0].startswith("lcb") and value != 0 and (not i[0] in needsIgnoring) self.printAndSet(i[0], value, end = ((not hasHandler) and (not needsHandling)), offset = True) if hasHandler or needsHandling: if hasHandler: @@ -466,6 +468,12 @@ class WordDocumentStream(DOCDirStream): plcfBkl = docrecord.PlcfBkl(self, offset, size) plcfBkl.dump() + def handleLcbPlcfSed(self): + offset = self.fcPlcfSed + size = self.lcbPlcfSed + plcfSed = 
docrecord.PlcfSed(self, offset, size) + plcfSed.dump() + def dumpFibRgFcLcb97(self, name): print '<%s type="FibRgFcLcb97" size="744 bytes">' % name self.__dumpFibRgFcLcb97() _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits