msodumper/vsdstream.py | 202 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 164 insertions(+), 38 deletions(-)
New commits: commit 18e52f3e1c61d1d644b427cd354d694f6a457f34 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 12:46:20 2014 +0100 VSD: start dumping the user-defined set diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 8dcd1bf..d2403a3 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -209,19 +209,22 @@ class PropertySetStream(DOCDirStream): self.printAndSet("CLSID2", self.readuInt32()) self.printAndSet("CLSID3", self.readuInt32()) self.printAndSet("NumPropertySets", self.readuInt32()) - GUID(self, "FMTID").dump() + GUID(self, "FMTID0").dump() self.printAndSet("Offset0", self.readuInt32()) + PropertySet(self, self.Offset0).dump() if self.NumPropertySets == 0x00000002: - print '<todo what="PropertySetStream::dump: handle NumPropertySets == 0x00000002"/>' - PropertySet(self).dump() + GUID(self, "FMTID1").dump() + self.printAndSet("Offset1", self.readuInt32()) + self.propertyIds = {} + PropertySet(self, self.Offset1).dump() print '</propertySetStream>' class PropertySet(DOCDirStream): - def __init__(self, parent): + def __init__(self, parent, offset): DOCDirStream.__init__(self, parent.bytes) self.parent = parent - self.pos = parent.Offset0 + self.pos = offset def getCodePage(self): for index, idAndOffset in enumerate(self.idsAndOffsets): commit c2cd85eddf02c74e0f26955e6e847ebc61d79593 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 12:36:52 2014 +0100 VSD: dump PIDDSI diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 380c7ef..8dcd1bf 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -119,7 +119,34 @@ def createVSDFile(chars, params): return VSDFile(chars, params) -PropertyIdentifierDocumentSummaryInformation = { +PIDDSI = { + 0x00000001: "PIDDSI_CODEPAGE", + 0x00000002: "PIDDSI_CATEGORY", + 0x00000003: "PIDDSI_PRESFORMAT", + 0x00000004: "PIDDSI_BYTECOUNT", + 0x00000005: "PIDDSI_LINECOUNT", + 0x00000006: "PIDDSI_PARACOUNT", + 0x00000007: "PIDDSI_SLIDECOUNT", + 0x00000008: "PIDDSI_NOTECOUNT", + 0x00000009: "PIDDSI_HIDDENCOUNT", + 0x0000000A: "PIDDSI_MMCLIPCOUNT", + 0x0000000B: "PIDDSI_SCALE", + 0x0000000C: "PIDDSI_HEADINGPAIR", + 0x0000000D: "PIDDSI_DOCPARTS", + 0x0000000E: "PIDDSI_MANAGER", + 0x0000000F: "PIDDSI_COMPANY", + 0x00000010: "PIDDSI_LINKSDIRTY", + 0x00000011: "PIDDSI_CCHWITHSPACES", + 0x00000013: "PIDDSI_SHAREDDOC", + 0x00000014: "PIDDSI_LINKBASE", + 0x00000015: "PIDDSI_HLINKS", + 0x00000016: "PIDDSI_HYPERLINKSCHANGED", + 0x00000017: "PIDDSI_VERSION", + 0x00000018: "PIDDSI_DIGSIG", + 0x0000001A: "PIDDSI_CONTENTTYPE", + 0x0000001B: "PIDDSI_CONTENTSTATUS", + 0x0000001C: "PIDDSI_LANGUAGE", + 0x0000001D: "PIDDSI_DOCVERSION", } @@ -129,11 +156,11 @@ class DocumentSummaryInformationStream(DOCDirStream): def dump(self): print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size - PropertySetStream(self, PropertyIdentifierDocumentSummaryInformation).dump() + PropertySetStream(self, PIDDSI).dump() print '</stream>' -PropertyIdentifierSummaryInformation = { +PIDSI = { 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER", 0x00000002: "PIDSI_TITLE", 0x00000003: "PIDSI_SUBJECT", @@ -162,7 +189,7 @@ class SummaryInformationStream(DOCDirStream): def dump(self): print '<stream name="\\x05SummaryInformation" size="%d">' % self.size - PropertySetStream(self, PropertyIdentifierSummaryInformation).dump() + PropertySetStream(self, PIDSI).dump() print '</stream>' commit 7432d469dd0333bdb7156484c6fb32fb03e1d173 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 11:17:04 2014 +0100 VSD: dump GUIDs properly diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 481f086..380c7ef 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -182,10 +182,7 @@ class PropertySetStream(DOCDirStream): self.printAndSet("CLSID2", self.readuInt32()) self.printAndSet("CLSID3", self.readuInt32()) self.printAndSet("NumPropertySets", self.readuInt32()) - self.printAndSet("FMTID00", self.readuInt32()) - self.printAndSet("FMTID01", self.readuInt32()) - self.printAndSet("FMTID02", self.readuInt32()) - self.printAndSet("FMTID03", self.readuInt32()) + GUID(self, "FMTID").dump() self.printAndSet("Offset0", self.readuInt32()) if self.NumPropertySets == 0x00000002: print '<todo what="PropertySetStream::dump: handle NumPropertySets == 0x00000002"/>' @@ -363,4 +360,23 @@ class CodePageString(DOCDirStream): print '<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage print '</%s>' % self.name + +class GUID(DOCDirStream): + def __init__(self, parent, name): + DOCDirStream.__init__(self, parent.bytes) + self.pos = parent.pos + self.parent = parent + self.name = name + + def dump(self): + Data1 = self.readuInt32() + Data2 = self.readuInt16() + Data3 = self.readuInt16() + Data4 = [] + for i in range(8): + Data4.append(self.readuInt8()) + value = "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x" % (Data1, Data2, Data3, Data4[0], Data4[1], Data4[2], Data4[3], Data4[4], Data4[5], Data4[6], Data4[7]) + print '<%s type="GUID" value="%s"/>' % (self.name, value) + self.parent.pos = self.pos + # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: commit c4c94fa5d494c105f17d41e3d5e1e3973cf44e8c Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 10:37:52 2014 +0100 vsd: complete PropertyType enumeration diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 5132c87..481f086 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -119,12 +119,17 @@ def createVSDFile(chars, params): return VSDFile(chars, params) +PropertyIdentifierDocumentSummaryInformation = { +} + + class DocumentSummaryInformationStream(DOCDirStream): def __init__(self, bytes, params, doc): DOCDirStream.__init__(self, bytes, params, "\x05DocumentSummaryInformation", doc=doc) def dump(self): print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size + PropertySetStream(self, PropertyIdentifierDocumentSummaryInformation).dump() print '</stream>' @@ -264,6 +269,45 @@ PropertyType = { 0x0047: "VT_CF", 0x0048: "VT_CLSID", 0x0049: "VT_VERSIONED_STREAM", + 0x1002: "VT_VECTOR | VT_I2", + 0x1003: "VT_VECTOR | VT_I4", + 0x1004: "VT_VECTOR | VT_R4", + 0x1005: "VT_VECTOR | VT_R8", + 0x1006: "VT_VECTOR | VT_CY", + 0x1007: "VT_VECTOR | VT_DATE", + 0x1008: "VT_VECTOR | VT_BSTR", + 0x100A: "VT_VECTOR | VT_ERROR", + 0x100B: "VT_VECTOR | VT_BOOL", + 0x100C: "VT_VECTOR | VT_VARIANT", + 0x1010: "VT_VECTOR | VT_I1", + 0x1011: "VT_VECTOR | VT_UI1", + 0x1012: "VT_VECTOR | VT_UI2", + 0x1013: "VT_VECTOR | VT_UI4", + 0x1014: "VT_VECTOR | VT_I8", + 0x1015: "VT_VECTOR | VT_UI8", + 0x101E: "VT_VECTOR | VT_LPSTR", + 0x101F: "VT_VECTOR | VT_LPWSTR", + 0x1040: "VT_VECTOR | VT_FILETIME", + 0x1047: "VT_VECTOR | VT_CF", + 0x1048: "VT_VECTOR | VT_CLSID", + 0x2002: "VT_ARRAY | VT_I2", + 0x2003: "VT_ARRAY | VT_I4", + 0x2004: "VT_ARRAY | VT_R4", + 0x2005: "VT_ARRAY | VT_R8", + 0x2006: "VT_ARRAY | VT_CY", + 0x2007: "VT_ARRAY | VT_DATE", + 0x2008: "VT_ARRAY | VT_BSTR", + 0x200A: "VT_ARRAY | VT_ERROR", + 0x200B: "VT_ARRAY | VT_BOOL", + 0x200C: "VT_ARRAY | VT_VARIANT", + 0x200E: "VT_ARRAY | VT_DECIMAL", + 0x2010: "VT_ARRAY | VT_I1", + 0x2011: "VT_ARRAY | VT_UI1", + 0x2012: "VT_ARRAY | VT_UI2", + 0x2013: "VT_ARRAY | VT_UI4", + 0x2016: "VT_ARRAY | VT_INT", + 0x2017: "VT_ARRAY | VT_UINT", + } commit afbd91f0a3d180def770c4a070ddbd57cc6849c8 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 10:25:58 2014 +0100 vsd: PIDSI is specific to the SummaryInformation stream diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index f91380c..5132c87 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -46,6 +46,8 @@ class VSDFile: def getStreamFromBytes(self, name, bytes): if name == "\x05SummaryInformation": return SummaryInformationStream(bytes, self.params, doc=self) + elif name == "\x05DocumentSummaryInformation": + return DocumentSummaryInformationStream(bytes, self.params, doc=self) else: return DOCDirStream(bytes, self.params, name, doc=self) @@ -117,20 +119,53 @@ def createVSDFile(chars, params): return VSDFile(chars, params) +class DocumentSummaryInformationStream(DOCDirStream): + def __init__(self, bytes, params, doc): + DOCDirStream.__init__(self, bytes, params, "\x05DocumentSummaryInformation", doc=doc) + + def dump(self): + print '<stream name="\\x05DocumentSummaryInformation" size="%d">' % self.size + print '</stream>' + + +PropertyIdentifierSummaryInformation = { + 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER", + 0x00000002: "PIDSI_TITLE", + 0x00000003: "PIDSI_SUBJECT", + 0x00000004: "PIDSI_AUTHOR", + 0x00000005: "PIDSI_KEYWORDS", + 0x00000006: "PIDSI_COMMENTS", + 0x00000007: "PIDSI_TEMPLATE", + 0x00000008: "PIDSI_LASTAUTHOR", + 0x00000009: "PIDSI_REVNUMBER", + 0x0000000A: "PIDSI_EDITTIME", + 0x0000000B: "PIDSI_LASTPRINTED", + 0x0000000C: "PIDSI_CREATE_DTM", + 0x0000000D: "PIDSI_LASTSAVE_DTM", + 0x0000000E: "PIDSI_PAGECOUNT", + 0x0000000F: "PIDSI_WORDCOUNT", + 0x00000010: "PIDSI_CHARCOUNT", + 0x00000011: "PIDSI_THUMBNAIL", + 0x00000012: "PIDSI_APPNAME", + 0x00000013: "PIDSI_DOC_SECURITY", +} + + class SummaryInformationStream(DOCDirStream): def __init__(self, bytes, params, doc): DOCDirStream.__init__(self, bytes, params, "\x05SummaryInformation", doc=doc) def dump(self): print '<stream name="\\x05SummaryInformation" size="%d">' % self.size - PropertySetStream(self).dump() + PropertySetStream(self, PropertyIdentifierSummaryInformation).dump() print '</stream>' class PropertySetStream(DOCDirStream): - def __init__(self, parent): + def __init__(self, parent, PropertyIds): DOCDirStream.__init__(self, parent.bytes) self.parent = parent + self.propertyIds = PropertyIds def dump(self): print '<propertySetStream type="PropertySetStream" offset="%s">' % self.pos @@ -181,28 +216,6 @@ class PropertySet(DOCDirStream): self.typedPropertyValues.append(typedPropertyValue) print '</propertySet>' -PropertyIdentifier = { - 0x00000001: "CODEPAGE_PROPERTY_IDENTIFIER", - 0x00000002: "PIDSI_TITLE", - 0x00000003: "PIDSI_SUBJECT", - 0x00000004: "PIDSI_AUTHOR", - 0x00000005: "PIDSI_KEYWORDS", - 0x00000006: "PIDSI_COMMENTS", - 0x00000007: "PIDSI_TEMPLATE", - 0x00000008: "PIDSI_LASTAUTHOR", - 0x00000009: "PIDSI_REVNUMBER", - 0x0000000A: "PIDSI_EDITTIME", - 0x0000000B: "PIDSI_LASTPRINTED", - 0x0000000C: "PIDSI_CREATE_DTM", - 0x0000000D: "PIDSI_LASTSAVE_DTM", - 0x0000000E: "PIDSI_PAGECOUNT", - 0x0000000F: "PIDSI_WORDCOUNT", - 0x00000010: "PIDSI_CHARCOUNT", - 0x00000011: "PIDSI_THUMBNAIL", - 0x00000012: "PIDSI_APPNAME", - 0x00000013: "PIDSI_DOC_SECURITY", -} - class PropertyIdentifierAndOffset(DOCDirStream): def __init__(self, parent, index): @@ -213,7 +226,7 @@ class PropertyIdentifierAndOffset(DOCDirStream): def dump(self): print '<propertyIdentifierAndOffset%s type="PropertyIdentifierAndOffset" offset="%s">' % (self.index, self.pos) - self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=PropertyIdentifier) + self.printAndSet("PropertyIdentifier", self.readuInt32(), dict=self.parent.parent.propertyIds) self.printAndSet("Offset", self.readuInt32()) print '</propertyIdentifierAndOffset%s>' % self.index self.parent.pos = self.pos commit 84940f4150fa56256ddff631fe6dc671e10f5b93 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 09:59:03 2014 +0100 vsdstream: dump utf8 titles diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 4e99aa5..f91380c 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -290,12 +290,20 @@ class CodePageString(DOCDirStream): if c == 0: break bytes.append(c) + codepage = self.parent.parent.getCodePage() + if codepage < 0: + codepage += 2 ** 16 # signed -> unsigned encoding = "" - if self.parent.parent.getCodePage() == 1252: + if codepage == 1252: # http://msdn.microsoft.com/en-us/goglobal/bb964654 encoding = "latin1" + elif codepage == 65001: + # http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130%28v=vs.85%29.aspx + encoding = "utf-8" if len(encoding): print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)).decode(encoding).encode('utf-8') + else: + print '<todo what="CodePageString::dump: unhandled codepage %s"/>' % codepage print '</%s>' % self.name # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: commit ae210089c3a5d6dd0932564c6f600be88739db45 Author: Miklos Vajna <[email protected]> Date: Wed Nov 26 09:41:38 2014 +0100 vsdstream: dump latin1 titles diff --git a/msodumper/vsdstream.py b/msodumper/vsdstream.py index 36c279a..4e99aa5 100644 --- a/msodumper/vsdstream.py +++ b/msodumper/vsdstream.py @@ -159,17 +159,26 @@ class PropertySet(DOCDirStream): self.parent = parent self.pos = parent.Offset0 + def getCodePage(self): + for index, idAndOffset in enumerate(self.idsAndOffsets): + if idAndOffset.PropertyIdentifier == 0x00000001: # CODEPAGE_PROPERTY_IDENTIFIER + return self.typedPropertyValues[index].Value + def dump(self): self.posOrig = self.pos print '<propertySet type="PropertySet" offset="%s">' % self.pos self.printAndSet("Size", self.readuInt32()) self.printAndSet("NumProperties", self.readuInt32()) - self.idsAndOffsets = {} + self.idsAndOffsets = [] for i in range(self.NumProperties): - self.idsAndOffsets[i] = PropertyIdentifierAndOffset(self, i) - self.idsAndOffsets[i].dump() + idAndOffset = PropertyIdentifierAndOffset(self, i) + idAndOffset.dump() + self.idsAndOffsets.append(idAndOffset) + self.typedPropertyValues = [] for i in range(self.NumProperties): - TypedPropertyValue(self, i).dump() + typedPropertyValue = TypedPropertyValue(self, i) + typedPropertyValue.dump() + self.typedPropertyValues.append(typedPropertyValue) print '</propertySet>' PropertyIdentifier = { @@ -269,6 +278,7 @@ class CodePageString(DOCDirStream): def __init__(self, parent, name): DOCDirStream.__init__(self, parent.bytes) self.pos = parent.pos + self.parent = parent self.name = name def dump(self): @@ -280,7 +290,12 @@ class CodePageString(DOCDirStream): if c == 0: break bytes.append(c) - print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)) + encoding = "" + if self.parent.parent.getCodePage() == 1252: + # http://msdn.microsoft.com/en-us/goglobal/bb964654 + encoding = "latin1" + if len(encoding): + print '<Characters value="%s"/>' % "".join(map(lambda c: chr(c), bytes)).decode(encoding).encode('utf-8') print '</%s>' % self.name # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
