 doc-dump.py      |  2 ++
 src/docrecord.py | 44 ++++++++++++++++++++++++-------------------
 src/globals.py   |  4 ++--
 3 files changed, 28 insertions(+), 22 deletions(-)
New commits: commit b2b4859c6f8d692804a3eb33f68981b337e11022 Author: Miklos Vajna <[email protected]> Date: Mon Nov 19 16:28:40 2012 +0100 FcCompressed: support fCompressed = 0 This seems to be used for non-ascii text. diff --git a/doc-dump.py b/doc-dump.py index ac1fba6..28e7bcb 100755 --- a/doc-dump.py +++ b/doc-dump.py @@ -1,6 +1,8 @@ #!/usr/bin/env python import sys +sys = reload(sys) +sys.setdefaultencoding("utf-8") sys.path.append(sys.path[0]+"/src") import globals import docstream diff --git a/src/docrecord.py b/src/docrecord.py index 64d2865..46369d3 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -21,11 +21,21 @@ class FcCompressed(DOCDirStream): self.printAndSet("r1", self.getBit(buf, 31)) print '</fcCompressed>' - def getTransformedAddress(self): - if self.fCompressed: - return self.fc/2 - else: - print '<todo what="FcCompressed: fCompressed = 0 not supported"/>' + def getTransformedValue(self, start, end): + if self.fCompressed: + offset = self.fc/2 + return globals.encodeName(self.mainStream.bytes[offset:offset+end-start]) + else: + offset = self.fc + return globals.encodeName(self.mainStream.bytes[offset:offset+end*2-start].decode('utf-16'), lowOnly = True) + + @staticmethod + def getFCTransformedValue(bytes, start, end): + # This is a bit ugly, but at this state we don't know yet if the text is compressed or not. 
+ try: + return globals.encodeName(bytes[start:end].decode('utf-16'), lowOnly = True) + except UnicodeDecodeError: + return globals.encodeName(bytes[start:end]) class Pcd(DOCDirStream): """The Pcd structure specifies the location of text in the WordDocument Stream and additional properties for this text.""" @@ -85,8 +95,7 @@ class PlcPcd(DOCDirStream, PLC): aPcd = Pcd(self.bytes, self.mainStream, self.getOffset(self.pos, i), 8) aPcd.dump() - offset = aPcd.fc.getTransformedAddress() - print '<transformed value="%s"/>' % globals.encodeName(self.mainStream.bytes[offset:offset+end-start]) + print '<transformed value="%s"/>' % aPcd.fc.getTransformedValue(start, end) print '</aCP>' print '</plcPcd>' @@ -251,7 +260,7 @@ class ChpxFkp(DOCDirStream): start = self.getuInt32(pos = pos) end = self.getuInt32(pos = pos + 4) print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end]) + print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.bytes, start, end) pos += 4 # rgbx @@ -280,7 +289,7 @@ class PapxFkp(DOCDirStream): start = self.getuInt32(pos = pos) end = self.getuInt32(pos = pos + 4) print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) - print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end]) + print '<transformed value="%s"/>' % FcCompressed.getFCTransformedValue(self.bytes, start, end) pos += 4 # rgbx commit e6ed69713adc34f7fe554b268dda3d97eb71ae74 Author: Miklos Vajna <[email protected]> Date: Mon Nov 19 16:05:14 2012 +0100 globals.encodeName: allow not encoding high characters diff --git a/src/globals.py b/src/globals.py index ca7a92e..dbb6690 100644 --- a/src/globals.py +++ b/src/globals.py @@ -151,7 +151,7 @@ def debug (msg): sys.stderr.write("DEBUG: %s\n"%msg) -def encodeName (name): +def encodeName (name, lowOnly = False): """Encode name that contains unprintable characters.""" n = len(name) @@ -160,7 +160,7 @@ def encodeName 
(name): newname = '' for i in xrange(0, n): - if ord(name[i]) <= 20 or ord(name[i]) >= 127: + if ord(name[i]) <= 20 or ((not lowOnly) and ord(name[i]) >= 127): newname += "\\x%2.2X"%ord(name[i]) else: newname += name[i] commit 52055fd71dda9407f8728b0584a2f3dc440f9f37 Author: Miklos Vajna <[email protected]> Date: Tue Nov 13 11:14:14 2012 +0100 drop pointless getSize methods diff --git a/src/docrecord.py b/src/docrecord.py index 9931173..64d2865 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -222,22 +222,19 @@ class PapxInFkp(DOCDirStream): class BxPap(DOCDirStream): """The BxPap structure specifies the offset of a PapxInFkp in PapxFkp.""" + size = 13 # in bytes, see 2.9.23 def __init__(self, bytes, mainStream, offset, parentoffset): DOCDirStream.__init__(self, bytes) self.pos = offset self.parentpos = parentoffset def dump(self): - print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.getSize()) + print '<bxPap type="BxPap" offset="%d" size="%d bytes">' % (self.pos, self.size) self.printAndSet("bOffset", self.getuInt8()) papxInFkp = PapxInFkp(self.bytes, self.mainStream, self.parentpos + self.bOffset*2) papxInFkp.dump() print '</bxPap>' - @staticmethod - def getSize(): - return 13 # in bytes, see 2.9.23 - class ChpxFkp(DOCDirStream): """The ChpxFkp structure maps text to its character properties.""" def __init__(self, bytes, mainStream, offset, size): @@ -287,7 +284,7 @@ class PapxFkp(DOCDirStream): pos += 4 # rgbx - offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.getSize(), i) + offset = PLC.getPLCOffset(self.pos, self.cpara, BxPap.size, i) bxPap = BxPap(self.bytes, self.mainStream, offset, self.pos) bxPap.dump() print '</rgfc>' @@ -514,9 +511,10 @@ class Stshif(DOCDirStream): def __init__(self, bytes, mainStream, offset): DOCDirStream.__init__(self, bytes, mainStream=mainStream) self.pos = offset + self.size = 18 def dump(self): - print '<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.getSize()) + print 
'<stshif type="Stshif" offset="%d" size="%d bytes">' % (self.pos, self.size) self.printAndSet("cstd", self.getuInt16()) self.pos += 2 self.printAndSet("cbSTDBaseInFile", self.getuInt16()) @@ -539,9 +537,6 @@ class Stshif(DOCDirStream): self.pos += 2 print '</stshif>' - def getSize(self): - return 18 - class LSD(DOCDirStream): """The LSD structure specifies the properties to be used for latent application-defined styles (see StshiLsd) when they are created.""" def __init__(self, bytes, offset): @@ -588,7 +583,7 @@ class STSHI(DOCDirStream): print '<stshi type="STSHI" offset="%d" size="%d bytes">' % (self.pos, self.size) self.stshif = Stshif(self.bytes, self.mainStream, self.pos) self.stshif.dump() - self.pos += self.stshif.getSize() + self.pos += self.stshif.size self.printAndSet("ftcBi", self.getuInt16()) self.pos += 2 stshiLsd = StshiLsd(self.bytes, self, self.pos) _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
