 src/docdirstream.py              |   20 ++++++++++++++++----
 src/docrecord.py                 |    6 +++---
 src/globals.py                   |    7 ++-----
 test/doc/pass/novell717594-2.doc |binary
 4 files changed, 21 insertions(+), 12 deletions(-)
New commits: commit a3d4647e737f2640c96062cd481797fa71ba5ba4 Author: Miklos Vajna <vmik...@collabora.co.uk> Date: Tue Oct 22 17:29:08 2013 +0200 fix DOCDirStream::getString() For all the other methods that read numbers, the getFoo variant just reads the memory, while the readFoo version also adjusts the stream position. Do the same for strings: make getString() not adjust the stream position, add a readString() that does so and adapt callers. With that, we can use the new getString() in DopTypography, and then seek to the expected position, without trying to figure out what the new position is. Also, remove xml mode in globals::getUTF8FromUTF16(): that was used by docdirstream only, and now even that no longer uses it. diff --git a/src/docdirstream.py b/src/docdirstream.py index 5cf1493..e73c935 100644 --- a/src/docdirstream.py +++ b/src/docdirstream.py @@ -120,21 +120,33 @@ class DOCDirStream: self.pos += 8 return ret - def getString(self, limit = None): + def __getString(self, limit): bytes = [] count = 0 + pos = self.pos while True: if (not limit is None) and count == limit: break - i = self.readuInt8() - j = self.readuInt8() + i = self.getuInt8(pos = pos) + pos += 1 + j = self.getuInt8(pos = pos) + pos += 1 if i != 0 or j != 0: bytes.append(i) bytes.append(j) else: break count += 1 - return globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes)), xml = True) + return (self.quoteAttr(globals.encodeName(globals.getUTF8FromUTF16("".join(map(lambda x: chr(x), bytes))))), pos) + + def getString(self, limit = None): + ret, pos = self.__getString(limit) + return ret + + def readString(self, limit = None): + ret, pos = self.__getString(limit) + self.pos = pos + return ret def getBit(self, byte, bitNumber): return (byte & (1 << bitNumber)) >> bitNumber diff --git a/src/docrecord.py b/src/docrecord.py index 04cf8c0..a0d3ec3 100644 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -2035,10 +2035,10 @@ class DopTypography(DOCDirStream): 
self.printAndSet("cchLeadingPunct", self.readInt16()) self.printAndSet("rgxchFPunct", self.getString(self.cchFollowingPunct), hexdump = False) - self.pos += (202 - 2 * self.cchFollowingPunct) + self.pos += 202 self.printAndSet("rgxchLPunct", self.getString(self.cchLeadingPunct), hexdump = False) - self.pos += (102 - 2 * self.cchLeadingPunct) + self.pos += 102 print '</dopTypography>' assert self.pos == self.dop.pos + DopTypography.size @@ -2514,7 +2514,7 @@ class FFN(DOCDirStream): self.pos += 10 FontSignature(self.bytes, self.pos).dump() self.pos += 24 - print '<xszFfn value="%s"/>' % self.getString() + print '<xszFfn value="%s"/>' % self.readString() print '</ffn>' class SttbfFfn(DOCDirStream): diff --git a/src/globals.py b/src/globals.py index 31e995a..68aae93 100644 --- a/src/globals.py +++ b/src/globals.py @@ -412,7 +412,7 @@ def getDouble (bytes): return struct.unpack('<d', text)[0] -def getUTF8FromUTF16 (bytes, xml = False): +def getUTF8FromUTF16 (bytes): # little endian utf-16 strings byteCount = len(bytes) loopCount = int(byteCount/2) @@ -431,10 +431,7 @@ def getUTF8FromUTF16 (bytes, xml = False): try: text += unicode(code, 'utf-8') except UnicodeDecodeError: - close = "" - if xml: - close="/" - text += "<%d invalid chars%s>"%(len(code), close) + text += "<%d invalid chars>"%len(code) return text class StreamWrap(object): diff --git a/test/doc/pass/novell717594-2.doc b/test/doc/pass/novell717594-2.doc new file mode 100644 index 0000000..03c455d Binary files /dev/null and b/test/doc/pass/novell717594-2.doc differ _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits