Author: Tobias Pape <tob...@netshed.de> Branch: Changeset: r75:072c0a42c109 Date: 2013-02-20 16:06 +0100 http://bitbucket.org/pypy/lang-smalltalk/changeset/072c0a42c109/
Log: Refactor image version identification diff --git a/spyvm/squeakimage.py b/spyvm/squeakimage.py --- a/spyvm/squeakimage.py +++ b/spyvm/squeakimage.py @@ -6,19 +6,39 @@ from rpython.rlib import objectmodel -def chrs2int(b): +def chrs2int(b, unsigned): assert len(b) == 4 first = ord(b[0]) # big endian - if first & 0x80 != 0: - first = first - 0x100 - return first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3]) + if not unsigned: + if first & 0x80 != 0: + first = first - 0x100 + return (first << 24 | ord(b[1]) << 16 | ord(b[2]) << 8 | ord(b[3])) -def swapped_chrs2int(b): +def swapped_chrs2int(b, unsigned): assert len(b) == 4 first = ord(b[3]) # little endian - if first & 0x80 != 0: - first = first - 0x100 - return first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0]) + if not unsigned: + if first & 0x80 != 0: + first = first - 0x100 + return (first << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0])) + +def chrs2long(b, unsigned): + assert len(b) == 8 + first = ord(b[0]) # big endian + if not unsigned: + if first & 0x80 != 0: + first = first - 0x100 + return ( first << 56 | ord(b[1]) << 48 | ord(b[2]) << 40 | ord(b[3]) << 32 + | ord(b[4]) << 24 | ord(b[5]) << 16 | ord(b[6]) << 8 | ord(b[7]) ) + +def swapped_chrs2long(b, unsigned): + assert len(b) == 8 + first = ord(b[7]) # little endian + if not unsigned: + if first & 0x80 != 0: + first = first - 0x100 + return ( first << 56 | ord(b[6]) << 48 | ord(b[5]) << 40 | ord(b[4]) << 32 + | ord(b[3]) << 24 | ord(b[2]) << 16 | ord(b[1]) << 8 | ord(b[0]) ) # ____________________________________________________________ @@ -32,24 +52,51 @@ self.data = inputfile.read() finally: inputfile.close() - self.swap = False - self.pos = 0 - self.count = 0 + self.reset() def peek(self): if self.pos >= len(self.data): raise IndexError - if self.swap: - return swapped_chrs2int( self.data[self.pos:self.pos+4] ) + data_peek = self.data[self.pos:self.pos + self.word_size] + if self.use_long_read: + if self.swap: + return 
swapped_chrs2long(data_peek, False) + else: + return chrs2long(data_peek, False) else: - return chrs2int( self.data[self.pos:self.pos+4] ) + if self.swap: + return swapped_chrs2int(data_peek, False) + else: + return chrs2int(data_peek, False) + + def peek_unsigned(self): + if self.pos >= len(self.data): + raise IndexError + data_peek = self.data[self.pos:self.pos + self.word_size] + if self.use_long_read: + if self.swap: + return swapped_chrs2long(data_peek, True) + else: + return chrs2long(data_peek, True) + else: + if self.swap: + return swapped_chrs2int(data_peek, True) + else: + return chrs2int(data_peek, True) + def next(self): integer = self.peek() - self.pos += 4 - self.count += 4 + self.pos += self.word_size + self.count += self.word_size return integer + def reset(self): + self.swap = False + self.pos = 0 + self.count = 0 + self.be_32bit() + def reset_count(self): self.count = 0 @@ -59,25 +106,114 @@ self.pos += jump self.count += jump + def skipwords(self, jump): + self.skipbytes(jump * self.word_size) + assert (self.pos + jump) <= len(self.data) + self.pos += jump + self.count += jump + + + def length(self): + return len(self.data) + def close(self): pass # already closed + def be_64bit(self): + self.word_size = 8 + self.use_long_read = True + + def be_32bit(self): + self.word_size = 4 + self.use_long_read = False class CorruptImageError(Exception): pass +class UnsupportedImageError(Exception): + pass + # ____________________________________________________________ -# XXX hack to read Cog images. 
-# TODO implement Cog float byte reversal -SUPPORTED_VERSIONS = [6502, 6505] +class ImageVersion(object): + + def __init__(self, magic, is_big_endian, is_64bit, has_closures, has_floats_reversed): + self.magic = magic + self.is_big_endian = is_big_endian + self.is_64bit = is_64bit + self.has_closures = has_closures + self.has_floats_reversed = has_floats_reversed + +image_versions = { + 0x00001966: ImageVersion(6502, True, False, False, False), + 0x66190000: ImageVersion(6502, False, False, False, False), + 0x00001968: ImageVersion(6504, True, False, True, False), + 0x68190000: ImageVersion(6504, False, False, True, False), + 0x00001969: ImageVersion(6505, True, False, True, True ), + 0x69190000: ImageVersion(6505, False, False, True, True ), + 0x000109A0: ImageVersion(68000, True, True, False, False), + 0xA009010000000000: ImageVersion(68000, False, True, False, False), + 0x00000000000109A2: ImageVersion(68002, True, True, True, False), + 0xA209010000000000: ImageVersion(68002, False, True, True, False), + 0x00000000000109A3: ImageVersion(68003, True, True, True, True ), + 0xA309010000000000: ImageVersion(68003, False, True, True, True ), +} + + +def version(magic): + ver = image_versions.get(magic, None) + if ver is None: + raise CorruptImageError + # if ver.is_64bit or ver.has_floats_reversed: + # raise UnsupportedImageError + return ver + +possible_image_offset = 512 + +def version_from_stream(stream): + # 32 bit + try: + return version(stream.peek_unsigned()) + except CorruptImageError as e: + if stream.length() > possible_image_offset + 4: + stream.skipbytes(possible_image_offset) + try: + return version(stream.peek_unsigned()) + except CorruptImageError: + pass # raise original error + # 64 bit + stream.reset() + stream.be_64bit() + try: + v = version(stream.peek_unsigned()) + assert v.is_64bit + return v + except CorruptImageError as e: + if stream.length() > possible_image_offset + 4: + stream.skipbytes(possible_image_offset) + try: + v = 
version(stream.peek_unsigned()) + assert v.is_64bit + return v + except CorruptImageError: + pass # raise original error + raise + + + +def reader_for_image(space, stream): + ver = version_from_stream(stream) + if not ver.is_big_endian: + stream.swap = True + return ImageReader(space, stream, ver) class ImageReader(object): - def __init__(self, space, stream): + def __init__(self, space, stream, version): self.space = space self.stream = stream + self.version = version # dictionary mapping old address to chunk object self.chunks = {} self.chunklist = [] @@ -94,15 +230,13 @@ self.init_w_objects() self.fillin_w_objects() + def read_version(self): + # 1 word version + magic = self.stream.next() + assert self.version.magic == magic + def read_header(self): - # 1 word version - version = self.stream.peek() - if version not in SUPPORTED_VERSIONS: - self.stream.swap = True - version = self.stream.peek() - if version not in SUPPORTED_VERSIONS: - raise CorruptImageError - version = self.stream.next() + self.read_version() #------ # 1 word headersize headersize = self.stream.next() @@ -118,8 +252,7 @@ print "savedwindowssize", savedwindowssize fullscreenflag = self.stream.next() extravmmemory = self.stream.next() - # we called 9 times next, 1 word = 4 byte - self.stream.skipbytes(headersize - (9 * 4)) + self.stream.skipbytes(headersize - self.stream.pos) def read_body(self): import sys diff --git a/spyvm/test/test_miniimage.py b/spyvm/test/test_miniimage.py --- a/spyvm/test/test_miniimage.py +++ b/spyvm/test/test_miniimage.py @@ -21,7 +21,7 @@ module.space = space def open_miniimage(space): - return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open())) + return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open())) def get_reader(): return reader diff --git a/spyvm/test/test_squeakimage.py b/spyvm/test/test_squeakimage.py --- a/spyvm/test/test_squeakimage.py +++ b/spyvm/test/test_squeakimage.py @@ -1,15 +1,16 @@ import py from spyvm import 
squeakimage -from spyvm.squeakimage import chrs2int +from spyvm.squeakimage import chrs2int, chrs2long, swapped_chrs2long from spyvm import objspace +from struct import pack + space = objspace.ObjSpace() # ----- helpers ---------------------------------------------- def ints2str(*ints): - import struct - return struct.pack(">" + "i" * len(ints), *ints) + return pack(">" + "i" * len(ints), *ints) def joinbits(values, lengths): result = 0 @@ -18,21 +19,37 @@ result += each return result -def imagereader_mock(string): +def imagestream_mock(string): import StringIO f = StringIO.StringIO(string) - stream = squeakimage.Stream(f) - return squeakimage.ImageReader(space, stream) + return squeakimage.Stream(f) +def imagereader_mock(string): + stream = imagestream_mock(string) + return squeakimage.reader_for_image(space, stream) + + +SIMPLE_VERSION_HEADER = pack(">i", 6502) +SIMPLE_VERSION_HEADER_LE = pack("<i", 6502) # ----- tests ------------------------------------------------ def test_chrs2int(): - assert 1 == chrs2int('\x00\x00\x00\x01') - assert -1 == chrs2int('\xFF\xFF\xFF\xFF') + assert 1 == chrs2int('\x00\x00\x00\x01', False) + assert -1 == chrs2int('\xFF\xFF\xFF\xFF', False) + assert 1 == chrs2int('\x00\x00\x00\x01', True) + assert 0xFFFFFFFF == chrs2int('\xFF\xFF\xFF\xFF', True) + +def test_chrs2long(): + assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', False) + assert -1 == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', False) + assert 1 == chrs2long('\x00\x00\x00\x00\x00\x00\x00\x01', True) + assert 0xFFFFFFFFFFFFFFFF == chrs2long('\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF', True) + assert 68002 == chrs2long(pack(">Q", 68002), False) + assert 68002 == swapped_chrs2long(pack("<Q", 68002), False) def test_stream(): - stream = imagereader_mock('\x00\x00\x19\x66').stream + stream = imagestream_mock(SIMPLE_VERSION_HEADER) n = stream.peek() assert n == 6502 n = stream.next() @@ -40,14 +57,14 @@ py.test.raises(IndexError, lambda: stream.next()) def test_stream_swap(): - 
stream = imagereader_mock('\x66\x19\x00\x00').stream + stream = imagestream_mock('\x66\x19\x00\x00') stream.swap = True first = stream.next() assert first == 6502 py.test.raises(IndexError, lambda: stream.next()) def test_stream_many(): - stream = imagereader_mock('\x00\x00\x19\x66' * 5).stream + stream = imagestream_mock(SIMPLE_VERSION_HEADER * 5) for each in range(5): first = stream.peek() assert first == 6502 @@ -56,14 +73,14 @@ py.test.raises(IndexError, lambda: stream.next()) def test_stream_skipbytes(): - stream = imagereader_mock('\xFF\xFF\xFF\x00\x00\x19\x66').stream + stream = imagestream_mock('\xFF\xFF\xFF' + SIMPLE_VERSION_HEADER) stream.skipbytes(3) value = stream.next() assert value == 6502 py.test.raises(IndexError, lambda: stream.next()) def test_stream_count(): - stream = imagereader_mock('\xFF' * 20).stream + stream = imagestream_mock('\xFF' * 20) stream.next() stream.next() stream.reset_count() @@ -85,41 +102,125 @@ def test_ints2str(): assert "\x00\x00\x00\x02" == ints2str(2) - assert '\x00\x00\x19\x66\x00\x00\x00\x02' == ints2str(6502,2) + assert SIMPLE_VERSION_HEADER + '\x00\x00\x00\x02' == ints2str(6502,2) def test_freeblock(): - r = imagereader_mock("\x00\x00\x00\x02") + r = imagereader_mock(SIMPLE_VERSION_HEADER + "\x00\x00\x00\x02") + r.read_version() py.test.raises(squeakimage.CorruptImageError, lambda: r.read_object()) def test_1wordobjectheader(): s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12])) - r = imagereader_mock(s) - assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader() + r = imagereader_mock(SIMPLE_VERSION_HEADER + s) + r.read_version() + l = len(SIMPLE_VERSION_HEADER) + assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader() def test_1wordobjectheader2(): s = ints2str(joinbits([3, 1, 2, 3, 4], [2,6,4,5,12])) - r = imagereader_mock(s * 3) - assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0) == r.read_1wordobjectheader() - assert (squeakimage.ImageChunk(space, 1, 
2, 3, 4), 4) == r.read_1wordobjectheader() - assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8) == r.read_1wordobjectheader() + r = imagereader_mock(SIMPLE_VERSION_HEADER + (s * 3)) + r.read_version() + l = len(SIMPLE_VERSION_HEADER) + assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 0 + l) == r.read_1wordobjectheader() + assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 4 + l) == r.read_1wordobjectheader() + assert (squeakimage.ImageChunk(space, 1, 2, 3, 4), 8 + l) == r.read_1wordobjectheader() def test_2wordobjectheader(): s = ints2str(4200 + 1, joinbits([1, 1, 2, 3, 4], [2,6,4,5,12])) - r = imagereader_mock(s) - assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4) == r.read_2wordobjectheader() + r = imagereader_mock(SIMPLE_VERSION_HEADER + s) + r.read_version() + l = len(SIMPLE_VERSION_HEADER) + assert (squeakimage.ImageChunk(space, 1, 2, 4200, 4), 4 + l) == r.read_2wordobjectheader() def test_3wordobjectheader(): s = ints2str(1701 << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12])) - r = imagereader_mock(s) - assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8) == r.read_3wordobjectheader() + r = imagereader_mock(SIMPLE_VERSION_HEADER + s) + r.read_version() + l = len(SIMPLE_VERSION_HEADER) + assert (squeakimage.ImageChunk(space, 1701, 2, 4200, 4), 8 + l) == r.read_3wordobjectheader() def test_read3wordheaderobject(): size = 42 s = ints2str(size << 2, 4200 + 0, joinbits([0, 1, 2, 3, 4], [2,6,4,5,12])) - r = imagereader_mock(s + '\x00\x00\x19\x66' * (size - 1)) + r = imagereader_mock(SIMPLE_VERSION_HEADER + s + SIMPLE_VERSION_HEADER * (size - 1)) + r.read_version() + l = len(SIMPLE_VERSION_HEADER) chunk, pos = r.read_object() chunk0 = squeakimage.ImageChunk(space, size, 2, 4200, 4) chunk0.data = [6502] * (size - 1) - assert pos == 8 + assert pos == 8 + l assert chunk0 == chunk +def test_simple_image(): + word_size = 4 + header_size = 16 * word_size + + image_1 = (SIMPLE_VERSION_HEADER # 1 + + pack(">i", header_size) # 2 64 byte header + + 
pack(">i", 0) # 3 no body + + pack(">i", 0) # 4 old base address unset + + pack(">i", 0) # 5 no spl objs array + + "\x12\x34\x56\x78" # 6 last hash + + pack(">h", 480) # 7 window 480 height + + pack(">h", 640) # window 640 width + + pack(">i", 0) # 8 not fullscreen + + pack(">i", 0) # 9 no extra memory + + ("\x00" * (header_size - (9 * word_size)))) + r = imagereader_mock(image_1) + # does not raise + r.read_header() + assert r.stream.pos == len(image_1) + + image_2 = (SIMPLE_VERSION_HEADER_LE # 1 + + pack("<i", header_size) # 2 64 byte header + + pack("<i", 0) # 3 no body + + pack("<i", 0) # 4 old base address unset + + pack("<i", 0) # 5 no spl objs array + + "\x12\x34\x56\x78" # 6 last hash + + pack("<h", 480) # 7 window 480 height + + pack("<h", 640) # window 640 width + + pack("<i", 0) # 8 not fullscreen + + pack("<i", 0) # 9 no extra memory + + ("\x00" * (header_size - (9 * word_size)))) + r = imagereader_mock(image_2) + # does not raise + r.read_header() + assert r.stream.pos == len(image_2) + +def test_simple_image64(): + word_size = 8 + header_size = 16 * word_size + + image_1 = (pack(">Q", 68002) # 1 version + + pack(">q", header_size) # 2 128 byte header + + pack(">q", 0) # 3 no body + + pack(">q", 0) # 4 old base address unset + + pack(">q", 0) # 5 no spl objs array + + ("\x12\x34\x56\x78" * 2)# 6 last hash + + pack(">H", 480) # 7 window 480 height + + pack(">H", 640) # window 640 width + + pack(">i", 0) # pad + + pack(">q", 0) # 8 not fullscreen + + pack(">q", 0) # 9 no extra memory + + ("\x00" * (header_size - (9 * word_size)))) + r = imagereader_mock(image_1) + # does not raise + r.read_header() + assert r.stream.pos == len(image_1) + + image_2 = (pack("<Q", 68002) # 1 version + + pack("<q", header_size) # 2 128 byte header + + pack("<q", 0) # 3 no body + + pack("<q", 0) # 4 old base address unset + + pack("<q", 0) # 5 no spl objs array + + ("\x12\x34\x56\x78" * 2)# 6 last hash + + pack("<H", 480) # 7 window 480 height + + pack("<H", 640) # window
640 width + + pack(">i", 0) # pad + + pack(">q", 0) # 8 not fullscreen + + pack("<q", 0) # 9 no extra memory + + ("\x00" * (header_size - (9 * word_size)))) + r = imagereader_mock(image_2) + # does not raise + r.read_header() + assert r.stream.pos == len(image_2) diff --git a/spyvm/tool/analyseimage.py b/spyvm/tool/analyseimage.py --- a/spyvm/tool/analyseimage.py +++ b/spyvm/tool/analyseimage.py @@ -11,10 +11,10 @@ minitest_image = image_dir.join('minitest.image') def get_miniimage(space): - return squeakimage.ImageReader(space, squeakimage.Stream(mini_image.open())) + return squeakimage.reader_for_image(space, squeakimage.Stream(mini_image.open())) def get_minitestimage(space): - return squeakimage.ImageReader(space, squeakimage.Stream(minitest_image.open())) + return squeakimage.reader_for_image(space, squeakimage.Stream(minitest_image.open())) def create_image(space, image_reader): image_reader.initialize() diff --git a/targetimageloadingsmalltalk.py b/targetimageloadingsmalltalk.py --- a/targetimageloadingsmalltalk.py +++ b/targetimageloadingsmalltalk.py @@ -45,7 +45,7 @@ else: print "usage:", argv[0], "<image name>" return -1 - reader = squeakimage.ImageReader(space, squeakimage.Stream(DummyFile(filename))) + reader = squeakimage.reader_for_image(space, squeakimage.Stream(DummyFile(filename))) reader.initialize() image = squeakimage.SqueakImage() image.from_reader(space, reader) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit