Brian Kelley wrote:
And jcw, could I see the python only reader please, please :)
Yeah, I was afraid you'd ask. Took me ages to find it on an old CD backup, even though I'm pretty well organized w.r.t. my backups these days (it's hard to find things by location when you don't know *where* they are and it's hard to find things by name when you don't remember *what* you called it!).
Attached, vintage 1999 code. It may no longer work due to MK 1.9 -> 2.x file format tweaks. Just for completeness, a Tcl version is at:
http://www.equi4.com/pub/sk/readkit.tcl
-jcw
unmk.py
# Description: application/applefile
#
# Decoding a MetaKit datafile in Python
#
# JCW/1999-11-13/2000-04-22/
#
# Reformatted from a whitespace-collapsed mail attachment and ported so that
# it runs on Python 3 (no print statements, string exceptions, backticks,
# xrange or the StringIO/string helper modules).

import array
import shlex
import struct
import sys

# Module-level state shared by all classes below; dumper() (re)sets both.
reader = None       # the active ReadMK instance
freespace = None    # list of (pos, size) byte ranges recorded while parsing

# Bits per int for very small columns, indexed [numrows - 1][size - 1].
# A zero entry marks a (numrows, size) combination with no valid width.
_WIDTH_TABLE = (
    (8, 16, 1, 32, 2, 4),   # n = 1
    (4, 8, 1, 16, 2, 0),    # n = 2
    (2, 4, 8, 1, 0, 16),    # n = 3
    (2, 4, 0, 8, 1, 0),     # n = 4
    (1, 2, 4, 0, 8, 0),     # n = 5
    (1, 2, 4, 0, 0, 8),     # n = 6
    (1, 2, 0, 4, 0, 0),     # n = 7
)


def HexDump(s):
    """Return a rudimentary hex dump of the byte string *s*.

    Each byte becomes two uppercase hex digits; bytes are separated by a
    single space. An empty input gives an empty string.
    """
    return " ".join("%02X" % byte for byte in bytearray(s))


def DeduceWidth(numrows, size):
    """Calculate bits per int, given row count and column size in bytes.

    Returns 0 when *numrows* is not positive. Raises ValueError when the
    combination has no table entry or the result is not a power of two.
    """
    w = 0
    if numrows > 0:
        w = (size << 3) // numrows
        # Small columns are ambiguous after integer division, so they are
        # resolved through the lookup table instead.
        if numrows <= 7 and 0 < size <= 6:
            w = _WIDTH_TABLE[numrows - 1][size - 1]
            if w <= 0:
                raise ValueError(
                    "no int width for %d rows in %d bytes" % (numrows, size))
    if w & (w - 1):
        raise ValueError(
            "int width %d is not a power of two (%d rows, %d bytes)"
            % (w, numrows, size))
    return w


def CheckFreeSpace(spans=None):
    """Print a summary of the gaps between the recorded byte ranges.

    *spans* is a list of (pos, size) tuples. When omitted, the module-level
    ``freespace`` list is used and sorted in place. A list passed explicitly
    is not modified.

    Returns ``(free_bytes, gaps, used_bytes, last_used)``, the same four
    numbers shown on the final summary line.
    """
    if spans is None:
        spans = freespace
        spans.sort()
    else:
        spans = sorted(spans)
    curr = 0
    gaps = 0
    free = 0
    print('Free space summary:')
    for (pos, length) in spans:
        if pos < curr:
            print("### Free space is corrupt: (%d,%d) overlaps %d"
                  % (pos, length, curr))
        if pos > curr:
            print(" Free: %6d..%-6d (%db)" % (curr, pos - 1, pos - curr))
            gaps = gaps + 1
            free = free + (pos - curr)
        curr = pos + length
    print("%d bytes free in %d gaps, %db used, last used is %d"
          % (free, gaps, curr - free, curr))
    return (free, gaps, curr - free, curr)


def _int_typecode(nbytes):
    """Return a signed array typecode whose items are exactly *nbytes* wide."""
    # 'l' is 8 bytes on LP64 platforms, so the size is probed, not assumed.
    for code in "bhil":
        if array.array(code).itemsize == nbytes:
            return code
    raise ValueError("no array typecode with %d-byte items" % nbytes)


class IntVector:
    """An array which accesses ints of 0..32 bits.

    *width* is the number of bits per item (0, 1, 2, 4, 8, 16 or 32) and
    *data* is the raw byte string. Any other width raises ValueError.
    """

    def _get_0b(self, index):
        return 0

    def _get_1b(self, index):
        return (self.vector[index >> 3] >> (index & 7)) & 1

    def _get_2b(self, index):
        return (self.vector[index >> 2] >> ((index & 3) * 2)) & 3

    def _get_4b(self, index):
        return (self.vector[index >> 1] >> ((index & 1) * 4)) & 15

    def __init__(self, width, data):
        packed = {0: self._get_0b, 1: self._get_1b,
                  2: self._get_2b, 4: self._get_4b}
        if width in packed:
            # Several items share one byte. The mask applied by the getter
            # makes the signedness of the byte array irrelevant.
            typecode = 'b'
            getter = packed[width]
        elif width in (8, 16, 32):
            typecode = _int_typecode(width >> 3)
            getter = None
        else:
            raise ValueError("unsupported int width: %r" % (width,))
        # NOTE(review): array.array decodes in native byte order; this looks
        # like it assumes the file was written on a same-endian machine.
        self.vector = array.array(typecode, data)
        # Dispatch through an attribute: assigning __getitem__ on the
        # instance is ignored by obj[i] for new-style classes.
        self._getter = getter or self.vector.__getitem__

    def __getitem__(self, index):
        return self._getter(index)


class Column:
    """A range of bytes on disk.

    Pulls the size from the global reader and, when the size is non-zero,
    the position as well; the range is then recorded in ``freespace``.
    """

    def __init__(self):
        self.size = reader.pull()
        self.pos = 0
        if self.size:
            self.pos = reader.pull()
            freespace.append((self.pos, self.size))

    def __repr__(self):
        return 'Column: @%d [%db]' % (self.pos, self.size)

    def __len__(self):
        return self.size


class ColOfInts(Column):
    """A column interpreted as a vector of *numrows* integers."""

    def __init__(self, numrows):
        Column.__init__(self)
        self.numrows = numrows
        self.width = DeduceWidth(numrows, self.size)
        data = reader.fetch(self.pos, self.size)
        self.getter = IntVector(self.width, data)

    def __repr__(self):
        return 'ColOfInts: #%d/%d, @%d [%db]' % \
            (self.numrows, self.width, self.pos, self.size)

    def __len__(self):
        return self.numrows

    def __getitem__(self, index):
        # Bounds are checked here because the packed getters cannot do it:
        # a sub-byte vector has padding bits after the last row and a 0-bit
        # vector never runs out, so iteration would not stop at numrows.
        if index < 0:
            index += self.numrows
        if not 0 <= index < self.numrows:
            raise IndexError("row index out of range")
        return self.getter[index]


class BytesCol:
    """A data + size column pair, followed by a memo column.

    ``size`` and ``pos`` are deliberately None so that View.dump treats this
    object as a sequence of items instead of a raw byte range.
    """

    def __init__(self, numrows):
        self.numrows = numrows
        self.data = Column()
        self.size = None
        self.pos = None
        # The size vector is only read when the data column is non-empty.
        self.sizes = None
        self.offsets = [self.data.pos]
        if self.data.size:
            self.sizes = ColOfInts(numrows)
            for s in self.sizes:
                self.offsets.append(self.offsets[-1] + s)
        else:
            # No data at all: every row is an empty item at the same offset.
            self.offsets = self.offsets * (numrows + 1)
        self.memos = Column()

    def __repr__(self):
        return 'BytesCol < %s, %s >' % (self.data, self.sizes)

    def __len__(self):
        return self.numrows

    def __getitem__(self, index):
        i1 = self.offsets[index]
        i2 = self.offsets[index + 1]
        return "%10d-%-4d = %s" % (i1, i2, repr(reader.data[i1:i2]))


class View:
    """A view is a columnar version of a table.

    Parsing starts at the global reader's current position. When *fields* is
    None the description string is read from the stream and parsed first;
    otherwise *fields* is the already-parsed list of (name, code) pairs.
    """

    def __init__(self, parent=None, fields=None):
        self.parent = parent
        self.columns = []
        self.sias = reader.pull()
        if self.sias != 0:  # not yet
            raise NotImplementedError(
                "non-zero sias (%d) is not supported" % self.sias)
        if fields is None:
            raw = reader.read(reader.pull())
            # latin-1 maps every byte to one character, so it cannot fail.
            reader.descriptor = raw.decode('latin-1')
            fields = reader.parseDesc()
        self.fields = fields
        self.numrows = reader.pull()
        for (_name, code) in fields:
            if isinstance(code, list):
                # Subviews live in their own byte range: jump there, parse
                # one View per row, then resume where we left off.
                location = Column()
                savepos = reader.pos
                reader.pos = location.pos
                col = [View(self, code) for _ in range(self.numrows)]
                reader.pos = savepos
            elif code in "IFD":
                col = ColOfInts(self.numrows)
            elif code in "BS":
                col = BytesCol(self.numrows)
            else:
                raise ValueError("unknown field type code: %r" % (code,))
            self.columns.append(col)

    def dump(self, s=""):
        """Print this view and, recursively, its columns; *s* is the indent."""
        print(s, "%2d rows: %s" % (self.numrows, self.fields))
        s = s + " "
        for i, col in enumerate(self.columns):
            # Lists of subviews have no size attribute and BytesCol sets it
            # to None; both are dumped item by item.
            size = getattr(col, 'size', None)
            if isinstance(size, int):
                print(s, "%2d = %s" % (i, col))
                shown = min(size, 16)
                h = HexDump(reader.data[col.pos:col.pos + shown])
                if size > 16:
                    h = h + "..."
                print(s, " ", h)
            else:
                n = len(col)
                print(s, "Col# %2d (%d views):" % (i, n))
                for r in range(n):
                    item = col[r]
                    if isinstance(item, str):
                        print(item)
                    else:
                        item.dump(s)


class ReadMK:
    """This class will dissect a MK datafile held entirely in memory."""

    def __init__(self, filename='data.mk'):
        with open(filename, 'rb') as f:
            self.data = f.read()

    def setroot(self):
        """Locate the root via the last 8 bytes of the file and parse it."""
        if len(self.data) < 16:
            raise ValueError("file too short to be a MK datafile")
        self.pos = len(self.data) - 8
        freespace.append((0, 8))
        freespace.append((self.pos - 8, 16))
        n, self.start = struct.unpack('>ll', self.read(8))
        self.size = n & 0xffffff
        freespace.append((self.start, self.size))
        self.pos = self.start
        self.root = View()

    def read(self, len):
        """Return the next *len* bytes and advance the current position."""
        s = self.data[self.pos:self.pos + len]
        self.pos = self.pos + len
        return s

    def fetch(self, pos, len):
        """Return *len* bytes starting at *pos* without moving the position."""
        return self.data[pos:pos + len]

    def pull(self):
        """Get the next var-length int from the data stream.

        Bytes contribute 7 bits each, most significant first; the byte with
        the high bit set is the last one. A leading zero byte means the
        bitwise complement of the value that follows is returned.
        """
        # bytearray() yields ints on every Python version. The original
        # compared a one-character string with 0, which is never true.
        first = bytearray(self.data[self.pos:self.pos + 1])
        if not first:
            raise IndexError("var-length int starts past end of data")
        m = ~0 if first[0] == 0 else 0
        i = 0
        while 1:
            v = bytearray(self.data[self.pos:self.pos + 1])
            if not v:
                raise IndexError("var-length int runs past end of data")
            self.pos = self.pos + 1
            i = (i << 7) + (v[0] & 0x7F)
            if v[0] & 0x80:
                break
        return i ^ m

    def parseDesc(self):
        """Convert the description string to a parsed representation.

        Returns a list of (name, code) tuples, where code is either a token
        string or, for a nested ``name[...]`` entry, another such list.
        """
        text = self.descriptor
        if isinstance(text, bytes) and not isinstance(text, str):
            text = text.decode('latin-1')
        return self._parse(shlex.shlex(text))

    def _parse(self, lexer):
        props = []
        while 1:
            name = lexer.get_token()
            if name == '':
                return props
            sep = lexer.get_token()
            if sep == ':':
                code = lexer.get_token()
            elif sep == '[':
                code = self._parse(lexer)
                if lexer.get_token() != ']':
                    raise ValueError("expected ']' in description string")
            else:
                raise ValueError("malformed description string")
            props.append((name, code))
            v = lexer.get_token()
            if v == '':
                return props
            if v == ']':
                # Leave the bracket for the caller, which checks for it.
                lexer.push_token(v)
                return props
            if v != ',':
                raise ValueError("expected ',' in description string")


def dumper(name='r01a'):
    """Parse the datafile *name* and dump the first row of its first column.

    Resets the module-level ``reader`` and ``freespace``.
    """
    global reader, freespace
    reader = ReadMK(name)
    freespace = []
    reader.setroot()
    print("descriptor", reader.descriptor)
    reader.root.columns[0][0].dump()


if __name__ == '__main__':
    # The original changed into a hard-coded classic Mac OS folder and ran on
    # import. The file name now comes from the command line instead.
    dumper(sys.argv[1] if len(sys.argv) > 1 else 's25a')
    CheckFreeSpace()

# _____________________________________________
# Metakit mailing list - Metakit@equi4.com
# http://www.equi4.com/mailman/listinfo/metakit