Brian Kelley wrote:

And jcw, could I see the Python-only reader please, please :)

Yeah, I was afraid you'd ask. Took me ages to find it on an old CD backup, even though I'm pretty well organized w.r.t. my backups these days (it's hard to find things by location when you don't know *where* they are, and it's hard to find things by name when you don't remember *what* you called them!).

Attached, vintage 1999 code. It may no longer work due to MK 1.9 -> 2.x file format tweaks. Just for completeness, a Tcl version is at:
http://www.equi4.com/pub/sk/readkit.tcl

-jcw



Attachment: unmk.py
Description: application/applefile

# Decoding a MetaKit datafile in Python
#
# JCW/1999-11-13/2000-04-22/

import os, struct, shlex, StringIO, string, array

reader = None
freespace = None

def HexDump(s):
	""" a rudimentary hex data dump

	Returns the bytes of `s` as two-digit uppercase hex values separated by
	single spaces; an empty input gives an empty string.  `s` may be a
	character string or a bytes-like sequence whose items are integers.
	"""
	hexed = []
	for c in s:
		# Iterating a character string yields 1-char strings, while a
		# bytes-like object may yield plain integers; accept both.
		if not isinstance(c, int):
			c = ord(c)
		hexed.append("%02X" % c)
	# str.join replaces the deprecated string.join() module function.
	return " ".join(hexed)

# Bit widths for tiny columns, indexed as [numrows-1][size-1].  For these
# small cases the table takes precedence over the division-based estimate.
# A zero entry marks a combination that DeduceWidth rejects.
_WIDTH_TABLE = (
	( 8, 16, 1, 32, 2, 4 ),	#  n = 1
	( 4, 8, 1, 16, 2, 0 ),	#  n = 2
	( 2, 4, 8, 1, 0, 16 ),	#  n = 3
	( 2, 4, 0, 8, 1, 0 ),	#  n = 4
	( 1, 2, 4, 0, 8, 0 ),	#  n = 5
	( 1, 2, 4, 0, 0, 8 ),	#  n = 6
	( 1, 2, 0, 4, 0, 0 ),	#  n = 7
)

def DeduceWidth(numrows, size):
	""" calculate bits per int, given row count and column size

	numrows -- number of rows in the column
	size    -- size of the column in bytes

	Returns 0 when numrows is not positive, otherwise the number of bits
	per entry.  Asserts that the result is zero or a power of two, and that
	a table lookup did not hit an unsupported (zero) combination.
	"""
	if numrows <= 0:
		return 0
	if numrows <= 7 and 0 < size <= 6:
		w = _WIDTH_TABLE[numrows-1][size-1]
		assert w > 0
	else:
		# Floor division keeps the result an integer under every division
		# mode; the bit test below needs an int.
		w = (size << 3) // numrows
	assert (w & (w-1)) == 0
	return w

def CheckFreeSpace():
	""" print a summary of the gaps between the recorded byte ranges

	Reads the module-level `freespace` list of (position, size) ranges,
	sorts it in place, and prints every gap between consecutive ranges as
	free space.  A range that starts before the end of the ranges seen so
	far is reported as corrupt.  Ends with a one-line total.
	"""
	freespace.sort()
	curr = 0		# first byte not yet covered by any range
	gaps = 0
	free_bytes = 0
	print('Free space summary:')
	for (pos, size) in freespace:
		if pos < curr:
			print("### Free space is corrupt: (%d,%d) overlaps %d" %
				(pos, size, curr))
		if pos > curr:
			print("  Free: %6d..%-6d (%db)" % (curr, pos-1, pos-curr))
			gaps = gaps+1
			free_bytes = free_bytes + (pos - curr)
		# Never move backwards: a range nested inside an earlier one must
		# not make already-covered bytes show up as a later gap.
		curr = max(curr, pos + size)
	print("%d bytes free in %d gaps, %db used, last used is %d" %
		(free_bytes, gaps, curr-free_bytes, curr))
		
def _signed_typecode(nbytes):
	" return an array typecode for signed ints of exactly `nbytes` bytes "
	# Item sizes of 'i' and 'l' differ between platforms, so probe them
	# instead of assuming that 'l' is four bytes wide.
	for code in 'bhil':
		if array.array(code).itemsize == nbytes:
			return code
	raise ValueError("no array typecode for %d-byte integers" % nbytes)

class IntVector:

	""" An array which accesses ints of 0..32 bits

	width -- bits per entry: 0, 1, 2, 4, 8, 16 or 32
	data  -- the raw bytes holding the packed entries

	Entries narrower than a byte are unpacked with shifts and masks,
	lowest bits first.  Wider entries are read through the array module
	as signed values in the machine's native byte order.  Any other
	width raises ValueError.
	"""

	def _get_0b(self,index):
		return 0
	def _get_1b(self,index):
		return (self.vector[index>>3] >> (index&7)) & 1
	def _get_2b(self,index):
		return (self.vector[index>>2] >> ((index&3) * 2)) & 3
	def _get_4b(self,index):
		return (self.vector[index>>1] >> ((index&1) * 4)) & 15
	def _get_item(self,index):
		return self.vector[index]

	def __init__(self,width,data):
		# Sub-byte widths keep one signed byte per array item; the final
		# mask in each getter makes the sign irrelevant.
		code = 'b'
		getter = self._get_item
		if width == 0:
			getter = self._get_0b
		elif width == 1:
			getter = self._get_1b
		elif width == 2:
			getter = self._get_2b
		elif width == 4:
			getter = self._get_4b
		elif width in (8, 16, 32):
			code = _signed_typecode(width >> 3)
		else:
			raise ValueError("unsupported integer width: %r" % (width,))
		# Special methods are looked up on the class, so the chosen getter
		# is stored in an ordinary attribute and dispatched explicitly.
		self._getter = getter
		self.vector = array.array(code, data)

	def __getitem__(self,index):
		return self._getter(index)
			
class Column:

	" A span of bytes in the datafile, described by position and size "

	def __init__(self):
		# The stream gives the size first; a position follows only for a
		# non-empty column, which is then also recorded in `freespace`.
		nbytes = reader.pull()
		where = 0
		if nbytes:
			where = reader.pull()
			freespace.append((where, nbytes))
		self.size = nbytes
		self.pos = where

	def __repr__(self):
		layout = 'Column: @%d [%db]'
		return layout % (self.pos, self.size)

	def __len__(self):
		# length is the size in bytes
		return self.size
		
class ColOfInts (Column):

	""" A column interpreted as vector of integers

	numrows -- number of integer entries stored in the column

	The column's size and position are read from the stream by Column,
	the entry width is deduced from the size and the row count, and the
	bytes are fetched once at construction.  Indexing accepts rows
	0..numrows-1 (negative indices count from the end) and raises
	IndexError otherwise.
	"""

	def __init__(self, numrows):
		Column.__init__(self)
		self.numrows = numrows
		self.width = DeduceWidth(numrows, self.size)
		data = reader.fetch(self.pos, self.size)
		self.getter = IntVector(self.width, data)

	def __repr__(self):
		return 'ColOfInts: #%d/%d, @%d [%db]' % \
			(self.numrows, self.width, self.pos, self.size)

	def __len__(self):
		return self.numrows

	def __getitem__(self,index):
		if index < 0:
			index = index + self.numrows
		# Bound the index by the row count: packed sub-byte entries leave
		# padding bits after the last row, and zero-width vectors answer
		# any index, so iteration would otherwise run past the real rows.
		if not 0 <= index < self.numrows:
			raise IndexError("row index out of range")
		return self.getter[index]
	
class BytesCol:

	""" A data + size column pair

	numrows -- number of items stored in the column

	Reads a data column from the stream and, only when that column is
	non-empty, a ColOfInts holding the size of each item; a further
	column (`memos`) is always read afterwards.  Indexing returns a
	printable description of one item: its byte range and the repr of
	its bytes.
	"""

	def __init__(self, numrows):
		self.numrows = numrows
		self.data = Column()
		# Left as None on purpose: View.dump checks for an integer `size`
		# to tell plain columns apart from everything else.
		self.size = None
		self.pos = None
		# With no data there is no sizes column in the stream; every item
		# is then treated as empty so the accessors below keep working.
		self.sizes = None
		lengths = [0] * numrows
		if self.data.size:
			self.sizes = ColOfInts(numrows)
			# Index explicitly so that exactly `numrows` sizes are used.
			lengths = [self.sizes[r] for r in range(numrows)]
		# offsets[r]..offsets[r+1] is the byte range of item r
		self.offsets = [self.data.pos]
		for s in lengths:
			self.offsets.append(self.offsets[-1] + s)
		self.memos = Column()

	def __repr__(self):
		return 'BytesCol < %s, %s >' % (self.data, self.sizes)

	def __len__(self):
		return self.numrows

	def __getitem__(self,index):
		if index < 0:
			index = index + self.numrows
		if not 0 <= index < self.numrows:
			raise IndexError("row index out of range")
		i1 = self.offsets[index]
		i2 = self.offsets[index+1]
		return "%10d-%-4d = %s" % (i1,i2,repr(reader.data[i1:i2]))
	
def _print_prefixed(prefix, text):
	" print `prefix` and `text`, spaced like the statement `print prefix, text` "
	# The print statement put a blank between two items unless the first
	# one ended in whitespace other than a space (a tab, for instance).
	if prefix == "" or prefix[-1] == " " or not prefix[-1].isspace():
		prefix = prefix + " "
	print(prefix + text)

class View:

	""" A view is a columnar version of a table

	parent -- the enclosing View, or None for the root
	fields -- parsed field list as returned by ReadMK.parseDesc; when None
	          the descriptor is read from the stream and parsed first

	Construction reads the view from the global `reader` stream.  Each
	field becomes one entry in `columns`: a list of nested Views for a
	sub-view field, a ColOfInts for codes I/F/D, or a BytesCol for codes
	B/S.  Any other code raises ValueError.
	"""

	def __init__(self, parent=None, fields=None):
		self.parent = parent
		self.columns = []
		self.sias = reader.pull()
		assert self.sias == 0 # not yet
		if fields is None:
			reader.descriptor = reader.read(reader.pull())
			fields = reader.parseDesc()
		self.fields = fields
		self.numrows = reader.pull()
		for (name,code) in fields:
			if isinstance(code, list):
				col = self._read_subviews(code)
			elif code in "IFD":
				col = ColOfInts(self.numrows)
			elif code in "BS":
				col = BytesCol(self.numrows)
			else:
				raise ValueError("unsupported field type %r for %r" %
					(code, name))
			self.columns.append(col)

	def _read_subviews(self, fields):
		" read one nested View per row, starting at the sub-view column "
		where = Column()
		# The nested views are stored at the column's position; read them
		# there, then resume the stream where this view left off.
		savepos = reader.pos
		reader.pos = where.pos
		subviews = []
		for r in range(self.numrows):
			subviews.append(View(self, fields))
		reader.pos = savepos
		return subviews

	def dump(self,s=""):
		""" print this view and, recursively, its nested views

		s -- prefix printed at the start of each line; one tab is added
		     per nesting level
		"""
		_print_prefixed(s, "%2d rows: %s" % (self.numrows,self.fields))
		s = s + "\t"
		for i, col in enumerate(self.columns):
			# Only Column objects carry an integer `size`; lists of
			# sub-views have no such attribute and BytesCol sets None.
			size = getattr(col, "size", None)
			if isinstance(size, int):
				_print_prefixed(s, "%2d = %s" % (i, col))
				n = min(size, 16)
				h = HexDump(reader.data[col.pos:col.pos+n])
				if size > 16: h = h + "..."
				_print_prefixed(s, "   " + h)
			else:
				n = len(col)
				_print_prefixed(s, "Col# %2d (%d views):" % (i, n))
				for r in range(n):
					item = col[r]
					if isinstance(item, str):
						print(item)
					else:
						item.dump(s)
				
class ReadMK:

	""" This class will dissect a MK datafile

	The whole file is read into `data`; `pos` is the current read
	position used by read() and pull().  setroot() must be called before
	`root` is available.
	"""

	def __init__(self,filename='data.mk'):
		# Close the file as soon as its contents are in memory.
		with open(filename,'rb') as f:
			self.data = f.read()

	def setroot(self):
		" locate the root view through the last 8 bytes of the file and read it "
		self.pos = len(self.data)-8
		freespace.append((0, 8))
		freespace.append((self.pos-8, 16))
		# two big-endian 32-bit values: size (low 24 bits) and start
		n, self.start = struct.unpack('>ll',self.read(8))
		self.size = n & 0xffffff
		freespace.append((self.start, self.size))
		self.pos = self.start
		#self.generation = self.pull()
		self.root = View()

	def read(self,len):
		" return the next `len` bytes and advance the read position "
		s = self.data[self.pos:self.pos+len]
		self.pos = self.pos+len
		return s

	def fetch(self,pos,len):
		" return `len` bytes starting at `pos`; the read position is untouched "
		return self.data[pos:pos+len]

	def _byte(self,pos):
		" return the byte at `pos` as an integer "
		b = self.data[pos]
		# Indexing yields a 1-char string or an int, depending on the
		# type of `data`; normalise to an int.
		if not isinstance(b, int):
			b = ord(b)
		return b

	def pull(self):
		""" get the next var-length int from data stream

		Each byte contributes 7 bits, most significant group first, and
		the byte with its high bit set ends the number.  A leading zero
		byte marks a value that is stored complemented.
		"""
		m = 0
		# Compare the byte's numeric value; a 1-char string never equals 0.
		if self._byte(self.pos) == 0: m = ~0
		i = 0
		while 1:
			v = self._byte(self.pos)
			self.pos = self.pos+1
			i = (i<<7)+(v&0x7F)
			if v & 0x80: break
		return i ^ m

	def parseDesc(self):
		""" convert description string to a parsed representation

		Returns a list of (name, code) pairs, where code is either a type
		letter or, for a nested view, another such list.  Raises
		ValueError on a malformed description.
		"""
		text = self.descriptor
		if not isinstance(text, str):
			text = text.decode('latin-1')
		# shlex accepts a string directly, so no file wrapper is needed.
		return self._parse(shlex.shlex(text))

	def _parse(self,lexer):
		" parse `name:code` and `name[...]` items up to the end or a ']' "
		props = []
		while 1:
			name = lexer.get_token()
			if name == '': return props
			sep = lexer.get_token()
			if sep == ':':
				code = lexer.get_token()
			elif sep == '[':
				code = self._parse(lexer)
				if lexer.get_token() != ']':
					raise ValueError("expected ']' in description string")
			else:
				raise ValueError("malformed description string")
			props.append((name,code))
			v = lexer.get_token()
			if v == '': return props
			if v == ']':
				# leave the bracket for the caller that opened it
				lexer.push_token(v)
				return props
			if v != ',':
				raise ValueError("expected ',' in description string")

def dumper(name='r01a'):
	""" read the datafile `name` and dump the first row of its first column

	Replaces the module-level `reader` and `freespace`, parses the file,
	prints its descriptor and dumps the view found at row 0 of the root
	view's first column.  Nothing is dumped when the root view has no
	columns or the first column has no rows.
	"""
	global reader, freespace
	reader = ReadMK(name)
	freespace = []
	reader.setroot()
	print("descriptor %s" % (reader.descriptor,))
	columns = reader.root.columns
	# An empty root view would otherwise fail with IndexError here.
	if columns and len(columns[0]) > 0:
		columns[0][0].dump()
	#reader.root.columns[1][0].dump()
	
if __name__ == '__main__':
	# Script entry point.  With a filename argument that file is dumped;
	# without one the original hard-coded test file is used.  Guarding on
	# __name__ keeps a plain import of this module free of side effects.
	import sys
	if len(sys.argv) > 1:
		dumper(sys.argv[1])
	else:
		# NOTE(review): looks like a classic Mac OS path from the
		# author's machine -- confirm before relying on this branch.
		os.chdir('BlueJay:Desktop Folder:unmk:')
		dumper('s25a')
	CheckFreeSpace()
_____________________________________________
Metakit mailing list  -  Metakit@equi4.com
http://www.equi4.com/mailman/listinfo/metakit

Reply via email to