Dear Python gurus, I have this nice but over-my-head script:
# Questions from the original message:
#
# 1. The script loads two files (daifmaster.txt and sahihmaster.txt).
#    HELP ONE: Please, I NEED to print these two files, unified and sorted,
#    as one file; call it dsmaster.txt.
# 2. It loads a file (Kanzmaster.txt) and sorts it.
#    HELP TWO: Please, I need to print this sorted kanzmaster.txt and call
#    it KSmaster.txt.
# 3. Please explain to me the lines:
#        def addCommand(self,command,callback,flags):
#            self.callbacks[command]={'callback':callback,'command':command,'flags':flags}
#        def findCallback(self,command):

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import sys
import codecs
import re

# Flag bit: the value of a tag may continue on the following line(s).
TYPE_MULTILINE = 0x1


class ReaderBase:
    """Read a '|'-delimited tag file and hand every tag to a callback.

    A line is split on '|'.  Each piece after the first is split once on a
    space into a tag and a value; the handler registered for the tag's
    first character is called with (registered command, stripped value).
    """

    def __init__(self):
        # command -> {'callback': ..., 'command': ..., 'flags': ...}
        self.callbacks = {}

    def addCommand(self, command, callback, flags):
        """Register *callback* as the handler for *command*.

        The three values are stored together in one dictionary, filed in
        ``self.callbacks`` under *command*, so that a later lookup returns
        the function to call, the name to pass to it, and its flags
        (0 or TYPE_MULTILINE).
        """
        self.callbacks[command] = {
            'callback': callback,
            'command': command,
            'flags': flags,
        }

    def findCallback(self, command):
        """Return the registration dictionary for *command*, or None.

        Only the first character of *command* is used as the lookup key,
        so a tag such as 'H123' resolves to the handler registered as 'H'.
        An empty *command* simply yields None.
        """
        return self.callbacks.get(command[:1])

    def processFile(self, path):
        """Parse the CP1256-encoded file at *path* and fire the callbacks.

        The value of a tag is delivered when the next tag is met (or at
        end of file), so that a TYPE_MULTILINE tag can first collect the
        text found before the first '|' of the following lines.
        """
        f = codecs.open(path, encoding='CP1256', mode='r')
        try:
            val = ''
            citem = None
            for line in f.readlines():
                parts = line.split('|')
                # Text before the first '|' continues the pending value,
                # but only for tags registered as multi-line.
                if citem is not None and citem['flags'] & TYPE_MULTILINE:
                    val += parts[0]
                for part in parts[1:]:
                    # A new tag begins: deliver the one collected so far.
                    if citem is not None:
                        citem['callback'](citem['command'], val.strip())
                    lineparts = part.split(' ', 1)
                    cmd = lineparts[0]
                    if len(lineparts) > 1:
                        val = lineparts[1]
                    else:
                        val = ''
                    if cmd:
                        citem = self.findCallback(cmd)
                    else:
                        citem = None
            # Deliver the last pending tag of the file.
            if citem is not None:
                citem['callback'](citem['command'], val.strip())
        finally:
            # Close the file even when a callback raises.
            f.close()


# Running number appended to every key by HadithEntry.write().
entryCount = 0


class HadithEntry:
    """One record: an ordered collection of (key, value) pairs."""

    def __init__(self):
        # Parallel lists; keys[i] belongs to values[i].
        self.keys = []
        self.values = []

    def addValue(self, key, val):
        """Set *key* to *val*, replacing the value if *key* already exists."""
        try:
            position = self.keys.index(key)
        except ValueError:
            self.keys.append(key)
            self.values.append(val)
        else:
            self.values[position] = val

    def write(self, f=sys.stdout):
        """Write the entry to *f* as UTF-8 lines of the form '|<key><n> <value>'.

        <n> is the global entry counter, incremented once per call.  Keys
        ending in '-tmp' are internal helpers and are not written.
        """
        global entryCount
        entryCount = entryCount + 1
        # The lines are encoded to bytes; when *f* is a text wrapper that
        # exposes its underlying binary stream, write to that stream.
        out = getattr(f, 'buffer', f)
        for key, value in zip(self.keys, self.values):
            if not key.endswith('-tmp'):
                out.write(('|' + key + str(entryCount) + ' ' + value + '\n').encode('utf-8'))

    def getValue(self, key):
        """Return the value stored under *key*, or '' when it is missing."""
        try:
            return self.values[self.keys.index(key)]
        except ValueError:
            return ''


hadithItems = []

# Characters removed from the 'H' text to build the comparison key.
sortByH_regex_punct = re.compile(u"[ ,،.:\\\\(\\\\)?{}\n.]")


def sortByH(a, b):
    """Compare two entries by their 'H-tmp' value; return -1, 0 or 1."""
    a = a.getValue('H-tmp')
    b = b.getValue('H-tmp')
    if a == b:
        return 0
    if a > b:
        return 1
    return -1


currentEntry = None


# reads the 40k hadith entries
def fill1stHadithEntries(cmd, val):
    """Callback for the first file format: collect entries in hadithItems.

    An 'H' tag starts a new entry and also stores the punctuation-free
    text under 'H-tmp'.  Every tag is then added to the current entry.
    NOTE(review): the original comment said "the last tag in a record is
    H", yet the code opens a new entry on 'H' - confirm the record layout.
    """
    global hadithItems
    global currentEntry
    if cmd == 'H':
        currentEntry = HadithEntry()
        hadithItems.append(currentEntry)
        currentEntry.addValue('H-tmp', re.sub(sortByH_regex_punct, '', val))
    if currentEntry is not None:
        currentEntry.addValue(cmd, val)


hadith2Items = []


# reads the 15K files format
def fill2stHadithEntries(cmd, val):
    """Callback for the second file format: collect entries in hadith2Items.

    An 'R' tag starts a new entry; an 'F' tag (the last tag in a record)
    computes 'H-tmp' from the entry's 'H' value and stores the entry.
    Tags seen before the first 'R' are ignored.
    """
    global hadith2Items
    global currentEntry
    if cmd == 'R':
        currentEntry = HadithEntry()
    if currentEntry is None:
        # No record has been opened yet; there is nothing to fill.
        return
    currentEntry.addValue(cmd, val)
    if cmd == 'F':
        normalised = re.sub(sortByH_regex_punct, '', currentEntry.getValue('H'))
        currentEntry.addValue('H-tmp', normalised)
        hadith2Items.append(currentEntry)


tempEntry = None
tempEntryPos = 0
successMerges = 0


# both lists must be sorted, otherwise it will fail.
def mergeHadithEntries(val):
    """Copy the 'R' value of *val* onto the hadithItems entry with equal text.

    tempEntryPos is a cursor into hadithItems that only moves forward, so
    successive calls must pass entries in ascending 'H-tmp' order.
    """
    global hadithItems
    global tempEntryPos
    global successMerges
    # Skip the entries that sort before *val*.
    while (tempEntryPos < len(hadithItems)
           and sortByH(val, hadithItems[tempEntryPos]) > 0):
        tempEntryPos = tempEntryPos + 1
    if tempEntryPos >= len(hadithItems):
        return
    if sortByH(val, hadithItems[tempEntryPos]) == 0:
        hadithItems[tempEntryPos].addValue('R', val.getValue('R'))
        successMerges = successMerges + 1


reader = ReaderBase()
reader.addCommand('H', fill1stHadithEntries, TYPE_MULTILINE)
reader.addCommand('R', fill1stHadithEntries, 0)
reader.addCommand('F', fill1stHadithEntries, 0)
print("Processing KanzMaster ....")
reader.processFile('../../src/cd2/KanzMaster.txt')
print("loaded  %d ." % len(hadithItems))
print("Sorting...")
# Order the KanzMaster entries by their punctuation-free 'H-tmp' text.
# A key function gives the same ordering as the sortByH comparator and
# also works where list.sort() no longer accepts a comparison function.
hadithItems.sort(key=lambda entry: entry.getValue('H-tmp'))

# Second pass: read sahihMaster and daifMaster into hadith2Items.
currentEntry = None
reader = ReaderBase()
reader.addCommand('H', fill2stHadithEntries, TYPE_MULTILINE)
reader.addCommand('R', fill2stHadithEntries, 0)
reader.addCommand('F', fill2stHadithEntries, 0)
print("Processing sahihMaster ....")
reader.processFile('../../src/cd2/sahihMaster.txt')
# Forget the last entry of the previous file before reading the next one.
currentEntry = None
print("Processing daifMaster ....")
reader.processFile('../../src/cd2/daifMaster.txt')

# sort before merging to speed up the merge process
hadith2Items.sort(key=lambda entry: entry.getValue('H-tmp'))
print("loaded  %d ." % len(hadith2Items))

# Now both lists are sorted.
print("Merging ....")
# Explicit loops: these calls are made for their side effects only.
for entry in hadith2Items:
    mergeHadithEntries(entry)

# write the merged items
print("writing  %d ..." % len(hadithItems))
for entry in hadithItems:
    entry.write()
print("successMerges= %d" % successMerges)

# Regards
# Alan Darwish
# "Knowledgeable enemy is better than an ignorant friend"
# ---
# Outgoing mail is certified Virus Free.
# Checked by AVG anti-virus system (http://www.grisoft.com).
# Version: 6.0.778 / Virus Database: 525 - Release Date: 10/15/2004
# _______________________________________________
# Tutor maillist - Tutor@python.org
# http://mail.python.org/mailman/listinfo/tutor