Dear Python Gurus,

I have this nice script that is over my head:

1. It loads two files (daifmaster.txt and sahihmaster.txt).
HELP ONE
Please, I need to write these two files, merged and sorted, into one file; call it
dsmaster.txt
 
2. It loads a file (Kanzmaster.txt) and sorts it.
HELP TWO
Please, I need to write this sorted Kanzmaster.txt to a file; call it
KSmaster.txt

3. Please explain these lines to me:
        def addCommand(self,command,callback,flags):
        
self.callbacks[command]={'callback':callback,'command':command,'flags':f
lags}
        
        def findCallback(self,command):

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import sys
import codecs
import re

# Flag bit for ReaderBase.addCommand: text found before the first '|' of a
# following line is appended to the value of a command registered with it.
TYPE_MULTILINE=0x1

class ReaderBase:
        """Dispatch the fields of a '|'-delimited text file to callbacks.

        Each field in the file looks like '|<token> <value>'. Handlers are
        registered per command with addCommand() and are invoked by
        processFile() as callback(command, value).
        """
        def __init__(self):
                # command name -> {'callback': ..., 'command': ..., 'flags': ...}
                self.callbacks={}

        def addCommand(self,command,callback,flags):
                """Register a handler for `command`.

                command  -- key under which the handler is stored; findCallback()
                            looks tokens up by their first character only
                callback -- callable invoked as callback(command, value)
                flags    -- bit mask; processFile() tests it against TYPE_MULTILINE

                Registering the same command again replaces the earlier entry.
                """
                self.callbacks[command]={'callback':callback,'command':command,'flags':flags}

        def findCallback(self,command):
                """Return the registration dict for the token `command`, or None.

                Only the first character of the token is used as the lookup key,
                so a token such as 'H12' resolves to the handler registered as
                'H'. None is returned for an unknown or empty token.
                """
                try:
                        return self.callbacks[command[0]]
                except (KeyError, IndexError):
                        return None

        def processFile(self,path):
                """Read the CP1256-encoded file at `path` and fire the callbacks.

                A field's value is delivered (stripped of surrounding whitespace)
                when the next '|' marker is met, or at the end of the file.
                Fields whose token has no registered handler are skipped.
                The file is closed even if a callback raises.
                """
                f = codecs.open(path, encoding='CP1256', mode='r')
                try:
                        val=''
                        citem=None
                        for line in f.readlines():
                                parts=line.split('|')
                                # parts[0] is the text before the first '|' on this
                                # line; it continues the pending value only for
                                # commands registered with TYPE_MULTILINE.
                                if citem is not None and citem['flags']&TYPE_MULTILINE:
                                        val+=parts[0]
                                for part in parts[1:]:
                                        # A new marker ends the pending field: deliver it.
                                        if citem is not None:
                                                citem['callback'](citem['command'],val.strip())
                                        # Token and value are separated by the first space.
                                        lineparts=part.split(' ',1)
                                        cmd=lineparts[0]
                                        if len(lineparts)>1:
                                                val=lineparts[1]
                                        else:
                                                val=''
                                        if len(cmd)>0:
                                                citem=self.findCallback(cmd)
                                        else:
                                                citem=None
                        # Deliver the last field, which has no following marker.
                        if citem is not None:
                                citem['callback'](citem['command'],val.strip())
                finally:
                        f.close()

# Number of entries written so far by HadithEntry.write(); the current count
# is appended to every tag of the entry being written.
entryCount=0
class HadithEntry:
        """One record: an ordered collection of (key, value) pairs.

        Keys and values are kept in two parallel lists so that write() emits
        them in the order in which the keys were first added.
        """
        def __init__(self):
                self.keys=[]
                self.values=[]

        def addValue(self,key,val):
                """Set `key` to `val`, replacing the value if the key already exists."""
                try:
                        self.values[self.keys.index(key)]=val
                except ValueError:
                        # key not present yet: append it at the end
                        self.keys.append(key)
                        self.values.append(val)

        def write(self,f=sys.stdout):
                """Write the entry to `f` as UTF-8 lines of the form '|<key><n> <value>'.

                <n> is the global entryCount, which is incremented once per call.
                Keys ending in '-tmp' are internal helpers and are not written.
                `f` must accept the encoded byte string produced by encode().
                """
                global entryCount
                entryCount=entryCount+1
                for key,val in zip(self.keys,self.values):
                        if not key.endswith('-tmp'):
                                f.write(('|'+key+str(entryCount)+' '+val+'\n').encode('utf-8'))

        def getValue(self,key):
                """Return the value stored under `key`, or '' if the key is absent."""
                try:
                        return self.values[self.keys.index(key)]
                except ValueError:
                        return ''

# Entries collected by fill1stHadithEntries
hadithItems=[]

# Characters stripped from a hadith text to build its 'H-tmp' comparison key
sortByH_regex_punct = re.compile(u"[ ,،.:\\\\(\\\\)?{}\n.]")
def sortByH(a,b):
        """cmp-style comparison of two entries by their 'H-tmp' value.

        Returns 0 when the two values are equal, 1 when a's value is greater
        and -1 otherwise. No punctuation is removed here; the fill*HadithEntries
        callbacks already store a stripped text under 'H-tmp'.
        """
        left=a.getValue('H-tmp')
        right=b.getValue('H-tmp')

        if left!=right:
                if left>right:
                        return 1
                return -1
        return 0

        
# Entry currently being filled by the fill*HadithEntries callbacks
currentEntry=None
#reads the 40k hadith entries
def fill1stHadithEntries(cmd,val):
        """Reader callback that collects records into hadithItems.

        An 'H' tag starts a new HadithEntry, appends it to hadithItems and
        stores the punctuation-stripped text under 'H-tmp' (the sort key).
        Every tag, 'H' included, is then stored on the current entry; tags
        seen before the first 'H' are dropped.

        NOTE(review): the original comment here said "the last tag in a record
        is H", yet the code opens a record on 'H', so tags following an 'H'
        are attached to that entry -- confirm against the file layout.
        """
        global hadithItems
        global currentEntry

        if cmd=='H':
                currentEntry=HadithEntry()
                hadithItems.append(currentEntry)
                currentEntry.addValue('H-tmp',re.sub(sortByH_regex_punct,'',val))

        if currentEntry is None:
                return
        currentEntry.addValue(cmd,val)

# Entries collected by fill2stHadithEntries
hadith2Items=[]
#reads the 15K files format
def fill2stHadithEntries(cmd,val):
        """Reader callback that collects records into hadith2Items.

        An 'R' tag starts a new HadithEntry. Every tag is stored on the
        current entry. An 'F' tag completes the record: the punctuation-
        stripped 'H' text is stored under 'H-tmp' (the sort key) and the entry
        is appended to hadith2Items.

        Tags seen before the first 'R' are ignored, including 'F'.
        """
        global hadith2Items
        global currentEntry

        if cmd=='R':
                currentEntry=HadithEntry()

        if currentEntry is None:
                # No record has been opened yet, so there is nothing to fill
                # or to complete.
                return

        currentEntry.addValue(cmd,val)

        #The last tag in a record is F
        if cmd=='F':
                a=currentEntry.getValue('H')
                a=re.sub(sortByH_regex_punct,'',a)
                currentEntry.addValue('H-tmp',a)
                hadith2Items.append(currentEntry)

# Not used by the code visible in this file; kept so the module-level name
# still exists.
tempEntry=None
# Position in hadithItems reached by the previous mergeHadithEntries() call
tempEntryPos=0
# Number of entries for which a matching hadithItems entry was found
successMerges=0
#both lists must be sorted, otherwise it will fail.
def mergeHadithEntries(val):
        """Copy the 'R' value of `val` onto the matching entry of hadithItems.

        Entries match when sortByH() reports them equal. The scan resumes from
        tempEntryPos, where the previous call stopped, so the calls must be
        made in sorted order over a sorted hadithItems. When no entry matches,
        nothing is changed apart from the scan position.
        """
        global hadithItems
        global tempEntryPos
        global successMerges

        # Skip the entries that sort before val.
        while tempEntryPos<len(hadithItems) and sortByH(val,hadithItems[tempEntryPos])>0:
                tempEntryPos=tempEntryPos+1
        if tempEntryPos>=len(hadithItems):
                return
        if sortByH(val,hadithItems[tempEntryPos])==0:
                hadithItems[tempEntryPos].addValue('R',val.getValue('R'))
                successMerges=successMerges+1
                return


reader=ReaderBase()
reader.addCommand('H',fill1stHadithEntries,TYPE_MULTILINE)
reader.addCommand('R',fill1stHadithEntries,0)
reader.addCommand('F',fill1stHadithEntries,0)

#reader.processFile('test.txt')
print "Processing KanzMaster ...."
reader.processFile('../../src/cd2/KanzMaster.txt')
print "loaded ",len(hadithItems),"."

print "Sorting..."
hadithItems.sort(sortByH)

currentEntry=None
reader=ReaderBase()
reader.addCommand('H',fill2stHadithEntries,TYPE_MULTILINE)
reader.addCommand('R',fill2stHadithEntries,0)
reader.addCommand('F',fill2stHadithEntries,0)
print "Processing sahihMaster ...."
reader.processFile('../../src/cd2/sahihMaster.txt')

currentEntry=None
print "Processing daifMaster ...."
reader.processFile('../../src/cd2/daifMaster.txt')

#sort before merging to speed up the merge process
hadith2Items.sort(sortByH)
print "loaded ",len(hadith2Items),"."

#Now both maps are sorted
print "Merging ...."
map(mergeHadithEntries,hadith2Items)

#write the merged items
print "writing ",len(hadithItems),"..."
map(HadithEntry.write,hadithItems)

print "successMerges=",successMerges


Regards

Alan Darwish

"Knowledgeable enemy is better than an ignorant friend"


---
Outgoing mail is certified Virus Free.
Checked by AVG anti-virus system (http://www.grisoft.com).
Version: 6.0.778 / Virus Database: 525 - Release Date: 10/15/2004
 

_______________________________________________
Tutor maillist  -  Tutor@python.org
http://mail.python.org/mailman/listinfo/tutor

Reply via email to