Hello, I'm a french undergraduated CS. student currently doing an internship in Santigo's University under the supervision of Eric Tanter. I'm working on the meta JIT compiler provided in the pypy project, trying to document it's performance on different kind of interpreters. I started two weeks ago.
I have a problem with the translation toolchain. I've been writing an interpreter for a small language inspired by Shriram Krishnamurthi's F1WAE from his book "Programming Languages: Application and Interpretation". My problem is the following: I have an interpreter that seems to work perfecty (at least on my examples) when I use pypy interpreters, but whenever I translate it, execution fails. The fact is: I have identified the source of the problem but I don't understand it. If you look at the parser file, on line 184, there is a "print" instruction. With this instruction on comment, the translated version of RPinterpret executed on test10run10 gets a Stack Overflow. Use a translated version of the same file (RPinterpret.py) with the print instruction in parser.py, you get the normal behavior (which you have anyway using "pypy RPinterpret.py test10runs10"). Looks like a bug, but since I'm a beginer (both on this work and on Python in general), maybe I have missed something. I have the same problem with the supposely-JITing interpreter, but it's worse since the translations takes 20min against 1min, so changing and experiencing on the JITing file seems much harder. Anyway, thanks for your help. I didn't know whether I was supposed to make a bug report or just contact the mailing list, but since I'll be working on this until september, might as well get in touch with you right now. Leonard de HARO PS: As I said, I just learned Python, so maybe my files aren't verry pretty, I apologize for that and I'll welcome any comment on my work you could make. Thanks again.
import treeClass ####################################### # Reimplementation of some functions from the library # ######################################## def belong(n,s,e): """ Raises True if s<=n<e """ return n<e and n>=s def CutWord(string,i): """Find the first blank character following string[i] in string. If there's none, return the length of the list.""" if string[i] in (' ','\n','\t'): return i else: try: return CutWord(string,i+1) except IndexError: return i+1 # Block of functions to define an identifier or a number alphaOrUnd = ('a','z','e','r','t','y','u','i','o','p','q','s','d','f','g','h','j','k','l','m','w','x','c','v','b','n','_') digit = ('0','1','2','3','4','5','6','7','8','9') def isAlphaOrUndChar(c): """ Check if the first character belongs to alphaOrUnd. """ try: return c[0] in alphaOrUnd except IndexError: return False def isAlphaNumOrUnd(c): """ Check if every character is either in alphaOrUnd or in digit. """ length =len(c) pc = 0 answer = True while answer and pc < length: answer = answer and (c[pc] in alphaOrUnd or c[pc] in digit) pc +=1 return answer and length >0 def IsIdentifier(word): """True if word is a correct identifier.""" return (isAlphaOrUndChar(word) and isAlphaNumOrUnd(word)) def IsNumber(c): """ True iff the string is only made of digits and non-empty.""" length =len(c) pc = 0 answer = True while answer and pc < length: answer = answer and c[pc] in digit pc +=1 return answer and length>0 # # Replace str.partition(' ') unavailable with TTC def partitionSpace(word): """ Same as string method partition(' ') """ length = len(word) pc = 0 head = tail = '' while pc < length: if word[pc]==' ': assert(pc>=0) head = word[0:pc] tail = word[pc+1:length] break else: pc += 1 return head, tail # Replace str.split() available, but we need the number of spaces deleted at the beging of the word def StripSpace(word): """ Same as str.strip(' ') but also return the number of spaces deleted at the begining. """ beg = 0 end = len(word) count = 0 while beg < end: if word[beg] == ' ': count += 1 beg += 1 else: break while end > beg: end -= 1 if word[end] != ' ': break if beg == end == len(word): return '', len(word) else: end += 1 assert(end>=0) return word[beg: end], count # ####################################### # Useful functions # ######################################## # Build associativity of braces # To optmisize (RPython): calculate size of stack and dic beforehand def BuildAssociativity(fileToUse): """ Build a associative table of braces. """ bracketMap = {} leftstack = [] pc = 0 for char in fileToUse: if char == '(' or char == '{': leftstack.append(pc) elif char == ')': left = leftstack.pop() if fileToUse[left] != '(': raise ValueError("Should be } at " + str(pc)) right = pc bracketMap[left] = right bracketMap[right] = left elif char == '}': left = leftstack.pop() if fileToUse[left] != '{': raise ValueError("Should be ) at "+str(pc)) right = pc bracketMap[left] = right bracketMap[right] = left else: pass pc += 1 return bracketMap # To make the dict corresponding to a cut string def FilterDic(dictry, start, end): """Return a new dictionnary containning only pairing of indices between start and (end-1) inclued.""" newDic ={} for i,k in dictry.items(): if belong(i,start,end) and belong(k,start,end): newDic[i-start]=k-start elif belong(i,start,end) and not belong(k,start,end): raise ValueError("Not a valid bracket matching") else: pass return newDic # Spliting code into meaningful blocks def SplittingCode(fileToUse, bracketMap): """ Splits the code into meaningful blocks. """ pc = 0 length = len(fileToUse) blocks = [] while pc<length: ch = fileToUse[pc] if (ch in (' ', '\t', '\n')): pass elif ch =='(' or ch=='{': matchingBracket = bracketMap[pc] + 1 assert(matchingBracket >=0 ) block = fileToUse[pc:matchingBracket] newDic = FilterDic(bracketMap, pc, matchingBracket) blocks.append((block, newDic)) pc = matchingBracket else: end = CutWord(fileToUse,pc) # print("This line fixes the stack overflow problem in the translated execution of RPinterpret.py") assert(end >= 0) blocks.append((fileToUse[pc:end], {})) pc = end pc += 1 return blocks ####################################### # Actual parsing functions # ######################################## # Parsing a function declaration def ParseFunc((block, dic)): """ Given a block defining a function, return the correspondant representation. There are only simple spaces. """ n = dic[0] if not (block[0] == '{' and block[n] == '}'): raise ValueError("Not a function block :\n"+ block) else: assert(n>=0) workingBlock = block[1:n] dic2 = FilterDic(dic,1,n) subBlocks = SplittingCode(workingBlock, dic2) # if len(subBlocks) != 2: raise ValueError("Only two sub-blocks expected in block :\n"+block) else: declaration, dd = subBlocks[0] # if len(dd.values()) != 2 : raise ValueError("No sub-blocks expected inside of :\n" + declaration) # end = dd[0] assert(end>=0) declareList = declaration[1:end].split(" ") if len(declareList) != 2: raise ValueError("Wrong declaration: \n" + declaration + "\nExpected form: <id> <id>") name = declareList[0] argName = declareList[1] bodyTree = ParseF1WAE(subBlocks[1]) return name, treeClass.Func(name,argName,bodyTree) # Parsing a function declaration def ParseF1WAE((block, dic)): """Parses <F1WAE>. Only simple spaces.""" if block[0] == '{': raise ValueError("Function declaration is not allowed in <F1WAE> :\n" + block) # elif block[0] == '(': lastPos = dic[0] assert(lastPos >= 0) blockContent, count = StripSpace(block[1:lastPos]) # First word in blockContent allows to identify the case head, tail = partitionSpace(blockContent) # if head == 'with': bodyWith = SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0])) if len(bodyWith) != 2: raise ValueError("Two expressions expected following keyword 'with':\n" + block) else: falseApp = ParseF1WAE(bodyWith[0]) #Same syntax as an App if not(isinstance(falseApp, treeClass.App)): raise ValueError("Wrong assignement in with block:\n" + block) else: return treeClass.With(falseApp.funName, falseApp.arg, ParseF1WAE(bodyWith[1])) # elif head == 'if': bodyWith = SplittingCode(tail, FilterDic(dic, len(head+' ')+1+count, dic[0])) length = len(bodyWith) if length == 3: # All fields requested. No 'pass' instructions in the language. cond = ParseF1WAE(bodyWith[0]) ctrue = ParseF1WAE(bodyWith[1]) cfalse = ParseF1WAE(bodyWith[2]) return treeClass.If(cond, ctrue, cfalse) else: raise ValueError("Too many (or no) instructions in 'if' block :\n"+block) # elif head[0] in ('+','-','*','/','%','='): # Treats the case were space is forgotten after operator if len(head)==1: # There is a space after the operator bodyOp = SplittingCode(tail,FilterDic(dic, len(head+' ')+1+count,dic[0])) else: # There's no space after the operator newHead = head[1:len(head)] +' ' bodyOp = SplittingCode((newHead + tail),FilterDic(dic, 1+1+count,dic[0])) if len(bodyOp) != 2: raise ValueError("Two expressions expected following operator :\n" + block) else: return treeClass.Op(head[0], ParseF1WAE(bodyOp[0]), ParseF1WAE(bodyOp[1])) # else: # An App or a parenthesized Num or Id bodyApp = SplittingCode(tail, FilterDic(dic,len(head+' ')+1+count,dic[0])) lenght = len(bodyApp) if lenght == 0: # Parenthesized Num or Id return ParseF1WAE((head, FilterDic(dic,1,dic[0]))) elif lenght == 1: #An App return treeClass.App(head, ParseF1WAE(bodyApp[0])) # else: # if IsIdentifier(block): return treeClass.Id(block) elif IsNumber(block): return treeClass.Num(int(block)) else: raise ValueError("Syntax Error in identifier :\n" + block) # Global parsing method def Parse(myFile): """ The global parsing function. """ myFile = (myFile.replace('\n',' ')).replace('\t',' ') # There are only simple spaces. Makes it easier to deal with. bracketMap = BuildAssociativity(myFile) codeInPieces = SplittingCode(myFile, bracketMap) # funcToDefine = [] prog = [] # for couple in codeInPieces: s,d = couple if s[0] == '{': funcToDefine.append((s,d)) else: prog.append((s,d)) # try: # Check that BNF is respected prog[1] raise ValueError("Only one <Prog> is allowed.") except IndexError: pass # # Create the function dictionnary funcDict = {} for funcDef in funcToDefine: name, descr = ParseFunc(funcDef) try: uselessVar = funcDict[name] raise ValueError("Function "+name+" already defined.") except KeyError: funcDict[name] = descr # # Create AST of main program ast = ParseF1WAE(prog[0]) return ast, funcDict
# <file> ::= <Def>* <Prog> <Def>* # <Prog> ::= <F1WAE> # <Def> ::= { ( <id> <id> ) # ( <F1WAE> ) } # <F1WAE> :: = <num> # | ( <op> <F1WAE> <F1WAE> ) # | ( with ( <id> <F1WAE> ) <F1WAE> ) # | <id> # | ( <id> <F1WAE>) # | ( if <F1WAE> <F1WAE> <F1WAE> ) # (if <cond> <true> <false>) True <=> !=0 # <op> ::= '+' | '-' | '*' | '/' | '%' | '=' # <num> ::= [ '0' - '9' ]+ # <id> ::= [ '_', 'a' - 'z', 'A' - 'Z'][ '_', 'a' - 'z', 'A' - 'Z', '0' -'9' ]* class File: def __init__(self, prog, funcDic): self.prog=prog #Should be a F1WAE self.funcDic=funcDic #Should be a dictionnary of keys name_of_function and values of class Func class Func: def __init__(self, name, argName, body): self.name=name # Id self.argName=argName # Id self.body=body # F1WAE class F1WAE: def __init__(self): pass class Node(F1WAE): def __init__(self): pass class Leaf(F1WAE): def __init__(self): pass class Num(Leaf): def __init__(self, n): self.n=n # Int class Id(Leaf): def __init__(self, name): self.name=name # Id class Op(Node): def __init__(self, op, lhs, rhs): self.op=op # Op self.lhs=lhs # F1WAE self.rhs=rhs # F1WAE class With(Node): def __init__(self, name, nameExpr, body): self.name=name # Id self.nameExpr=nameExpr # F1WAE self.body=body # F1WAE class App(Node): def __init__(self, funName, arg): self.funName=funName # Id, name of a function self.arg=arg # F1WAE class If(Node): def __init__(self, cond, ctrue, cfalse): self.cond=cond # Condition self.ctrue=ctrue # If condition is true self.cfalse=cfalse #If condition is false
import treeClass import parser import sys def Interpret(tree, funDict, contVar): """ Interpret the F1WAE AST given a set of defined functions. We use deferred substituion and eagerness.""" if isinstance(tree, treeClass.Num): return tree.n # elif isinstance(tree, treeClass.Op): left = Interpret(tree.lhs, funDict, contVar) right = Interpret(tree.rhs, funDict, contVar) if tree.op == '+': return left + right elif tree.op == '-': return left - right elif tree.op == '*': return left * right elif tree.op == '/': return left/right elif tree.op == '%': return left % right elif tree.op == '=': if left == right: return 1 else: return 0 else: raise ValueError("Parsing Error, symobl "+ tree.op+" shouldn't be here.") # elif isinstance(tree, treeClass.With): val = Interpret(tree.nameExpr, funDict, contVar) contVar[tree.name] = val #Eager return Interpret(tree.body, funDict, contVar) # elif isinstance(tree, treeClass.Id): try: return contVar[tree.name] except KeyError: raise ValueError("Interpret Error: free identifier :\n" + tree.name) # elif isinstance(tree, treeClass.App): try: # funDef = funDict[tree.funName] val = Interpret(tree.arg, funDict, contVar) if not isinstance(funDef, treeClass.Func): raise ValueError("Wrong Dictionnary.") newCont = {funDef.argName: val} # Eager return Interpret(funDef.body, funDict, newCont) # except KeyError: raise ValueError("Invalid function : " + tree.funName) # elif isinstance(tree, treeClass.If): condition = Interpret(tree.cond, funDict, contVar) if condition != 0: #True return Interpret(tree.ctrue, funDict, contVar) else: return Interpret(tree.cfalse, funDict, contVar) # else: # Not an <F1WAE> raise ValueError("Argument of Interpret is not a <F1WAE>:\n") def Main(file): t,d = parser.Parse(file) j = Interpret(t,d,{}) print("the answer is :" + str(j)) import os def run(fp): program_contents = "" while True: read = os.read(fp, 4096) if len(read) == 0: break program_contents += read os.close(fp) Main(program_contents) def entry_point(argv): try: filename = argv[1] except IndexError: print "You must supply a filename" return 1 run(os.open(filename, os.O_RDONLY, 0777)) return 0 def target(*args): return entry_point, None if __name__ == "__main__": entry_point(sys.argv)
test10runs10
Description: Binary data
_______________________________________________ pypy-dev mailing list pypy-dev@python.org http://mail.python.org/mailman/listinfo/pypy-dev