On Jan 5, 1:49 pm, Tim Chase <python.l...@tim.thechases.com> wrote: > vsoler wrote: > > Hence, I need toparseExcel formulas. Can I do it by means only of re > > (regular expressions)? > > > I know that for simple formulas such as "=3*A7+5" it is indeed > > possible. What about complex for formulas that include functions, > > sheet names and possibly other *.xls files? > > Where things start getting ugly is when you have nested function > calls, such as > > =if(Sum(A1:A25)>42,Min(B1:B25), if(Sum(C1:C25)>3.14, > (Min(C1:C25)+3)*18,Max(B1:B25))) > > Regular expressions don't do well with nested parens (especially > arbitrarily-nesting-depth such as are possible), so I'd suggest > going for a full-blown parsing solution like pyparsing. > > If you have fair control over what can be contained in the > formulas and you know they won't contain nested parens/functions, > you might be able to formulate some sort of "kinda, sorta, maybe > parses some forms of formulas" regexp. > > -tkc
This might give the OP a running start: from pyparsing import (CaselessKeyword, Suppress, Word, alphas, alphanums, nums, Optional, Group, oneOf, Forward, Regex, operatorPrecedence, opAssoc, dblQuotedString) test1 = "=3*A7+5" test2 = "=3*Sheet1!$A$7+5" test3 = "=if(Sum(A1:A25)>42,Min(B1:B25), " \ "if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25)))" EQ,EXCL,LPAR,RPAR,COLON,COMMA,DOLLAR = map(Suppress, '=!():,$') sheetRef = Word(alphas, alphanums) colRef = Optional(DOLLAR) + Word(alphas,max=2) rowRef = Optional(DOLLAR) + Word(nums) cellRef = Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row")) cellRange = (Group(cellRef("start") + COLON + cellRef("end")) ("range") | cellRef ) expr = Forward() COMPARISON_OP = oneOf("< = > >= <= != <>") condExpr = expr + COMPARISON_OP + expr ifFunc = (CaselessKeyword("if") + LPAR + Group(condExpr)("condition") + COMMA + expr("if_true") + COMMA + expr("if_false") + RPAR) statFunc = lambda name : CaselessKeyword(name) + LPAR + cellRange + RPAR sumFunc = statFunc("sum") minFunc = statFunc("min") maxFunc = statFunc("max") aveFunc = statFunc("ave") funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc multOp = oneOf("* /") addOp = oneOf("+ -") numericLiteral = Regex(r"\-?\d+(\.\d+)?") operand = numericLiteral | funcCall | cellRange | cellRef arithExpr = operatorPrecedence(operand, [ (multOp, 2, opAssoc.LEFT), (addOp, 2, opAssoc.LEFT), ]) textOperand = dblQuotedString | cellRef textExpr = operatorPrecedence(textOperand, [ ('&', 2, opAssoc.LEFT), ]) expr << (arithExpr | textExpr) import pprint for test in (test1,test2, test3): print test pprint.pprint( (EQ + expr).parseString(test).asList() ) print Prints: =3*A7+5 [[['3', '*', ['A', '7']], '+', '5']] =3*Sheet1!$A$7+5 [[['3', '*', ['Sheet1', 'A', '7']], '+', '5']] =if(Sum(A1:A25)>42,Min(B1:B25), if(Sum(C1:C25)>3.14, (Min(C1:C25)+3) *18,Max(B1:B25))) ['if', ['sum', [['A', '1'], ['A', '25']], '>', '42'], 'min', [['B', '1'], ['B', '25']], 'if', ['sum', [['C', '1'], ['C', '25']], '>', '3.14'], [['min', [['C', '1'], ['C', '25']], '+', '3'], '*', '18'], 'max', [['B', '1'], ['B', '25']]] -- Paul -- http://mail.python.org/mailman/listinfo/python-list