Author: fperrad
Date: Sun Mar 4 03:34:29 2007
New Revision: 17325
Modified:
trunk/languages/lua/lib/luaregex.pir
trunk/languages/lua/lib/luastring.pir
trunk/languages/lua/t/rx_metachars
Log:
[Lua]
- improve regex
Modified: trunk/languages/lua/lib/luaregex.pir
==============================================================================
--- trunk/languages/lua/lib/luaregex.pir (original)
+++ trunk/languages/lua/lib/luaregex.pir Sun Mar 4 03:34:29 2007
@@ -184,16 +184,19 @@
=cut
-.sub "__onload" :load
+.sub '__onload' :load
load_bytecode 'PGE.pbc'
- $P0 = getclass "PGE::Exp::CCShortcut"
- $P1 = subclass $P0, "PGE::Exp::LuaCCShortcut"
+ $P0 = getclass 'PGE::Exp::CCShortcut'
+ $P1 = subclass $P0, 'PGE::Exp::LuaCCShortcut'
+
+ $P0 = getclass 'PGE::Exp'
+ $P1 = subclass $P0, 'PGE::Exp::LuaBalanced'
.end
-.namespace [ "PGE::LuaRegex" ]
+.namespace [ 'PGE::LuaRegex' ]
-.sub "compile_luaregex"
+.sub 'compile_luaregex'
.param pmc source
.param pmc adverbs :slurpy :named
@@ -211,7 +214,7 @@
target = downcase target
.local pmc match
- $P0 = get_global "luaregex"
+ $P0 = get_global 'luaregex'
match = $P0(source)
if target != 'parse' goto check
.return (match)
@@ -235,26 +238,26 @@
.end
-.sub "luaregex"
+.sub 'luaregex'
.param pmc mob
.local pmc optable
- optable = get_hll_global ["PGE::LuaRegex"], "$optable"
- $P0 = optable."parse"(mob)
+ optable = get_hll_global ['PGE::LuaRegex'], '$optable'
+ $P0 = optable.'parse'(mob)
.return ($P0)
.end
-.include "cclass.pasm"
+.include 'cclass.pasm'
-.sub "__onload" :load
+.sub '__onload' :load
.local pmc optable
- $I0 = find_type "PGE::OPTable"
+ $I0 = find_type 'PGE::OPTable'
optable = new $I0
- set_hll_global ["PGE::LuaRegex"], "$optable", optable
+ set_hll_global ['PGE::LuaRegex'], '$optable', optable
- $P0 = get_hll_global ["PGE::LuaRegex"], "parse_lit"
+ $P0 = get_hll_global ['PGE::LuaRegex'], 'parse_lit'
optable.newtok('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
optable.newtok('term:^', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::Anchor')
@@ -278,14 +281,18 @@
optable.newtok('term:%W', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::LuaCCShortcut')
optable.newtok('term:%x', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::LuaCCShortcut')
optable.newtok('term:%X', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::LuaCCShortcut')
-# optable.newtok('term:%z', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::LuaCCShortcut')
-# optable.newtok('term:%Z', 'equiv'=>'term:', 'nows'=>1,
'match'=>'PGE::Exp::LuaCCShortcut')
optable.newtok('circumfix:( )', 'equiv'=>'term:', 'nows'=>1,
'nullterm'=>1, 'match'=>'PGE::Exp::CGroup')
$P0 = get_hll_global ['PGE::LuaRegex'], 'parse_enumclass'
optable.newtok('term:[', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
+ $P0 = get_hll_global ['PGE::LuaRegex'], 'parse_enumclass2'
optable.newtok('term:.', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
+ optable.newtok('term:%z', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
+ optable.newtok('term:%Z', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
+
+ $P0 = get_hll_global ['PGE::LuaRegex'], 'parse_balanced'
+ optable.newtok('term:%b', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
$P0 = get_hll_global ['PGE::LuaRegex'], 'parse_quant'
optable.newtok('postfix:*', 'looser'=>'term:', 'left'=>1, 'nows'=>1,
'parsed'=>$P0)
@@ -295,8 +302,8 @@
optable.newtok('infix:', 'looser'=>'postfix:*', 'right'=>1, 'nows'=>1,
'match'=>'PGE::Exp::Concat')
- $P0 = get_hll_global ["PGE::LuaRegex"], "compile_luaregex"
- compreg "PGE::LuaRegex", $P0
+ $P0 = get_hll_global ['PGE::LuaRegex'], 'compile_luaregex'
+ compreg 'PGE::LuaRegex', $P0
.end
@@ -324,22 +331,18 @@
.end
-.sub "parse_lit"
+.sub 'parse_lit'
.param pmc mob
.local pmc newfrom
.local string target
.local int pos, lastpos
.local int litstart, litlen
.local string initchar
- newfrom = get_hll_global ["PGE::Match"], "newfrom"
- (mob, target, $P0, $P1) = newfrom(mob, 0, "PGE::Exp::Literal")
+ newfrom = get_hll_global ['PGE::Match'], 'newfrom'
+ (mob, target, $P0, $P1) = newfrom(mob, 0, 'PGE::Exp::Literal')
pos = $P0
lastpos = length target
initchar = substr target, pos, 1
- unless initchar == '*' goto initchar_ok
- parse_error(mob, pos, "Quantifier follows nothing")
-
- initchar_ok:
if initchar == ')' goto end
inc pos
term_percent:
@@ -357,7 +360,7 @@
if pos <= lastpos goto term_backslash_ok
parse_error(mob, pos, "Search pattern not terminated")
term_backslash_ok:
- $I0 = index "abfnrtv", initchar
+ $I0 = index 'abfnrtv', initchar
if $I0 < 0 goto term_literal
initchar = substr "\a\b\f\n\r\t\x0b", $I0, 1
term_literal:
@@ -366,7 +369,7 @@
term_literal_loop:
if pos >= lastpos goto term_literal_end
$S0 = substr target, pos, 1
- $I0 = index "^$()%.[]*+-?", $S0
+ $I0 = index '()%.[]*+-?', $S0
# if not in circumfix:( ) throw error on end paren
if $I0 >= 0 goto term_literal_end
inc pos
@@ -391,7 +394,7 @@
.const int PGE_BACKTRACK_GREEDY = 1
.const int PGE_BACKTRACK_EAGER = 2
-.sub "parse_quant"
+.sub 'parse_quant'
.param pmc mob
.local string target
.local int min, max, backtrack
@@ -399,8 +402,8 @@
.local pmc mfrom, mpos
.local string key
key = mob['KEY']
- $P0 = get_hll_global ["PGE::Match"], "newfrom"
- (mob, target, mfrom, mpos) = $P0(mob, 0, "PGE::Exp::Quant")
+ $P0 = get_hll_global ['PGE::Match'], 'newfrom'
+ (mob, target, mfrom, mpos) = $P0(mob, 0, 'PGE::Exp::Quant')
pos = mfrom
lastpos = length target
min = 0
@@ -409,23 +412,21 @@
if key != '+' goto quant_max
min = 1
quant_max:
- if key != "?" goto quant_eager
+ if key != '?' goto quant_eager
max = 1
quant_eager:
- if key != "-" goto end
+ if key != '-' goto end
backtrack = PGE_BACKTRACK_EAGER
end:
- mob["min"] = min
- mob["max"] = max
- mob["backtrack"] = backtrack
+ mob['min'] = min
+ mob['max'] = max
+ mob['backtrack'] = backtrack
mpos = pos
.return (mob)
- err_range:
- parse_error(mob, pos, "Error in quantified range")
.end
-.sub "parse_enumclass"
+.sub 'parse_enumclass'
.param pmc mob
.local string target
.local pmc mfrom, mpos
@@ -434,17 +435,16 @@
.local string charlist
.local string key
key = mob['KEY']
- $P0 = get_hll_global ["PGE::Match"], "newfrom"
- (mob, target, mfrom, mpos) = $P0(mob, 0, "PGE::Exp::EnumCharList")
+ $P0 = get_hll_global ['PGE::Match'], 'newfrom'
+ (mob, target, mfrom, mpos) = $P0(mob, 0, 'PGE::Exp::EnumCharList')
pos = mfrom
- if key == '.' goto dot
lastpos = length target
- charlist = ""
- mob["isnegated"] = 0
+ charlist = ''
+ mob['isnegated'] = 0
isrange = 0
$S0 = substr target, pos, 1
- if $S0 != "^" goto scan_first
- mob["isnegated"] = 1
+ if $S0 != '^' goto scan_first
+ mob['isnegated'] = 1
inc pos
scan_first:
if pos >= lastpos goto err_close
@@ -456,13 +456,13 @@
if pos >= lastpos goto err_close
$S0 = substr target, pos, 1
inc pos
- if $S0 == "]" goto endclass
- if $S0 == "-" goto hyphenrange
+ if $S0 == ']' goto endclass
+ if $S0 == '-' goto hyphenrange
if $S0 != "\\" goto addchar
backslash:
$S0 = substr target, pos, 1
inc pos
- $I0 = index "nrtfae0b", $S0
+ $I0 = index 'nrtfae0b', $S0
if $I0 == -1 goto addchar
$S0 = substr "\n\r\t\f\a\e\0\b", $I0, 1
addchar:
@@ -486,18 +486,14 @@
goto scan
endclass:
if isrange == 0 goto end
- charlist .= "-"
- goto end
- dot:
- charlist = "\n"
- mob["isnegated"] = 1
+ charlist .= '-'
end:
mpos = pos
mob.'result_object'(charlist)
.return (mob)
err_close:
- parse_error(mob, pos, "Unmatched [")
+ parse_error(mob, pos, "malformed pattern (missing ']')")
err_range:
$S0 = 'Invalid [] range "'
$S1 = chr $I2
@@ -510,9 +506,58 @@
.end
-.namespace [ "PGE::Exp" ]
+# Parser callback that builds a PGE::Exp::EnumCharList node for tokens whose
+# character list is fixed by the token itself. The token is read from
+# mob['KEY'] and selects the result as follows:
+#   '.'        -> empty character list, isnegated = 1
+#   '%Z'       -> character list "\0",  isnegated = 1
+#   any other  -> character list "\0",  isnegated = 0
+# Takes the current match object and returns the newly created one.
+.sub 'parse_enumclass2'
+ .param pmc mob
+ .local string target
+ .local pmc mfrom, mpos
+ .local int pos
+ .local string charlist
+ .local string key
+ # Read the token before 'mob' is replaced by the new match object below.
+ key = mob['KEY']
+ $P0 = get_hll_global ['PGE::Match'], 'newfrom'
+ (mob, target, mfrom, mpos) = $P0(mob, 0, 'PGE::Exp::EnumCharList')
+ pos = mfrom
+ unless key == '.' goto zero
+ # Negated empty list -- presumably "any character"; confirm against
+ # PGE::Exp::EnumCharList.
+ charlist = ''
+ mob['isnegated'] = 1
+ goto end
+ zero:
+ charlist = "\0"
+ mob['isnegated'] = 0
+ unless key == '%Z' goto end
+ mob['isnegated'] = 1
+ end:
+ # pos was never advanced, so the end position equals the start position.
+ mpos = pos
+ mob.'result_object'(charlist)
+ .return (mob)
+.end
+
+
+# Parser callback for the '%b' token. Builds a PGE::Exp::LuaBalanced node
+# whose result object is the two-character string taken from the pattern at
+# the current position (the x and y of Lua's %bxy item).
+#   mob - current match object
+# Returns the newly created match object, positioned after the two characters.
+# Calls parse_error() with "unbalanced pattern" when fewer than two
+# characters remain in the pattern.
+.sub 'parse_balanced'
+ .param pmc mob
+ .local string target
+ .local pmc mfrom, mpos
+ .local int pos, lastpos
+ .local string xy
+ $P0 = get_hll_global ['PGE::Match'], 'newfrom'
+ (mob, target, mfrom, mpos) = $P0(mob, 0, 'PGE::Exp::LuaBalanced')
+ pos = mfrom
+ lastpos = length target
+ # Both delimiter characters must lie inside the pattern, so check what is
+ # left after pos. Comparing the whole pattern length with 2 could not fail
+ # once '%b' itself is part of the pattern (assuming pos is just past the
+ # token, as parse_enumclass assumes for '[').
+ $I0 = lastpos - pos
+ if $I0 < 2 goto err
+ xy = substr target, pos, 2
+ pos += 2
+ mpos = pos
+ mob.'result_object'(xy)
+ .return (mob)
+
+ err:
+ parse_error(mob, pos, "unbalanced pattern")
+.end
+
+
+.namespace [ 'PGE::Exp' ]
-.sub "luaanalyze" :method
+.sub 'luaanalyze' :method
.param pmc pad
.local pmc exp
$I0 = 0
@@ -520,7 +565,7 @@
$I1 = defined self[$I0]
if $I1 == 0 goto end
$P0 = self[$I0]
- $P0 = $P0."luaanalyze"(pad)
+ $P0 = $P0.'luaanalyze'(pad)
self[$I0] = $P0
inc $I0
goto loop
@@ -528,28 +573,30 @@
.return (self)
.end
-.namespace [ "PGE::Exp::CGroup" ]
-.sub "luaanalyze" :method
+.namespace [ 'PGE::Exp::CGroup' ]
+
+.sub 'luaanalyze' :method
.param pmc pad
.local pmc exp
- self["iscapture"] = 0
- if self != "(" goto end
- self["iscapture"] = 1
- self["isscope"] = 0
- self["isarray"] = 0
- $I0 = pad["subpats"]
- self["cname"] = $I0
+ self['iscapture'] = 0
+ if self != '(' goto end
+ self['iscapture'] = 1
+ self['isscope'] = 0
+ self['isarray'] = 0
+ $I0 = pad['subpats']
+ self['cname'] = $I0
inc $I0
- pad["subpats"] = $I0
+ pad['subpats'] = $I0
end:
exp = self[0]
- exp = exp."luaanalyze"(pad)
+ exp = exp.'luaanalyze'(pad)
self[0] = exp
.return (self)
.end
+
.namespace [ 'PGE::Exp::LuaCCShortcut' ]
.sub 'reduce' :method
@@ -567,7 +614,6 @@
if token == '%U' goto upper
if token == '%W' goto word
if token == '%X' goto hexa
-# if token == '%Z' goto z
self['negate'] = 0
if token == '%a' goto letter
if token == '%c' goto ctrl
@@ -578,7 +624,6 @@
if token == '%u' goto upper
if token == '%w' goto word
if token == '%x' goto hexa
-# if token == '%z' goto z
self['cclass'] = .CCLASS_ANY
goto end
letter:
@@ -611,3 +656,29 @@
.return (self)
.end
+
+.namespace [ 'PGE::Exp::LuaBalanced' ]
+
+# 'reduce' method of a LuaBalanced node: returns the node itself unchanged.
+# The 'next' argument is accepted but not used.
+.sub 'reduce' :method
+ .param pmc next
+ .return (self)
+.end
+
+# Emits the code fragment for a LuaBalanced node through code.emit().
+#   code  - object whose emit() method receives the template and its arguments
+#   label - label placed at the start of the emitted fragment
+#   next  - label the emitted fragment jumps to
+# NOTE: still a stub (see the TODO below). The emitted fragment only jumps to
+# 'next'; x and y are extracted from the node's string value but not used yet.
+.sub 'pir' :method
+ .param pmc code
+ .param string label
+ .param string next
+
+ .local string x, y
+ # String value of the node; parse_balanced stores the two characters
+ # following '%b' as the result object -- presumably what is read back here.
+ $S0 = self
+ x = substr $S0, 0, 1
+ y = substr $S0, 1, 1
+
+ # TODO
+ code.emit(<<" CODE", label, $S0, next)
+ %0: # balanced %1
+ goto %2
+ CODE
+ .return ()
+.end
+
Modified: trunk/languages/lua/lib/luastring.pir
==============================================================================
--- trunk/languages/lua/lib/luastring.pir (original)
+++ trunk/languages/lua/lib/luastring.pir Sun Mar 4 03:34:29 2007
@@ -360,6 +360,11 @@
unless $I0 < $I1 goto L3
$P0 = capts[$I0]
$S0 = $P0.'text'()
+ $I2 = index $S0, "\0"
+ if $I2 < 0 goto L4
+ # sorry, strictly compatible
+ $S0 = substr $S0, 0, $I2
+L4:
new $P1, .LuaString
set $P1, $S0
ret[$I0] = $P1
@@ -368,13 +373,18 @@
L3:
.return (ret)
L1:
- unless whole == 1 goto L4
+ unless whole == 1 goto L5
set ret, 1
$S0 = match.'text'()
+ $I2 = index $S0, "\0"
+ if $I2 < 0 goto L6
+ # sorry, strictly compatible
+ $S0 = substr $S0, 0, $I2
+L6:
new $P1, .LuaString
set $P1, $S0
ret[0] = $P1
-L4:
+L5:
.return (ret)
.end
@@ -819,6 +829,11 @@
$P0 = match.'get_array'()
$P1 = $P0[i]
$S0 = $P1.'text'()
+ $I0 = index $S0, "\0"
+ if $I0 < 0 goto L1
+ # sorry, strictly compatible
+ $S0 = substr $S0, 0, $I0
+L1:
.return ($S0)
_handler:
error("invalid capture index")
Modified: trunk/languages/lua/t/rx_metachars
==============================================================================
--- trunk/languages/lua/t/rx_metachars (original)
+++ trunk/languages/lua/t/rx_metachars Sun Mar 4 03:34:29 2007
@@ -54,6 +54,7 @@
a%[d a[d a[d escaped
a%]d a]d a]d escaped
a%*d a*d a*d escaped
+*ad *ad *ad not escaped
a%+d a+d a+d escaped
a%-d a-d a-d escaped
a%?d a?d a?d escaped