Author: pmichaud
Date: Wed May 4 10:18:07 2005
New Revision: 7975
Modified:
trunk/compilers/pge/PGE/Exp.pir
trunk/compilers/pge/PGE/P6Rule.pir
trunk/t/p6rules/anchors.t
Log:
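Added \b (word boundary) and \B (non-word boundary) assertions to PGE::Exp::Anchor, with parser support in P6Rule.pir and new tests in anchors.t.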
Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir (original)
+++ trunk/compilers/pge/PGE/Exp.pir Wed May 4 10:18:07 2005
@@ -13,7 +13,7 @@
PGE::Literal - match a literal string
PGE::Dot - match any character
PGE::CharClass - match of characters in various classes
- PGE::Anchor - matching of ^, ^^, $, $$ anchors
+ PGE::Anchor - matching of ^, ^^, $, $$, \b, \B anchors
PGE::Cut - :: and :::
PGE::Concat - concatenation of expressions
PGE::Alt - alternations
@@ -548,19 +548,37 @@
emit = find_global "PGE::Exp", "emit"
emit(code, "\n %s:", label)
token = self["token"]
+ if token == "\\b" goto word
+ if token == "\\B" goto word
if token == '$$' goto eos
if token == '$' goto eos
- emit(code, "if pos == 0 goto %s", next)
+ emit(code, " if pos == 0 goto %s", next)
unless token == '^^' goto end
- emit(code, "$I0 = pos - 1")
- emit(code, "$I1 = is_newline target, $I0")
- emit(code, "if $I1 goto %s", next)
+ emit(code, " $I0 = pos - 1")
+ emit(code, " $I1 = is_newline target, $I0")
+ emit(code, " if $I1 goto %s", next)
goto end
eos:
- emit(code, "if pos == lastpos goto %s", next)
+ emit(code, " if pos == lastpos goto %s", next)
unless token == '$$' goto end
- emit(code, "$I0 = is_newline target, pos")
- emit(code, "if $I0 goto %s", next)
+ emit(code, " $I0 = is_newline target, pos")
+ emit(code, " if $I0 goto %s", next)
+ goto end
+ word:
+ emit(code, " $I0 = 0")
+ emit(code, " unless pos > 0 goto %s_1", label)
+ emit(code, " $I2 = pos - 1")
+ emit(code, " $I0 = is_wordchar target, $I2")
+ emit(code, " %s_1:", label)
+ emit(code, " $I1 = 0")
+ emit(code, " unless pos < lastpos goto %s_2", label)
+ emit(code, " $I1 = is_wordchar target, pos")
+ emit(code, " %s_2:", label)
+ unless token == "\\b" goto word_1
+ emit(code, " if $I0 != $I1 goto %s", next)
+ goto end
+ word_1:
+ emit(code, " if $I0 == $I1 goto %s", next)
end:
emit(code, "goto fail")
.end
Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir Wed May 4 10:18:07 2005
@@ -44,6 +44,8 @@
p6meta['^^'] = $P0
p6meta['$'] = $P0
p6meta['$$'] = $P0
+ p6meta["\\B"] = $P0
+ p6meta["\\b"] = $P0
$P0 = find_global "PGE::P6Rule", "p6rule_parse_cut"
p6meta['::'] = $P0
p6meta[':::'] = $P0
@@ -208,7 +210,6 @@
exp = $P0("PGE::Exp::Anchor")
exp["token"] = token
if token != '^' goto end
- exp["isbos"] = 1
end:
.return (exp)
.end
Modified: trunk/t/p6rules/anchors.t
==============================================================================
--- trunk/t/p6rules/anchors.t (original)
+++ trunk/t/p6rules/anchors.t Wed May 4 10:18:07 2005
@@ -3,6 +3,7 @@
$str = q{abc
def
+-==
ghi};
p6rule_is ($str, '^abc', 'BOS abc');
@@ -21,3 +22,17 @@
p6rule_is ($str, 'ghi$', 'ghi EOS');
p6rule_isnt($str, 'gh$', 'gh EOS');
p6rule_isnt($str, 'de$', 'de EOS');
+
+p6rule_is ($str, '\bdef', 'word boundary \W\w');
+p6rule_is ($str, 'abc\b', 'word boundary \w\W');
+p6rule_is ($str, '\babc', 'BOS word boundary');
+p6rule_is ($str, 'ghi\b', 'EOS word boundary');
+p6rule_isnt($str, 'a\b', '\w\w word boundary');
+p6rule_isnt($str, '-\b', '\W\W word boundary');
+
+p6rule_isnt($str, '\Bdef', 'nonword boundary \W\w');
+p6rule_isnt($str, 'abc\B', 'nonword boundary \w\W');
+p6rule_isnt($str, '\Babc', 'BOS nonword boundary');
+p6rule_isnt($str, 'ghi\B', 'EOS nonword boundary');
+p6rule_is ($str, 'a\B', '\w\w nonword boundary');
+p6rule_is ($str, '-\B', '\W\W nonword boundary');