Author: pmichaud
Date: Wed May  4 10:18:07 2005
New Revision: 7975

Modified:
   trunk/compilers/pge/PGE/Exp.pir
   trunk/compilers/pge/PGE/P6Rule.pir
   trunk/t/p6rules/anchors.t
Log:
Added \b (word boundary) and \B (non-word boundary) assertions.


Modified: trunk/compilers/pge/PGE/Exp.pir
==============================================================================
--- trunk/compilers/pge/PGE/Exp.pir     (original)
+++ trunk/compilers/pge/PGE/Exp.pir     Wed May  4 10:18:07 2005
@@ -13,7 +13,7 @@
     PGE::Literal   - match a literal string
     PGE::Dot       - match any character
     PGE::CharClass - match of characters in various classes
-    PGE::Anchor    - matching of ^, ^^, $, $$ anchors
+    PGE::Anchor    - matching of ^, ^^, $, $$, \b, \B anchors
     PGE::Cut       - :: and :::
     PGE::Concat    - concatenation of expressions
     PGE::Alt       - alternations
@@ -548,19 +548,37 @@
     emit = find_global "PGE::Exp", "emit"
     emit(code, "\n  %s:", label)
     token = self["token"]
+    if token == "\\b" goto word
+    if token == "\\B" goto word
     if token == '$$' goto eos
     if token == '$' goto eos
-    emit(code, "if pos == 0 goto %s", next)
+    emit(code, "    if pos == 0 goto %s", next)
     unless token == '^^' goto end
-    emit(code, "$I0 = pos - 1")
-    emit(code, "$I1 = is_newline target, $I0")
-    emit(code, "if $I1 goto %s", next)
+    emit(code, "    $I0 = pos - 1")
+    emit(code, "    $I1 = is_newline target, $I0")
+    emit(code, "    if $I1 goto %s", next)
     goto end
   eos:
-    emit(code, "if pos == lastpos goto %s", next)
+    emit(code, "    if pos == lastpos goto %s", next)
     unless token == '$$' goto end
-    emit(code, "$I0 = is_newline target, pos")
-    emit(code, "if $I0 goto %s", next)
+    emit(code, "    $I0 = is_newline target, pos")
+    emit(code, "    if $I0 goto %s", next)
+    goto end
+  word:
+    emit(code, "    $I0 = 0")
+    emit(code, "    unless pos > 0 goto %s_1", label)
+    emit(code, "    $I2 = pos - 1")
+    emit(code, "    $I0 = is_wordchar target, $I2")
+    emit(code, "  %s_1:", label)
+    emit(code, "    $I1 = 0")
+    emit(code, "    unless pos < lastpos goto %s_2", label)
+    emit(code, "    $I1 = is_wordchar target, pos")
+    emit(code, "  %s_2:", label)
+    unless token == "\\b" goto word_1
+    emit(code, "    if $I0 != $I1 goto %s", next)
+    goto end
+  word_1:
+    emit(code, "    if $I0 == $I1 goto %s", next)
   end:
     emit(code, "goto fail")
 .end

Modified: trunk/compilers/pge/PGE/P6Rule.pir
==============================================================================
--- trunk/compilers/pge/PGE/P6Rule.pir  (original)
+++ trunk/compilers/pge/PGE/P6Rule.pir  Wed May  4 10:18:07 2005
@@ -44,6 +44,8 @@
     p6meta['^^'] = $P0
     p6meta['$'] = $P0
     p6meta['$$'] = $P0
+    p6meta["\\B"] = $P0
+    p6meta["\\b"] = $P0
     $P0 = find_global "PGE::P6Rule", "p6rule_parse_cut"
     p6meta['::'] = $P0
     p6meta[':::'] = $P0
@@ -208,7 +210,6 @@
     exp = $P0("PGE::Exp::Anchor")
     exp["token"] = token
     if token != '^' goto end
-    exp["isbos"] = 1
   end:
     .return (exp)
 .end

Modified: trunk/t/p6rules/anchors.t
==============================================================================
--- trunk/t/p6rules/anchors.t   (original)
+++ trunk/t/p6rules/anchors.t   Wed May  4 10:18:07 2005
@@ -3,6 +3,7 @@
 
 $str = q{abc
 def
+-==
 ghi};
 
 p6rule_is  ($str, '^abc', 'BOS abc');
@@ -21,3 +22,17 @@
 p6rule_is  ($str, 'ghi$', 'ghi EOS');
 p6rule_isnt($str, 'gh$', 'gh EOS');
 p6rule_isnt($str, 'de$', 'de EOS');
+
+p6rule_is  ($str, '\bdef', 'word boundary \W\w');
+p6rule_is  ($str, 'abc\b', 'word boundary \w\W');
+p6rule_is  ($str, '\babc', 'BOS word boundary');
+p6rule_is  ($str, 'ghi\b', 'EOS word boundary');
+p6rule_isnt($str, 'a\b',   '\w\w word boundary');
+p6rule_isnt($str, '-\b',   '\W\W word boundary');
+
+p6rule_isnt($str, '\Bdef', 'nonword boundary \W\w');
+p6rule_isnt($str, 'abc\B', 'nonword boundary \w\W');
+p6rule_isnt($str, '\Babc', 'BOS nonword boundary');
+p6rule_isnt($str, 'ghi\B', 'EOS nonword boundary');
+p6rule_is  ($str, 'a\B',   '\w\w nonword boundary');
+p6rule_is  ($str, '-\B',   '\W\W nonword boundary');

Reply via email to