Change 29445 by [EMAIL PROTECTED] on 2006/12/04 09:21:16

        Subject: [PATCH] \R is supposed to mean something else so switch to \g 
and make it more useful in the process
        From: demerphq <[EMAIL PROTECTED]>
        Date: Sun, 3 Dec 2006 16:55:55 +0100
        Message-ID: <[EMAIL PROTECTED]>

Affected files ...

... //depot/perl/pod/perl595delta.pod#16 edit
... //depot/perl/pod/perldiag.pod#462 edit
... //depot/perl/pod/perlre.pod#117 edit
... //depot/perl/regcomp.c#521 edit
... //depot/perl/regexec.c#499 edit
... //depot/perl/t/op/pat.t#271 edit
... //depot/perl/t/op/re_tests#111 edit

Differences ...

==== //depot/perl/pod/perl595delta.pod#16 (text) ====
Index: perl/pod/perl595delta.pod
--- perl/pod/perl595delta.pod#15~29372~ 2006-11-24 02:16:07.000000000 -0800
+++ perl/pod/perl595delta.pod   2006-12-04 01:21:16.000000000 -0800
@@ -122,8 +122,9 @@
 
 =item Relative backreferences
 
-A new syntax C<\R1> ("1" being any positive decimal integer) allows
-relative backreferencing. This should make it easier to embed patterns
+A new syntax C<\g{N}> or C<\gN> where "N" is a decimal integer allows a
+safer form of back-reference notation as well as allowing relative
+backreferences. This should make it easier to generate and embed patterns
 that contain backreferences. (Yves Orton)
 
 =back

==== //depot/perl/pod/perldiag.pod#462 (text) ====
Index: perl/pod/perldiag.pod
--- perl/pod/perldiag.pod#461~29372~    2006-11-24 02:16:07.000000000 -0800
+++ perl/pod/perldiag.pod       2006-12-04 01:21:16.000000000 -0800
@@ -3496,10 +3496,9 @@
 
 =item Reference to nonexistent or unclosed group in regex; marked by <-- HERE 
in m/%s/
 
-(F) You used something like C<\R7> in your regular expression, but there are
+(F) You used something like C<\g{-7}> in your regular expression, but there are
 not at least seven sets of closed capturing parentheses in the expression 
before
-where the C<\R7> was located. It's also possible you forgot to escape the
-backslash.
+where the C<\g{-7}> was located.
 
 The <-- HERE shows in the regular expression about where the problem was
 discovered.
@@ -4438,6 +4437,10 @@
 (F) You used a pattern of the form C<(*VERB:ARG)> but did not terminate
 the pattern with a C<)>. Fix the pattern and retry.
 
+=item Unterminated \g{...} pattern in regex; marked by <-- HERE in m/%s/
+
+(F) You missed a close brace on a \g{..} pattern (group reference) in
+a regular expression. Fix the pattern and retry.
 
 =item Unterminated <> operator
 

==== //depot/perl/pod/perlre.pod#117 (text) ====
Index: perl/pod/perlre.pod
--- perl/pod/perlre.pod#116~29413~      2006-11-29 01:30:02.000000000 -0800
+++ perl/pod/perlre.pod 2006-12-04 01:21:16.000000000 -0800
@@ -247,8 +247,9 @@
             Unsupported in lookbehind.
     \1       Backreference to a specific group.
             '1' may actually be any positive integer.
-    \R1      Relative backreference to a preceding closed group.
-            '1' may actually be any positive integer.
+    \g1      Backreference to a specific or previous group,
+    \g{-1}   number may be negative indicating a previous buffer and may
+             optionally be wrapped in curly brackets for safer parsing.
     \k<name> Named backreference
     \N{name} Named unicode character, or unicode escape
     \x12     Hexadecimal escape sequence
@@ -485,22 +486,28 @@
 before it.  And so on.  \1 through \9 are always interpreted as
 backreferences.
 
-X<relative backreference>
-In Perl 5.10 it is possible to relatively address a capture buffer by
-using the C<\RNNN> notation, where C<NNN> is negative offset to a
-preceding capture buffer. Thus C<\R1> refers to the last buffer,
-C<\R2> refers to the buffer before that. For example:
+X<\g{1}> X<\g{-1}> X<relative backreference>
+In order to provide a safer and easier way to construct patterns using
+backrefs, in Perl 5.10 the C<\g{N}> notation is provided. The curly
+brackets are optional, however omitting them is less safe as the meaning
+of the pattern can be changed by text (such as digits) following it.
+When N is a positive integer the C<\g{N}> notation is exactly equivalent
+to using normal backreferences. When N is a negative integer then it is
+a relative backreference referring to the previous N'th capturing group.
+
+Thus C<\g{-1}> refers to the last buffer, C<\g{-2}> refers to the
+buffer before that. For example:
 
         /
          (Y)            # buffer 1
          (              # buffer 2
             (X)         # buffer 3
-            \R1         # backref to buffer 3
-            \R3         # backref to buffer 1
+            \g{-1}      # backref to buffer 3
+            \g{-3}      # backref to buffer 1
          )
         /x
 
-and would match the same as C</(Y) ( (X) $3 $1 )/x>.
+and would match the same as C</(Y) ( (X) \3 \1 )/x>.
 
 Additionally, as of Perl 5.10 you may use named capture buffers and named
 backreferences. The notation is C<< (?<name>...) >> and C<< \k<name> >>
@@ -1066,10 +1073,10 @@
 
 An example of how this might be used is as follows:
 
-  /(?<NAME>(&NAME_PAT))(?<ADDR>(&ADDRESS_PAT))
+  /(?<NAME>(?&NAME_PAT))(?<ADDR>(?&ADDRESS_PAT))
    (?(DEFINE)
-     (<NAME_PAT>....)
-     (<ADRESS_PAT>....)
+     (?<NAME_PAT>....)
+     (?<ADRESS_PAT>....)
    )/x
 
 Note that capture buffers matched inside of recursion are not accessible

==== //depot/perl/regcomp.c#521 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#520~29441~   2006-12-03 10:37:15.000000000 -0800
+++ perl/regcomp.c      2006-12-04 01:21:16.000000000 -0800
@@ -4345,6 +4345,7 @@
        if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
             && data.last_start_min == 0 && data.last_end > 0
             && !RExC_seen_zerolen
+            && !(RExC_seen & REG_SEEN_VERBARG)
             && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
            r->extflags |= RXf_CHECK_ALL;
        scan_commit(pRExC_state, &data,&minlen,0);
@@ -6364,27 +6365,42 @@
        case 'c':
        case '0':
            goto defchar;
-       case 'R': 
+       case 'g': 
        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            {
                I32 num;
-               bool isrel=(*RExC_parse=='R');
-               if (isrel)
+               bool isg = *RExC_parse == 'g';
+               bool isrel = 0; 
+               bool hasbrace = 0;
+               if (isg) {
                    RExC_parse++;
+                   if (*RExC_parse == '{') {
+                       RExC_parse++;
+                       hasbrace = 1;
+                   }
+                   if (*RExC_parse == '-') {
+                       RExC_parse++;
+                       isrel = 1;
+                   }
+               }   
                num = atoi(RExC_parse);
                 if (isrel) {
                     num = RExC_npar - num;
                     if (num < 1)
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
-               if (num > 9 && num >= RExC_npar)
+               if (!isg && num > 9 && num >= RExC_npar)
                    goto defchar;
                else {
                    char * const parse_start = RExC_parse - 1; /* MJD */
                    while (isDIGIT(*RExC_parse))
                        RExC_parse++;
-
+                    if (hasbrace) {
+                        if (*RExC_parse != '}') 
+                            vFAIL("Unterminated \\g{...} pattern");
+                        RExC_parse++;
+                    }    
                    if (!SIZE_ONLY) {
                        if (num > (I32)RExC_rx->nparens)
                            vFAIL("Reference to nonexistent group");
@@ -6464,6 +6480,7 @@
                    case 'C':
                    case 'X':
                    case 'G':
+                   case 'g':
                    case 'Z':
                    case 'z':
                    case 'w':

==== //depot/perl/regexec.c#499 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#498~29394~   2006-11-27 00:02:35.000000000 -0800
+++ perl/regexec.c      2006-12-04 01:21:16.000000000 -0800
@@ -3561,6 +3561,11 @@
                PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
                PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
 
+                if (sv_yes_mark) {
+                    SV *sv_mrk = get_sv("REGMARK", 1);
+                    sv_setsv(sv_mrk, sv_yes_mark);
+                }
+
                CALLRUNOPS(aTHX);                       /* Scalar context. */
                SPAGAIN;
                if (SP == before)
@@ -4848,12 +4853,12 @@
         case SKIP:
             PL_reginput = locinput;
             if (scan->flags) {
-                /* (*CUT) : if we fail we cut here*/
+                /* (*SKIP) : if we fail we cut here*/
                 ST.mark_name = NULL;
                 ST.mark_loc = locinput;
                 PUSH_STATE_GOTO(SKIP_next,next);    
             } else {
-                /* (*CUT:NAME) : if there is a (*MARK:NAME) fail where it was, 
+                /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it 
was, 
                    otherwise do nothing.  Meaning we need to scan 
                  */
                 regmatch_state *cur = mark_state;
@@ -4869,7 +4874,7 @@
                     cur = cur->u.mark.prev_mark;
                 }
             }    
-            /* Didn't find our (*MARK:NAME) so ignore this (*CUT:NAME) */
+            /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
             break;    
        case SKIP_next_fail:
            if (ST.mark_name) {

==== //depot/perl/t/op/pat.t#271 (xtext) ====
Index: perl/t/op/pat.t
--- perl/t/op/pat.t#270~29441~  2006-12-03 10:37:15.000000000 -0800
+++ perl/t/op/pat.t     2006-12-04 01:21:16.000000000 -0800
@@ -4113,13 +4113,13 @@
 }
 {
     local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663";;
-    my $qr_barR1 = qr/(bar)\R1/;
+    my $qr_barR1 = qr/(bar)\g-1/;
     ok("foobarbarxyz" =~ $qr_barR1);
     ok("foobarbarxyz" =~ qr/foo${qr_barR1}xyz/);
     ok("foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/);
-    ok("foobarbarxyz" =~ qr/(foo)(bar)\R1xyz/);
+    ok("foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/);
     ok("foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/);
-    ok("foobarbarxyz" =~ qr/(foo(bar)\R1)xyz/);
+    ok("foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/);
 } 
 {
     local $Message = "RT#41010";
@@ -4154,7 +4154,16 @@
     $doit->([EMAIL PROTECTED],@sstrs);
     $doit->([EMAIL PROTECTED],@dstrs);
 }
- 
+{
+    local $Message = "\$REGMARK";
+    our @r=();
+    ok('foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x);
+    iseq("@r","foo");           
+    iseq($REGMARK,"foo");
+    ok('foofoo' !~ /foo (*MARK:foo) (*FAIL) /x);
+    ok(!$REGMARK);
+    iseq($REGERROR,'foo');
+}
 # Test counter is at bottom of file. Put new tests above here.
 #-------------------------------------------------------------------
 # Keep the following tests last -- they may crash perl
@@ -4201,7 +4210,7 @@
 iseq(0+$::test,$::TestCount,"Got the right number of tests!");
 # Don't forget to update this!
 BEGIN {
-    $::TestCount = 1567; 
+    $::TestCount = 1573; 
     print "1..$::TestCount\n";
 }
 

==== //depot/perl/t/op/re_tests#111 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#110~29420~       2006-11-29 07:26:00.000000000 -0800
+++ perl/t/op/re_tests  2006-12-04 01:21:16.000000000 -0800
@@ -1190,9 +1190,11 @@
 (a)(?:(?-1)|(?+1))(b)  abb     y       $1-$2   a-b
 (a)(?:(?-1)|(?+1))(b)  acb     n       -       -
 
-(foo)(\R2)     foofoo  y       $1-$2   foo-foo
-(foo)(\R2)(foo)(\R2)   foofoofoofoo    y       $1-$2-$3-$4     foo-foo-foo-foo
-(([abc]+) \R1)(([abc]+) \R1)   abc abccba cba  y       $2-$4   abc-cba
+(foo)(\g-2)    foofoo  y       $1-$2   foo-foo
+(foo)(\g-2)(foo)(\g-2) foofoofoofoo    y       $1-$2-$3-$4     foo-foo-foo-foo
+(([abc]+) \g-1)(([abc]+) \g{-1})       abc abccba cba  y       $2-$4   abc-cba
+(a)(b)(c)\g1\g2\g3     abcabc  y       $1$2$3  abc
+
 
 /(?'n'foo) \k<n>/      ..foo foo..     y       $1      foo
 /(?'n'foo) \k<n>/      ..foo foo..     y       $+{n}   foo
End of Patch.

Reply via email to