[perl.git] branch blead, updated. GitLive-blead-1389-ge061ec8

Rafael Garcia-Suarez Thu, 18 Jun 2009 02:21:21 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/e061ec81dcb60a85554e31a39475b89528de42f0?hp=5ad5b34cb2af84d4f37219a5dee752fca0459151>


- Log -----------------------------------------------------------------
commit e061ec81dcb60a85554e31a39475b89528de42f0
Merge: 5ad5b34... 71e9c53...
Author: Rafael Garcia-Suarez <[email protected]>
Date:   Thu Jun 18 11:20:53 2009 +0200

    Merge branch 'backslash-N' into blead

commit 71e9c5323d288a57ab4e6570f3aee42167c6d5e7
Author: Rafael Garcia-Suarez <[email protected]>
Date:   Wed Jun 17 18:54:41 2009 +0200

    Add perldelta entry for \N

M       pod/perl5110delta.pod

commit cbf4a1297456c507afef540d31b7fadad2d72425
Author: Rafael Garcia-Suarez <[email protected]>
Date:   Wed Jun 17 18:50:00 2009 +0200

    Some more tests for \N

M       t/op/re_tests

commit c741660aac46f1784b5ef16aeab5e4958a91df78
Author: Rafael Garcia-Suarez <[email protected]>
Date:   Tue Jun 16 08:40:50 2009 +0200

    Basic docs for \N

M       pod/perlre.pod
M       pod/perlrebackslash.pod
M       pod/perlrecharclass.pod
M       pod/perlreref.pod
M       pod/perltodo.pod

commit afefe6bfcf9956c77e5f9eee351e3d13be12ea3b
Author: Rafael Garcia-Suarez <[email protected]>
Date:   Tue Jun 16 08:27:23 2009 +0200

    Implement new regex escape \N
    
    \N, like in Perl 6, is equivalent to . but not influenced by /s.
    It matches any character except \n. Note that followed by { and
    a non-number, \N is still a named character.

M       embed.fnc
M       embed.h
M       proto.h
M       regcomp.c
M       t/op/re_tests
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc               |    2 +-
 embed.h                 |    2 +-
 pod/perl5110delta.pod   |    8 ++++++++
 pod/perlre.pod          |    1 +
 pod/perlrebackslash.pod |    1 +
 pod/perlrecharclass.pod |   10 ++++++++--
 pod/perlreref.pod       |    1 +
 pod/perltodo.pod        |    6 ------
 proto.h                 |    2 +-
 regcomp.c               |   32 +++++++++++++++++++++++---------
 t/op/re_tests           |   22 +++++++++++++++++++++-
 11 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 68f3817..439203c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1593,7 +1593,7 @@ Es        |UV     |reg_recode     |const char value|NN SV 
**encp
 Es     |regnode*|regpiece      |NN struct RExC_state_t *pRExC_state \
                                |NN I32 *flagp|U32 depth
 Es     |regnode*|reg_namedseq  |NN struct RExC_state_t *pRExC_state \
-                               |NULLOK UV *valuep
+                               |NULLOK UV *valuep|NULLOK I32 *flagp
 Es     |void   |reginsert      |NN struct RExC_state_t *pRExC_state \
                                |U8 op|NN regnode *opnd|U32 depth
 Es     |void   |regtail        |NN struct RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index e320dc5..9af17f6 100644
--- a/embed.h
+++ b/embed.h
@@ -3756,7 +3756,7 @@
 #define reg_node(a,b)          S_reg_node(aTHX_ a,b)
 #define reg_recode(a,b)                S_reg_recode(aTHX_ a,b)
 #define regpiece(a,b,c)                S_regpiece(aTHX_ a,b,c)
-#define reg_namedseq(a,b)      S_reg_namedseq(aTHX_ a,b)
+#define reg_namedseq(a,b,c)    S_reg_namedseq(aTHX_ a,b,c)
 #define reginsert(a,b,c,d)     S_reginsert(aTHX_ a,b,c,d)
 #define regtail(a,b,c,d)       S_regtail(aTHX_ a,b,c,d)
 #define reg_scan_name(a,b)     S_reg_scan_name(aTHX_ a,b)
diff --git a/pod/perl5110delta.pod b/pod/perl5110delta.pod
index afd1b35..720b5d2 100644
--- a/pod/perl5110delta.pod
+++ b/pod/perl5110delta.pod
@@ -109,6 +109,14 @@ to avoid relying on the object's underlying structure).
 This pragma allows you to lexically disable or enable overloading
 for some or all operations. (Yuval Kogman)
 
+=head2 C<\N> regex escape
+
+A new regex escape has been added, C<\N>. It will match any character that
+is not a newline, independently from the presence or absence of the single
+line match modifier C</s>. (If C<\N> is followed by an opening brace and
+by a letter, perl will still assume that a Unicode character name is
+coming, so compatibility is preserved.) (Rafael Garcia-Suarez)
+
 =head1 Modules and Pragmata
 
 =head2 Pragmata Changes
diff --git a/pod/perlre.pod b/pod/perlre.pod
index a076d3a..ee1c2cb 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -271,6 +271,7 @@ X<word> X<whitespace> X<character class> X<backreference>
     \g{name} Named backreference
     \k<name> Named backreference
     \K       Keep the stuff left of the \K, don't include it in $&
+    \N       Any character but \n
     \v       Vertical whitespace
     \V       Not vertical whitespace
     \h       Horizontal whitespace
diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod
index ddd7abe..40f73fc 100644
--- a/pod/perlrebackslash.pod
+++ b/pod/perlrebackslash.pod
@@ -83,6 +83,7 @@ quoted constructs>.
  \l                Lowercase next character.
  \L                Lowercase till \E.
  \n                (Logical) newline character.
+ \N                Any character but newline.
  \N{}              Named (Unicode) character.
  \p{}, \pP         Character with a Unicode property.
  \P{}, \PP         Character without a Unicode property.
diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
index a626dd9..930c0fc 100644
--- a/pod/perlrecharclass.pod
+++ b/pod/perlrecharclass.pod
@@ -38,7 +38,6 @@ Here are some examples:
  "\n" =~  /(?s:.)/  # Match (local 'single line' modifier)
  "ab" =~  /^.$/     # No match (dot matches one character)
 
-
 =head2 Backslashed sequences
 
 Perl regular expressions contain many backslashed sequences that
@@ -59,6 +58,7 @@ more detail below.
  \S             Match a non-white space character.
  \h             Match a horizontal white space character.
  \H             Match a character that isn't horizontal white space.
+ \N             Match a character that isn't newline.
  \v             Match a vertical white space character.
  \V             Match a character that isn't vertical white space.
  \pP, \p{Prop}  Match a character matching a Unicode property.
@@ -94,7 +94,7 @@ Any character that isn't matched by C<\w> will be matched by 
C<\W>.
 
 =head3 White space
 
-C<\s> matches any single character that is consider white space. In the
+C<\s> matches any single character that is considered white space. In the
 ASCII range, C<\s> matches the horizontal tab (C<\t>), the new line
 (C<\n>), the form feed (C<\f>), the carriage return (C<\r>), and the
 space (the vertical tab, C<\cK> is not matched by C<\s>).  The exact set
@@ -113,6 +113,12 @@ C<\h> will match any character that is considered 
horizontal white space;
 this includes the space and the tab characters. C<\H> will match any character
 that is not considered horizontal white space.
 
+C<\N>, like the dot, will match any character that is not a newline. The
+difference is that C<\N> will not be influenced by the single line C</s>
+regular expression modifier. (Note that, since C<\N{}> is also used for
+Unicode named characters, if C<\N> is followed by an opening brace and
+by a letter, perl will assume that a Unicode character name is coming.)
+
 C<\v> will match any character that is considered vertical white space;
 this includes the carriage return and line feed characters (newline).
 C<\V> will match any character that is not considered vertical white space.
diff --git a/pod/perlreref.pod b/pod/perlreref.pod
index b9fb3b0..87baab2 100644
--- a/pod/perlreref.pod
+++ b/pod/perlreref.pod
@@ -125,6 +125,7 @@ and L<perlunicode> for details.
    \S      A non-whitespace character
    \h      An horizontal white space
    \H      A non horizontal white space
+   \N      A non newline (like . without /s)
    \v      A vertical white space
    \V      A non vertical white space
    \R      A generic newline           (?>\v|\x0D\x0A)
diff --git a/pod/perltodo.pod b/pod/perltodo.pod
index cf0304a..c84b51e 100644
--- a/pod/perltodo.pod
+++ b/pod/perltodo.pod
@@ -1101,12 +1101,6 @@ combines the code in pp_entersub, pp_leavesub.  This 
should probably
 be done 1st in XS, and using B::Generate to patch the new OP into the
 optrees.
 
-=head2 C<\N>
-
-It should be possible to add a C<\N> regex assertion, meaning "every
-character except C<\n>Â° independently of the context. That would
-of course imply that C<\N> couldn't be followed by an opening C<{>.
-
 =head1 Big projects
 
 Tasks that will get your name mentioned in the description of the "Highlights
diff --git a/proto.h b/proto.h
index 78f17dd..285e05f 100644
--- a/proto.h
+++ b/proto.h
@@ -5178,7 +5178,7 @@ STATIC regnode*   S_regpiece(pTHX_ struct RExC_state_t 
*pRExC_state, I32 *flagp, U
 #define PERL_ARGS_ASSERT_REGPIECE      \
        assert(pRExC_state); assert(flagp)
 
-STATIC regnode*        S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, 
UV *valuep)
+STATIC regnode*        S_reg_namedseq(pTHX_ struct RExC_state_t *pRExC_state, 
UV *valuep, I32 *flagp)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_REG_NAMEDSEQ  \
        assert(pRExC_state)
diff --git a/regcomp.c b/regcomp.c
index e061528..bc7086f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6553,7 +6553,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
 /* reg_namedseq(pRExC_state,UVp)
    
    This is expected to be called by a parser routine that has 
-   recognized'\N' and needs to handle the rest. RExC_parse is 
+   recognized '\N' and needs to handle the rest. RExC_parse is
    expected to point at the first char following the N at the time
    of the call.
    
@@ -6567,11 +6567,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
    be returned to indicate failure. (This will NOT be a valid pointer 
    to a regnode.)
    
-   If value is null then it is assumed that we are parsing normal text
+   If valuep is null then it is assumed that we are parsing normal text
    and inserts a new EXACT node into the program containing the resolved
    string and returns a pointer to the new node. If the string is 
    zerolength a NOTHING node is emitted.
-   
+
    On success RExC_parse is set to the char following the endbrace.
    Parsing failures will generate a fatal errorvia vFAIL(...)
    
@@ -6585,7 +6585,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
    
  */
 STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) 
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
 {
     char * name;        /* start of the content of the name */
     char * endbrace;    /* endbrace following the name */
@@ -6597,8 +6597,22 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV 
*valuep)
  
     PERL_ARGS_ASSERT_REG_NAMEDSEQ;
    
-    if (*RExC_parse != '{') {
-        vFAIL("Missing braces on \\N{}");
+    if (*RExC_parse != '{' ||
+           (*RExC_parse == '{' && RExC_parse[1]
+            && strchr("0123456789", RExC_parse[1])))
+    {
+       GET_RE_DEBUG_FLAGS_DECL;
+       if (valuep)
+           /* no bare \N in a charclass */
+           vFAIL("Missing braces on \\N{}");
+       GET_RE_DEBUG_FLAGS;
+       nextchar(pRExC_state);
+       ret = reg_node(pRExC_state, REG_ANY);
+       *flagp |= HASWIDTH|SIMPLE;
+       RExC_naughty++;
+       RExC_parse--;
+        Set_Node_Length(ret, 1); /* MJD */
+       return ret;
     }
     name = RExC_parse+1;
     endbrace = strchr(RExC_parse, '}');
@@ -7159,12 +7173,12 @@ tryagain:
            }
            break;
         case 'N': 
-            /* Handle \N{NAME} here and not below because it can be 
+            /* Handle \N and \N{NAME} here and not below because it can be
             multicharacter. join_exact() will join them up later on. 
             Also this makes sure that things like /\N{BLAH}+/ and 
             \N{BLAH} being multi char Just Happen. dmq*/
             ++RExC_parse;
-            ret= reg_namedseq(pRExC_state, NULL); 
+            ret= reg_namedseq(pRExC_state, NULL, flagp); 
             break;
        case 'k':    /* Handle \k<NAME> and \k'NAME' */
        parse_named_seq:
@@ -7964,7 +7978,7 @@ parseit:
                     from earlier versions, OTOH that behaviour was broken
                     as well. */
                     UV v; /* value is register so we cant & it /grrr */
-                    if (reg_namedseq(pRExC_state, &v)) {
+                    if (reg_namedseq(pRExC_state, &v, NULL)) {
                         goto parseit;
                     }
                     value= v; 
diff --git a/t/op/re_tests b/t/op/re_tests
index f9b070d..0c04840 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -31,6 +31,12 @@ ab*bc        abbbbc  y       $+[0]   6
 .{3,4} abbbbc  y       $&      abbb
 .{3,4} abbbbc  y       $-[0]   0
 .{3,4} abbbbc  y       $+[0]   4
+\N{1}  abbbbc  y       $&      a
+\N{1}  abbbbc  y       $-[0]   0
+\N{1}  abbbbc  y       $+[0]   1
+\N{3,4}        abbbbc  y       $&      abbb
+\N{3,4}        abbbbc  y       $-[0]   0
+\N{3,4}        abbbbc  y       $+[0]   4
 ab{0,}bc       abbbbc  y       $&      abbbbc
 ab{0,}bc       abbbbc  y       $-[0]   0
 ab{0,}bc       abbbbc  y       $+[0]   6
@@ -69,8 +75,11 @@ abc$ aabcd   n       -       -
 $      abc     y       $&      
 a.c    abc     y       $&      abc
 a.c    axc     y       $&      axc
+a\Nc   abc     y       $&      abc
 a.*c   axyzc   y       $&      axyzc
+a\N*c  axyzc   y       $&      axyzc
 a.*c   axyzd   n       -       -
+a\N*c  axyzd   n       -       -
 a[bc]d abc     n       -       -
 a[bc]d abd     y       $&      abd
 a[b]d  abd     y       $&      abd
@@ -78,6 +87,7 @@ a[b]d abd     y       $&      abd
 .[b].  abd     y       $&      abd
 .[b].  aBd     n       -       -
 (?i:.[b].)     abd     y       $&      abd
+(?i:\N[b]\N)   abd     y       $&      abd
 a[b-d]e        abd     n       -       -
 a[b-d]e        ace     y       $&      ace
 a[b-d] aac     y       $&      ac
@@ -315,6 +325,7 @@ a[-]?c      ac      y       $&      ac
 '$'i   ABC     y       $&      
 'a.c'i ABC     y       $&      ABC
 'a.c'i AXC     y       $&      AXC
+'a\Nc'i        ABC     y       $&      ABC
 'a.*?c'i       AXYZC   y       $&      AXYZC
 'a.*c'i        AXYZD   n       -       -
 'a[bc]d'i      ABC     n       -       -
@@ -497,8 +508,11 @@ a(?:b|(c|e){1,2}?|d)+?(.)  ace     y       $1$2    ce
 '(?-i:a)b'i    AB      n       -       -
 '((?-i:a))b'i  AB      n       -       -
 '((?-i:a.))b'i a\nB    n       -       -
+'((?-i:a\N))b'i        a\nB    n       -       -
 '((?s-i:a.))b'i        a\nB    y       $1      a\n
+'((?s-i:a\N))b'i       a\nB    n       -       -
 '((?s-i:a.))b'i        B\nB    n       -       -
+'((?s-i:a\N))b'i       B\nB    n       -       -
 (?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))       cabbbb  y       $&      
cabbbb
 
(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))
    caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb       y       $&      
caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
 '(ab)\d\1'i    Ab4ab   y       $1      Ab
@@ -531,6 +545,8 @@ x(~~)*(?:(?:F)?)?   x~~     y       -       -
 ((?s).)c(?!.)  a\nb\nc\n       y       $1:$&   \n:\nc
 ((?s)b.)c(?!.) a\nb\nc\n       y       $1      b\n
 ((?s)b.)c(?!.) a\nb\nc\n       y       $1:$&   b\n:b\nc
+((?s)b.)c(?!\N)        a\nb\nc\n       y       $1:$&   b\n:b\nc
+'(b.)c(?!\N)'s a\nb\nc\n       y       $1:$&   b\n:b\nc
 ^b     a\nb\nc\n       n       -       -
 ()^b   a\nb\nc\n       n       -       -
 ((?m)^b)       a\nb\nc\n       y       $1      b
@@ -1294,6 +1310,7 @@ X(\w+)(?=\s)|X(\w+)       Xab     y       [$1-$2] [-ab]
 (?|(?|(a)|(b))|(?|(c)|(d)))    c       y       $1      c
 (?|(?|(a)|(b))|(?|(c)|(d)))    d       y       $1      d
 (.)(?|(.)(.)x|(.)d)(.) abcde   y       $1-$2-$3-$4-$5- b-c--e--
+(\N)(?|(\N)(\N)x|(\N)d)(\N)    abcde   y       $1-$2-$3-$4-$5- b-c--e--
 #Bug #41492
 (?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)    a       y       $&      a
 (?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A)    aa      y       $&      aa
@@ -1340,7 +1357,10 @@ foo(\h)bar       foo\tbar        y       $1      \t
 (\H)(\h)       foo\tbar        y       $1-$2   o-\t
 (\h)(\H)       foo\tbar        y       $1-$2   \t-b
 
-.*\z   foo\n   y       -       -
+.*\z   foo\n   y       -$&-    --
+\N*\z  foo\n   y       -$&-    --
+.*\Z   foo\n   y       -$&-    -foo-
+\N*\Z  foo\n   y       -$&-    -foo-
 ^(?:(\d)x)?\d$ 1       y       ${\(defined($1)?1:0)}   0       
 .*?(?:(\w)|(\w))x      abx     y       $1-$2   b-
 

--
Perl5 Master Repository

[perl.git] branch blead, updated. GitLive-blead-1389-ge061ec8

Reply via email to