[perl.git] branch blead, updated. v5.19.3-330-g37e7596

Karl Williamson Wed, 04 Sep 2013 21:54:22 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/37e7596b5cfb8d1be7030a330e12df2c13405f7a?hp=c17e15dee34dae6e6d689c296022c7ab3ba40d00>


- Log -----------------------------------------------------------------
commit 37e7596b5cfb8d1be7030a330e12df2c13405f7a
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 4 22:31:01 2013 -0600

    Move functions prematurely placed into mathoms back to utf8.c
    
    These functions are still called by some CPAN-upstream modules, so can't
    go into mathoms until those are fixed.  There are other changes needed
    in these modules, so I'm deferring sending patches to their maintainers
    until I know all the necessary changes.

M       mathoms.c
M       utf8.c

commit 1c1d615a5faeb27465c1ef0ce54e745652ae4341
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 4 22:26:07 2013 -0600

    regcomp.c: Don't use mathoms function
    
    utf8n_to_uvuni() is on its way out.  Instead, this code knows that the
    UTF-8 it is looking at is valid (having been already checked earlier in
    the regex compilation process), so use valid_utf8_to_uvchr().  This
    allows us to get rid of a flag variable.

M       regcomp.c

commit 57b0056d37c7c86d8db0e0ced8d2618311eb4457
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 4 21:53:08 2013 -0600

    perlapi: Remove newly obsolete statement
    
    Since commit 010ab96b9b802bbf77168b5af384569e053cdb63, this function is
    no longer a wrapper, so shouldn't be described as such.

M       utf8.c
-----------------------------------------------------------------------

Summary of changes:
 mathoms.c | 54 ------------------------------------------------------
 regcomp.c |  3 +--
 utf8.c    | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/mathoms.c b/mathoms.c
index e33d0c6..cfb0d5b 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -704,14 +704,6 @@ Perl_init_i18nl14n(pTHX_ int printwarn)
     return init_i18nl10n(printwarn);
 }
 
-U8 *
-Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
-{
-    PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
-
-    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0);
-}
-
 bool
 Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep)
 {
@@ -1227,52 +1219,6 @@ ASCII_TO_NEED(const UV enc, const UV ch)
     return ch;
 }
 
-/*
-=for apidoc uvuni_to_utf8_flags
-
-Instead you almost certainly want to use L</uvchr_to_utf8> or
-L</uvchr_to_utf8_flags>>.
-
-This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
-which itself, while not deprecated, should be used only in isolated
-circumstances.  These functions were useful for code that wanted to handle
-both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
-v5.20, the distinctions between the platforms have mostly been made invisible
-to most code, so this function is quite unlikely to be what you want.
-
-=cut
-*/
-
-U8 *
-Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
-{
-    PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
-
-    return uvoffuni_to_utf8_flags(d, uv, flags);
-}
-
-/*
-=for apidoc utf8n_to_uvuni
-
-Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
-
-This function was usefulfor code that wanted to handle both EBCDIC and
-ASCII platforms with Unicode properties, but starting in Perl v5.20, the
-distinctions between the platforms have mostly been made invisible to most
-code, so this function is quite unlikely to be what you want.
-C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
-
-=cut
-*/
-
-UV
-Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
-{
-    PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
-
-    return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags));
-}
-
 END_EXTERN_C
 
 #endif /* NO_MATHOMS */
diff --git a/regcomp.c b/regcomp.c
index e22dcd8..450ac90 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1473,7 +1473,7 @@ is the recommended Unicode-aware way of saying
     if ( UTF ) {                                                              \
         /* if it is UTF then it is either already folded, or does not need    \
          * folding */                                                         \
-        uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags);   \
+        uvc = valid_utf8_to_uvchr( (const U8*) uc, &len);                     \
     }                                                                         \
     else if (folder == PL_fold_latin1) {                                      \
         /* This folder implies Unicode rules, which in the range expressible  \
@@ -1573,7 +1573,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     HV *widecharmap = NULL;
     AV *revcharmap = newAV();
     regnode *cur;
-    const U32 uniflags = UTF8_ALLOW_DEFAULT;
     STRLEN len = 0;
     UV uvc = 0;
     U16 curword = 0;
diff --git a/utf8.c b/utf8.c
index 93e73a6..51b9822 100644
--- a/utf8.c
+++ b/utf8.c
@@ -967,8 +967,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 
 /* Like L</utf8_to_uvchr_buf>(), but should only be called when it is known that
  * there are no malformations in the input UTF-8 string C<s>.  surrogates,
- * non-character code points, and non-Unicode code points are allowed.  A macro
- * in utf8.h is used to normally avoid this function wrapper */
+ * non-character code points, and non-Unicode code points are allowed. */
 
 UV
 Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
@@ -4714,6 +4713,64 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
     return 1;
 }
 
+/* XXX The next four functions should likely be moved to mathoms.c once all
+ * occurrences of them are removed from the core; some cpan-upstream modules
+ * still use them */
+
+U8 *
+Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+{
+    PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
+
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0);
+}
+
+UV
+Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+{
+    PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+
+    return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags));
+}
+
+/*
+=for apidoc uvuni_to_utf8_flags
+
+Instead you almost certainly want to use L</uvchr_to_utf8> or
+L</uvchr_to_utf8_flags>>.
+
+This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
+which itself, while not deprecated, should be used only in isolated
+circumstances.  These functions were useful for code that wanted to handle
+both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
+v5.20, the distinctions between the platforms have mostly been made invisible
+to most code, so this function is quite unlikely to be what you want.
+
+=cut
+*/
+
+U8 *
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+    PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+
+    return uvoffuni_to_utf8_flags(d, uv, flags);
+}
+
+/*
+=for apidoc utf8n_to_uvuni
+
+Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
+
+This function was usefulfor code that wanted to handle both EBCDIC and
+ASCII platforms with Unicode properties, but starting in Perl v5.20, the
+distinctions between the platforms have mostly been made invisible to most
+code, so this function is quite unlikely to be what you want.
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
+
+=cut
+*/
+
 /*
  * Local variables:
  * c-indentation-style: bsd

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.19.3-330-g37e7596

Reply via email to