In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/37e7596b5cfb8d1be7030a330e12df2c13405f7a?hp=c17e15dee34dae6e6d689c296022c7ab3ba40d00>
- Log ----------------------------------------------------------------- commit 37e7596b5cfb8d1be7030a330e12df2c13405f7a Author: Karl Williamson <[email protected]> Date: Wed Sep 4 22:31:01 2013 -0600 Move functions prematurely placed into mathoms back to utf8.c These functions are still called by some CPAN-upstream modules, so can't go into mathoms until those are fixed. There are other changes needed in these modules, so I'm deferring sending patches to their maintainers until I know all the necessary changes. M mathoms.c M utf8.c commit 1c1d615a5faeb27465c1ef0ce54e745652ae4341 Author: Karl Williamson <[email protected]> Date: Wed Sep 4 22:26:07 2013 -0600 regcomp.c: Don't use mathoms function utf8n_to_uvuni() is on its way out. Instead, this code knows that the UTF-8 it is looking at is valid (having been already checked earlier in the regex compilation process), so use valid_utf8_to_uvchr(). This allows us to get rid of a flag variable. M regcomp.c commit 57b0056d37c7c86d8db0e0ced8d2618311eb4457 Author: Karl Williamson <[email protected]> Date: Wed Sep 4 21:53:08 2013 -0600 perlapi: Remove newly obsolete statement Since commit 010ab96b9b802bbf77168b5af384569e053cdb63, this function is no longer a wrapper, so shouldn't be described as such. 
M utf8.c ----------------------------------------------------------------------- Summary of changes: mathoms.c | 54 ------------------------------------------------------ regcomp.c | 3 +-- utf8.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 58 deletions(-) diff --git a/mathoms.c b/mathoms.c index e33d0c6..cfb0d5b 100644 --- a/mathoms.c +++ b/mathoms.c @@ -704,14 +704,6 @@ Perl_init_i18nl14n(pTHX_ int printwarn) return init_i18nl10n(printwarn); } -U8 * -Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) -{ - PERL_ARGS_ASSERT_UVUNI_TO_UTF8; - - return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0); -} - bool Perl_is_utf8_string_loc(pTHX_ const U8 *s, STRLEN len, const U8 **ep) { @@ -1227,52 +1219,6 @@ ASCII_TO_NEED(const UV enc, const UV ch) return ch; } -/* -=for apidoc uvuni_to_utf8_flags - -Instead you almost certainly want to use L</uvchr_to_utf8> or -L</uvchr_to_utf8_flags>>. - -This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>, -which itself, while not deprecated, should be used only in isolated -circumstances. These functions were useful for code that wanted to handle -both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl -v5.20, the distinctions between the platforms have mostly been made invisible -to most code, so this function is quite unlikely to be what you want. - -=cut -*/ - -U8 * -Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) -{ - PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS; - - return uvoffuni_to_utf8_flags(d, uv, flags); -} - -/* -=for apidoc utf8n_to_uvuni - -Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>. - -This function was usefulfor code that wanted to handle both EBCDIC and -ASCII platforms with Unicode properties, but starting in Perl v5.20, the -distinctions between the platforms have mostly been made invisible to most -code, so this function is quite unlikely to be what you want. 
-C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead. - -=cut -*/ - -UV -Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) -{ - PERL_ARGS_ASSERT_UTF8N_TO_UVUNI; - - return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); -} - END_EXTERN_C #endif /* NO_MATHOMS */ diff --git a/regcomp.c b/regcomp.c index e22dcd8..450ac90 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1473,7 +1473,7 @@ is the recommended Unicode-aware way of saying if ( UTF ) { \ /* if it is UTF then it is either already folded, or does not need \ * folding */ \ - uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags); \ + uvc = valid_utf8_to_uvchr( (const U8*) uc, &len); \ } \ else if (folder == PL_fold_latin1) { \ /* This folder implies Unicode rules, which in the range expressible \ @@ -1573,7 +1573,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs HV *widecharmap = NULL; AV *revcharmap = newAV(); regnode *cur; - const U32 uniflags = UTF8_ALLOW_DEFAULT; STRLEN len = 0; UV uvc = 0; U16 curword = 0; diff --git a/utf8.c b/utf8.c index 93e73a6..51b9822 100644 --- a/utf8.c +++ b/utf8.c @@ -967,8 +967,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen) /* Like L</utf8_to_uvchr_buf>(), but should only be called when it is known that * there are no malformations in the input UTF-8 string C<s>. surrogates, - * non-character code points, and non-Unicode code points are allowed. A macro - * in utf8.h is used to normally avoid this function wrapper */ + * non-character code points, and non-Unicode code points are allowed. 
*/ UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) @@ -4714,6 +4713,64 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c return 1; } +/* XXX The next four functions should likely be moved to mathoms.c once all + * occurrences of them are removed from the core; some cpan-upstream modules + * still use them */ + +U8 * +Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) +{ + PERL_ARGS_ASSERT_UVUNI_TO_UTF8; + + return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0); +} + +UV +Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) +{ + PERL_ARGS_ASSERT_UTF8N_TO_UVUNI; + + return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); +} + +/* +=for apidoc uvuni_to_utf8_flags + +Instead you almost certainly want to use L</uvchr_to_utf8> or +L</uvchr_to_utf8_flags>>. + +This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>, +which itself, while not deprecated, should be used only in isolated +circumstances. These functions were useful for code that wanted to handle +both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl +v5.20, the distinctions between the platforms have mostly been made invisible +to most code, so this function is quite unlikely to be what you want. + +=cut +*/ + +U8 * +Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) +{ + PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS; + + return uvoffuni_to_utf8_flags(d, uv, flags); +} + +/* +=for apidoc utf8n_to_uvuni + +Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>. + +This function was usefulfor code that wanted to handle both EBCDIC and +ASCII platforms with Unicode properties, but starting in Perl v5.20, the +distinctions between the platforms have mostly been made invisible to most +code, so this function is quite unlikely to be what you want. +C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead. + +=cut +*/ + /* * Local variables: * c-indentation-style: bsd -- Perl5 Master Repository
