In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/3bc0c78cf118c075b168ed58223722e9bc7ffd5a?hp=3c5aa26268facf2f11222e9c32dbafc7f4963105>
- Log ----------------------------------------------------------------- commit 3bc0c78cf118c075b168ed58223722e9bc7ffd5a Author: Karl Williamson <[email protected]> Date: Tue Jun 6 01:54:46 2017 -0600 utf8.c: Clarify pod for three functions utf8_to_bytes(), bytes_from_utf8(), bytes_to_utf8() M utf8.c commit 09af03361c90e3f2094b595f3a92cf89ed56a3b9 Author: Karl Williamson <[email protected]> Date: Tue Jun 6 01:45:32 2017 -0600 utf8.c: Change formal parameter name The parameter "len" really is a pointer in utf8_to_bytes(), bytes_from_utf8(), and bytes_to_utf8(). Call it lenp. The documentation was sloppy about it; clean that up. M embed.fnc M proto.h M utf8.c ----------------------------------------------------------------------- Summary of changes: embed.fnc | 6 +++--- proto.h | 12 ++++++------ utf8.c | 58 +++++++++++++++++++++++++++++++++++----------------------- 3 files changed, 44 insertions(+), 32 deletions(-) diff --git a/embed.fnc b/embed.fnc index 42450451c7..f5ca638515 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1789,11 +1789,11 @@ AipdRn |U8* |utf8_hop |NN const U8 *s|SSize_t off AipdRn |U8* |utf8_hop_back|NN const U8 *s|SSize_t off|NN const U8 *start AipdRn |U8* |utf8_hop_forward|NN const U8 *s|SSize_t off|NN const U8 *end AipdRn |U8* |utf8_hop_safe |NN const U8 *s|SSize_t off|NN const U8 *start|NN const U8 *end -ApMd |U8* |utf8_to_bytes |NN U8 *s|NN STRLEN *len +ApMd |U8* |utf8_to_bytes |NN U8 *s|NN STRLEN *lenp Apd |int |bytes_cmp_utf8 |NN const U8 *b|STRLEN blen|NN const U8 *u \ |STRLEN ulen -ApMd |U8* |bytes_from_utf8|NN const U8 *s|NN STRLEN *len|NULLOK bool *is_utf8 -ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *len +ApMd |U8* |bytes_from_utf8|NN const U8 *s|NN STRLEN *lenp|NULLOK bool *is_utf8 +ApMd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *lenp ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen ApdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen ApMD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen diff --git a/proto.h b/proto.h index 3b6922d503..06df4e611b 100644 --- a/proto.h +++ b/proto.h @@ -256,12 +256,12 @@ PERL_CALLCONV void Perl_boot_core_mro(pTHX); PERL_CALLCONV int Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen); #define PERL_ARGS_ASSERT_BYTES_CMP_UTF8 \ assert(b); assert(u) -PERL_CALLCONV U8* Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8); +PERL_CALLCONV U8* Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8); #define PERL_ARGS_ASSERT_BYTES_FROM_UTF8 \ - assert(s); assert(len) -PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *len); + assert(s); assert(lenp) +PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp); #define PERL_ARGS_ASSERT_BYTES_TO_UTF8 \ - assert(s); assert(len) + assert(s); assert(lenp) PERL_CALLCONV I32 Perl_call_argv(pTHX_ const char* sub_name, I32 flags, char** argv); #define PERL_ARGS_ASSERT_CALL_ARGV \ assert(sub_name); assert(argv) @@ -3606,9 +3606,9 @@ PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ const U8* s, const U8 *e) #define PERL_ARGS_ASSERT_UTF8_LENGTH \ assert(s); assert(e) -PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len); +PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp); #define PERL_ARGS_ASSERT_UTF8_TO_BYTES \ - assert(s); assert(len) + assert(s); assert(lenp) PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) __attribute__deprecated__; #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \ diff --git a/utf8.c b/utf8.c index 7e8a5db272..c84fa98310 100644 --- a/utf8.c +++ b/utf8.c @@ -1907,10 +1907,14 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen) /* =for apidoc utf8_to_bytes -Converts a string C<s> of length C<len> from UTF-8 into native byte encoding. +Converts a string C<"s"> of length C<*lenp> from UTF-8 into native byte encoding. Unlike L</bytes_to_utf8>, this over-writes the original string, and -updates C<len> to contain the new length. -Returns zero on failure, setting C<len> to -1. +updates C<*lenp> to contain the new length. +Returns zero on failure (leaving C<"s"> unchanged) setting C<*lenp> to -1. + +Upon successful return, the number of variants in the string can be computed by +saving the value of C<*lenp> before the call, and subtracting the after-call +value of C<*lenp> from it. If you need a copy of the string, see L</bytes_from_utf8>. @@ -1918,7 +1922,7 @@ If you need a copy of the string, see L</bytes_from_utf8>. */ U8 * -Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len) +Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp) { U8 * first_variant; @@ -1926,13 +1930,13 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len) PERL_UNUSED_CONTEXT; /* This is a no-op if no variants at all in the input */ - if (is_utf8_invariant_string_loc(s, *len, (const U8 **) &first_variant)) { + if (is_utf8_invariant_string_loc(s, *lenp, (const U8 **) &first_variant)) { return s; } { U8 * const save = s; - U8 * const send = s + *len; + U8 * const send = s + *lenp; U8 * d; /* Nothing before the first variant needs to be changed, so start the real @@ -1941,7 +1945,7 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len) while (s < send) { if (! UTF8_IS_INVARIANT(*s)) { if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) { - *len = ((STRLEN) -1); + *lenp = ((STRLEN) -1); return 0; } s++; @@ -1961,7 +1965,7 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len) *d++ = c; } *d = '\0'; - *len = d - save; + *lenp = d - save; return save; } @@ -1970,7 +1974,7 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len) /* =for apidoc bytes_from_utf8 -Converts a potentially UTF-8 encoded string C<s> of length C<len> into native +Converts a potentially UTF-8 encoded string C<s> of length C<*lenp> into native byte encoding. On input, the boolean C<*is_utf8> gives whether or not C<s> is actually encoded in UTF-8. @@ -1979,17 +1983,21 @@ the input string. Do nothing if C<*is_utf8> is 0, or if there are code points in the string not expressible in native byte encoding. In these cases, C<*is_utf8> and -C<*len> are unchanged, and the return value is the original C<s>. +C<*lenp> are unchanged, and the return value is the original C<s>. Otherwise, C<*is_utf8> is set to 0, and the return value is a pointer to a newly created string containing a downgraded copy of C<s>, and whose length is -returned in C<*len>, updated. +returned in C<*lenp>, updated. + +Upon successful return, the number of variants in the string can be computed by +saving the value of C<*lenp> before the call, and subtracting the after-call +value of C<*lenp> from it. =cut */ U8 * -Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8) +Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8) { U8 *d; const U8 *start = s; @@ -2002,7 +2010,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8) return (U8 *)start; /* ensure valid UTF-8 and chars < 256 before converting string */ - for (send = s + *len; s < send;) { + for (send = s + *lenp; s < send;) { if (! UTF8_IS_INVARIANT(*s)) { if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) { return (U8 *)start; @@ -2015,7 +2023,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8) *is_utf8 = FALSE; - Newx(d, (*len) - count + 1, U8); + Newx(d, (*lenp) - count + 1, U8); if (LIKELY(count)) { s = start; start = d; @@ -2029,13 +2037,13 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8) *d++ = c; } *d = '\0'; - *len = d - start; + *lenp = d - start; return (U8 *)start; } else { - Copy(start, d, *len, U8); - *(d + *len) = '\0'; + Copy(start, d, *lenp, U8); + *(d + *lenp) = '\0'; return (U8 *)d; } } @@ -2043,11 +2051,15 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8) /* =for apidoc bytes_to_utf8 -Converts a string C<s> of length C<len> bytes from the native encoding into +Converts a string C<s> of length C<*lenp> bytes from the native encoding into UTF-8. -Returns a pointer to the newly-created string, and sets C<len> to +Returns a pointer to the newly-created string, and sets C<*lenp> to reflect the new length in bytes. +Upon successful return, the number of variants in the string can be computed by +saving the value of C<*lenp> before the call, and subtracting it from the +after-call value of C<*lenp>. + A C<NUL> character will be written after the end of the string. If you want to convert to UTF-8 from encodings other than @@ -2058,16 +2070,16 @@ see L</sv_recode_to_utf8>(). */ U8* -Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *len) +Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp) { - const U8 * const send = s + (*len); + const U8 * const send = s + (*lenp); U8 *d; U8 *dst; PERL_ARGS_ASSERT_BYTES_TO_UTF8; PERL_UNUSED_CONTEXT; - Newx(d, (*len) * 2 + 1, U8); + Newx(d, (*lenp) * 2 + 1, U8); dst = d; while (s < send) { @@ -2075,7 +2087,7 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *len) s++; } *d = '\0'; - *len = d-dst; + *lenp = d-dst; return dst; } -- Perl5 Master Repository
