In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/945b524a8cdacbf82557d751252e6546f48d21ae?hp=4ba3adde25c4edf2e470f13677632b6d2f9e2fcd>
- Log ----------------------------------------------------------------- commit 945b524a8cdacbf82557d751252e6546f48d21ae Author: Jarkko Hietaniemi <[email protected]> Date: Wed Jul 23 22:32:45 2014 -0400 Share common constants as file statics. M numeric.c commit 75feedba47600d94d18d49dbcbdf46393b6c6cc5 Author: Jarkko Hietaniemi <[email protected]> Date: Wed Jul 23 10:38:50 2014 -0400 Use UV instead of Size_t. A strong reason for using UV are the promised custom codepoints, they go beyond 32-bit. The overflow logic didn't work if Size_t was 32-bit but the UV was 64-bit. Steal the battle-proven logic from grok_number_flags(). The numeric.xs or grok.t were not right in 32-bit, either. Add comments. M embed.fnc M ext/XS-APItest/numeric.xs M ext/XS-APItest/t/grok.t M numeric.c M proto.h ----------------------------------------------------------------------- Summary of changes: embed.fnc | 2 +- ext/XS-APItest/numeric.xs | 6 ++++- ext/XS-APItest/t/grok.t | 44 +++++++++++++++++++++++++----- numeric.c | 68 +++++++++++++++++++++-------------------------- proto.h | 2 +- 5 files changed, 75 insertions(+), 47 deletions(-) diff --git a/embed.fnc b/embed.fnc index d02e555..90c56ed 100644 --- a/embed.fnc +++ b/embed.fnc @@ -807,7 +807,7 @@ Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send Apd |UV |grok_oct |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result -Apdn |Size_t |grok_atou |NN const char* pv|NULLOK const char** endptr +Apdn |UV |grok_atou |NN const char* pv|NULLOK const char** endptr : These are all indirectly referenced by globals.c. This is somewhat annoying. p |int |magic_clearenv |NN SV* sv|NN MAGIC* mg p |int |magic_clear_all_env|NN SV* sv|NN MAGIC* mg diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs index 56c11f7..6d1ef82 100644 --- a/ext/XS-APItest/numeric.xs +++ b/ext/XS-APItest/numeric.xs @@ -51,5 +51,9 @@ grok_atou(number, endsv) if (endsv == &PL_sv_undef) { PUSHs(sv_2mortal(newSVpvn(NULL, 0))); } else { - PUSHs(sv_2mortal(newSViv(endptr - pv))); + if (endptr) { + PUSHs(sv_2mortal(newSViv(endptr - pv))); + } else { + PUSHs(sv_2mortal(newSViv(0))); + } } diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t index 501bea6..b41cb09 100644 --- a/ext/XS-APItest/t/grok.t +++ b/ext/XS-APItest/t/grok.t @@ -159,38 +159,65 @@ my @atous = [ "012", "012", $ATOU_MAX, 0 ], ); -if ($Config{sizesize} == 8) { +# Values near overflow point. +if ($Config{uvsize} == 8) { push @atous, ( + # 32-bit values no problem for 64-bit. + [ "4294967293", "", 4294967293, 10, ], [ "4294967294", "", 4294967294, 10, ], [ "4294967295", "", 4294967295, 10, ], [ "4294967296", "", 4294967296, 10, ], + [ "4294967297", "", 4294967297, 10, ], + # This is well within 64-bit. [ "9999999999", "", 9999999999, 10, ], + # Values valid up to 64-bit and beyond. + [ "18446744073709551613", "", 18446744073709551613, 20, ], [ "18446744073709551614", "", 18446744073709551614, 20, ], [ "18446744073709551615", "", $ATOU_MAX, 20, ], - [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ], + [ "18446744073709551616", "", $ATOU_MAX, 0, ], + [ "18446744073709551617", "", $ATOU_MAX, 0, ], ); -} elsif ($Config{sizesize} == 4) { +} elsif ($Config{uvsize} == 4) { push @atous, ( + # Values valid up to 32-bit and beyond. + [ "4294967293", "", 4294967293, 10, ], [ "4294967294", "", 4294967294, 10, ], [ "4294967295", "", $ATOU_MAX, 10, ], [ "4294967296", "", $ATOU_MAX, 0, ], + [ "4294967297", "", $ATOU_MAX, 0, ], + # Still beyond 32-bit. + [ "4999999999", "", $ATOU_MAX, 0, ], + [ "5678901234", "", $ATOU_MAX, 0, ], + [ "6789012345", "", $ATOU_MAX, 0, ], + [ "7890123456", "", $ATOU_MAX, 0, ], + [ "8901234567", "", $ATOU_MAX, 0, ], + [ "9012345678", "", $ATOU_MAX, 0, ], [ "9999999999", "", $ATOU_MAX, 0, ], + [ "10000000000", "", $ATOU_MAX, 0, ], + [ "12345678901", "", $ATOU_MAX, 0, ], + # 64-bit values are way beyond. + [ "18446744073709551613", "", $ATOU_MAX, 0, ], [ "18446744073709551614", "", $ATOU_MAX, 0, ], [ "18446744073709551615", "", $ATOU_MAX, 0, ], - [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ], + [ "18446744073709551616", "", $ATOU_MAX, 0, ], + [ "18446744073709551617", "", $ATOU_MAX, 0, ], ); } -# This will fail to fail once 128/256-bit systems arrive. +# These will fail to fail once 128/256-bit systems arrive. push @atous, ( - [ "99999999999999999999", "99999999999999999999", $ATOU_MAX, 0 ], + [ "23456789012345678901", "", $ATOU_MAX, 0 ], + [ "34567890123456789012", "", $ATOU_MAX, 0 ], + [ "98765432109876543210", "", $ATOU_MAX, 0 ], + [ "98765432109876543211", "", $ATOU_MAX, 0 ], + [ "99999999999999999999", "", $ATOU_MAX, 0 ], ); for my $grok (@atous) { @@ -207,7 +234,10 @@ for my $grok (@atous) { unless (length $grok->[1]) { is($out_len, $grok->[3], "'$input' $endsv - length sanity 2"); } # else { ... } ? - is($endsv, substr($input, $out_len), "'$input' $endsv - length success"); + if ($out_len) { + is($endsv, substr($input, $out_len), + "'$input' $endsv - length sanity 3"); + } # Then without endsv (undef == NULL). ($out_uv, $out_len) = grok_atou($input, undef); diff --git a/numeric.c b/numeric.c index 66e4e75..a203bf5 100644 --- a/numeric.c +++ b/numeric.c @@ -586,13 +586,14 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) return grok_number_flags(pv, len, valuep, 0); } +static const UV uv_max_div_10 = UV_MAX / 10; +static const U8 uv_max_mod_10 = UV_MAX % 10; + int Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) { const char *s = pv; const char * const send = pv + len; - const UV max_div_10 = UV_MAX / 10; - const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; @@ -660,9 +661,9 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) each time for overflow. */ digit = *s - '0'; while (digit >= 0 && digit <= 9 - && (value < max_div_10 - || (value == max_div_10 - && digit <= max_mod_10))) { + && (value < uv_max_div_10 + || (value == uv_max_div_10 + && digit <= uv_max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; @@ -804,7 +805,7 @@ leading whitespace, or negative inputs. If such features are required, the calling code needs to explicitly implement those. If a valid value cannot be parsed, returns either zero (if non-digits -are met before any digits) or Size_t_MAX (if the value overflows). +are met before any digits) or UV_MAX (if the value overflows). Note that extraneous leading zeros also count as an overflow (meaning that only "0" is the zero). @@ -825,51 +826,44 @@ seen as a bug (global state controlled by user environment). =cut */ -Size_t +UV Perl_grok_atou(const char *pv, const char** endptr) { const char* s = pv; const char** eptr; const char* end2; /* Used in case endptr is NULL. */ - /* With Size_t_size of 8 or 4 this works out to be the start plus - * either 20 or 10. When 128 or 256-bit systems became reality, - * this overshoots (should get 39, 78, but gets 40, 80). */ - const char* maxend = s + 10 * (Size_t_size / 4); - Size_t val = 0; /* The return value. */ + UV val = 0; /* The return value. */ PERL_ARGS_ASSERT_GROK_ATOU; eptr = endptr ? endptr : &end2; - if (isDIGIT(*s) && !isDIGIT(*(s + 1))) { - /* Single-digit inputs are quite common cases, and in addition - * the case of zero ("0") here simplifies the decoding loop: - * not having to think whether "000" or "000123" are valid - * (now they are invalid). */ + if (isDIGIT(*s)) { + /* Single-digit inputs are quite common. */ val = *s++ - '0'; - } else { - Size_t tmp = 0; /* Temporary accumulator. */ - - while (s < maxend && *s) { - /* This could be unrolled like in grok_number(), but - * the expected uses of this are not speed-needy, and - * unlikely to need full 64-bitness. */ - if (isDIGIT(*s)) { - int digit = *s++ - '0'; - tmp = tmp * 10 + digit; - if (tmp > val) { /* This implictly rejects leading zeros. */ - val = tmp; - } else { /* Overflow. */ + if (isDIGIT(*s)) { + /* Extra leading zeros cause overflow. */ + if (val == 0) { + *eptr = NULL; + return UV_MAX; + } + while (isDIGIT(*s)) { + /* This could be unrolled like in grok_number(), but + * the expected uses of this are not speed-needy, and + * unlikely to need full 64-bitness. */ + U8 digit = *s++ - '0'; + if (val < uv_max_div_10 || + (val == uv_max_div_10 && digit <= uv_max_mod_10)) { + val = val * 10 + digit; + } else { *eptr = NULL; - return Size_t_MAX; + return UV_MAX; } - } else { - break; } } - if (s == pv) { - *eptr = NULL; /* If no progress, failed to parse anything. */ - return 0; - } + } + if (s == pv) { + *eptr = NULL; /* If no progress, failed to parse anything. */ + return 0; } if (endptr == NULL && *s) { return 0; /* If endptr is NULL, no trailing non-digits allowed. */ diff --git a/proto.h b/proto.h index 1eccc46..6abd867 100644 --- a/proto.h +++ b/proto.h @@ -1289,7 +1289,7 @@ PERL_CALLCONV int Perl_getcwd_sv(pTHX_ SV* sv) PERL_CALLCONV void Perl_gp_free(pTHX_ GV* gv); PERL_CALLCONV GP* Perl_gp_ref(pTHX_ GP* gp); -PERL_CALLCONV Size_t Perl_grok_atou(const char* pv, const char** endptr) +PERL_CALLCONV UV Perl_grok_atou(const char* pv, const char** endptr) __attribute__nonnull__(1); #define PERL_ARGS_ASSERT_GROK_ATOU \ assert(pv) -- Perl5 Master Repository
