In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/f43791029d4ac4a5dbfd6ad9b67cb5407ac32e2a?hp=6e75769d300856d7c215fc22d503cce13b734a0f>
- Log ----------------------------------------------------------------- commit f43791029d4ac4a5dbfd6ad9b67cb5407ac32e2a Author: Jarkko Hietaniemi <[email protected]> Date: Tue Jul 22 07:46:26 2014 -0400 Introduce maxend for paranoia. Also, comments. M numeric.c commit fdadaf77ca45094e35ce724d7c91001f84b083c7 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 21:29:22 2014 -0400 Atol can be strtol in disguise, so grok_atou. M mg.c commit 68419f9c61ef7c22b1225655e7e3b38058c70a71 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 15:16:33 2014 -0400 Add strtoul and strtol to avoidables. M t/porting/libperl.t commit 338aa8b061f430c2b3d9deaeed0aec523639aff7 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 15:15:42 2014 -0400 Document grok_atou as strtoul replacement. M numeric.c M pod/perlclib.pod M pod/perlhacktips.pod commit 999448781bc711c8732271b98a45a724f7357c46 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 14:59:58 2014 -0400 Use grok_atou instead of strtoul (no explicit strtol uses). M gv.c M utf8.c commit c98823ffa61e4daf92a7a17ab937753b2c280c13 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 10:56:49 2014 -0400 Advertise grok_atou. M pod/perlclib.pod M pod/perlhacktips.pod commit a7941017b561ee4cf4e5f4ac4ebb6c9e684303ed Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 10:53:10 2014 -0400 Add atoi to avoidables. M t/porting/libperl.t commit dd52de8081680731af4e00f224c756ed5c3a510f Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 10:07:05 2014 -0400 Add tests for grok_atou. M ext/XS-APItest/numeric.xs M ext/XS-APItest/t/grok.t commit 96e440d2eb546f4493feffce002f2ec8886f13a3 Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 10:50:54 2014 -0400 Use grok_atou instead of atoi. Remaining atoi() uses include at least: ext/DynaLoader/dl_aix.xs, os2/os2.c, vms/vms.c M doio.c M ext/DynaLoader/dlutils.c M locale.c M malloc.c M perl.c M pp_sys.c M regcomp.c M toke.c M util.c commit 6313e54401f5531a23184b7afaaf6bc7cd4a81ec Author: Jarkko Hietaniemi <[email protected]> Date: Mon Jul 21 10:41:20 2014 -0400 Implement grok_atou as safe/strict atoi replacement. For earlier discussion, see: http://www.nntp.perl.org/group/perl.perl5.porters/2013/10/msg208680.html https://rt.perl.org/Public/Bug/Display.html?id=116118#txn-1250187 grok_atou is completely new code, instead of trying to bolt new parameters/flags to grok_number. This makes it easier to be extremely strict, and not worry about breaking grok_number. M embed.fnc M embed.h M numeric.c M perl.h M proto.h ----------------------------------------------------------------------- Summary of changes: doio.c | 2 +- embed.fnc | 1 + embed.h | 1 + ext/DynaLoader/dlutils.c | 2 +- ext/XS-APItest/numeric.xs | 23 ++++++++++ ext/XS-APItest/t/grok.t | 114 ++++++++++++++++++++++++++++++++++++++++++++++ gv.c | 2 +- locale.c | 3 +- malloc.c | 4 +- mg.c | 12 +++-- numeric.c | 89 ++++++++++++++++++++++++++++++++++++ perl.c | 24 ++++++---- perl.h | 3 +- pod/perlclib.pod | 14 ++++-- pod/perlhacktips.pod | 16 +++++++ pp_sys.c | 2 +- proto.h | 5 ++ regcomp.c | 51 ++++++++++++--------- t/porting/libperl.t | 10 +++- toke.c | 2 +- utf8.c | 17 ++++--- util.c | 24 ++++++---- 22 files changed, 357 insertions(+), 64 deletions(-) diff --git a/doio.c b/doio.c index 46d0796..a631eeb 100644 --- a/doio.c +++ b/doio.c @@ -391,7 +391,7 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len, num_svs = 0; } else if (isDIGIT(*type)) { - wanted_fd = atoi(type); + wanted_fd = grok_atou(type, NULL); } else { const IO* thatio; diff --git a/embed.fnc b/embed.fnc index 241a769..d02e555 100644 --- a/embed.fnc +++ b/embed.fnc @@ -807,6 +807,7 @@ Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send Apd |UV |grok_oct |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result +Apdn |Size_t |grok_atou |NN const char* pv|NULLOK const char** endptr : These are all indirectly referenced by globals.c. This is somewhat annoying. p |int |magic_clearenv |NN SV* sv|NN MAGIC* mg p |int |magic_clear_all_env|NN SV* sv|NN MAGIC* mg diff --git a/embed.h b/embed.h index efa1735..7ca719d 100644 --- a/embed.h +++ b/embed.h @@ -170,6 +170,7 @@ #define getcwd_sv(a) Perl_getcwd_sv(aTHX_ a) #define gp_free(a) Perl_gp_free(aTHX_ a) #define gp_ref(a) Perl_gp_ref(aTHX_ a) +#define grok_atou Perl_grok_atou #define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d) #define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d) #define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c) diff --git a/ext/DynaLoader/dlutils.c b/ext/DynaLoader/dlutils.c index 29d9b91..dea981a 100644 --- a/ext/DynaLoader/dlutils.c +++ b/ext/DynaLoader/dlutils.c @@ -104,7 +104,7 @@ dl_generic_private_init(pTHX) /* called by dl_*.xs dl_private_init() */ } #endif if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL ) - dl_nonlazy = atoi(perl_dl_nonlazy); + dl_nonlazy = grok_atou(perl_dl_nonlazy, NULL); if (dl_nonlazy) DLDEBUG(1,PerlIO_printf(Perl_debug_log, "DynaLoader bind mode is 'non-lazy'\n")); #ifdef DL_LOADONCEONLY diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs index ab48dba..56c11f7 100644 --- a/ext/XS-APItest/numeric.xs +++ b/ext/XS-APItest/numeric.xs @@ -30,3 +30,26 @@ grok_number_flags(number, flags) PUSHs(sv_2mortal(newSViv(result))); if (result & IS_NUMBER_IN_UV) PUSHs(sv_2mortal(newSVuv(value))); + +void +grok_atou(number, endsv) + SV *number + SV *endsv + PREINIT: + STRLEN len; + const char *pv = SvPV(number, len); + UV result; + const char* endptr; + PPCODE: + EXTEND(SP,2); + if (endsv == &PL_sv_undef) { + result = grok_atou(pv, NULL); + } else { + result = grok_atou(pv, &endptr); + } + PUSHs(sv_2mortal(newSVuv(result))); + if (endsv == &PL_sv_undef) { + PUSHs(sv_2mortal(newSVpvn(NULL, 0))); + } else { + PUSHs(sv_2mortal(newSViv(endptr - pv))); + } diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t index 2e035ee..501bea6 100644 --- a/ext/XS-APItest/t/grok.t +++ b/ext/XS-APItest/t/grok.t @@ -109,4 +109,118 @@ for my $grok (@groks) { is($out_flags, $grok->[3], "'$grok->[0]' flags $grok->[1] - check flags"); } +my $ATOU_MAX = ~0; + +# atou tests +my @atous = + ( + # [ input, endsv, out uv, out len ] + + # Basic cases. + [ "0", "", 0, 1 ], + [ "1", "", 1, 1 ], + [ "2", "", 2, 1 ], + [ "9", "", 9, 1 ], + [ "12", "", 12, 2 ], + [ "123", "", 123, 3 ], + + # Trailing whitespace is accepted or rejected, depending on endptr. + [ "0 ", " ", 0, 1 ], + [ "1 ", " ", 1, 1 ], + [ "2 ", " ", 2, 1 ], + [ "12 ", " ", 12, 2 ], + + # Trailing garbage is accepted or rejected, depending on endptr. + [ "0x", "x", 0, 1 ], + [ "1x", "x", 1, 1 ], + [ "2x", "x", 2, 1 ], + [ "12x", "x", 12, 2 ], + + # Leading whitespace is failure. + [ " 0", " 0", 0, 0 ], + [ " 1", " 1", 0, 0 ], + [ " 12", " 12", 0, 0 ], + + # Leading garbage is outright failure. + [ "x0", "x0", 0, 0 ], + [ "x1", "x1", 0, 0 ], + [ "x12", "x12", 0, 0 ], + + # We do not parse decimal point. + [ "12.3", ".3", 12, 2 ], + + # Leading pluses or minuses are no good. + [ "+12", "+12", 0, 0 ], + [ "-12", "-12", 0, 0 ], + + # Extra leading zeros cause overflow. + [ "00", "00", $ATOU_MAX, 0 ], + [ "01", "01", $ATOU_MAX, 0 ], + [ "012", "012", $ATOU_MAX, 0 ], + ); + +if ($Config{sizesize} == 8) { + push @atous, + ( + [ "4294967294", "", 4294967294, 10, ], + [ "4294967295", "", 4294967295, 10, ], + [ "4294967296", "", 4294967296, 10, ], + + [ "9999999999", "", 9999999999, 10, ], + + [ "18446744073709551614", "", 18446744073709551614, 20, ], + [ "18446744073709551615", "", $ATOU_MAX, 20, ], + [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ], + ); +} elsif ($Config{sizesize} == 4) { + push @atous, + ( + [ "4294967294", "", 4294967294, 10, ], + [ "4294967295", "", $ATOU_MAX, 10, ], + [ "4294967296", "", $ATOU_MAX, 0, ], + + [ "9999999999", "", $ATOU_MAX, 0, ], + + [ "18446744073709551614", "", $ATOU_MAX, 0, ], + [ "18446744073709551615", "", $ATOU_MAX, 0, ], + [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ], + ); +} + +# This will fail to fail once 128/256-bit systems arrive. +push @atous, + ( + [ "99999999999999999999", "99999999999999999999", $ATOU_MAX, 0 ], + ); + +for my $grok (@atous) { + my $input = $grok->[0]; + my $endsv = $grok->[1]; + + my ($out_uv, $out_len); + + # First with endsv. + ($out_uv, $out_len) = grok_atou($input, $endsv); + is($out_uv, $grok->[2], + "'$input' $endsv - number success (got $out_uv cf $grok->[2])"); + ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1"); + unless (length $grok->[1]) { + is($out_len, $grok->[3], "'$input' $endsv - length sanity 2"); + } # else { ... } ? + is($endsv, substr($input, $out_len), "'$input' $endsv - length success"); + + # Then without endsv (undef == NULL). + ($out_uv, $out_len) = grok_atou($input, undef); + if (length $grok->[1]) { + if ($grok->[2] == $ATOU_MAX) { + is($out_uv, $ATOU_MAX, "'$input' undef - number overflow"); + } else { + is($out_uv, 0, "'$input' undef - number zero"); + } + } else { + is($out_uv, $grok->[2], + "'$input' undef - number success (got $out_uv cf $grok->[2])"); + } +} + done_testing(); diff --git a/gv.c b/gv.c index 64bdbf1..8b43d91 100644 --- a/gv.c +++ b/gv.c @@ -1843,7 +1843,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, if (!isDIGIT(*end)) return addmg; } - paren = strtoul(name, NULL, 10); + paren = grok_atou(name, NULL); goto storeparen; } } diff --git a/locale.c b/locale.c index 85c438c..84ff0de 100644 --- a/locale.c +++ b/locale.c @@ -527,7 +527,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn) char *p; const bool locwarn = (printwarn > 1 || (printwarn && - (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); + (!(p = PerlEnv_getenv("PERL_BADLANG")) || + grok_atou(p, NULL)))); bool done = FALSE; #ifdef WIN32 /* In some systems you can find out the system default locale diff --git a/malloc.c b/malloc.c index a99663e..73a0480 100644 --- a/malloc.c +++ b/malloc.c @@ -1824,7 +1824,7 @@ Perl_mfree(Malloc_t where) if (bad_free_warn == -1) { dTHX; char *pbf = PerlEnv_getenv("PERL_BADFREE"); - bad_free_warn = (pbf) ? atoi(pbf) : 1; + bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1; } if (!bad_free_warn) return; @@ -1922,7 +1922,7 @@ Perl_realloc(void *mp, size_t nbytes) if (bad_free_warn == -1) { dTHX; char *pbf = PerlEnv_getenv("PERL_BADFREE"); - bad_free_warn = (pbf) ? atoi(pbf) : 1; + bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1; } if (!bad_free_warn) return NULL; diff --git a/mg.c b/mg.c index 28ed156..e1fc578 100644 --- a/mg.c +++ b/mg.c @@ -2891,6 +2891,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) { const char *p = SvPV_const(sv, len); Groups_t *gary = NULL; + const char* endptr; #ifdef _SC_NGROUPS_MAX int maxgrp = sysconf(_SC_NGROUPS_MAX); @@ -2902,19 +2903,20 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) while (isSPACE(*p)) ++p; - new_egid = (Gid_t)Atol(p); + new_egid = (Gid_t)grok_atou(p, &endptr); for (i = 0; i < maxgrp; ++i) { - while (*p && !isSPACE(*p)) - ++p; + if (endptr == NULL) + break; + p = endptr; while (isSPACE(*p)) ++p; if (!*p) break; - if(!gary) + if (!gary) Newx(gary, i + 1, Groups_t); else Renew(gary, i + 1, Groups_t); - gary[i] = (Groups_t)Atol(p); + gary[i] = (Groups_t)grok_atou(p, &endptr); } if (i) PERL_UNUSED_RESULT(setgroups(i, gary)); diff --git a/numeric.c b/numeric.c index 4876ece..7efd76e 100644 --- a/numeric.c +++ b/numeric.c @@ -786,6 +786,95 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) return 0; } +/* +=for perlapi + +grok_atou is a safer replacement for atoi and strtoul. + +atoi has severe problems with illegal inputs, cannot be used +for incremental parsing, and therefore should be avoided. + +atoi and strtoul are also affected by locale settings, which can +also be seen as a bug (global state controlled by user environment). + +grok_atou parses a C-style zero-byte terminated string. + +Returns the unsigned value, if a valid one can be parsed. + +Only the decimal digits '0'..'9' are accepted. + +As opposed to atoi or strtoul: +- does NOT allow optional leading whitespace +- does NOT allow negative inputs + +Also rejected are: +- leading plus signs +- leading zeros (meaning that only "0" is the zero) + +Trailing non-digit bytes are allowed if the endptr is non-NULL. +On return the *endptr will contain the pointer to the first non-digit byte. + +If the value overflows, returns Size_t_MAX, and sets the *endptr +to NULL, unless endptr is NULL. + +If the endptr is NULL, the first non-digit byte MUST be +the zero byte terminating the pv, or zero will be returned. + +=cut +*/ + +Size_t +Perl_grok_atou(const char *pv, const char** endptr) +{ + const char* s = pv; + const char** eptr; + const char* end2; /* Used in case endptr is NULL. */ + /* With Size_t_size of 8 or 4 this works out to be the start plus + * either 20 or 10. When 128 or 256-bit systems became reality, + * this overshoots (should get 39, 78, but gets 40 and 80). */ + const char* maxend = s + 10 * (Size_t_size / 4); + Size_t val = 0; /* The return value. */ + + PERL_ARGS_ASSERT_GROK_ATOU; + + eptr = endptr ? endptr : &end2; + if (isDIGIT(*s) && !isDIGIT(*(s + 1))) { + /* Quite common cases, and in addition the case of zero ("0") + * simplifies the decoding loop: not having to think whether + * "000" or "000123" are valid (now they are invalid). */ + val = *s++ - '0'; + } else { + Size_t tmp = 0; /* Temporary accumulator. */ + + while (s < maxend && *s) { + /* This could be unrolled like in grok_number(), but + * the expected uses of this are not speed-needy, and + * unlikely to need full 64-bitness. */ + if (isDIGIT(*s)) { + int digit = *s++ - '0'; + tmp = tmp * 10 + digit; + if (tmp > val) { /* Rejects leading zeros. */ + val = tmp; + } else { /* Overflow. */ + *eptr = NULL; + return Size_t_MAX; + } + } else { + break; + } + } + if (s == pv) { + *eptr = NULL; /* If no progress, failed to parse anything. */ + return 0; + } + } + if (endptr == NULL && *s) { + return 0; /* If endptr is NULL, no trailing non-digits allowed. */ + } + *eptr = s; + return val; +} + STATIC NV S_mulexp10(NV value, I32 exponent) { diff --git a/perl.c b/perl.c index 6e09931..e84f1d5 100644 --- a/perl.c +++ b/perl.c @@ -546,7 +546,12 @@ perl_destruct(pTHXx) { const char * const s = PerlEnv_getenv("PERL_DESTRUCT_LEVEL"); if (s) { - const int i = atoi(s); + int i; + if (strEQ(s, "-1")) { /* Special case: modperl folklore. */ + i = -1; + } else { + i = grok_atou(s, NULL); + } #ifdef DEBUGGING if (destruct_level < i) destruct_level = i; #endif @@ -1451,7 +1456,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) { const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG"); - if (s && (atoi(s) == 1)) { + if (s && (grok_atou(s, NULL) == 1)) { unsigned char *seed= PERL_HASH_SEED; unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES; PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", PERL_HASH_FUNC); @@ -2285,8 +2290,8 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit) #ifdef MYMALLOC { const char *s; - if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && atoi(s) >= 2) - dump_mstats("after compilation:"); + if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && grok_atou(s, NULL) >= 2) + dump_mstats("after compilation:"); } #endif @@ -3042,7 +3047,10 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) } } else if (isDIGIT(**s)) { - i = atoi(*s); + const char* e; + i = grok_atou(*s, &e); + if (e) + *s = e; for (; isWORDCHAR(**s); (*s)++) ; } else if (givehelp) { @@ -3650,9 +3658,9 @@ S_open_script(pTHX_ const char *scriptname, bool dosearch, bool *suidscript) if (strnEQ(scriptname, "/dev/fd/", 8) && isDIGIT(scriptname[8]) ) { const char *s = scriptname + 8; - fdscript = atoi(s); - while (isDIGIT(*s)) - s++; + const char* e; + fdscript = grok_atou(s, &e); + s = e; if (*s) { /* PSz 18 Feb 04 * Tell apart "normal" usage of fdscript, e.g. diff --git a/perl.h b/perl.h index 54f6dca..202e55e 100644 --- a/perl.h +++ b/perl.h @@ -1677,7 +1677,8 @@ typedef UVTYPE UV; # endif #endif -#define SSize_t_MAX (SSize_t)(~(size_t)0 >> 1) +#define Size_t_MAX (~(Size_t)0) +#define SSize_t_MAX (SSize_t)(~(Size_t)0 >> 1) #define IV_DIG (BIT_DIGITS(IVSIZE * 8)) #define UV_DIG (BIT_DIGITS(UVSIZE * 8)) diff --git a/pod/perlclib.pod b/pod/perlclib.pod index 23cca04..b4ebe4e 100644 --- a/pod/perlclib.pod +++ b/pod/perlclib.pod @@ -200,14 +200,20 @@ C<toUPPER_uni>, as described in L<perlapi/Character case changing>.) Instead Of: Use: atof(s) Atof(s) - atol(s) Atol(s) + atoi(s) grok_atou(s, &e) + atol(s) grok_atou(s, &e) strtod(s, &p) Nothing. Just don't use it. - strtol(s, &p, n) Strtol(s, &p, n) - strtoul(s, &p, n) Strtoul(s, &p, n) + strtol(s, &p, n) grok_atou(s, &e) + strtoul(s, &p, n) grok_atou(s, &e) Notice also the C<grok_bin>, C<grok_hex>, and C<grok_oct> functions in F<numeric.c> for converting strings representing numbers in the respective -bases into C<NV>s. +bases into C<NV>s. Note that grok_atou() doesn't handle negative inputs, +or leading whitespace (being purposefully strict). It also doesn't always +handle full IV/UV-range, being limited to Size_t. + +Note that strtol() and strtoul() may be disguised as Strtol(), Strtoul(), +Atol(), Atoul(). Avoid those, too. In theory C<Strtol> and C<Strtoul> may not be defined if the machine perl is built on doesn't actually have strtol and strtoul. But as those 2 diff --git a/pod/perlhacktips.pod b/pod/perlhacktips.pod index c673dde..3d477da 100644 --- a/pod/perlhacktips.pod +++ b/pod/perlhacktips.pod @@ -623,6 +623,22 @@ of the program is UTF-8. What happens is that the C<%s> and its operand are simply skipped without any notice. L<https://sourceware.org/bugzilla/show_bug.cgi?id=6530>. +=item * + +Do not use atoi() + +Use grok_atou() instead. atoi() has ill-defined behavior on overflows, +and cannot be used for incremental parsing. It is also affected by locale, +which is bad. + +=item * + +Do not use strtol() or strtoul() + +Use grok_atou() instead. strtol() or strtoul() (or their IV/UV-friendly +macro disguises, Strtol() and Strtoul(), or Atol() and Atoul() are +affected by locale, which is bad. + =back =head1 DEBUGGING diff --git a/pp_sys.c b/pp_sys.c index 54c12b3..501146e 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -3295,7 +3295,7 @@ PP(pp_fttty) if (GvIO(gv) && IoIFP(GvIOp(gv))) fd = PerlIO_fileno(IoIFP(GvIOp(gv))); else if (name && isDIGIT(*name)) - fd = atoi(name); + fd = grok_atou(name, NULL); else FT_RETURNUNDEF; if (fd < 0) { diff --git a/proto.h b/proto.h index 49a44d2..1eccc46 100644 --- a/proto.h +++ b/proto.h @@ -1289,6 +1289,11 @@ PERL_CALLCONV int Perl_getcwd_sv(pTHX_ SV* sv) PERL_CALLCONV void Perl_gp_free(pTHX_ GV* gv); PERL_CALLCONV GP* Perl_gp_ref(pTHX_ GP* gp); +PERL_CALLCONV Size_t Perl_grok_atou(const char* pv, const char** endptr) + __attribute__nonnull__(1); +#define PERL_ARGS_ASSERT_GROK_ATOU \ + assert(pv) + PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index 3d4d348..0f70a9e 100644 --- a/regcomp.c +++ b/regcomp.c @@ -9605,6 +9605,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) else if (*RExC_parse == '?') { /* (?...) */ bool is_logical = 0; const char * const seqstart = RExC_parse; + const char * endptr; if (has_intervening_patws) { RExC_parse++; vFAIL("In '(?...)', the '(' and '?' must be adjacent"); @@ -9814,12 +9815,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case '5': case '6': case '7': case '8': case '9': RExC_parse--; parse_recursion: - num = atoi(RExC_parse); - parse_start = RExC_parse - 1; /* MJD */ - if (*RExC_parse == '-') - RExC_parse++; - while (isDIGIT(*RExC_parse)) - RExC_parse++; + { + bool is_neg = FALSE; + parse_start = RExC_parse - 1; /* MJD */ + if (*RExC_parse == '-') { + RExC_parse++; + is_neg = TRUE; + } + num = grok_atou(RExC_parse, &endptr); + if (endptr) + RExC_parse = (char*)endptr; + if (is_neg) { + /* Some limit for num? */ + num = -num; + } + } if (*RExC_parse!=')') vFAIL("Expecting close bracket"); @@ -9996,9 +10006,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; parno = 0; if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) { - parno = atoi(RExC_parse++); - while (isDIGIT(*RExC_parse)) - RExC_parse++; + parno = grok_atou(RExC_parse, &endptr); + if (endptr) + RExC_parse = (char*)endptr; } else if (RExC_parse[0] == '&') { SV *sv_dat; RExC_parse++; @@ -10015,10 +10025,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* (?(1)...) */ char c; char *tmp; - parno = atoi(RExC_parse++); - - while (isDIGIT(*RExC_parse)) - RExC_parse++; + parno = grok_atou(RExC_parse, &endptr); + if (endptr) + RExC_parse = (char*)endptr; ret = reganode(pRExC_state, GROUPP, parno); insert_if_check_paren: @@ -10492,15 +10501,16 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) next++; } if (*next == '}') { /* got one */ + const char* endptr; if (!maxpos) maxpos = next; RExC_parse++; - min = atoi(RExC_parse); + min = grok_atou(RExC_parse, &endptr); if (*maxpos == ',') maxpos++; else maxpos = RExC_parse; - max = atoi(maxpos); + max = grok_atou(maxpos, &endptr); if (!max && *maxpos != '0') max = REG_INFTY; /* meaning "infinity" */ else if (max >= REG_INFTY) @@ -11147,18 +11157,17 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, } -/* return atoi(p), unless it's too big to sensibly be a backref, +/* Parse backref decimal value, unless it's too big to sensibly be a backref, * in which case return I32_MAX (rather than possibly 32-bit wrapping) */ static I32 S_backref_value(char *p) { - char *q = p; - - for (;isDIGIT(*q); q++) {} /* calculate length of num */ - if (q - p == 0 || q - p > 9) + const char* endptr; + Size_t val = grok_atou(p, &endptr); + if (endptr == p || endptr == NULL || val > 999999999) return I32_MAX; - return atoi(p); + return val; } diff --git a/t/porting/libperl.t b/t/porting/libperl.t index 7bd2198..9562556 100644 --- a/t/porting/libperl.t +++ b/t/porting/libperl.t @@ -482,7 +482,9 @@ for my $symbol (sort keys %expected) { # (One exception: for certain floating point outputs # the native sprintf is still used in some platforms, see below.) # -# XXX: add atoi() to %unexpected - unsafe and undefined failure modes. +# atoi has unsafe and undefined failure modes, and is affected by locale. +# +# strtol and strtoul are affected by locale. # my %unexpected; @@ -494,6 +496,12 @@ for my $str (qw(strcat strcpy strncat strncpy)) { $unexpected{$str} = undef; # No Configure symbol for these. } +$unexpected{atoi} = undef; # No Configure symbol for atoi. + +for my $str (qw(strtol strtoul)) { + $unexpected{$str} = "d_$str"; +} + for my $symbol (sort keys %unexpected) { if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { SKIP: { diff --git a/toke.c b/toke.c index 2842115..cb379ef 100644 --- a/toke.c +++ b/toke.c @@ -1686,7 +1686,7 @@ S_incline(pTHX_ const char *s) if (*e != '\n' && *e != '\0') return; /* false alarm */ - line_num = atoi(n)-1; + line_num = grok_atou(n, &e) - 1; if (t - s > 0) { const STRLEN len = t - s; diff --git a/utf8.c b/utf8.c index 279d96f..aa63504 100644 --- a/utf8.c +++ b/utf8.c @@ -3481,22 +3481,24 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) lend = l + lcur; if (*l == 'V') { /* Inversion list format */ - char *after_strtol = (char *) lend; + const char *after_atou = (char *) lend; UV element0; UV* other_elements_ptr; /* The first number is a count of the rest */ l++; - elements = Strtoul((char *)l, &after_strtol, 10); + elements = grok_atou((const char *)l, &after_atou); if (elements == 0) { invlist = _new_invlist(0); } else { - l = (U8 *) after_strtol; + while (isSPACE(*l)) l++; + l = (U8 *) after_atou; /* Get the 0th element, which is needed to setup the inversion list */ - element0 = (UV) Strtoul((char *)l, &after_strtol, 10); - l = (U8 *) after_strtol; + while (isSPACE(*l)) l++; + element0 = (UV) grok_atou((const char *)l, &after_atou); + l = (U8 *) after_atou; invlist = _setup_canned_invlist(elements, element0, &other_elements_ptr); elements--; @@ -3505,8 +3507,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) if (l > lend) { Perl_croak(aTHX_ "panic: Expecting %"UVuf" more elements than available", elements); } - *other_elements_ptr++ = (UV) Strtoul((char *)l, &after_strtol, 10); - l = (U8 *) after_strtol; + while (isSPACE(*l)) l++; + *other_elements_ptr++ = (UV) grok_atou((const char *)l, &after_atou); + l = (U8 *) after_atou; } } } diff --git a/util.c b/util.c index 4b48e62..9c28f9e 100644 --- a/util.c +++ b/util.c @@ -1380,7 +1380,7 @@ Perl_mess_sv(pTHX_ SV *basemsg, bool consume) int wi; /* The PERL_C_BACKTRACE_ON_WARN must be an integer of one or more. */ if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR")) && - (wi = atoi(ws)) > 0) { + (wi = grok_atou(ws, NULL)) > 0) { Perl_dump_c_backtrace(aTHX_ Perl_debug_log, wi, 1); } } @@ -4381,9 +4381,9 @@ Perl_parse_unicode_opts(pTHX_ const char **popt) if (*p) { if (isDIGIT(*p)) { - opt = (U32) atoi(p); - while (isDIGIT(*p)) - p++; + const char* endptr; + opt = (U32) grok_atou(p, &endptr); + p = endptr; if (*p && *p != '\n' && *p != '\r') { if(isSPACE(*p)) goto the_end_of_the_opts_parser; else @@ -4698,7 +4698,7 @@ Perl_free_global_struct(pTHX_ struct perl_vars *plvarsp) * The default implementation reads a single env var, PERL_MEM_LOG, * expecting one or more of the following: * - * \d+ - fd fd to write to : must be 1st (atoi) + * \d+ - fd fd to write to : must be 1st (grok_atou) * 'm' - memlog was PERL_MEM_LOG=1 * 's' - svlog was PERL_SV_LOG=1 * 't' - timestamp was PERL_MEM_LOG_TIMESTAMP=1 @@ -4766,7 +4766,8 @@ S_mem_log_common(enum mem_log_type mlt, const UV n, * timeval. */ { STRLEN len; - int fd = atoi(pmlenv); + const char* endptr; + int fd = grok_atou(pmlenv, &endptr); /* Ignore endptr. */ if (!fd) fd = PERL_MEM_LOG_FD; @@ -5695,12 +5696,12 @@ static void atos_update(atos_context* ctx, /* Given an output buffer end |p| and its |start|, matches * for the atos output, extracting the source code location - * if possible, returning NULL otherwise. */ + * and returning non-NULL if possible, returning NULL otherwise. */ static const char* atos_parse(const char* p, const char* start, STRLEN* source_name_size, STRLEN* source_line) { - /* atos() outputs is something like: + /* atos() output is something like: * perl_parse (in miniperl) (perl.c:2314)\n\n". * We cannot use Perl regular expressions, because we need to * stay low-level. Therefore here we have a rolled-out version @@ -5710,11 +5711,14 @@ static const char* atos_parse(const char* p, * The matched regular expression is roughly "\(.*:\d+\)\s*$" */ const char* source_number_start; const char* source_name_end; + const char* source_line_end; + const char* close_paren; /* Skip trailing whitespace. */ while (p > start && isspace(*p)) p--; /* Now we should be at the close paren. */ if (p == start || *p != ')') return NULL; + close_paren = p; p--; /* Now we should be in the line number. */ if (p == start || !isdigit(*p)) @@ -5735,7 +5739,9 @@ static const char* atos_parse(const char* p, return NULL; p++; *source_name_size = source_name_end - p; - *source_line = atoi(source_number_start); + *source_line = grok_atou(source_number_start, &source_line_end); + if (source_line_end != close_paren) + return NULL; return p; } -- Perl5 Master Repository
