In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/f43791029d4ac4a5dbfd6ad9b67cb5407ac32e2a?hp=6e75769d300856d7c215fc22d503cce13b734a0f>

- Log -----------------------------------------------------------------
commit f43791029d4ac4a5dbfd6ad9b67cb5407ac32e2a
Author: Jarkko Hietaniemi <[email protected]>
Date:   Tue Jul 22 07:46:26 2014 -0400

    Introduce maxend for paranoia.  Also, comments.

M       numeric.c

commit fdadaf77ca45094e35ce724d7c91001f84b083c7
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 21:29:22 2014 -0400

    Atol can be strtol in disguise, so grok_atou.

M       mg.c

commit 68419f9c61ef7c22b1225655e7e3b38058c70a71
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 15:16:33 2014 -0400

    Add strtoul and strtol to avoidables.

M       t/porting/libperl.t

commit 338aa8b061f430c2b3d9deaeed0aec523639aff7
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 15:15:42 2014 -0400

    Document grok_atou as strtoul replacement.

M       numeric.c
M       pod/perlclib.pod
M       pod/perlhacktips.pod

commit 999448781bc711c8732271b98a45a724f7357c46
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 14:59:58 2014 -0400

    Use grok_atou instead of strtoul (no explicit strtol uses).

M       gv.c
M       utf8.c

commit c98823ffa61e4daf92a7a17ab937753b2c280c13
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 10:56:49 2014 -0400

    Advertise grok_atou.

M       pod/perlclib.pod
M       pod/perlhacktips.pod

commit a7941017b561ee4cf4e5f4ac4ebb6c9e684303ed
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 10:53:10 2014 -0400

    Add atoi to avoidables.

M       t/porting/libperl.t

commit dd52de8081680731af4e00f224c756ed5c3a510f
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 10:07:05 2014 -0400

    Add tests for grok_atou.

M       ext/XS-APItest/numeric.xs
M       ext/XS-APItest/t/grok.t

commit 96e440d2eb546f4493feffce002f2ec8886f13a3
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 10:50:54 2014 -0400

    Use grok_atou instead of atoi.
    
    Remaining atoi() uses include at least:
    ext/DynaLoader/dl_aix.xs, os2/os2.c, vms/vms.c

M       doio.c
M       ext/DynaLoader/dlutils.c
M       locale.c
M       malloc.c
M       perl.c
M       pp_sys.c
M       regcomp.c
M       toke.c
M       util.c

commit 6313e54401f5531a23184b7afaaf6bc7cd4a81ec
Author: Jarkko Hietaniemi <[email protected]>
Date:   Mon Jul 21 10:41:20 2014 -0400

    Implement grok_atou as safe/strict atoi replacement.
    
    For earlier discussion, see:
    http://www.nntp.perl.org/group/perl.perl5.porters/2013/10/msg208680.html
    https://rt.perl.org/Public/Bug/Display.html?id=116118#txn-1250187
    
    grok_atou is completely new code, instead of trying to bolt
    new parameters/flags to grok_number.  This makes it easier to
    be extremely strict, and not worry about breaking grok_number.

M       embed.fnc
M       embed.h
M       numeric.c
M       perl.h
M       proto.h
-----------------------------------------------------------------------

Summary of changes:
 doio.c                    |   2 +-
 embed.fnc                 |   1 +
 embed.h                   |   1 +
 ext/DynaLoader/dlutils.c  |   2 +-
 ext/XS-APItest/numeric.xs |  23 ++++++++++
 ext/XS-APItest/t/grok.t   | 114 ++++++++++++++++++++++++++++++++++++++++++++++
 gv.c                      |   2 +-
 locale.c                  |   3 +-
 malloc.c                  |   4 +-
 mg.c                      |  12 +++--
 numeric.c                 |  89 ++++++++++++++++++++++++++++++++++++
 perl.c                    |  24 ++++++----
 perl.h                    |   3 +-
 pod/perlclib.pod          |  14 ++++--
 pod/perlhacktips.pod      |  16 +++++++
 pp_sys.c                  |   2 +-
 proto.h                   |   5 ++
 regcomp.c                 |  51 ++++++++++++---------
 t/porting/libperl.t       |  10 +++-
 toke.c                    |   2 +-
 utf8.c                    |  17 ++++---
 util.c                    |  24 ++++++----
 22 files changed, 357 insertions(+), 64 deletions(-)

diff --git a/doio.c b/doio.c
index 46d0796..a631eeb 100644
--- a/doio.c
+++ b/doio.c
@@ -391,7 +391,7 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len,
                        num_svs = 0;
                    }
                    else if (isDIGIT(*type)) {
-                        wanted_fd = atoi(type);
+                        wanted_fd = grok_atou(type, NULL);
                    }
                    else {
                        const IO* thatio;
diff --git a/embed.fnc b/embed.fnc
index 241a769..d02e555 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -807,6 +807,7 @@ Apd |int    |grok_number    |NN const char *pv|STRLEN 
len|NULLOK UV *valuep
 Apd    |int    |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV 
*valuep|U32 flags
 ApdR   |bool   |grok_numeric_radix|NN const char **sp|NN const char *send
 Apd    |UV     |grok_oct       |NN const char* start|NN STRLEN* len_p|NN I32* 
flags|NULLOK NV *result
+Apdn   |Size_t         |grok_atou      |NN const char* pv|NULLOK const char** 
endptr
 : These are all indirectly referenced by globals.c. This is somewhat annoying.
 p      |int    |magic_clearenv |NN SV* sv|NN MAGIC* mg
 p      |int    |magic_clear_all_env|NN SV* sv|NN MAGIC* mg
diff --git a/embed.h b/embed.h
index efa1735..7ca719d 100644
--- a/embed.h
+++ b/embed.h
@@ -170,6 +170,7 @@
 #define getcwd_sv(a)           Perl_getcwd_sv(aTHX_ a)
 #define gp_free(a)             Perl_gp_free(aTHX_ a)
 #define gp_ref(a)              Perl_gp_ref(aTHX_ a)
+#define grok_atou              Perl_grok_atou
 #define grok_bin(a,b,c,d)      Perl_grok_bin(aTHX_ a,b,c,d)
 #define grok_hex(a,b,c,d)      Perl_grok_hex(aTHX_ a,b,c,d)
 #define grok_number(a,b,c)     Perl_grok_number(aTHX_ a,b,c)
diff --git a/ext/DynaLoader/dlutils.c b/ext/DynaLoader/dlutils.c
index 29d9b91..dea981a 100644
--- a/ext/DynaLoader/dlutils.c
+++ b/ext/DynaLoader/dlutils.c
@@ -104,7 +104,7 @@ dl_generic_private_init(pTHX)       /* called by dl_*.xs 
dl_private_init() */
     }
 #endif
     if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL )
-       dl_nonlazy = atoi(perl_dl_nonlazy);
+       dl_nonlazy = grok_atou(perl_dl_nonlazy, NULL);
     if (dl_nonlazy)
        DLDEBUG(1,PerlIO_printf(Perl_debug_log, "DynaLoader bind mode is 
'non-lazy'\n"));
 #ifdef DL_LOADONCEONLY
diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs
index ab48dba..56c11f7 100644
--- a/ext/XS-APItest/numeric.xs
+++ b/ext/XS-APItest/numeric.xs
@@ -30,3 +30,26 @@ grok_number_flags(number, flags)
        PUSHs(sv_2mortal(newSViv(result)));
        if (result & IS_NUMBER_IN_UV)
            PUSHs(sv_2mortal(newSVuv(value)));
+
+void
+grok_atou(number, endsv)
+       SV *number
+       SV *endsv
+    PREINIT:
+       STRLEN len;
+       const char *pv = SvPV(number, len);
+       UV result;
+       const char* endptr;
+    PPCODE:
+       EXTEND(SP,2);
+       if (endsv == &PL_sv_undef) {
+          result = grok_atou(pv, NULL);
+        } else {
+          result = grok_atou(pv, &endptr);
+        }
+       PUSHs(sv_2mortal(newSVuv(result)));
+       if (endsv == &PL_sv_undef) {
+          PUSHs(sv_2mortal(newSVpvn(NULL, 0)));
+       } else {
+          PUSHs(sv_2mortal(newSViv(endptr - pv)));
+       }
diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t
index 2e035ee..501bea6 100644
--- a/ext/XS-APItest/t/grok.t
+++ b/ext/XS-APItest/t/grok.t
@@ -109,4 +109,118 @@ for my $grok (@groks) {
   is($out_flags, $grok->[3], "'$grok->[0]' flags $grok->[1] - check flags");
 }
 
+my $ATOU_MAX = ~0;
+
+# atou tests
+my @atous =
+  (
+   # [ input, endsv, out uv, out len ]
+
+   # Basic cases.
+   [ "0",    "",   0,   1 ],
+   [ "1",    "",   1,   1 ],
+   [ "2",    "",   2,   1 ],
+   [ "9",    "",   9,   1 ],
+   [ "12",   "",   12,  2 ],
+   [ "123",  "",   123, 3 ],
+
+   # Trailing whitespace  is accepted or rejected, depending on endptr.
+   [ "0 ",   " ",   0,  1 ],
+   [ "1 ",   " ",   1,  1 ],
+   [ "2 ",   " ",   2,  1 ],
+   [ "12 ",  " ",   12, 2 ],
+
+   # Trailing garbage is accepted or rejected, depending on endptr.
+   [ "0x",   "x",   0,  1 ],
+   [ "1x",   "x",   1,  1 ],
+   [ "2x",   "x",   2,  1 ],
+   [ "12x",  "x",   12, 2 ],
+
+   # Leading whitespace is failure.
+   [ " 0",   " 0",  0,  0 ],
+   [ " 1",   " 1",  0,  0 ],
+   [ " 12",  " 12", 0,  0 ],
+
+   # Leading garbage is outright failure.
+   [ "x0",   "x0",  0,  0 ],
+   [ "x1",   "x1",  0,  0 ],
+   [ "x12",  "x12", 0,  0 ],
+
+   # We do not parse decimal point.
+   [ "12.3",  ".3", 12, 2 ],
+
+   # Leading pluses or minuses are no good.
+   [ "+12", "+12",  0, 0 ],
+   [ "-12", "-12",  0, 0 ],
+
+   # Extra leading zeros cause overflow.
+   [ "00",   "00",  $ATOU_MAX,  0 ],
+   [ "01",   "01",  $ATOU_MAX,  0 ],
+   [ "012",  "012", $ATOU_MAX,  0 ],
+  );
+
+if ($Config{sizesize} == 8) {
+    push @atous,
+      (
+       [ "4294967294", "", 4294967294, 10, ],
+       [ "4294967295", "", 4294967295, 10, ],
+       [ "4294967296", "", 4294967296, 10, ],
+
+       [ "9999999999", "", 9999999999, 10, ],
+
+       [ "18446744073709551614", "", 18446744073709551614, 20, ],
+       [ "18446744073709551615", "", $ATOU_MAX, 20, ],
+       [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ],
+      );
+} elsif ($Config{sizesize} == 4) {
+    push @atous,
+      (
+       [ "4294967294", "", 4294967294, 10, ],
+       [ "4294967295", "", $ATOU_MAX, 10, ],
+       [ "4294967296", "", $ATOU_MAX, 0, ],
+
+       [ "9999999999", "", $ATOU_MAX, 0, ],
+
+       [ "18446744073709551614", "", $ATOU_MAX, 0, ],
+       [ "18446744073709551615", "", $ATOU_MAX, 0, ],
+       [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ],
+      );
+}
+
+# This will fail to fail once 128/256-bit systems arrive.
+push @atous,
+    (
+       [ "99999999999999999999", "99999999999999999999", $ATOU_MAX, 0 ],
+    );
+
+for my $grok (@atous) {
+    my $input = $grok->[0];
+    my $endsv = $grok->[1];
+
+    my ($out_uv, $out_len);
+
+    # First with endsv.
+    ($out_uv, $out_len) = grok_atou($input, $endsv);
+    is($out_uv,  $grok->[2],
+       "'$input' $endsv - number success (got $out_uv cf $grok->[2])");
+    ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1");
+    unless (length $grok->[1]) {
+        is($out_len, $grok->[3], "'$input' $endsv - length sanity 2");
+    } # else { ... } ?
+    is($endsv, substr($input, $out_len), "'$input' $endsv - length success");
+
+    # Then without endsv (undef == NULL).
+    ($out_uv, $out_len) = grok_atou($input, undef);
+    if (length $grok->[1]) {
+        if ($grok->[2] == $ATOU_MAX) {
+            is($out_uv,  $ATOU_MAX, "'$input' undef - number overflow");
+        } else {
+            is($out_uv,  0, "'$input' undef - number zero");
+        }
+    } else {
+        is($out_uv,  $grok->[2],
+           "'$input' undef - number success (got $out_uv cf $grok->[2])");
+    }
+}
+
 done_testing();
diff --git a/gv.c b/gv.c
index 64bdbf1..8b43d91 100644
--- a/gv.c
+++ b/gv.c
@@ -1843,7 +1843,7 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char 
*name, STRLEN len,
                    if (!isDIGIT(*end))
                         return addmg;
                }
-                paren = strtoul(name, NULL, 10);
+                paren = grok_atou(name, NULL);
                 goto storeparen;
            }
            }
diff --git a/locale.c b/locale.c
index 85c438c..84ff0de 100644
--- a/locale.c
+++ b/locale.c
@@ -527,7 +527,8 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
     char *p;
     const bool locwarn = (printwarn > 1 ||
                     (printwarn &&
-                     (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p))));
+                     (!(p = PerlEnv_getenv("PERL_BADLANG")) ||
+                      grok_atou(p, NULL))));
     bool done = FALSE;
 #ifdef WIN32
     /* In some systems you can find out the system default locale
diff --git a/malloc.c b/malloc.c
index a99663e..73a0480 100644
--- a/malloc.c
+++ b/malloc.c
@@ -1824,7 +1824,7 @@ Perl_mfree(Malloc_t where)
                if (bad_free_warn == -1) {
                    dTHX;
                    char *pbf = PerlEnv_getenv("PERL_BADFREE");
-                   bad_free_warn = (pbf) ? atoi(pbf) : 1;
+                   bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1;
                }
                if (!bad_free_warn)
                    return;
@@ -1922,7 +1922,7 @@ Perl_realloc(void *mp, size_t nbytes)
                if (bad_free_warn == -1) {
                    dTHX;
                    char *pbf = PerlEnv_getenv("PERL_BADFREE");
-                   bad_free_warn = (pbf) ? atoi(pbf) : 1;
+                   bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1;
                }
                if (!bad_free_warn)
                    return NULL;
diff --git a/mg.c b/mg.c
index 28ed156..e1fc578 100644
--- a/mg.c
+++ b/mg.c
@@ -2891,6 +2891,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
        {
            const char *p = SvPV_const(sv, len);
             Groups_t *gary = NULL;
+            const char* endptr;
 #ifdef _SC_NGROUPS_MAX
            int maxgrp = sysconf(_SC_NGROUPS_MAX);
 
@@ -2902,19 +2903,20 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
 
             while (isSPACE(*p))
                 ++p;
-            new_egid = (Gid_t)Atol(p);
+            new_egid = (Gid_t)grok_atou(p, &endptr);
             for (i = 0; i < maxgrp; ++i) {
-                while (*p && !isSPACE(*p))
-                    ++p;
+                if (endptr == NULL)
+                    break;
+                p = endptr;
                 while (isSPACE(*p))
                     ++p;
                 if (!*p)
                     break;
-                if(!gary)
+                if (!gary)
                     Newx(gary, i + 1, Groups_t);
                 else
                     Renew(gary, i + 1, Groups_t);
-                gary[i] = (Groups_t)Atol(p);
+                gary[i] = (Groups_t)grok_atou(p, &endptr);
             }
             if (i)
                 PERL_UNUSED_RESULT(setgroups(i, gary));
diff --git a/numeric.c b/numeric.c
index 4876ece..7efd76e 100644
--- a/numeric.c
+++ b/numeric.c
@@ -786,6 +786,95 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, 
UV *valuep, U32 flags)
   return 0;
 }
 
+/*
+=for perlapi
+
+grok_atou is a safer replacement for atoi and strtoul.
+
+atoi has severe problems with illegal inputs, cannot be used
+for incremental parsing, and therefore should be avoided.
+
+atoi and strtoul are also affected by locale settings, which can
+also be seen as a bug (global state controlled by user environment).
+
+grok_atou parses a C-style zero-byte terminated string.
+
+Returns the unsigned value, if a valid one can be parsed.
+
+Only the decimal digits '0'..'9' are accepted.
+
+As opposed to atoi or strtoul:
+- does NOT allow optional leading whitespace
+- does NOT allow negative inputs
+
+Also rejected are:
+- leading plus signs
+- leading zeros (meaning that only "0" is the zero)
+
+Trailing non-digit bytes are allowed if the endptr is non-NULL.
+On return the *endptr will contain the pointer to the first non-digit byte.
+
+If the value overflows, returns Size_t_MAX, and sets the *endptr
+to NULL, unless endptr is NULL.
+
+If the endptr is NULL, the first non-digit byte MUST be
+the zero byte terminating the pv, or zero will be returned.
+
+=cut
+*/
+
+Size_t
+Perl_grok_atou(const char *pv, const char** endptr)
+{
+    const char* s = pv;
+    const char** eptr;
+    const char* end2; /* Used in case endptr is NULL. */
+    /* With Size_t_size of 8 or 4 this works out to be the start plus
+     * either 20 or 10.  When 128 or 256-bit systems became reality,
+     * this overshoots (should get 39, 78, but gets 40 and 80). */
+    const char* maxend = s + 10 * (Size_t_size / 4);
+    Size_t val = 0; /* The return value. */
+
+    PERL_ARGS_ASSERT_GROK_ATOU;
+
+    eptr = endptr ? endptr : &end2;
+    if (isDIGIT(*s) && !isDIGIT(*(s + 1))) {
+        /* Quite common cases, and in addition the case of zero ("0")
+         * simplifies the decoding loop: not having to think whether
+         * "000" or "000123" are valid (now they are invalid). */
+        val = *s++ - '0';
+    } else {
+        Size_t tmp = 0; /* Temporary accumulator. */
+
+        while (s < maxend && *s) {
+            /* This could be unrolled like in grok_number(), but
+             * the expected uses of this are not speed-needy, and
+             * unlikely to need full 64-bitness. */
+            if (isDIGIT(*s)) {
+                int digit = *s++ - '0';
+                tmp = tmp * 10 + digit;
+                if (tmp > val) { /* Rejects leading zeros. */
+                    val = tmp;
+                } else { /* Overflow. */
+                    *eptr = NULL;
+                    return Size_t_MAX;
+                }
+            } else {
+                break;
+            }
+        }
+        if (s == pv) {
+            *eptr = NULL; /* If no progress, failed to parse anything. */
+            return 0;
+        }
+    }
+    if (endptr == NULL && *s) {
+        return 0; /* If endptr is NULL, no trailing non-digits allowed. */
+    }
+    *eptr = s;
+    return val;
+}
+
 STATIC NV
 S_mulexp10(NV value, I32 exponent)
 {
diff --git a/perl.c b/perl.c
index 6e09931..e84f1d5 100644
--- a/perl.c
+++ b/perl.c
@@ -546,7 +546,12 @@ perl_destruct(pTHXx)
     {
        const char * const s = PerlEnv_getenv("PERL_DESTRUCT_LEVEL");
        if (s) {
-            const int i = atoi(s);
+            int i;
+            if (strEQ(s, "-1")) { /* Special case: modperl folklore. */
+                i = -1;
+            } else {
+                i = grok_atou(s, NULL);
+            }
 #ifdef DEBUGGING
            if (destruct_level < i) destruct_level = i;
 #endif
@@ -1451,7 +1456,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, 
char **env)
     {
         const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
 
-        if (s && (atoi(s) == 1)) {
+        if (s && (grok_atou(s, NULL) == 1)) {
             unsigned char *seed= PERL_HASH_SEED;
             unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES;
             PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", 
PERL_HASH_FUNC);
@@ -2285,8 +2290,8 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit)
 #ifdef MYMALLOC
     {
        const char *s;
-    if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && atoi(s) >= 2)
-       dump_mstats("after compilation:");
+        if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && grok_atou(s, NULL) >= 2)
+            dump_mstats("after compilation:");
     }
 #endif
 
@@ -3042,7 +3047,10 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
        }
     }
     else if (isDIGIT(**s)) {
-       i = atoi(*s);
+        const char* e;
+       i = grok_atou(*s, &e);
+        if (e)
+            *s = e;
        for (; isWORDCHAR(**s); (*s)++) ;
     }
     else if (givehelp) {
@@ -3650,9 +3658,9 @@ S_open_script(pTHX_ const char *scriptname, bool 
dosearch, bool *suidscript)
 
        if (strnEQ(scriptname, "/dev/fd/", 8) && isDIGIT(scriptname[8]) ) {
             const char *s = scriptname + 8;
-           fdscript = atoi(s);
-           while (isDIGIT(*s))
-               s++;
+            const char* e;
+           fdscript = grok_atou(s, &e);
+           s = e;
            if (*s) {
                /* PSz 18 Feb 04
                 * Tell apart "normal" usage of fdscript, e.g.
diff --git a/perl.h b/perl.h
index 54f6dca..202e55e 100644
--- a/perl.h
+++ b/perl.h
@@ -1677,7 +1677,8 @@ typedef UVTYPE UV;
 #  endif
 #endif
 
-#define SSize_t_MAX (SSize_t)(~(size_t)0 >> 1)
+#define Size_t_MAX (~(Size_t)0)
+#define SSize_t_MAX (SSize_t)(~(Size_t)0 >> 1)
 
 #define IV_DIG (BIT_DIGITS(IVSIZE * 8))
 #define UV_DIG (BIT_DIGITS(UVSIZE * 8))
diff --git a/pod/perlclib.pod b/pod/perlclib.pod
index 23cca04..b4ebe4e 100644
--- a/pod/perlclib.pod
+++ b/pod/perlclib.pod
@@ -200,14 +200,20 @@ C<toUPPER_uni>, as described in L<perlapi/Character case 
changing>.)
  Instead Of:                 Use:
 
  atof(s)                     Atof(s)
- atol(s)                     Atol(s)
+ atoi(s)                     grok_atou(s, &e)
+ atol(s)                     grok_atou(s, &e)
  strtod(s, &p)               Nothing.  Just don't use it.
- strtol(s, &p, n)            Strtol(s, &p, n)
- strtoul(s, &p, n)           Strtoul(s, &p, n)
+ strtol(s, &p, n)            grok_atou(s, &e)
+ strtoul(s, &p, n)           grok_atou(s, &e)
 
 Notice also the C<grok_bin>, C<grok_hex>, and C<grok_oct> functions in
 F<numeric.c> for converting strings representing numbers in the respective
-bases into C<NV>s.
+bases into C<NV>s.  Note that grok_atou() doesn't handle negative inputs,
+or leading whitespace (being purposefully strict).  It also doesn't always
+handle full IV/UV-range, being limited to Size_t.
+
+Note that strtol() and strtoul() may be disguised as Strtol(), Strtoul(),
+Atol(), Atoul().  Avoid those, too.
 
 In theory C<Strtol> and C<Strtoul> may not be defined if the machine perl is
 built on doesn't actually have strtol and strtoul. But as those 2
diff --git a/pod/perlhacktips.pod b/pod/perlhacktips.pod
index c673dde..3d477da 100644
--- a/pod/perlhacktips.pod
+++ b/pod/perlhacktips.pod
@@ -623,6 +623,22 @@ of the program is UTF-8.  What happens is that the C<%s> 
and its operand are
 simply skipped without any notice.
 L<https://sourceware.org/bugzilla/show_bug.cgi?id=6530>.
 
+=item *
+
+Do not use atoi()
+
+Use grok_atou() instead.  atoi() has ill-defined behavior on overflows,
+and cannot be used for incremental parsing.  It is also affected by locale,
+which is bad.
+
+=item *
+
+Do not use strtol() or strtoul()
+
+Use grok_atou() instead.  strtol() or strtoul() (or their IV/UV-friendly
+macro disguises, Strtol() and Strtoul(), or Atol() and Atoul() are
+affected by locale, which is bad.
+
 =back
 
 =head1 DEBUGGING
diff --git a/pp_sys.c b/pp_sys.c
index 54c12b3..501146e 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3295,7 +3295,7 @@ PP(pp_fttty)
     if (GvIO(gv) && IoIFP(GvIOp(gv)))
        fd = PerlIO_fileno(IoIFP(GvIOp(gv)));
     else if (name && isDIGIT(*name))
-        fd = atoi(name);
+        fd = grok_atou(name, NULL);
     else
        FT_RETURNUNDEF;
     if (fd < 0) {
diff --git a/proto.h b/proto.h
index 49a44d2..1eccc46 100644
--- a/proto.h
+++ b/proto.h
@@ -1289,6 +1289,11 @@ PERL_CALLCONV int        Perl_getcwd_sv(pTHX_ SV* sv)
 
 PERL_CALLCONV void     Perl_gp_free(pTHX_ GV* gv);
 PERL_CALLCONV GP*      Perl_gp_ref(pTHX_ GP* gp);
+PERL_CALLCONV Size_t   Perl_grok_atou(const char* pv, const char** endptr)
+                       __attribute__nonnull__(1);
+#define PERL_ARGS_ASSERT_GROK_ATOU     \
+       assert(pv)
+
 PERL_CALLCONV UV       Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, 
I32* flags, NV *result)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index 3d4d348..0f70a9e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9605,6 +9605,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
         else if (*RExC_parse == '?') { /* (?...) */
            bool is_logical = 0;
            const char * const seqstart = RExC_parse;
+            const char * endptr;
             if (has_intervening_patws) {
                 RExC_parse++;
                 vFAIL("In '(?...)', the '(' and '?' must be adjacent");
@@ -9814,12 +9815,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
            case '5': case '6': case '7': case '8': case '9':
                RExC_parse--;
               parse_recursion:
-               num = atoi(RExC_parse);
-               parse_start = RExC_parse - 1; /* MJD */
-               if (*RExC_parse == '-')
-                   RExC_parse++;
-               while (isDIGIT(*RExC_parse))
-                       RExC_parse++;
+                {
+                    bool is_neg = FALSE;
+                    parse_start = RExC_parse - 1; /* MJD */
+                    if (*RExC_parse == '-') {
+                        RExC_parse++;
+                        is_neg = TRUE;
+                    }
+                    num = grok_atou(RExC_parse, &endptr);
+                    if (endptr)
+                       RExC_parse = (char*)endptr;
+                    if (is_neg) {
+                        /* Some limit for num? */
+                        num = -num;
+                    }
+                }
                if (*RExC_parse!=')')
                    vFAIL("Expecting close bracket");
 
@@ -9996,9 +10006,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
                    RExC_parse++;
                    parno = 0;
                    if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
-                       parno = atoi(RExC_parse++);
-                       while (isDIGIT(*RExC_parse))
-                           RExC_parse++;
+                       parno = grok_atou(RExC_parse, &endptr);
+                       if (endptr)
+                            RExC_parse = (char*)endptr;
                    } else if (RExC_parse[0] == '&') {
                        SV *sv_dat;
                        RExC_parse++;
@@ -10015,10 +10025,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
                     /* (?(1)...) */
                    char c;
                    char *tmp;
-                   parno = atoi(RExC_parse++);
-
-                   while (isDIGIT(*RExC_parse))
-                       RExC_parse++;
+                   parno = grok_atou(RExC_parse, &endptr);
+                    if (endptr)
+                       RExC_parse = (char*)endptr;
                     ret = reganode(pRExC_state, GROUPP, parno);
 
                  insert_if_check_paren:
@@ -10492,15 +10501,16 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 
*flagp, U32 depth)
            next++;
        }
        if (*next == '}') {             /* got one */
+            const char* endptr;
            if (!maxpos)
                maxpos = next;
            RExC_parse++;
-           min = atoi(RExC_parse);
+           min = grok_atou(RExC_parse, &endptr);
            if (*maxpos == ',')
                maxpos++;
            else
                maxpos = RExC_parse;
-           max = atoi(maxpos);
+           max = grok_atou(maxpos, &endptr);
            if (!max && *maxpos != '0')
                max = REG_INFTY;                /* meaning "infinity" */
            else if (max >= REG_INFTY)
@@ -11147,18 +11157,17 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t 
*pRExC_state,
 }
 
 
-/* return atoi(p), unless it's too big to sensibly be a backref,
+/* Parse backref decimal value, unless it's too big to sensibly be a backref,
  * in which case return I32_MAX (rather than possibly 32-bit wrapping) */
 
 static I32
 S_backref_value(char *p)
 {
-    char *q = p;
-
-    for (;isDIGIT(*q); q++) {} /* calculate length of num */
-    if (q - p == 0 || q - p > 9)
+    const char* endptr;
+    Size_t val = grok_atou(p, &endptr);
+    if (endptr == p || endptr == NULL || val > 999999999)
         return I32_MAX;
-    return atoi(p);
+    return val;
 }
 
 
diff --git a/t/porting/libperl.t b/t/porting/libperl.t
index 7bd2198..9562556 100644
--- a/t/porting/libperl.t
+++ b/t/porting/libperl.t
@@ -482,7 +482,9 @@ for my $symbol (sort keys %expected) {
 # (One exception: for certain floating point outputs
 # the native sprintf is still used in some platforms, see below.)
 #
-# XXX: add atoi() to %unexpected - unsafe and undefined failure modes.
+# atoi has unsafe and undefined failure modes, and is affected by locale.
+#
+# strtol and strtoul are affected by locale.
 #
 
 my %unexpected;
@@ -494,6 +496,12 @@ for my $str (qw(strcat strcpy strncat strncpy)) {
     $unexpected{$str} = undef; # No Configure symbol for these.
 }
 
+$unexpected{atoi} = undef; # No Configure symbol for atoi.
+
+for my $str (qw(strtol strtoul)) {
+    $unexpected{$str} = "d_$str";
+}
+
 for my $symbol (sort keys %unexpected) {
     if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) {
       SKIP: {
diff --git a/toke.c b/toke.c
index 2842115..cb379ef 100644
--- a/toke.c
+++ b/toke.c
@@ -1686,7 +1686,7 @@ S_incline(pTHX_ const char *s)
     if (*e != '\n' && *e != '\0')
        return;         /* false alarm */
 
-    line_num = atoi(n)-1;
+    line_num = grok_atou(n, &e) - 1;
 
     if (t - s > 0) {
        const STRLEN len = t - s;
diff --git a/utf8.c b/utf8.c
index 279d96f..aa63504 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3481,22 +3481,24 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
     lend = l + lcur;
 
     if (*l == 'V') {    /*  Inversion list format */
-        char *after_strtol = (char *) lend;
+        const char *after_atou = (char *) lend;
         UV element0;
         UV* other_elements_ptr;
 
         /* The first number is a count of the rest */
         l++;
-        elements = Strtoul((char *)l, &after_strtol, 10);
+        elements = grok_atou((const char *)l, &after_atou);
         if (elements == 0) {
             invlist = _new_invlist(0);
         }
         else {
-            l = (U8 *) after_strtol;
+            while (isSPACE(*l)) l++;
+            l = (U8 *) after_atou;
 
             /* Get the 0th element, which is needed to setup the inversion 
list */
-            element0 = (UV) Strtoul((char *)l, &after_strtol, 10);
-            l = (U8 *) after_strtol;
+            while (isSPACE(*l)) l++;
+            element0 = (UV) grok_atou((const char *)l, &after_atou);
+            l = (U8 *) after_atou;
             invlist = _setup_canned_invlist(elements, element0, 
&other_elements_ptr);
             elements--;
 
@@ -3505,8 +3507,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
                 if (l > lend) {
                     Perl_croak(aTHX_ "panic: Expecting %"UVuf" more elements 
than available", elements);
                 }
-                *other_elements_ptr++ = (UV) Strtoul((char *)l, &after_strtol, 
10);
-                l = (U8 *) after_strtol;
+                while (isSPACE(*l)) l++;
+                *other_elements_ptr++ = (UV) grok_atou((const char *)l, 
&after_atou);
+                l = (U8 *) after_atou;
             }
         }
     }
diff --git a/util.c b/util.c
index 4b48e62..9c28f9e 100644
--- a/util.c
+++ b/util.c
@@ -1380,7 +1380,7 @@ Perl_mess_sv(pTHX_ SV *basemsg, bool consume)
         int wi;
         /* The PERL_C_BACKTRACE_ON_WARN must be an integer of one or more. */
         if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR")) &&
-            (wi = atoi(ws)) > 0) {
+            (wi = grok_atou(ws, NULL)) > 0) {
             Perl_dump_c_backtrace(aTHX_ Perl_debug_log, wi, 1);
         }
     }
@@ -4381,9 +4381,9 @@ Perl_parse_unicode_opts(pTHX_ const char **popt)
 
   if (*p) {
        if (isDIGIT(*p)) {
-           opt = (U32) atoi(p);
-           while (isDIGIT(*p))
-               p++;
+            const char* endptr;
+            opt = (U32) grok_atou(p, &endptr);
+           p = endptr;
            if (*p && *p != '\n' && *p != '\r') {
             if(isSPACE(*p)) goto the_end_of_the_opts_parser;
             else
@@ -4698,7 +4698,7 @@ Perl_free_global_struct(pTHX_ struct perl_vars *plvarsp)
  * The default implementation reads a single env var, PERL_MEM_LOG,
  * expecting one or more of the following:
  *
- *    \d+ - fd         fd to write to          : must be 1st (atoi)
+ *    \d+ - fd         fd to write to          : must be 1st (grok_atou)
  *    'm' - memlog     was PERL_MEM_LOG=1
  *    's' - svlog      was PERL_SV_LOG=1
  *    't' - timestamp  was PERL_MEM_LOG_TIMESTAMP=1
@@ -4766,7 +4766,8 @@ S_mem_log_common(enum mem_log_type mlt, const UV n,
         * timeval. */
        {
            STRLEN len;
-           int fd = atoi(pmlenv);
+            const char* endptr;
+           int fd = grok_atou(pmlenv, &endptr); /* Ignore endptr. */
            if (!fd)
                fd = PERL_MEM_LOG_FD;
 
@@ -5695,12 +5696,12 @@ static void atos_update(atos_context* ctx,
 
 /* Given an output buffer end |p| and its |start|, matches
  * for the atos output, extracting the source code location
- * if possible, returning NULL otherwise. */
+ * and returning non-NULL if possible, returning NULL otherwise. */
 static const char* atos_parse(const char* p,
                               const char* start,
                               STRLEN* source_name_size,
                               STRLEN* source_line) {
-    /* atos() outputs is something like:
+    /* atos() output is something like:
      * perl_parse (in miniperl) (perl.c:2314)\n\n".
      * We cannot use Perl regular expressions, because we need to
      * stay low-level.  Therefore here we have a rolled-out version
@@ -5710,11 +5711,14 @@ static const char* atos_parse(const char* p,
      * The matched regular expression is roughly "\(.*:\d+\)\s*$" */
     const char* source_number_start;
     const char* source_name_end;
+    const char* source_line_end;
+    const char* close_paren;
     /* Skip trailing whitespace. */
     while (p > start && isspace(*p)) p--;
     /* Now we should be at the close paren. */
     if (p == start || *p != ')')
         return NULL;
+    close_paren = p;
     p--;
     /* Now we should be in the line number. */
     if (p == start || !isdigit(*p))
@@ -5735,7 +5739,9 @@ static const char* atos_parse(const char* p,
         return NULL;
     p++;
     *source_name_size = source_name_end - p;
-    *source_line = atoi(source_number_start);
+    *source_line = grok_atou(source_number_start, &source_line_end);
+    if (source_line_end != close_paren)
+        return NULL;
     return p;
 }
 

--
Perl5 Master Repository

Reply via email to