In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/945b524a8cdacbf82557d751252e6546f48d21ae?hp=4ba3adde25c4edf2e470f13677632b6d2f9e2fcd>

- Log -----------------------------------------------------------------
commit 945b524a8cdacbf82557d751252e6546f48d21ae
Author: Jarkko Hietaniemi <[email protected]>
Date:   Wed Jul 23 22:32:45 2014 -0400

    Share common constants as file statics.

M       numeric.c

commit 75feedba47600d94d18d49dbcbdf46393b6c6cc5
Author: Jarkko Hietaniemi <[email protected]>
Date:   Wed Jul 23 10:38:50 2014 -0400

    Use UV instead of Size_t.
    
    A strong reason for using UV are the promised custom codepoints,
    they go beyond 32-bit.
    
    The overflow logic didn't work if Size_t was 32-bit but the UV was 64-bit.
    Steal the battle-proven logic from grok_number_flags().
    
    The numeric.xs or grok.t were not right in 32-bit, either.
    
    Add comments.

M       embed.fnc
M       ext/XS-APItest/numeric.xs
M       ext/XS-APItest/t/grok.t
M       numeric.c
M       proto.h
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc                 |  2 +-
 ext/XS-APItest/numeric.xs |  6 ++++-
 ext/XS-APItest/t/grok.t   | 44 +++++++++++++++++++++++++-----
 numeric.c                 | 68 +++++++++++++++++++++--------------------------
 proto.h                   |  2 +-
 5 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index d02e555..90c56ed 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -807,7 +807,7 @@ Apd |int    |grok_number    |NN const char *pv|STRLEN 
len|NULLOK UV *valuep
 Apd    |int    |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV 
*valuep|U32 flags
 ApdR   |bool   |grok_numeric_radix|NN const char **sp|NN const char *send
 Apd    |UV     |grok_oct       |NN const char* start|NN STRLEN* len_p|NN I32* 
flags|NULLOK NV *result
-Apdn   |Size_t         |grok_atou      |NN const char* pv|NULLOK const char** 
endptr
+Apdn   |UV     |grok_atou      |NN const char* pv|NULLOK const char** endptr
 : These are all indirectly referenced by globals.c. This is somewhat annoying.
 p      |int    |magic_clearenv |NN SV* sv|NN MAGIC* mg
 p      |int    |magic_clear_all_env|NN SV* sv|NN MAGIC* mg
diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs
index 56c11f7..6d1ef82 100644
--- a/ext/XS-APItest/numeric.xs
+++ b/ext/XS-APItest/numeric.xs
@@ -51,5 +51,9 @@ grok_atou(number, endsv)
        if (endsv == &PL_sv_undef) {
           PUSHs(sv_2mortal(newSVpvn(NULL, 0)));
        } else {
-          PUSHs(sv_2mortal(newSViv(endptr - pv)));
+         if (endptr) {
+           PUSHs(sv_2mortal(newSViv(endptr - pv)));
+         } else {
+           PUSHs(sv_2mortal(newSViv(0)));
+         }
        }
diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t
index 501bea6..b41cb09 100644
--- a/ext/XS-APItest/t/grok.t
+++ b/ext/XS-APItest/t/grok.t
@@ -159,38 +159,65 @@ my @atous =
    [ "012",  "012", $ATOU_MAX,  0 ],
   );
 
-if ($Config{sizesize} == 8) {
+# Values near overflow point.
+if ($Config{uvsize} == 8) {
     push @atous,
       (
+       # 32-bit values no problem for 64-bit.
+       [ "4294967293", "", 4294967293, 10, ],
        [ "4294967294", "", 4294967294, 10, ],
        [ "4294967295", "", 4294967295, 10, ],
        [ "4294967296", "", 4294967296, 10, ],
+       [ "4294967297", "", 4294967297, 10, ],
 
+       # This is well within 64-bit.
        [ "9999999999", "", 9999999999, 10, ],
 
+       # Values valid up to 64-bit and beyond.
+       [ "18446744073709551613", "", 18446744073709551613, 20, ],
        [ "18446744073709551614", "", 18446744073709551614, 20, ],
        [ "18446744073709551615", "", $ATOU_MAX, 20, ],
-       [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ],
+       [ "18446744073709551616", "", $ATOU_MAX, 0, ],
+       [ "18446744073709551617", "", $ATOU_MAX, 0, ],
       );
-} elsif ($Config{sizesize} == 4) {
+} elsif ($Config{uvsize} == 4) {
     push @atous,
       (
+       # Values valid up to 32-bit and beyond.
+       [ "4294967293", "", 4294967293, 10, ],
        [ "4294967294", "", 4294967294, 10, ],
        [ "4294967295", "", $ATOU_MAX, 10, ],
        [ "4294967296", "", $ATOU_MAX, 0, ],
+       [ "4294967297", "", $ATOU_MAX, 0, ],
 
+       # Still beyond 32-bit.
+       [ "4999999999", "", $ATOU_MAX, 0, ],
+       [ "5678901234", "", $ATOU_MAX, 0, ],
+       [ "6789012345", "", $ATOU_MAX, 0, ],
+       [ "7890123456", "", $ATOU_MAX, 0, ],
+       [ "8901234567", "", $ATOU_MAX, 0, ],
+       [ "9012345678", "", $ATOU_MAX, 0, ],
        [ "9999999999", "", $ATOU_MAX, 0, ],
+       [ "10000000000", "", $ATOU_MAX, 0, ],
+       [ "12345678901", "", $ATOU_MAX, 0, ],
 
+       # 64-bit values are way beyond.
+       [ "18446744073709551613", "", $ATOU_MAX, 0, ],
        [ "18446744073709551614", "", $ATOU_MAX, 0, ],
        [ "18446744073709551615", "", $ATOU_MAX, 0, ],
-       [ "18446744073709551616", "18446744073709551616", $ATOU_MAX, 0, ],
+       [ "18446744073709551616", "", $ATOU_MAX, 0, ],
+       [ "18446744073709551617", "", $ATOU_MAX, 0, ],
       );
 }
 
-# This will fail to fail once 128/256-bit systems arrive.
+# These will fail to fail once 128/256-bit systems arrive.
 push @atous,
     (
-       [ "99999999999999999999", "99999999999999999999", $ATOU_MAX, 0 ],
+       [ "23456789012345678901", "", $ATOU_MAX, 0 ],
+       [ "34567890123456789012", "", $ATOU_MAX, 0 ],
+       [ "98765432109876543210", "", $ATOU_MAX, 0 ],
+       [ "98765432109876543211", "", $ATOU_MAX, 0 ],
+       [ "99999999999999999999", "", $ATOU_MAX, 0 ],
     );
 
 for my $grok (@atous) {
@@ -207,7 +234,10 @@ for my $grok (@atous) {
     unless (length $grok->[1]) {
         is($out_len, $grok->[3], "'$input' $endsv - length sanity 2");
     } # else { ... } ?
-    is($endsv, substr($input, $out_len), "'$input' $endsv - length success");
+    if ($out_len) {
+        is($endsv, substr($input, $out_len),
+           "'$input' $endsv - length sanity 3");
+    }
 
     # Then without endsv (undef == NULL).
     ($out_uv, $out_len) = grok_atou($input, undef);
diff --git a/numeric.c b/numeric.c
index 66e4e75..a203bf5 100644
--- a/numeric.c
+++ b/numeric.c
@@ -586,13 +586,14 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV 
*valuep)
     return grok_number_flags(pv, len, valuep, 0);
 }
 
+static const UV uv_max_div_10 = UV_MAX / 10;
+static const U8 uv_max_mod_10 = UV_MAX % 10;
+
 int
 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 {
   const char *s = pv;
   const char * const send = pv + len;
-  const UV max_div_10 = UV_MAX / 10;
-  const char max_mod_10 = UV_MAX % 10;
   int numtype = 0;
   int sawinf = 0;
   int sawnan = 0;
@@ -660,9 +661,9 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV 
*valuep, U32 flags)
                                          each time for overflow.  */
                                       digit = *s - '0';
                                       while (digit >= 0 && digit <= 9
-                                             && (value < max_div_10
-                                                 || (value == max_div_10
-                                                     && digit <= max_mod_10))) 
{
+                                             && (value < uv_max_div_10
+                                                 || (value == uv_max_div_10
+                                                     && digit <= 
uv_max_mod_10))) {
                                         value = value * 10 + digit;
                                         if (++s < send)
                                           digit = *s - '0';
@@ -804,7 +805,7 @@ leading whitespace, or negative inputs.  If such features 
are
 required, the calling code needs to explicitly implement those.
 
 If a valid value cannot be parsed, returns either zero (if non-digits
-are met before any digits) or Size_t_MAX (if the value overflows).
+are met before any digits) or UV_MAX (if the value overflows).
 
 Note that extraneous leading zeros also count as an overflow
 (meaning that only "0" is the zero).
@@ -825,51 +826,44 @@ seen as a bug (global state controlled by user 
environment).
 =cut
 */
 
-Size_t
+UV
 Perl_grok_atou(const char *pv, const char** endptr)
 {
     const char* s = pv;
     const char** eptr;
     const char* end2; /* Used in case endptr is NULL. */
-    /* With Size_t_size of 8 or 4 this works out to be the start plus
-     * either 20 or 10.  When 128 or 256-bit systems became reality,
-     * this overshoots (should get 39, 78, but gets 40, 80). */
-    const char* maxend = s + 10 * (Size_t_size / 4);
-    Size_t val = 0; /* The return value. */
+    UV val = 0; /* The return value. */
 
     PERL_ARGS_ASSERT_GROK_ATOU;
 
     eptr = endptr ? endptr : &end2;
-    if (isDIGIT(*s) && !isDIGIT(*(s + 1))) {
-        /* Single-digit inputs are quite common cases, and in addition
-         * the case of zero ("0") here simplifies the decoding loop:
-         * not having to think whether "000" or "000123" are valid
-         * (now they are invalid). */
+    if (isDIGIT(*s)) {
+        /* Single-digit inputs are quite common. */
         val = *s++ - '0';
-    } else {
-        Size_t tmp = 0; /* Temporary accumulator. */
-
-        while (s < maxend && *s) {
-            /* This could be unrolled like in grok_number(), but
-             * the expected uses of this are not speed-needy, and
-             * unlikely to need full 64-bitness. */
-            if (isDIGIT(*s)) {
-                int digit = *s++ - '0';
-                tmp = tmp * 10 + digit;
-                if (tmp > val) { /* This implictly rejects leading zeros. */
-                    val = tmp;
-                } else { /* Overflow. */
+        if (isDIGIT(*s)) {
+            /* Extra leading zeros cause overflow. */
+            if (val == 0) {
+                *eptr = NULL;
+                return UV_MAX;
+            }
+            while (isDIGIT(*s)) {
+                /* This could be unrolled like in grok_number(), but
+                 * the expected uses of this are not speed-needy, and
+                 * unlikely to need full 64-bitness. */
+                U8 digit = *s++ - '0';
+                if (val < uv_max_div_10 ||
+                    (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
+                    val = val * 10 + digit;
+                } else {
                     *eptr = NULL;
-                    return Size_t_MAX;
+                    return UV_MAX;
                 }
-            } else {
-                break;
             }
         }
-        if (s == pv) {
-            *eptr = NULL; /* If no progress, failed to parse anything. */
-            return 0;
-        }
+    }
+    if (s == pv) {
+        *eptr = NULL; /* If no progress, failed to parse anything. */
+        return 0;
     }
     if (endptr == NULL && *s) {
         return 0; /* If endptr is NULL, no trailing non-digits allowed. */
diff --git a/proto.h b/proto.h
index 1eccc46..6abd867 100644
--- a/proto.h
+++ b/proto.h
@@ -1289,7 +1289,7 @@ PERL_CALLCONV int Perl_getcwd_sv(pTHX_ SV* sv)
 
 PERL_CALLCONV void     Perl_gp_free(pTHX_ GV* gv);
 PERL_CALLCONV GP*      Perl_gp_ref(pTHX_ GP* gp);
-PERL_CALLCONV Size_t   Perl_grok_atou(const char* pv, const char** endptr)
+PERL_CALLCONV UV       Perl_grok_atou(const char* pv, const char** endptr)
                        __attribute__nonnull__(1);
 #define PERL_ARGS_ASSERT_GROK_ATOU     \
        assert(pv)

--
Perl5 Master Repository

Reply via email to