Remove custom number parser and use C standard library instead. In order to
keep compatibility with earlier versions of the parser, we have to take
into account a couple of quirks:

- We did not consider "negative" numbers to be valid for anything other
  than base-10 numbers, whereas C standard library does. Adjust the tests
  to match the new behavior.
- We did not consider numbers such as "+4" to be valid, whereas C
  standard library does. Adjust the tests to match the new behavior.
- C standard library's strtoull does not do range checks on negative
  numbers, so we have to parse knowingly-negative numbers as signed.
- C standard library does not support binary numbers, so we keep around the
  relevant parts of the custom parser in place to support binary numbers.

Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
---

Notes:
    v5 -> v6:
    - Allowed more negative numbers (such as negative octals or hex)
    - Updated unit tests to check new cases
    - Small refactoring of code to reduce amount of noise
    - More verbose debug output
    
    v4 -> v5:
    - Added this commit

 app/test/test_cmdline_num.c     |  19 +-
 lib/cmdline/cmdline_parse_num.c | 364 ++++++++++++++++----------------
 2 files changed, 195 insertions(+), 188 deletions(-)

diff --git a/app/test/test_cmdline_num.c b/app/test/test_cmdline_num.c
index 9276de59bd..dd70f6f824 100644
--- a/app/test/test_cmdline_num.c
+++ b/app/test/test_cmdline_num.c
@@ -139,6 +139,8 @@ const struct num_signed_str num_valid_negative_strs[] = {
                {"-2147483648", INT32_MIN },
                {"-2147483649", INT32_MIN - 1LL },
                {"-9223372036854775808", INT64_MIN },
+               {"-0x8000000000000000", INT64_MIN },
+               {"-01000000000000000000000", INT64_MIN },
 };
 
 const struct num_unsigned_str num_garbage_positive_strs[] = {
@@ -175,12 +177,27 @@ const struct num_unsigned_str num_garbage_positive_strs[] 
= {
 
 const struct num_signed_str num_garbage_negative_strs[] = {
                /* valid strings with garbage on the end, should still be valid 
*/
+               /* negative numbers */
                {"-9223372036854775808\0garbage", INT64_MIN },
                {"-9223372036854775808\rgarbage", INT64_MIN },
                {"-9223372036854775808\tgarbage", INT64_MIN },
                {"-9223372036854775808\ngarbage", INT64_MIN },
                {"-9223372036854775808#garbage", INT64_MIN },
                {"-9223372036854775808 garbage", INT64_MIN },
+               /* negative hex */
+               {"-0x8000000000000000\0garbage", INT64_MIN },
+               {"-0x8000000000000000\rgarbage", INT64_MIN },
+               {"-0x8000000000000000\tgarbage", INT64_MIN },
+               {"-0x8000000000000000\ngarbage", INT64_MIN },
+               {"-0x8000000000000000#garbage", INT64_MIN },
+               {"-0x8000000000000000 garbage", INT64_MIN },
+               /* negative octal */
+               {"-01000000000000000000000\0garbage", INT64_MIN },
+               {"-01000000000000000000000\rgarbage", INT64_MIN },
+               {"-01000000000000000000000\tgarbage", INT64_MIN },
+               {"-01000000000000000000000\ngarbage", INT64_MIN },
+               {"-01000000000000000000000#garbage", INT64_MIN },
+               {"-01000000000000000000000 garbage", INT64_MIN },
 };
 
 const char * num_invalid_strs[] = {
@@ -197,7 +214,6 @@ const char * num_invalid_strs[] = {
                "0b01110101017001",
                /* false negative numbers */
                "-12345F623",
-               "-0x1234580A",
                "-0b0111010101",
                /* too long (128+ chars) */
                
("0b1111000011110000111100001111000011110000111100001111000011110000"
@@ -205,7 +221,6 @@ const char * num_invalid_strs[] = {
                "1E3",
                "0A",
                "-B",
-               "+4",
                "1.23G",
                "",
                " ",
diff --git a/lib/cmdline/cmdline_parse_num.c b/lib/cmdline/cmdline_parse_num.c
index f21796bedb..ff2bdc28c8 100644
--- a/lib/cmdline/cmdline_parse_num.c
+++ b/lib/cmdline/cmdline_parse_num.c
@@ -4,6 +4,7 @@
  * All rights reserved.
  */
 
+#include <errno.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <inttypes.h>
@@ -29,23 +30,6 @@ struct cmdline_token_ops cmdline_token_num_ops = {
        .get_help = cmdline_get_help_num,
 };
 
-enum num_parse_state_t {
-       START,
-       DEC_NEG,
-       BIN,
-       HEX,
-
-       ERROR,
-
-       FIRST_OK, /* not used */
-       ZERO_OK,
-       HEX_OK,
-       OCTAL_OK,
-       BIN_OK,
-       DEC_NEG_OK,
-       DEC_POS_OK,
-};
-
 /* Keep it sync with enum in .h */
 static const char * num_help[] = {
        "UINT8", "UINT16", "UINT32", "UINT64",
@@ -53,13 +37,13 @@ static const char * num_help[] = {
 };
 
 static inline int
-add_to_res(unsigned int c, uint64_t *res, unsigned int base)
+add_to_bin(unsigned int c, uint64_t *res)
 {
        /* overflow */
-       if ((UINT64_MAX - c) / base < *res)
+       if ((UINT64_MAX - c) / 2 < *res)
                return -1;
 
-       *res = (uint64_t) (*res * base + c);
+       *res = (uint64_t) (*res * 2 + c);
        return 0;
 }
 
@@ -88,138 +72,119 @@ check_res_size(struct cmdline_token_num_data *nd, 
unsigned ressize)
                        return -1;
                break;
        default:
+               debug_printf("Check size: unsupported number format: %s\n",
+                       num_help[nd->type]);
                return -1;
        }
        return 0;
 }
 
-/* parse an int */
-RTE_EXPORT_SYMBOL(cmdline_parse_num)
-int
-cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
-       unsigned ressize)
+static int
+check_parsed_num(enum cmdline_numtype type, int neg, uint64_t uintres)
 {
-       struct cmdline_token_num_data nd;
-       enum num_parse_state_t st = START;
+       int lo_ok, hi_ok;
+
+       switch (type) {
+       case RTE_UINT8:
+               lo_ok = !neg;
+               hi_ok = uintres <= UINT8_MAX;
+               break;
+       case RTE_UINT16:
+               lo_ok = !neg;
+               hi_ok = uintres <= UINT16_MAX;
+               break;
+       case RTE_UINT32:
+               lo_ok = !neg;
+               hi_ok = uintres <= UINT32_MAX;
+               break;
+       case RTE_UINT64:
+               lo_ok = !neg;
+               hi_ok = 1; /* can't be out of range if parsed successfully */
+               break;
+       case RTE_INT8:
+               lo_ok = !neg || (int64_t)uintres >= INT8_MIN;
+               hi_ok = neg || uintres <= INT8_MAX;
+               break;
+       case RTE_INT16:
+               lo_ok = !neg || (int64_t)uintres >= INT16_MIN;
+               hi_ok = neg || uintres <= INT16_MAX;
+               break;
+       case RTE_INT32:
+               lo_ok = !neg || (int64_t)uintres >= INT32_MIN;
+               hi_ok = neg || uintres <= INT32_MAX;
+               break;
+       case RTE_INT64:
+               lo_ok = 1; /* can't be out of range if parsed successfully */
+               hi_ok = neg || uintres <= INT64_MAX;
+               break;
+       default:
+               debug_printf("Check range failed: unsupported number format: 
%s\n",
+                               num_help[type]);
+               return -1;
+       }
+       /* check ranges */
+       if (!lo_ok || !hi_ok)
+               return -1;
+       return 0;
+}
+
+static int
+parse_num(const char *srcbuf, uint64_t *resptr)
+{
+       uint64_t uintres;
+       char *end;
+       int neg = *srcbuf == '-';
+
+       /*
+        * strtoull does not do range checks on negative numbers, so we need to
+        * use strtoll if we know the value we're parsing looks like a negative
+        * one. we use base 0 for both, 0 means autodetect base.
+        */
+       errno = 0;
+       if (neg)
+               uintres = (uint64_t)strtoll(srcbuf, &end, 0);
+       else
+               uintres = strtoull(srcbuf, &end, 0);
+
+       if (end == srcbuf || !cmdline_isendoftoken(*end) || errno == ERANGE)
+               return -1;
+
+       *resptr = uintres;
+       return end - srcbuf;
+}
+
+static int
+parse_bin(const char *srcbuf, uint64_t *res)
+{
+       uint64_t uintres = 0;
+       enum {
+               ERROR,
+               START,
+               BIN,
+               ZERO_OK,
+               BIN_OK,
+       } st = START;
        const char * buf;
        char c;
-       uint64_t res1 = 0;
-
-       if (!tk)
-               return -1;
-
-       if (!srcbuf || !*srcbuf)
-               return -1;
 
        buf = srcbuf;
        c = *buf;
-
-       memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
-
-       /* check that we have enough room in res */
-       if (res) {
-               if (check_res_size(&nd, ressize) < 0)
-                       return -1;
-       }
-
        while (st != ERROR && c && !cmdline_isendoftoken(c)) {
                debug_printf("%c %x -> ", c, c);
                switch (st) {
                case START:
-                       if (c == '-') {
-                               st = DEC_NEG;
-                       }
-                       else if (c == '0') {
+                       if (c == '0') {
                                st = ZERO_OK;
                        }
-                       else if (c >= '1' && c <= '9') {
-                               if (add_to_res(c - '0', &res1, 10) < 0)
-                                       st = ERROR;
-                               else
-                                       st = DEC_POS_OK;
-                       }
-                       else  {
+                       else {
                                st = ERROR;
                        }
                        break;
 
                case ZERO_OK:
-                       if (c == 'x') {
-                               st = HEX;
-                       }
-                       else if (c == 'b') {
+                       if (c == 'b') {
                                st = BIN;
                        }
-                       else if (c >= '0' && c <= '7') {
-                               if (add_to_res(c - '0', &res1, 10) < 0)
-                                       st = ERROR;
-                               else
-                                       st = OCTAL_OK;
-                       }
-                       else  {
-                               st = ERROR;
-                       }
-                       break;
-
-               case DEC_NEG:
-                       if (c >= '0' && c <= '9') {
-                               if (add_to_res(c - '0', &res1, 10) < 0)
-                                       st = ERROR;
-                               else
-                                       st = DEC_NEG_OK;
-                       }
-                       else {
-                               st = ERROR;
-                       }
-                       break;
-
-               case DEC_NEG_OK:
-                       if (c >= '0' && c <= '9') {
-                               if (add_to_res(c - '0', &res1, 10) < 0)
-                                       st = ERROR;
-                       }
-                       else {
-                               st = ERROR;
-                       }
-                       break;
-
-               case DEC_POS_OK:
-                       if (c >= '0' && c <= '9') {
-                               if (add_to_res(c - '0', &res1, 10) < 0)
-                                       st = ERROR;
-                       }
-                       else {
-                               st = ERROR;
-                       }
-                       break;
-
-               case HEX:
-                       st = HEX_OK;
-                       /* fall-through */
-               case HEX_OK:
-                       if (c >= '0' && c <= '9') {
-                               if (add_to_res(c - '0', &res1, 16) < 0)
-                                       st = ERROR;
-                       }
-                       else if (c >= 'a' && c <= 'f') {
-                               if (add_to_res(c - 'a' + 10, &res1, 16) < 0)
-                                       st = ERROR;
-                       }
-                       else if (c >= 'A' && c <= 'F') {
-                               if (add_to_res(c - 'A' + 10, &res1, 16) < 0)
-                                       st = ERROR;
-                       }
-                       else {
-                               st = ERROR;
-                       }
-                       break;
-
-
-               case OCTAL_OK:
-                       if (c >= '0' && c <= '7') {
-                               if (add_to_res(c - '0', &res1, 8) < 0)
-                                       st = ERROR;
-                       }
                        else {
                                st = ERROR;
                        }
@@ -230,7 +195,7 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char 
*srcbuf, void *res,
                        /* fall-through */
                case BIN_OK:
                        if (c >= '0' && c <= '1') {
-                               if (add_to_res(c - '0', &res1, 2) < 0)
+                               if (add_to_bin(c - '0', &uintres) < 0)
                                        st = ERROR;
                        }
                        else {
@@ -239,10 +204,10 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const 
char *srcbuf, void *res,
                        break;
                default:
                        debug_printf("not impl ");
-
+                       st = ERROR;
                }
 
-               debug_printf("(%"PRIu64")\n", res1);
+               debug_printf("(%"PRIu64")\n", uintres);
 
                buf ++;
                c = *buf;
@@ -252,66 +217,93 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const 
char *srcbuf, void *res,
                        return -1;
        }
 
-       switch (st) {
-       case ZERO_OK:
-       case DEC_POS_OK:
-       case HEX_OK:
-       case OCTAL_OK:
-       case BIN_OK:
-               if (nd.type == RTE_INT8 && res1 <= INT8_MAX) {
-                       if (res) *(int8_t *)res = (int8_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT16 && res1 <= INT16_MAX) {
-                       if (res) *(int16_t *)res = (int16_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT32 && res1 <= INT32_MAX) {
-                       if (res) *(int32_t *)res = (int32_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT64 && res1 <= INT64_MAX) {
-                       if (res) *(int64_t *)res = (int64_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_UINT8 && res1 <= UINT8_MAX) {
-                       if (res) *(uint8_t *)res = (uint8_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_UINT16  && res1 <= UINT16_MAX) {
-                       if (res) *(uint16_t *)res = (uint16_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_UINT32 && res1 <= UINT32_MAX) {
-                       if (res) *(uint32_t *)res = (uint32_t) res1;
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_UINT64) {
-                       if (res) *(uint64_t *)res = res1;
-                       return buf-srcbuf;
-               } else {
-                       return -1;
-               }
-               break;
+       if (st != BIN_OK)
+               return -1;
+
+       *res = uintres;
+       return buf - srcbuf;
+}
 
-       case DEC_NEG_OK:
-               if (nd.type == RTE_INT8 &&
-                               res1 <= INT8_MAX + 1) {
-                       if (res) *(int8_t *)res = (int8_t) (-res1);
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT16 &&
-                               res1 <= (uint16_t)INT16_MAX + 1) {
-                       if (res) *(int16_t *)res = (int16_t) (-res1);
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT32 &&
-                               res1 <= (uint32_t)INT32_MAX + 1) {
-                       if (res) *(int32_t *)res = (int32_t) (-res1);
-                       return buf-srcbuf;
-               } else if (nd.type == RTE_INT64 &&
-                               res1 <= (uint64_t)INT64_MAX + 1) {
-                       if (res) *(int64_t *)res = (int64_t) (-res1);
-                       return buf-srcbuf;
-               } else {
-                       return -1;
-               }
+static int
+write_num(enum cmdline_numtype type, void *res, uint64_t uintres)
+{
+       switch (type) {
+       case RTE_UINT8:
+               *(uint8_t *)res = (uint8_t)uintres;
+               break;
+       case RTE_UINT16:
+               *(uint16_t *)res = (uint16_t)uintres;
+               break;
+       case RTE_UINT32:
+               *(uint32_t *)res = (uint32_t)uintres;
+               break;
+       case RTE_UINT64:
+               *(uint64_t *)res = uintres;
+               break;
+       case RTE_INT8:
+               *(int8_t *)res = (int8_t)uintres;
+               break;
+       case RTE_INT16:
+               *(int16_t *)res = (int16_t)uintres;
+               break;
+       case RTE_INT32:
+               *(int32_t *)res = (int32_t)uintres;
+               break;
+       case RTE_INT64:
+               *(int64_t *)res = (int64_t)uintres;
                break;
        default:
-               debug_printf("error\n");
+               debug_printf("Write failed: unsupported number format: %s\n",
+                               num_help[type]);
                return -1;
        }
+       return 0;
+}
+
+/* parse an int */
+RTE_EXPORT_SYMBOL(cmdline_parse_num)
+int
+cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
+       unsigned ressize)
+{
+       struct cmdline_token_num_data nd;
+
+       if (!tk)
+               return -1;
+
+       if (!srcbuf || !*srcbuf)
+               return -1;
+
+       memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
+
+       /* check that we have enough room in res */
+       if (res && check_res_size(&nd, ressize) < 0)
+               return -1;
+
+       if (nd.type >= RTE_UINT8 && nd.type <= RTE_INT64) {
+               int ret, neg = *srcbuf == '-';
+               uint64_t uintres;
+
+               /* try parsing as number */
+               ret = parse_num(srcbuf, &uintres);
+
+               if (ret < 0) {
+                       /* parse failed, try parsing as binary */
+                       ret = parse_bin(srcbuf, &uintres);
+                       if (ret < 0)
+                               return -1;
+               }
+               /* check if we're within valid range */
+               if (check_parsed_num(nd.type, neg, uintres) < 0)
+                       return -1;
+
+               /* parsing succeeded, write the value if necessary */
+               if (res && write_num(nd.type, res, uintres) < 0)
+                       return -1;
+
+               return ret;
+       }
+       return -1;
 }
 
 
-- 
2.47.1

Reply via email to