Non-decimal integer literals

Peter Eisentraut Mon, 16 Aug 2021 02:52:15 -0700

Here is a patch to add support for hexadecimal, octal, and binary integer literals:


    0x42E
    0o112
    0b100101


per SQL:202x draft.

This adds support in the lexer as well as in the integer type input functions.

Those core parts are straightforward enough, but there are a bunch of other places where integers are parsed, and one could consider in each case whether they should get the same treatment, for example the replication syntax lexer, or input function for oid, numeric, and int2vector. There are also some opportunities to move some code around, for example scanint8() could be in numutils.c. I have also looked with some suspicion at some details of the number lexing in ecpg, but haven't found anything I could break yet. Suggestions are welcome.

From f2a9b37968a55bf91feb2b4753745c9f5a64be2e Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 16 Aug 2021 09:32:14 +0200
Subject: [PATCH v1] Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42E
    0o112
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.
---
 doc/src/sgml/syntax.sgml             | 26 ++++++++
 src/backend/catalog/sql_features.txt |  1 +
 src/backend/parser/scan.l            | 70 ++++++++++++++------
 src/backend/utils/adt/int8.c         | 54 ++++++++++++++++
 src/backend/utils/adt/numutils.c     | 97 ++++++++++++++++++++++++++++
 src/fe_utils/psqlscan.l              | 55 +++++++++++-----
 src/interfaces/ecpg/preproc/pgc.l    | 64 +++++++++++-------
 src/test/regress/expected/int2.out   | 19 ++++++
 src/test/regress/expected/int4.out   | 37 +++++++++++
 src/test/regress/expected/int8.out   | 19 ++++++
 src/test/regress/sql/int2.sql        |  7 ++
 src/test/regress/sql/int4.sql        | 11 ++++
 src/test/regress/sql/int8.sql        |  7 ++
 13 files changed, 412 insertions(+), 55 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..8fb4b1228d 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
 </literallayout>
     </para>
 
+    <para>
+     Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+     <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+     (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+     digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+     digits (0 or 1).  Hexadecimal digits and the radix prefixes can be in
+     upper or lower case.  Note that only integers can have non-decimal forms,
+     not numbers with fractional parts.
+    </para>
+
+    <para>
+     These are some examples of this:
+<literallayout>0b100101
+0B10011001
+0o112
+0O755
+0x42e
+0XFFFF
+</literallayout>
+    </para>
+
     <para>
      <indexterm><primary>integer</primary></indexterm>
      <indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/sql_features.txt 
b/src/backend/catalog/sql_features.txt
index 9f424216e2..d6359503f3 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652        SQL-dynamic statements in SQL routines          
        NO
 T653   SQL-schema statements in external routines                      YES     
 T654   SQL-dynamic statements in external routines                     NO      
 T655   Cyclically dependent routines                   YES     
+T661   Non-decimal integer literals                    YES     SQL:202x draft
 T811   Basic SQL/JSON constructor functions                    NO      
 T812   SQL/JSON: JSON_OBJECTAGG                        NO      
 T813   SQL/JSON: JSON_ARRAYAGG with ORDER BY                   NO      
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 6e6824faeb..83458ffb30 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -262,7 +262,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -341,7 +341,7 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
+
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -380,24 +380,41 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+integer                        
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
 
-param                  \${integer}
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 other                  .
 
@@ -977,12 +994,22 @@ other                     .
                                        SET_YYLLOC();
                                        return process_integer_literal(yytext, 
yylval);
                                }
-{decimal}              {
+{hexfail}              {
+                                       yyerror("invalid hexadecimal integer");
+                               }
+{octfail}              {
+                                       yyerror("invalid octal integer");
+                               }
+{binfail}              {
+                                       yyerror("invalid binary integer");
+                               }
+
+{numeric}              {
                                        SET_YYLLOC();
                                        yylval->str = pstrdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
@@ -996,7 +1023,7 @@ other                      .
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is an {integer} or {numeric}.
                                         */
                                        yyless(yyleng - 1);
                                        SET_YYLLOC();
@@ -1296,7 +1323,7 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
@@ -1306,7 +1333,14 @@ process_integer_literal(const char *token, YYSTYPE *lval)
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       if (token[0] == '0' && (token[1] == 'X' || token[1] == 'x'))
+               val = strtoint(token + 2, &endptr, 16);
+       else if (token[0] == '0' && (token[1] == 'O' || token[1] == 'o'))
+               val = strtoint(token + 2, &endptr, 8);
+       else if (token[0] == '0' && (token[1] == 'B' || token[1] == 'b'))
+               val = strtoint(token + 2, &endptr, 2);
+       else
+               val = strtoint(token, &endptr, 10);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..c3ed944a6c 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -45,6 +45,17 @@ typedef struct
  * Formatting and conversion routines.
  *---------------------------------------------------------*/
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * scanint8 --- try to parse a string into an int8.
  *
@@ -84,6 +95,48 @@ scanint8(const char *str, bool errorOK, int64 *result)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -92,6 +145,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
                        unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..7c6520346e 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -173,6 +173,17 @@ pg_atoi(const char *s, int size, int c)
        return (int32) l;
 }
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -208,6 +219,48 @@ pg_strtoint16(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -216,6 +269,7 @@ pg_strtoint16(const char *s)
                        unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -284,6 +338,48 @@ pg_strtoint32(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -292,6 +388,7 @@ pg_strtoint32(const char *s)
                        unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 0fab48a382..729aec562b 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -200,7 +200,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -279,7 +279,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -318,24 +317,41 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+integer                        
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
 
-param                  \${integer}
+param                  \${decinteger}
 
 /* psql-specific: characters allowed in variable names */
 variable_char  [A-Za-z\200-\377_0-9]
@@ -842,10 +858,19 @@ other                     .
 {integer}              {
                                        ECHO;
                                }
-{decimal}              {
+{hexfail}              {
+                                       ECHO;
+                               }
+{octfail}              {
+                                       ECHO;
+                               }
+{binfail}              {
+                                       ECHO;
+                               }
+{numeric}              {
                                        ECHO;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        ECHO;
diff --git a/src/interfaces/ecpg/preproc/pgc.l 
b/src/interfaces/ecpg/preproc/pgc.l
index 7a0356638d..ebd1f3d7f4 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -305,7 +305,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -346,24 +345,41 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+integer                        
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
 
-param                  \${integer}
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -393,9 +409,6 @@ include_next        
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import                 [iI][mM][pP][oO][rR][tT]
 undef                  [uU][nN][dD][eE][fF]
 
-/* C version of hex number */
-xch                            0[xX][0-9A-Fa-f]*
-
 ccomment               "//".*\n
 
 if                             [iI][fF]
@@ -408,7 +421,7 @@ endif                       [eE][nN][dD][iI][fF]
 struct                 [sS][tT][rR][uU][cC][tT]
 
 exec_sql               {exec}{space}*{sql}{space}*
-ipdigit                        ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit                        
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
 ip                             {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 
 /* we might want to parse all cpp include files */
@@ -926,11 +939,11 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {integer}              {
                                        return process_integer_literal(yytext, 
&base_yylval);
                                }
-{decimal}              {
+{numeric}              {
                                        base_yylval.str = mm_strdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        return process_integer_literal(yytext, 
&base_yylval);
@@ -942,7 +955,7 @@ cppline                     
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is an {integer} or {numeric}.
                                         */
                                        yyless(yyleng - 1);
                                        return process_integer_literal(yytext, 
&base_yylval);
@@ -1009,7 +1022,7 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                        return S_ANYTHING;
                                         }
 <C>{ccomment}          { ECHO; }
-<C>{xch}                       {
+<C>{hexinteger}                {
                                                char* endptr;
 
                                                errno = 0;
@@ -1546,7 +1559,7 @@ addlitchar(unsigned char ychar)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
@@ -1556,7 +1569,14 @@ process_integer_literal(const char *token, YYSTYPE *lval)
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       if (token[0] == '0' && (token[1] == 'X' || token[1] == 'x'))
+               val = strtoint(token + 2, &endptr, 16);
+       else if (token[0] == '0' && (token[1] == 'O' || token[1] == 'o'))
+               val = strtoint(token + 2, &endptr, 8);
+       else if (token[0] == '0' && (token[1] == 'B' || token[1] == 'b'))
+               val = strtoint(token + 2, &endptr, 2);
+       else
+               val = strtoint(token, &endptr, 10);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/test/regress/expected/int2.out 
b/src/test/regress/expected/int2.out
index 55ea7202cd..0ffa00a835 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
   2.5 |          3
 (7 rows)
 
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2 
+------
+   37
+(1 row)
+
+SELECT int2 '0o112';
+ int2 
+------
+   74
+(1 row)
+
+SELECT int2 '0x42E';
+ int2 
+------
+ 1070
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out 
b/src/test/regress/expected/int4.out
index 9d20b3380f..8c1e4237e8 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,40 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 ERROR:  integer out of range
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
 ERROR:  integer out of range
+-- non-decimal literals
+SELECT 0b100101;
+ ?column? 
+----------
+       37
+(1 row)
+
+SELECT 0o112;
+ ?column? 
+----------
+       74
+(1 row)
+
+SELECT 0x42E;
+ ?column? 
+----------
+     1070
+(1 row)
+
+SELECT int4 '0b100101';
+ int4 
+------
+   37
+(1 row)
+
+SELECT int4 '0o112';
+ int4 
+------
+   74
+(1 row)
+
+SELECT int4 '0x42E';
+ int4 
+------
+ 1070
+(1 row)
+
diff --git a/src/test/regress/expected/int8.out 
b/src/test/regress/expected/int8.out
index 36540ec456..0a1c2ae216 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- 
overflow
 ERROR:  bigint out of range
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
 ERROR:  bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8 
+------
+   37
+(1 row)
+
+SELECT int8 '0o112';
+ int8 
+------
+   74
+(1 row)
+
+SELECT int8 '0x42E';
+ int8 
+------
+ 1070
+(1 row)
+
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..c4410fa62d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
              (0.5::numeric),
              (1.5::numeric),
              (2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o112';
+SELECT int2 '0x42E';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..c4da86ba33 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,14 @@ CREATE TABLE INT4_TBL(f1 int4);
 
 SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT 0b100101;
+SELECT 0o112;
+SELECT 0x42E;
+
+SELECT int4 '0b100101';
+SELECT int4 '0o112';
+SELECT int4 '0x42E';
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..4cc4830bdc 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
 
 SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o112';
+SELECT int8 '0x42E';
-- 
2.32.0

Non-decimal integer literals

Reply via email to