There has been some other refactoring going on, which made this patch
set out of date. So here is an update.
The old pg_strtouint64() has been removed, so there is no longer a
naming concern with patch 0001. That one should be good to go.
I also found that pg_atoi(), yet another way to parse integers, has
mostly outlived its usefulness, so I removed its last two callers and
then the function itself in 0002 and 0003.
The remaining patches are as before, with some of the review comments
applied. I have not yet written the lexing unit tests for ecpg; that
is still to do. This affects patches 0004 and 0005.
As mentioned before, patches 0006 and 0007 are more feature previews at
this point.
On 01.12.21 16:47, Peter Eisentraut wrote:
> On 25.11.21 18:51, John Naylor wrote:
>> If we're going to change the comment anyway, "the parser" sounds more
>> natural. Aside from that, 0001 and 0002 can probably be pushed now, if
>> you like.
> done
>> --- a/src/interfaces/ecpg/preproc/pgc.l
>> +++ b/src/interfaces/ecpg/preproc/pgc.l
>> @@ -365,6 +365,10 @@ real ({integer}|{decimal})[Ee][-+]?{digit}+
>> realfail1 ({integer}|{decimal})[Ee]
>> realfail2 ({integer}|{decimal})[Ee][-+]
>> +integer_junk {integer}{ident_start}
>> +decimal_junk {decimal}{ident_start}
>> +real_junk {real}{ident_start}
>> A comment might be good here to explain these are only in ECPG for
>> consistency with the other scanners. Not really important, though.
> Yeah, it's a bit weird that not all the symbols are used in ecpg. I'll
> look into explaining this better.
>> 0006
>> +{hexfail} {
>> + yyerror("invalid hexadecimal integer");
>> + }
>> +{octfail} {
>> + yyerror("invalid octal integer");
>> }
>> -{decimal} {
>> +{binfail} {
>> + yyerror("invalid binary integer");
>> + }
>> It seems these could use SET_YYLLOC(), since the error cursor doesn't
>> match other failure states:
> ok
>> We might consider some tests for ECPG since lack of coverage has been
>> a problem.
> right
>> Also, I'm curious: how does the spec work as far as deciding the year
>> of release, or feature-freezing of new items?
> The schedule has recently been extended again, so the current plan is
> for SQL:202x with x=3, with feature freeze in mid-2022.
> So the feature patches in this thread are in my mind now targeting
> PG15+1. But the preparation work (up to v5-0005, and some other number
> parsing refactoring that I'm seeing) could be considered for PG15.
> I'll move this to the next CF and come back with an updated patch set in
> a little while.
From 4aa1329c3aad512f33a56a05fcc465793ef19b1d Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 1/7] Move scanint8() to numutils.c
Move scanint8() to numutils.c and rename to pg_strtoint64(). We
already have a "16" and "32" version of that, and the code inside the
functions was aligned, so this move makes all three versions
consistent. The API is also changed to no longer provide the errorOK
case. Callers that need to handle bad input themselves, instead of
having an error thrown, can use strtoi64().
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
src/backend/parser/parse_node.c | 12 ++-
src/backend/replication/pgoutput/pgoutput.c | 9 ++-
src/backend/utils/adt/int8.c | 90 +--------------------
src/backend/utils/adt/numutils.c | 84 +++++++++++++++++++
src/bin/pgbench/pgbench.c | 4 +-
src/include/utils/builtins.h | 1 +
src/include/utils/int8.h | 25 ------
7 files changed, 103 insertions(+), 122 deletions(-)
delete mode 100644 src/include/utils/int8.h
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c
index 8cfe6f67c0..0eefd5427a 100644
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@@ -26,7 +26,6 @@
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
-#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/varbit.h"
@@ -353,7 +352,6 @@ make_const(ParseState *pstate, A_Const *aconst)
{
Const *con;
Datum val;
- int64 val64;
Oid typeid;
int typelen;
bool typebyval;
@@ -384,8 +382,15 @@ make_const(ParseState *pstate, A_Const *aconst)
break;
case T_Float:
+ {
/* could be an oversize integer as well as a float ...
*/
- if (scanint8(aconst->val.fval.val, true, &val64))
+
+ int64 val64;
+ char *endptr;
+
+ errno = 0;
+ val64 = strtoi64(aconst->val.fval.val, &endptr, 10);
+ if (errno == 0 && *endptr == '\0')
{
/*
* It might actually fit in int32. Probably
only INT_MIN can
@@ -425,6 +430,7 @@ make_const(ParseState *pstate, A_Const *aconst)
typebyval = false;
}
break;
+ }
case T_String:
diff --git a/src/backend/replication/pgoutput/pgoutput.c
b/src/backend/replication/pgoutput/pgoutput.c
index 6f6a203dea..2f0f40c75d 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -21,7 +21,6 @@
#include "replication/logicalproto.h"
#include "replication/origin.h"
#include "replication/pgoutput.h"
-#include "utils/int8.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -205,7 +204,8 @@ parse_output_parameters(List *options, PGOutputData *data)
/* Check each param, whether or not we recognize it */
if (strcmp(defel->defname, "proto_version") == 0)
{
- int64 parsed;
+ unsigned long parsed;
+ char *endptr;
if (protocol_version_given)
ereport(ERROR,
@@ -213,12 +213,13 @@ parse_output_parameters(List *options, PGOutputData *data)
errmsg("conflicting or
redundant options")));
protocol_version_given = true;
- if (!scanint8(strVal(defel->arg), true, &parsed))
+ parsed = strtoul(strVal(defel->arg), &endptr, 10);
+ if (errno || *endptr != '\0')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid
proto_version")));
- if (parsed > PG_UINT32_MAX || parsed < 0)
+ if (parsed > PG_UINT32_MAX)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("proto_version \"%s\"
out of range",
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..f8f557526f 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -24,7 +24,6 @@
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
#include "utils/builtins.h"
-#include "utils/int8.h"
typedef struct
@@ -45,99 +44,14 @@ typedef struct
* Formatting and conversion routines.
*---------------------------------------------------------*/
-/*
- * scanint8 --- try to parse a string into an int8.
- *
- * If errorOK is false, ereport a useful error message if the string is bad.
- * If errorOK is true, just return "false" for bad input.
- */
-bool
-scanint8(const char *str, bool errorOK, int64 *result)
-{
- const char *ptr = str;
- int64 tmp = 0;
- bool neg = false;
-
- /*
- * Do our own scan, rather than relying on sscanf which might be broken
- * for long long.
- *
- * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
- * value as a negative number.
- */
-
- /* skip leading spaces */
- while (*ptr && isspace((unsigned char) *ptr))
- ptr++;
-
- /* handle sign */
- if (*ptr == '-')
- {
- ptr++;
- neg = true;
- }
- else if (*ptr == '+')
- ptr++;
-
- /* require at least one digit */
- if (unlikely(!isdigit((unsigned char) *ptr)))
- goto invalid_syntax;
-
- /* process digits */
- while (*ptr && isdigit((unsigned char) *ptr))
- {
- int8 digit = (*ptr++ - '0');
-
- if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
- unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
- goto out_of_range;
- }
-
- /* allow trailing whitespace, but not other trailing chars */
- while (*ptr != '\0' && isspace((unsigned char) *ptr))
- ptr++;
-
- if (unlikely(*ptr != '\0'))
- goto invalid_syntax;
-
- if (!neg)
- {
- /* could fail if input is most negative number */
- if (unlikely(tmp == PG_INT64_MIN))
- goto out_of_range;
- tmp = -tmp;
- }
-
- *result = tmp;
- return true;
-
-out_of_range:
- if (!errorOK)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value \"%s\" is out of range for type
%s",
- str, "bigint")));
- return false;
-
-invalid_syntax:
- if (!errorOK)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s:
\"%s\"",
- "bigint", str)));
- return false;
-}
-
/* int8in()
*/
Datum
int8in(PG_FUNCTION_ARGS)
{
- char *str = PG_GETARG_CSTRING(0);
- int64 result;
+ char *num = PG_GETARG_CSTRING(0);
- (void) scanint8(str, false, &result);
- PG_RETURN_INT64(result);
+ PG_RETURN_INT64(pg_strtoint64(num));
}
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 6a9c00fdd3..7ac7e5dbd3 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -325,6 +325,90 @@ pg_strtoint32(const char *s)
return 0; /* keep compiler quiet
*/
}
+/*
+ * Convert input string to a signed 64 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters. Will throw
+ * ereport() upon bad input format or overflow.
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int64
+pg_strtoint64(const char *s)
+{
+ const char *ptr = s;
+ int64 tmp = 0;
+ bool neg = false;
+
+ /*
+ * Do our own scan, rather than relying on sscanf which might be broken
+ * for long long.
+ *
+ * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
+ * value as a negative number.
+ */
+
+ /* skip leading spaces */
+ while (*ptr && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* handle sign */
+ if (*ptr == '-')
+ {
+ ptr++;
+ neg = true;
+ }
+ else if (*ptr == '+')
+ ptr++;
+
+ /* require at least one digit */
+ if (unlikely(!isdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ /* process digits */
+ while (*ptr && isdigit((unsigned char) *ptr))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+
+ /* allow trailing whitespace, but not other trailing chars */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (unlikely(*ptr != '\0'))
+ goto invalid_syntax;
+
+ if (!neg)
+ {
+ /* could fail if input is most negative number */
+ if (unlikely(tmp == PG_INT64_MIN))
+ goto out_of_range;
+ tmp = -tmp;
+ }
+
+ return tmp;
+
+out_of_range:
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "bigint")));
+
+invalid_syntax:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "bigint", s)));
+
+ return 0; /* keep compiler quiet
*/
+}
+
/*
* pg_itoa: converts a signed 16-bit integer to its string representation
* and returns strlen(a).
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ea9639984c..6b5f8bc071 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -787,8 +787,8 @@ is_an_int(const char *str)
/*
* strtoint64 -- convert a string to 64-bit integer
*
- * This function is a slightly modified version of scanint8() from
- * src/backend/utils/adt/int8.c.
+ * This function is a slightly modified version of pg_strtoint64() from
+ * src/backend/utils/adt/numutils.c.
*
* The function returns whether the conversion worked, and if so
* "*result" is set to the result.
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index b07eefaf1e..1ef8359906 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -46,6 +46,7 @@ extern int namestrcmp(Name name, const char *str);
extern int32 pg_atoi(const char *s, int size, int c);
extern int16 pg_strtoint16(const char *s);
extern int32 pg_strtoint32(const char *s);
+extern int64 pg_strtoint64(const char *s);
extern int pg_itoa(int16 i, char *a);
extern int pg_ultoa_n(uint32 l, char *a);
extern int pg_ulltoa_n(uint64 l, char *a);
diff --git a/src/include/utils/int8.h b/src/include/utils/int8.h
deleted file mode 100644
index 6571188f90..0000000000
--- a/src/include/utils/int8.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * int8.h
- * Declarations for operations on 64-bit integers.
- *
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/utils/int8.h
- *
- * NOTES
- * These data types are supported on all 64-bit architectures, and may
- * be supported through libraries on some 32-bit machines. If your machine
- * is not currently supported, then please try to make it so, then post
- * patches to the postgresql.org hackers mailing list.
- *
- *-------------------------------------------------------------------------
- */
-#ifndef INT8_H
-#define INT8_H
-
-extern bool scanint8(const char *str, bool errorOK, int64 *result);
-
-#endif /* INT8_H */
base-commit: 8112bcf0cc602e00e95eab6c4bdc0eb73b5b547d
--
2.34.1
From 4651d0b09e9dcac554efba099a27c94748b33ccb Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 2/7] Remove one use of pg_atoi()
There was no real need to use this here instead of a simpler API.
---
src/backend/utils/adt/jsonpath_gram.y | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/backend/utils/adt/jsonpath_gram.y
b/src/backend/utils/adt/jsonpath_gram.y
index bd5d4488a0..5982672558 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -232,7 +232,7 @@ array_accessor:
;
any_level:
- INT_P { $$ =
pg_atoi($1.val, 4, 0); }
+ INT_P { $$ =
pg_strtoint32($1.val); }
| LAST_P { $$ = -1; }
;
--
2.34.1
From e32f1eed77d8040e2d79e5251b3c8f897dbeb223 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 3/7] Remove pg_atoi()
The last caller was int2vectorin(), and having such a general function
for one user didn't seem useful, so just put the required parts inline
and remove the function.
---
src/backend/utils/adt/int.c | 32 ++++++++++--
src/backend/utils/adt/numutils.c | 88 --------------------------------
src/include/utils/builtins.h | 1 -
3 files changed, 28 insertions(+), 93 deletions(-)
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index e9f108425c..ed2a9016f5 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -146,15 +146,39 @@ int2vectorin(PG_FUNCTION_ARGS)
result = (int2vector *) palloc0(Int2VectorSize(FUNC_MAX_ARGS));
- for (n = 0; *intString && n < FUNC_MAX_ARGS; n++)
+ for (n = 0; n < FUNC_MAX_ARGS; n++)
{
+ long l;
+ char *endp;
+
while (*intString && isspace((unsigned char) *intString))
intString++;
if (*intString == '\0')
break;
- result->values[n] = pg_atoi(intString, sizeof(int16), ' ');
- while (*intString && !isspace((unsigned char) *intString))
- intString++;
+
+ errno = 0;
+ l = strtol(intString, &endp, 10);
+
+ if (intString == endp)
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type
%s: \"%s\"",
+ "smallint",
intString)));
+
+ if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
+ ereport(ERROR,
+
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range
for type %s", intString,
+ "smallint")));
+
+ if (*endp && *endp != ' ')
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type
%s: \"%s\"",
+ "integer", intString)));
+
+ result->values[n] = l;
+ intString = endp;
}
while (*intString && isspace((unsigned char) *intString))
intString++;
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 7ac7e5dbd3..18de54da40 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -85,94 +85,6 @@ decimalLength64(const uint64 v)
return t + (v >= PowersOfTen[t]);
}
-/*
- * pg_atoi: convert string to integer
- *
- * allows any number of leading or trailing whitespace characters.
- *
- * 'size' is the sizeof() the desired integral result (1, 2, or 4 bytes).
- *
- * c, if not 0, is a terminator character that may appear after the
- * integer (plus whitespace). If 0, the string must end after the integer.
- *
- * Unlike plain atoi(), this will throw ereport() upon bad input format or
- * overflow.
- */
-int32
-pg_atoi(const char *s, int size, int c)
-{
- long l;
- char *badp;
-
- /*
- * Some versions of strtol treat the empty string as an error, but some
- * seem not to. Make an explicit test to be sure we catch it.
- */
- if (s == NULL)
- elog(ERROR, "NULL pointer");
- if (*s == 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s:
\"%s\"",
- "integer", s)));
-
- errno = 0;
- l = strtol(s, &badp, 10);
-
- /* We made no progress parsing the string, so bail out */
- if (s == badp)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s:
\"%s\"",
- "integer", s)));
-
- switch (size)
- {
- case sizeof(int32):
- if (errno == ERANGE
-#if defined(HAVE_LONG_INT_64)
- /* won't get ERANGE on these with 64-bit longs... */
- || l < INT_MIN || l > INT_MAX
-#endif
- )
- ereport(ERROR,
-
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value \"%s\" is out of
range for type %s", s,
- "integer")));
- break;
- case sizeof(int16):
- if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
- ereport(ERROR,
-
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value \"%s\" is out of
range for type %s", s,
- "smallint")));
- break;
- case sizeof(int8):
- if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX)
- ereport(ERROR,
-
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value \"%s\" is out of
range for 8-bit integer", s)));
- break;
- default:
- elog(ERROR, "unsupported result size: %d", size);
- }
-
- /*
- * Skip any trailing whitespace; if anything but whitespace remains
before
- * the terminating character, bail out
- */
- while (*badp && *badp != c && isspace((unsigned char) *badp))
- badp++;
-
- if (*badp && *badp != c)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s:
\"%s\"",
- "integer", s)));
-
- return (int32) l;
-}
-
/*
* Convert input string to a signed 16 bit integer.
*
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 1ef8359906..60339d3dcf 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -43,7 +43,6 @@ extern void namestrcpy(Name name, const char *str);
extern int namestrcmp(Name name, const char *str);
/* numutils.c */
-extern int32 pg_atoi(const char *s, int size, int c);
extern int16 pg_strtoint16(const char *s);
extern int32 pg_strtoint32(const char *s);
extern int64 pg_strtoint64(const char *s);
--
2.34.1
From fb606a29ba7ae45e8dcf84a4be2f39aa5a54a648 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 4/7] Add test case for trailing junk after numeric literals
PostgreSQL currently accepts numeric literals with trailing
non-digits, such as 123abc where the abc is treated as the next token.
This may be a bit surprising. This commit adds test cases for this;
subsequent commits intend to change this behavior.
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
src/test/regress/expected/numerology.out | 55 ++++++++++++++++++++++++
src/test/regress/sql/numerology.sql | 14 ++++++
2 files changed, 69 insertions(+)
diff --git a/src/test/regress/expected/numerology.out
b/src/test/regress/expected/numerology.out
index 44d6c435de..32c6d80c03 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -3,6 +3,61 @@
-- Test various combinations of numeric types and functions.
--
--
+-- Trailing junk in numeric literals
+--
+SELECT 123abc;
+ abc
+-----
+ 123
+(1 row)
+
+SELECT 0x0o;
+ x0o
+-----
+ 0
+(1 row)
+
+SELECT 1_2_3;
+ _2_3
+------
+ 1
+(1 row)
+
+SELECT 0.a;
+ a
+---
+ 0
+(1 row)
+
+SELECT 0.0a;
+ a
+-----
+ 0.0
+(1 row)
+
+SELECT .0a;
+ a
+-----
+ 0.0
+(1 row)
+
+SELECT 0.0e1a;
+ a
+---
+ 0
+(1 row)
+
+SELECT 0.0e;
+ e
+-----
+ 0.0
+(1 row)
+
+SELECT 0.0e+a;
+ERROR: syntax error at or near "+"
+LINE 1: SELECT 0.0e+a;
+ ^
+--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
-- so let's try explicit conversions for now - tgl 97/05/07
diff --git a/src/test/regress/sql/numerology.sql
b/src/test/regress/sql/numerology.sql
index fddb58f8fd..70447a95fa 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -3,6 +3,20 @@
-- Test various combinations of numeric types and functions.
--
+--
+-- Trailing junk in numeric literals
+--
+
+SELECT 123abc;
+SELECT 0x0o;
+SELECT 1_2_3;
+SELECT 0.a;
+SELECT 0.0a;
+SELECT .0a;
+SELECT 0.0e1a;
+SELECT 0.0e;
+SELECT 0.0e+a;
+
--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
--
2.34.1
From 8ee2a422ec86a055ccddd26f397760d372aec4a8 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 5/7] Reject trailing junk after numeric literals
After this, the PostgreSQL lexers no longer accept numeric literals
with trailing non-digits, such as 123abc, which was previously scanned
as two tokens: 123 and abc. That behavior was undocumented and
surprising, and it might also interfere with some extended numeric
literal syntax being contemplated for the future.
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
src/backend/parser/scan.l | 27 ++++++----
src/fe_utils/psqlscan.l | 21 +++++---
src/interfaces/ecpg/preproc/pgc.l | 4 ++
src/test/regress/expected/numerology.out | 68 +++++++++---------------
4 files changed, 61 insertions(+), 59 deletions(-)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 76fd6996ed..f889c2faf7 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -399,6 +399,10 @@ real
({integer}|{decimal})[Ee][-+]?{digit}+
realfail1 ({integer}|{decimal})[Ee]
realfail2 ({integer}|{decimal})[Ee][-+]
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
+
param \${integer}
other .
@@ -996,19 +1000,24 @@ other .
return FCONST;
}
{realfail1} {
- /*
- * throw back the [Ee], and figure out
whether what
- * remains is an {integer} or {decimal}.
- */
- yyless(yyleng - 1);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ yyerror("trailing junk after numeric
literal");
}
{realfail2} {
- /* throw back the [Ee][+-], and proceed
as above */
- yyless(yyleng - 2);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ yyerror("trailing junk after numeric
literal");
+ }
+{integer_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
+ }
+{decimal_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
+ }
+{real_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index db8a8dfaf2..09709e6151 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -337,6 +337,10 @@ real
({integer}|{decimal})[Ee][-+]?{digit}+
realfail1 ({integer}|{decimal})[Ee]
realfail2 ({integer}|{decimal})[Ee][-+]
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
+
param \${integer}
/* psql-specific: characters allowed in variable names */
@@ -855,17 +859,18 @@ other .
ECHO;
}
{realfail1} {
- /*
- * throw back the [Ee], and figure out
whether what
- * remains is an {integer} or {decimal}.
- * (in psql, we don't actually care...)
- */
- yyless(yyleng - 1);
ECHO;
}
{realfail2} {
- /* throw back the [Ee][+-], and proceed
as above */
- yyless(yyleng - 2);
+ ECHO;
+ }
+{integer_junk} {
+ ECHO;
+ }
+{decimal_junk} {
+ ECHO;
+ }
+{real_junk} {
ECHO;
}
diff --git a/src/interfaces/ecpg/preproc/pgc.l
b/src/interfaces/ecpg/preproc/pgc.l
index a2f8c7f3d8..110478059b 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -365,6 +365,10 @@ real
({integer}|{decimal})[Ee][-+]?{digit}+
realfail1 ({integer}|{decimal})[Ee]
realfail2 ({integer}|{decimal})[Ee][-+]
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
+
param \${integer}
/* special characters for other dbms */
diff --git a/src/test/regress/expected/numerology.out
b/src/test/regress/expected/numerology.out
index 32c6d80c03..2f176ccb52 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -6,57 +6,41 @@
-- Trailing junk in numeric literals
--
SELECT 123abc;
- abc
------
- 123
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "123a"
+LINE 1: SELECT 123abc;
+ ^
SELECT 0x0o;
- x0o
------
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0x"
+LINE 1: SELECT 0x0o;
+ ^
SELECT 1_2_3;
- _2_3
-------
- 1
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "1_"
+LINE 1: SELECT 1_2_3;
+ ^
SELECT 0.a;
- a
----
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.a"
+LINE 1: SELECT 0.a;
+ ^
SELECT 0.0a;
- a
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0a"
+LINE 1: SELECT 0.0a;
+ ^
SELECT .0a;
- a
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near ".0a"
+LINE 1: SELECT .0a;
+ ^
SELECT 0.0e1a;
- a
----
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0e1a"
+LINE 1: SELECT 0.0e1a;
+ ^
SELECT 0.0e;
- e
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0e"
+LINE 1: SELECT 0.0e;
+ ^
SELECT 0.0e+a;
-ERROR: syntax error at or near "+"
+ERROR: trailing junk after numeric literal at or near "0.0e+"
LINE 1: SELECT 0.0e+a;
- ^
+ ^
--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
--
2.34.1
From 8cf484ed47263ecf257e3770715cfa83394f1fa4 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 6/7] Non-decimal integer literals
Add support for hexadecimal, octal, and binary integer literals:
0x42F
0o273
0b100101
per SQL:202x draft.
This adds support in the lexer as well as in the integer type input
functions.
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
doc/src/sgml/syntax.sgml | 26 ++++
src/backend/catalog/information_schema.sql | 6 +-
src/backend/catalog/sql_features.txt | 1 +
src/backend/parser/scan.l | 99 +++++++++++----
src/backend/utils/adt/numutils.c | 140 +++++++++++++++++++++
src/fe_utils/psqlscan.l | 78 +++++++++---
src/interfaces/ecpg/preproc/pgc.l | 93 +++++++-------
src/test/regress/expected/int2.out | 19 +++
src/test/regress/expected/int4.out | 19 +++
src/test/regress/expected/int8.out | 19 +++
src/test/regress/expected/numerology.out | 59 ++++++++-
src/test/regress/sql/int2.sql | 7 ++
src/test/regress/sql/int4.sql | 7 ++
src/test/regress/sql/int8.sql | 7 ++
src/test/regress/sql/numerology.sql | 21 +++-
15 files changed, 511 insertions(+), 90 deletions(-)
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..a4f04199c6 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
</literallayout>
</para>
+ <para>
+ Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+ <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+ (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+ digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+ digits (0 or 1). Hexadecimal digits and the radix prefixes can be in
+ upper or lower case. Note that only integers can have non-decimal forms,
+ not numbers with fractional parts.
+ </para>
+
+ <para>
+ These are some examples of this:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+ </para>
+
<para>
<indexterm><primary>integer</primary></indexterm>
<indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql
b/src/backend/catalog/information_schema.sql
index 11d9dd60c2..ce88c483a2 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod
int4) RETURNS integer
WHEN 1700 /*numeric*/ THEN
CASE WHEN $2 = -1
THEN null
- ELSE (($2 - 4) >> 16) & 65535
+ ELSE (($2 - 4) >> 16) & 0xFFFF
END
WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4)
RETURNS integer
WHEN $1 IN (1700) THEN
CASE WHEN $2 = -1
THEN null
- ELSE ($2 - 4) & 65535
+ ELSE ($2 - 4) & 0xFFFF
END
ELSE null
END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod
int4) RETURNS integer
WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
WHEN $1 IN (1186) /* interval */
- THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535
END
+ THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 &
0xFFFF END
ELSE null
END;
diff --git a/src/backend/catalog/sql_features.txt
b/src/backend/catalog/sql_features.txt
index b8a78f4d41..545cb45131 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652 SQL-dynamic statements in SQL routines
NO
T653 SQL-schema statements in external routines YES
T654 SQL-dynamic statements in external routines NO
T655 Cyclically dependent routines YES
+T661 Non-decimal integer literals YES SQL:202x draft
T811 Basic SQL/JSON constructor functions NO
T812 SQL/JSON: JSON_OBJECTAGG NO
T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index f889c2faf7..c55338b601 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t
yyscanner);
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
static char *litbufdup(core_yyscan_t yyscanner);
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t
yyscanner);
-static int process_integer_literal(const char *token, YYSTYPE *lval);
+static int process_integer_literal(const char *token, YYSTYPE *lval, int
base);
static void addunicode(pg_wchar c, yyscan_t yyscanner);
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
@@ -385,25 +385,40 @@ operator {op_chars}+
* Unary minus is not part of a number here. Instead we pass it separately to
* the parser, and there it gets coerced via doNegate().
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
-digit [0-9]
-
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
-
-integer_junk {integer}{ident_start}
-decimal_junk {decimal}{ident_start}
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
+
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
+
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
+
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk {decinteger}{ident_start}
+hexinteger_junk {hexinteger}{ident_start}
+octinteger_junk {octinteger}{ident_start}
+bininteger_junk {bininteger}{ident_start}
+numeric_junk {numeric}{ident_start}
real_junk {real}{ident_start}
-param \${integer}
+param \${decinteger}
other .
@@ -979,20 +994,44 @@ other .
return PARAM;
}
-{integer} {
+{decinteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext,
yylval, 10);
+ }
+{hexinteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext +
2, yylval, 16);
+ }
+{octinteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext +
2, yylval, 8);
+ }
+{bininteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext +
2, yylval, 2);
+ }
+{hexfail} {
+ SET_YYLLOC();
+ yyerror("invalid hexadecimal integer");
+ }
+{octfail} {
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ yyerror("invalid octal integer");
}
-{decimal} {
+{binfail} {
+ SET_YYLLOC();
+ yyerror("invalid binary integer");
+ }
+{numeric} {
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
-{decimalfail} {
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ return process_integer_literal(yytext,
yylval, 10);
}
{real} {
SET_YYLLOC();
@@ -1007,11 +1046,23 @@ other .
SET_YYLLOC();
yyerror("trailing junk after numeric
literal");
}
-{integer_junk} {
+{decinteger_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
+ }
+{hexinteger_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
+ }
+{octinteger_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric
literal");
+ }
+{bininteger_junk} {
SET_YYLLOC();
yyerror("trailing junk after numeric
literal");
}
-{decimal_junk} {
+{numeric_junk} {
SET_YYLLOC();
yyerror("trailing junk after numeric
literal");
}
@@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner)
}
/*
- * Process {integer}. Note this will also do the right thing with {decimal},
+ * Process {*integer}. Note this will also do the right thing with {numeric},
* ie digits and a decimal point.
*/
static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
{
int val;
char *endptr;
errno = 0;
- val = strtoint(token, &endptr, 10);
+ val = strtoint(token, &endptr, base); /* NOTE(review): the hex/octal/binary rules pass yytext + 2, so token carries no 0x/0o/0b prefix here; confirm the float fallback below does not hand that prefix-less text on as a decimal constant */
if (*endptr != '\0' || errno == ERANGE)
{
/* integer too large (or contains decimal pt), treat it as a
float */
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 18de54da40..358cee2ec4 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -85,6 +85,17 @@ decimalLength64(const uint64 v)
return t + (v >= PowersOfTen[t]);
}
+static const int8 hexlookup[128] = { /* indexed by ASCII code: value of that hex digit, or -1 if the character is not a hex digit; the parsers below index it only after isxdigit() accepted the character */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* codes 48..63: '0'..'9' map to 0..9 */
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* codes 64..79: 'A'..'F' map to 10..15 */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* codes 96..111: 'a'..'f' map to 10..15 */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
/*
* Convert input string to a signed 16 bit integer.
*
@@ -120,6 +131,62 @@ pg_strtoint16(const char *s)
goto invalid_syntax;
/* process digits */
+ /* a 0x, 0o or 0b prefix selects hexadecimal, octal or binary; anything else is decimal */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(!isxdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '7'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '1'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -128,6 +195,7 @@ pg_strtoint16(const char *s)
unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -196,6 +250,62 @@ pg_strtoint32(const char *s)
goto invalid_syntax;
/* process digits */
+ /* a 0x, 0o or 0b prefix selects hexadecimal, octal or binary; anything else is decimal */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(!isxdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '7'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '1'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -204,6 +314,7 @@ pg_strtoint32(const char *s)
unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -280,6 +377,62 @@ pg_strtoint64(const char *s)
goto invalid_syntax;
/* process digits */
+ /* a 0x, 0o or 0b prefix selects hexadecimal, octal or binary; anything else is decimal */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(!isxdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '7'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ /* a bare prefix is not a number: require at least one digit */
+ if (unlikely(*ptr < '0' || *ptr > '1'))
+ goto invalid_syntax;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -288,6 +441,7 @@ pg_strtoint64(const char *s)
unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 09709e6151..af38f173fa 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -323,25 +323,40 @@ operator {op_chars}+
* Unary minus is not part of a number here. Instead we pass it separately to
* the parser, and there it gets coerced via doNegate().
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
-digit [0-9]
-
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
-
-integer_junk {integer}{ident_start}
-decimal_junk {decimal}{ident_start}
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
+
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
+
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
+
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk {decinteger}{ident_start}
+hexinteger_junk {hexinteger}{ident_start}
+octinteger_junk {octinteger}{ident_start}
+bininteger_junk {bininteger}{ident_start}
+numeric_junk {numeric}{ident_start}
real_junk {real}{ident_start}
-param \${integer}
+param \${decinteger}
/* psql-specific: characters allowed in variable names */
variable_char [A-Za-z\200-\377_0-9]
@@ -844,13 +859,31 @@ other .
ECHO;
}
-{integer} {
+{decinteger} {
+ ECHO;
+ }
+{hexinteger} {
+ ECHO;
+ }
+{octinteger} {
+ ECHO;
+ }
+{bininteger} {
+ ECHO;
+ }
+{hexfail} {
ECHO;
}
-{decimal} {
+{octfail} {
ECHO;
}
-{decimalfail} {
+{binfail} {
+ ECHO;
+ }
+{numeric} {
+ ECHO;
+ }
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
ECHO;
@@ -864,10 +897,19 @@ other .
{realfail2} {
ECHO;
}
-{integer_junk} {
+{decinteger_junk} {
+ ECHO;
+ }
+{hexinteger_junk} {
+ ECHO;
+ }
+{octinteger_junk} {
+ ECHO;
+ }
+{bininteger_junk} {
ECHO;
}
-{decimal_junk} {
+{numeric_junk} {
ECHO;
}
{real_junk} {
diff --git a/src/interfaces/ecpg/preproc/pgc.l
b/src/interfaces/ecpg/preproc/pgc.l
index 110478059b..c4805bd91f 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -57,7 +57,7 @@ static bool include_next;
#define startlit() (literalbuf[0] = '\0', literallen = 0)
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char);
-static int process_integer_literal(const char *token, YYSTYPE *lval);
+static int process_integer_literal(const char *token, YYSTYPE *lval, int
base);
static void parse_include(void);
static bool ecpg_isspace(char ch);
static bool isdefine(void);
@@ -351,25 +351,40 @@ operator {op_chars}+
* Unary minus is not part of a number here. Instead we pass it separately to
* the parser, and there it gets coerced via doNegate().
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot,
10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
-digit [0-9]
-
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
-
-integer_junk {integer}{ident_start}
-decimal_junk {decimal}{ident_start}
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
+
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
+
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
+
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk {decinteger}{ident_start}
+hexinteger_junk {hexinteger}{ident_start}
+octinteger_junk {octinteger}{ident_start}
+bininteger_junk {bininteger}{ident_start}
+numeric_junk {numeric}{ident_start}
real_junk {real}{ident_start}
-param \${integer}
+param \${decinteger}
/* special characters for other dbms */
/* we have to react differently in compat mode */
@@ -399,9 +414,6 @@ include_next
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
import [iI][mM][pP][oO][rR][tT]
undef [uU][nN][dD][eE][fF]
-/* C version of hex number */
-xch 0[xX][0-9A-Fa-f]*
-
ccomment "//".*\n
if [iI][fF]
@@ -414,7 +426,7 @@ endif [eE][nN][dD][iI][fF]
struct [sS][tT][rR][uU][cC][tT]
exec_sql {exec}{space}*{sql}{space}*
-ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
/* we might want to parse all cpp include files */
@@ -929,17 +941,20 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
} /* <SQL> */
<C,SQL>{
-{integer} {
- return process_integer_literal(yytext,
&base_yylval);
+{decinteger} {
+ return process_integer_literal(yytext,
&base_yylval, 10);
+ }
+{hexinteger} {
+ return process_integer_literal(yytext +
2, &base_yylval, 16);
}
-{decimal} {
+{numeric} {
base_yylval.str = mm_strdup(yytext);
return FCONST;
}
-{decimalfail} {
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
- return process_integer_literal(yytext,
&base_yylval);
+ return process_integer_literal(yytext,
&base_yylval, 10);
}
{real} {
base_yylval.str = mm_strdup(yytext);
@@ -948,18 +963,25 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
{realfail1} {
/*
* throw back the [Ee], and figure out
whether what
- * remains is an {integer} or {decimal}.
+ * remains is a {decinteger} or
{numeric}.
*/
yyless(yyleng - 1);
- return process_integer_literal(yytext,
&base_yylval);
+ return process_integer_literal(yytext,
&base_yylval, 10);
}
{realfail2} {
/* throw back the [Ee][+-], and proceed
as above */
yyless(yyleng - 2);
- return process_integer_literal(yytext,
&base_yylval);
+ return process_integer_literal(yytext,
&base_yylval, 10);
}
} /* <C,SQL> */
+<SQL>{octinteger} {
+ return process_integer_literal(yytext +
2, &base_yylval, 8);
+ }
+<SQL>{bininteger} {
+ return process_integer_literal(yytext +
2, &base_yylval, 2);
+ }
+
<SQL>{
:{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
base_yylval.str = mm_strdup(yytext+1);
@@ -1015,19 +1037,6 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return S_ANYTHING;
}
<C>{ccomment} { ECHO; }
-<C>{xch} {
- char* endptr;
-
- errno = 0;
- base_yylval.ival =
strtoul((char *)yytext,&endptr,16);
- if (*endptr != '\0' || errno ==
ERANGE)
- {
- errno = 0;
- base_yylval.str =
mm_strdup(yytext);
- return SCONST;
- }
- return ICONST;
- }
<C>{cppinclude} {
if (system_includes)
{
@@ -1552,17 +1561,17 @@ addlitchar(unsigned char ychar)
}
/*
- * Process {integer}. Note this will also do the right thing with {decimal},
+ * Process {*integer}. Note this will also do the right thing with {numeric},
* ie digits and a decimal point.
*/
static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
{
int val;
char *endptr;
errno = 0;
- val = strtoint(token, &endptr, 10);
+ val = strtoint(token, &endptr, base); /* NOTE(review): non-decimal callers pass yytext + 2, so token has no prefix here, whereas the removed <C>{xch} rule kept the full yytext when the value did not fit; confirm the fallback below still emits usable text for large C hex constants */
if (*endptr != '\0' || errno == ERANGE)
{
/* integer too large (or contains decimal pt), treat it as a
float */
diff --git a/src/test/regress/expected/int2.out
b/src/test/regress/expected/int2.out
index 55ea7202cd..220e1493e8 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
2.5 | 3
(7 rows)
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2
+------
+ 37
+(1 row)
+
+SELECT int2 '0o273';
+ int2
+------
+ 187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out
b/src/test/regress/expected/int4.out
index 9d20b3380f..6fdbd58b40 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,22 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
ERROR: integer out of range
SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
ERROR: integer out of range
+-- non-decimal literals
+SELECT int4 '0b100101';
+ int4
+------
+ 37
+(1 row)
+
+SELECT int4 '0o273';
+ int4
+------
+ 187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int8.out
b/src/test/regress/expected/int8.out
index 36540ec456..edd15a4353 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); --
overflow
ERROR: bigint out of range
SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
ERROR: bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8
+------
+ 37
+(1 row)
+
+SELECT int8 '0o273';
+ int8
+------
+ 187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/numerology.out
b/src/test/regress/expected/numerology.out
index 2f176ccb52..be3868d40f 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -3,14 +3,33 @@
-- Test various combinations of numeric types and functions.
--
--
--- Trailing junk in numeric literals
+-- numeric literals
--
+SELECT 0b100101;
+ ?column?
+----------
+ 37
+(1 row)
+
+SELECT 0o273;
+ ?column?
+----------
+ 187
+(1 row)
+
+SELECT 0x42F;
+ ?column?
+----------
+ 1071
+(1 row)
+
+-- error cases
SELECT 123abc;
ERROR: trailing junk after numeric literal at or near "123a"
LINE 1: SELECT 123abc;
^
SELECT 0x0o;
-ERROR: trailing junk after numeric literal at or near "0x"
+ERROR: trailing junk after numeric literal at or near "0x0o"
LINE 1: SELECT 0x0o;
^
SELECT 1_2_3;
@@ -41,6 +60,42 @@ SELECT 0.0e+a;
ERROR: trailing junk after numeric literal at or near "0.0e+"
LINE 1: SELECT 0.0e+a;
^
+SELECT 0b;
+ERROR: invalid binary integer at or near "0b"
+LINE 1: SELECT 0b;
+ ^
+SELECT 1b;
+ERROR: trailing junk after numeric literal at or near "1b"
+LINE 1: SELECT 1b;
+ ^
+SELECT 0b0x;
+ERROR: trailing junk after numeric literal at or near "0b0x"
+LINE 1: SELECT 0b0x;
+ ^
+SELECT 0o;
+ERROR: invalid octal integer at or near "0o"
+LINE 1: SELECT 0o;
+ ^
+SELECT 1o;
+ERROR: trailing junk after numeric literal at or near "1o"
+LINE 1: SELECT 1o;
+ ^
+SELECT 0o0x;
+ERROR: trailing junk after numeric literal at or near "0o0x"
+LINE 1: SELECT 0o0x;
+ ^
+SELECT 0x;
+ERROR: invalid hexadecimal integer at or near "0x"
+LINE 1: SELECT 0x;
+ ^
+SELECT 1x;
+ERROR: trailing junk after numeric literal at or near "1x"
+LINE 1: SELECT 1x;
+ ^
+SELECT 0x0y;
+ERROR: trailing junk after numeric literal at or near "0x0y"
+LINE 1: SELECT 0x0y;
+ ^
--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..0dee22fe6d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
(0.5::numeric),
(1.5::numeric),
(2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..2a69b1614e 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,10 @@ CREATE TABLE INT4_TBL(f1 int4);
SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..b7ad696dd8 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
diff --git a/src/test/regress/sql/numerology.sql
b/src/test/regress/sql/numerology.sql
index 70447a95fa..fd7e02e536 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -3,10 +3,16 @@
-- Test various combinations of numeric types and functions.
--
+
--
--- Trailing junk in numeric literals
+-- numeric literals
--
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+-- error cases
SELECT 123abc;
SELECT 0x0o;
SELECT 1_2_3;
@@ -17,6 +23,19 @@
SELECT 0.0e;
SELECT 0.0e+a;
+SELECT 0b;
+SELECT 1b;
+SELECT 0b0x;
+
+SELECT 0o;
+SELECT 1o;
+SELECT 0o0x;
+
+SELECT 0x;
+SELECT 1x;
+SELECT 0x0y;
+
+
--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
--
2.34.1
From fb17e09849c74414947a4107dd13883b7347629c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Thu, 30 Dec 2021 10:26:37 +0100
Subject: [PATCH v6 7/7] WIP: Underscores in numeric literals
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
src/backend/parser/Makefile | 2 +-
src/backend/parser/scan.l | 26 +++++++++++++++---
src/test/regress/expected/numerology.out | 34 +++++++++++++++++++++---
src/test/regress/sql/numerology.sql | 7 ++++-
4 files changed, 59 insertions(+), 10 deletions(-)
diff --git a/src/backend/parser/Makefile b/src/backend/parser/Makefile
index 5ddb9a92f0..827bc4c189 100644
--- a/src/backend/parser/Makefile
+++ b/src/backend/parser/Makefile
@@ -56,7 +56,7 @@ gram.c: BISON_CHECK_CMD = $(PERL) $(srcdir)/check_keywords.pl
$< $(top_srcdir)/s
scan.c: FLEXFLAGS = -CF -p -p
-scan.c: FLEX_NO_BACKUP=yes
+#scan.c: FLEX_NO_BACKUP=yes
scan.c: FLEX_FIX_WARNING=yes
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index c55338b601..7b6e6e3c9e 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -395,10 +395,10 @@ hexdigit [0-9A-Fa-f]
octdigit [0-7]
bindigit [0-1]
-decinteger {decdigit}+
-hexinteger 0[xX]{hexdigit}+
-octinteger 0[oO]{octdigit}+
-bininteger 0[bB]{bindigit}+
+decinteger {decdigit}(_?{decdigit})*
+hexinteger 0[xX](_?{hexdigit})+
+octinteger 0[oO](_?{octdigit})+
+bininteger 0[bB](_?{bindigit})+
hexfail 0[xX]
octfail 0[oO]
@@ -1367,6 +1367,24 @@ process_integer_literal(const char *token, YYSTYPE
*lval, int base)
int val;
char *endptr;
+ if (strchr(token, '_')) /* digit separators present: parse a copy with them removed */
+ {
+ char *newtoken = palloc(strlen(token)); /* no +1 needed: at least one '_' is dropped, which leaves room for the terminator */
+ const char *p1;
+ char *p2;
+
+ p1 = token;
+ p2 = newtoken;
+ while (*p1)
+ {
+ if (*p1 != '_')
+ *p2++ = *p1;
+ p1++;
+ }
+ *p2 = '\0';
+ token = newtoken; /* everything after this point sees the stripped copy */
+ }
+
errno = 0;
val = strtoint(token, &endptr, base);
if (*endptr != '\0' || errno == ERANGE)
diff --git a/src/test/regress/expected/numerology.out
b/src/test/regress/expected/numerology.out
index be3868d40f..cf5d528558 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -23,6 +23,36 @@ SELECT 0x42F;
1071
(1 row)
+SELECT 1_000_000;
+ ?column?
+----------
+ 1000000
+(1 row)
+
+SELECT 1_2_3;
+ ?column?
+----------
+ 123
+(1 row)
+
+SELECT 0x1EEE_FFFF;
+ ?column?
+-----------
+ 518979583
+(1 row)
+
+SELECT 0o2_73;
+ ?column?
+----------
+ 187
+(1 row)
+
+SELECT 0b_10_0101;
+ ?column?
+----------
+ 37
+(1 row)
+
-- error cases
SELECT 123abc;
ERROR: trailing junk after numeric literal at or near "123a"
@@ -32,10 +62,6 @@ SELECT 0x0o;
ERROR: trailing junk after numeric literal at or near "0x0o"
LINE 1: SELECT 0x0o;
^
-SELECT 1_2_3;
-ERROR: trailing junk after numeric literal at or near "1_"
-LINE 1: SELECT 1_2_3;
- ^
SELECT 0.a;
ERROR: trailing junk after numeric literal at or near "0.a"
LINE 1: SELECT 0.a;
diff --git a/src/test/regress/sql/numerology.sql
b/src/test/regress/sql/numerology.sql
index fd7e02e536..970654f0b7 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -12,10 +12,15 @@
SELECT 0o273;
SELECT 0x42F;
+SELECT 1_000_000;
+SELECT 1_2_3;
+SELECT 0x1EEE_FFFF;
+SELECT 0o2_73;
+SELECT 0b_10_0101;
+
-- error cases
SELECT 123abc;
SELECT 0x0o;
-SELECT 1_2_3;
SELECT 0.a;
SELECT 0.0a;
SELECT .0a;
--
2.34.1