On 2/11/16 1:27 AM, Pavel Stehule wrote:
I editorialized the docs and some of the comments. In particular, I
documented behavior of not truncating, and recommended casting to
name[] if user cares about that. Added a unit test to verify that
works. BTW, I saw mention in the thread about not truncated spaces,
but the function *does* truncate them, unless they're inside quotes,
where they're legitimate.
ok
I missed some of my edits. Updated patch with those in place attached.
Also added test for invalid characters.
I think "strict" would be more in line with other uses in code.
There are currently no other occurrences of 'strictmode' in the
code. There are loads of references to 'strict', but I didn't go
through all of them to see if any were used as externally visible
function parameter names.
I am sorry, I don't understand to this point. You unlike the name of
parameter "strictmode" ? Have you any proposal? Maybe "restrictive" ?
I would just call it strict. There's precedent for that in the code.
The almost all code +/- is related to human readable error messages. We
can move some code to separate static functions - read_quoted_ident,
read_unquoted_ident, but there will be some magic about parameters, and
the code will not be much better, than it is now.
What I'm saying is that most places that need to do de-quoting or
similar just run a simple while loop and use an in_quote variable to
track whether they're inside a quote or not. See
backend/utils/adt/rowtypes.c line 199 for an example.
As I said, I don't have a strong opinion on it, so if you prefer it this
way that's fine with me.
--
Jim Nasby, Data Architect, Blue Treble Consulting, Austin TX
Experts in Analytics, Data Architecture and PostgreSQL
Data in Trouble? Get it in Treble! http://BlueTreble.com
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 139aa2b..b4a2898 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1778,6 +1778,27 @@
<row>
<entry>
<indexterm>
+ <primary>parse_ident</primary>
+ </indexterm>
+ <literal><function>parse_ident(<parameter>str</parameter>
<type>text</type>,
+ [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT
true ] )</function></literal>
+ </entry>
+ <entry><type>text[]</type></entry>
+ <entry>Split <parameter>qualified identifier</parameter> into array
<parameter>parts</parameter>.
+ When <parameter>strictmode</parameter> is false, extra characters after
the identifier are ignored.
+ This is useful for parsing identifiers for objects like functions and
arrays that may have trailing
+ characters. By default, extra characters after the last identifier are
considered an error.
+ second parameter is false, then chars after last identifier are
ignored. Note that this function
+ does not truncate quoted identifiers. If you care about that you should
cast the result of this
+ function to name[].
+ </entry>
+ <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+ <entry><literal>"SomeSchema,sometable"</literal></entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
<primary>pg_client_encoding</primary>
</indexterm>
<literal><function>pg_client_encoding()</function></literal>
diff --git a/src/backend/catalog/system_views.sql
b/src/backend/catalog/system_views.sql
index 923fe58..61d5b80 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -965,3 +965,10 @@ RETURNS jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'jsonb_set';
+
+CREATE OR REPLACE FUNCTION
+ parse_ident(str text, strictmode boolean DEFAULT true)
+RETURNS text[]
+LANGUAGE INTERNAL
+STRICT IMMUTABLE
+AS 'parse_ident';
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 2b4ab20..7aa5b76 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -130,6 +130,15 @@ scanstr(const char *s)
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
+ return downcase_identifier(ident, len, warn, true);
+}
+
+/*
+ * a workhorse for downcase_truncate_identifier
+ */
+char *
+downcase_identifier(const char *ident, int len, bool warn, bool truncate)
+{
char *result;
int i;
bool enc_is_single_byte;
@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len,
bool warn)
}
result[i] = '\0';
- if (i >= NAMEDATALEN)
+ if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn);
return result;
}
+
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 6a306f3..f7d60b1 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -21,12 +21,15 @@
#include <unistd.h>
#include "access/sysattr.h"
+#include "access/htup_details.h"
#include "catalog/catalog.h"
+#include "catalog/namespace.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
+#include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
@@ -38,6 +41,7 @@
#include "utils/ruleutils.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
+#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/timestamp.h"
@@ -598,3 +602,174 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
+
+
+/*
+ * This simple parser utility are compatible with lexer implementation,
+ * used only in parse_ident function
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+ if (c == '_')
+ return true;
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ return true;
+
+ if (c >= 0200 && c <= 0377)
+ return true;
+
+ return false;
+}
+
+static bool
+is_ident_cont(unsigned char c)
+{
+ if (c >= '0' && c <= '9')
+ return true;
+
+ return is_ident_start(c);
+}
+
+/*
+ * parse_ident - parse SQL composed identifier to separate identifiers.
+ * When strict mode is active (second parameter), then any chars after
+ * last identifiers are disallowed.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+ text *qualname;
+ char *qualname_str;
+ bool strict_mode;
+ ArrayBuildState *astate = NULL;
+ char *nextp;
+ bool after_dot = false;
+
+ qualname = PG_GETARG_TEXT_PP(0);
+ qualname_str = text_to_cstring(qualname);
+ strict_mode = PG_GETARG_BOOL(1);
+
+ nextp = qualname_str;
+
+ /* skip leading whitespace */
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ for (;;)
+ {
+ char *curname;
+ char *endp;
+ bool missing_ident;
+
+ missing_ident = true;
+
+ if (*nextp == '\"')
+ {
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '\"');
+ if (endp == NULL)
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unclosed double
quotes"),
+ errdetail("string \"%s\" is
not valid identifier",
+
qualname_str)));
+ if (endp[1] != '\"')
+ break;
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ nextp = endp + 1;
+ *endp = '\0';
+
+ if (endp - curname == 0)
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier should not be
empty: \"%s\"",
+
qualname_str)));
+
+ astate = accumArrayResult(astate,
+ CStringGetTextDatum(curname), false,
+ TEXTOID,
CurrentMemoryContext);
+ missing_ident = false;
+ }
+ else
+ {
+ if (is_ident_start((unsigned char) *nextp))
+ {
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * Unlike name, we don't implicitly truncate
identifiers. This
+ * is useful for allowing the user to check for
specific parts
+ * of the identifier being too long. It's easy
enough for the
+ * user to get the truncated names by casting
our output to
+ * name[].
+ */
+ downname = downcase_identifier(curname, len,
false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate,
+ PointerGetDatum(part), false,
+ TEXTOID,
CurrentMemoryContext);
+ missing_ident = false;
+ }
+ }
+
+ if (missing_ident)
+ {
+ /* Different error messages based on where we failed. */
+ if (*nextp == '.')
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier before
\".\" symbol: \"%s\"",
+
qualname_str)));
+ else if (after_dot)
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier after \".\"
symbol: \"%s\"",
+
qualname_str)));
+ else
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier: \"%s\"",
+
qualname_str)));
+ }
+
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ if (*nextp == '.')
+ {
+ after_dot = true;
+ nextp++;
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+ continue;
+ }
+ else if (*nextp == '\0')
+ {
+ break;
+ }
+ else
+ {
+ if (strict_mode)
+ ereport(ERROR,
+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier contains disallowed
characters: \"%s\"",
+
qualname_str)));
+ break;
+ }
+ }
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 5ded13e..d7c09a7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3451,6 +3451,9 @@ DESCR("I/O");
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f
t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_
_null_ _null_ ));
DESCR("convert namespace name to regnamespace");
+DATA(insert OID = 3318 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f
t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strictmode}" _null_ _null_
parse_ident _null_ _null_ _null_ ));
+DESCR("parse qualified identifier to array of identifiers");
+
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f
f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_
fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f
t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator
_null_ _null_ _null_ ));
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 4f4164b..4f95c81 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn);
+extern char *downcase_identifier(const char *ident, int len,
+ bool warn, bool
truncate);
+
extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 5e8e832..b1b6ef6 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -504,6 +504,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/name.out
b/src/test/regress/expected/name.out
index b359d52..66569f4 100644
--- a/src/test/regress/expected/name.out
+++ b/src/test/regress/expected/name.out
@@ -124,3 +124,55 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~
'.*asdf.*';
(2 rows)
DROP TABLE NAME_TBL;
+DO $$
+DECLARE r text[];
+BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+NOTICE: schemax.tabley
+NOTICE: "SchemaX"."TableY"
+SELECT parse_ident('foo.boo');
+ parse_ident
+-------------
+ {foo,boo}
+(1 row)
+
+SELECT parse_ident('foo.boo[]'); -- should fail
+ERROR: identifier contains disallowed characters: "foo.boo[]"
+SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+ parse_ident
+-------------
+ {foo,boo}
+(1 row)
+
+-- should fail
+SELECT parse_ident(' ');
+ERROR: missing identifier: " "
+SELECT parse_ident(' .aaa');
+ERROR: missing identifier before "." symbol: " .aaa"
+SELECT parse_ident(' aaa . ');
+ERROR: missing identifier after "." symbol: " aaa . "
+SELECT parse_ident('aaa.a%b');
+ERROR: identifier contains disallowed characters: "aaa.a%b"
+SELECT length(a[1]), length(a[2]) from
parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')
as a ;
+ length | length
+--------+--------
+ 414 | 289
+(1 row)
+
+SELECT parse_ident(' first . " second " ." third ". " ' ||
repeat('x',66) || '"');
+ parse_ident
+-----------------------------------------------------------------------------------------------------------
+ {first," second "," third ","
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
+SELECT parse_ident(' first . " second " ." third ". " ' ||
repeat('x',66) || '"')::name[];
+ parse_ident
+------------------------------------------------------------------------------------------------------
+ {first," second "," third ","
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+(1 row)
+
diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql
index 1c7a671..2e70e4e 100644
--- a/src/test/regress/sql/name.sql
+++ b/src/test/regress/sql/name.sql
@@ -52,3 +52,28 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~
'[0-9]';
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL;
+
+DO $$
+DECLARE r text[];
+BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+END;
+$$;
+
+SELECT parse_ident('foo.boo');
+SELECT parse_ident('foo.boo[]'); -- should fail
+SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+
+-- should fail
+SELECT parse_ident(' ');
+SELECT parse_ident(' .aaa');
+SELECT parse_ident(' aaa . ');
+SELECT parse_ident('aaa.a%b');
+
+SELECT length(a[1]), length(a[2]) from
parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')
as a ;
+
+SELECT parse_ident(' first . " second " ." third ". " ' ||
repeat('x',66) || '"');
+SELECT parse_ident(' first . " second " ." third ". " ' ||
repeat('x',66) || '"')::name[];
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers