Re: [HACKERS] proposal: function parse_ident

Pavel Stehule Tue, 08 Sep 2015 20:35:19 -0700

Hi

I sending the path with little bit enhanced parser - it doesn't support
utf8 alpha in identifiers yet


Regards

Pavel

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index b3b78d2..75ea33a
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1707,1712 ****
--- 1707,1727 ----
        <row>
         <entry>
          <indexterm>
+          <primary>parse_ident</primary>
+         </indexterm>
+         <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>,
+         OUT <parameter>parts</parameter> <type>text[]</type>, OUT <parameter>other</parameter> <type>text</type>)</function></literal>
+        </entry>
+        <entry><type>record</type></entry>
+        <entry>Split <parameter>qualified identifier</parameter> to array <parameter>parts</parameter>.
+        </entry>
+        <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+        <entry><literal>("{SomeSchema,sometable}",)</literal></entry>
+       </row>
+ 
+       <row>
+        <entry>
+         <indexterm>
           <primary>pg_client_encoding</primary>
          </indexterm>
          <literal><function>pg_client_encoding()</function></literal>
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
new file mode 100644
index c0495d9..a116e8e
*** a/src/backend/utils/adt/misc.c
--- b/src/backend/utils/adt/misc.c
***************
*** 21,32 ****
--- 21,35 ----
  #include <unistd.h>
  
  #include "access/sysattr.h"
+ #include "access/htup_details.h"
  #include "catalog/catalog.h"
+ #include "catalog/namespace.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_type.h"
  #include "commands/dbcommands.h"
  #include "funcapi.h"
  #include "miscadmin.h"
+ #include "parser/scansup.h"
  #include "parser/keywords.h"
  #include "postmaster/syslogger.h"
  #include "rewrite/rewriteHandler.h"
***************
*** 38,43 ****
--- 41,47 ----
  #include "utils/ruleutils.h"
  #include "tcop/tcopprot.h"
  #include "utils/acl.h"
+ #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/timestamp.h"
  
*************** pg_column_is_updatable(PG_FUNCTION_ARGS)
*** 598,600 ****
--- 602,758 ----
  
  	PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
  }
+ 
+ 
+ /*
+  * This simple parser utility are compatible with lexer implementation,
+  * used only in parse_ident function
+  */
+ static bool
+ is_ident_start(char c)
+ {
+ 	if (c == '_')
+ 		return true;
+ 
+ 	if (c >= 'a' && c <= 'z')
+ 		return true;
+ 	if (c >= 'A' && c <= 'Z')
+ 		return true;
+ 
+ 	if (c >= 0200 && c <= 0377)
+ 		return true;
+ 
+ 	return true;
+ }
+ 
+ static bool
+ is_ident_cont(char c)
+ {
+ 	if (c >= '0' && c <= '9')
+ 		return true;
+ 
+ 	return is_ident_start(c);
+ }
+ 
+ /*
+  * parse_ident - decompose text identifier to parts of
+  * identifiers and the rest (doesn't follow a identifier rule).
+  */
+ Datum
+ parse_ident(PG_FUNCTION_ARGS)
+ {
+ 	text		*qualname = PG_GETARG_TEXT_PP(0);
+ 	char		*qualname_str;
+ 	Datum		values[2];
+ 	bool		nulls[2];
+ 	TupleDesc	tupdesc;
+ 	ArrayBuildState *astate = NULL;
+ 	char	*nextp;
+ 
+ 	qualname_str = text_to_cstring(qualname);
+ 	nextp = qualname_str;
+ 
+ 	/* Prepare result tuple desc */
+ 	tupdesc = CreateTemplateTupleDesc(2, false);
+ 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "parts",
+ 					    TEXTARRAYOID, -1, 0);
+ 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "other",
+ 					    TEXTOID, -1, 0);
+ 
+ 	BlessTupleDesc(tupdesc);
+ 
+ 	nulls[0] = false;
+ 	nulls[1] = true;
+ 
+ 	/* skip leading whitespace */
+ 	while (isspace((unsigned char) *nextp))
+ 		nextp++;
+ 
+ 	do
+ 	{
+ 		char		*curname;
+ 		char		*endp;
+ 		bool		missing_ident;
+ 
+ 		missing_ident = true;
+ 
+ 		if (*nextp == '\"')
+ 		{
+ 			curname = nextp + 1;
+ 			for (;;)
+ 			{
+ 				endp = strchr(nextp + 1, '\"');
+ 				if (endp == NULL)
+ 					elog(ERROR, "unclosed double quotes");
+ 				if (endp[1] != '\"')
+ 					break;
+ 				memmove(endp, endp + 1, strlen(endp));
+ 				nextp = endp;
+ 			}
+ 			nextp = endp + 1;
+ 			*endp = '\0';
+ 
+ 			if (endp - curname == 0)
+ 				elog(ERROR, "empty identifier");
+ 
+ 			astate = accumArrayResult(astate,
+ 				CStringGetTextDatum(curname), false,
+ 						    TEXTOID, CurrentMemoryContext);
+ 			missing_ident = false;
+ 		}
+ 		else
+ 		{
+ 			if (is_ident_start(*nextp))
+ 			{
+ 				char *downname;
+ 				int	len;
+ 				text	*part;
+ 
+ 				curname = nextp++;
+ 				while (is_ident_cont(*nextp))
+ 					nextp++;
+ 
+ 				len = nextp - curname;
+ 
+ 				downname = downcase_truncate_identifier(curname, len, false);
+ 				part = cstring_to_text_with_len(downname, len);
+ 				astate = accumArrayResult(astate,
+ 					PointerGetDatum(part), false,
+ 							    TEXTOID, CurrentMemoryContext);
+ 				missing_ident = false;
+ 			}
+ 		}
+ 
+ 		if (missing_ident)
+ 			elog(ERROR, "missing identifier");
+ 
+ 		while (isspace((unsigned char) *nextp))
+ 			nextp++;
+ 
+ 		if (*nextp == '.')
+ 		{
+ 			nextp++;
+ 			while (isspace((unsigned char) *nextp))
+ 				nextp++;
+ 			continue;
+ 		}
+ 		else if (*nextp == '\0')
+ 		{
+ 			break;
+ 		}
+ 		else
+ 		{
+ 			values[1] = CStringGetTextDatum(nextp);
+ 			nulls[1] = false;
+ 			break;
+ 		}
+ 	} while (true);
+ 
+ 
+ 	values[0] = makeArrayResult(astate, CurrentMemoryContext);
+ 	nulls[0] = false;
+ 
+ 	PG_RETURN_DATUM(HeapTupleGetDatum(
+ 			    heap_form_tuple(tupdesc, values, nulls)));
+ }
+     
\ No newline at end of file
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index ddf7c67..3ae7b50
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DESCR("I/O");
*** 3516,3521 ****
--- 3516,3524 ----
  DATA(insert OID = 4086 (  to_regnamespace	PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 4089 "2275" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
  DESCR("convert namespace name to regnamespace");
  
+ DATA(insert OID = 3300 (  parse_ident		PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2249 "25" "{25,1009,25}" "{i,o,o}" "{identifier,parts,other}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
+ DESCR("parse pattern dbname.schema.object");
+ 
  DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
  DESCR("(internal)");
  DATA(insert OID = 2247 ( fmgr_c_validator	PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
new file mode 100644
index fc1679e..0cb491d
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern Datum pg_typeof(PG_FUNCTION_ARGS)
*** 495,500 ****
--- 495,501 ----
  extern Datum pg_collation_for(PG_FUNCTION_ARGS);
  extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
  extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+ extern Datum parse_ident(PG_FUNCTION_ARGS);
  
  /* oid.c */
  extern Datum oidin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out
new file mode 100644
index b359d52..2c84d8a
*** a/src/test/regress/expected/name.out
--- b/src/test/regress/expected/name.out
*************** SELECT '' AS two, c.f1 FROM NAME_TBL c W
*** 124,126 ****
--- 124,137 ----
  (2 rows)
  
  DROP TABLE NAME_TBL;
+ DO $$
+ DECLARE r record;
+ BEGIN
+   r := parse_ident('Schemax.Tabley');
+   RAISE NOTICE '%', format('%I.%I', r.parts[1], r.parts[2]);
+   r := parse_ident('"SchemaX"."TableY"');
+   RAISE NOTICE '%', format('%I.%I', r.parts[1], r.parts[2]);
+ END;
+ $$;
+ NOTICE:  schemax.tabley
+ NOTICE:  "SchemaX"."TableY"
diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql
new file mode 100644
index 1c7a671..e80f9aa
*** a/src/test/regress/sql/name.sql
--- b/src/test/regress/sql/name.sql
*************** SELECT '' AS three, c.f1 FROM NAME_TBL c
*** 52,54 ****
--- 52,64 ----
  SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
  
  DROP TABLE NAME_TBL;
+ 
+ DO $$
+ DECLARE r record;
+ BEGIN
+   r := parse_ident('Schemax.Tabley');
+   RAISE NOTICE '%', format('%I.%I', r.parts[1], r.parts[2]);
+   r := parse_ident('"SchemaX"."TableY"');
+   RAISE NOTICE '%', format('%I.%I', r.parts[1], r.parts[2]);
+ END;
+ $$;

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] proposal: function parse_ident

Reply via email to