On 12/22/18, Tom Lane <t...@sss.pgh.pa.us> wrote:
> John Naylor <jcnay...@gmail.com> writes:
>> Using a single file also gave me another idea: Take value and category
>> out of ScanKeyword, and replace them with an index into another array
>> containing those, which will only be accessed in the event of a hit.
>> That would shrink ScanKeyword to 4 bytes (offset, index), further
>> increasing locality of reference. Might not be worth it, but I can try
>> it after moving on to the core scanner.
>
> I like that idea a *lot*, actually, because it offers the opportunity
> to decouple this mechanism from all assumptions about what the
> auxiliary data for a keyword is.

Okay, in that case I went ahead and did it for WIP v3.

> (it'd be a good idea to have a switch that allows specifying the
> prefix of these constant names).

Done as an optional switch, and tested, but not used yet: the patch still
relies on the previous method (deriving the prefix from the file name) as
a fallback. I'll probably use the switch in the final version to keep
lines below 80 characters, and to add 'core_' to the core keyword vars.

> /* Payload data for keywords */
> typedef struct MyKeyword
> {
>     int16        value;
>     int16        category;
> } MyKeyword;

I tweaked this a bit to

typedef struct ScanKeywordAux
{
        int16   value;          /* grammar's token code */
        char            category;               /* see codes above */
} ScanKeywordAux;

It seems that category was only 2 bytes to make ScanKeyword a power of
2 (of course that was on 32 bit machines and doesn't hold true
anymore). Using char will save another few hundred bytes in the core
scanner. Since we're only accessing this once per identifier, we may
not need to worry so much about memory alignment.

> Aside from being arguably better from the locality-of-reference
> standpoint, this gets us out of the weird ifdef'ing you've got in
> the v2 patch.  The kwlist_d.h headers can be very ordinary headers.

Yeah, that's a nice (and for me unexpected) bonus.

-John Naylor
 src/common/keywords.c                     |  55 ++++++++++++
 src/include/common/keywords.h             |  12 +++
 src/pl/plpgsql/src/.gitignore             |   1 +
 src/pl/plpgsql/src/Makefile               |  13 +--
 src/pl/plpgsql/src/pl_scanner.c           | 127 +++++++--------------------
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 107 +++++++++++++++++++++++
 src/tools/gen_keywords.pl                 | 139 ++++++++++++++++++++++++++++++
 src/tools/msvc/Solution.pm                |  10 +++
 8 files changed, 361 insertions(+), 103 deletions(-)

diff --git a/src/common/keywords.c b/src/common/keywords.c
index 0c0c794c68..b0e5a721b6 100644
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@@ -112,3 +112,58 @@ ScanKeywordLookup(const char *text,
 
 	return NULL;
 }
+
+/* Like ScanKeywordLookup, but uses offsets into a keyword string; returns an index or -1. */
+int
+ScanKeywordLookupOffset(const char *string_to_lookup,
+						const char *kw_strings,
+						const uint16 *kw_offsets,
+						int num_keywords)
+{
+	int			len,
+				i;
+	char		word[NAMEDATALEN];
+	const uint16 *low;
+	const uint16 *high;
+
+	len = strlen(string_to_lookup);
+	/* We assume all keywords are shorter than NAMEDATALEN, so longer input cannot match. */
+	if (len >= NAMEDATALEN)
+		return -1;
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+	 * produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (i = 0; i < len; i++)
+	{
+		char		ch = string_to_lookup[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		word[i] = ch;
+	}
+	word[len] = '\0';
+
+	/*
+	 * Binary search with strcmp(); keyword i is kw_strings + kw_offsets[i], sorted.
+	 */
+	low = kw_offsets;
+	high = kw_offsets + (num_keywords - 1);
+	while (low <= high)
+	{
+		const uint16 *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(kw_strings + *middle, word);
+		if (difference == 0)
+			return middle - kw_offsets;	/* index of the matching keyword */
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return -1;					/* not a keyword */
+}
diff --git a/src/include/common/keywords.h b/src/include/common/keywords.h
index 0b31505b66..201d0fcc7a 100644
--- a/src/include/common/keywords.h
+++ b/src/include/common/keywords.h
@@ -28,6 +28,13 @@ typedef struct ScanKeyword
 	int16		category;		/* see codes above */
 } ScanKeyword;
 
+/* Payload data for keywords */
+typedef struct ScanKeywordAux
+{
+	int16		value;			/* grammar's token code */
+	char		category;		/* see codes above */
+} ScanKeywordAux;
+
 #ifndef FRONTEND
 extern PGDLLIMPORT const ScanKeyword ScanKeywords[];
 extern PGDLLIMPORT const int NumScanKeywords;
@@ -41,4 +48,9 @@ extern const ScanKeyword *ScanKeywordLookup(const char *text,
 				  const ScanKeyword *keywords,
 				  int num_keywords);
 
+int ScanKeywordLookupOffset(const char *string_to_lookup,
+						const char *kw_strings,
+						const uint16 *kw_offsets,
+						int num_keywords);
+
 #endif							/* KEYWORDS_H */
diff --git a/src/pl/plpgsql/src/.gitignore b/src/pl/plpgsql/src/.gitignore
index ff6ac965fd..a649302fdb 100644
--- a/src/pl/plpgsql/src/.gitignore
+++ b/src/pl/plpgsql/src/.gitignore
@@ -1,3 +1,4 @@
+/*kwlist_d.h
 /pl_gram.c
 /pl_gram.h
 /plerrcodes.h
diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile
index 25a5a9d448..9112aa7d23 100644
--- a/src/pl/plpgsql/src/Makefile
+++ b/src/pl/plpgsql/src/Makefile
@@ -60,7 +60,7 @@ uninstall-headers:
 
 
 # Force these dependencies to be known even without dependency info built:
-pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h plerrcodes.h
+pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o: plpgsql.h pl_gram.h plerrcodes.h pl_unreserved_kwlist_d.h
 
 # See notes in src/backend/parser/Makefile about the following two rules
 pl_gram.h: pl_gram.c
@@ -72,6 +72,9 @@ pl_gram.c: BISONFLAGS += -d
 plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.pl
 	$(PERL) $(srcdir)/generate-plerrcodes.pl $< > $@
 
+# generate keyword headers for the scanner
+pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(top_srcdir)/src/tools/gen_keywords.pl
+	$(PERL) $(top_srcdir)/src/tools/gen_keywords.pl $<
 
 check: submake
 	$(pg_regress_check) $(REGRESS_OPTS) $(REGRESS)
@@ -84,13 +87,13 @@ submake:
 	$(MAKE) -C $(top_builddir)/src/test/regress pg_regress$(X)
 
 
-distprep: pl_gram.h pl_gram.c plerrcodes.h
+distprep: pl_gram.h pl_gram.c plerrcodes.h pl_unreserved_kwlist_d.h
 
-# pl_gram.c, pl_gram.h and plerrcodes.h are in the distribution tarball,
-# so they are not cleaned here.
+# pl_gram.c, pl_gram.h, plerrcodes.h and the generated keyword headers are
+# in the distribution tarball, so they are not cleaned here.
 clean distclean: clean-lib
 	rm -f $(OBJS)
 	rm -rf $(pg_regress_clean_files)
 
 maintainer-clean: distclean
-	rm -f pl_gram.c pl_gram.h plerrcodes.h
+	rm -f pl_gram.c pl_gram.h plerrcodes.h pl_unreserved_kwlist_d.h
diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c
index ab18946847..b57fd16676 100644
--- a/src/pl/plpgsql/src/pl_scanner.c
+++ b/src/pl/plpgsql/src/pl_scanner.c
@@ -21,6 +21,8 @@
 #include "plpgsql.h"
 #include "pl_gram.h"			/* must be after parser/scanner.h */
 
+/* String lookup table for keywords */
+#include "pl_unreserved_kwlist_d.h"
 
 #define PG_KEYWORD(a,b,c) {a,b,c},
 
@@ -96,88 +98,11 @@ static const ScanKeyword reserved_keywords[] = {
 
 static const int num_reserved_keywords = lengthof(reserved_keywords);
 
-static const ScanKeyword unreserved_keywords[] = {
-	PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
-	PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
-	PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
-	PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
-	PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
-	PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
-	PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
-	PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
-	PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
-	PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
-	PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
-	PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
-	PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
-	PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
-	PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
-	PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
-	PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
-	PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD)
-	PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
-	PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
-	PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD)
-	PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
-	PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
-	PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
-	PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
-	PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
-	PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
+/* Per-keyword payload, in kwlist order.  FIXME: PG_KEYWORD has to be redefined for the WIP. */
+#undef PG_KEYWORD
+#define PG_KEYWORD(kwname, value, category) {value, category},
+static const ScanKeywordAux unreserved_keywords[] = {
+#include "pl_unreserved_kwlist.h"
 };
 
 static const int num_unreserved_keywords = lengthof(unreserved_keywords);
@@ -256,7 +181,7 @@ plpgsql_yylex(void)
 {
 	int			tok1;
 	TokenAuxData aux1;
-	const ScanKeyword *kw;
+	int kwnum;
 
 	tok1 = internal_yylex(&aux1);
 	if (tok1 == IDENT || tok1 == PARAM)
@@ -332,12 +257,14 @@ plpgsql_yylex(void)
 									   &aux1.lval.word))
 					tok1 = T_DATUM;
 				else if (!aux1.lval.word.quoted &&
-						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
-												 unreserved_keywords,
-												 num_unreserved_keywords)))
+						 (kwnum = ScanKeywordLookupOffset(aux1.lval.word.ident,
+												 pl_unreserved_kw_strings,
+												 pl_unreserved_kw_offsets,
+												 num_unreserved_keywords)) >= 0)
 				{
-					aux1.lval.keyword = kw->name;
-					tok1 = kw->value;
+					aux1.lval.keyword = pl_unreserved_kw_strings
+										+ pl_unreserved_kw_offsets[kwnum];
+					tok1 = (unreserved_keywords[kwnum]).value;
 				}
 				else
 					tok1 = T_WORD;
@@ -362,12 +289,14 @@ plpgsql_yylex(void)
 			{
 				/* try for unreserved keyword, then for variable name */
 				if (core_yy.scanbuf[aux1.lloc] != '"' &&
-					(kw = ScanKeywordLookup(aux1.lval.str,
-											unreserved_keywords,
-											num_unreserved_keywords)))
+					(kwnum = ScanKeywordLookupOffset(aux1.lval.str,
+											 pl_unreserved_kw_strings,
+											 pl_unreserved_kw_offsets,
+											 num_unreserved_keywords)) >= 0)
 				{
-					aux1.lval.keyword = kw->name;
-					tok1 = kw->value;
+					aux1.lval.keyword = pl_unreserved_kw_strings
+										+ pl_unreserved_kw_offsets[kwnum];
+					tok1 = (unreserved_keywords[kwnum]).value;
 				}
 				else if (plpgsql_parse_word(aux1.lval.str,
 											core_yy.scanbuf + aux1.lloc,
@@ -386,12 +315,14 @@ plpgsql_yylex(void)
 									   &aux1.lval.word))
 					tok1 = T_DATUM;
 				else if (!aux1.lval.word.quoted &&
-						 (kw = ScanKeywordLookup(aux1.lval.word.ident,
-												 unreserved_keywords,
-												 num_unreserved_keywords)))
+						 (kwnum = ScanKeywordLookupOffset(aux1.lval.word.ident,
+												 pl_unreserved_kw_strings,
+												 pl_unreserved_kw_offsets,
+												 num_unreserved_keywords)) >= 0)
 				{
-					aux1.lval.keyword = kw->name;
-					tok1 = kw->value;
+					aux1.lval.keyword = pl_unreserved_kw_strings
+										+ pl_unreserved_kw_offsets[kwnum];
+					tok1 = (unreserved_keywords[kwnum]).value;
 				}
 				else
 					tok1 = T_WORD;
diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h
new file mode 100644
index 0000000000..5ad464b196
--- /dev/null
+++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h
@@ -0,0 +1,107 @@
+/*-------------------------------------------------------------------------
+ *
+ * pl_unreserved_kwlist.h
+ *
+ * The keyword lists are kept in their own source files for use by automatic
+ * tools.  The exact representation of a keyword is determined by the
+ * PG_KEYWORD macro, which is not defined in this file; it can be
+ * defined by the caller for special purposes.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/pl/plpgsql/src/pl_unreserved_kwlist.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+
+/*
+ * List of (keyword-name, keyword-token-value, category) entries.
+ *
+ * !!WARNING!!: This list must be sorted, because binary
+ *		 search is used to locate entries.
+ */
+
+/* name, value, category */
+PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
+PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
+PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
+PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
+PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
+PG_KEYWORD("call", K_CALL, UNRESERVED_KEYWORD)
+PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
+PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
+PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
+PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
+PG_KEYWORD("commit", K_COMMIT, UNRESERVED_KEYWORD)
+PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
+PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
+PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
+PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
+PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
+PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
+PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
+PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
+PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
+PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
+PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
+PG_KEYWORD("do", K_DO, UNRESERVED_KEYWORD)
+PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
+PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
+PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
+PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
+PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
+PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
+PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
+PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
+PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
+PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
+PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
+PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
+PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
+PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
+PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
+PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
+PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
+PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
+PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
+PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
+PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
+PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
+PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
+PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
+PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
+PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
+PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
+PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
+PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
+PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
+PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
+PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
+PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
+PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
+PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
+PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
+PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
+PG_KEYWORD("reset", K_RESET, UNRESERVED_KEYWORD)
+PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
+PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
+PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
+PG_KEYWORD("rollback", K_ROLLBACK, UNRESERVED_KEYWORD)
+PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
+PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
+PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
+PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
+PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
+PG_KEYWORD("set", K_SET, UNRESERVED_KEYWORD)
+PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
+PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
+PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
+PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
+PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
+PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
+PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
+PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
+PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
+PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
diff --git a/src/tools/gen_keywords.pl b/src/tools/gen_keywords.pl
new file mode 100644
index 0000000000..6bd069ef3a
--- /dev/null
+++ b/src/tools/gen_keywords.pl
@@ -0,0 +1,148 @@
+#----------------------------------------------------------------------
+#
+# gen_keywords.pl
+#	Perl script that generates *kwlist_d.h from a given *kwlist.h
+#	keyword list file.  These headers are then included into files that
+#	call ScanKeywordLookupOffset() on that keyword list.  Each keyword
+#	name is represented as an offset into a single string.
+#
+# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/tools/gen_keywords.pl
+#
+#----------------------------------------------------------------------
+
+use strict;
+use warnings;
+
+my $kw_input_file;
+my $output_path = '';
+my $prefix;
+
+# Process command line switches.
+while (@ARGV)
+{
+	my $arg = shift @ARGV;
+	if ($arg !~ /^-/)
+	{
+		$kw_input_file = $arg;
+	}
+	elsif ($arg =~ /^-o/)
+	{
+		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
+	}
+	elsif ($arg =~ /^-p/)
+	{
+		$prefix = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
+	}
+	else
+	{
+		usage();
+	}
+}
+
+# Sanity check arguments.
+die "No input file.\n" if !$kw_input_file;
+
+# Make sure output_path ends in a slash.
+if ($output_path ne '' && substr($output_path, -1) ne '/')
+{
+	$output_path .= '/';
+}
+
+# The output file name and the default prefix are both derived from the
+# input file name, so insist that it looks like "<something>kwlist.h".
+$kw_input_file =~ /((\w*)kwlist)\.h/
+  || die "Input file must be named *kwlist.h.\n";
+my $base_filename = $1;
+$prefix = $2 if !defined $prefix;
+my $kw_def_file = $output_path . $base_filename . '_d.h';
+
+# Read and validate the whole keyword list before creating the output
+# file, so that a bad list never leaves a truncated header behind.
+open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!";
+
+my @keywords;
+while (<$kif>)
+{
+	push @keywords, $1 if /^PG_KEYWORD\("(\w+)",\s*\w+,\s*\w+\)/;
+}
+
+# Error out if the keyword names are not in ASCII order.
+for my $i (0..$#keywords - 1)
+{
+	die qq|The keyword "$keywords[$i + 1]" is out of order in $kw_input_file|
+	  if ($keywords[$i] cmp $keywords[$i + 1]) >= 0;
+}
+
+close($kif);
+
+# Calculate the offset of each keyword within the keyword string, taking
+# into account the null terminators.  The offsets are emitted as uint16.
+my @offsets;
+my $offset = 0;
+foreach my $name (@keywords)
+{
+	die "Keyword offsets in $kw_input_file do not fit in uint16.\n"
+	  if $offset > 65535;
+	push @offsets, $offset;
+	$offset += length($name) + 1;
+}
+
+open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!";
+
+# Opening boilerplate for keyword definition header.
+printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
+/*-------------------------------------------------------------------------
+ *
+ * %s_d.h
+ *    List of keywords represented as a keyword string and offsets into it.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES
+ *  ******************************
+ *  *** DO NOT EDIT THIS FILE! ***
+ *  ******************************
+ *
+ *  It has been GENERATED by src/tools/gen_keywords.pl
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef %s_D_H
+#define %s_D_H
+
+EOM
+
+# Emit the array of offsets.
+printf $kwdef "static const uint16 %skw_offsets[] = {\n\t", $prefix;
+print $kwdef "$_,\n\t" foreach @offsets;
+print $kwdef "};\n\n";
+
+# Now generate the keyword string.
+printf $kwdef qq|static const char %skw_strings[] =\n\t"|, $prefix;
+print $kwdef join qq|\\0"\n\t"|, @keywords;
+print $kwdef qq|";\n\n|;
+printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_D_H */\n", uc $base_filename;
+
+# Write errors may only surface when the buffer is flushed at close.
+close($kwdef) || die "$kw_def_file: $!";
+
+
+sub usage
+{
+	die <<EOM;
+Usage: gen_keywords.pl [options] header...
+
+Options:
+    -o               output path
+    -p               optional prefix for generated data structures
+
+gen_keywords.pl transforms a list of keywords into a array of offsets
+into a single string.
+
+EOM
+}
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 0b7cdf8dd5..d0a9505b07 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -411,6 +411,16 @@ sub GenerateFiles
 		chdir('../../..');
 	}
 
+	if (IsNewer(
+			'src/pl/plpgsql/src/pl_unreserved_kwlist_d.h',
+			'src/pl/plpgsql/src/pl_unreserved_kwlist.h'))
+	{
+		print "Generating pl_unreserved_kwlist_d.h...\n";
+		chdir('src/pl/plpgsql/src');
+		system('perl ../../../tools/gen_keywords.pl pl_unreserved_kwlist.h');
+		chdir('../../../..');
+	}
+
 	if (IsNewer(
 			'src/interfaces/ecpg/preproc/preproc.y',
 			'src/backend/parser/gram.y'))

Reply via email to