Re: inconsistency and inefficiency in setup_conversion()

2018-10-02 Thread John Naylor
On 10/2/18, Michael Paquier  wrote:
> v4 does not apply anymore.  I am moving this patch to next commit fest,
> waiting on author.

v5 attached.

-John Naylor
From ea0a180bde325b0383ce7f0b3d48d1ce9e941393 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Mon, 2 Jul 2018 12:52:07 +0700
Subject: [PATCH v5 1/2] Add pg_language lookup.

This didn't seem worth doing before, but an upcoming commit will add
88 entries to pg_proc whose language is 'c'. In addition, we can
now use 'internal' in Gen_fmgrtab.pl instead of looking up the OID.
This simplifies that script and its Makefile a bit.
---
 doc/src/sgml/bki.sgml|  6 +-
 src/backend/catalog/genbki.pl|  8 +++
 src/backend/utils/Gen_fmgrtab.pl |  9 ++-
 src/backend/utils/Makefile   |  8 +--
 src/include/catalog/pg_proc.dat  | 98 
 src/include/catalog/pg_proc.h|  2 +-
 src/tools/msvc/Solution.pm   |  4 +-
 7 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 0fb309a1bd..176fa8bf4d 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -408,8 +408,8 @@
 that's error-prone and hard to understand, so for frequently-referenced
 catalogs, genbki.pl provides mechanisms to write
 symbolic references instead.  Currently this is possible for references
-to access methods, functions, operators, opclasses, opfamilies, and
-types.  The rules are as follows:
+to access methods, functions, languages, operators, opclasses, opfamilies,
+and types.  The rules are as follows:

 

@@ -419,7 +419,7 @@
   Use of symbolic references is enabled in a particular catalog column
   by attaching BKI_LOOKUP(lookuprule)
   to the column's definition, where lookuprule
-  is pg_am, pg_proc,
+  is pg_am, pg_proc, pg_language,
   pg_operator, pg_opclass,
   pg_opfamily, or pg_type.
   BKI_LOOKUP can be attached to columns of
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 649200260a..f321662695 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -167,6 +167,13 @@ foreach my $row (@{ $catalog_data{pg_am} })
 	$amoids{ $row->{amname} } = $row->{oid};
 }
 
+# language OID lookup
+my %langoids;
+foreach my $row (@{ $catalog_data{pg_language} })
+{
+	$langoids{ $row->{lanname} } = $row->{oid};
+}
+
 # opclass OID lookup
 my %opcoids;
 foreach my $row (@{ $catalog_data{pg_opclass} })
@@ -241,6 +248,7 @@ foreach my $row (@{ $catalog_data{pg_type} })
 # Map catalog name to OID lookup.
 my %lookup_kind = (
 	pg_am   => \%amoids,
+	pg_language => \%langoids,
 	pg_opclass  => \%opcoids,
 	pg_operator => \%operoids,
 	pg_opfamily => \%opfoids,
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index fa30436895..b288c5e5fd 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -59,6 +59,8 @@ die "No include path; you must specify -I.\n" if !$include_path;
 # Note: We pass data file names as arguments and then look for matching
 # headers to parse the schema from. This is backwards from genbki.pl,
 # but the Makefile dependencies look more sensible this way.
+# We currently only need pg_proc, but retain the possibility of reading
+# more than one data file.
 my %catalogs;
 my %catalog_data;
 foreach my $datfile (@input_files)
@@ -78,13 +80,10 @@ foreach my $datfile (@input_files)
 	$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0);
 }
 
-# Fetch some values for later.
+# Fetch a value for later.
 my $FirstBootstrapObjectId =
   Catalog::FindDefinedSymbol('access/transam.h', $include_path,
 	'FirstBootstrapObjectId');
-my $INTERNALlanguageId =
-  Catalog::FindDefinedSymbolFromData($catalog_data{pg_language},
-	'INTERNALlanguageId');
 
 # Collect certain fields from pg_proc.dat.
 my @fmgr = ();
@@ -94,7 +93,7 @@ foreach my $row (@{ $catalog_data{pg_proc} })
 	my %bki_values = %$row;
 
 	# Select out just the rows for internal-language procedures.
-	next if $bki_values{prolang} ne $INTERNALlanguageId;
+	next if $bki_values{prolang} ne 'internal';
 
 	push @fmgr,
 	  {
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index e797539d09..da40f6b6c4 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -31,15 +31,11 @@ generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_
 
 $(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h
 
-FMGR_DATA := $(addprefix $(top_srcdir)/src/include/catalog/,\
-	pg_language.dat pg_proc.dat \
-	)
-
 # fmgr-stamp records the last time we ran Gen_fmgrtab.pl.  We don't rely on
 # the timestamps of the individual output files, because the Perl script
 # won't update them if they didn't change (to avoid unnecessary recompiles).
-fmgr-stamp: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(FMGR_DATA) $(top_srcdir)/src/include/access/transam.h
-	$(PER

Re: Sync ECPG scanner with core

2018-10-01 Thread John Naylor
On 9/30/18, Michael Paquier  wrote:
> It would be nice to add this patch to the next commit fest:
> https://commitfest.postgresql.org/20/

Done, thanks.

-John Naylor



Re: Sync ECPG scanner with core

2018-10-02 Thread John Naylor
It seems the patch tester is confused by the addition of the
demonstration diff file. I'm reattaching just the patchset to see if
it turns green.

-John Naylor
From 107e3c8a0b65b0196ea4370a724c8b2a1b0fdf79 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sun, 30 Sep 2018 12:51:41 +0700
Subject: [PATCH v1 1/4] First pass at syncing ECPG scanner with the core
 scanner.

Adjust whitespace and formatting, clean up some comments, and move
the block of whitespace rules.
---
 src/backend/parser/scan.l |   2 +-
 src/fe_utils/psqlscan.l   |   2 +-
 src/interfaces/ecpg/preproc/pgc.l | 773 --
 3 files changed, 408 insertions(+), 369 deletions(-)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 950b8b8591..a2454732a1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -192,7 +192,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
  *
  * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
  */
 
 space			[ \t\n\r\f]
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index fdf49875a7..25253b54ea 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -151,7 +151,7 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
  *
  * XXX if you change the set of whitespace characters, fix scanner_isspace()
- * to agree, and see also the plpgsql lexer.
+ * to agree.
  */
 
 space			[ \t\n\r\f]
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 0792118cfe..b96f17ca20 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -108,16 +108,19 @@ static struct _if_value
  * We use exclusive states for quoted strings, extended comments,
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
- *	 bit string literal
- *	 extended C-style comments in C
- *	 extended C-style comments in SQL
- *	 delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
- *	 hexadecimal numeric string - thomas 1997-11-16
- *	 standard quoted strings - thomas 1997-07-30
- *	 standard quoted strings in C - michael
- *	 extended quoted strings (support backslash escape sequences)
- *	 national character quoted strings
+ *   bit string literal
+ *   extended C-style comments in C
+ *   extended C-style comments in SQL
+ *   delimited identifiers (double-quoted identifiers)
+ *  
+ *   hexadecimal numeric string
+ *   standard quoted strings
+ *   extended quoted strings (support backslash escape sequences)
+ *   national character quoted strings
+ *   standard quoted strings in C
  *   $foo$ quoted strings
+ *  
+ *  
  *   quoted identifier with Unicode escapes
  *   quoted string with Unicode escapes
  */
@@ -138,6 +141,48 @@ static struct _if_value
 %x xui
 %x xus
 
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems.  SQL-style comments, which start with -- and extend to the
+ * next newline, are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment.  This is correct.  Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix ecpg_isspace()
+ * to agree.
+ */
+
+space			[ \t\n\r\f]
+horiz_space		[ \t\f]
+newline			[\n\r]
+non_newline		[^\n\r]
+
+comment			("--"{non_newline}*)
+
+whitespace		({space}+|{comment})
+
+/*
+ * SQL requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated.  Silly, but who are we
+ * to argue?  Note that {whitespace_with_newline} should not have * after
+ * it, whereas {whitespace} should generally have a * after it...
+ */
+
+horiz_whitespace		({horiz_space}|{comment})
+whitespace_with_newline	({horiz_whitespace}*{newline}{whitespace}*)
+
+quote			'
+quotestop		{quote}{whitespace}*
+quotecontinue	{quote}{whitespace_with_newline}{quote}
+quotefail		{quote}{whitespace}*"-"
+
 /* Bit string
  */
 xbstart			[bB]{quote}
@@ -216,17 +261,17 @@ xdcinside		({xdcqq}|{xdcqdq}|{xdcother})
  * The "extended comment" syntax closely resembles allowable operator syntax.
  * The tricky part here is to get lex to recognize a string starting with
  * slash-star as a comment, when interpreting it as an operator would produce
- * a longer match --- remember lex will prefer a longer match!	

Re: WIP: Avoid creation of the free space map for small tables

2018-10-07 Thread John Naylor
On 10/6/18, Thomas Munro  wrote:
> On Sat, Oct 6, 2018 at 7:47 AM John Naylor  wrote:
>> A while back, Robert Haas noticed that the space taken up by very
>> small tables is dominated by the FSM [1]. Tom suggested that we could
>> prevent creation of the FSM until the heap has reached a certain
>> threshold size [2]. Attached is a WIP patch to implement that. I've
>> also attached a SQL script to demonstrate the change in behavior for
>> various scenarios.
>
> Hi John,
>
> You'll need to tweak the test in contrib/pageinspect/sql/page.sql,
> because it's currently asserting that there is an FSM on a small table
> so make check-world fails.

Whoops, sorry about that; the attached patch passes make check-world.
While looking into that, I also found a regression: If the cached
target block is the last block in the relation and there is no free
space, that block will be tried twice. That's been fixed as well.

Thanks,
-John Naylor
From 49c80647737edfaae82014f2c94a84114f3688f6 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sun, 7 Oct 2018 21:34:23 +0700
Subject: [PATCH v2] Avoid creation of the free space map for small tables.

The FSM isn't created if the heap has fewer than 10 blocks. If the last
known good block has insufficient space, try every block before extending
the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.

If the heap tuples are all deleted, the FSM stays but has no leaf blocks
(same as before). Although it exists, it won't be re-extended until
the heap passes the threshold again.
---
 contrib/pageinspect/expected/page.out | 77 +-
 contrib/pageinspect/sql/page.sql  | 33 +---
 src/backend/access/heap/hio.c | 96 +--
 src/backend/storage/freespace/freespace.c | 35 -
 src/include/storage/freespace.h   |  4 +
 5 files changed, 169 insertions(+), 76 deletions(-)

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 
8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 ---
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 

-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 --
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+ERROR:  block number 1 is out of range for relation "test_rel_forks"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm", and "init".
-SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
+SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
+ fsm_page_contents 
+---
+ 0: 192   +
+ 1: 192   +
+ 3: 192   +
+ 7: 192   +
+ 15: 192  +
+ 31: 192  +
+ 63: 192  +
+ 127: 192 +
+ 255: 192 +
+ 511: 192 +
+ 1023: 192+
+ 2047: 192+
+ 4095: 192+
+ fp_next_slot: 0  +
+ 
+(1 row)
+
+SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('

Re: generating bootstrap entries for array types

2018-09-20 Thread John Naylor
On 9/20/18, Dagfinn Ilmari Mannsåker  wrote:
> Hi again John,

Hi Ilmari,
My apologies -- your messages ended up in my spam folder. I only saw
this thread again when Tom replied. Thanks for the review! I'll keep a
look out for any future messages.

-John Naylor



Re: generating bootstrap entries for array types

2018-09-20 Thread John Naylor
On 9/21/18, Tom Lane  wrote:
> I did some other minor hacking (mostly, fixing the documentation)
> and pushed it.

Thank you.

-John Naylor



Re: doc - add missing documentation for "acldefault"

2018-09-19 Thread John Naylor
On 9/19/18, Tom Lane  wrote:
> However, I don't object to documenting any function that has its
> own pg_description string.

Speaking of, that description string seems to have been neglected.
I've attached a remedy for that.

-John Naylor
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 860571440a..6b7088b9ce 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2073,7 +2073,7 @@
 { oid => '1365', descr => 'make ACL item',
   proname => 'makeaclitem', prorettype => 'aclitem',
   proargtypes => 'oid oid text bool', prosrc => 'makeaclitem' },
-{ oid => '3943', descr => 'TODO',
+{ oid => '3943', descr => 'show default privileges, for use by the information schema',
   proname => 'acldefault', prorettype => '_aclitem', proargtypes => 'char oid',
   prosrc => 'acldefault_sql' },
 { oid => '1689',


Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-27 Thread John Naylor
On 12/27/18, Tom Lane  wrote:
> diff --git a/src/tools/gen_keywords.pl b/src/tools/gen_keywords.pl
> + elsif ($arg =~ /^-o/)
> + {
> + $output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
> + }
>
> My perl-fu is not great, but it looks like this will accept arguments
> like "-ofilename", which is a style I don't like at all.  I'd rather
> either insist on the filename being separate or write the switch like
> "-o=filename".  Also, project style when taking both forms is usually
> more like
>   -o filename
>   --offset=filename

This style was cargo-culted from the catalog scripts. I can settle on
just the first form if you like.

> +$kw_input_file =~ /((\w*)kwlist)\.h/;
> +my $base_filename = $1;
> +$prefix = $2 if !defined $prefix;
>
> Hmm, what happens if the input filename does not end with "kwlist.h"?

If that's a maintainability hazard, I can force every invocation to
provide a prefix instead.

> I looked very briefly at v4-0002, and I'm not very convinced about
> the "middle" aspect of that optimization.  It seems unmaintainable,
> plus you've not exhibited how the preferred keywords would get selected
> in the first place (wiring them into the Perl script is surely not
> acceptable).

What if the second argument of the macro held this info? Something like:

PG_KEYWORD("security", FULL_SEARCH, UNRESERVED_KEYWORD)
PG_KEYWORD("select", OPTIMIZE, SELECT, RESERVED_KEYWORD)

with a warning emitted if more than one keyword per range has
OPTIMIZE. That would require all keyword lists to have that second
argument, but selecting a preferred keyword would be optional.

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-27 Thread John Naylor
On 12/26/18, John Naylor  wrote:
> On 12/26/18, Robert Haas  wrote:
>> I wonder if we could do something really simple like a lookup based on
>> the first character of the scan keyword. It looks to me like there are
>> 440 keywords right now, and the most common starting letter is 'c',
>> which is the first letter of 51 keywords. So dispatching based on the
>> first letter clips at least 3 steps off the binary search.  I don't
>> know whether that's enough to be worthwhile, but it's probably pretty
>> simple to implement.

> I agree it'd be fairly simple to do, and might raise the bar for doing
> anything more complex.

I went ahead and did this for v4, but split out into a separate patch.
In addition, I used a heuristic to bypass binary search for the most
common keywords. Normally, the middle value is computed
mathematically, but I found that in each range of keywords beginning
with the same letter, there is often 1 or 2 common keywords that are
good first guesses, such as select, from, join, limit, where. I taught
the lookup to try those first, and then compute subsequent steps the
usual way.
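
To illustrate (with made-up names, not the identifiers the patch
actually uses), the lookup amounts to a binary search that probes a
supplied "first guess" once before computing midpoints as usual:

/*
 * Sketch only: kw_string holds all keywords separated by '\0', and
 * kw_offsets[i] is where keyword i starts.  first_guess is assumed to
 * lie within [low, high].
 */
static int
lookup_with_guess(const char *word, const char *kw_string,
				  const uint16 *kw_offsets, int low, int high,
				  int first_guess)
{
	int			middle = first_guess;

	while (low <= high)
	{
		int			cmp = strcmp(kw_string + kw_offsets[middle], word);

		if (cmp == 0)
			return middle;		/* index of the matching keyword */
		else if (cmp < 0)
			low = middle + 1;
		else
			high = middle - 1;
		/* subsequent probes are computed the usual way */
		middle = low + (high - low) / 2;
	}
	return -1;
}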

Barring additional bikeshedding on 0001, I'll plan on implementing
offset-based lookup for the other keyword types and retire the old
ScanKeyword. Once that's done, we can benchmark and compare with the
optimizations in 0002.

-John Naylor
From ed78318df69bd3fa1543f3dc04d0a6ca9794a81c Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Wed, 26 Dec 2018 21:28:27 -0500
Subject: [PATCH v4 1/2] WIP Use offset-based keyword lookup.

(For WIP, only for plpgsql unreserved keywords)

Replace binary search over an array of ScanKeyword structs with a binary
search over an array of offsets into a keyword string. Access auxiliary
data only after a keyword hit. This has better locality of reference and
a smaller memory footprint.
---
 src/common/keywords.c |  55 +
 src/include/common/keywords.h |  12 ++
 src/pl/plpgsql/src/.gitignore |   1 +
 src/pl/plpgsql/src/Makefile   |  13 +-
 src/pl/plpgsql/src/pl_scanner.c   | 128 +---
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 108 +
 src/tools/gen_keywords.pl | 137 ++
 src/tools/msvc/Solution.pm|  10 ++
 8 files changed, 361 insertions(+), 103 deletions(-)
 create mode 100644 src/pl/plpgsql/src/pl_unreserved_kwlist.h
 create mode 100644 src/tools/gen_keywords.pl

diff --git a/src/common/keywords.c b/src/common/keywords.c
index 0c0c794c68..b0e5a721b6 100644
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@@ -112,3 +112,58 @@ ScanKeywordLookup(const char *text,
 
 	return NULL;
 }
+
+/* Like ScanKeywordLookup, but uses offsets into a keyword string. */
+int
+ScanKeywordLookupOffset(const char *string_to_lookup,
+		const char *kw_strings,
+		const uint16 *kw_offsets,
+		int num_keywords)
+{
+	int			len,
+				i;
+	char		word[NAMEDATALEN];
+	const uint16 *low;
+	const uint16 *high;
+
+	len = strlen(string_to_lookup);
+	/* We assume all keywords are shorter than NAMEDATALEN. */
+	if (len >= NAMEDATALEN)
+		return -1;
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+	 * produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (i = 0; i < len; i++)
+	{
+		char		ch = string_to_lookup[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		word[i] = ch;
+	}
+	word[len] = '\0';
+
+	/*
+	 * Now do a binary search using plain strcmp() comparison.
+	 */
+	low = kw_offsets;
+	high = kw_offsets + (num_keywords - 1);
+	while (low <= high)
+	{
+		const uint16 *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(kw_strings + *middle, word);
+		if (difference == 0)
+			return middle - kw_offsets;
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return -1;
+}
diff --git a/src/include/common/keywords.h b/src/include/common/keywords.h
index 0b31505b66..201d0fcc7a 100644
--- a/src/include/common/keywords.h
+++ b/src/include/common/keywords.h
@@ -28,6 +28,13 @@ typedef struct ScanKeyword
 	int16		category;		/* see codes above */
 } ScanKeyword;
 
+/* Payload data for keywords */
+typedef struct ScanKeywordAux
+{
+	int16		value;			/* grammar's token code */
+	char		category;		/* see codes above */
+} ScanKeywordAux;
+
 #ifndef FRONTEND
 extern PGDLLIMPORT const ScanKeyword ScanKeywords[];
 extern PGDLLIMPORT const int NumScanKeywords;
@@ -41,4 +48,9 @@ extern const ScanKeyword *ScanKeywordLookup(const char *text,
   const ScanKeyword *keywords,
   int num_keywords);
 
+int ScanKeywordLookupOffset(const char *string_to_lookup,
+		const char *kw_strings,
+		const uint16 *kw_offsets,
+		int num_keywords);
+
 #endif			/* KEYWORDS_H */
diff --git a/src/pl/plpgsql/src/.gitignore b/src/pl/plpgsql/src/.gitignore

Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-29 Thread John Naylor
I think 0001 with complete keyword lookup replacement is in decent
enough shape to post. Make check-world passes. A few notes and
caveats:

-I added an --extern option to the script for the core keyword
headers. This also capitalizes variables.
-ECPG keyword lookup is a bit different in that the ecpg and sql
lookup functions are wrapped in a single function rather than called
separately within pgc.l. It might be worth untangling that, but I have
not done so.
-Some variable names haven't changed even though now they're only
referring to token values, which might be confusing.
-I haven't checked if I need to install the generated headers.
-I haven't measured performance or binary size. If anyone is excited
enough to do that, great, otherwise I'll do that as time permits.
-There are probably makefile bugs.

Now, on to previous review points:

On 12/27/18, Tom Lane  wrote:
> +/* Payload data for keywords */
> +typedef struct ScanKeywordAux
> +{
> + int16   value;  /* grammar's token code */
> + charcategory;   /* see codes above */
> +} ScanKeywordAux;
>
> There isn't really any point in changing category to "char", because
> alignment considerations will mandate that sizeof(ScanKeywordAux) be
> a multiple of 2 anyway.  With some compilers we could get around that
> with a pragma to force non-aligned storage, but doing so would be a
> net loss on most non-Intel architectures.

Reverted, especially since we can skip the struct entirely for some
callers as you pointed out below.

> diff --git a/src/pl/plpgsql/src/.gitignore b/src/pl/plpgsql/src/.gitignore
> @@ -1,3 +1,4 @@
> +/*kwlist_d.h
>
> Not a fan of using wildcards in .gitignore files, at least not when
> there's just one or two files you intend to match.

Removed.

>  # Force these dependencies to be known even without dependency info built:
> -pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o:
> plpgsql.h pl_gram.h plerrcodes.h
> +pl_gram.o pl_handler.o pl_comp.o pl_exec.o pl_funcs.o pl_scanner.o:
> plpgsql.h pl_gram.h plerrcodes.h pl_unreserved_kwlist_d.h
>
> Hm, do we really need any more than pl_scanner.o to depend on that header?

I think you're right, so separated into a new rule.

> +# Parse keyword header for names.
> +my @keywords;
> +while (<$kif>)
> +{
> + if (/^PG_KEYWORD\("(\w+)",\s*\w+,\s*\w+\)/)
>
> This is assuming more than it should about the number of arguments for
> PG_KEYWORD, as well as what's in them.  I think it'd be sufficient to
> match like this:
>
>   if (/^PG_KEYWORD\("(\w+)",/)

...and...

> diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h
> b/src/pl/plpgsql/src/pl_unreserved_kwlist.h
> +/* name, value, category */
> +PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
>
> Likewise, I'd just have these be two-argument macros.  There's no reason
> for the various kwlist.h headers to agree on the number of payload
> arguments for PG_KEYWORD.

Both done, however...

> +/* FIXME: Have to redefine this symbol for the WIP. */
> +#undef PG_KEYWORD
> +#define PG_KEYWORD(kwname, value, category) {value, category},
> +
> +static const ScanKeywordAux unreserved_keywords[] = {
> +#include "pl_unreserved_kwlist.h"
>  };
>
> The category isn't useful for this keyword list, so couldn't you
> just make this an array of uint16 values?

Yes, this works for the unreserved keywords. The reserved ones still
need the aux struct to work with the core scanner, even though scan.l
doesn't reference category either. This has the consequence that we
can't dispense with category, e.g.:

PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)

...unless we do without the struct entirely, but that's not without
disadvantages as you mentioned.
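
For the unreserved list, that boils down to something like this (a
sketch; the array name here is invented):

#undef PG_KEYWORD
#define PG_KEYWORD(kwname, value, category) value,

static const uint16 unreserved_keyword_tokens[] = {
#include "pl_unreserved_kwlist.h"
};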

I decided to export the struct (rather than just int16 for category)
to the frontend, even though we have to set the token values to zero,
since there might someday be another field of use to the frontend.
Also to avoid confusion.

> I don't mind allowing the prefix to default to empty.  What I was
> concerned about was that base_filename could end up undefined.
> Probably the thing to do is to generate base_filename separately,
> say by stripping any initial ".*/" sequence and then substitute
> '_' for '.'.

I removed assumptions about the filename.

> +Options:
> +-o   output path
> +-p   optional prefix for generated data structures
>
> This usage message is pretty vague about how you write the options
> (cf gripe above).

I tried it like this:

Usage: gen_keywords.pl [--output/-o ] [--prefix/-p ] input_file
--output  Output directory
--prefix  String prepended to var names in the output file


On 12/27/18, Andrew Dunstan  wrote

Re: WIP: Avoid creation of the free space map for small tables

2019-01-16 Thread John Naylor
On Wed, Jan 16, 2019 at 8:41 AM Amit Kapila  wrote:
>
> On Fri, Jan 11, 2019 at 3:54 AM John Naylor  
> wrote:
> 1.
> Commit message:
> > Any pages with wasted free space become visible at next relation extension, 
> > so we still control table bloat.
>
> I think the free space will be available after the next pass of
> vacuum, no?  How can relation extension make it available?

To explain, this diagram shows the local map as it looks for different
small table sizes. Each row is the map for a heap of one to four
blocks; the columns are block numbers 0 through 3 (A = available to
try, N = not tried):

0123
A
NA
ANA
NANA

So for a 3-block table, the alternating strategy never checks block 1.
Any free space block 1 has acquired via delete-and-vacuum will become
visible if it extends to 4 blocks. We are accepting a small amount of
bloat for improved performance, as discussed. Would it help to include
this diagram in the README?

> 2.
> +2. For very small heap relations, the FSM would be relatively large and
> +wasteful, so as of PostgreSQL 12 we refrain from creating the FSM for
> +heaps with HEAP_FSM_CREATION_THRESHOLD pages or fewer, both to save space
> +and to improve performance.  To locate free space in this case, we simply
> +iterate over the heap, trying alternating pages in turn.  There may be some
> +wasted free space in this case, but it becomes visible again upon next
> +relation extension.
>
> a. Again, how space becomes available at next relation extension.
> b. I think there is no use of mentioning the version number in the
> above comment, this code will be present from PG-12, so one can find
> out from which version this optimization is added.

It fits with the reference to PG 8.4 earlier in the document. I chose
to be consistent, but to be honest, I'm not much in favor of a lot of
version references in code/READMEs.

> 3.
> BlockNumber
>  RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
> Size oldSpaceAvail, Size spaceNeeded)
> {
> ..
> + /* First try the local map, if it exists. */
> + if (oldPage < fsm_local_map.nblocks)
> + {
> ..
> }
>
> The comment doesn't appear to be completely in sync with the code.
> Can't we just check whether "fsm_local_map.nblocks >  0", if so, we
> can use a macro for the same? I have changed this in the attached
> patch, see what you think about it.  I have used it at a few other
> places as well.

The macro adds clarity, so I'm in favor of using it.

> 4.
> + * When we initialize the map, the whole heap is potentially available to
> + * try.  If a caller wanted to reset the map after another backend extends
> + * the relation, this will only flag new blocks as available.  No callers
> + * do this currently, however.
> + */
> +static void
> +fsm_local_set(Relation rel, BlockNumber curr_nblocks)
> {
> ..
> + if (blkno >= fsm_local_map.nblocks + 2)
> ..
> }
>
>
> The way you have tried to support the case as quoted in the comment
> "If a caller wanted to reset the map after another backend extends .."
> doesn't appear to be solid and I am not sure if it is correct either.

I removed this case in v9 and you objected to that as unnecessary, so
I reverted it for v10.

> We don't have any way to test the same, so I suggest let's try to
> simplify the case w.r.t current requirement of this API.  I think we
> should
> some simple logic to try every other block like:
>
> + blkno = cur_nblocks - 1;
> + while (true)
> + {
> + fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
> + if (blkno >= 2)
> + blkno -= 2;
> + else
> + break;
> + }
>
> I have changed this in the attached patch.

Fine by me.

> 5.
> +/*
> + * Search the local map for an available block to try, in descending order.
> + *
> + * For use when there is no FSM.
> + */
> +static BlockNumber
> +fsm_local_search(void)
>
> We should give a brief explanation as to why we try in descending
> order.  I have added some explanation in the attached patch, see what
> you think about it?
>
> Apart from the above, I have modified a few comments.

I'll include these with some grammar corrections in the next version.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-16 Thread John Naylor
On Wed, Jan 16, 2019 at 11:40 AM John Naylor
 wrote:
> On Wed, Jan 16, 2019 at 8:41 AM Amit Kapila  wrote:
> > can use a macro for the same? I have changed this in the attached
> > patch, see what you think about it.  I have used it at a few other
> > places as well.
>
> The macro adds clarity, so I'm in favor of using it.

It just occurred to me that the style FSM_LOCAL_MAP_EXISTS seems more
common for macros that refer to constants, and FSMLocalMapExists for
expressions, but I've only seen a small amount of the code base. Do we
have a style preference here, or is it more a matter of matching the
surrounding code?
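
For concreteness, the two spellings would look something like this
(definition inferred from the fsm_local_map.nblocks test discussed
upthread):

/* constant-style name */
#define FSM_LOCAL_MAP_EXISTS	(fsm_local_map.nblocks > 0)

/* function-style name */
#define FSMLocalMapExists()		(fsm_local_map.nblocks > 0)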


--
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: explain plans with information about (modified) gucs

2019-01-16 Thread John Naylor
> [v5]

Hi Tomas,
Peter suggested upthread to use 'settings' rather than 'gucs' for the
explain option (and output?), and you seemed to agree. Are you going
to include that in a future version? Speaking of the output, v5's
default text doesn't match that of formatted text ('GUCs' / 'GUC').



Re: WIP: Avoid creation of the free space map for small tables

2019-01-17 Thread John Naylor
On Wed, Jan 16, 2019 at 10:35 PM Amit Kapila  wrote:
> Yes, I think it would be good if you can explain the concept of
> local-map with the help of this example.

> Then let's not add a reference to the version number in this case.  I

Okay, done in v14. I kept your spelling of the new macro. One minor
detail added: use uint8 rather than char for the local map array. This
seems to be preferred, especially in this file.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 38bc2d9b4ae3f0d1ab6d21c54ed638c5aee6d637 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Thu, 17 Jan 2019 12:25:30 -0500
Subject: [PATCH v14 1/2] Avoid creation of the free space map for small heap 
 relations.

Previously, all heaps had FSMs. For very small tables, this means that
the FSM took up more space than the heap did. This is wasteful, so now
we refrain from creating the FSM for heaps with 4 pages or fewer. If the
last known target block has insufficient space, we still try to insert
into some other page before giving up and extending the relation,
since doing otherwise leads to table bloat. Testing showed that trying
every page penalized performance slightly, so we compromise and try
every other page. This way, we visit at most two pages. Any pages with
wasted free space become visible at next relation extension, so we still
control table bloat. As a bonus, directly attempting one or two pages
can even be faster than consulting the FSM would have been.
---
 contrib/pageinspect/expected/page.out |  77 +++---
 contrib/pageinspect/sql/page.sql  |  33 +--
 doc/src/sgml/storage.sgml |  13 +-
 src/backend/access/brin/brin.c|   2 +-
 src/backend/access/brin/brin_pageops.c|  10 +-
 src/backend/access/heap/hio.c |  47 ++--
 src/backend/access/heap/vacuumlazy.c  |  17 +-
 src/backend/access/transam/xact.c |  14 ++
 src/backend/storage/freespace/README  |  32 ++-
 src/backend/storage/freespace/freespace.c | 272 +-
 src/backend/storage/freespace/indexfsm.c  |   6 +-
 src/include/storage/freespace.h   |   9 +-
 src/test/regress/expected/fsm.out |  62 +
 src/test/regress/parallel_schedule|   6 +
 src/test/regress/serial_schedule  |   1 +
 src/test/regress/sql/fsm.sql  |  46 
 16 files changed, 545 insertions(+), 102 deletions(-)
 create mode 100644 src/test/regress/expected/fsm.out
 create mode 100644 src/test/regress/sql/fsm.sql

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 
8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 ---
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 

-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 --
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+ERROR:  block number 1 is out of range for relation "test_rel_forks"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm

Re: Delay locking partitions during INSERT and UPDATE

2019-01-18 Thread John Naylor
On 11/22/18, David Rowley  wrote:
> If required, such operations could LOCK TABLE the top partitioned
> table to block the DML operation. There's already a risk of similar
> deadlocks from such operations done on multiple separate tables when
> the order they're done is not the same as the order the tables are
> written in a query, although, in that case, the window for the
> deadlock is likely to be much smaller.

Is this something that would need documentation anywhere?

> With this done, the performance of an INSERT into a 10k partition
> partitioned table looks like:
>
> Setup:
> create table hashp (a int) partition by hash(a);
> select 'create table hashp'||x::Text || ' partition of hashp for
> values with (modulus 1, remainder '||x::text||');' from
> generate_Series(0,) x;
> \gexec
>
> hashp_insert.sql:
> \set p_a random(1,1000)
> insert into hashp values(:p_a);
>
> Results:
> $ psql -c "truncate hashp;" postgres && pgbench -n -f hashp_insert.sql
> -M prepared -c 4 -j 4 -T 60 postgres

I used a similar test, but with unlogged tables, and "-c 2", and got:

normal table: 32000tps
10k partitions / master: 82tps
10k partitions / patch: 7000tps

So far I haven't gotten quite as good performance as you and Tomas,
although it's still a ~85x improvement.

-John Naylor



Re: Delay locking partitions during INSERT and UPDATE

2019-01-19 Thread John Naylor
On Sat, Jan 19, 2019 at 10:59 AM Tomas Vondra
 wrote:
>
> On 1/19/19 12:05 AM, John Naylor wrote:
> > I used a similar test, but with unlogged tables, and "-c 2", and got:
> >
> > normal table: 32000tps
> > 10k partitions / master: 82tps
> > 10k partitions / patch: 7000tps
> >
> > So far I haven't gotten quite as good performance as you and Tomas,
> > although it's still a ~85x improvement.
>
> What hardware are you running the tests on? I wouldn't be surprised if
> you were hitting some CPU or I/O bottleneck, which we're not hitting.

9-year-old laptop, Core i3. Side note: I miswrote my test parameters
above -- should be "-c4 -j2".

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: Speeding up text_position_next with multibyte encodings

2019-01-14 Thread John Naylor
On Sun, Dec 23, 2018 at 9:33 AM Tomas Vondra
 wrote:
> So, what is the expected speedup in a "good/average" case? Do we have
> some reasonable real-world workload mixing these cases that could be
> used as a realistic benchmark?

Not sure about a realistic mix, but I investigated the tradeoffs.
First, using the test function Heikki posted upthread [1], I tried a
2-character needle with different haystack sizes, and looked for
the position where master and patch were roughly equal (average of 3,
within 1%). Beyond this point, the patch regresses. To keep it simple
I used UTF-8 only.

haystack   position
<=23       (patch always faster)
30         23
100        58
1000       ~550
100000     ~55000

For very short haystacks, the patch is always faster or regresses only
when the needle is close to the end. For longer haystacks, the patch
will be faster if the position searched for is less than ~55% of the
way to the end, slower if after. Next, I tested finding the position
of a 2 character needle in a couple positions towards the front of a
1000 character haystack, plus not present and at the end for
comparison. As seen earlier [1], the worst-case regression for this
haystack length was 4.4x slower for UTF-8, but that had a skip table
collision, and this time I used characters with no bytes in common.
Average of 3, with a loop count of 1,000,000:

UTF-8
pos.    master     patch    diff
*       3880ms     144ms    -96%
100     4440ms    1410ms    -68%
333     5700ms    4280ms    -25%
999     9370ms   12500ms     34%

EUC-KR
pos.    master     patch    diff
*       3900ms     112ms    -97%
100     4410ms    1289ms    -71%
333     5680ms    3970ms    -30%
999     9370ms   11600ms     24%

The patch is much faster than master when the needle is near the front
of a large haystack or not there at all.

The majority of cases are measurably faster, and the best case is at
least 20x faster. On the whole I'd say this patch is a performance win
even without further optimization. I'm marking it ready for committer.

[1] 
https://www.postgresql.org/message-id/05d95c5c-1fb7-29ea-1c5d-e7e941a0a14d%40iki.fi
--

-John Naylor



Re: explain plans with information about (modified) gucs

2019-01-21 Thread John Naylor
On Sun, Jan 20, 2019 at 2:31 PM Tomas Vondra
 wrote:
> Attached is v6 of the patch, adopting "settings" instead of "guc".

Ok, looks good. I tried changing config values with the .conf file,
alter system, and alter database, and tried a few queries with
auto_explain. I made a pass through the config parameters, and don't
see anything obviously left out. I have no comments on the source
code.

One thing stands out: For the docs on auto_explain, all other options
have "Only superusers can change this setting.", but log_settings
doesn't.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-21 Thread John Naylor
On Mon, Jan 21, 2019 at 6:32 AM Amit Kapila  wrote:
> So we won't allow transfer of FSM files if their size is below
> HEAP_FSM_CREATION_THRESHOLD.  What will be its behavior in link mode?
> It seems that the old files will remain there. Will it create any
> problem when we try to create the files via the new server, can you
> once test this case?

I tried upgrading in --link mode, and on the new cluster, enlarging
the table past the threshold causes a new FSM to be created as
expected.

> Also, another case to think in this regard is the upgrade for standby
> servers, if you read below paragraph from the user manual [1], you
> will see what I am worried about?
>
> "What this does is to record the links created by pg_upgrade's link
> mode that connect files in the old and new clusters on the primary
> server. It then finds matching files in the standby's old cluster and
> creates links for them in the standby's new cluster. Files that were
> not linked on the primary are copied from the primary to the standby.
> (They are usually small.)"
>
> [1] - https://www.postgresql.org/docs/devel/pgupgrade.html

Trying this, I ran into a couple problems. I'm probably doing
something wrong, but I can't help but think there's a pg_upgrade
bug/feature I'm unaware of:

I set up my test to have primary directory data1 and for the secondary
standby/data1. I instructed pg_upgrade to upgrade data1 into data1u,
and I tried the rsync recipe in the docs quoted above, and the
upgraded standby wouldn't go into recovery. While debugging that, I
found surprisingly that pg_upgrade also went further and upgraded
standby/data1 into standby/data1u. I tried deleting standby/data1u
before running the rsync command and still nothing. Because the
upgraded secondary is non-functional, I can't really answer your
question.

Not sure if this is normal, but the pg_upgraded new cluster no longer
had the replication slot. Re-adding it didn't allow my upgraded
secondary to go into recovery, either. (I made sure to copy the
recovery settings, so that can't be the problem)


--
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



is there a reason we don't xlog index free space?

2019-01-21 Thread John Naylor
I knew that XLogRecordPageWithFreeSpace() is only called by heapam.c,
but I figured indexes had their own logic to xlog free space. While
investigating something else I found that brin indexes, at least, on
the secondary have no FSMs. Is this deliberate?

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-23 Thread John Naylor
On Wed, Jan 23, 2019 at 7:09 AM Amit Kapila  wrote:
> I think the first two patches (a) removal of dead code in bootstrap
> and (b) the core patch to avoid creation of FSM file for the small
> table are good now.  I have prepared the patches along with commit
> message.  There is no change except for some changes in README and
> commit message of the second patch.  Kindly let me know what you think
> about them?

Good to hear! The additional language is fine. In "Once the FSM is
created for heap", I would just change that to "...for a heap".

> I think these two patches can go even without the upgrade patch
> (during pg_upgrade, conditionally skip transfer of FSMs.) which is
> still under discussion.  However, I am not in a hurry if you or other
> thinks that upgrade patch must be committed along with the second
> patch.  I think the upgrade patch is generally going on track but
> might need some more review.

The pg_upgrade piece is a nice-to-have feature and not essential, so
can go in later. Additional review is also welcome.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-23 Thread John Naylor
On Mon, Jan 21, 2019 at 6:32 AM Amit Kapila  wrote:
> Also, another case to think in this regard is the upgrade for standby
> servers, if you read below paragraph from the user manual [1], you
> will see what I am worried about?
>
> "What this does is to record the links created by pg_upgrade's link
> mode that connect files in the old and new clusters on the primary
> server. It then finds matching files in the standby's old cluster and
> creates links for them in the standby's new cluster. Files that were
> not linked on the primary are copied from the primary to the standby.
> (They are usually small.)"
>
> [1] - https://www.postgresql.org/docs/devel/pgupgrade.html

I am still not able to get the upgraded standby to go into recovery
without resorting to pg_basebackup, but in another attempt to
investigate your question I tried the following (data1 = old cluster,
data2 = new cluster):


mkdir -p data1 data2 standby

echo 'heap' > data1/foo
echo 'fsm' > data1/foo_fsm

# simulate streaming replication
rsync --archive data1 standby

# simulate pg_upgrade, skipping FSM
ln data1/foo -t data2/

rsync --archive --delete --hard-links --size-only --no-inc-recursive
data1 data2 standby

# result
ls standby/data1
ls standby/data2


The result is that foo_fsm is not copied to standby/data2, contrary to
what the docs above imply for other unlinked files. Can anyone shed
light on this?

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-19 Thread John Naylor
On Sat, Jan 19, 2019 at 8:06 AM Amit Kapila  wrote:
>
> On Thu, Jan 17, 2019 at 11:13 PM John Naylor
> Few more comments:
> 1.
> I think we should not allow to create FSM for toast tables as well
> till there size reaches HEAP_FSM_CREATION_THRESHOLD.  If you try below
> test, you can see that FSM will be created for the toast table even if
> the size of toast relation is 1 page.
...
> I have fixed this in the attached patch, kindly verify it once and see
> if you can add the test for same as well.

Works for me. For v16, I've added and tested similar logic to
pg_upgrade and verified that toast tables work the same as normal
tables in recovery. I used a slightly different method to generate the
long random string to avoid creating a function. Also, some cosmetic
adjustments -- I changed the regression test to use 'i' instead of 'g'
to match the use of generate_series in most other tests, and made
capitalization more consistent.

> 2.
> -CREATE TABLE test1 (a int, b int);
> -INSERT INTO test1 VALUES (16777217, 131584);
> +CREATE TABLE test_rel_forks (a
> int);
> +-- Make sure there are enough blocks in the heap for the FSM to be created.
> +INSERT INTO test_rel_forks SELECT g
> from generate_series(1,10000) g;
>
> -VACUUM test1;  -- set up FSM
> +-- set up FSM and VM
> +VACUUM test_rel_forks;
>
> This test will create 45 pages instead of 1.  I know that to create
> FSM, we now need more than 4 pages, but 45 seems to be on the higher
> side.  I think we should not unnecessarily populate more data if there
> is no particular need for it, let's restrict the number of pages to 5
> if possible.

Good idea, done here and in the fsm regression test.

> 3.
> -SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
> - fsm_1
> 
> -  8192
> -(1 row)
> -
> -SELECT octet_length
> (get_raw_page('test1', 'vm', 0)) AS vm_0;
> +SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
> +ERROR:  block number 10 is out of range for relation "test_rel_forks"
>
> Why have you changed the test definition here?  Previously test checks
> the existing FSM page, but now it tries to access out of range page.

The patch is hard to read here, but I still have a test for the
existing FSM page:

-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0
 ---
   8192
 (1 row)

I have a test for in-range and out-of-range for each relation fork.

-- 
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From ecf0cecee8b0adf8529ddb1fbdabd041172de5e3 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sat, 19 Jan 2019 18:39:39 -0500
Subject: [PATCH v16 1/2] Avoid creation of the free space map for small heap
 relations.

Previously, all heaps had FSMs. For very small tables, this means that
the FSM took up more space than the heap did. This is wasteful, so now
we refrain from creating the FSM for heaps with 4 pages or fewer. If the
last known target block has insufficient space, we still try to insert
into some other page before giving up and extending the relation,
since doing otherwise leads to table bloat. Testing showed that trying
every page penalized performance slightly, so we compromise and try
every other page. This way, we visit at most two pages. Any pages with
wasted free space become visible at next relation extension, so we still
control table bloat. As a bonus, directly attempting one or two pages
can even be faster than consulting the FSM would have been.
---
 contrib/pageinspect/expected/page.out |  77 +++---
 contrib/pageinspect/sql/page.sql  |  33 ++-
 doc/src/sgml/storage.sgml |  13 +-
 src/backend/access/brin/brin.c|   2 +-
 src/backend/access/brin/brin_pageops.c|  10 +-
 src/backend/access/heap/hio.c |  47 ++--
 src/backend/access/heap/vacuumlazy.c  |  17 +-
 src/backend/access/transam/xact.c |  14 ++
 src/backend/storage/freespace/README  |  33 ++-
 src/backend/storage/freespace/freespace.c | 275 +-
 src/backend/storage/freespace/indexfsm.c  |   6 +-
 src/include/storage/freespace.h   |   9 +-
 src/test/regress/expected/fsm.out |  75 ++
 src/test/regress/parallel_schedule|   6 +
 src/test/regress/serial_schedule  |   1 +
 src/test/regress/sql/fsm.sql  |  55 +
 16 files changed, 571 insertions(+), 102 deletions(-)
 create mode 100644 src/test/regress/expected/fsm.out
 create mode 100644 src/test/regress/sql/fsm.sql

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/

Re: WIP: Avoid creation of the free space map for small tables

2018-12-12 Thread John Naylor
On 11/24/18, Amit Kapila  wrote:
> 4. You have mentioned above that "system catalogs created during
> bootstrap still have a FSM if they have any data." and I can also see
> this behavior, have you investigated this point further?

I found the cause of this. There is some special-case code in md.c to
create any file if it's opened in bootstrap mode. I removed this and a
similar special case (attached), and make check still passes. After
digging through the history, I'm guessing this has been useless code
since about 2001, when certain special catalogs were removed.

-John Naylor
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 4c6a50509f..9331c8b1d1 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -310,13 +310,7 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 	{
 		int			save_errno = errno;
 
-		/*
-		 * During bootstrap, there are cases where a system relation will be
-		 * accessed (by internal backend processes) before the bootstrap
-		 * script nominally creates it.  Therefore, allow the file to exist
-		 * already, even if isRedo is not set.  (See also mdopen)
-		 */
-		if (isRedo || IsBootstrapProcessingMode())
+		if (isRedo)
 			fd = PathNameOpenFile(path, O_RDWR | PG_BINARY);
 		if (fd < 0)
 		{
@@ -572,26 +566,15 @@ mdopen(SMgrRelation reln, ForkNumber forknum, int behavior)
 
 	if (fd < 0)
 	{
-		/*
-		 * During bootstrap, there are cases where a system relation will be
-		 * accessed (by internal backend processes) before the bootstrap
-		 * script nominally creates it.  Therefore, accept mdopen() as a
-		 * substitute for mdcreate() in bootstrap mode only. (See mdcreate)
-		 */
-		if (IsBootstrapProcessingMode())
-			fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
-		if (fd < 0)
+		if ((behavior & EXTENSION_RETURN_NULL) &&
+			FILE_POSSIBLY_DELETED(errno))
 		{
-			if ((behavior & EXTENSION_RETURN_NULL) &&
-				FILE_POSSIBLY_DELETED(errno))
-			{
-				pfree(path);
-				return NULL;
-			}
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not open file \"%s\": %m", path)));
+			pfree(path);
+			return NULL;
+		}
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", path)));
 	}
 
 	pfree(path);


Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-18 Thread John Naylor
On 12/17/18, Tom Lane  wrote:
> John Naylor  writes:
>> Since PL/pgSQL uses the core scanner, we'd need to use offsets in its
>> reserved_keywords[], too. Those don't change much, so we can probably
>> get away with hard-coding the offsets and the giant string in that
>> case. (If that's not acceptable, we could separate that out to
>> pl_reserved_kwlist.h and reuse the above tooling to generate
>> pl_reserved_kwlist_{offset,string}.h, but that's more complex.)
>
> plpgsql isn't as stable as all that: people propose new syntax for it
> all the time.  I do not think a hand-maintained array would be pleasant
> at all.

Okay.

> Also, wouldn't we also adopt this technology for its unreserved keywords,
> too?

We wouldn't be forced to, but there might be other reasons to do so.
Were you thinking of code consistency (within pl_scanner.c or
globally)? Or something else?

If we did adopt this setup for plpgsql unreserved keywords,
ecpg/preproc/ecpg_keywords.c and ecpg/preproc/c_keywords.c would be
left using the current ScanKeyword struct for search. Using offset
search for all 5 types of keywords would be globally consistent, but
it also means additional headers, generated headers, and makefile
rules.

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-21 Thread John Naylor
On 12/20/18, Tom Lane  wrote:
> I'd be inclined to put the script in src/tools, I think.  IMO src/common
> is for code that actually gets built into our executables.

Done.

>> which takes
>> pl_unreserved_kwlist.h as input and outputs
>> pl_unreserved_kwlist_offset.h and pl_unreserved_kwlist_string.h.
>
> I wonder whether we'd not be better off producing just one output
> file, in which we have the offsets emitted as PG_KEYWORD macros
> and then the giant string emitted as a macro definition, ie
> something like
>
> #define PG_KEYWORD_STRING \
>   "absolute\0" \
>   "alias\0" \
>   ...
>
> That simplifies the Makefile-hacking, at least, and it possibly gives
> callers more flexibility about what they actually want to do with the
> string.

Okay, I tried that. Since the script is turning one header into
another, I borrowed the "*_d.h" nomenclature from the catalogs. Using
a single file required some #ifdef hacks in the output file. Maybe
there's a cleaner way to do this, but I don't know what it is.

Using a single file also gave me another idea: take value and category
out of ScanKeyword, and replace them with an index into a separate
array holding that data, which would only be accessed in the event of
a hit. That would shrink ScanKeyword to 4 bytes (offset, index),
further increasing locality of reference. It might not be worth it,
but I can try it after moving on to the core scanner.
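
For concreteness, a rough sketch of the layout I have in mind (the
names here are provisional, just to illustrate the idea):

/* Compact entry searched during lookup: 4 bytes per keyword. */
typedef struct ScanKeywordCompact
{
	uint16		offset;		/* offset into the giant keyword string */
	uint16		aux_index;	/* index into the auxiliary array below */
} ScanKeywordCompact;

/* Auxiliary data, fetched only after a successful binary search. */
typedef struct ScanKeywordAuxData
{
	int16		value;		/* grammar's token code */
	int16		category;	/* keyword category */
} ScanKeywordAuxData;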

> I'm for "not change things unnecessarily".  People might well be
> scraping the keyword list out of parser/kwlist.h for other purposes
> right now --- indeed, it's defined the way it is exactly to let
> people do that.  I don't see a good reason to force them to redo
> whatever tooling they have that depends on that.  So let's build
> kwlist_offsets.h alongside that, but not change kwlist.h itself.

Done.

>> I used the global .gitignore, but maybe that's an abuse of it.
>
> Yeah, I'd say it is.

Moved.

>> +# TODO: Error out if the keyword names are not in ASCII order.
>
> +many for including such a check.

Done.

> Also note that we don't require people to have Perl installed when
> building from a tarball.  Therefore, these derived headers must get
> built during "make distprep" and removed by maintainer-clean but
> not distclean.  I think this also has some implications for VPATH
> builds, but as long as you follow the pattern used for other
> derived header files (e.g. fmgroids.h), you should be fine.

Done. I also blindly added support for MSVC.

-John Naylor
 src/common/keywords.c |  55 
 src/include/common/keywords.h |  14 +++
 src/pl/plpgsql/src/.gitignore |   1 +
 src/pl/plpgsql/src/Makefile   |   9 +-
 src/pl/plpgsql/src/pl_scanner.c   | 119 ++
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 107 +++
 src/tools/gen_keywords.pl | 136 ++
 src/tools/msvc/Solution.pm|  10 +++
 8 files changed, 356 insertions(+), 95 deletions(-)

diff --git a/src/common/keywords.c b/src/common/keywords.c
index 0c0c794c68..0fb14a0372 100644
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@@ -112,3 +112,58 @@ ScanKeywordLookup(const char *text,
 
 	return NULL;
 }
+
+/* Like ScanKeywordLookup, but uses offsets into a keyword string. */
+const ScanKeywordOffset *
+ScanKeywordLookupOffset(const char *text,
+		const ScanKeywordOffset *keywords,
+		int num_keywords,
+		const char *keyword_string)
+{
+	int			len,
+				i;
+	char		word[NAMEDATALEN];
+	const ScanKeywordOffset *low;
+	const ScanKeywordOffset *high;
+
+	len = strlen(text);
+	/* We assume all keywords are shorter than NAMEDATALEN. */
+	if (len >= NAMEDATALEN)
+		return NULL;
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+	 * produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (i = 0; i < len; i++)
+	{
+		char		ch = text[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		word[i] = ch;
+	}
+	word[len] = '\0';
+
+	/*
+	 * Now do a binary search using plain strcmp() comparison.
+	 */
+	low = keywords;
+	high = keywords + (num_keywords - 1);
+	while (low <= high)
+	{
+		const ScanKeywordOffset *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(keyword_string + middle->offset, word);
+		if (difference == 0)
+			return middle;
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return NULL;
+}
diff --git a/src/include/common/keywords.h b/src/include/common/keywords.h
index 0b31505b66..7cd7bf4461 100644
--- a/src/include/common/keywords.h
+++ b/src/include/common/keywords.h
@@ -28,11 +28,20 @@ typedef struct ScanKeyword
 	int16		category;		/* 

Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-23 Thread John Naylor
On 12/22/18, Tom Lane  wrote:
> John Naylor  writes:
>> Using a single file also gave me another idea: Take value and category
>> out of ScanKeyword, and replace them with an index into another array
>> containing those, which will only be accessed in the event of a hit.
>> That would shrink ScanKeyword to 4 bytes (offset, index), further
>> increasing locality of reference. Might not be worth it, but I can try
>> it after moving on to the core scanner.
>
> I like that idea a *lot*, actually, because it offers the opportunity
> to decouple this mechanism from all assumptions about what the
> auxiliary data for a keyword is.

Okay, in that case I went ahead and did it for WIP v3.

> (it'd be a good idea to have a switch that allows specifying the
> prefix of these constant names).

Done as an optional switch, and tested, but not yet used in favor of
the previous method as a fallback. I'll probably do it in the final
version to keep lines below 80, and to add 'core_' to the core keyword
vars.

> /* Payload data for keywords */
> typedef struct MyKeyword
> {
> 	int16		value;
> 	int16		category;
> } MyKeyword;

I tweaked this a bit to

typedef struct ScanKeywordAux
{
	int16		value;			/* grammar's token code */
	char		category;		/* see codes above */
} ScanKeywordAux;

It seems that category was made 2 bytes only so that ScanKeyword's
size would be a power of 2 (true on 32-bit machines, but no longer).
Using char will save another few hundred bytes in the core scanner.
Since we're only accessing this once per identifier, we may not need
to worry so much about memory alignment.

> Aside from being arguably better from the locality-of-reference
> standpoint, this gets us out of the weird ifdef'ing you've got in
> the v2 patch.  The kwlist_d.h headers can be very ordinary headers.

Yeah, that's a nice (and for me unexpected) bonus.

-John Naylor
 src/common/keywords.c |  55 
 src/include/common/keywords.h |  12 +++
 src/pl/plpgsql/src/.gitignore |   1 +
 src/pl/plpgsql/src/Makefile   |  13 +--
 src/pl/plpgsql/src/pl_scanner.c   | 127 +++
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 107 +++
 src/tools/gen_keywords.pl | 139 ++
 src/tools/msvc/Solution.pm|  10 +++
 8 files changed, 361 insertions(+), 103 deletions(-)

diff --git a/src/common/keywords.c b/src/common/keywords.c
index 0c0c794c68..b0e5a721b6 100644
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@@ -112,3 +112,58 @@ ScanKeywordLookup(const char *text,
 
 	return NULL;
 }
+
+/* Like ScanKeywordLookup, but uses offsets into a keyword string. */
+int
+ScanKeywordLookupOffset(const char *string_to_lookup,
+		const char *kw_strings,
+		const uint16 *kw_offsets,
+		int num_keywords)
+{
+	int			len,
+				i;
+	char		word[NAMEDATALEN];
+	const uint16 *low;
+	const uint16 *high;
+
+	len = strlen(string_to_lookup);
+	/* We assume all keywords are shorter than NAMEDATALEN. */
+	if (len >= NAMEDATALEN)
+		return -1;
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
+	 * produce the wrong translation in some locales (eg, Turkish).
+	 */
+	for (i = 0; i < len; i++)
+	{
+		char		ch = string_to_lookup[i];
+
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		word[i] = ch;
+	}
+	word[len] = '\0';
+
+	/*
+	 * Now do a binary search using plain strcmp() comparison.
+	 */
+	low = kw_offsets;
+	high = kw_offsets + (num_keywords - 1);
+	while (low <= high)
+	{
+		const uint16 *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(kw_strings + *middle, word);
+		if (difference == 0)
+			return middle - kw_offsets;
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return -1;
+}
diff --git a/src/include/common/keywords.h b/src/include/common/keywords.h
index 0b31505b66..201d0fcc7a 100644
--- a/src/include/common/keywords.h
+++ b/src/include/common/keywords.h
@@ -28,6 +28,13 @@ typedef struct ScanKeyword
 	int16		category;		/* see codes above */
 } ScanKeyword;
 
+/* Payload data for keywords */
+typedef struct ScanKeywordAux
+{
+	int16		value;			/* grammar's token code */
+	char		category;		/* see codes above */
+} ScanKeywordAux;
+
 #ifndef FRONTEND
 extern PGDLLIMPORT const ScanKeyword ScanKeywords[];
 extern PGDLLIMPORT const int NumScanKeywords;
@@ -41,4 +48,9 @@ extern const ScanKeyword *ScanKeywordLookup(const char *text,
   const ScanKeyword *keywords,
   int num_keywords);
 
+int ScanKeywordLookupOffset(const char *string_to_lookup,
+		const char *kw_strings,
+		const uint16 *kw_offsets,
+		int num_keywords);
+
 #endif			/* KEYWORDS_H */
diff --git a/src/pl/plpgs

Re: Speeding up text_position_next with multibyte encodings

2018-12-26 Thread John Naylor
On 12/22/18, Heikki Linnakangas  wrote:
> On 14/12/2018 20:20, John Naylor wrote:
> I'm afraid that script doesn't work as a performance test. The
> position() function is immutable, so the result gets cached in the plan
> cache. All you're measuring is the speed to get the constant from the
> plan cache :-(.

That makes perfect sense now. I should have been more skeptical about
the small and medium sizes having similar times. :/

> I rewrote the same tests with a little C helper function, attached, to
> fix that, and to eliminate any PL/pgSQL overhead.

Thanks for that, I'll probably have occasion to do something like this
for other tests.
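
For the archives, such a helper is essentially a loop around the
built-in function, something like this (my own sketch, not Heikki's
exact attachment; after building it as a C module, wrap it with
CREATE FUNCTION ... RETURNS int LANGUAGE C STRICT):

#include "postgres.h"
#include "fmgr.h"
#include "utils/builtins.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(benchmark_position);

/*
 * Run position(haystack, needle) 'iterations' times and return the
 * last result, so every search really executes rather than being
 * satisfied from the plan cache.
 */
Datum
benchmark_position(PG_FUNCTION_ARGS)
{
	text	   *haystack = PG_GETARG_TEXT_PP(0);
	text	   *needle = PG_GETARG_TEXT_PP(1);
	int32		iterations = PG_GETARG_INT32(2);
	int32		result = 0;
	int32		i;

	for (i = 0; i < iterations; i++)
		result = DatumGetInt32(DirectFunctionCall2(textpos,
												   PointerGetDatum(haystack),
												   PointerGetDatum(needle)));

	PG_RETURN_INT32(result);
}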

> You chose interesting characters for the UTF-8 test. The haystack is a
> repeating pattern of byte sequence EC 99 84, and the needle is a
> repeating pattern of EC 84 B1. In the 'long' test, the lengths in the
> skip table are '2', '1' and '250'. But the search bounces between the
> '2' and '1' cases, and never hits the byte that would allow it to skip
> 250 bytes. Interesting case, I had not realized that that can happen.

Me neither, that was unintentional.

> But I don't think we need to put much weight on that, you could come up
> with other scenarios where the current code has skip table collisions, too.

Okay.

> So overall, I think it's still a worthwhile tradeoff, given that that is
> a worst case scenario. If you choose less pathological UTF-8 codepoints,
> or there is no match or the match is closer to the beginning of the
> string, the patch wins.

On 12/23/18, Tomas Vondra  wrote:
> So, what is the expected speedup in a "good/average" case? Do we have
> some reasonable real-world workload mixing these cases that could be
> used as a realistic benchmark?

I'll investigate some "better" cases.

-John Naylor



Re: automatically assigning catalog toast oids

2018-12-11 Thread John Naylor
On 12/11/18, Andres Freund  wrote:
> I've attached a patch implementing that.  I'm not particularly in love
> with FirstGenbkiObjectId as the symbol, but I couldn't think of
> something more descriptive.

How about FirstAutoAssignedObjectId?


-John Naylor



reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-16 Thread John Naylor
On 10/15/18, Tom Lane  wrote:
> Andres Freund  writes:
>> On 2018-10-15 16:36:26 -0400, Tom Lane wrote:
>>> We could possibly fix these by changing the data structure so that
>>> what's in a ScanKeywords entry is an offset into some giant string
>>> constant somewhere.  No idea how that would affect performance, but
>>> I do notice that we could reduce the sizeof(ScanKeyword), which can't
>>> hurt.
>
>> Yea, that might even help performancewise. Alternatively we could change
>> ScanKeyword to store the keyword name inline, but that'd be a measurable
>> size increase...
>
> Yeah.  It also seems like doing it this way would improve locality of
> access: the pieces of the giant string would presumably be in the same
> order as the ScanKeywords entries, whereas with the current setup,
> who knows where the compiler has put 'em or in what order.
>
> We'd need some tooling to generate the constants that way, though;
> I can't see how to make it directly from kwlist.h.

A few months ago I was looking into faster search algorithms for
ScanKeywordLookup(), so this is interesting to me. While an optimal
full replacement would be a lot of work, the above ideas are much less
invasive and would still have some benefit. Unless anyone intends to
work on this, I'd like to flesh out the offset-into-giant-string
approach a bit further:

Since there are several callers of the current approach that don't use
the core keyword list, we'd have to keep the existing struct and
lookup function, to keep the complexity manageable. Once we have an
offset-based struct and function, it makes sense to use it for all
searches of core keywords. This includes not only the core scanner,
but also adt/rule_utils.c, fe_utils/string_utils.c, and
ecpg/preproc/keywords.c.

There would need to be a header with offsets replacing name strings,
generated from parser/kwlist.h, maybe kwlist_offset.h. It'd probably
be convenient if it was emitted into the common/ dir. The giant string
would likely need its own header (kwlist_string.h?).
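
To illustrate (hypothetical contents, since the exact macro shapes
are up for discussion), the generated pair might look something like:

/* kwlist_offset.h: offsets into the giant string replace the names */
PG_KEYWORD(0, ABORT_P, UNRESERVED_KEYWORD)		/* "abort" */
PG_KEYWORD(6, ABSOLUTE_P, UNRESERVED_KEYWORD)	/* "absolute" */
...

/* kwlist_string.h: all keyword names in one NUL-separated constant */
#define PG_KEYWORD_STRING \
	"abort\0" \
	"absolute\0" \
	...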

Since PL/pgSQL uses the core scanner, we'd need to use offsets in its
reserved_keywords[], too. Those don't change much, so we can probably
get away with hard-coding the offsets and the giant string in that
case. (If that's not acceptable, we could separate that out to
pl_reserved_kwlist.h and reuse the above tooling to generate
pl_reserved_kwlist_{offset,string}.h, but that's more complex.)

The rest should be just a SMOP. Any issues I left out?

-John Naylor



Re: inconsistency and inefficiency in setup_conversion()

2018-12-14 Thread John Naylor
On 12/1/18, Dmitry Dolgov <9erthali...@gmail.com> wrote:
> I see that the author keeps patch updated, but I'm a bit worried because of
> lack of full review since probably May. I'm moving it to the next CF, let's
> see
> if there would be more feedback.
>
> P.S. adding Daniel, since he is assigned as a reviewer.

Having heard nothing in a while, I've removed Daniel as a reviewer to
make room for someone else. He is, of course, free to re-add himself.
v8 is attached.

Since it's been a few months since last discussion, I'd like to
summarize the purpose of this patch and advocate for its inclusion in
v12:

1. Correctness
In the intro thread [1], I showed that object comments on some
conversions are wrong, and hard to fix given the current setup. This
is a documentation bug of sorts.

2. Clean-up
Currently, utils/mb/conversion_procs/Makefile has an ad-hoc script to
generate the SQL file; that script has to be duplicated in the MSVC
tooling, and the resulting file executed by initdb.c. Storing the
conversions in .dat files removes the need for any of that.

3. Performance
This patch shaves 5-6% off initdb's runtime. Not as much as hoped, but still
a nice bonus.

[1] 
https://www.postgresql.org/message-id/CAJVSVGWtUqxpfAaxS88vEGvi%2BjKzWZb2EStu5io-UPc4p9rSJg%40mail.gmail.com


-John Naylor
From bdbdb36129708d1d417f8db9009b377c3d4e3e90 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Fri, 14 Dec 2018 15:19:54 -0500
Subject: [PATCH v8 1/2] Add pg_language lookup.

This didn't seem worth doing before, but an upcoming commit will add
88 entries to pg_proc whose language is 'c'. In addition, we can
now use 'internal' in Gen_fmgrtab.pl instead of looking up the OID.
This simplifies that script and its Makefile a bit.
---
 doc/src/sgml/bki.sgml|  6 +--
 src/backend/catalog/genbki.pl|  8 +++
 src/backend/utils/Gen_fmgrtab.pl |  7 ++-
 src/backend/utils/Makefile   |  8 +--
 src/include/catalog/pg_proc.dat  | 92 
 src/include/catalog/pg_proc.h|  2 +-
 src/tools/msvc/Solution.pm   |  4 +-
 7 files changed, 64 insertions(+), 63 deletions(-)

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 786abb95d4..37bb7a2346 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -409,8 +409,8 @@
 that's error-prone and hard to understand, so for frequently-referenced
 catalogs, genbki.pl provides mechanisms to write
 symbolic references instead.  Currently this is possible for references
-to access methods, functions, operators, opclasses, opfamilies, and
-types.  The rules are as follows:
+to access methods, functions, languages, operators, opclasses, opfamilies,
+and types.  The rules are as follows:

 

@@ -420,7 +420,7 @@
   Use of symbolic references is enabled in a particular catalog column
   by attaching BKI_LOOKUP(lookuprule)
   to the column's definition, where lookuprule
-  is pg_am, pg_proc,
+  is pg_am, pg_proc, pg_language,
   pg_operator, pg_opclass,
   pg_opfamily, or pg_type.
   BKI_LOOKUP can be attached to columns of
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 8e2a2480be..8ccfcb7724 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -182,6 +182,13 @@ foreach my $row (@{ $catalog_data{pg_am} })
 	$amoids{ $row->{amname} } = $row->{oid};
 }
 
+# language OID lookup
+my %langoids;
+foreach my $row (@{ $catalog_data{pg_language} })
+{
+	$langoids{ $row->{lanname} } = $row->{oid};
+}
+
 # opclass OID lookup
 my %opcoids;
 foreach my $row (@{ $catalog_data{pg_opclass} })
@@ -256,6 +263,7 @@ foreach my $row (@{ $catalog_data{pg_type} })
 # Map catalog name to OID lookup.
 my %lookup_kind = (
 	pg_am   => \%amoids,
+	pg_language => \%langoids,
 	pg_opclass  => \%opcoids,
 	pg_operator => \%operoids,
 	pg_opfamily => \%opfoids,
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index ed16737a6a..b6809c7277 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -59,6 +59,8 @@ die "No include path; you must specify -I.\n" if !$include_path;
 # Note: We pass data file names as arguments and then look for matching
 # headers to parse the schema from. This is backwards from genbki.pl,
 # but the Makefile dependencies look more sensible this way.
+# We currently only need pg_proc, but retain the possibility of reading
+# more than one data file.
 my %catalogs;
 my %catalog_data;
 foreach my $datfile (@input_files)
@@ -82,9 +84,6 @@ foreach my $datfile (@input_files)
 my $FirstGenbkiObjectId =
   Catalog::FindDefinedSymbol('access/transam.h', $include_path,
 	'FirstGenbkiObjectId');
-my $INTERNALlanguageId =
-  Catalog::FindDefinedSymbolFromData($catalog_data{pg_language},
-	'INTERNALlanguageId');
 
 # Collect certain fields from pg_proc.dat.
 my @fmgr = ();
@@ -94,7 +93,7 @@ foreach my $row (@{ $catalog_data{pg_proc

Re: Speeding up text_position_next with multibyte encodings

2018-12-14 Thread John Naylor
On 12/14/18, John Naylor  wrote:
> I signed up to be a reviewer, and I will be busy next month, so I went
> ahead and fixed the typo in the patch that broke assert-enabled
> builds. While at it, I standardized on the spelling "start_ptr" in a
> few places to match the rest of the file. It's a bit concerning that
> it wouldn't compile with asserts, but the patch was written by a
> committer and seems to work.

I just noticed that the contrib/citext test fails. I've set the status
to waiting on author.

-John Naylor



Re: automatically assigning catalog toast oids

2018-12-14 Thread John Naylor



> On 12/13/18, Tom Lane  wrote:
> Looking at the existing entries, it seems like we'd have to have
> one special case: schema public has OID 2200 but is intentionally
> not pinned.

setup_depend() mentions other exceptions, but only this one currently has any 
effect as far as I can see:

"pg_database: it's a feature, not a bug, that template1 is not pinned."

-John Naylor



Re: Speeding up text_position_next with multibyte encodings

2018-12-14 Thread John Naylor
On 11/30/18, Dmitry Dolgov <9erthali...@gmail.com> wrote:
> Unfortunately, patch doesn't compile anymore due:
>
> varlena.c: In function ‘text_position_next_internal’:
> varlena.c:1337:13: error: ‘start_ptr’ undeclared (first use in this
> function)
>   Assert(start_ptr >= haystack && start_ptr <= haystack_end);
>
> Could you please send an updated version? For now I'm moving it to the next
> CF.

I signed up to be a reviewer, and I will be busy next month, so I went
ahead and fixed the typo in the patch that broke assert-enabled
builds. While at it, I standardized on the spelling "start_ptr" in a
few places to match the rest of the file. It's a bit concerning that
it wouldn't compile with asserts, but the patch was written by a
committer and seems to work.

On 10/19/18, Heikki Linnakangas  wrote:
> This is a win in most cases. One case is slower: calling position() with
> a large haystack input, where the match is near the end of the string.

I wanted to test this case in detail, so I ran the attached script,
which runs position() in three scenarios:
short:   1 character needle at the end of a ~1000 character haystack,
repeated 1 million times
medium: 50 character needle at the end of a ~1 million character
haystack, repeated 1 million times
long:  250 character needle at the end of an ~80 million character
haystack (~230MB, comfortably below the 256MB limit Heikki reported),
done just one time

I took the average of 5 runs using Korean characters in both UTF-8
(3-byte) and EUC-KR (2-byte) encodings.

UTF-8
length  master  patch
short   2.26s   2.51s
med     2.28s   2.54s
long    3.07s   3.11s

EUC-KR
length  master  patch
short   2.29s   2.37s
med     2.29s   2.36s
long    1.75s   1.71s

With UTF-8, the patch is 11-12% slower on short and medium strings,
and about the same on long strings. With EUC-KR, the patch is about 3%
slower on short and medium strings, and 2-3% faster on long strings.
It seems the worst case is not that bad, and could be optimized, as
Heikki said.

-John Naylor
From ca7a228174acb8b85b87642bb6df182139587c05 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sun, 18 Nov 2018 17:40:09 +0700
Subject: [PATCH v2] Use single-byte Boyer-Moore-Horspool search even with
 multibyte encodings.

The old implementation first converted the input strings to arrays of
wchars, and performed the on those. However, the conversion is expensive,
and for a large input string, consumes a lot of memory.

Avoid the conversion, and instead use the single-byte algorithm even with
multibyte encodings. That has a couple of problems, though. Firstly, we
might get fooled if the needle string's byte sequence appears embedded
inside a different string. We might incorrectly return a match in the
middle of a multi-byte character. The second problem is that the
text_position function needs to return the position of the match, counted
in characters, rather than bytes. We can work around both of those problems
by an extra step after finding a match. Walk the string one character at a
time, starting from the beginning, until we reach the position of the match.
We can then check if the match was found at a valid character boundary,
which solves the first problem, and we can count the characters, so that we
can return the character offset. Walking the characters is faster than the
wchar conversion, especially in the case that there are no matches and we
can avoid it altogether. Also, avoiding the large allocation allows these
functions to work with inputs larger than 256 MB, even with multibyte
encodings.

For UTF-8, we can even skip the step to verify that the match lands on a
character boundary, because in UTF-8, the byte sequence of one character
cannot appear in the middle of a different character. I think many other
encodings have the same property, but I wasn't sure, so I only enabled
that optimization for UTF-8.

Most of the callers of the text_position_setup/next functions were actually
not interested in the position of the match, counted in characters. To
cater for them, refactor the text_position_next() interface into two parts:
searching for the next match (text_position_next()), and returning the
current match's position as a pointer (text_position_get_match_ptr()) or
as a character offset (text_position_get_match_pos()). Most callers of
text_position_setup/next are not interested in the character offsets, so
getting the pointer to the match within the original string is a more
convenient API for them. It also allows skipping the character counting
with UTF-8 encoding altogether.
---
 src/backend/utils/adt/varlena.c | 475 +---
 1 file changed, 253 insertions(+), 222 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0fd3b15748..90

Re: WIP: Avoid creation of the free space map for small tables

2018-11-30 Thread John Naylor
On 11/29/18, Amit Kapila  wrote:
> On Thu, Nov 29, 2018 at 3:07 PM John Naylor  wrote:
>> Done. I tried adding it to several schedules, but for some reason
>> vacuuming an empty table failed to truncate the heap to 0 blocks.
>> Putting the test in its own group fixed the problem, but that doesn't
>> seem ideal.
>>
>
> It might be because it fails the should_attempt_truncation() check.
> See below code:
>
> if (should_attempt_truncation(vacrelstats))
> lazy_truncate_heap(onerel, vacrelstats, vac_strategy);

I see. I think truncating the FSM is not essential to show either the
old or new behavior -- I could skip that portion to enable running the
test in a parallel group.

>> Can you please repeat the copy test you have done above with
>> fillfactor as 20 and 30?
>
> I will send the results in a separate email soon.

I ran the attached scripts, which populate 100 tables with either 4 or
8 blocks. The test tables were pre-filled with one tuple and vacuumed
so that the FSMs were already created when testing the master branch.
The patch branch was compiled with a threshold of 8, but testing
inserts of 4 pages effectively simulates a threshold of 4. Config was
stock, except for fsync = off. I took the average of 40 runs (2
complete tests of 20 runs each) after removing the 10% highest and
lowest:

fillfactor=20
# blocks  master  patch
4         19.1ms  17.5ms
8         33.4ms  30.9ms

fillfactor=30
# blocks  master  patch
4         20.1ms  19.7ms
8         34.7ms  34.9ms

It seems the patch might be a bit faster with fillfactor=20, but I'm
at a loss as to why that would be. Previous testing with a higher
threshold showed a significant performance penalty starting around 10
blocks [1], but that used truncation rather than deletion, and had a
fill-factor of 10.
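
For reference, the setup is roughly of this shape (a simplified sketch
with illustrative names and constants, not the exact attached scripts;
it relies on psql's \gexec):

SELECT format('CREATE TABLE fsm_test_%s (a int, filler text) WITH (fillfactor = 20);', g)
FROM generate_series(1, 100) g \gexec

SELECT format('INSERT INTO fsm_test_%s VALUES (1, repeat(''x'', 100));', g)
FROM generate_series(1, 100) g \gexec

SELECT format('VACUUM fsm_test_%s;', g)
FROM generate_series(1, 100) g \gexec

The timed step then copies enough rows into each table to fill 4 or 8
blocks, exercising the free space logic on every insertion.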

--
[1] 
https://www.postgresql.org/message-id/CAJVSVGWCRMyi8sSqguf6PfFcpM3hwNY5YhPZTt-8Q3ZGv0UGYw%40mail.gmail.com

-John Naylor


fsm-copy-setup.sql
Description: application/sql


fsm-copy-test-v3.sql
Description: application/sql


Re: WIP: Avoid creation of the free space map for small tables

2018-12-07 Thread John Naylor
On 12/6/18, Amit Kapila  wrote:
> On Thu, Dec 6, 2018 at 10:53 PM John Naylor  wrote:
>>
>> I've added an additional regression test for finding the right block
>> and removed a test I thought was redundant. I've kept the test file in
>> its own schedule.
>>
>
> +# --
> +# fsm does a vacuum, and running it in parallel seems to prevent heap
> truncation.
> +# --
> +test: fsm
> +
>
> It is not clear to me from the comment why running it in parallel
> prevents heap truncation, can you explain what behavior are you seeing
> and what makes you think that running it in parallel caused it?

One of the tests deletes all records from the relation and vacuums. In
serial schedule, the heap and FSM are truncated; in parallel they are
not. Make check fails, since the tests measure relation size.
Taking a closer look, I'm even more alarmed to discover that vacuum
doesn't even seem to remove deleted rows in parallel schedule (that
was in the last test I added), which makes no sense and causes that
test to fail. I looked in vacuum.sql for possible clues, but didn't
see any. I'll have to investigate further.

-John Naylor



Re: Thinking about EXPLAIN ALTER TABLE

2018-12-07 Thread John Naylor
On 12/7/18, Greg Stark  wrote:
> I've been poking around with a feature I've wanted a number of times
> in the past, "EXPLAIN ALTER TABLE".

I believe I've seen your messages to that effect in the archives, so
I've had it in the back of my mind as well. I think it could be very
useful.

> 3. Whether a full table constraint validation is going to happen

Is it possible that this can occur via index-only scan and that this
feature could know that?

> For the most part ALTER TABLE is already structured such that this is
> pretty easy. It does a lot of preparatory work without doing catalog
> updates and I can just call that same preparatory work without calling
> the subsequent work phases.

One thing that came to mind: Since the input is not a plan tree, it
seems it would be more difficult to keep EXPLAIN in sync with
additional ALTER TABLE features. Do you have any thoughts about that?

> postgres***=# explain alter table t alter column i set not null;
> ┌─┐
> │   QUERY PLAN│
> ├─┤
> │ Lock Level: AccessExclusiveLock │
> │ ALTER TABLE: t  │
> │   Relation: t   │
> │   Rewrite: none │
> │   Relation: t2  │
> │   Rewrite: none │
> └─┘

In this example, I assume the two relations are partitions? With many
partitions, this could get unwieldy pretty fast. I imagine there could
be a shorthand notation.

-John Naylor


docs: outdated reference to recursive expression evaluation

2018-12-08 Thread John Naylor
In config.sgml, in the section about max_stack_depth, there's this sentence:

"The safety margin is needed because the stack depth is not checked in
every routine in the server, but only in key potentially-recursive
routines such as expression evaluation."

Since the change in expression evaluation in v10, there's probably a
better example of recursive routines, but I'm not sure what that would
be.

-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-12-08 Thread John Naylor
On 12/8/18, Amit Kapila  wrote:
> On Fri, Dec 7, 2018 at 7:25 PM John Naylor  wrote:
> I couldn't resist the temptation to figure out what's going on here.
> The newly added tests have deletes followed by vacuum and then you
> check whether the vacuum has removed the data by checking heap and or
> FSM size.  Now, when you run such a test in parallel, the vacuum can
> sometimes skip removing the rows because there are parallel
> transactions open which can see the deleted rows.

Ah yes, of course.

> You can easily
> verify this phenomenon by running the newly added tests in one session
> in psql when there is another parallel session which has an open
> transaction.  For example:
>
> Session-1
> Begin;
> Insert into foo values(1);
>
> Session-2
> \i fsm.sql
>
> Now, you should see the results similar to what you are seeing when
> you ran the fsm test by adding it to one of the parallel group.  Can
> you test this at your end and confirm whether my analysis is correct
> or not.

Yes, I see the same behavior.

> So, you can keep the test as you have in parallel_schedule, but
> comment needs to be changed.  Also, you need to add the new test in
> serial_schedule.  I have done both the changes in the attached patch,
> kindly confirm if this looks correct to you.

Looks good to me. I'll just note that the new line in the serial
schedule has an extra space at the end. Thanks for looking into this.

-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-12-02 Thread John Naylor
On 12/1/18, Amit Kapila  wrote:
> Can you check whether the number of pages after test are the same with
> and without a patch in this setup?

I did verify that the number of pages was as intended.

-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-12-02 Thread John Naylor
On 12/3/18, Amit Kapila  wrote:
>> During pg_upgrade, skip transfer of FSMs if they wouldn't have been
>> created on the new cluster.
>
> I think in some cases, it won't be consistent with HEAD's behavior.
> After truncate, we leave the FSM as it is, so the case where before
> upgrade the relation was truncated, we won't create the FSM in new
> cluster and that will be inconsistent with the behavior of HEAD.

To be precise, with the TRUNCATE statement, the FSM (everything but
the main relation fork, I think) is deleted, but using DELETE to
remove all rows from the table will preserve the forks. In the latter
case, when VACUUM truncates the FSM, it removes all leaf pages leaving
behind the root page and one mid-level page. I haven't changed this
behavior.
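
The difference is easy to see from psql (a hypothetical session, not
part of the patch; pg_relation_size() reports a given fork's size):

CREATE TABLE fsm_demo (a int);
INSERT INTO fsm_demo SELECT g FROM generate_series(1, 100000) g;
VACUUM fsm_demo;                             -- creates the FSM fork
SELECT pg_relation_size('fsm_demo', 'fsm');  -- nonzero

DELETE FROM fsm_demo;
VACUUM fsm_demo;                             -- removes FSM leaf pages
SELECT pg_relation_size('fsm_demo', 'fsm');  -- still nonzero: the root
                                             -- and one mid-level page remain

TRUNCATE fsm_demo;
SELECT pg_relation_size('fsm_demo', 'fsm');  -- 0: all forks start over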

> I think similar anomaly will be there when we delete rows from the table
> such that after deletion size of relation becomes smaller than
> HEAP_FSM_CREATION_THRESHOLD.

Yes, in that case there will be inconsistency, but I'm comfortable
with it. Others may not be.

> I am not sure if it is a good idea to *not* transfer FSM files during
> upgrade unless we ensure that we remove FSM whenever the relation size
> falls below HEAP_FSM_CREATION_THRESHOLD.  What do you think?  BTW,
> what is your reasoning for not removing FSM on truncate?

My reasoning is that if we ever went past the threshold, it's likely
we'll do so again, so I didn't feel it was worth the extra code
complexity to remove the FSM. In the pg_upgrade case, however, it is
simple to not copy the FSM.

-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-12-02 Thread John Naylor
On 12/3/18, Amit Kapila  wrote:
> On Mon, Dec 3, 2018 at 9:46 AM Amit Kapila  wrote:
>>
>> On Thu, Nov 29, 2018 at 3:07 PM John Naylor  wrote:
>> >
>>
> v8 code:
> +fsm_local_set(Relation rel, BlockNumber new_nblocks)
> +{
> + BlockNumber blkno,
> + cached_target_block;
> +
> + /*
> + * Mark blocks available starting after the last block number we have
> + * cached, and ending at the current last block in the relation.
> + * When we first set the map, this will flag all blocks as available
> + * to try.  If we reset the map while waiting for a relation
> + * extension lock, this will only flag new blocks as available,
> + * if any were created by another backend.
> + */
> + for (blkno = fsm_local_map.nblocks; blkno < new_nblocks; blkno++)
> + fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
>
> v9 code:
> +static void
> +fsm_local_set(Relation rel, BlockNumber nblocks)
> +{
> + BlockNumber blkno,
> + cached_target_block;
> +
> + for (blkno = 0; blkno < nblocks; blkno++)
> + fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
>
> What is the reason for the above code change in the latest patch version?

Per your recent comment, we no longer check relation size if we waited
on a relation extension lock, so this is essentially a reversion to an
earlier version. Keeping v8 would have the advantage that it'd be
simple to change our minds about this. Do you have an opinion about
that?

> It would be good if you add few comments atop functions
> GetPageWithFreeSpace, RecordAndGetPageWithFreeSpace and
> RecordPageWithFreeSpace about their interaction with local map.

Good idea, will do.

-John Naylor



Re: automatically assigning catalog toast oids

2018-12-10 Thread John Naylor
On 12/9/18, Tom Lane  wrote:
> Another thing I seriously dislike is that this allows people to omit OIDs
> from .dat entries in catalogs where we traditionally hand-assign OIDs.
> That's not a good idea because it would mean those entries don't have
> stable OIDs, whereas the whole point of hand assignment is to ensure
> all built-in objects of a particular type have stable OIDs.  Now, you
> could argue about the usefulness of that policy for any given catalog;
> but if we decide that catalog X doesn't need stable OIDs then that should
> be an intentional policy change, not something that can happen because
> one lazy hacker didn't follow the policy.

On this point, I believe this could have happened anyway. pg_opclass
has a mix of hand- and initdb-assigned oids, and there was nothing
previously to stop that from creeping into any other catalog, as far
as I can tell.
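
(A quick way to see the mix, assuming the conventional boundary of
10000 for initdb-assigned OIDs:

SELECT oid < 10000 AS hand_assigned, count(*)
FROM pg_opclass
GROUP BY 1;

Both groups come back non-empty.)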

-John Naylor



Re: docs: outdated reference to recursive expression evaluation

2018-12-10 Thread John Naylor
On 12/8/18, Tom Lane  wrote:
> John Naylor  writes:
>> In config.sgml, in the section about max_stack_depth, there's this
>> sentence:
>> "The safety margin is needed because the stack depth is not checked in
>> every routine in the server, but only in key potentially-recursive
>> routines such as expression evaluation."
>
>> Since the change in expression evaluation in v10, there's probably a
>> better example of recursive routines, but I'm not sure what that would
>> be.
>
> We could say "expression compilation" and it'd still be valid.  Or just
> drop the last four words altogether.  I don't think we want to expend the
> verbiage to be more precise here, since it's only a tangential point.

I'm inclined to agree. If you like, here's a patch to leave out the example.

-John Naylor
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index fc4f17be41..4a7121a51f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1675,12 +1675,11 @@ include_dir 'conf.d'
 enforced by the kernel (as set by ulimit -s or local
 equivalent), less a safety margin of a megabyte or so.  The safety
 margin is needed because the stack depth is not checked in every
-routine in the server, but only in key potentially-recursive routines
-such as expression evaluation.  The default setting is two
-megabytes (2MB), which is conservatively small and
-unlikely to risk crashes.  However, it might be too small to allow
-execution of complex functions.  Only superusers can change this
-setting.
+routine in the server, but only in key potentially-recursive routines.
+The default setting is two megabytes (2MB), which
+is conservatively small and unlikely to risk crashes.  However,
+it might be too small to allow execution of complex functions.
+Only superusers can change this setting.

 



Re: WIP: Avoid creation of the free space map for small tables

2018-11-29 Thread John Naylor
> Is the point 3 change related to pgindent?  I think even if you want
> these, then don't prepare other patches on top of this, keep it
> entirely separate.

Both removed.

>> Also, we don't quite have a consensus on the threshold value, but I
>> have set it to 4 pages for v8. If this is still considered too
>> expensive (and basic tests show it shouldn't be), I suspect it'd be
>> better to interleave the available block numbers as described a couple
>> days ago than lower the threshold further.
>>
>
> Can you please repeat the copy test you have done above with
> fillfactor as 20 and 30?

I will send the results in a separate email soon.

> Few more comments:
> ---
> 1. I think we can add some test(s) to test the new functionality, may
> be something on the lines of what Robert has originally provided as an
> example of this behavior [1].

Done. I tried adding it to several schedules, but for some reason
vacuuming an empty table failed to truncate the heap to 0 blocks.
Putting the test in its own group fixed the problem, but that doesn't
seem ideal.

> 2.
> The similar call is required in AbortSubTransaction function as well.
> I suggest to add it after pgstat_progress_end_command in both
> functions.

Done.

> 3.
>> agree we shouldn't change that without a reason to. One simple idea is
>> add a 3rd boolean parameter to GetPageWithFreeSpace() to control
>> whether it gives up if the FSM fork doesn't indicate free space, like
> I have the exact fix in my mind, so let's do it that way.

Done. This also reverts comments and variable names that referred to
updating the local map after relation extension.

While at it, I changed a couple conditionals to check the locally
cached nblocks rather than the threshold. No functional change, but
looks more precise. Might save a few cycles as well.

>> > 5. Your logic to update FSM on standby seems okay, but can you show
>> > some tests which proves its sanity?
>>
>> I believe to convince myself it was working, I used the individual
>> commands in the sql file in [3], then used the size function on the
>> secondary. I'll redo that to verify.

I've verified the standby behaves precisely as the primary, as far as
the aforementioned script goes.

-John Naylor
From c4bef3bf790745638d42e5eb6b303b85a9a828c4 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Thu, 29 Nov 2018 16:31:41 +0700
Subject: [PATCH v9 1/2] Avoid creation of the free space map for small tables.

The FSM isn't created if the heap has 4 blocks or fewer. If the last
known target block has insufficient space, try every block before extending
the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.
---
 contrib/pageinspect/expected/page.out |  77 +++
 contrib/pageinspect/sql/page.sql  |  33 +--
 doc/src/sgml/storage.sgml |  13 +-
 src/backend/access/brin/brin.c|   2 +-
 src/backend/access/brin/brin_pageops.c|  10 +-
 src/backend/access/heap/hio.c |  47 +++--
 src/backend/access/transam/xact.c |  14 ++
 src/backend/commands/vacuumlazy.c |  17 +-
 src/backend/storage/freespace/README  |  11 +-
 src/backend/storage/freespace/freespace.c | 233 +-
 src/backend/storage/freespace/indexfsm.c  |   6 +-
 src/include/storage/freespace.h   |   9 +-
 src/test/regress/expected/fsm.out |  55 +
 src/test/regress/parallel_schedule|   5 +
 src/test/regress/sql/fsm.sql  |  37 
 15 files changed, 464 insertions(+), 105 deletions(-)
 create mode 100644 src/test/regress/expected/fsm.out
 create mode 100644 src/test/regress/sql/fsm.sql

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 --------
    8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_ra

Re: WIP: Avoid creation of the free space map for small tables

2018-12-06 Thread John Naylor
On 12/3/18, Amit Kapila  wrote:
> On Mon, Dec 3, 2018 at 11:15 AM John Naylor  wrote:
>> Per your recent comment, we no longer check relation size if we waited
>> on a relation extension lock, so this is essentially a reversion to an
>> earlier version.
>>
>
> fsm_local_set is being called from RecordAndGetPageWithFreeSpace and
> GetPageWithFreeSpace whereas the change we have discussed was specific
> to GetPageWithFreeSpace, so not sure if we need any change in
> fsm_local_set.

Not needed, but I wrongly assumed you'd think it unclear otherwise.
I've now restored the generality and updated the comments to be closer
to v8.

> It would be good if you add few comments atop functions
> GetPageWithFreeSpace, RecordAndGetPageWithFreeSpace and
> RecordPageWithFreeSpace about their interaction with local map.

Done. Also additional minor comment editing.

I've added an additional regression test for finding the right block
and removed a test I thought was redundant. I've kept the test file in
its own schedule.

-John Naylor
From 7494ff89be7ad4e07f277f3bcc6e866c5527ce45 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Thu, 6 Dec 2018 10:49:03 -0600
Subject: [PATCH v10 1/2] Avoid creation of the free space map for small
 tables.

The FSM isn't created if the heap has 4 blocks or fewer. If the last
known target block has insufficient space, try every block before extending
the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.
---
 contrib/pageinspect/expected/page.out |  77 +++
 contrib/pageinspect/sql/page.sql  |  33 +--
 doc/src/sgml/storage.sgml |  13 +-
 src/backend/access/brin/brin.c|   2 +-
 src/backend/access/brin/brin_pageops.c|  10 +-
 src/backend/access/heap/hio.c |  47 ++--
 src/backend/access/transam/xact.c |  14 ++
 src/backend/commands/vacuumlazy.c |  17 +-
 src/backend/storage/freespace/README  |  11 +-
 src/backend/storage/freespace/freespace.c | 251 +-
 src/backend/storage/freespace/indexfsm.c  |   6 +-
 src/include/storage/freespace.h   |   9 +-
 src/test/regress/expected/fsm.out |  62 ++
 src/test/regress/parallel_schedule|   5 +
 src/test/regress/sql/fsm.sql  |  46 
 15 files changed, 498 insertions(+), 105 deletions(-)
 create mode 100644 src/test/regress/expected/fsm.out
 create mode 100644 src/test/regress/sql/fsm.sql

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 --------
    8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 -------
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 
--------
-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 ------
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+ERROR:  block number 1 is out of range for relation "test_rel_forks"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm", and "init".
-SELECT get_r

Re: WIP: Avoid creation of the free space map for small tables

2018-11-20 Thread John Naylor
I wrote:

> On 11/19/18, Amit Kapila  wrote:
> [ abortive states ]
>> I think it might come from any other place between when you set it and
>> before it got cleared (like any intermediate buffer and pin related
>> API's).
>
> Okay, I will look into that.

LockBuffer(), visibilitymap_pin(), and GetVisibilityMapPins() don't
raise errors at this level. I don't immediately see any additional good
places from which to clear the local map.

-John Naylor



Re: inconsistency and inefficiency in setup_conversion()

2018-11-26 Thread John Naylor
v7 is a rebase over recent catalog changes.

-John Naylor
From 52aff43dcc91733c1b941fed4582d9c0602004d0 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sat, 13 Oct 2018 19:28:08 +0700
Subject: [PATCH v7 1/2] Add pg_language lookup.

This didn't seem worth doing before, but an upcoming commit will add
88 entries to pg_proc whose language is 'c'. In addition, we can
now use 'internal' in Gen_fmgrtab.pl instead of looking up the OID.
This simplifies that script and its Makefile a bit.
---
 doc/src/sgml/bki.sgml|  6 +--
 src/backend/catalog/genbki.pl|  8 +++
 src/backend/utils/Gen_fmgrtab.pl |  9 ++--
 src/backend/utils/Makefile   |  8 +--
 src/include/catalog/pg_proc.dat  | 92 
 src/include/catalog/pg_proc.h|  2 +-
 src/tools/msvc/Solution.pm   |  4 +-
 7 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 786abb95d4..37bb7a2346 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -409,8 +409,8 @@
 that's error-prone and hard to understand, so for frequently-referenced
 catalogs, genbki.pl provides mechanisms to write
 symbolic references instead.  Currently this is possible for references
-to access methods, functions, operators, opclasses, opfamilies, and
-types.  The rules are as follows:
+to access methods, functions, languages, operators, opclasses, opfamilies,
+and types.  The rules are as follows:

 

@@ -420,7 +420,7 @@
   Use of symbolic references is enabled in a particular catalog column
   by attaching BKI_LOOKUP(lookuprule)
   to the column's definition, where lookuprule
-  is pg_am, pg_proc,
+  is pg_am, pg_proc, pg_language,
   pg_operator, pg_opclass,
   pg_opfamily, or pg_type.
   BKI_LOOKUP can be attached to columns of
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index edc8ea9f53..3410816882 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -175,6 +175,13 @@ foreach my $row (@{ $catalog_data{pg_am} })
 	$amoids{ $row->{amname} } = $row->{oid};
 }
 
+# language OID lookup
+my %langoids;
+foreach my $row (@{ $catalog_data{pg_language} })
+{
+	$langoids{ $row->{lanname} } = $row->{oid};
+}
+
 # opclass OID lookup
 my %opcoids;
 foreach my $row (@{ $catalog_data{pg_opclass} })
@@ -249,6 +256,7 @@ foreach my $row (@{ $catalog_data{pg_type} })
 # Map catalog name to OID lookup.
 my %lookup_kind = (
 	pg_am   => \%amoids,
+	pg_language => \%langoids,
 	pg_opclass  => \%opcoids,
 	pg_operator => \%operoids,
 	pg_opfamily => \%opfoids,
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index ca28291355..87d250f318 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -59,6 +59,8 @@ die "No include path; you must specify -I.\n" if !$include_path;
 # Note: We pass data file names as arguments and then look for matching
 # headers to parse the schema from. This is backwards from genbki.pl,
 # but the Makefile dependencies look more sensible this way.
+# We currently only need pg_proc, but retain the possibility of reading
+# more than one data file.
 my %catalogs;
 my %catalog_data;
 foreach my $datfile (@input_files)
@@ -78,13 +80,10 @@ foreach my $datfile (@input_files)
 	$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0);
 }
 
-# Fetch some values for later.
+# Fetch a value for later.
 my $FirstBootstrapObjectId =
   Catalog::FindDefinedSymbol('access/transam.h', $include_path,
 	'FirstBootstrapObjectId');
-my $INTERNALlanguageId =
-  Catalog::FindDefinedSymbolFromData($catalog_data{pg_language},
-	'INTERNALlanguageId');
 
 # Collect certain fields from pg_proc.dat.
 my @fmgr = ();
@@ -94,7 +93,7 @@ foreach my $row (@{ $catalog_data{pg_proc} })
 	my %bki_values = %$row;
 
 	# Select out just the rows for internal-language procedures.
-	next if $bki_values{prolang} ne $INTERNALlanguageId;
+	next if $bki_values{prolang} ne 'internal';
 
 	push @fmgr,
 	  {
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index e797539d09..da40f6b6c4 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -31,15 +31,11 @@ generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp $(top_
 
 $(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h
 
-FMGR_DATA := $(addprefix $(top_srcdir)/src/include/catalog/,\
-	pg_language.dat pg_proc.dat \
-	)
-
 # fmgr-stamp records the last time we ran Gen_fmgrtab.pl.  We don't rely on
 # the timestamps of the individual output files, because the Perl script
 # won't update them if they didn't change (to avoid unnecessary recompiles).
-fmgr-stamp: Gen_fmgrtab.pl $(catalogdir)/Catalog.pm $(FMGR_DATA) $(top_srcdir)/src/include/access/transam.h
-	$(PERL) -I $(catalogdir) $< -I $(top_srcdir)/src/include/ $(FMGR_DATA)
+fmgr-stamp: Gen_fmgrtab.pl $(catalogd

Re: WIP: Avoid creation of the free space map for small tables

2018-11-26 Thread John Naylor
On 11/24/18, Amit Kapila  wrote:
> On Fri, Nov 23, 2018 at 11:56 AM John Naylor  wrote:
>> On 11/16/18, Amit Kapila  wrote:
>>
>> >
>> > 3.
>> >  static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16
>> > slot,
>> > -uint8 newValue, uint8 minValue);
>> > + uint8 newValue, uint8 minValue);
>> >
>> > This appears to be a spurious change.
>>
>> 2 and 3 are separated into 0001.
>>
>
> Is the point 3 change related to pgindent?  I think even if you want
> these, then don't prepare other patches on top of this, keep it
> entirely separate.

There are some places in the codebase that have spaces after tabs for
no apparent reason (not to line up function parameters or pointer
declarations). pgindent hasn't been able to fix this. If you wish, you
are of course free to apply 0001 separately at any time.

>> Also, we don't quite have a consensus on the threshold value, but I
>> have set it to 4 pages for v8. If this is still considered too
>> expensive (and basic tests show it shouldn't be), I suspect it'd be
>> better to interleave the available block numbers as described a couple
>> days ago than lower the threshold further.
>>
>
> Can you please repeat the copy test you have done above with
> fillfactor as 20 and 30?

I did two kinds of tests. The first had a fill-factor of 10 [1], the
second had the default storage, but I prevented the backend from
caching the target block [2], to fully exercise the free space code.
Would you like me to repeat the first one with 20 and 30? And do you
think it is useful enough to test the copying of 4 blocks and not
smaller numbers?

> Few more comments:
> ---
> 1. I think we can add some test(s) to test the new functionality, may
> be something on the lines of what Robert has originally provided as an
> example of this behavior [1].

Maybe the SQL script attached to [3] (which I probably based on
Robert's report) can be cleaned up into a regression test.

> 3.
> GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
> {
> ..
> + if (target_block == InvalidBlockNumber &&
> + rel->rd_rel->relkind == RELKIND_RELATION)
> + {
> + nblocks = RelationGetNumberOfBlocks(rel);
> +
> + if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
> + {
> + /*
> + * If the FSM knows nothing of the rel, try the last page before
> + * we give up and extend.  This avoids one-tuple-per-page syndrome
> + * during bootstrapping or in a recently-started system.
> + */
> + target_block = nblocks - 1;
> + }
> ..
> }
>
> Moving this check inside GetPageWithFreeSpace has one disadvantage, we
> will always consider last block which can have some inadvertent
> effects.  Consider when this function gets called from
> RelationGetBufferForTuple just before extension, it can consider to
> check for the last block even though that is already being done in the
> begining when GetPageWithFreeSpace was called.  I am not completely
> sure how much this is a case to worry because  it will help to check
> last block when the same is concurrently added and FSM is not updated
> for same.  I am slightly worried because the unpatched code doesn't
> care for such case and we have no intention to change this behaviour.
> What do you think?

I see what you mean. If the other backend extended by 1 block, the
intention is to keep it out of the FSM at first, and by extension, not
visible in other ways. The comment implies that's debatable, but I
agree we shouldn't change that without a reason to. One simple idea is
to add a third boolean parameter to GetPageWithFreeSpace() to control
whether it gives up if the FSM fork doesn't indicate free space, like:

if (target_block == InvalidBlockNumber &&
    rel->rd_rel->relkind == RELKIND_RELATION &&
    !check_fsm_only)
{
    nblocks = RelationGetNumberOfBlocks(rel);

    if (nblocks > HEAP_FSM_CREATION_THRESHOLD)
    {
        /* Try the last page before giving up and extending. */
        target_block = nblocks - 1;
    }
}
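
The pre-extension call site in RelationGetBufferForTuple could then
pass true so we don't recheck the last page. A sketch only; the
parameter name is just the placeholder used above:

    /* Hypothetical pre-extension call; check_fsm_only is a placeholder. */
    targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace,
                                       true);    /* check_fsm_only */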

> 4. You have mentioned above that "system catalogs created during
> bootstrap still have a FSM if they have any data." and I can also see
> this behavior, have you investigated this point further?

Code reading didn't uncover the cause. I might have to step through
with a debugger or something similar. I should find time for that next
month.

> 5. Your logic to update FSM on standby seems okay, but can you show
> some tests which proves its sanity?

To convince myself it was working, I believe I used the individual
commands in the SQL file in [3], then used the size function on the
standby. I'll redo that to verify.

--
[1] 
https://www.postgresql.org/message-id/CAJVSVGWCRMyi8sSqguf6PfFcpM3hwNY5YhPZTt-8Q3ZGv0UGYw%40mail.gmail.com
[2] 
https://www.postgresql.org/message-id/CAJVSVGWMXzsqYpPhO3Snz4n5y8Tq-QiviuSCKyB5czCTnq9rzA%40mail.gmail.com
[3] 
https://www.postgresql.org/message-id/CAJVSVGWvB13PzpbLEecFuGFc5V2fsO736BsdTakPiPAcdMM5tQ%40mail.gmail.com


-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-11-22 Thread John Naylor
On 11/16/18, Amit Kapila  wrote:

I've attached v8, which includes the 2-state map and addresses the points below:

> Some assorted comments:
> 1.
>  
> -Each heap and index relation, except for hash indexes, has a Free Space
> Map
> +Each heap relation, unless it is very small, and each index relation,
> +except for hash indexes, has a Free Space Map
>  (FSM) to keep track of available space in the relation. It's stored
>
> It appears that line has ended abruptly.

Revised.

> 2.
> page = BufferGetPage(buffer);
> + targetBlock = BufferGetBlockNumber(buffer);
>
>   if (!PageIsNew(page))
>   elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
> - BufferGetBlockNumber(buffer),
> + targetBlock,
>   RelationGetRelationName(relation));
>
>   PageInit(page, BufferGetPageSize(buffer), 0);
> @@ -623,7 +641,18 @@ loop:
>   * current backend to make more insertions or not, which is probably a
>   * good bet most of the time.  So for now, don't add it to FSM yet.
>   */
> - RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
> + RelationSetTargetBlock(relation, targetBlock);
>
> Is this related to this patch? If not, I suggest let's do it
> separately if required.
>
> 3.
>  static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
> -uint8 newValue, uint8 minValue);
> + uint8 newValue, uint8 minValue);
>
> This appears to be a spurious change.

2 and 3 are separated into 0001.

> 4.
> @@ -378,24 +386,15 @@ RelationGetBufferForTuple(Relation relation, Size
> len,
>   * target.
>   */
>   targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
> +
> + /*
> + * In case we used an in-memory map of available blocks, reset
> + * it for next use.
> + */
> + if (targetBlock < HEAP_FSM_CREATION_THRESHOLD)
> + ClearLocalMap();
>
> How will you clear the local map during error?  I think you need to
> clear it in abort path and you can name the function as
> FSMClearLocalMap or something like that.

Done. I've placed this call last, just before abort processing.

> 5.
> +/*#define TRACE_TARGETBLOCK */
>
> Debugging leftover, do you want to retain this and related stuff
> during the development of patch?

I have set it aside as a separate patch but have not attached it for now.

Also, we don't quite have a consensus on the threshold value, but I
have set it to 4 pages for v8. If this is still considered too
expensive (and basic tests show it shouldn't be), I suspect it'd be
better to interleave the available block numbers as described a couple
days ago than lower the threshold further.

I have looked at zhio.c, and it seems trivial to adapt zheap to this patchset.

-John Naylor
From d1752858aaaf048fd94461dbd79fe0bb0f262b98 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Fri, 23 Nov 2018 12:55:16 +0700
Subject: [PATCH v8 1/3] Minor cosmetic adjustments for consistency.

---
 src/backend/access/heap/hio.c | 5 +++--
 src/backend/storage/freespace/freespace.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index b8b5871559..7f7c9db635 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -600,10 +600,11 @@ loop:
 	 * risk wiping out valid data).
 	 */
 	page = BufferGetPage(buffer);
+	targetBlock = BufferGetBlockNumber(buffer);
 
 	if (!PageIsNew(page))
 		elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
-			 BufferGetBlockNumber(buffer),
+			 targetBlock,
 			 RelationGetRelationName(relation));
 
 	PageInit(page, BufferGetPageSize(buffer), 0);
@@ -623,7 +624,7 @@ loop:
 	 * current backend to make more insertions or not, which is probably a
 	 * good bet most of the time.  So for now, don't add it to FSM yet.
 	 */
-	RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
+	RelationSetTargetBlock(relation, targetBlock);
 
 	return buffer;
 }
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7c4ad1c449..47a991e21d 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -106,7 +106,7 @@ static Size fsm_space_cat_to_avail(uint8 cat);
 
 /* workhorse functions for various operations */
 static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
-   uint8 newValue, uint8 minValue);
+uint8 newValue, uint8 minValue);
 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
 BlockNumber start, BlockNumber end,
-- 
2.17.1

From e6226e8ee3d51e651b3a0b7035ad6676b4cf4bc6 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Fri, 23 Nov 2018 12:58:07 +0700
Subject: [PATCH v8 2/3] Avoid creation of the free space map for small tables

Re: Sync ECPG scanner with core

2018-11-20 Thread John Naylor
I wrote:

> On 11/14/18, Tom Lane  wrote:
>> Alvaro Herrera  writes:
>>> Maybe time to compile the ecpg test cases during "make check-world"?
>>
>> I'm dubious that the lexer is a significant part of that, though I could
>> be wrong ...
>
> If it were, it'd be much easier to try a Flex flag other than the
> default (the most compact representation), something like -Cfe. That'd
> be a prerequisite for no-backtracking to matter anyway. That's easy
> enough to test, so I'll volunteer to do that sometime.

The preproc phase of make check in the ecpg dir only takes 150ms.
Compiling and linking the test executables takes another 2s, and
running the tests takes another 5s on my machine. Even without setting
up and tearing down the temp instance, preproc is trivial.

-John Naylor



Re: [RFC] Removing "magic" oids

2018-11-20 Thread John Naylor
On 11/20/18, Andres Freund  wrote:
> I'm
> sure we'll find some things to adapt around the margins, but imo the
> patch as a whole looks pretty reasonable.

bki.sgml is now a bit out of date. I've attached a draft fix.

-John Naylor
diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 0fb309a1bd..1919bac936 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -89,7 +89,7 @@
The CATALOG line can also be annotated, with some
other BKI property macros described in genbki.h, to
define other properties of the catalog as a whole, such as whether
-   it has OIDs (by default, it does).
+   it is a shared relation.
   
 
   
@@ -354,7 +354,7 @@
 also needed if the row's OID must be referenced from C code.
 If neither case applies, the oid metadata field can
 be omitted, in which case the bootstrap code assigns an OID
-automatically, or leaves it zero in a catalog that has no OIDs.
+automatically.
 In practice we usually preassign OIDs for all or none of the pre-loaded
 rows in a given catalog, even if only some of them are actually
 cross-referenced.
@@ -387,15 +387,16 @@
 through the catalog headers and .dat files
 to see which ones do not appear.  You can also use
 the duplicate_oids script to check for mistakes.
-(genbki.pl will also detect duplicate OIDs
+(genbki.pl will assign OIDs for any rows that
+didn't get one hand-assigned to them and also detect duplicate OIDs
 at compile time.)

 

 The OID counter starts at 1 at the beginning of a bootstrap run.
-If a catalog row is in a table that requires OIDs, but no OID was
-preassigned by an oid field, then it will
-receive an OID of 1 or above.
+If a row from a source other than postgres.bki
+is inserted into a table that requires OIDs, then it will receive an
+OID of 1 or above.

   
 
@@ -714,7 +715,6 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
  tableoid
  bootstrap
  shared_relation
- without_oids
  rowtype_oid oid
  (name1 =
  type1
@@ -766,7 +766,6 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
  
   The table is created as shared if shared_relation is
   specified.
-  It will have OIDs unless without_oids is specified.
   The table's row type OID (pg_type OID) can optionally
   be specified via the rowtype_oid clause; if not specified,
   an OID is automatically generated for it.  (The rowtype_oid
@@ -805,7 +804,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
 

 
- insert OID = oid_value ( value1 value2 ... )
+ insert OID = oid_value ( oid_value value1 value2 ... )
 
 
 
@@ -813,11 +812,10 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
   Insert a new row into the open table using value1, value2, etc., for its column
-  values and oid_value for its OID.  If
-  oid_value is zero
-  (0) or the clause is omitted, and the table has OIDs, then the
-  next available OID is assigned.
+  values and oid_value
+  for its OID. Note that OID is stored in an ordinary column; the
+  OID = oid_value
+  statement is only there for debugging purposes.
  
 
  
@@ -991,10 +989,10 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
int4 and text, respectively, and insert
two rows into the table:
 
-create test_table 420 (cola = int4, colb = text)
+create test_table 420 (oid = oid, cola = int4, colb = text)
 open test_table
-insert OID=421 ( 1 "value1" )
-insert OID=422 ( 2 _null_ )
+insert OID=421 ( 421 1 "value1" )
+insert OID=422 ( 422 2 _null_ )
 close test_table
 
   


Re: WIP: Avoid creation of the free space map for small tables

2018-11-18 Thread John Naylor
On 11/16/18, Amit Kapila  wrote:
> +/* Status codes for the local map. */
> +#define FSM_LOCAL_ZERO 0x00 /* Beyond the end of the relation */
> +#define FSM_LOCAL_AVAIL 0x01 /* Available to try */
> +#define FSM_LOCAL_TRIED 0x02 /* Already tried, not enough space */
>
> Instead of maintaining three states, can't we do with two states
> (Available and Not Available), basically combine 0 and 2 in your case.
> I think it will save some cycles in
> fsm_local_set, where each time you need to initialize all the entries
> in the map.  I think we can argue that it is not much overhead, but I
> think it is better code-wise also if we can make it happen with fewer
> states.

That'd work too, but let's consider this scenario: We have a 2-block
table that has no free space. After trying each block, the local cache
looks like

0123
TT00

Let's say we have to wait to acquire a relation extension lock,
because another backend had already started extending the heap by 1
block. We call GetPageWithFreeSpace() and now the local map looks like

0123
TTA0

By using bitwise OR to set availability, the already-tried blocks
remain as they are. With only 2 states, the map would look like this
instead:

0123
AAAN

If we assume that an insert into the newly-created block 2 will almost
always succeed, we don't have to worry about wasting time re-checking
the first 2 full blocks. Does that sound right to you?


> Some assorted comments:
> 1.
>  
> -Each heap and index relation, except for hash indexes, has a Free Space
> Map
> +Each heap relation, unless it is very small, and each index relation,
> +except for hash indexes, has a Free Space Map
>  (FSM) to keep track of available space in the relation. It's stored
>
> It appears that line has ended abruptly.

Not sure what you're referring to here.

> 2.
> page = BufferGetPage(buffer);
> + targetBlock = BufferGetBlockNumber(buffer);
>
>   if (!PageIsNew(page))
>   elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
> - BufferGetBlockNumber(buffer),
> + targetBlock,
>   RelationGetRelationName(relation));
>
>   PageInit(page, BufferGetPageSize(buffer), 0);
> @@ -623,7 +641,18 @@ loop:
>   * current backend to make more insertions or not, which is probably a
>   * good bet most of the time.  So for now, don't add it to FSM yet.
>   */
> - RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));
> + RelationSetTargetBlock(relation, targetBlock);
>
> Is this related to this patch? If not, I suggest let's do it
> separately if required.

I will separate this out.

> 3.
>  static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
> -uint8 newValue, uint8 minValue);
> + uint8 newValue, uint8 minValue);
>
> This appears to be a spurious change.

It was intentional, but I will include it separately as above.

> 4.
> @@ -378,24 +386,15 @@ RelationGetBufferForTuple(Relation relation, Size
> len,
>   * target.
>   */
>   targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
> +
> + /*
> + * In case we used an in-memory map of available blocks, reset
> + * it for next use.
> + */
> + if (targetBlock < HEAP_FSM_CREATION_THRESHOLD)
> + ClearLocalMap();
>
> How will you clear the local map during error?  I think you need to
> clear it in abort path and you can name the function as
> FSMClearLocalMap or something like that.

That sounds right, and I will rename the function that way. For the
abort path, were you referring to this or somewhere else?

if (!PageIsNew(page))
elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
 targetBlock,
 RelationGetRelationName(relation));

> 5.
> +/*#define TRACE_TARGETBLOCK */
>
> Debugging leftover, do you want to retain this and related stuff
> during the development of patch?

I modeled this after TRACE_VISIBILITYMAP in visibilitymap.c. It's
useful for development, but I don't particularly care whether it's in
the final version.

Also, I found an off-by-one error that caused an unnecessary
smgrexists() call in tables with threshold + 1 pages. This will be
fixed in the next version.

-John Naylor



Re: WIP: Avoid creation of the free space map for small tables

2018-11-19 Thread John Naylor
On 11/19/18, Amit Kapila  wrote:
> On Mon, Nov 19, 2018 at 7:30 AM John Naylor  wrote:
>> Let's say we have to wait to acquire a relation extension lock,
>> because another backend had already started extending the heap by 1
>> block. We call GetPageWithFreeSpace() and now the local map looks like
>>
>> 0123
>> TTA0
>>
>> By using bitwise OR to set availability, the already-tried blocks
>> remain as they are. With only 2 states, the map would look like this
>> instead:
>>
>> 0123
>> AAAN
>>

> In my mind for such a case it should look like below:
> 0123
> NNAN

Okay, to retain that behavior with only 2 status codes, I have
implemented the map as a struct with 2 members: the cached number of
blocks, plus the same array I had before. This also allows a more
efficient implementation at the micro level. I just need to do some
more testing on it.
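
For concreteness, a minimal sketch of that struct; the field names
match the patches elsewhere in this thread, but the array bound is my
assumption:

    typedef struct
    {
        /* Cached size of the relation in blocks. */
        BlockNumber nblocks;

        /* One status byte per block, two states; bound is assumed. */
        uint8       map[HEAP_FSM_CREATION_THRESHOLD];
    } FSMLocalMap;

    static FSMLocalMap fsm_local_map;

With nblocks cached, "beyond the end of the relation" no longer needs
its own status code: any block number at or past nblocks is simply
never consulted.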

[ abortive states ]
> I think it might come from any other place between when you set it and
> before it got cleared (like any intermediate buffer and pin related
> API's).

Okay, I will look into that.

> One other thing that slightly bothers me is the call to
> RelationGetNumberOfBlocks via fsm_allow_writes.  It seems that call
> will happen quite frequently in this code-path and can have some
> performance impact.  As of now, I don't have any idea to avoid it or
> reduce it more than what you already have in the patch, but I think we
> should try some more to avoid it.  Let me know if you have any ideas
> around that?

FWIW, I believe that the callers of RecordPageWithFreeSpace() will
almost always avoid that call. Otherwise, there is at least one detail
that could use attention: if rel->rd_rel->relpages shows fewer pages
than the threshold, then the code doesn't trust it to be true. That
might be worth revisiting. Aside from that, I will have to think
about it.

More generally, I have a couple of ideas about performance:

1. Only mark available every other block such that visible blocks are
interleaved as the relation extends. To explain, this diagram shows a
relation extending, with 1 meaning marked available and 0 meaning
marked not-available.

A
NA
ANA
NANA

So for a 3-block table, we never check block 1. Any free space it has
acquired will become visible when it extends to 4 blocks. For a
4-block threshold, we only check 2 blocks or less. This reduces the
number of lock/pin events but still controls bloat. We could also
check both blocks of a 2-block table.

2. During manual testing I seem to remember times that the FSM code
was invoked even though I expected the smgr entry to have a cached
target block. Perhaps VACUUM or something is clearing that away
unnecessarily. It seems worthwhile to verify and investigate, but that
seems like a separate project.

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-08 Thread John Naylor
On Tue, Jan 8, 2019 at 3:04 PM Tom Lane  wrote:
> > I'll take a crack at separating into a module.  I'll wait a bit in
> > case there are any stylistic suggestions on the patch as it stands.
>
> I had a go at that myself.  I'm sure there's plenty to criticize in
> the result, but at least it passes make check-world ;-)

Just a couple comments about the module:

-If you qualify the function's module name as you did
(PerfectHash::generate_hash_function), you don't have to export the
function into the callers namespace, so you can skip the @EXPORT_OK
setting. Most of our modules don't export.

-There is a bit of a cognitive clash between $case_sensitive in
gen_keywordlist.pl and $case_insensitive in PerfectHash.pm. They each
make sense in their own file, but might it be worth using one or the
other?

-As for the graph algorithm, I'd have to play with it to understand
how it works.


In the committed keyword patch, I noticed that in common/keywords.c,
the array length is defined with

ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS]

but other keyword arrays just have ...[]. Is there a reason for the difference?



Re: WIP: Avoid creation of the free space map for small tables

2019-01-08 Thread John Naylor
On Sun, Dec 30, 2018 at 10:50 PM Amit Kapila  wrote:
>
> On Sun, Dec 30, 2018 at 3:49 AM John Naylor  wrote:
> > > I also tried to insert more
> > > records till 8 pages and same regression is observed! So I guess even
> > > HEAP_FSM_CREATION_THRESHOLD = 4 is not perfect!
> >
> > That's curious, because once the table exceeds the threshold, it would
> > be allowed to update the FSM, and in the process write 3 pages that it
> > didn't have to in the 4 page test. The master branch has the FSM
> > already, so I would expect the 8 page case to regress more.
> >
>
> It is not clear to me why you think there should be regression at 8
> pages when HEAP_FSM_CREATION_THRESHOLD is 4.  Basically, once FSM
> starts getting updated, we should be same as HEAD as it won't take any
> special path?

In this particular test, the FSM is already created ahead of time for
the master branch, so we can compare accessing FSM versus checking
every page. My reasoning is that passing the threshold would take some
time to create 3 FSM pages with the patch, leading to a larger
regression. It seems we don't observe this, however.

On Sun, Dec 30, 2018 at 10:59 PM Mithun Cy  wrote:
> I have some minor comments for pg_upgrade patch
> 1. Now we call stat main fork file in transfer_relfile()
> +sret = stat(old_file, &statbuf);
>
> +/* Save the size of the first segment of the main fork. */
> +if (type_suffix[0] == '\0' && segno == 0)
> +first_seg_size = statbuf.st_size;
>
> But we do not handle the case if stat has returned any error!

How about this:

    /* Did file open fail? */
    if (stat(old_file, &statbuf) != 0)
    {
        /* Extent, fsm, or vm does not exist?  That's OK, just return */
        if (errno == ENOENT &&
            (type_suffix[0] != '\0' || segno != 0))
            return first_seg_size;
        else
            pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
                     map->nspname, map->relname, old_file, new_file,
                     strerror(errno));
    }

    /* Save the size of the first segment of the main fork. */
    else if (type_suffix[0] == '\0' && segno == 0)
        first_seg_size = statbuf.st_size;

    /* If extent, fsm, or vm is empty, just return */
    else if (statbuf.st_size == 0)
        return first_seg_size;

> 2. src/bin/pg_upgrade/pg_upgrade.h
>
>  char   *relname;
> +
> +charrelkind;/* relation relkind -- see pg_class.h */
>
> I think we can remove the added empty line.

In the full context:

-/* the rest are used only for logging and error reporting */
+
+/* These are used only for logging and error reporting. */
 char   *nspname;/* namespaces */
 char   *relname;
+
+charrelkind;/* relation relkind -- see pg_class.h */

Relkind is not used for logging or error reporting, so the space sets
it apart from the previous members. I could instead put relkind before
those other two...

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-09 Thread John Naylor
On Wed, Jan 9, 2019 at 5:33 PM Tom Lane  wrote:
> really need here.  We could go with "[no-]case-insensitive", perhaps.
> Or "[no-]case-fold", which is at least a little shorter and less
> double-negative-y.

I'd be in favor of --[no-]case-fold.

On Tue, Jan 8, 2019 at 5:53 PM Tom Lane  wrote:
> I improved the comment about how come the hash table entry assignment
> works.

I've gone over the algorithm in more detail and I don't see any nicer
way to write it. This comment in PerfectHash.pm:

(It does change '_', else we could just skip adjusting
# $cn here at all, for typical keyword strings.)

...seems a bit out of place in the module, because of its reference to
keywords, of interest right now to its only caller. Maybe a bit of
context here. (I also don't quite understand why we could
hypothetically skip the adjustment.)

Lastly, the keyword headers still have a dire warning about ASCII
order and binary search. Those could be softened to match the comment
in gen_keywordlist.pl.

-- 
John Naylor  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-07 Thread John Naylor
On 1/3/19, Amit Kapila  wrote:
> Yeah, that makes sense, John, can you provide a patch on top of the
> current patch where we check either the last block or every other
> block.

I've attached two patches for testing. Each one applies on top of the
current patch.

Mithun, I'll respond to your other review comments later this week.

-John Naylor
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 2ab1cb58b7..d6352aabd9 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -1103,15 +1103,9 @@ fsm_allow_writes(Relation rel, BlockNumber heapblk,
 static void
 fsm_local_set(Relation rel, BlockNumber curr_nblocks)
 {
-	BlockNumber blkno,
-cached_target_block;
+	BlockNumber cached_target_block;
 
-	/*
-	 * Mark blocks available starting after the last block we have mapped,
-	 * and ending at the current last block in the relation.
-	 */
-	for (blkno = fsm_local_map.nblocks; blkno < curr_nblocks; blkno++)
-		fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
+	fsm_local_map.map[curr_nblocks - 1] = FSM_LOCAL_AVAIL;
 
 	/* Cache the number of blocks. */
 	fsm_local_map.nblocks = curr_nblocks;
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 2ab1cb58b7..820a2579ae 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -1110,8 +1110,15 @@ fsm_local_set(Relation rel, BlockNumber curr_nblocks)
 	 * Mark blocks available starting after the last block we have mapped,
 	 * and ending at the current last block in the relation.
 	 */
-	for (blkno = fsm_local_map.nblocks; blkno < curr_nblocks; blkno++)
+	blkno = curr_nblocks - 1;
+	for (;;)
+	{
 		fsm_local_map.map[blkno] = FSM_LOCAL_AVAIL;
+		if (blkno >= fsm_local_map.nblocks + 2)
+			blkno -= 2;
+		else
+			break;
+	}
 
 	/* Cache the number of blocks. */
 	fsm_local_map.nblocks = curr_nblocks;
diff --git a/src/test/regress/expected/fsm.out b/src/test/regress/expected/fsm.out
index 4c44c4bd6e..973b9319e7 100644
--- a/src/test/regress/expected/fsm.out
+++ b/src/test/regress/expected/fsm.out
@@ -2,16 +2,16 @@
 -- Free Space Map test
 --
 CREATE TABLE fsm_check_size (num int, str text);
--- Fill 2 blocks with as many large records as will fit
+-- Fill 3 blocks with as many large records as will fit
 -- No FSM
 INSERT INTO fsm_check_size SELECT g, rpad('', 1024, 'a')
-FROM generate_series(1,7*2) g;
+FROM generate_series(1,7*3) g;
 VACUUM fsm_check_size;
 SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
 pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
  heap_size | fsm_size 
 ---+--
- 16384 |0
+ 24576 |0
 (1 row)
 
 -- Clear some space on block 0
@@ -26,7 +26,7 @@ SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
 pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
  heap_size | fsm_size 
 ---+--
- 16384 |0
+ 24576 |0
 (1 row)
 
 -- Extend table with enough blocks to exceed the FSM threshold
diff --git a/src/test/regress/sql/fsm.sql b/src/test/regress/sql/fsm.sql
index fad99da1c2..a864c7fd88 100644
--- a/src/test/regress/sql/fsm.sql
+++ b/src/test/regress/sql/fsm.sql
@@ -4,10 +4,10 @@
 
 CREATE TABLE fsm_check_size (num int, str text);
 
--- Fill 2 blocks with as many large records as will fit
+-- Fill 3 blocks with as many large records as will fit
 -- No FSM
 INSERT INTO fsm_check_size SELECT g, rpad('', 1024, 'a')
-FROM generate_series(1,7*2) g;
+FROM generate_series(1,7*3) g;
 VACUUM fsm_check_size;
 SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
 pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
@@ -16,7 +16,7 @@ pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
 DELETE FROM fsm_check_size where num <= 5;
 VACUUM fsm_check_size;
 
--- Insert small record in block 1 to set the cached smgr targetBlock
+-- Insert small record in block 2 to set the cached smgr targetBlock
 INSERT INTO fsm_check_size values(99, 'b');
 
 -- Insert large record and make sure it goes in block 0 rather than


Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-07 Thread John Naylor
On 1/6/19, Tom Lane  wrote:
> I've pushed that version (v8 + max_kw_len); if the buildfarm doesn't
> fall over, we can move on with looking at hashing.

Thank you. The API adjustment looks good, and I'm glad that splitting
out the aux info led to even further cleanups.

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-04 Thread John Naylor
On 12/30/18, Andres Freund  wrote:
> I tried to take this for a spin, and for me the build fails because various
> frontend programs don't have KeywordOffsets/Strings defined, but reference
> it
> through various functions exposed to the frontend (like fmtId()).  That I
> see
> that error but you don't is probably related to me using -fuse-ld=gold in
> CFLAGS.
>
> I can "fix" this by including kwlist_d.h in common/keywords.c
> regardless of FRONTEND. That also lead me to discover that the build
> dependencies somewhere aren't correctly set-up, because I need to
> force a clean rebuild to trigger the problem again, just changing
> keywords.c back doesn't trigger the problem.

Hmm, that was a typo, and I didn't notice even when I found I had to
include kwlist_d.h in ecpg/keywords.c. :-(  I've fixed both of those
in the attached v6.

As far as dependencies, I'm far from sure I have it up to par. That
piece could use some discussion.

On 1/4/19, Tom Lane  wrote:
> Aside from the possible linkage problem, this will need a minor rebase
> over 4879a5172, which rearranged some of plpgsql's calls of
> ScanKeywordLookup.
>
> While I don't think it's going to be hard to resolve these issues,
> I'm wondering where we want to go with this.  Is anyone excited
> about pursuing the perfect-hash-function idea?  (Joerg's example
> function looked pretty neat to me.)  If we are going to do that,
> does it make sense to push this version beforehand?

If it does, for v6 I've also done the rebase, updated the copyright
year, and fixed an error in MSVC.

-John Naylor
From fb846eeabd450b6932e57f7e8d4f0aebc125d178 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Fri, 4 Jan 2019 14:35:40 -0500
Subject: [PATCH v6] Use offset-based keyword lookup.

Replace binary search over an array of ScanKeyword structs with a binary
search over an array of offsets into a keyword string. Access auxillary
data only after a keyword hit. This has better locality of reference and
a smaller memory footprint.
---
 .../pg_stat_statements/pg_stat_statements.c   |   2 +
 src/backend/parser/parser.c   |   4 +-
 src/backend/parser/scan.l |  38 ++--
 src/backend/utils/adt/misc.c  |   2 +-
 src/backend/utils/adt/ruleutils.c |   9 +-
 src/common/.gitignore |   1 +
 src/common/Makefile   |  25 ++-
 src/common/keywords.c |  42 +++--
 src/fe_utils/string_utils.c   |   9 +-
 src/include/common/keywords.h |  19 +-
 src/include/parser/kwlist.h   |   2 +-
 src/include/parser/scanner.h  |   8 +-
 src/interfaces/ecpg/preproc/.gitignore|   2 +
 src/interfaces/ecpg/preproc/Makefile  |  20 ++-
 src/interfaces/ecpg/preproc/c_keywords.c  |  65 ++-
 src/interfaces/ecpg/preproc/c_kwlist.h|  53 ++
 src/interfaces/ecpg/preproc/ecpg_keywords.c   |  81 ++---
 src/interfaces/ecpg/preproc/ecpg_kwlist.h |  68 +++
 src/interfaces/ecpg/preproc/keywords.c|   6 +-
 src/interfaces/ecpg/preproc/pgc.l |  22 +--
 src/interfaces/ecpg/preproc/preproc_extern.h  |   8 +-
 src/pl/plpgsql/src/.gitignore |   2 +
 src/pl/plpgsql/src/Makefile   |  16 +-
 src/pl/plpgsql/src/pl_reserved_kwlist.h   |  55 ++
 src/pl/plpgsql/src/pl_scanner.c   | 167 --
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 111 
 src/tools/gen_keywords.pl | 136 ++
 src/tools/msvc/Solution.pm|  44 +
 src/tools/msvc/clean.bat  |   6 +
 29 files changed, 696 insertions(+), 327 deletions(-)
 create mode 100644 src/common/.gitignore
 create mode 100644 src/interfaces/ecpg/preproc/c_kwlist.h
 create mode 100644 src/interfaces/ecpg/preproc/ecpg_kwlist.h
 create mode 100644 src/pl/plpgsql/src/pl_reserved_kwlist.h
 create mode 100644 src/pl/plpgsql/src/pl_unreserved_kwlist.h
 create mode 100644 src/tools/gen_keywords.pl

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index e8ef966bb5..4776de7595 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -3076,6 +3076,8 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
 	yyscanner = scanner_init(query,
 			 ,
 			 ScanKeywords,
+			 KeywordString,
+			 KeywordOffsets,
 			 NumScanKeywords);
 
 	/* we don't want to re-emit any escape string warnings */
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index 7e9b1222fd..19fa7bb8ae 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -40,8 +40,8 @@ raw_parser(const char *str)
 	int			yyresult;
 
 	/* initialize the flex scanner */
-	yys

Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-04 Thread John Naylor
On 1/3/19, Joerg Sonnenberger  wrote:
> Hello John,
> I was pointed at your patch on IRC and decided to look into adding my
> own pieces. What I can provide you is a fast perfect hash function
> generator.  I've attached a sample hash function based on the current
> main keyword list. hash() essentially gives you the number of the only
> possible match, a final strcmp/memcmp is still necessary to verify that
> it is an actual keyword though. The |0x20 can be dropped if all cases
> have pre-lower-cased the input already. This would replace the binary
> search in the lookup functions. Returning offsets directly would be easy
> as well. That allows writing a single string where each entry is prefixed
> with a type mask, the token id, the length of the keyword and the actual
> keyword text. Does that sound useful to you?

Judging by previous responses, there is still interest in using
perfect hash functions, so thanks for this. I'm not knowledgeable
enough to judge its implementation, so I'll leave that for others.
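
For illustration, a lookup built on such a generator might look like
the sketch below; hash(), keyword_names, and NUM_KEYWORDS are
hypothetical names, not Joerg's actual code:

    #include <string.h>

    static int
    keyword_lookup(const char *word)
    {
        /* The perfect hash yields the only candidate index... */
        int         n = hash(word, strlen(word));

        /* ...and a single strcmp confirms or rejects the match. */
        if (n >= 0 && n < NUM_KEYWORDS &&
            strcmp(keyword_names[n], word) == 0)
            return n;       /* a genuine keyword */
        return -1;          /* not a keyword */
    }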

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-04 Thread John Naylor
On 12/27/18, Tom Lane  wrote:
> If you really are hot about saving that other 440 bytes, the way to
> do it would be to drop the struct entirely and use two parallel
> arrays, an int16[] for value and a char[] (or better uint8[]) for
> category.  Those would be filled by reading kwlist.h twice with
> different definitions for PG_KEYWORD.  Not sure it's worth the
> trouble though --- in particular, not clear that it's a win from
> the standpoint of number of cache lines touched.

Understood. That said, after re-implementing all keyword lookups, I
wondered if there'd be a notational benefit to dropping the struct,
especially since as yet no caller uses both token and category. It
makes pl_scanner.c and its reserved keyword list a bit nicer, and gets
rid of the need to force frontend to have 'zero' token numbers, but
I'm not sure it's a clear win. I've attached a patch (applies on top
of v6), gzipped to avoid confusing the cfbot.
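
For reference, a sketch of how the two parallel arrays could be built
by reading kwlist.h twice, as Tom describes; the array names here are
illustrative:

    /* Token values, one per keyword. */
    #define PG_KEYWORD(kwname, value, category) value,
    static const int16 ScanKeywordTokens[] = {
    #include "parser/kwlist.h"
    };
    #undef PG_KEYWORD

    /* Keyword categories, in the same order. */
    #define PG_KEYWORD(kwname, value, category) category,
    static const uint8 ScanKeywordCategories[] = {
    #include "parser/kwlist.h"
    };
    #undef PG_KEYWORD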

-John Naylor


keyword-nostruct.patch.gz
Description: GNU Zip compressed data


Re: WIP: Avoid creation of the free space map for small tables

2019-01-10 Thread John Naylor
On Wed, Jan 9, 2019 at 10:50 PM Amit Kapila  wrote:
> Thanks, Mithun for performance testing, it really helps us to choose
> the right strategy here.  Once John provides next version, it would be
> good to see the results of regular pgbench (read-write) runs (say at
> 50 and 300 scale factor) and the results of large copy.  I don't think
> there will be any problem, but we should just double check that.

Attached is v12 using the alternating-page strategy. I've updated the
comments and README as needed. In addition, I've

-handled a possible stat() call failure during pg_upgrade
-added one more assertion
-moved the new README material into a separate paragraph
-added a comment to FSMClearLocalMap() about transaction abort
-corrected an outdated comment that erroneously referred to extension
rather than creation
-fleshed out the draft commit messages
From 854ba27740ed26e0d3e89f6228186f2b0914b21e Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Thu, 10 Jan 2019 16:37:57 -0500
Subject: [PATCH v12 1/2] Avoid creation of the free space map for small heap
 relations.

Previously, all heaps had FSMs. For very small tables, this means that
the FSM took up more space than the heap did. This is wasteful, so now
we refrain from creating the FSM for heaps with 4 pages or fewer. If the
last known target block has insufficient space, we still try to insert
into some other page before giving up and extending the relation,
since doing otherwise leads to table bloat. Testing showed that trying
every page penalized performance slightly, so we compromise and try
every other page. This way, we visit at most two pages. Any pages with
wasted free space become visible at next relation extension, so we still
control table bloat. As a bonus, directly attempting one or two pages
can even be faster than consulting the FSM would have been.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.
---
 contrib/pageinspect/expected/page.out |  77 +++
 contrib/pageinspect/sql/page.sql  |  33 +--
 doc/src/sgml/storage.sgml |  13 +-
 src/backend/access/brin/brin.c|   2 +-
 src/backend/access/brin/brin_pageops.c|  10 +-
 src/backend/access/heap/hio.c |  47 ++--
 src/backend/access/transam/xact.c |  14 ++
 src/backend/commands/vacuumlazy.c |  17 +-
 src/backend/storage/freespace/README  |  13 +-
 src/backend/storage/freespace/freespace.c | 262 +-
 src/backend/storage/freespace/indexfsm.c  |   6 +-
 src/include/storage/freespace.h   |   9 +-
 src/test/regress/expected/fsm.out |  62 +
 src/test/regress/parallel_schedule|   6 +
 src/test/regress/serial_schedule  |   1 +
 src/test/regress/sql/fsm.sql  |  46 
 16 files changed, 516 insertions(+), 102 deletions(-)
 create mode 100644 src/test/regress/expected/fsm.out
 create mode 100644 src/test/regress/sql/fsm.sql

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 
8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 ---
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 

-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 --
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm

Re: unnecessary creation of FSM files during bootstrap mode

2019-01-10 Thread John Naylor
Thought I'd ping...
(sorry for the top post)

On Sat, Dec 15, 2018 at 12:02 AM Amit Kapila  wrote:
>
> As pointed out by John Naylor [1], it seems during bootstrap mode, we
> are always creating FSM files which are not required.  In commit's
> b9d01fe288 and 3908473c80, we have added some code where we allowed
> the creation of files during mdopen even if they didn't exist during
> the bootstrap mode.  The comments in the code say: "During bootstrap,
> there are cases where a system relation will be accessed (by internal
> backend processes) before the bootstrap script nominally creates it."
> I am sure this will be the case when that code is added but is it
> required today?  While going through commit 3908473c80, I came across
> below comment:
>
> - * During bootstrap processing, we skip that check, because pg_time,
> - * pg_variable, and pg_log get created before their .bki file entries
> - * are processed.
> - */
> + fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
>
> The system tables mentioned in above commit are not present today, so
> do we really need that code and even if it is required shall we do it
> only for 'main' or 'init' forks?
>
> Tom, as you are a committer of the commits b9d01fe288 and 3908473c80,
> do you remember anything in this regard?
>
>
> [1] - 
> https://www.postgresql.org/message-id/CAJVSVGVtf%2B-2sQGVyEZJQh15dpVicpFA6BBiPLugaD4oBEaiHg%40mail.gmail.com
>
> --
> With Regards,
> Amit Kapila.
> EnterpriseDB: http://www.enterprisedb.com
>



Re: WIP: Avoid creation of the free space map for small tables

2019-01-09 Thread John Naylor
On Wed, Jan 9, 2019 at 7:33 AM Mithun Cy  wrote:
> 2. v11-Every-other-page and v11-last-page patches improve the
> performance from base.
> 3. IMHO v11-Every-other-page would be ideal to consider it improves
> the performance and also to an extent avoid expansion if space is
> already available.

Good to hear. I'll clean up the every-other-page patch and include it
in my next version.

-- 
John Naylor  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-09 Thread John Naylor
On Wed, Jan 9, 2019 at 2:04 PM Tom Lane  wrote:
>
> I wrote:
> > John Naylor  writes:
> >> -There is a bit of a cognitive clash between $case_sensitive in
> >> gen_keywordlist.pl and $case_insensitive in PerfectHash.pm. They each
> >> make sense in their own file, but might it be worth using one or the
> >> other?

> Working on the fmgr-oid-lookup idea gave me the thought that
> PerfectHash.pm ought to support fixed-length keys.  Rather than start
> adding random parameters to the function, I borrowed an idea from
> PostgresNode.pm and made the options be keyword-style parameters.  Now
> the impedance mismatch about case sensitivity is handled with
>
> my $f = PerfectHash::generate_hash_function(\@keywords, $funcname,
> case_insensitive => !$case_sensitive);
>
> which is at least a little clearer than before, though I'm not sure
> if it entirely solves the problem.

It's a bit clearer, but thinking about this some more, it makes sense
for gen_keywordlist.pl to use $case_insensitive, because right now
every instance of the var is "!$case_sensitive". In the attached (on
top of v4), I change the command line option to --citext, and add the
ability to negate it within the option, as '--no-citext'. It's kind of
a double negative for the C-keywords invocation, but we can have the
option for both cases, so we don't need to worry about what the
default is (which is case_insensitive=1).

--
John Naylor  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/common/Makefile b/src/common/Makefile
index d0c2b970eb..5f9327de59 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -124,7 +124,7 @@ libpgcommon_srv.a: $(OBJS_SRV)
 
 # generate SQL keyword lookup table to be included into keywords*.o.
 kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS)
-	$(GEN_KEYWORDLIST) --extern $<
+	$(GEN_KEYWORDLIST) --extern --citext $<
 
 # Dependencies of keywords*.o need to be managed explicitly to make sure
 # that you don't get broken parsing code, even in a non-enable-depend build.
diff --git a/src/interfaces/ecpg/preproc/Makefile b/src/interfaces/ecpg/preproc/Makefile
index 6c02f97622..96dd76317f 100644
--- a/src/interfaces/ecpg/preproc/Makefile
+++ b/src/interfaces/ecpg/preproc/Makefile
@@ -60,10 +60,10 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.
 
 # generate keyword headers
 c_kwlist_d.h: c_kwlist.h $(GEN_KEYWORDLIST_DEPS)
-	$(GEN_KEYWORDLIST) --varname ScanCKeywords --case $<
+	$(GEN_KEYWORDLIST) --varname ScanCKeywords --no-citext  $<
 
 ecpg_kwlist_d.h: ecpg_kwlist.h $(GEN_KEYWORDLIST_DEPS)
-	$(GEN_KEYWORDLIST) --varname ScanECPGKeywords $<
+	$(GEN_KEYWORDLIST) --varname ScanECPGKeywords --citext $<
 
 # Force these dependencies to be known even without dependency info built:
 ecpg_keywords.o c_keywords.o keywords.o preproc.o pgc.o parser.o: preproc.h
diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile
index 8a0f294323..bc4ee50682 100644
--- a/src/pl/plpgsql/src/Makefile
+++ b/src/pl/plpgsql/src/Makefile
@@ -80,10 +80,10 @@ plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.p
 
 # generate keyword headers for the scanner
 pl_reserved_kwlist_d.h: pl_reserved_kwlist.h $(GEN_KEYWORDLIST_DEPS)
-	$(GEN_KEYWORDLIST) --varname ReservedPLKeywords $<
+	$(GEN_KEYWORDLIST) --varname ReservedPLKeywords --citext $<
 
 pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h $(GEN_KEYWORDLIST_DEPS)
-	$(GEN_KEYWORDLIST) --varname UnreservedPLKeywords $<
+	$(GEN_KEYWORDLIST) --varname UnreservedPLKeywords --citext $<
 
 
 check: submake
diff --git a/src/tools/gen_keywordlist.pl b/src/tools/gen_keywordlist.pl
index 2744e1dc26..927511d7c4 100644
--- a/src/tools/gen_keywordlist.pl
+++ b/src/tools/gen_keywordlist.pl
@@ -35,13 +35,13 @@ use PerfectHash;
 
 my $output_path = '';
 my $extern = 0;
-my $case_sensitive = 0;
+my $case_insensitive = 1;
 my $varname = 'ScanKeywords';
 
 GetOptions(
 	'output:s'   => \$output_path,
 	'extern' => \$extern,
-	'case-sensitive' => \$case_sensitive,
+	'citext!'=> \$case_insensitive,
 	'varname:s'  => \$varname) || usage();
 
 my $kw_input_file = shift @ARGV || die "No input file.\n";
@@ -97,7 +97,7 @@ while (<$kif>)
 }
 
 # When being case-insensitive, insist that the input be all-lower-case.
-if (!$case_sensitive)
+if ($case_insensitive)
 {
 	foreach my $kw (@keywords)
 	{
@@ -157,7 +157,7 @@ printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;
 my $funcname = $varname . "_hash_func";
 
 my $f = PerfectHash::generate_hash_function(\@keywords, $funcname,
-	case_insensitive => !$case_sensitive);
+	case_insensitive => $case_insensitive);
 
 printf $kwdef qq|static %s\n|, $f

Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-09 Thread John Naylor
On Tue, Jan 8, 2019 at 5:31 PM Tom Lane  wrote:
>
> John Naylor  writes:
> > In the committed keyword patch, I noticed that in common/keywords.c,
> > the array length is defined with
> > ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS]
> > but other keyword arrays just have ...[]. Is there a reason for the 
> > difference?
>
> The length macro was readily available there so I used it.  AFAIR
> that wasn't true elsewhere, though I might've missed something.
> It's pretty much just belt-and-suspenders coding anyway, since all
> those arrays are machine generated ...

I tried using the available num_keywords macro in plpgsql and it
worked fine, but it makes the lines really long. Alternatively, as in
the attached, we could remove the single use of the core macro and
maybe add comments to the generated magic numbers.

-- 
John Naylor  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/common/keywords.c b/src/common/keywords.c
index 84f779feb9..edec4a5094 100644
--- a/src/common/keywords.c
+++ b/src/common/keywords.c
@@ -26,7 +26,7 @@
 
 #define PG_KEYWORD(kwname, value, category) category,
 
-const uint8 ScanKeywordCategories[SCANKEYWORDS_NUM_KEYWORDS] = {
+const uint8 ScanKeywordCategories[] = {
 #include "parser/kwlist.h"
 };
 
diff --git a/src/tools/gen_keywordlist.pl b/src/tools/gen_keywordlist.pl
index 927511d7c4..8c4c5da828 100644
--- a/src/tools/gen_keywordlist.pl
+++ b/src/tools/gen_keywordlist.pl
@@ -147,11 +147,6 @@ foreach my $name (@keywords)
 
 print $kwdef "};\n\n";
 
-# Emit a macro defining the number of keywords.
-# (In some places it's useful to have access to that as a constant.)
-
-printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;
-
 # Emit the definition of the hash function.
 
 my $funcname = $varname . "_hash_func";
@@ -168,8 +163,8 @@ printf $kwdef "const ScanKeywordList %s = {\n", $varname;
 printf $kwdef qq|\t%s_kw_string,\n|, $varname;
 printf $kwdef qq|\t%s_kw_offsets,\n|, $varname;
 printf $kwdef qq|\t%s,\n|, $funcname;
-printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
-printf $kwdef qq|\t%d\n|, $max_len;
+printf $kwdef qq|\t%d,\t/* number of keywords */\n|, scalar @keywords;
+printf $kwdef qq|\t%d\t/* max keyword length */\n|, $max_len;
 printf $kwdef "};\n\n";
 
 printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;


Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2019-01-09 Thread John Naylor
On Wed, Jan 9, 2019 at 2:44 PM Tom Lane  wrote:
> [patch to shrink oid index]

It would help maintain its newfound svelteness if we warned when a
higher OID is assigned, as in the attached. I used 6200 as a soft
limit, but that could be anything similar.

-- 
John Naylor  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index e7ead7dcc0..201343c2f6 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -74,6 +74,7 @@ my %catalog_data;
 my @toast_decls;
 my @index_decls;
 my %oidcounts;
+my $max_proc_oid = 6200;
 
 foreach my $header (@input_files)
 {
@@ -230,6 +231,13 @@ foreach my $row (@{ $catalog_data{pg_opfamily} })
 my %procoids;
 foreach my $row (@{ $catalog_data{pg_proc} })
 {
+	# Warn if a large OID has been newly assigned.
+	if ($row->{oid} > $max_proc_oid)
+	{
+		warn sprintf "Consider using an OID smaller than %d for %s (see commit 8ff5f824dca75)\n",
+			$max_proc_oid, $row->{proname};
+	}
+
 	# Generate an entry under just the proname (corresponds to regproc lookup)
 	my $prokey = $row->{proname};
 	if (defined $procoids{$prokey})


automatically assigning catalog toast oids

2018-12-08 Thread John Naylor
Commit 96cdeae07 added toast tables to most catalogs. One disadvantage
is that the toast declarations require hard-coded oids, even though
only shared catalogs actually need stable oids. Now that we can assign
oids on the fly, it makes sense to do so for toast tables as well, as
in the attached.

-John Naylor
 src/backend/catalog/genbki.pl  | 26 ++
 src/include/catalog/toasting.h | 32 ++--
 2 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index edc8ea9f53..f4d7160c6a 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -285,6 +285,7 @@ my @tables_needing_macros;
 foreach my $catname (@catnames)
 {
 	my $catalog = $catalogs{$catname};
+	my $has_toastable_type = 0;
 
 	# Create one definition header with macro definitions for each catalog.
 	my $def_file = $output_path . $catname . '_d.h';
@@ -345,6 +346,12 @@ EOM
 		# Build hash of column names for use later
 		$attnames{$attname} = 1;
 
+		# Flag catalogs with toastable datatypes.
+		if ($types{$atttype}->{typstorage} eq 'x')
+		{
+			$has_toastable_type = 1;
+		}
+
 		# Emit column definitions
 		if (!$first)
 		{
@@ -506,6 +513,25 @@ EOM
 		}
 	}
 
+	# Create toast declarations for normal catalogs.  To prevent
+	# circular dependencies and to avoid adding complexity to VACUUM
+	# FULL logic, exclude pg_class, pg_attribute, and pg_index.  Also,
+	# to prevent pg_upgrade from seeing a non-empty new cluster, exclude
+	# pg_largeobject and pg_largeobject_metadata from the set as large
+	# object data is handled as user data.  Those relations have no reason
+	# to use a toast table anyway.  Toast tables and indexes for shared
+	# catalogs need stable oids, so must be specified in toasting.h.
+	if ($has_toastable_type and !$catalog->{shared_relation}
+		and $catname ne 'pg_class'
+		and $catname ne 'pg_attribute'
+		and $catname ne 'pg_index'
+		and $catname ne 'pg_largeobject'
+		and $catname ne 'pg_largeobject_metadata')
+	{
+		push @toast_decls, sprintf "declare toast %s %s on %s\n",
+		  $maxoid++, $maxoid++, $catname;
+	}
+
 	print $bki "close $catname\n";
 	printf $def "\n#endif\t\t\t\t\t\t\t/* %s_D_H */\n", uc $catname;
 
diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h
index f259890e43..1c389c685d 100644
--- a/src/include/catalog/toasting.h
+++ b/src/include/catalog/toasting.h
@@ -42,37 +42,9 @@ extern void BootstrapToastTable(char *relName,
  * the OID to assign to the toast table, and the OID to assign to the
  * toast table's index.  The reason we hard-wire these OIDs is that we
  * need stable OIDs for shared relations, and that includes toast tables
- * of shared relations.
+ * of shared relations.  Toast table commands for normal catalogs are
+ * generated by genbki.pl, and oids for those are assigned on the fly.
  */
-
-/* normal catalogs */
-DECLARE_TOAST(pg_aggregate, 4159, 4160);
-DECLARE_TOAST(pg_attrdef, 2830, 2831);
-DECLARE_TOAST(pg_collation, 4161, 4162);
-DECLARE_TOAST(pg_constraint, 2832, 2833);
-DECLARE_TOAST(pg_default_acl, 4143, 4144);
-DECLARE_TOAST(pg_description, 2834, 2835);
-DECLARE_TOAST(pg_event_trigger, 4145, 4146);
-DECLARE_TOAST(pg_extension, 4147, 4148);
-DECLARE_TOAST(pg_foreign_data_wrapper, 4149, 4150);
-DECLARE_TOAST(pg_foreign_server, 4151, 4152);
-DECLARE_TOAST(pg_foreign_table, 4153, 4154);
-DECLARE_TOAST(pg_init_privs, 4155, 4156);
-DECLARE_TOAST(pg_language, 4157, 4158);
-DECLARE_TOAST(pg_namespace, 4163, 4164);
-DECLARE_TOAST(pg_partitioned_table, 4165, 4166);
-DECLARE_TOAST(pg_policy, 4167, 4168);
-DECLARE_TOAST(pg_proc, 2836, 2837);
-DECLARE_TOAST(pg_rewrite, 2838, 2839);
-DECLARE_TOAST(pg_seclabel, 3598, 3599);
-DECLARE_TOAST(pg_statistic, 2840, 2841);
-DECLARE_TOAST(pg_statistic_ext, 3439, 3440);
-DECLARE_TOAST(pg_trigger, 2336, 2337);
-DECLARE_TOAST(pg_ts_dict, 4169, 4170);
-DECLARE_TOAST(pg_type, 4171, 4172);
-DECLARE_TOAST(pg_user_mapping, 4173, 4174);
-
-/* shared catalogs */
 DECLARE_TOAST(pg_authid, 4175, 4176);
 #define PgAuthidToastTable 4175
 #define PgAuthidToastIndex 4176


Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-19 Thread John Naylor
On 12/19/18, Andrew Gierth  wrote:
> Is there any particular reason not to go further and use a perfect hash
> function for the lookup, rather than binary search?

When I was investigating faster algorithms, I ruled out gperf based on
discussions in the archives. The approach here has modest goals and
shouldn't be too invasive.

With the makefile support and separate keyword files in place, that'll
be one less thing to do if we ever decide to replace binary search.
The giant string will likely be useful as well.

Since we're on the subject, I think some kind of trie would be ideal
performance-wise, but a large amount of work. The nice thing about a
trie is that it can be faster than a hash table for a key miss. I
found a paper that described some space-efficient trie variations [1],
but we'd likely have to code the algorithm and a way to emit a C code
representation of it. I've found some libraries, but that would have
more of the same difficulties in practicality that gperf had.

[1] https://infoscience.epfl.ch/record/64394/files/triesearches.pdf

-John Naylor



Re: reducing the footprint of ScanKeyword (was Re: Large writable variables)

2018-12-19 Thread John Naylor
On 12/18/18, Tom Lane  wrote:
> I'd be kind of inclined to convert all uses of ScanKeyword to the new way,
> if only for consistency's sake.  On the other hand, I'm not the one
> volunteering to do the work.

That's reasonable, as long as the design is nailed down first. Along
those lines, attached is a heavily WIP patch that only touches plpgsql
unreserved keywords, to test out the new methodology in a limited
area. After settling APIs and name/directory bikeshedding, I'll move
on to the other four keyword types.

There's a new Perl script, src/common/gen_keywords.pl, which takes
pl_unreserved_kwlist.h as input and outputs
pl_unreserved_kwlist_offset.h and pl_unreserved_kwlist_string.h. The
output headers are not installed or symlinked anywhere. Since the
input keyword lists will never be #included directly, they might be
better as .txt files, like errcodes.txt. If we went that far, we might
also remove the PG_KEYWORD macros (they'd still be in the output
files) and rename parser/kwlist.h to common/core_kwlist.txt. There's
also a case for not changing things unnecessarily, especially if
there's ever a new reason to include the base keyword list directly.

To keep the other keyword types functional, I had to add a separate
new struct ScanKeywordOffset and new function
ScanKeywordLookupOffset(), so the patch is a bit messier than the
final will be. With a 4-byte offset, ScankeyWordOffset is 8 bytes,
down from 12, and is now a power of 2.

I used the global .gitignore, but maybe that's an abuse of it.

Make check passes, but I don't know how well it stresses keyword use.
I'll create a commitfest entry soon.

-John Naylor
 .gitignore|   2 +
 src/common/gen_keywords.pl| 151 ++
 src/common/keywords.c |  55 +++
 src/include/common/keywords.h |  14 +++
 src/pl/plpgsql/src/Makefile   |  13 ++-
 src/pl/plpgsql/src/pl_scanner.c   | 108 -
 src/pl/plpgsql/src/pl_unreserved_kwlist.h | 107 +
 7 files changed, 355 insertions(+), 95 deletions(-)

diff --git a/.gitignore b/.gitignore
index 794e35b73c..8a9a05c20a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,8 @@ win32ver.rc
 *.exe
 lib*dll.def
 lib*.pc
+*kwlist_offset.h
+*kwlist_string.h
 
 # Local excludes in root directory
 /GNUmakefile
diff --git a/src/common/gen_keywords.pl b/src/common/gen_keywords.pl
new file mode 100644
index 00..a8a8576d43
--- /dev/null
+++ b/src/common/gen_keywords.pl
@@ -0,0 +1,151 @@
+#--
+#
+# gen_keywords.pl
+#	Perl script that generates *_offset.h and *_string.h from a given
+#	keyword list file.  These headers are then included into files that
+#	call ScanKeywordLookup() on that keyword list.  The keyword name
+#	is represented as an offset into a single string.
+#
+# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/common/gen_keywords.pl
+#
+#--
+
+
+my $output_path = '';
+my $kw_input_file;
+
+# Process command line switches.
+while (@ARGV)
+{
+	my $arg = shift @ARGV;
+	if ($arg !~ /^-/)
+	{
+		$kw_input_file = $arg;
+	}
+	elsif ($arg =~ /^-o/)
+	{
+		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
+	}
+	else
+	{
+		usage();
+	}
+}
+
+
+# Make sure output_path ends in a slash.
+if ($output_path ne '' && substr($output_path, -1) ne '/')
+{
+	$output_path .= '/';
+}
+
+$kw_input_file =~ /(\w*kwlist)\.h/;
+my $base_filename = $1;
+
+my $kw_offset_file = $output_path . $base_filename . '_offset.h';
+my $kw_string_file = $output_path . $base_filename . '_string.h';
+
+open(my $kif, '<', $kw_input_file)  || die "$kw_input_file: $!";
+open(my $kof, '>', $kw_offset_file) || die "$kw_offset_file: $!";
+open(my $ksf, '>', $kw_string_file) || die "$kw_string_file: $!";
+
+# Opening boilerplate for keyword offset header.
+printf $kof <<EOF;
+...
+EOF
+
+my $offset = 0;
+my @keywords;
+
+while (<$kif>)
+{
+	if (/^PG_KEYWORD\("(\w+)",\s+(\w+),\s+(\w+)\)/)
+	{
+		$name = $1;
+		$value = $2;
+		$category = $3;
+
+		push @keywords, $name;
+
+		# Emit ScanKeyword macros with numerical offsets instead of text.
+		print $kof "PG_KEYWORD($offset, $value, $category)\n";
+
+		# Calculate the cumulative offset of the next keyword,
+		# taking into account the null terminator.
+		$offset += length($name) + 1;
+	}
+}
+
+# TODO: Error out if the keyword names are not in ASCII order.
+
+printf $ksf qq|const char *%s_string =\n"|, $base_filename;
+print  $ksf join qq|\\0"\n"|, @keywords;
+printf $ksf qq|";\n\n#endif\t\t\t\t\t\t\t/* %s_STRING_H */\n|, uc $base_filename;
+
+
+sub usage
+{
+	die <<EOF;
+...
+EOF
+}

[... message truncated in the archive; the diff continues in src/common/keywords.c ...]

+	if (len >= NAMEDATALEN)
+		return NULL;
+
+	/*
+	 * Apply an ASCII-only downcasing.  We must not use to

Re: Delay locking partitions during INSERT and UPDATE

2019-01-24 Thread John Naylor
On Wed, Jan 23, 2019 at 7:56 PM David Rowley
 wrote:
> On Thu, 24 Jan 2019 at 13:38, John Naylor  wrote:
> > Hmm, now instead of an 85x speedup over master in the 10k partition
> > case, I only get 20x. Anyone else see this?
>
> What's it like with fsync off?

No change. Just in case, I tried git bisect and also recreated the
cluster, but never got the same performance as my first test, so not
sure what happened.
--
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: Delay locking partitions during INSERT and UPDATE

2019-01-24 Thread John Naylor
On Thu, Jan 24, 2019 at 4:17 PM Tomas Vondra
 wrote:
> I can still see about the same performance as before (on both clusters).

Thanks for checking! I think the thing to do now is have a committer
look at it. It's a small patch with obvious performance effects --
there's just the question of whether the locking behavior with
concurrent DDL is as safe as it is now.

Anyone have anything else?

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-24 Thread John Naylor
On Wed, Jan 23, 2019 at 11:17 PM Amit Kapila  wrote:
>
> On Thu, Jan 24, 2019 at 3:39 AM John Naylor  
> wrote:
> > mkdir -p data1 data2 standby
> >
> > echo 'heap' > data1/foo
> > echo 'fsm' > data1/foo_fsm
> >
> > # simulate streaming replication
> > rsync --archive data1 standby
> >
> > # simulate pg_upgrade, skipping FSM
> > ln data1/foo -t data2/
> >
> > rsync --archive --delete --hard-links --size-only --no-inc-recursive
> > data1 data2 standby
> >
> > # result
> > ls standby/data1
> > ls standby/data2
> >
> >
> > The result is that foo_fsm is not copied to standby/data2, contrary to
> > what the docs above imply for other unlinked files. Can anyone shed
> > light on this?
> >
>
> Is foo_fsm present in standby/data1?

Yes it is.

> I think what doc means to say is
> that it copies any unlinked files present in primary's new cluster
> (which in your case will be data2).

In that case, I'm still confused why that doc says, "Unfortunately,
rsync needlessly copies files associated with temporary and unlogged
tables because these files don't normally exist on standby servers."
I fail to see why the primary's new cluster would have these if they
weren't linked. And in the case we're discussing here, the skipped
FSMs won't be on data2, so won't end up in standby/data2.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: Delay locking partitions during INSERT and UPDATE

2019-01-22 Thread John Naylor
The cfbot reports this patch no longer applies.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-27 Thread John Naylor
On Sat, Jan 26, 2019 at 2:14 PM Amit Kapila  wrote:
>
> On Sat, Jan 26, 2019 at 5:05 AM John Naylor  
> wrote:
> >
> > So, in v19 we check pg_class.relpages and if it's
> > a heap and less than or equal the threshold we call stat on the 0th
> > segment to verify.
> >
>
> Okay, but the way logic is implemented appears clumsy to me.

> The function transfer_relfile has no clue about skipping of FSM stuff,
> but it contains comments about it.

Yeah, I wasn't entirely happy with how that turned out.

> I think there is some value in using the information from
> this function to skip fsm files, but the code doesn't appear to fit
> well, how about moving this check to new function
> new_cluster_needs_fsm()?

For v21, new_cluster_needs_fsm() has all responsibility for obtaining
the info it needs. I think this is much cleaner, but there is a small
bit of code duplication since it now has to form the file name. One
thing we could do is form the base old/new file names in
transfer_single_new_db() and pass those to transfer_relfile(), which
will only add suffixes and segment numbers. We could then pass the
base old file name to new_cluster_needs_fsm() and use it as is. Not
sure if that's worthwhile, though.
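
Roughly, the division of labor I mean looks like this (a sketch only; the
parameter and messages are illustrative, not v21 verbatim; requires
<sys/stat.h>, <errno.h>, <string.h>):

static bool
new_cluster_needs_fsm(FileNameMap *map, const char *old_primary_file)
{
	struct stat statbuf;

	/* Only heaps at or below the threshold can skip the FSM. */
	if (map->relkind != 'r' ||
		map->relpages > HEAP_FSM_CREATION_THRESHOLD)
		return true;

	/* Verify the catalog's relpages estimate against the file itself. */
	if (stat(old_primary_file, &statbuf) != 0)
		pg_fatal("could not stat file \"%s\": %s\n",
				 old_primary_file, strerror(errno));

	return statbuf.st_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ;
}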

> The order in which relkind and relpages is used in the above code is
> different from the order in which it is mentioned in the query, it
> won't matter, but keeping in order will make look code consistent.  I
> have made this and some more minor code adjustments in the attached
> patch.  If you like those, you can include them in the next version of
> your patch.

Okay, done.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 2b70b65f43c05234b81e939b0ed49e34b97d5667 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sun, 27 Jan 2019 21:42:07 +0100
Subject: [PATCH v21 3/3] During pg_upgrade, conditionally skip transfer of
 FSMs.

If a heap on the old cluster has 4 pages or fewer, and the old cluster
was PG v11 or earlier, don't copy or link the FSM. This will shrink
space usage for installations with large numbers of small tables.
---
 src/bin/pg_upgrade/info.c| 16 +++--
 src/bin/pg_upgrade/pg_upgrade.h  |  6 
 src/bin/pg_upgrade/relfilenode.c | 58 ++--
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 2f925f086c..902bfc647e 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
+	map->relpages = old_rel->relpages;
+	map->relkind = old_rel->relkind;
 
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
+	char	   *relkind = NULL;
 	int			i_spclocation,
 i_nspname,
 i_relname,
@@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 i_indtable,
 i_toastheap,
 i_relfilenode,
-i_reltablespace;
+i_reltablespace,
+i_relpages,
+i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, %s "
+			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "  UNION ALL "
 			 "  SELECT * FROM toast_heap "
@@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relname = PQfnumber(res, "relname");
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
+	i_relpages = PQfnumber(res, "relpages");
+	i_relkind = PQfnumber(res, "relkind");
 	i_spclocation = PQfnumber(res, "spclocation");
 
 	for (relnum = 0; relnum < ntups; relnum++)
@@ -556,6 +563,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		curr->relname = pg_strdup(relname);
 
 		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
+		curr->relpages = atoi(PQgetvalue(res, relnum, i_relpages));
+
+		relkind = PQgetvalue(res, relnum, i_relkind);
+		curr->relkind = relkind[0];
+
 		curr->tblsp_alloc = false;
 
 		/* Is the tablespace oid non-default? */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 2f67eee22b..baeb8ff0f8 100644
--- a/src/bin/pg_upgrade/pg_upgrade

Re: WIP: Avoid creation of the free space map for small tables

2019-01-27 Thread John Naylor
On Mon, Jan 28, 2019 at 3:53 AM Amit Kapila  wrote:
> On Thu, Jan 24, 2019 at 9:14 AM Amit Kapila  wrote:
> > Sure, apart from this I have run pgindent on the patches and make some
> > changes accordingly.  Latest patches attached (only second patch has
> > some changes).  I will take one more pass on Monday morning (28th Jan)
> > and will commit unless you or others see any problem.
>
> Pushed these two patches.

Thank you for your input and detailed review! Thank you Mithun for testing!

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-01-27 Thread John Naylor
On Mon, Jan 28, 2019 at 4:53 AM Amit Kapila  wrote:
> There are a few buildfarm failures due to this commit, see my email on
> pgsql-committers.  If you have time, you can also once look into
> those.

I didn't see anything in common with the configs of the failed
members. None have a non-default BLCKSZ that I can see.

Looking at this typical example from woodlouse:

== pgsql.build/src/test/regress/regression.diffs
==
--- C:/buildfarm/buildenv/HEAD/pgsql.build/src/test/regress/expected/fsm.out
2019-01-28 04:43:09.031456700 +0100
+++ C:/buildfarm/buildenv/HEAD/pgsql.build/src/test/regress/results/fsm.out
2019-01-28 05:06:20.351100400 +0100
@@ -26,7 +26,7 @@
 pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
  heap_size | fsm_size
 -----------+----------
-     24576 |        0
+     32768 |        0
 (1 row)

***It seems like the relation extended when the new records should
have gone into block 0.

 -- Extend table with enough blocks to exceed the FSM threshold
@@ -56,7 +56,7 @@
 SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
  fsm_size
 ----------
-    16384
+    24576
 (1 row)

***And here it seems vacuum didn't truncate the FSM. I wonder if the
heap didn't get truncated either.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: pgsql: Avoid creation of the free space map for small heap relations.

2019-01-27 Thread John Naylor
On Mon, Jan 28, 2019 at 6:58 AM Amit Kapila  wrote:
>
> On Mon, Jan 28, 2019 at 11:10 AM Andrew Gierth
> you just need to create free space on a page that didn't have enough
> > before? It might be worth tweaking the fillfactor rather than trying to
> > delete anything.
> >
>
> No, it also depends on vacuum to remove rows and then truncate the
> relation accordingly.

That particular test could be removed -- it's just verifying behavior
that's already been there for years and is a direct consequence of
normal truncation combined with the addressing scheme of the FSM
logic.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: [HACKERS] PATCH: multivariate histograms and MCV lists

2019-03-30 Thread John Naylor
I believe I found a typo in mcv.c, fix attached.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


mcv-comment-fix.patch
Description: Binary data


Re: pgsql: Avoid creation of the free space map for small heap relations, t

2019-02-25 Thread John Naylor
On Tue, Feb 26, 2019 at 8:59 AM Amit Kapila  wrote:
> I will look into it today and respond back with my findings.  John,
> see if you also get time to investigate this.

Will do, but it will likely be tomorrow.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-02-20 Thread John Naylor
On Thu, Feb 21, 2019 at 7:58 AM Amit Kapila  wrote:
> So here you are inserting 4-byte integer and 1024-bytes variable
> length record.  So the tuple length will be tuple_header (24-bytes) +
> 4-bytes for integer + 4-bytes header for variable length data + 1024
> bytes of actual data.  So, the length will be 1056 which is already
> MAXALIGN.  I took the new comments added in your latest version of the
> patch and added them to the previous version of the patch.   Kindly
> see if I have not missed anything while merging the patch-versions?

OK, that makes sense. Looks fine to me.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: pgsql: Avoid creation of the free space map for small heap relations, t

2019-02-26 Thread John Naylor
On Wed, Feb 27, 2019 at 5:06 AM Amit Kapila  wrote:
> I have tried this test many times (more than 1000 times) by varying
> thread count, but couldn't reproduce it.  My colleague, Kuntal has
> tried a similar test overnight, but the issue didn't reproduce which
> is not surprising to me seeing the nature of the problem.  As I could
> reproduce it via the debugger, I think we can go ahead with the fix.
> I have improved the comments in the attached patch and I have also
> tried to address Tom's concern w.r.t comments by adding additional
> comments atop of data-structure used to maintain the local map.

The flaw in my thinking was treating extension too similarly to
finding an existing block. With your patch clearing the local map in
the correct place, it seems the call at hio.c:682 is now superfluous?
It wasn't sufficient before, so should this call be removed so as not
to mislead? Or maybe keep it to be safe, with a comment like "This
should already be cleared by now, but make sure it is."
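
In code form, the suggestion is roughly (paraphrased, not an actual hunk):

	/* This should already be cleared by now, but make sure it is. */
	FSMClearLocalMap();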

+ * To maintain good performance, we only mark every other page as available
+ * in this map.

I think this implementation detail is not needed to comment on the
struct. I think the pointer to the README is sufficient.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: pgsql: Avoid creation of the free space map for small heap relations, t

2019-02-26 Thread John Naylor
On Tue, Feb 26, 2019 at 10:28 AM Amit Kapila  wrote:
> John, others, can you review my findings and patch?

I can confirm your findings with your debugging steps. Nice work!

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-03-06 Thread John Naylor
On Fri, Jan 25, 2019 at 9:50 AM Amit Kapila  wrote:
> Once we agree on the code, we need to test below scenarios:
> (a) upgrade from all supported versions to the latest version
> (b) upgrade standby with and without using rsync.

Although the code hasn't been reviewed yet, I went ahead and tested
(a) on v21 of the pg_upgrade patch [1]. To do this I dumped out a 9.4
instance with the regression database and restored it to all supported
versions. To make it work with pg_upgrade, I first had to drop tables
with oids, drop functions referring to C libraries, and drop the
later-removed '=>' operator. Then I pg_upgrade'd in copy mode from all
versions to HEAD with the patch applied. pg_upgrade worked without
error, and the following query returned 0 bytes on all the new
clusters:

select sum(pg_relation_size(oid, 'fsm')) as total_fsm_size
from pg_class where relkind in ('r', 't')
and pg_relation_size(oid, 'main') <= 4 * 8192;

The complementary query (> 4 * 8192) returned the same number of bytes
as in the original 9.4 instance.

To make it easy to find, the latest regression test patch, which is
intended to be independent of block-size, was in [2].

[1] 
https://www.postgresql.org/message-id/CACPNZCu4cOdm3uGnNEGXivy7Gz8UWyQjynDpdkPGabQ18_zK6g%40mail.gmail.com

[2] 
https://www.postgresql.org/message-id/CACPNZCsWa%3Ddd0K%2BFiODwM%3DLEsepAHVJCoSx2Avew%3DxBEX3Ywjw%40mail.gmail.com

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: Why don't we have a small reserved OID range for patch revisions?

2019-02-23 Thread John Naylor
I wrote:

> Along those lines, here's a draft patch to do just that. It handles
> array type oids as well. Run it like this:
>
> perl  reformat_dat_file.pl  --map-from 9000  --map-to 2000  *.dat
>
> There is some attempt at documentation. So far it doesn't map by
> default, but that could be changed if we agreed on the convention of
> 9000 or whatever.

In case we don't want to lose track of this, I added it to the March
commitfest with a target of v13. (I didn't see a way to add it to the
July commitfest.)

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: pgsql: Avoid creation of the free space map for small heap relations, t

2019-02-27 Thread John Naylor
On Thu, Feb 28, 2019 at 7:24 AM Amit Kapila  wrote:
> > The flaw in my thinking was treating extension too similarly too
> > finding an existing block. With your patch clearing the local map in
> > the correct place, it seems the call at hio.c:682 is now superfluous?
>
> What if get some valid block from the first call to
> GetPageWithFreeSpace via local map which has required space?  I think
> in that case we will need the call at hio.c:682.  Am I missing
> something?

Are you referring to the call at line 393? Then the map will be
cleared on line 507 before we return that buffer.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: pgsql: Avoid creation of the free space map for small heap relations, t

2019-02-27 Thread John Naylor
On Thu, Feb 28, 2019 at 10:25 AM Amit Kapila  wrote:
>
> Here's an updated patch based on comments by you.  I will proceed with
> this unless you have any more comments.

Looks good to me. I would just adjust the grammar in the comment, from
"This prevents us to use the map", to "This prevents us from using the
map". Perhaps also a comma after "first".

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-02-22 Thread John Naylor
On Thu, Feb 21, 2019 at 9:27 PM Alvaro Herrera  wrote:
>
> I think this test is going to break on nonstandard block sizes.  While
> we don't promise that all tests work on such installs (particularly
> planner ones), it seems fairly easy to cope with this one -- just use a
> record size expressed as a fraction of current_setting('block_size').
> So instead of "1024" you'd write current_setting('block_size') / 8.
> And then display the relation size in terms of pages, not bytes, so
> divide pg_relation_size by block size.

I've done this for v6, tested on 16k block size.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 157f74155beb53c455b0a85b6eebdd22a60ce5f5 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sat, 23 Feb 2019 08:48:31 +0100
Subject: [PATCH v6] Add more tests for FSM.

In commit b0eaa4c51bb, we left out a test that used a vacuum to remove dead
rows, as the behavior of the test was not predictable.  This test has been
rewritten to use fillfactor instead to control free space.  Since we no
longer need to remove dead rows as part of the test, put the fsm regression
test in a parallel group.
---
 src/test/regress/expected/fsm.out  | 59 +-
 src/test/regress/parallel_schedule |  8 +---
 src/test/regress/serial_schedule   |  2 +-
 src/test/regress/sql/fsm.sql   | 41 +
 4 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/src/test/regress/expected/fsm.out b/src/test/regress/expected/fsm.out
index b02993188c..698d4b0be5 100644
--- a/src/test/regress/expected/fsm.out
+++ b/src/test/regress/expected/fsm.out
@@ -1,15 +1,40 @@
 --
 -- Free Space Map test
 --
+SELECT current_setting('block_size')::integer AS blocksize,
+current_setting('block_size')::integer / 8 AS strsize
+\gset
 CREATE TABLE fsm_check_size (num int, str text);
--- With one block, there should be no FSM
-INSERT INTO fsm_check_size VALUES(1, 'a');
+-- Fill 3 blocks with one record each
+ALTER TABLE fsm_check_size SET (fillfactor=15);
+INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
+FROM generate_series(1,3) i;
+-- There should be no FSM
 VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'main') AS heap_size,
-pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
- heap_size | fsm_size 
------------+----------
-      8192 |        0
+SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
+pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
+ heap_nblocks | fsm_nblocks 
+--------------+-------------
+            3 |           0
+(1 row)
+
+-- The following operations are for testing the functionality of the local
+-- in-memory map. In particular, we want to be able to insert into some
+-- other block than the one at the end of the heap, without using a FSM.
+-- Fill most of the last block
+ALTER TABLE fsm_check_size SET (fillfactor=100);
+INSERT INTO fsm_check_size SELECT i, rpad('', :strsize, 'a')
+FROM generate_series(101,105) i;
+-- Make sure records can go into any block but the last one
+ALTER TABLE fsm_check_size SET (fillfactor=30);
+-- Insert large record and make sure it does not cause the relation to extend
+INSERT INTO fsm_check_size VALUES (111, rpad('', :strsize, 'a'));
+VACUUM fsm_check_size;
+SELECT pg_relation_size('fsm_check_size', 'main') / :blocksize AS heap_nblocks,
+pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
+ heap_nblocks | fsm_nblocks 
+--------------+-------------
+            3 |           0
 (1 row)
 
 -- Extend table with enough blocks to exceed the FSM threshold
@@ -26,23 +51,23 @@ num = 11;
 END;
 $$;
 VACUUM fsm_check_size;
-SELECT pg_relation_size('fsm_check_size', 'fsm') AS fsm_size;
- fsm_size 
-----------
-    24576
+SELECT pg_relation_size('fsm_check_size', 'fsm') / :blocksize AS fsm_nblocks;
+ fsm_nblocks 
+-------------
+           3
 (1 row)
 
 -- Add long random string to extend TOAST table to 1 block
 INSERT INTO fsm_check_size
 VALUES(0, (SELECT string_agg(md5(chr(i)), '')
-		   FROM generate_series(1,100) i));
+		   FROM generate_series(1, :blocksize / 100) i));
 VACUUM fsm_check_size;
-SELECT pg_relation_size(reltoastrelid, 'main') AS toast_size,
-pg_relation_size(reltoastrelid, 'fsm') AS toast_fsm_size
+SELECT pg_relation_size(reltoastrelid, 'main') / :blocksize AS toast_nblocks,
+pg_relation_size(reltoastrelid, 'fsm') / :blocksize AS toast_fsm_nblocks
 FROM pg_class WHERE relname = 'fsm_check_size';
- toast_size | toast_fsm_size 
-------------+----------------
-       8192 |              0
+ toast_nblocks | toast_fsm_nblocks 
+---------------+-------------------
+             1 |                 0
 (1 row)
 
 DROP TABLE fsm_check_size;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4051a4ad4e..a956775dd1 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule

Re: WIP: Avoid creation of the free space map for small tables

2019-02-23 Thread John Naylor
On Fri, Feb 22, 2019 at 3:59 AM Amit Kapila  wrote:
> The reason for not pushing much on making the test pass for
> nonstandard block sizes is that when I tried existing tests, there
> were already some failures.

FWIW, I currently see 8 failures (attached).

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


jcn-16blk-regress.diffs
Description: Binary data


outdated reference to tuple header OIDs

2019-03-14 Thread John Naylor
It seems this is a leftover from commit 578b229718e8. Patch attached.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


remove-ref-to-sys-oid-col.patch
Description: Binary data


Re: WIP: Avoid creation of the free space map for small tables

2019-03-14 Thread John Naylor
On Thu, Mar 14, 2019 at 2:17 PM Amit Kapila  wrote:
>
> 1. Added an Assert in new_cluster_needs_fsm() that old cluster version
> should be >= 804 as that is where fsm support has been added.

There is already an explicit check for 804 in the caller, and the HEAD
code is already resilient to FSMs not existing, so I think this is
superfluous.

> 2. Reverted the old cluster version check to <= 1100.  There was
> nothing wrong in the way you have written a check, but I find most of
> the other places in the code uses that way.
> 3. At one place changed the function header to be consistent with the
> nearby code, run pgindent on the code and changed the commit message.

Looks good to me, thanks.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: Why don't we have a small reserved OID range for patch revisions?

2019-03-12 Thread John Naylor
On Tue, Mar 12, 2019 at 5:36 AM Tom Lane  wrote:
> This seems committable from my end --- any further comments?

I gave it a read and it looks good to me, but I haven't tried to run it.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-03-10 Thread John Naylor
On Fri, Mar 8, 2019 at 7:43 PM Amit Kapila  wrote:
> Few minor comments:
> 1.
> warning C4715: 'new_cluster_needs_fsm': not all control paths return a value
>
> Getting this new warning in the patch.

Hmm, I don't get that in a couple versions of gcc. Your compiler must
not know that pg_fatal() cannot return. I blindly added a fix.
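
(A sketch of the usual idiom for that warning, assuming it fires at the
end of new_cluster_needs_fsm():)

	if (stat(old_primary_file, &statbuf) != 0)
		pg_fatal("could not stat file \"%s\": %s\n",
				 old_primary_file, strerror(errno));

	/* not reached; quiets compilers that don't know pg_fatal() exits */
	return false;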

> 2.
>
> This comment line doesn't seem to be related to this patch.  If so, I
> think we can avoid having any additional change which is not related
> to the functionality of this patch.  Feel free to submit it
> separately, if you think it is an improvement.

> +
> + /* Transfer any VM files if we can trust their contents. */
>   if (vm_crashsafe_match)

Well, I guess the current comment is still ok, so reverted. If I were
to do a separate cleanup patch, I would rather remove the
vm_must_add_frozenbit parameter -- there's no reason I can see for
calls that transfer the heap and FSM to know about this.

I also changed references to the 'first segment of the main fork'
where there will almost always only be one segment. This was a vestige
of the earlier algorithm I had.

> 3. Can we add a note about this in the Notes section of pg_upgrade
> documentation [1]?

Done.

> Have you done any performance testing of this patch?  I mean to say
> now that we added a new stat call for each table, we should see if
> that has any impact.  Ideally, that should be compensated by the fact
> that we are now not transferring *fsm files for small relations.  How
> about constructing a test where all relations are greater than 4 pages
> and then try to upgrade them.  We can check for a cluster with a
> different number of relations say 10K, 20K, 50K, 100K.

I have not, but I agree it should be done. I will try to do so soon.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From b65084daf15d198c9fdd986992b0147290c8e690 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Sun, 10 Mar 2019 22:01:21 +0800
Subject: [PATCH v23] During pg_upgrade, conditionally skip transfer of FSMs.

If a heap on the old cluster has 4 pages or fewer, and the old cluster
was PG v11 or earlier, don't copy or link the FSM. This will shrink
space usage for installations with large numbers of small tables.
---
 doc/src/sgml/ref/pgupgrade.sgml  |  7 
 src/bin/pg_upgrade/info.c| 16 +++--
 src/bin/pg_upgrade/pg_upgrade.h  |  6 
 src/bin/pg_upgrade/relfilenode.c | 58 +++-
 4 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 7e1afaf0a5..c896882dd1 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -792,6 +792,13 @@ psql --username=postgres --file=script.sql postgres
is down.
   
 
+  
+   In PostgreSQL 12 and later small tables by
+   default don't have a free space map, as a space optimization.  If you are
+   upgrading a pre-12 cluster, the free space maps of small tables will
+   likewise not be transferred to the new cluster.  
+  
+
  
 
  
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 2f925f086c..902bfc647e 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
+	map->relpages = old_rel->relpages;
+	map->relkind = old_rel->relkind;
 
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -418,6 +420,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
+	char	   *relkind = NULL;
 	int			i_spclocation,
 i_nspname,
 i_relname,
@@ -425,7 +428,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 i_indtable,
 i_toastheap,
 i_relfilenode,
-i_reltablespace;
+i_reltablespace,
+i_relpages,
+i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -494,7 +499,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, %s "
+			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "  UNION ALL "
 			 "  SELECT * FROM toast_heap "
@@ -525,6 +530,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relname = PQfnumber(res, "relname");
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
+	i_relpages = PQfnumber(res, "relpages");

Re: Why don't we have a small reserved OID range for patch revisions?

2019-03-10 Thread John Naylor
On Sat, Mar 9, 2019 at 1:14 AM Tom Lane  wrote:
> I took a quick look at this.  I went ahead and pushed the parts that
> were just code cleanup in reformat_dat_file.pl, since that seemed
> pretty uncontroversial.  As far as the rest of it goes:

Okay, thanks.

> * I'm really not terribly happy with sticking this functionality into
> reformat_dat_file.pl.  First, there's an issue of discoverability:
> it's not obvious that a script named that way would have such an
> ability.  Second, it clutters the script in a way that seems to me
> to hinder its usefulness as a basis for one-off hacks.  So I'd really
> rather have a separate script named something like "renumber_oids.pl",
> even if there's a good deal of code duplication between it and
> reformat_dat_file.pl.

> * In my vision of what this might be good for, I think it's important
> that it be possible to specify a range of input OIDs to renumber, not
> just "everything above N".  I agree the output range only needs a
> starting OID.

Now it looks like:

perl  renumber_oids.pl  --first-mapped-oid 8000 --last-mapped-oid 8999
 --first-target-oid 2000  *.dat

To prevent a maintenance headache, I didn't copy any of the formatting
logic over. You'll also have to run reformat_dat_files.pl afterwards
to restore that. It seems to work, but I haven't tested thoroughly.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From c6f5a1f6016feeb7f7ea94e504be6d23a264ca07 Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Mon, 11 Mar 2019 01:05:15 +0800
Subject: [PATCH v2] Provide script to renumber OIDs.

Historically, it's been considered good practice to choose an OID that's
at the beginning of the range shown by the unused_oids script, but
nowadays that space is getting cramped, leading to merge conflicts.

Instead, allow developers to use high-numbered OIDs, and remap them
to a lower range at some future date.
---
 doc/src/sgml/bki.sgml|  16 +++
 src/include/catalog/renumber_oids.pl | 193 +++
 2 files changed, 209 insertions(+)
 create mode 100644 src/include/catalog/renumber_oids.pl

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 3c5830bc8a..7e553dc30d 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -392,6 +392,22 @@
 at compile time.)

 
+   
+For a variety of reasons, newly assigned OIDs should occupy the lower
+end of the reserved range.  Because multiple patches considered for
+inclusion into PostgreSQL could be using
+the same new OIDs, there is the possibility of conflict.  To mitigate
+this, there is a convention that OIDs 8000 and over are reserved for
+development.  This reduces the effort in rebasing over the HEAD branch.
+To enable committers to move these OIDs to the lower range easily,
+renumber_oids.pl can map OIDs as in the
+following, which maps any OIDs between 8000 and 8999, inclusive, to
+unused OIDs starting at 2000:
+
+$ perl  renumber_oids.pl  --first-mapped-oid 8000 --last-mapped-oid 8999  --first-target-oid 2000  *.dat
+
+   
+

 The OID counter starts at 1 at the beginning of a bootstrap run.
 If a row from a source other than postgres.bki
diff --git a/src/include/catalog/renumber_oids.pl b/src/include/catalog/renumber_oids.pl
new file mode 100644
index 00..79537465bc
--- /dev/null
+++ b/src/include/catalog/renumber_oids.pl
@@ -0,0 +1,193 @@
+#!/usr/bin/perl
+#--
+#
+# renumber_oids.pl
+#Perl script that shifts a range of high-numbered OIDs in the given
+#catalog data file(s) to a lower range.
+#
+#Note: This does not format the output, so you will still need to
+#run reformat_dat_file.pl.
+#
+# Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/include/catalog/renumber_oids.pl
+#
+#--
+
+use strict;
+use warnings;
+
+use FindBin;
+use Getopt::Long;
+use Data::Dumper;
+$Data::Dumper::Terse = 1;
+
+# If you copy this script to somewhere other than src/include/catalog,
+# you'll need to modify this "use lib" or provide a suitable -I switch.
+use lib "$FindBin::RealBin/../../backend/catalog/";
+use Catalog;
+
+# Must run in src/include/catalog
+chdir $FindBin::RealBin or die "could not cd to $FindBin::RealBin: $!\n";
+
+my $FirstGenbkiObjectId =
+  Catalog::FindDefinedSymbol('access/transam.h', '..',
+	'FirstGenbkiObjectId');
+
+# Process command line switches.
+my $output_path = '';
+my $first_mapped_oid;
+my $last_mapped_oid = $FirstGenbkiObjectId - 1;
+my $first_target_oid = 1;
+
+GetOptions(
+	'output:s'   => \$output_path,
+	'first-m

Re: WIP: Avoid creation of the free space map for small tables

2019-03-13 Thread John Naylor
On Fri, Mar 8, 2019 at 7:43 PM Amit Kapila  wrote:

> Have you done any performance testing of this patch?  I mean to say
> now that we added a new stat call for each table, we should see if
> that has any impact.  Ideally, that should be compensated by the fact
> that we are now not transferring *fsm files for small relations.

To be precise, it will only call stat if pg_class.relpages is below
the threshold. I suppose I could hack a database where all the
relpages values are wrong, but that seems like a waste of time.

> How
> about constructing a test where all relations are greater than 4 pages
> and then try to upgrade them.  We can check for a cluster with a
> different number of relations say 10K, 20K, 50K, 100K.

I did both greater and less than 4 pages for 10k relations. Since
pg_upgrade is O(# relations), I don't see a point in going higher.

First, I had a problem: On MacOS with their "gcc" wrapper around
clang, I got a segfault 11 when compiled with no debugging symbols. I
added "CFLAGS=-O0" and it worked fine. Since this doesn't happen in a
debugging build, I'm not sure how to investigate this. IIRC, this
doesn't happen for me on Linux gcc.

Since it was at least running now, I measured by putting
gettimeofday() calls around transfer_all_new_tablespaces(). I did 10
runs each and took the average, except for patch/1-page case since it
was obviously faster after a couple runs.
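
The harness was nothing fancy -- roughly this shape (a sketch; names and
the report format are illustrative, not the committed code):

#include <stdio.h>
#include <sys/time.h>

/* Return wall-clock seconds between two gettimeofday() readings. */
static double
elapsed_seconds(const struct timeval *t0, const struct timeval *t1)
{
	return (t1->tv_sec - t0->tv_sec) +
		(t1->tv_usec - t0->tv_usec) / 1000000.0;
}

/*
 * At the call site in pg_upgrade's main():
 *
 *		struct timeval t0, t1;
 *
 *		gettimeofday(&t0, NULL);
 *		transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
 *									 old_cluster.pgdata, new_cluster.pgdata);
 *		gettimeofday(&t1, NULL);
 *		fprintf(stderr, "transfer: %.3f s\n", elapsed_seconds(&t0, &t1));
 */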

5 pages:
master    patch
5.59s     5.64s

The variation within the builds is up to +/- 0.2s, so there is no
difference, as expected.

1 page:
master    patch
5.62s     4.25s

Clearly, linking is much slower than stat.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: WIP: Avoid creation of the free space map for small tables

2019-03-13 Thread John Naylor
On Wed, Mar 13, 2019 at 8:18 PM Amit Kapila  wrote:
> > First, I had a problem: On MacOS with their "gcc" wrapper around
> > clang, I got a segfault 11 when compiled with no debugging symbols.
> >
>
> Did you get this problem with the patch or both with and without the
> patch?  If it is only with patch, then we definitely need to
> investigate.

Only with the patch.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: jsonpath

2019-03-19 Thread John Naylor
On Sun, Feb 24, 2019 at 5:03 PM Alexander Korotkov
 wrote:
> On Wed, Jan 30, 2019 at 5:28 AM Andres Freund  wrote:
> > Why -CF, and why is -p repeated?
>
> BTW, for our SQL grammar we have
>
> > scan.c: FLEXFLAGS = -CF -p -p
>
> Is it kind of default?

I just saw this in the committed patch. This is not default, it's
chosen for maximum performance at the expense of binary/memory size.
That's fine, but with a little effort you can also make the scanner
non-backtracking for additional performance, as in the attached.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


jsonpath-scan-nobackup.patch
Description: Binary data


Re: jsonpath

2019-03-21 Thread John Naylor
On Thu, Mar 21, 2019 at 9:59 PM Alexander Korotkov
 wrote:
> Attaches patches improving jsonpath parser.  First one introduces
> cosmetic changes, while second gets rid of backtracking.  I'm also
> planning to add high-level comment for both grammar and lexer.

The cosmetic changes look good to me. I just noticed a couple things
about the comments.

0001:

+/* Check if current scanstring value constitutes a keyword */

'is a keyword' is better. 'Constitutes' implies parts of a whole.

+ * Resize scanstring for appending of given length.  Reinitilize if required.

s/Reinitilize/Reinitialize/

The first sentence is not entirely clear to me.


0002:

These two rules are not strictly necessary:

{unicode}+\\ {
/* throw back the \\, and treat as unicode */
yyless(yyleng - 1);
parseUnicode(yytext, yyleng);
}

{hex_char}+\\ {
/* throw back the \\, and treat as hex */
yyless(yyleng - 1);
parseHexChars(yytext, yyleng);
}

...and only seem to be there because of how these are written:

{unicode}+ { parseUnicode(yytext, yyleng); }
{hex_char}+ { parseHexChars(yytext, yyleng); }
{unicode}*{unicodefail} { yyerror(NULL, "Unicode
sequence is invalid"); }
{hex_char}*{hex_fail} { yyerror(NULL, "Hex character
sequence is invalid"); }

I don't understand the reasoning here -- is it a micro-optimization?
The following is simpler, allow the rules I mentioned to be removed,
and make check still passes. I would prefer it unless there is a
performance penalty, in which case a comment to describe the
additional complexity would be helpful.

{unicode} { parseUnicode(yytext, yyleng); }
{hex_char} { parseHexChars(yytext, yyleng); }
{unicodefail} { yyerror(NULL, "Unicode sequence is invalid"); }
{hex_fail} { yyerror(NULL, "Hex character sequence is
invalid"); }

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: use Getopt::Long for catalog scripts

2019-02-07 Thread John Naylor
On Thu, Feb 7, 2019 at 6:53 PM David Fetter  wrote:
> [some style suggestions]

I've included these in v2, and also some additional tweaks for consistency.

-- 
John Naylor                 https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 0bf2b3ffdaa16d85856074dda8e15c09fc99d0fe Mon Sep 17 00:00:00 2001
From: John Naylor 
Date: Thu, 7 Feb 2019 20:10:50 +0100
Subject: [PATCH v2] Use Getopt::Long for catalog scripts

Replace hand-rolled option parsing with the Getopt module. This is
shorter and easier to read. In passing, make some cosmetic adjustments
for consistency.
---
 src/backend/catalog/Makefile |  2 +-
 src/backend/catalog/genbki.pl| 49 +---
 src/backend/utils/Gen_fmgrtab.pl | 38 -
 src/backend/utils/Makefile   |  2 +-
 4 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index d9f92ba701..b06ee49ca8 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -89,7 +89,7 @@ generated-header-symlinks: $(top_builddir)/src/include/catalog/header-stamp
 # version number when it changes.
 bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.in
 	$(PERL) -I $(catalogdir) $< \
-		-I $(top_srcdir)/src/include/ --set-version=$(MAJORVERSION) \
+		--include-path=$(top_srcdir)/src/include/ --set-version=$(MAJORVERSION) \
 		$(POSTGRES_BKI_SRCS)
 	touch $@
 
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index be81094ffb..4935e00fb2 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -16,6 +16,7 @@
 
 use strict;
 use warnings;
+use Getopt::Long;
 
 use File::Basename;
 use File::Spec;
@@ -23,43 +24,21 @@ BEGIN  { use lib File::Spec->rel2abs(dirname(__FILE__)); }
 
 use Catalog;
 
-my @input_files;
 my $output_path = '';
 my $major_version;
 my $include_path;
 
-# Process command line switches.
-while (@ARGV)
-{
-	my $arg = shift @ARGV;
-	if ($arg !~ /^-/)
-	{
-		push @input_files, $arg;
-	}
-	elsif ($arg =~ /^-I/)
-	{
-		$include_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
-	}
-	elsif ($arg =~ /^-o/)
-	{
-		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
-	}
-	elsif ($arg =~ /^--set-version=(.*)$/)
-	{
-		$major_version = $1;
-		die "Invalid version string.\n"
-		  if !($major_version =~ /^\d+$/);
-	}
-	else
-	{
-		usage();
-	}
-}
+GetOptions(
+	'output:s'   => \$output_path,
+	'set-version:s'  => \$major_version,
+	'include-path:s' => \$include_path) || usage();
 
 # Sanity check arguments.
-die "No input files.\n" if !@input_files;
-die "--set-version must be specified.\n" if !defined $major_version;
-die "-I, the header include path, must be specified.\n" if !$include_path;
+die "No input files.\n" unless @ARGV;
+die "--set-version must be specified.\n" unless $major_version;
+die "Invalid version string: $major_version\n"
+  unless $major_version =~ /^\d+$/;
+die "--include-path must be specified.\n" unless $include_path;
 
 # Make sure paths end with a slash.
 if ($output_path ne '' && substr($output_path, -1) ne '/')
@@ -79,7 +58,7 @@ my @toast_decls;
 my @index_decls;
 my %oidcounts;
 
-foreach my $header (@input_files)
+foreach my $header (@ARGV)
 {
 	$header =~ /(.+)\.h$/
 	  or die "Input files need to be header files.\n";
@@ -917,12 +896,12 @@ sub form_pg_type_symbol
 sub usage
 {
 	die <<EOM;
+Usage: genbki.pl [--output/-o <path>] [--include-path/-i <path>] header...
 
 Options:
--I   include path
--o   output path
+--output         Output directory (default '.')
 --set-version    PostgreSQL version number for initdb cross-check
+--include-path   Include path in source tree
 
 genbki.pl generates BKI files and symbol definition
 headers from specially formatted header files and .dat
diff --git a/src/backend/utils/Gen_fmgrtab.pl b/src/backend/utils/Gen_fmgrtab.pl
index f17a78ebcd..2ffacae666 100644
--- a/src/backend/utils/Gen_fmgrtab.pl
+++ b/src/backend/utils/Gen_fmgrtab.pl
@@ -18,32 +18,14 @@ use Catalog;
 
 use strict;
 use warnings;
+use Getopt::Long;
 
-# Collect arguments
-my @input_files;
 my $output_path = '';
 my $include_path;
 
-while (@ARGV)
-{
-	my $arg = shift @ARGV;
-	if ($arg !~ /^-/)
-	{
-		push @input_files, $arg;
-	}
-	elsif ($arg =~ /^-o/)
-	{
-		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
-	}
-	elsif ($arg =~ /^-I/)
-	{
-		$include_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;
-	}
-	else
-	{
-		usage();
-	}
-}
+GetOptions(
+	'output:s'   => \$output_path,
+	'include-path:s' => \$include_path) || usage();
 
 # Make sure output_path ends in a slash.
 if ($output_path ne '' && substr($output_path, -1) ne '/')
@@ -52,8 +34,8 @@ if ($output_path ne '' && subs
