Re: [HACKERS] inherit support for foreign tables
(2014/03/11 14:07), Kyotaro HORIGUCHI wrote: This seems far better than silently performing the command, except for the duplicated message:( New bitmap might required to avoid the duplication.. I rewrote it in more tidy way. ATController collects all affected tables on ATRewriteCatalogs as first stage, then emit NOTICE message according to the affected relations list. The message looks like, | =# alter table passwd alter column uname set default 'hoge'; | NOTICE: This command affects 2 foreign tables: cf1, cf2 | ALTER TABLE Do you feel this too large or complicated? I think so a bit.. No. I think that would be a useful message for the user. My feeling is it would be better to show this kind of messages for all the affected tables whether or not the affected ones are foreign. How about introducing a VERBOSE option for ALTER TABLE? Though, I think that should be implemented as a separate patch. Thanks, Best regards, Etsuro Fujita -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] inherit support for foreign tables
Hello, > This seems far better than silently performing the command, > except for the duplicated message:( New bitmap might required to > avoid the duplication.. I rewrote it in more tidy way. ATController collects all affected tables on ATRewriteCatalogs as first stage, then emit NOTICE message according to the affected relations list. The message looks like, | =# alter table passwd alter column uname set default 'hoge'; | NOTICE: This command affects 2 foreign tables: cf1, cf2 | ALTER TABLE Do you feel this too large or complicated? I think so a bit.. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 8ace8bd..b4e53c1 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -258,6 +258,17 @@ CREATE TABLE products ( even if the value came from the default value definition. + + +Note that constraints can be defined on foreign tables too, but such +constraints are not enforced on insert or update. Those constraints are +"assertive", and work only to tell the planner that some kind of optimization +such as constraint exclusion can be considered. This seems useless, but +allows us to use foreign table as child table (see +) to off-load to multiple servers. + + + Check Constraints @@ -2017,8 +2028,8 @@ CREATE TABLE capitals ( - In PostgreSQL, a table can inherit from - zero or more other tables, and a query can reference either all + In PostgreSQL, a table or foreign table can + inherit from zero or more other tables, and a query can reference either all rows of a table or all rows of a table plus all of its descendant tables. The latter behavior is the default. 
For example, the following query finds the names of all cities, diff --git a/doc/src/sgml/ref/alter_foreign_table.sgml b/doc/src/sgml/ref/alter_foreign_table.sgml index 4d8cfc5..f7a382e 100644 --- a/doc/src/sgml/ref/alter_foreign_table.sgml +++ b/doc/src/sgml/ref/alter_foreign_table.sgml @@ -42,6 +42,8 @@ ALTER FOREIGN TABLE [ IF EXISTS ] namecolumn_name SET ( attribute_option = value [, ... ] ) ALTER [ COLUMN ] column_name RESET ( attribute_option [, ... ] ) ALTER [ COLUMN ] column_name OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ]) +INHERIT parent_table +NO INHERIT parent_table OWNER TO new_owner OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ]) @@ -178,6 +180,26 @@ ALTER FOREIGN TABLE [ IF EXISTS ] name +INHERIT parent_table + + + This form adds the target foreign table as a new child of the specified + parent table. The parent table must be a plain table. + + + + + +NO INHERIT parent_table + + + This form removes the target foreign table from the list of children of + the specified parent table. + + + + + OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] ) @@ -306,6 +328,16 @@ ALTER FOREIGN TABLE [ IF EXISTS ] name + + + parent_name + + +A parent table to associate or de-associate with this foreign table. +The parent table must be a plain table. + + + diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml index 06a7087..cc11dee 100644 --- a/doc/src/sgml/ref/create_foreign_table.sgml +++ b/doc/src/sgml/ref/create_foreign_table.sgml @@ -22,6 +22,7 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name column_name data_type [ OPTIONS ( option 'value' [, ... ] ) ] [ COLLATE collation ] [ column_constraint [ ... ] ] [, ... ] ] ) +[ INHERITS ( parent_table [, ... ] ) ] SERVER server_name [ OPTIONS ( option 'value' [, ... 
] ) ] @@ -159,6 +160,18 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name +parent_table + + + The name of an existing table from which the new foreign table + automatically inherits all columns. The specified parent table + must be a plain table. See for the + details of table inheritance. + + + + + server_name diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index f263b42..b4a084c 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -256,6 +256,48 @@ has_subclass(Oid relationId) /* + * Check whether the inheritance tree contains foreign table(s). + */ +bool +contains_foreign(Oid parentrelId, LOCKMODE lockmode) +{ + bool result = false; + List *tableOIDs; + ListCell *lc; + + /* Find all members of the inheritance tree */ + tableOIDs = find_all_inheritors(parentrelId, lockmode, NULL); + + /* There are no children */ + if (list_length(tableOIDs) < 2) + return result; + + foreach(lc, tableOIDs) + { + Oid childOID = lfirst_oid(lc); + Relation childrel; + + /* Parent should not be foreign */ + if (childOID
Re: [HACKERS] gaussian distribution pgbench
(2014/03/09 1:49), Fabien COELHO wrote: Hello Mitsumasa-san, New "\setrandom" interface is here. \setrandom var min max [gaussian threshold | exponential threshold] Attached patch realizes this interface, but it has little bit ugly codeing in executeStatement() and process_commands().. I think it is not too bad. The "ignore extra arguments on the line" is a little pre-existing mess anyway. All right. What do you think? I'm okay with this UI and its implementation. OK. Attached patch is updated in the document. I don't like complex sentence, so I use tag a lot. If you like this documents, please mark ready for commiter. Regards, -- Mitsumasa KONDO NTT Open Source Software Center *** a/contrib/pgbench/pgbench.c --- b/contrib/pgbench/pgbench.c *** *** 98,103 static int pthread_join(pthread_t th, void **thread_return); --- 98,106 #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ + #define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold for gauss */ + #define MIN_EXPONENTIAL_THRESHOLD 2.0 /* minimum threshold for exp */ + int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ *** *** 169,174 bool is_connect; /* establish connection for each transaction */ --- 172,185 bool is_latencies; /* report per-command latencies */ int main_pid; /* main process id used in log filename */ + /* gaussian distribution tests: */ + double stdev_threshold; /* standard deviation threshold */ + booluse_gaussian = false; + + /* exponential distribution tests: */ + double exp_threshold; /* threshold for exponential */ + bool use_exponential = false; + char *pghost = ""; char *pgport = ""; char *login = NULL; *** *** 330,335 static char *select_only = { --- 341,428 "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" }; + /* --exponential case */ + static char *exponential_tpc_b = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " 
* :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setrandom aid 1 :naccounts exponential :exp_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n" + "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --exponential with -N case */ + static char *exponential_simple_update = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setrandom aid 1 :naccounts exponential :exp_threshold\n" + "\\setrandom bid 1 :nbranches\n" + "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --exponential with -S case */ + static char *exponential_select_only = { + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setrandom aid 1 :naccounts exponential :exp_threshold\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + }; + + /* --gaussian case */ + static char *gaussian_tpc_b = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "\\setrandom aid 1 :naccounts gaussian :stdev_threshold\n" + "\\setrandom bid 1 :nbranches\n" 
+ "\\setrandom tid 1 :ntellers\n" + "\\setrandom delta -5000 5000\n" + "BEGIN;\n" + "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n" + "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" + "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n" + "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n" + "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n" + "END;\n" + }; + + /* --gaussian with -N case */ + static char *gaussian_simple_update = { + "\\set nbranches " CppAsString2(nbranches) " * :scale\n" + "\\set ntellers " CppAsString2(ntellers) " * :scale\n" + "\\set naccounts " CppAsString2(naccounts) " * :scale\n" + "
Re: [HACKERS] [ISSUE] pg_dump: schema with OID 0 does not exist
Hi, Can someone confirm is this really an issue? or any reasons for missing rows? On Tue, Feb 25, 2014 at 3:51 PM, Prakash Itnal wrote: > Hi, > > Recently we observed below errors while taking dump after upgrading from > 9.0.13 to 9.1.9. > > pg_dump: schema with OID 0 does not exist > > I referred similar issues reported previously ( > http://www.postgresql.org/message-id/CAGWYGjXRJj=zugejv0ckvn4zf9hb92q+7e3aqfcvbgbmb9z...@mail.gmail.com) > and get issue resolved after identifying and inserting some of the missing > rows from *pg_opclass* table. Below are the rows that are missed after > upgrade (from *pg_opclass *table): > >405 | aclitem_ops | 11 | 10 | 2235 > | 1033 | t | 0 > >783 | box_ops | 11 | 10 | 2593 > | 603 | t | 0 > >783 | point_ops | 11 | 10 | 1029 > | 600 | t |603 > >783 | poly_ops| 11 | 10 | 2594 > | 604 | t |603 > >783 | circle_ops | 11 | 10 | 2595 > | 718 | t |603 > > 2742 | _int4_ops | 11 | 10 | 2745 > | 1007 | t | 23 > > 2742 | _text_ops | 11 | 10 | 2745 > | 1009 | t | 25 > > 2742 | _abstime_ops| 11 | 10 | 2745 > | 1023 | t |702 > > > Can some one help me to understand whether it is an issue? If not how and > why these rows got disappeared after upgrade? > > Since we have an fully automated environment we do not encourage manual > intervention. So I am trying to understand how can we handle these issues. > > -- > Cheers, > Prakash > -- Cheers, Prakash
Re: [HACKERS] issue log message to suggest VACUUM FULL if a table is nearly empty
On Mon, Mar 10, 2014 at 1:13 PM, Haribabu Kommi wrote: > On Mon, Mar 10, 2014 at 4:24 PM, Amit Kapila wrote: >> On Mon, Mar 10, 2014 at 5:58 AM, Wang, Jing >> wrote: >> > Enclosed is the patch to implement the requirement that issue log message >> > to >> > suggest VACUUM FULL if a table is nearly empty. >> > >> > The requirement comes from the Postgresql TODO list. >> > >> I think it would be better if we can use some existing stats to issue warning >> message rather than traversing the FSM for all pages. For example after >> vacuuming page in lazy_scan_heap(), we update the freespace for page. >> You can refer below line in lazy_scan_heap(). >> freespace = PageGetHeapFreeSpace(page); >> >> Now it might be possible that we might not get freespace info easily as >> it is not accumulated for previous vacuum's. Incase there is no viable >> way to get it through vacuum stats, we are already updating fsm after >> vacuum by FreeSpaceMapVacuum(), where I think it should be possible >> to get freespace. > > yes this way it works without extra penalty. But the problem is how to > calculate > the free space which is left in the skipped pages because of visibility bit. One way could be by extrapolating (vac_estimate_reltuples) like we do for some other stats, but not sure if we can get the correct estimates. The main reason is that if you observe that code path, all the decisions are mainly done on the basis of vacrelstats. I have not checked in detail if by using any other stats, this purpose can be achieved, may be once you can look into it. By the way have you checked if FreeSpaceMapVacuum() can serve your purpose, because this call already traverses FSM in depth-first order to update the freespace. So may be by using this call or wrapper on this such that it returns total freespace as well apart from updating freespace can serve the need. With Regards, Amit Kapila. 
EnterpriseDB: http://www.enterprisedb.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Why is AccessShareLock held until end of transaction?
Joe Conway writes: > I am probably missing something obvious, but why does the > AccessShareLock remain held on a table after a SELECT statement is > complete when in a transaction block? *Any* lock acquired by user command is held till end of transaction; AccessShareLock isn't special. In general, releasing early would increase the risk of undesirable behaviors such as tables changing definition mid-transaction. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [bug fix] pg_ctl always uses the same event source
On Mon, Mar 10, 2014 at 2:39 PM, MauMau wrote: > From: "Amit Kapila" > >>> If I understand correctly that objection was on changing Default Event >>> Source name, and the patch now doesn't contain that change, it's >>> just a bug fix for letting pg_ctl know the non-default event source >>> set by user. >>> >>> Please clarify if I misunderstood something, else this should be changed >>> to Ready For Committer. >> >> >> Tom/Andres, please let me know if you have objection for this patch, >> because >> as per my understanding all the objectionable part of patch is removed >> from final >> patch and it's a defect fix to make pg_ctl aware of Event Source name set >> in >> postgresql.conf. >> >> If there is no objection, I will again change it to Ready For Committer. > > > Hi, Amit-san, I really appreciate your cooperation. Thanks. > Yes, I removed the > default value change that caused objection, so the patch can be marked ready > for committer. I understand the patch was marked needs for review by > misunderstanding Tom-san's opinion. > > I remember that I read "silence means no objection, or implicit agreement" > somewhere in the community site or ML. So I think it would be no problem to > set the status to ready for committer again. Okay, Done. With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Retain dynamic shared memory segments for postmaster lifetime
On Mon, Mar 10, 2014 at 11:37 PM, Robert Haas wrote: > Looks good, committed. However, I changed it so that > dsm_keep_segment() does not also perform the equivalent of > dsm_keep_mapping(); those are two separate operations. So are you expecting that if some one needs to retain dynamic segment's till PM lifetime, they should call both dsm_keep_segment() and dsm_keep_mapping()? If we don't call both, it can lead to following warning: postgres=# select dsm_demo_create('this message is from session-new', 1); WARNING: dynamic shared memory leak: segment 1402373971 still referenced With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Store Extension Options
Fabrízio de Royes Mello escribió: > On Fri, Mar 7, 2014 at 5:56 PM, Alvaro Herrera > wrote: > > > I am reworking this patch, both to accommodate earlier review comments > > and to fix the multiple verify step of namespaces, as noted by Tom in > > 4530.1390023...@sss.pgh.pa.us > > > Alvaro, > > Do you need some help? Would you give this version of the patch a look? I reworked your patch a bit, mainly to add a bit of checking to custom namespaces. In this code, before you can add a namespaced option to an object, you need to register the namespace. There are two interfaces for this: C code can call registerOptionNamespace(). This patch adds a call to plpgsql that does so (It's not my intention that plpgsql is modified in any way by this patch; this part of the patch is here just for demonstration purposes.) I expect most extension modules would do things that way. The other interface for namespace registration is a SQL-callable function. I expect Jim Nasby to do something like SELECT pg_register_option_namespace('decibel'); ALTER TABLE nasby SET (decibel.seed = true); which seems to cover what he wanted. If you register a namespace, you can later do "ALTER TABLE .. SET (yournsp.foobar=blah)" and your value will be stored in catalogs. Note that if you have a C module, you can register the options themselves, using add_bool_reloption() and so on; that way, your option will be type-checked. If you don't "add" your options, they will not be checked. This is in line with what we do for custom GUCs: if we know about them, they are checked, otherwise we just pass them around untouched. Note one weird thing related to TOAST tables: we need to tell transformRelOptions specifically whether we want custom namespaces to be kept in its output or not. This is because we want such options in the main table, but not in the toast table; and we call transformRelOptions on both tables with the whole array of values. That's what the new "include_custom" bit is for. 
For most callers that bit is true, but when a table is being processed and the options are for the toast table, that option is false. Another thing to note is that I've separated the checking of the namespaces from the transformation. There's actually very little duplicated work that we're saving from doing things that way AFAICS, but the new interface does make more sense than the old one. This is per the thread I linked to previously. (Note there is surely a better way to do the HEAP_RELOPT_NAMESPACES than a #define with the "static const char * const valid[]" thingy sprinkled all over the place; I assume we can just declare that once in the header. I will fix that later.) I haven't touched pg_dump yet, but if this proposed design sits well with everyone, my intention is that the dump output will contain the pg_register_option_namespace() calls necessary so that a table definition will be able to do the SET calls to set the values the original table has, and succeed. In other words, restoring a dump will preserve the values you had, without a need of having the module loaded in the new server. I think this is what was discussed. Robert, do you agree? I think there is more work to do here, mainly to ensure that the internal representation makes sense for C users (i.e. can extensions get at the values they previously set). At this point I'm interested in getting no objections to the SQL interface and the pg_dump bits. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services *** a/doc/src/sgml/ref/alter_index.sgml --- b/doc/src/sgml/ref/alter_index.sgml *** *** 82,87 ALTER INDEX [ IF EXISTS ] name RESE --- 82,93 to get the desired effects. + + + Custom storage parameters are of the form namespace.option. See + example below. 
+ + *** *** 202,207 ALTER INDEX distributors SET (fillfactor = 75); --- 208,219 REINDEX INDEX distributors; + +To set a custom storage parameter: + + ALTER INDEX distributors + SET (somenamespace.optionname=true); + *** a/doc/src/sgml/ref/alter_table.sgml --- b/doc/src/sgml/ref/alter_table.sgml *** *** 213,218 ALTER TABLE [ IF EXISTS ] name --- 213,226 of statistics by the PostgreSQL query planner, refer to . + + + + Custom storage parameters are of the form namespace.option. See + example below. + + + *** *** 476,481 ALTER TABLE [ IF EXISTS ] name --- 484,493 ALTER TABLE does not treat OIDS as a storage parameter. Instead use the SET WITH OIDS and SET WITHOUT OIDS forms to change OID status. +A custom name can be used as namespace to define a storage parameter. +Storage option pattern: namespace.option=value +(names
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 4:19 AM, Alexander Korotkov wrote: > Here it is. So it looks like what you have here is analogous to the other problems that I fixed with both GiST and GIN. That isn't surprising, and this does fix my test-case. I'm not terribly happy about the lack of explanation for the hashing in that loop, though. Why use COMP_CRC32() at all, for one thing? Why do this for non-primitive jsonb hashing? COMP_CRC32(stack->hash_state, PATH_SEPARATOR, 1); Where PATH_SEPARATOR is: #define PATH_SEPARATOR ("\0") Actually, come to think of it, why not just use one hashing function everywhere? i.e., jsonb_hash(PG_FUNCTION_ARGS)? It's already very similar. Pretty much every hash operator support function 1 (i.e. a particular type's hash function) is implemented with hash_any(). Can't we just do the same here? In any case it isn't obvious why the requirements for those two things (the hashing mechanism used by the jsonb_hash_ops GIN opclass, and the hash operator class support function 1 hash function) cannot be the same thing. -- Peter Geoghegan -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_upgrade on high number tables database issues
On Mon, Mar 10, 2014 at 08:12:20PM +0100, Pavel Stehule wrote: > Remember pg_upgrade is using pg_dump, which then connecting to a > backend, so passing that super-lock mode there is not ideal. The fixes > in 9.3 improve locking in all user cases, not just upgrades. > > > > nice FYI, the 9.3.0 release notes have all the details on pg_upgrade improvements. This is the pg_dump fix: Add per-resource-owner lock caches (Jeff Janes) This speeds up lock bookkeeping at statement completion in multi-statement transactions that hold many locks; it is particularly useful for pg_dump. -- Bruce Momjian http://momjian.us EnterpriseDB http://enterprisedb.com + Everyone has their own god. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Cleaner build output when not much has changed
Gurjeet Singh wrote: > On Tue, Nov 26, 2013 at 12:37 PM, Tom Lane wrote: > > > Gurjeet Singh writes: > > > I was looking for ways to reduce the noise in Postgres make output, > > > specifically, I wanted to eliminate the "Nothing to be done for `all' " > > > messages, since they don't add much value, and just ad to the clutter. > > > > Why don't you just use "make -s" if you don't want to see that? > > The example output you show is not much less verbose than before. > > I have a shell function that now adds --no-print-directory to my make > command. This patch combined with that switch makes the output really clean > (at least from my perspective). Since the use of a command-line switch can > be easily left to personal choice, I am not proposing to add that or its > makefile-equivalent. But modifying the makefiles to suppress noise is not > that everyone can be expected to do, and do it right. FWIW you can add a src/Makefile.custom file with this: all: @true and it will get rid of most noise. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On 03/10/2014 10:50 AM, Andrew Dunstan wrote: Thanks for your work on this. It's just occurred to me that we'll need to add hstore_to_jsonb functions and a cast to match the hstore_to_json functions and cast. That should be fairly simple - I'll work on that. It need not hold up progress with what's in this patch. Here's a patch sans docs for this, to be applied on top of Peter's patch. It's actually kinda useful as it demonstrates how non-jsonb code can construct jsonb values directy. cheers andrew diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile index 43b7e5f..bf21c65 100644 --- a/contrib/hstore/Makefile +++ b/contrib/hstore/Makefile @@ -5,7 +5,8 @@ OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \ crc32.o EXTENSION = hstore -DATA = hstore--1.2.sql hstore--1.1--1.2.sql hstore--1.0--1.1.sql \ +DATA = hstore--1.3.sql hstore--1.2--1.3.sql \ + hstore--1.2.sql hstore--1.1--1.2.sql hstore--1.0--1.1.sql \ hstore--unpackaged--1.0.sql REGRESS = hstore diff --git a/contrib/hstore/expected/hstore.out b/contrib/hstore/expected/hstore.out index 2114143..9749e45 100644 --- a/contrib/hstore/expected/hstore.out +++ b/contrib/hstore/expected/hstore.out @@ -1453,7 +1453,7 @@ select count(*) from testhstore where h = 'pos=>98, line=>371, node=>CBA, indexe 1 (1 row) --- json +-- json and jsonb select hstore_to_json('"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'); hstore_to_json - @@ -1472,6 +1472,24 @@ select hstore_to_json_loose('"a key" =>1, b => t, c => null, d=> 12345, e => 012 {"b": true, "c": null, "d": 12345, "e": "012345", "f": 1.234, "g": 2.345e+4, "a key": 1} (1 row) +select hstore_to_jsonb('"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'); + hstore_to_jsonb +- + {"b": "t", "c": null, "d": "12345", "e": "012345", "f": "1.234", "g": "2.345e+4", "a key": "1"} +(1 row) + +select cast( hstore '"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 
2.345e+4' as jsonb); + jsonb +- + {"b": "t", "c": null, "d": "12345", "e": "012345", "f": "1.234", "g": "2.345e+4", "a key": "1"} +(1 row) + +select hstore_to_jsonb_loose('"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'); + hstore_to_jsonb_loose +--- + {"b": true, "c": null, "d": 12345, "e": "012345", "f": 1.234, "g": 23450, "a key": 1} +(1 row) + create table test_json_agg (f1 text, f2 hstore); insert into test_json_agg values ('rec1','"a key" =>1, b => t, c => null, d=> 12345, e => 012345, f=> 1.234, g=> 2.345e+4'), ('rec2','"a key" =>2, b => f, c => "null", d=> -12345, e => 012345.6, f=> -1.234, g=> 0.345e-4'); diff --git a/contrib/hstore/hstore--1.2--1.3.sql b/contrib/hstore/hstore--1.2--1.3.sql new file mode 100644 index 000..0a70560 --- /dev/null +++ b/contrib/hstore/hstore--1.2--1.3.sql @@ -0,0 +1,17 @@ +/* contrib/hstore/hstore--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION hstore UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION hstore_to_jsonb(hstore) +RETURNS jsonb +AS 'MODULE_PATHNAME', 'hstore_to_jsonb' +LANGUAGE C IMMUTABLE STRICT; + +CREATE CAST (hstore AS jsonb) + WITH FUNCTION hstore_to_jsonb(hstore); + +CREATE FUNCTION hstore_to_jsonb_loose(hstore) +RETURNS jsonb +AS 'MODULE_PATHNAME', 'hstore_to_jsonb_loose' +LANGUAGE C IMMUTABLE STRICT; diff --git a/contrib/hstore/hstore--1.3.sql b/contrib/hstore/hstore--1.3.sql new file mode 100644 index 000..995ade1 --- /dev/null +++ b/contrib/hstore/hstore--1.3.sql @@ -0,0 +1,550 @@ +/* contrib/hstore/hstore--1.3.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION hstore" to load this file. 
\quit + +CREATE TYPE hstore; + +CREATE FUNCTION hstore_in(cstring) +RETURNS hstore +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION hstore_out(hstore) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION hstore_recv(internal) +RETURNS hstore +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION hstore_send(hstore) +RETURNS bytea +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE TYPE hstore ( +INTERNAL
[HACKERS] Why is AccessShareLock held until end of transaction?
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 I am probably missing something obvious, but why does the AccessShareLock remain held on a table after a SELECT statement is complete when in a transaction block? E.g.: 8<- create table t1 (); begin; select * from t1; select relation::regclass, locktype, mode from pg_locks where pid = pg_backend_pid(); relation | locktype | mode - --++- pg_locks | relation | AccessShareLock t1 | relation | AccessShareLock | virtualxid | ExclusiveLock (3 rows) 8<- The reason I ask is that I ran into a deadlock situation which was caused by one session running two SELECT statements in a transaction, while a second session attempted to create a new table with foreign keys to two of the tables involved in the first session: 8<- - -- at some earlier point create table t1(id int primary key); create table t2(id int primary key); - -- in session 1 begin; select * from t1; - -- in session 2 create table t3 ( id int, t2id int references t2(id), t1id int references t1(id) ); - -- in session 1 select * from t2; 8<- Thoughts? 
Thanks, Joe - -- Joe Conway credativ LLC: http://www.credativ.us Linux, PostgreSQL, and general Open Source Training, Service, Consulting, & 24x7 Support -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.14 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iQIcBAEBAgAGBQJTHk9lAAoJEDfy90M199hlb2MP/1EtJwmsnsKvzhInXxKx1Jyb uoKlq2a7v7GT79V7WstXRusuCdVN0f2C4HmvF9zIR108xUyxa7kK9IbRjEvfxVtd oOZWRJrOzVKdUiBKqiA9xUwoKCxlNn2CuVbc3jzmyTB9fyzv59lGcDYcAjjwZoc0 rKboaeKVfoz3KRuKbhw+KfthtDWwdUeQ6pifttHm/vF4oAE1i9wyL4glV0x5Rmu+ ktkZItGpGjOh3lxJpCmON0rsx7K/SSSyZJ0pTpbjdDTKyl/3JkfgxLZXrF8AlOm0 L6XrMx4+yvjnN68NMTgy3talUU4hW5wTSebNihe6sw5YndkkLInjLwzfrTsYxtf0 cgYZ9g8PUI2MkePWJTgtkEqT3LE9PTMGXmD+NFL8E+rVbpzklXB8du0oKJRorC6x 0hzJSfZmOYCU8LDwagzPRXH9fncNT3oPxDcFMSUkWxQ3ha0TNMa9DKiPSxkJskSb YVpIObda1b/JW9cT4LrvlNxVW0uk9TfiQpbXRcZTXEyCGYikHfm2Js1gwtcmL/LY HiSXRadoT3n9890FzbRO3Mk3YRvz7VQyetOHtOjD8fRx5s7azoZHPNnNucgR5fVx laAEBwY7wXppMbnmM7hAb6RYP/dV4yXoF4SVcnRMc2sm0sgOZkTT/2Muo6fHAW6E SCEpW0nREbho3qaxPb+J =io9e -END PGP SIGNATURE- -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Auto-tuning work_mem and maintenance_work_mem
On 03/10/2014 03:16 PM, Kevin Grittner wrote: > I only have anecdotal evidence, though. I have seen it help dozens > of times, and have yet to see it hurt. That said, most people on > this list are probably capable of engineering a benchmark which > will show whichever result they would prefer. I would prefer to > hear about other data points based on field experience with > production systems. I haven't offered the trivial patch because > when I've raised the point before, there didn't seem to be anyone > else who had the same experience. It's good to hear that Andres > has seen this, too. The problem with cpu_tuple_cost is that it's used in several places by the planner and makes it hard to model what the effect of any change would be. If we had a good general benchmark which actually gave the query planner a workout, we could come up with some reasonable default settings, but right now we can't. Back in 2004-2006 era, when CPU speeds had leapfrogged ahead of disk speeds (which were largely unchanged from 2000), I was routinely *lowering* cpu_tuple_cost (and cpu_index_tuple_cost) to get better plans. This was baked into early versions of Greenplum for that reason. So I'm not saying that we shouldn't change the default for cpu_tuple_cost. I'm saying that we currently don't have enough information on *when* and *how much* to change it. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Patch: show relation and tuple infos of a lock to acquire
Hi, On 10/03/14 14:59, Robert Haas wrote: > On Mon, Mar 10, 2014 at 7:44 AM, Christian Kruse > wrote: > > [ response to review ] > > This response seems to have made no mention of point #7 from Amit's > review, which seems to me to be a rather important one. Just didn't notice it because the previous point was the same. NULL'd the tuple there, too: --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1307,7 +1307,7 @@ retry: if (TransactionIdIsValid(xwait)) { index_endscan(index_scan); - XactLockTableWait(xwait); + XactLockTableWaitWithInfo(heap, NULL, xwait); goto retry; } Best regards, -- Christian Kruse http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services pgpkcb2npz9m2.pgp Description: PGP signature
Re: [HACKERS] Auto-tuning work_mem and maintenance_work_mem
Andres Freund wrote: > On 2014-02-16 21:26:47 -0500, Robert Haas wrote: >> I don't really know about cpu_tuple_cost. Kevin's often >> advocated raising it, but I haven't heard anyone else advocate >> for that. I think we need data points from more people to know >> whether or not that's a good idea in general. > > FWIW It's a good idea in my experience. This is more about the balance among the various cpu_* costs than the balance between cpu_* costs and the *_page costs. I usually need to adjust the page costs, too; and given how heavily cached many machines are, I'm usually moving them down. But if you think about the work involved in moving to a new tuple, do you really think it's only twice the cost of moving to a new index entry on an index scan? Or only four times as expensive as executing an average operator function? In my experience setting cpu_tuple_cost higher tends to better model costs, and prevent CPU-sucking scans of large numbers of rows. I only have anecdotal evidence, though. I have seen it help dozens of times, and have yet to see it hurt. That said, most people on this list are probably capable of engineering a benchmark which will show whichever result they would prefer. I would prefer to hear about other data points based on field experience with production systems. I haven't offered the trivial patch because when I've raised the point before, there didn't seem to be anyone else who had the same experience. It's good to hear that Andres has seen this, too. FWIW, even though I'm repeating something I've mentioned before, whenever raising this setting did help, 0.03 was high enough to see the benefit. Several times I have also tried 0.05 just to test whether I was wandering near a tipping point for a bad choice from this. I have never had 0.05 produce plans noticeably better or worse than 0.03. 
-- Kevin Grittner EDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On 03/10/2014 02:08 PM, Robert Haas wrote: > On Mon, Mar 10, 2014 at 4:55 PM, Josh Berkus wrote: >> Yeah, that's my thoughts. Although I might wait for recvlogical. Will >> put documentation wordsmithing on my todo list once Andres commits. > > Is this your way of announcing that Andres is getting a commit bit, or > did you just mis-speak? Hah. No, I have no such knowledge. I was using "commit" as in the git sense, as in "commits to his fork". -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
Robert Haas escribió: > On Mon, Mar 10, 2014 at 3:33 PM, Alvaro Herrera > wrote: > > Robert Haas escribió: > >> I've committed this patch now with a few further tweaks, leaving this > >> issue unaddressed. It may well be something that needs improvement, > >> but I don't think it's a big enough issue to justify holding back a > >> commit. > > > > Hmm, is the buildfarm exercising any of this? > > I think it isn't, apart from whether it builds. Apparently the > buildfarm only runs installcheck on contrib, not check. And the > test_decoding plugin only runs under installcheck, not check. Also, > it's not going to test walsender/walreceiver at all, but that's harder > to fix. So the buildfarm exercises pg_upgrade, to some extent, by way of a custom module, https://github.com/PGBuildFarm/client-code/blob/master/PGBuild/Modules/TestUpgrade.pm As far as I can tell, test_decoding wants to do the same thing (i.e. get make check to run). Is the best option to write a new TestLogical.pm module for the buildfarm, or should we somehow think about how to generalize the pg_upgrade trick so that animal caretakers can enable runs of test_decoding by simply upgrading to a newer version of the buildfarm script? -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On Mon, Mar 10, 2014 at 4:55 PM, Josh Berkus wrote: > On 03/10/2014 12:46 PM, Robert Haas wrote: >> On Mon, Mar 10, 2014 at 3:38 PM, Josh Berkus wrote: >>> On 03/10/2014 11:54 AM, Robert Haas wrote: I've committed this patch now with a few further tweaks, leaving this issue unaddressed. It may well be something that needs improvement, but I don't think it's a big enough issue to justify holding back a commit. >>> >>> Wait, does this mean Changesets is committed? Or only part of it? >> >> The core of the feature was b89e151054a05f0f6d356ca52e3b725dd0505e53, >> but that only allowed it through the SQL interface. The new commit, >> 8722017bbcbc95e311bbaa6d21cd028e296e5e35, makes it available via >> walsender interface. There isn't a client for that interface yet, but >> if you're wondering whether it's time to break out the champagne, I'm >> thinking probably. > > Yeah, that's my thoughts. Although I might wait for recvlogical. Will > put documentation wordsmithing on my todo list once Andres commits. Is this your way of announcing that Andres is getting a commit bit, or did you just mis-speak? -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On 03/10/2014 12:46 PM, Robert Haas wrote: > On Mon, Mar 10, 2014 at 3:38 PM, Josh Berkus wrote: >> On 03/10/2014 11:54 AM, Robert Haas wrote: >>> I've committed this patch now with a few further tweaks, leaving this >>> issue unaddressed. It may well be something that needs improvement, >>> but I don't think it's a big enough issue to justify holding back a >>> commit. >> >> Wait, does this mean Changesets is committed? Or only part of it? > > The core of the feature was b89e151054a05f0f6d356ca52e3b725dd0505e53, > but that only allowed it through the SQL interface. The new commit, > 8722017bbcbc95e311bbaa6d21cd028e296e5e35, makes it available via > walsender interface. There isn't a client for that interface yet, but > if you're wondering whether it's time to break out the champagne, I'm > thinking probably. Yeah, that's my thoughts. Although I might wait for recvlogical. Will put documentation wordsmithing on my todo list once Andres commits. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] calculating an aspect of shared buffer state from a background worker
Thank you both for the thoughtful and helpful responses. The utility of the length of the free list is somewhat dubious. I imagine it could be useful to answer the question of "is there a chance that increasing shared buffers would be useless?" in an optimization context. Agreed it's not useful in most steady state scenarios. I saw the approach in the pg_buffercache contrib module and am looking for lockless alternatives for at least estimating the size of free buffers. I'm relatively inexperienced, so I'd be curious to know whether there is a danger beyond an inconsistent result in traversing / sampling the BufferDescriptors without a lock? Also I got the impression that there is a ring approach to freeing buffers, and that assuming the descriptors are allocated in sequential addresses, taking the difference in the first and last could be used to get a rough estimate accounting for sizes or other shenanigans? Thank you again, the clues to look at buffer descriptors and ShmemInitStruct are very helpful. Best Regards, Robert On Mon, Mar 10, 2014 at 7:33 AM, Tom Lane wrote: > Robert Berry writes: > > I'm looking at doing a calculation to determine the number of free > buffers > > available. An example ratio that is based on some data structures in > > freelist.c as follows: > > > (StrategyControl->lastFreeBuffer - StrategyControl->firstFreeBuffer) / > > (double) NBuffers > > > Is there a way to get access to the StrategyControl pointer in the > context > > of a background worker? > > The BufferStrategyControl struct is in shared memory, so you can certainly > get at it. One way would be to modify freelist.c to export its static > pointer variable. Alternatively, you could call ShmemInitStruct an extra > time to look up the struct for yourself, and then save it in your own > static variable. > > Having said that, though, I'm pretty dubious of the premise. 
I trust you > realize that the above calculation is entirely wrong; firstFreeBuffer and > lastFreeBuffer are list head and tail pointers, and have no numerical > relation to the list length. The only way to determine the list length > accurately would be to chase down the whole list, which you'd have to hold > the BufFreelistLock while doing, which'd be disastrous for performance if > the list was long. (If you're okay with modifying the backend code you > could dodge this by teaching freelist.c to maintain a counter, I guess.) > > An even bigger issue is that it's not clear that the length of the free > list is actually a useful number to have; in steady-state usage it > frequently is always zero. Buffers only get put back on the freelist if > they're invalidated, eg by dropping the relation they belonged to. Normal > usage tends to allocate buffers by reclaiming ones whose usage_count has > reached zero in the clock sweep algorithm. So a better picture of the > availability of buffers would require scanning the buffer pool to see how > many there are of each usage_count level. > > regards, tom lane >
Re: [HACKERS] on_exit_reset fails to clear DSM-related exit actions
Robert Haas escribió: > A related point that's been bugging me for a while, and has just > struck me again, is that background workers for which > BGWORKER_SHMEM_ACCESS is not passed probably ought to be detaching > shared memory (and DSMs). Currently, and since Alvaro originally > added the facility, the death of a non-BGWORKER_SHMEM_ACCESS backend > is used in postmaster.c to decide whether to call HandleChildCrash(). > But such workers could still clobber shared memory arbitrarily; they > haven't unmapped it. Oddly, the code in postmaster.c is only cares > about the flag when checking EXIT_STATUS_0()/EXIT_STATUS_1(), not when > checking ReleasePostmasterChildSlot()... Clearly there's not a lot of consistency on that. I don't think I had made up my mind completely about such details. I do remember that unmapping/detaching the shared memory segment didn't cross my mind; the flag, as I recall, only controls (controlled) whether to attach to it explicitly. IOW feel free to whack around. -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] GSoC 2014 - mentors, students and admins
All students and mentors (and backup mentors) should now register to this year's GSoC. Students only have until Friday next week (up until 21st March 19:00 UTC) to apply. Thanks Thom -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] COPY table FROM STDIN doesn't show count tag
I wrote: > Also, I'm thinking we should back-patch the aspects of the patch > needed to fix the wrong-line-number issue. That appears to have been > introduced in 9.2; older versions of PG get the above example right. I've done that. For reference' sake, here's an updated patch against HEAD with just the uncommitted changes. regards, tom lane diff -c new/common.c new-wholepatch/common.c *** new/common.c Mon Mar 10 14:55:49 2014 --- new-wholepatch/common.c Mon Mar 10 12:49:02 2014 *** *** 631,638 * When the command string contained no such COPY command, this function * degenerates to an AcceptResult() call. * ! * Changes its argument to point to the last PGresult of the command string, ! * or NULL if that result was for a COPY FROM STDIN or COPY TO STDOUT. * * Returns true on complete success, false otherwise. Possible failure modes * include purely client-side problems; check the transaction status for the --- 631,637 * When the command string contained no such COPY command, this function * degenerates to an AcceptResult() call. * ! * Changes its argument to point to the last PGresult of the command string. * * Returns true on complete success, false otherwise. Possible failure modes * include purely client-side problems; check the transaction status for the *** *** 641,654 static bool ProcessResult(PGresult **results) { - PGresult *next_result; bool success = true; bool first_cycle = true; ! do { ExecStatusType result_status; bool is_copy; if (!AcceptResult(*results)) { --- 640,653 static bool ProcessResult(PGresult **results) { bool success = true; bool first_cycle = true; ! for (;;) { ExecStatusType result_status; bool is_copy; + PGresult *next_result; if (!AcceptResult(*results)) { *** *** 693,698 --- 692,698 * otherwise use queryFout or cur_cmd_source as appropriate. 
*/ FILE *copystream = pset.copyStream; + PGresult *copy_result; SetCancelConn(); if (result_status == PGRES_COPY_OUT) *** *** 700,706 if (!copystream) copystream = pset.queryFout; success = handleCopyOut(pset.db, ! copystream) && success; } else { --- 700,707 if (!copystream) copystream = pset.queryFout; success = handleCopyOut(pset.db, ! copystream, ! ©_result) && success; } else { *** *** 708,737 copystream = pset.cur_cmd_source; success = handleCopyIn(pset.db, copystream, ! PQbinaryTuples(*results)) && success; } ResetCancelConn(); ! /* ! * Call PQgetResult() once more. In the typical case of a ! * single-command string, it will return NULL. Otherwise, we'll ! * have other results to process that may include other COPYs. ! */ PQclear(*results); ! *results = next_result = PQgetResult(pset.db); } else if (first_cycle) /* fast path: no COPY commands; PQexec visited all results */ break; - else if ((next_result = PQgetResult(pset.db))) - { - /* non-COPY command(s) after a COPY: keep the last one */ - PQclear(*results); - *results = next_result; } first_cycle = false; ! } while (next_result); /* may need this to recover from conn loss during COPY */ if (!first_cycle && !CheckConnection()) --- 709,742 copystream = pset.cur_cmd_source; success = handleCopyIn(pset.db, copystream, ! PQbinaryTuples(*results), ! ©_result) && success; } ResetCancelConn(); ! /* replace the COPY_OUT/IN result with COPY command exit status */ PQclear(*results); ! *results = copy_result; } else if (first_cycle) + { /* fast path: no COPY commands; PQexec visited all results */ break; } + /* + * Check PQgetResult() again. In the typical case of a single-command + * string, it will return NULL. Otherwise, we'll have other results + * to process that may include other COPYs. We keep the last result. + */ + next_result = PQgetResult(pset.db); + if (!next_result) + break; + + PQclear(*results); + *results = next_result; first_cycle = false; ! 
} /* may need this to recover from conn loss during COPY */ if (!first_cycle && !CheckConnection()) diff -c new/copy.c new-wholepatch/copy.c *** new/copy.c Mon Mar 10 14:56:21 2014 --- new-wholepatch/copy.c Mon Mar 10 12:50:27 2014 *** *** 429,444 * conn should be a database connection that you just issued COPY TO on * and got back a PGRES_COPY_OUT result. * copystream is the file stream for the data to go to. * * result is true if successful, false if not. */ bool ! handleCopyOut(PGconn *conn, FILE *copystream) { bool OK =
Re: [HACKERS] Changeset Extraction v7.9.1
On Mon, Mar 10, 2014 at 3:38 PM, Josh Berkus wrote: > On 03/10/2014 11:54 AM, Robert Haas wrote: >> I've committed this patch now with a few further tweaks, leaving this >> issue unaddressed. It may well be something that needs improvement, >> but I don't think it's a big enough issue to justify holding back a >> commit. > > Wait, does this mean Changesets is committed? Or only part of it? The core of the feature was b89e151054a05f0f6d356ca52e3b725dd0505e53, but that only allowed it through the SQL interface. The new commit, 8722017bbcbc95e311bbaa6d21cd028e296e5e35, makes it available via walsender interface. There isn't a client for that interface yet, but if you're wondering whether it's time to break out the champagne, I'm thinking probably. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] extension_control_path
* Robert Haas (robertmh...@gmail.com) wrote: > On Fri, Mar 7, 2014 at 10:19 PM, Peter Eisentraut wrote: > >> What the $directory proposal achieves is allowing a fully relocatable > >> extension layout, where you just have to drop a directory anywhere in > >> the file system and it just works (*). > > > > If that's what you want (and it sounds attractive), why couldn't we just > > locate libraries using extension_control_path as well (which presumably > > contains the control file we are just processing). No need for another > > indirection. Libraries and control files being in separate directory > > hierarchies is a historical artifact, but it's not something that can't > > be changed if it's not what we want. > > > > The problem I have with this $directory proposal, if I understand it > > correctly, is that it requires editing of the control file at > > installation time. I don't like this for three reasons: it's not clear > > how this should actually be done, creating more broken extension build > > scripts (a big problem already); control files are part of the "code", > > so to speak, not a configuration file, and so munging it in a > > site-specific way creates a hybrid type of file that will be difficult > > to properly manage; it doesn't allow running an extension before > > installation (unless I overwrite the control file in my own source > > tree), which is my main use case for this. > > +1. I agree with this- it wasn't my intent to require any hacking of the control file on install. At least my recollection (and it might be wrong- I'm feeling a bit under-the-weather at the moment..) was that I was looking for a way to explicitly say "look for the .so in the same directory as the control file" and then had another thought of "allow a fully-qualified path to be used for the control file @ CREATE EXTENSION time". > >> What happens if you have more than one 'prefix.so' file in your path? 
> > > > The same thing that happens if you have more than one prefix.control in > > your path. You take the first one you find. > > > > Yes, if those are actually two different path settings, you need to keep > > those aligned. But that would be a one-time setting by the database > > administrator, not a per-extension-and-packager setting, so it's > > manageable. If it still bothers you, put them both in the same path, as > > I suggested above. > > It's tempting to think that when you install an extension, we should > search the directory where the control file was found for the .so > first - and if so, use THAT library every time, not any other one. Of > course the problem with that is that we'd then need to remember that > in the system catalogs, which might pose a problem in terms of letting > people reorganize the filesystem hierarchy without getting too > bothered about what PostgreSQL is remembering internally. I'd like to be able to specify "same directory as the control file" somehow since that's what I expect non-packaged extensions to mostly want. I also don't like having to hack the control file. Nor do I particularly like having to hack the postgresql.conf every time you add an extension (made doubly worse if you have to hand-edit two paths for every additional extension). I agree that it also presents challenges for how we store that information internally. Thanks, Stephen signature.asc Description: Digital signature
Re: [HACKERS] Torn page hazard in ginRedoUpdateMetapage()
On Mon, Apr 30, 2012 at 1:34 PM, Noah Misch wrote: > When GIN changes a metapage, we WAL-log its ex-header content and never use a > backup block. This reduces WAL volume since the vast majority of the metapage > is unused. However, ginRedoUpdateMetapage() only restores the WAL-logged > content if the metapage LSN predates the WAL record LSN. If a metapage write > tore and updated the LSN but not the other content, we would fail to complete > the update. Instead, unconditionally reinitialize the metapage similar to how > _bt_restore_meta() handles the situation. > > I found this problem by code reading and did not attempt to build a test case > illustrating its practical consequences. It's possible that there's no > problem in practice on account of some reason I haven't contemplated. The attached patch doesn't apply any more, but it looks like this issue still exists. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On 2014-03-10 12:38:42 -0700, Josh Berkus wrote: > On 03/10/2014 11:54 AM, Robert Haas wrote: > > I've committed this patch now with a few further tweaks, leaving this > > issue unaddressed. It may well be something that needs improvement, > > but I don't think it's a big enough issue to justify holding back a > > commit. > > Wait, does this mean Changesets is committed? Or only part of it? The docs and pg_recvlogical aren't yet, everything else is. Working on rebasing/copy-editing the former two right now. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On 03/10/2014 11:54 AM, Robert Haas wrote: > I've committed this patch now with a few further tweaks, leaving this > issue unaddressed. It may well be something that needs improvement, > but I don't think it's a big enough issue to justify holding back a > commit. Wait, does this mean Changesets is committed? Or only part of it? -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
Hi, On 2014-03-10 16:33:33 -0300, Alvaro Herrera wrote: > Robert Haas escribió: > > I've committed this patch now with a few further tweaks, leaving this > > issue unaddressed. It may well be something that needs improvement, > > but I don't think it's a big enough issue to justify holding back a > > commit. > > Hmm, is the buildfarm exercising any of this? Not sufficiently yet, no. The logical decoding facilities themselves are actually covered by tests in contrib/test_decoding, but due to the issues mentioned in 20140303224325.gj17...@awork2.anarazel.de they aren't run. The walsender interface isn't tested at all. Be it new or old functionality. I have some hopes for Peter's client test patches there... Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On Mon, Mar 10, 2014 at 3:33 PM, Alvaro Herrera wrote: > Robert Haas escribió: >> I've committed this patch now with a few further tweaks, leaving this >> issue unaddressed. It may well be something that needs improvement, >> but I don't think it's a big enough issue to justify holding back a >> commit. > > Hmm, is the buildfarm exercising any of this? I think it isn't, apart from whether it builds. Apparently the buildfarm only runs installcheck on contrib, not check. And the test_decoding plugin only runs under installcheck, not check. Also, it's not going to test walsender/walreceiver at all, but that's harder to fix. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] on_exit_reset fails to clear DSM-related exit actions
On Mon, Mar 10, 2014 at 11:48 AM, Tom Lane wrote: > Robert Haas writes: >> On Fri, Mar 7, 2014 at 2:40 PM, Robert Haas wrote: >>> Hmm. So the problematic sequence of events is where a postmaster >>> child forks, and then exits without exec-ing, perhaps because e.g. >>> exec fails? > >> I've attempted a fix for this case. The attached patch makes >> test_shm_mq fork() a child process that calls on_exit_reset() and then >> exits. Without the fix I just pushed, this causes the tests to fail; >> with this fix, this does not cause the tests to fail. > >> I'm not entirely sure that this is exactly right, but I think it's an >> improvement. > > This looks generally sane to me, but I wonder whether you should also > teach PGSharedMemoryDetach() to physically detach from DSM segments > not just the main shmem seg. Or maybe better, adjust most of the > places that call on_exit_reset and then PGSharedMemoryDetach to also > make a detach-from-DSM call. Hmm, good catch. Maybe we should invent a new function that is defined to mean "detach ALL shared memory segments". A related point that's been bugging me for a while, and has just struck me again, is that background workers for which BGWORKER_SHMEM_ACCESS is not passed probably ought to be detaching shared memory (and DSMs). Currently, and since Alvaro originally added the facility, the death of a non-BGWORKER_SHMEM_ACCESS backend is used in postmaster.c to decide whether to call HandleChildCrash(). But such workers could still clobber shared memory arbitrarily; they haven't unmapped it. Oddly, the code in postmaster.c is only cares about the flag when checking EXIT_STATUS_0()/EXIT_STATUS_1(), not when checking ReleasePostmasterChildSlot()... > It looks like both sysv_shmem.c and > win32_shmem.c have internal callers of PGSharedMemoryDetach, which > probably shouldn't affect DSM. 
> > You could argue that that's unnecessary on the grounds that the postmaster > will never have any DSM segments attached, but I think it would be > good coding practice anyway. People who copy-and-paste those bits of > code into other places are not likely to think of adding DSM calls. Well, actually the postmaster WILL have the control segment attached (not nothing else). -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
Robert Haas escribió: > I've committed this patch now with a few further tweaks, leaving this > issue unaddressed. It may well be something that needs improvement, > but I don't think it's a big enough issue to justify holding back a > commit. Hmm, is the buildfarm exercising any of this? -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_upgrade on high number tables database issues
2014-03-10 20:11 GMT+01:00 Bruce Momjian : > On Mon, Mar 10, 2014 at 07:40:42PM +0100, Pavel Stehule wrote: > > > > > > > > There were several bottlenecks in this area removed in 9.2 and 9.3. > > > Unfortunately the worst of those bottlenecks were in the server, > so they > > depend > > > on what database you are upgrading from, and so won't help you much > > upgrading > > > from 9.1. > > > > Yes, I assume 9.3 will be much better, though Jeff is right that if > it > > is pg_dump locking that is hurting you, you might not see a win > even in > > 9.3. > > > > > > I'll see it next year when we plan to migrate to 9.4 > > > > I though so some form of "superlock" can be interesting, because nobody > can > > work with database when it is upgraded. > > Remember pg_upgrade is using pg_dump, which then connecting to a > backend, so passing that super-lock mode there is not ideal. The fixes > in 9.3 improve locking in all user cases, not just upgrades. > > nice Thank you Pavel > -- > Bruce Momjian http://momjian.us > EnterpriseDB http://enterprisedb.com > > + Everyone has their own god. + >
Re: [HACKERS] pg_upgrade on high number tables database issues
On Mon, Mar 10, 2014 at 07:40:42PM +0100, Pavel Stehule wrote: > > > > > There were several bottlenecks in this area removed in 9.2 and 9.3. > > Unfortunately the worst of those bottlenecks were in the server, so they > depend > > on what database you are upgrading from, and so won't help you much > upgrading > > from 9.1. > > Yes, I assume 9.3 will be much better, though Jeff is right that if it > is pg_dump locking that is hurting you, you might not see a win even in > 9.3. > > > I'll see it next year when we plan to migrate to 9.4 > > I though so some form of "superlock" can be interesting, because nobody can > work with database when it is upgraded. Remember pg_upgrade is using pg_dump, which then connects to a backend, so passing that super-lock mode there is not ideal. The fixes in 9.3 improve locking in all user cases, not just upgrades. -- Bruce Momjian http://momjian.us EnterpriseDB http://enterprisedb.com + Everyone has their own god. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] extension_control_path
On Fri, Mar 7, 2014 at 10:19 PM, Peter Eisentraut wrote: >> What the $directory proposal achieves is allowing a fully relocatable >> extension layout, where you just have to drop a directory anywhere in >> the file system and it just works (*). > > If that's what you want (and it sounds attractive), why couldn't we just > locate libraries using extension_control_path as well (which presumably > contains the control file we are just processing). No need for another > indirection. Libraries and control files being in separate directory > hierarchies is a historical artifact, but it's not something that can't > be changed if it's not what we want. > > The problem I have with this $directory proposal, if I understand it > correctly, is that it requires editing of the control file at > installation time. I don't like this for three reasons: it's not clear > how this should actually be done, creating more broken extension build > scripts (a big problem already); control files are part of the "code", > so to speak, not a configuration file, and so munging it in a > site-specific way creates a hybrid type of file that will be difficult > to properly manage; it doesn't allow running an extension before > installation (unless I overwrite the control file in my own source > tree), which is my main use case for this. +1. >> What happens if you have more than one 'prefix.so' file in your path? > > The same thing that happens if you have more than one prefix.control in > your path. You take the first one you find. > > Yes, if those are actually two different path settings, you need to keep > those aligned. But that would be a one-time setting by the database > administrator, not a per-extension-and-packager setting, so it's > manageable. If it still bothers you, put them both in the same path, as > I suggested above. 
It's tempting to think that when you install an extension, we should search the directory where the control file was found for the .so first - and if so, use THAT library every time, not any other one. Of course the problem with that is that we'd then need to remember that in the system catalogs, which might pose a problem in terms of letting people reorganize the filesystem hierarchy without getting too bothered about what PostgreSQL is remembering internally. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Patch: show relation and tuple infos of a lock to acquire
On Mon, Mar 10, 2014 at 7:44 AM, Christian Kruse wrote: > [ response to review ] This response seems to have made no mention of point #7 from Amit's review, which seems to me to be a rather important one. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Changeset Extraction v7.9.1
On Fri, Mar 7, 2014 at 7:44 AM, Andres Freund wrote: > I've attached a new version of the walsender patch. It's been rebased > ontop of Heikki's latest commit to walsender.c. I've changed a fair bit > of stuff: > * The sleeptime is now computed to sleep until we either need to send a > keepalive or kill ourselves, as Heikki suggested. > * Sleep time computation, sending pings, checking timeouts is now done > in separate functions. > * Comment and codestyle improvements. > > Although they are shorter and simpler now, I have not managed to unify > the three loops however. They seem to be too different to unify them > inside one. I tried a common function with an 'wait_for' bitmask > argument, but that turned out to be fairly illegible. The checks in > WalSndWaitForWal() and WalSndLoop() just seem to be too different. > > I'd be grateful if you (or somebody else!) could have a quick look at > body of the loops in WalSndWriteData(), WalSndWaitForWal() and > WalSndLoop(). Maybe I am just staring at it the wrong way. I've committed this patch now with a few further tweaks, leaving this issue unaddressed. It may well be something that needs improvement, but I don't think it's a big enough issue to justify holding back a commit. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_upgrade on high number tables database issues
> > > > There were several bottlenecks in this area removed in 9.2 and 9.3. > > Unfortunately the worst of those bottlenecks were in the server, so they > depend > > on what database you are upgrading from, and so won't help you much > upgrading > > from 9.1. > > Yes, I assume 9.3 will be much better, though Jeff is right that if it > is pg_dump locking that is hurting you, you might not see a win even in > 9.3. > I'll see it next year when we plan to migrate to 9.4 I though so some form of "superlock" can be interesting, because nobody can work with database when it is upgraded. Regards Pavel > > -- > Bruce Momjian http://momjian.us > EnterpriseDB http://enterprisedb.com > > + Everyone has their own god. + >
Re: [HACKERS] pg_upgrade on high number tables database issues
On Mon, Mar 10, 2014 at 09:54:36AM -0700, Jeff Janes wrote: > On Mon, Mar 10, 2014 at 6:58 AM, Pavel Stehule > wrote: > > Hello > > I had to migrate our databases from 9.1 to 9.2. We have high number of > databases per cluster (more than 1000) and high number of tables (indexes) > per database (sometimes more than 10K, exceptionally more than 100K). > > I seen two problems: > > a) too long files pg_upgrade_dump_db.sql, pg_upgrade_dump_all.sql in > postgres HOME directory. Is not possible to change a directory for these > files. > > > Those files should go into whatever your current directory is when you execute > pg_upgrade. Why not just cd into whatever directory you want them to be in? > > > > b) very slow first stage of upgrade - schema export is very slow without > high IO or CPU utilization. > > > Just the pg_upgrade executable has low IO and CPU utilization, or the entire > server does? > > There were several bottlenecks in this area removed in 9.2 and 9.3. > Unfortunately the worst of those bottlenecks were in the server, so they > depend > on what database you are upgrading from, and so won't help you much upgrading > from 9.1. Yes, I assume 9.3 will be much better, though Jeff is right that if it is pg_dump locking that is hurting you, you might not see a win even in 9.3. -- Bruce Momjian http://momjian.us EnterpriseDB http://enterprisedb.com + Everyone has their own god. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] COPY table FROM STDIN doesn't show count tag
Rajeev rastogi writes: > On 12th December 2013, Rajeev Rastogi Wrote: >> On 9th December, Amit Khandelkar wrote: >>> But copystream can be different than pset.cur_cmd_source , right ? >> As per the earlier code, condition result was always true. So pset.lineno >> was always incremented. >> In the earlier code pset.cur_cmd_source was sent as parameter to function >> and inside the function same parameter was used with the name copystream. So >> on entry of this function both will be one and same. The problem with that argument is you're assuming that the previous behavior was correct :-(. It isn't. If you try a case like this: $ cat int8data 123 456 123 4567890123456789 4567890123456789123 45678901234567894567890123456789 4567890123456789-4567890123456789 $ cat testcase.sql select 1+1; \copy int8_tbl from 'int8data' select 1/0; select 2/0; $ psql -f testcase.sql regression ?column? -- 2 (1 row) psql:testcase.sql:11: ERROR: division by zero psql:testcase.sql:13: ERROR: division by zero the script line numbers shown in the error messages are *wrong*, because handleCopyIn has incorrectly incremented pset.lineno because it thought it was reading from the current script file. So the override_file business is wrong, and getting rid of it with a separate copyStream variable is a good thing. However, there wasn't much else that I liked about the patch :-(. It seemed bizarre to me that the copy source/sink selection logic was partially in ProcessResult and partially in handleCopyOut/handleCopyIn. Also you'd created a memory leak because ProcessResult now failed to PQclear the original PGRES_COPY_OUT/IN PGresult. I did a bit of work to clean that up, and the attached updated patch is the result. Unfortunately, while testing it I noticed that there's a potentially fatal backwards-compatibility problem, namely that the "COPY n" status gets printed on stdout, which is the same place that COPY OUT data is going. 
While this isn't such a big problem for interactive use, usages like this one are pretty popular: psql -c 'copy mytable to stdout' mydatabase | some-program With the patch, "COPY n" gets included in the data sent to some-program, which never happened before and is surely not what the user wants. The same if the -c string uses \copy. There are several things we could do about this: 1. Treat this as a non-backwards-compatible change, and document that people have to use -q if they don't want the COPY tag in the output. I'm not sure this is acceptable. 2. Kluge ProcessResult so that it continues to not pass back a PGresult for the COPY TO STDOUT case, or does so only in limited circumstances (perhaps only if isatty(stdout), for instance). 3. Modify PrintQueryStatus so that command status goes to stderr not stdout. While this is probably how it should've been done in the first place, this would be a far more severe compatibility break than #1. (For one thing, there are probably scripts out there that think that any output to stderr is an error message.) I'm afraid this one is definitely not acceptable, though it would be by far the cleanest solution were it not for compatibility concerns. 4. As #3, but print the command status to stderr only if it's "COPY n", otherwise to stdout. This is a smaller compatibility break than #3, but still a break since COPY status was formerly issued to stdout in non TO STDOUT/FROM STDIN cases. (Note that PrintQueryStatus can't tell whether it was COPY TO STDOUT rather than any other kind of COPY; if we want that to factor into the behavior, we need ProcessResult to do it.) 5. Give up on the print-the-tag aspect of the change, and just fix the wrong-line-number issue (so we'd still introduce the copyStream variable, but not change how PGresults are passed around). I'm inclined to think #2 is the best answer if we can't stomach #1. But the exact rule for when to print a COPY OUT result probably still requires some debate. 
Or maybe someone has another idea? Also, I'm thinking we should back-patch the aspects of the patch needed to fix the wrong-line-number issue. That appears to have been introduced in 9.2; older versions of PG get the above example right. Comments? regards, tom lane diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 3a820fa..136eed1 100644 *** a/src/bin/psql/common.c --- b/src/bin/psql/common.c *** StoreQueryTuple(const PGresult *result) *** 628,638 * command. In that event, we'll marshal data for the COPY and then cycle * through any subsequent PGresult objects. * ! * When the command string contained no affected COPY command, this function * degenerates to an AcceptResult() call. * ! * Changes its argument to point to the last PGresult of the command string, ! * or NULL if that result was for a COPY FROM STDIN or COPY TO STDOUT. * * Returns true on complete success, false otherwise. Possible
Re: [HACKERS] Retain dynamic shared memory segments for postmaster lifetime
On Mon, Mar 10, 2014 at 12:44 PM, Amit Kapila wrote: > On Mon, Mar 10, 2014 at 9:48 PM, Amit Kapila wrote: >> On Mon, Mar 10, 2014 at 8:18 PM, Robert Haas wrote: >>> I took a look at this patch. It seems to me that it doesn't do a very >>> good job maintaining the abstraction boundary between what the dsm.c >>> layer knows about and what the dsm_impl.c layer knows about. However, >>> AFAICS, these problems are purely cosmetic, so I took a crack at >>> fixing them. I retitled the new implementation-layer function to >>> dsm_impl_keep_segment(), swapped the order of the arguments for >>> consistency with other code, adjusted the dsm_impl.c code slightly to >>> avoid assuming that only the Windows implementation works on Windows >>> (that's currently true, but we could probably make the mmap >>> implementation work there as well), and retooled some of the comments >>> to read better in English. I'm happy with the attached version but >>> don't have a Windows box to test it there. >> >> Thank you for looking into patch. I have verified that attached patch >> works fine on Windows. >> >> One observation in new version of patch: >> >> + { >> + char name[64]; >> + >> + snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle); >> + _dosmaperr(GetLastError()); >> + ereport(ERROR, >> + (errcode_for_dynamic_shared_memory(), >> + errmsg("could not duplicate handle: %m"))); >> + } > > I have updated the patch to change message as below: > errmsg("could not duplicate handle for \"%s\": %m", >name))); > > Let me know your suggestions? Looks good, committed. However, I changed it so that dsm_keep_segment() does not also perform the equivalent of dsm_keep_mapping(); those are two separate operations. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_upgrade on high number tables database issues
On Mon, Mar 10, 2014 at 6:58 AM, Pavel Stehule wrote: > Hello > > I had to migrate our databases from 9.1 to 9.2. We have high number of > databases per cluster (more than 1000) and high number of tables (indexes) > per database (sometimes more than 10K, exceptionally more than 100K). > > I seen two problems: > > a) too long files pg_upgrade_dump_db.sql, pg_upgrade_dump_all.sql in > postgres HOME directory. Is not possible to change a directory for these > files. > Those files should go into whatever your current directory is when you execute pg_upgrade. Why not just cd into whatever directory you want them to be in? > b) very slow first stage of upgrade - schema export is very slow without > high IO or CPU utilization. > Just the pg_upgrade executable has low IO and CPU utilization, or the entire server does? There were several bottlenecks in this area removed in 9.2 and 9.3. Unfortunately the worst of those bottlenecks were in the server, so they depend on what database you are upgrading from, and so won't help you much upgrading from 9.1. Cheers, Jeff
Re: [HACKERS] Retain dynamic shared memory segments for postmaster lifetime
On Mon, Mar 10, 2014 at 9:48 PM, Amit Kapila wrote: > On Mon, Mar 10, 2014 at 8:18 PM, Robert Haas wrote: >> I took a look at this patch. It seems to me that it doesn't do a very >> good job maintaining the abstraction boundary between what the dsm.c >> layer knows about and what the dsm_impl.c layer knows about. However, >> AFAICS, these problems are purely cosmetic, so I took a crack at >> fixing them. I retitled the new implementation-layer function to >> dsm_impl_keep_segment(), swapped the order of the arguments for >> consistency with other code, adjusted the dsm_impl.c code slightly to >> avoid assuming that only the Windows implementation works on Windows >> (that's currently true, but we could probably make the mmap >> implementation work there as well), and retooled some of the comments >> to read better in English. I'm happy with the attached version but >> don't have a Windows box to test it there. > > Thank you for looking into patch. I have verified that attached patch > works fine on Windows. > > One observation in new version of patch: > > + { > + char name[64]; > + > + snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle); > + _dosmaperr(GetLastError()); > + ereport(ERROR, > + (errcode_for_dynamic_shared_memory(), > + errmsg("could not duplicate handle: %m"))); > + } I have updated the patch to change message as below: errmsg("could not duplicate handle for \"%s\": %m", name))); Let me know your suggestions? With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com dsm_keep_segment_v5.patch Description: Binary data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Retain dynamic shared memory segments for postmaster lifetime
On Mon, Mar 10, 2014 at 8:18 PM, Robert Haas wrote: > I took a look at this patch. It seems to me that it doesn't do a very > good job maintaining the abstraction boundary between what the dsm.c > layer knows about and what the dsm_impl.c layer knows about. However, > AFAICS, these problems are purely cosmetic, so I took a crack at > fixing them. I retitled the new implementation-layer function to > dsm_impl_keep_segment(), swapped the order of the arguments for > consistency with other code, adjusted the dsm_impl.c code slightly to > avoid assuming that only the Windows implementation works on Windows > (that's currently true, but we could probably make the mmap > implementation work there as well), and retooled some of the comments > to read better in English. I'm happy with the attached version but > don't have a Windows box to test it there. Thank you for looking into patch. I have verified that attached patch works fine on Windows. One observation in new version of patch: + { + char name[64]; + + snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle); + _dosmaperr(GetLastError()); + ereport(ERROR, + (errcode_for_dynamic_shared_memory(), + errmsg("could not duplicate handle: %m"))); + } Now, the patch doesn't use segment *name* in errmsg which makes it and handle passed in function dsm_impl_keep_segment() redundant. I think its better to use name in errmsg as it is used at other places in code as well and make the error more meaningful. However if you feel it is better otherwise, then we should remove not required variable and parameter in function dsm_impl_keep_segment() With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Little confusing things about client_min_messages.
Hi 2014-03-10 23:45 GMT+09:00 Tom Lane : > Tomonari Katsumata writes: > > Adding FATAL and PANIC to client_min_messages is done at below-commit. > > 8ac386226d76b29a9f54c26b157e04e9b8368606 > > > http://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=8ac386226d76b29a9f54c26b157e04e9b8368606 > > > According to the commit log, it seems that the purpose > > is suppressing to be sent error message to client when "DROP TABLE". > > In those days(pre 8.1), we did not have "DROP IF EXISTS" syntax, > > so it was useful. > > > If this was the reason, now(from 8.2) we have "DROP IF EXISTS" syntax, > > Uh, that was one example of what it might be good for; I doubt that the > use-case has now vanished entirely. While I'm still dubious about the > reliability of suppressing error messages, if people have been using this > type of coding for nearly 10 years then it probably works well enough > ... and more to the point, they won't thank us for arbitrarily removing > it. > Maybe so. > > I think we should leave established practice alone here. It might be > confusing at first glance, but that doesn't mean it's the wrong thing. > > > I see. If we delete it, it may become a more confusing thing. Thank you for your opinion. regards, --- Tomonari Katsumata
Re: [HACKERS] on_exit_reset fails to clear DSM-related exit actions
Robert Haas writes: > On Fri, Mar 7, 2014 at 2:40 PM, Robert Haas wrote: >> Hmm. So the problematic sequence of events is where a postmaster >> child forks, and then exits without exec-ing, perhaps because e.g. >> exec fails? > I've attempted a fix for this case. The attached patch makes > test_shm_mq fork() a child process that calls on_exit_reset() and then > exits. Without the fix I just pushed, this causes the tests to fail; > with this fix, this does not cause the tests to fail. > I'm not entirely sure that this is exactly right, but I think it's an > improvement. This looks generally sane to me, but I wonder whether you should also teach PGSharedMemoryDetach() to physically detach from DSM segments not just the main shmem seg. Or maybe better, adjust most of the places that call on_exit_reset and then PGSharedMemoryDetach to also make a detach-from-DSM call. It looks like both sysv_shmem.c and win32_shmem.c have internal callers of PGSharedMemoryDetach, which probably shouldn't affect DSM. You could argue that that's unnecessary on the grounds that the postmaster will never have any DSM segments attached, but I think it would be good coding practice anyway. People who copy-and-paste those bits of code into other places are not likely to think of adding DSM calls. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On 03/10/2014 05:18 AM, Peter Geoghegan wrote: On Fri, Mar 7, 2014 at 9:00 AM, Bruce Momjian wrote: OK, it sounds like the adjustments are minimal, like not using the high-order bit. Attached patch is a refinement of the work of Oleg, Teodor and Andrew. Revisions are mostly my own, although Andrew contributed too. Changes include: * Extensive relocation, and moderate restructuring of code. Many comments added, while many existing comments were copy-edited. Nothing remains in contrib. jsonb is a distinct, in-core type with no user-visible relationship to hstore. There is no code dependency between the two. The amount of code redundancy this turned out to create (between jsonb and an unchanged hstore) is, in my estimation, quite acceptable. * B-Tree and hash operator classes for the core type are included. A GiST operator class, and two GIN operator classes are also included. Obviously this is where I spent most time by far. * Everything else that was in hstore in the last revision (the complement of the hstore2 opclasses) is removed entirely. The patch is much smaller. If we just consider code (excluding tests and documentation), the diffstat seems far more manageable: Thanks for your work on this. It's just occurred to me that we'll need to add hstore_to_jsonb functions and a cast to match the hstore_to_json functions and cast. That should be fairly simple - I'll work on that. It need not hold up progress with what's in this patch. cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Retain dynamic shared memory segments for postmaster lifetime
On Sat, Feb 8, 2014 at 2:31 AM, Amit Kapila wrote: >> On Thu, Feb 6, 2014 at 3:42 PM, Kyotaro HORIGUCHI >> wrote: >>> Hello, I've understood how this works and seems working as >>> expected. >>> >>> >>> The orphan section handles on postmaster have become a matter of >>> documentation. > > I had explained this in function header of dsm_keep_segment(). > >>> Besides all above, I'd like to see a comment for the win32 code >>> about the 'DuplicateHandle hack', specifically, description that >>> the DuplicateHandle pushes the copy of the section handle to the >>> postmaster so the section can retain for the postmaster lifetime. > > I had added a new function in dsm_impl.c for platform specific > handling and explained about new behaviour in function header. > > >> - "Global/PostgreSQL.%u" is the same literal constant with that >> occurred in dsm_impl_windows. It should be defined as a >> constant (or a macro). > > Changed to hash define. > >> - dsm_impl_windows uses errcode_for_dynamic_shared_memory() for >> ereport and it finally falls down to >> errcode_for_file_access(). I think it is preferable, maybe > > Changed as per suggestion. > > Please find new version of patch attached with this mail containing > above changes. I took a look at this patch. It seems to me that it doesn't do a very good job maintaining the abstraction boundary between what the dsm.c layer knows about and what the dsm_impl.c layer knows about. However, AFAICS, these problems are purely cosmetic, so I took a crack at fixing them. I retitled the new implementation-layer function to dsm_impl_keep_segment(), swapped the order of the arguments for consistency with other code, adjusted the dsm_impl.c code slightly to avoid assuming that only the Windows implementation works on Windows (that's currently true, but we could probably make the mmap implementation work there as well), and retooled some of the comments to read better in English. 
I'm happy with the attached version but don't have a Windows box to test it there. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c index 31e592e..c7dc971 100644 --- a/src/backend/storage/ipc/dsm.c +++ b/src/backend/storage/ipc/dsm.c @@ -886,6 +886,34 @@ dsm_keep_mapping(dsm_segment *seg) } /* + * Keep a dynamic shared memory segment until postmaster shutdown. + * + * This function should not be called more than once per segment; + * on Windows, doing so will create unnecessary handles which will + * consume system resources to no benefit. + */ +void +dsm_keep_segment(dsm_segment *seg) +{ + /* + * Bump reference count for this segment in shared memory. This will + * ensure that even if there is no session which is attached to this + * segment, it will remain until postmaster shutdown. + */ + LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); + dsm_control->item[seg->control_slot].refcnt++; + LWLockRelease(DynamicSharedMemoryControlLock); + + dsm_impl_keep_segment(seg->handle, seg->impl_private); + + if (seg->resowner != NULL) + { + ResourceOwnerForgetDSM(seg->resowner, seg); + seg->resowner = NULL; + } +} + +/* * Find an existing mapping for a shared memory segment, if there is one. */ dsm_segment * diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c index a8d8a64..221044a 100644 --- a/src/backend/storage/ipc/dsm_impl.c +++ b/src/backend/storage/ipc/dsm_impl.c @@ -67,6 +67,7 @@ #include "storage/fd.h" #include "utils/guc.h" #include "utils/memutils.h" +#include "postmaster/postmaster.h" #ifdef USE_DSM_POSIX static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size, @@ -113,6 +114,8 @@ int dynamic_shared_memory_type; /* Size of buffer to be used for zero-filling. 
*/ #define ZBUFFER_SIZE8192 +#define SEGMENT_NAME_PREFIX "Global/PostgreSQL" + /*-- * Perform a low-level shared memory operation in a platform-specific way, * as dictated by the selected implementation. Each implementation is @@ -635,7 +638,7 @@ dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size, * convention similar to main shared memory. We can change here once * issue mentioned in GetSharedMemName is resolved. */ - snprintf(name, 64, "Global/PostgreSQL.%u", handle); + snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle); /* * Handle teardown cases. Since Windows automatically destroys the object @@ -982,6 +985,45 @@ dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size, } #endif +/* + * Implementation-specific actions that must be performed when a segment + * is to be preserved until postmaster shutdown. + * + * Except on Windows, we don't need to do anything at all. But since Windows + * cleans up segments automatically when no references remain, we duplicate + * the segment handle into the postmaster process. The postmaster needn't + * do anything to receive the
Re: [HACKERS] Little confusing things about client_min_messages.
Tomonari Katsumata writes: > Adding FATAL and PANIC to client_min_messages is done at below-commit. > 8ac386226d76b29a9f54c26b157e04e9b8368606 > http://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=8ac386226d76b29a9f54c26b157e04e9b8368606 > According to the commit log, it seems that the purpose > is suppressing to be sent error message to client when "DROP TABLE". > In those days(pre 8.1), we did not have "DROP IF EXISTS" syntax, > so it was useful. > If this was the reason, now(from 8.2) we have "DROP IF EXISTS" syntax, Uh, that was one example of what it might be good for; I doubt that the use-case has now vanished entirely. While I'm still dubious about the reliability of suppressing error messages, if people have been using this type of coding for nearly 10 years then it probably works well enough ... and more to the point, they won't thank us for arbitrarily removing it. I think we should leave established practice alone here. It might be confusing at first glance, but that doesn't mean it's the wrong thing. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] calculating an aspect of shared buffer state from a background worker
Robert Berry writes: > I'm looking at doing a calculation to determine the number of free buffers > available. An example ratio that is based on some data structures in > freelist.c as follows: > (StrategyControl->lastFreeBuffer - StrategyControl->firstFreeBuffer) / > (double) NBuffers > Is there a way to get access to the StrategyControl pointer in the context > of a background worker? The BufferStrategyControl struct is in shared memory, so you can certainly get at it. One way would be to modify freelist.c to export its static pointer variable. Alternatively, you could call ShmemInitStruct an extra time to look up the struct for yourself, and then save it in your own static variable. Having said that, though, I'm pretty dubious of the premise. I trust you realize that the above calculation is entirely wrong; firstFreeBuffer and lastFreeBuffer are list head and tail pointers, and have no numerical relation to the list length. The only way to determine the list length accurately would be to chase down the whole list, which you'd have to hold the BufFreelistLock while doing, which'd be disastrous for performance if the list was long. (If you're okay with modifying the backend code you could dodge this by teaching freelist.c to maintain a counter, I guess.) An even bigger issue is that it's not clear that the length of the free list is actually a useful number to have; in steady-state usage it frequently is always zero. Buffers only get put back on the freelist if they're invalidated, eg by dropping the relation they belonged to. Normal usage tends to allocate buffers by reclaiming ones whose usage_count has reached zero in the clock sweep algorithm. So a better picture of the availability of buffers would require scanning the buffer pool to see how many there are of each usage_count level. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Performance Improvement by reducing WAL for Update Operation
On Tue, Mar 4, 2014 at 4:13 PM, Heikki Linnakangas wrote: > Agreed. Amit, do you have the test setup at hand, can you check the > performance of this one more time? Are you expecting more performance numbers than I have posted? Is there anything more left for patch which you are expecting? With Regards, Amit Kapila. EnterpriseDB: http://www.enterprisedb.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] on_exit_reset fails to clear DSM-related exit actions
On Fri, Mar 7, 2014 at 2:40 PM, Robert Haas wrote: >> The big picture here is that in the scenario being debated in the other >> thread, exit() in a child process forked from a backend will execute that >> backend's on_detach actions *even if the code had done on_exit_reset after >> the fork*. > > Hmm. So the problematic sequence of events is where a postmaster > child forks, and then exits without exec-ing, perhaps because e.g. > exec fails? I've attempted a fix for this case. The attached patch makes test_shm_mq fork() a child process that calls on_exit_reset() and then exits. Without the fix I just pushed, this causes the tests to fail; with this fix, this does not cause the tests to fail. I'm not entirely sure that this is exactly right, but I think it's an improvement. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company diff --git a/contrib/test_shm_mq/test.c b/contrib/test_shm_mq/test.c index 59f18ec..f6b9dd4 100644 --- a/contrib/test_shm_mq/test.c +++ b/contrib/test_shm_mq/test.c @@ -13,8 +13,11 @@ #include "postgres.h" +#include + #include "fmgr.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "test_shm_mq.h" @@ -72,6 +75,15 @@ test_shm_mq(PG_FUNCTION_ARGS) /* Set up dynamic shared memory segment and background workers. */ test_shm_mq_setup(queue_size, nworkers, &seg, &outqh, &inqh); + /* Try forking a child that immediately exits. */ + if (fork() == 0) + { + elog(LOG, "child is PID %d", getpid()); + on_exit_reset(); + exit(0); + } + elog(LOG, "parent is PID %d", getpid()); + /* Send the initial message. */ res = shm_mq_send(outqh, message_size, message_contents, false); if (res != SHM_MQ_SUCCESS) -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] pg_upgrade on high number tables database issues
Hello I had to migrate our databases from 9.1 to 9.2. We have high number of databases per cluster (more than 1000) and high number of tables (indexes) per database (sometimes more than 10K, exceptionally more than 100K). I have seen two problems: a) too long files pg_upgrade_dump_db.sql, pg_upgrade_dump_all.sql in postgres HOME directory. It is not possible to change the directory for these files. b) very slow first stage of upgrade - schema export is very slow without high IO or CPU utilization. Regards Pavel
Re: [HACKERS] Standby node using replication slot not visible in pg_stat_replication while catching up
On Mon, Mar 10, 2014 at 9:24 PM, Andres Freund wrote: > Hi, > > On 2014-03-10 21:06:53 +0900, Michael Paquier wrote: >> I have been playing a bit with the replication slots, and I noticed a >> weird behavior in such a scenario: >> 1) Create a master/slave cluster, and have slave use a replication slot >> 2) Stop the master >> 3) Create a certain amount of WAL, during my tests I played with 4~5GB of WAL >> 4) Restart the slave, it catches up with the WALs that master has >> retained in pg_xlog. >> I noticed that while the standby using the replication slot catches >> up, it is not visible in pg_stat_replication on master. This makes >> monitoring of the replication lag difficult to follow, particularly in >> the case where the standby disconnects from the master. Once the >> standby has caught up, it reappears once again in pg_stat_replication. >> I didn't have a look at the code to see what is happening, but is this >> behavior expected? > > Does the use of replication slots actually alter the behaviour? I don't > see how the slot code could influence things to that degree here. Could > it be that it's just restoring code from the standby's pg_xlog or using > restore_command? Sorry for the noise, I'm feeling stupid. Yes the standby was using a restore_command so it recovered the WAL from archives before reporting activity back to master. -- Michael -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Standby node using replication slot not visible in pg_stat_replication while catching up
Hi, On 2014-03-10 21:06:53 +0900, Michael Paquier wrote: > I have been playing a bit with the replication slots, and I noticed a > weird behavior in such a scenario: > 1) Create a master/slave cluster, and have slave use a replication slot > 2) Stop the master > 3) Create a certain amount of WAL, during my tests I played with 4~5GB of WAL > 4) Restart the slave, it catches up with the WALs that master has > retained in pg_xlog. > I noticed that while the standby using the replication slot catches > up, it is not visible in pg_stat_replication on master. This makes > monitoring of the replication lag difficult to follow, particularly in > the case where the standby disconnects from the master. Once the > standby has caught up, it reappears once again in pg_stat_replication. > I didn't have a look at the code to see what is happening, but is this > behavior expected? Does the use of replication slots actually alter the behaviour? I don't see how the slot code could influence things to that degree here. Could it be that it's just restoring code from the standby's pg_xlog or using restore_command? Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] WIP patch (v2) for updatable security barrier views
On 10 March 2014 03:36, Craig Ringer wrote: > I've found an issue with updatable security barrier views. Locking is > being pushed down into the subquery. Locking is thus applied before > user-supplied quals are, so we potentially lock too many rows. > > I'm looking into the code now, but in the mean time, here's a demo of > the problem: > > > > regress=> CREATE TABLE t1(x integer, y integer); > CREATE TABLE > regress=> INSERT INTO t1(x,y) VALUES (1,1), (2,2), (3,3), (4,4); > INSERT 0 4 > regress=> CREATE VIEW v1 WITH (security_barrier) AS SELECT x, y FROM t1 > WHERE x % 2 = 0; > CREATE VIEW > regress=> EXPLAIN SELECT * FROM v1 FOR UPDATE; > QUERY PLAN > --- > LockRows (cost=0.00..42.43 rows=11 width=40) >-> Subquery Scan on v1 (cost=0.00..42.32 rows=11 width=40) > -> LockRows (cost=0.00..42.21 rows=11 width=14) >-> Seq Scan on t1 (cost=0.00..42.10 rows=11 width=14) > Filter: ((x % 2) = 0) > Planning time: 0.140 ms > (6 rows) > That has nothing to do with *updatable* security barrier views, because that's not an update. In fact you get that exact same plan on HEAD, without the updatable security barrier views patch. > > or, preventing pushdown with a wrapper function to demonstrate the problem: > > regress=> CREATE FUNCTION is_one(integer) RETURNS boolean AS $$ DECLARE > result integer; BEGIN SELECT $1 = 1 > > regress=> EXPLAIN SELECT * FROM v1 WHERE is_one(x) FOR UPDATE; > QUERY PLAN > --- > LockRows (cost=0.00..45.11 rows=4 width=40) >-> Subquery Scan on v1 (cost=0.00..45.07 rows=4 width=40) > Filter: is_one(v1.x) > -> LockRows (cost=0.00..42.21 rows=11 width=14) >-> Seq Scan on t1 (cost=0.00..42.10 rows=11 width=14) > Filter: ((x % 2) = 0) > Planning time: 0.147 ms > (7 rows) > > > > > > > OK, so it looks like the code: > > > > /* > * Now deal with any PlanRowMark on this RTE by requesting a > lock > * of the same strength on the RTE copied down to the subquery. 
> */ > rc = get_plan_rowmark(root->rowMarks, rt_index); > if (rc != NULL) > { > switch (rc->markType) > { > /* */ > } > root->rowMarks = list_delete(root->rowMarks, rc); > } > > > isn't actually appropriate. We should _not_ be pushing locking down into > the subquery. > That code isn't being invoked in this test case because you're just selecting from the view, so it's the normal view expansion code, not the new securityQuals expansion code. > Instead, we should be retargeting the rowmark so it points to the new > subquery RTE, marking rows _after_filtering. We want a plan like: > > > > regress=> EXPLAIN SELECT * FROM v1 WHERE is_one(x) FOR UPDATE; > QUERY PLAN > --- > LockRows (cost=0.00..45.11 rows=4 width=40) >-> Subquery Scan on v1 (cost=0.00..45.07 rows=4 width=40) > Filter: is_one(v1.x) > -> Seq Scan on t1 (cost=0.00..42.10 rows=11 width=14) >Filter: ((x % 2) = 0) > Planning time: 0.147 ms > (7 rows) > > > I'm not too sure what the best way to do that is. Time permitting I'll > see if I can work out the RowMark code and set something up. > Agreed, that seems like it would be a saner plan, but the place to look to fix it isn't in the updatable security barriers view patch. Perhaps the updatable security barriers view patch suffers from the same problem, but first I'd like to know what that problem is in HEAD. Regards, Dean -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Standby node using replication slot not visible in pg_stat_replication while catching up
Hi all, I have been playing a bit with the replication slots, and I noticed a weird behavior in such a scenario: 1) Create a master/slave cluster, and have slave use a replication slot 2) Stop the master 3) Create a certain amount of WAL, during my tests I played with 4~5GB of WAL 4) Restart the slave, it catches up with the WALs that master has retained in pg_xlog. I noticed that while the standby using the replication slot catches up, it is not visible in pg_stat_replication on master. This makes monitoring of the replication lag difficult to follow, particularly in the case where the standby disconnects from the master. Once the standby has caught up, it reappears once again in pg_stat_replication. I didn't have a look at the code to see what is happening, but is this behavior expected? Regards, -- Michael -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Patch: show relation and tuple infos of a lock to acquire
Hi, thanks for the continued review. On 09/03/14 12:15, Amit Kapila wrote: > 1. > "> ISTM that you should be printing just the value and the unique index > > there, and not any information about the tuple proper. > > Good point, I will have a look at this." > > This point is still not handled in patch, […] There have been some complaints about this by Andres, so I left it. > […] due to which the message it displays seems to be > incomplete. Message it displays is as below: > > LOG: process 1800 still waiting for ShareLock on transaction 679 after > 1014.000 > ms > CONTEXT: while attempting to lock in relation "public"."idx_t1" of database > pos > tgres Well, there is no tuple information available, so we have to leave it out. > Here it is not clear what it attempts *lock in* Ok, I reworded the message as you suggested below. Should make this clearer as well. > 2. In function XactLockTableWaitErrorContextCallback(), ignore dropped > columns in tuple, else it will lead to following failure: > […] > Problem is in Session-2 (cache lookup failed), when it tries to print values > for dropped attribute. Urghs. I really thought I had done this in the patch before. Thanks for pointing out, fixed in attached patch. > 3. > " in relation \"%s\".\"%s\" of database %s", > Why to display only relation name in quotes? > I think database name should also be displayed in quotes. Didn't think that it would make sense; the quoting makes sense for the schema and relation name, but I did not see any need to quote the database name. However, fixed in attached patch. > 4. > Context message: > "while attempting to lock tuple (0,2) ". > > I think it will be better to rephrase it (may be by using 'operate on' > instead of 'lock'). > The reason is that actually we trying to lock on transaction, so it doesn't > make > good sense to use "lock on tuple" Fixed. > 5. 
> + XactLockTableWaitWithInfo(relation, &tp, xwait); > > + MultiXactIdWaitWithErrorContext(relation, &tp, (MultiXactId) xwait, > + MultiXactStatusUpdate, NULL, infomask); > > I think we should make the name of MultiXactIdWaitWithErrorContext() > similar to XactLockTableWaitWithInfo. Fixed as well. Can you explain why you prefer the WithInfo naming? Just curious, it seemed to me the more descriptive name should have been preferred. > 6. > @@ -1981,7 +1981,8 @@ EvalPlanQualFetch(EState *estate, Relation > relation, int lockmode, > if (TransactionIdIsValid(SnapshotDirty.xmax)) > { > ReleaseBuffer(buffer); > - XactLockTableWait(SnapshotDirty.xmax); > + XactLockTableWaitWithInfo(relation, &tuple, > + SnapshotDirty.xmax); > > In function EvalPlanQualFetch(), we are using SnapshotDirty to fetch > the tuple, so I think there is a chance that it will log the tuple which > otherwise will not be visible. I don't think this is right. Hm, after checking source I tend to agree. I replaced it with NULL. > 8. > void > WaitForLockers(LOCKTAG heaplocktag, LOCKMODE lockmode) > { > - List *l; > + List *l; > > Extra spacing not needed. What is the policy to do here? The extra spaces have been added by pg_indent, and there have been more changes in a couple of other files which I had to drop manually as well. How should this been handled generally? > 9. > /* > * XactLockTableWaitErrorContextCallback > * error context callback set up by > * XactLockTableWaitWithInfo. It reports some > * tuple information and the relation of a lock to aquire > * > */ > Please improve indentation of above function header Heh. Seems that Emacs' fill-paragraph messed this up. Thanks, fixed. > 10. > +#include "access/htup.h" > + > +struct XactLockTableWaitLockInfo > +{ > + Relation rel; > + HeapTuple tuple; > +}; > > I think it is better to add comments for this structure. > You can refer comments for struct HeapUpdateFailureData Fixed. > > 11. 
> + * > + * Use this instead of calling XactTableLockWait() > > In above comment, function name used is wrong and I am not sure this > is right statement to use because still we are using XactLockTableWait() > at few places like in function Do_MultiXactIdWait() […] Fixed function name, but I'd like to keep the wording; Do_MultiXactIdWait() is wrapped by MultiXactIdWaitWithInfo() and therefore sets up the context as well. > […] and recently this is used in function SnapBuildFindSnapshot(). Hm, should we add the context there, too? > 12. > heap_update() > { > .. > .. > /* > * There was no UPDATE in the MultiXact; or it aborted. No > * TransactionIdIsInProgress() call needed here, since we called > * MultiXactIdWait() above. > > Change the function name in above comment. Fixed. Best regards, -- Christian Kruse http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 71ec740..0cf3537 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -107,6 +1
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 3:04 PM, Peter Geoghegan wrote: > On Mon, Mar 10, 2014 at 3:47 AM, Alexander Korotkov > wrote: > > Fix is attached. > > Could you post a patch with regression tests, please? > Here it is. -- With best regards, Alexander Korotkov. jsonb-hash-ops-fix-2.patch Description: Binary data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 3:47 AM, Alexander Korotkov wrote: > Fix is attached. Could you post a patch with regression tests, please? -- Peter Geoghegan -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 2:19 PM, Peter Geoghegan wrote: > On Mon, Mar 10, 2014 at 3:00 AM, Alexander Korotkov > wrote: > > I din't get comment about "leftmost" element. There is absolutely no > > distinguish between array elements. All elements are extracted into same > > keys independent of their indexes. It seems to have no change since I > wrote > > hstore_hash_ops. Could you share test case to illustrate what you mean? > > I don't have time to post that at the moment, but offhand I *think* > your confusion may be due to the fact that the json_hash_ops opclass > (as I call it) was previously consistent with the behavior of the > other GIN opclass (the default). The problem is that they (well, at > least the default GIN and GiST opclasses) were inconsistent with how > the containment operator behaved in respect of jsonb array elements > generally. > > Here is the commit on our feature branch where I fixed the problem for > the default GIN opclass: > > > https://github.com/feodor/postgres/commit/6f5e4fe9fc34f9512919b1c8b6a54952ab288640s > > If it doesn't explain the problem, you may still wish to comment on > the correctness of this fix. I am still waiting on feedback from Oleg > and Teodor. > Apparently, there is bug in calculation of hashes. Array elements were hashed incrementally while each of them should be hashed separately. That cause an effect of distinguishing array elements by their indexes. Not sure about when this bug was added. Fix is attached. -- With best regards, Alexander Korotkov. jsonb-hash-ops-fix.patch Description: Binary data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 3:21 AM, Peter Geoghegan wrote: > Sorry, I realize now that that must be incorrect. Still, please take a > look at the commit linked to. To be clear, I mean that my explanation of why this was missed before was incorrect, not my contention that it's a problem right now (for whatever reason). I fat-fingered the URL that linked to the GIN opclass bugfix commit: https://github.com/feodor/postgres/commit/6f5e4fe9fc34f9512919b1c8b6a54952ab288640 -- Peter Geoghegan -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 3:19 AM, Peter Geoghegan wrote: > I don't have time to post that at the moment, but offhand I *think* > your confusion may be due to the fact that the json_hash_ops opclass > (as I call it) was previously consistent with the behavior of the > other GIN opclass (the default). The problem is that they (well, at > least the default GIN and GiST opclasses) were inconsistent with how > the containment operator behaved in respect of jsonb array elements > generally. Sorry, I realize now that that must be incorrect. Still, please take a look at the commit linked to. -- Peter Geoghegan -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 3:00 AM, Alexander Korotkov wrote: > I din't get comment about "leftmost" element. There is absolutely no > distinguish between array elements. All elements are extracted into same > keys independent of their indexes. It seems to have no change since I wrote > hstore_hash_ops. Could you share test case to illustrate what you mean? I don't have time to post that at the moment, but offhand I *think* your confusion may be due to the fact that the json_hash_ops opclass (as I call it) was previously consistent with the behavior of the other GIN opclass (the default). The problem is that they (well, at least the default GIN and GiST opclasses) were inconsistent with how the containment operator behaved in respect of jsonb array elements generally. Here is the commit on our feature branch where I fixed the problem for the default GIN opclass: https://github.com/feodor/postgres/commit/6f5e4fe9fc34f9512919b1c8b6a54952ab288640s If it doesn't explain the problem, you may still wish to comment on the correctness of this fix. I am still waiting on feedback from Oleg and Teodor. -- Peter Geoghegan -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Database/kernel community topic at Collaboration Summit 2014
Hi, Arrangements have been made to hold a meeting between database and kernel developers at Collaboration Summit 2014 http://sched.co/1hEBRuq on March 27th 2014. This was organised after discussions on pain points encountered by the PostgreSQL community. Originally the plan had been to just have a topic for LSF/MM there was much more interest in the topic than anticipated so the Collaboration Summit meeting will be much more open. If there are developers attending Collaboration Summit that work in the database or kernel communities, it would be great if you could come along. Previous discussions were on the PostgreSQL list and that should be expanded in case we accidentally build postgres-only features. The intent is to identify the problems encountered by databases and where relevant, test cases that can be used to demonstrate them if they exist. While the kernel community may be aware of some of the problems, they are not always widely known or understood. There is a belief that some interfaces are fine when in reality applications cannot use them properly. The ideal outcome of the meeting would be concrete proposals on kernel features that could be developed over the course of time to address any identified problem. For reference, this is a summary of the discussion that took place when the topic was proposed for LSF/MM. Thanks. ---8<--- On testing of modern kernels Josh Berkus claims that most people are using Postgres with 2.6.19 and consequently there may be poor awareness of recent kernel developments. This is a disturbingly large window of opportunity for problems to have been introduced. Minimally, Postgres has concerns about IO-related stalls which may or may not exist in current kernels. There were indications that large writes starve reads. There have been variants of this style of bug in the past but it's unclear what the exact shape of this problem is and if IO-less dirty throttling affected it. 
It is possible that Postgres was burned in the past by data being written back from reclaim context in low memory situations. That would have looked like massive stalls with drops in IO throughput but it was fixed in relatively recent kernels. Any data on historical tests would be helpful. Alternatively, a pgbench-based reproduction test could potentially be used by people in the kernel community that track performance over time and have access to a suitable testing rig. It was mentioned that Postgres has a tool called pg_test_fsync which was mentioned in the context of testing different wal_sync_methods. Potentially it could also be used for evaluating some kernel patches. Gregory Smith highlighted the existence of a benchmark wrapper for pgbench called pgbench-tools: https://github.com/gregs1104/pgbench-tools . It can track statistics of interest to Postgres as well as report interesting metrics such as transaction latency. He had a lot of information on testing requirements and some very interesting tuning information and it's worth reading the whole mail http://www.postgresql.org/message-id/52d99161.60...@gmail.com Postgres bug reports and LKML - It is claimed that LKML does not welcome bug reports but it's less clear what the basis of this claim is. Is it because the reports are ignored? A possible explanation is that they are simply getting lost in the LKML noise and there would be better luck if the bug report was cc'd to a specific subsystem list. A second possibility is the bug report is against an old kernel and unless it is reproduced on a recent kernel the bug report will be ignored. Finally it is possible that there is not enough data available to debug the problem. The worst explanation is that to date the problem has not been fixable but the details of this have been lost and are now unknown. Is it possible that some of these bug reports can be refreshed so at least there is a chance they get addressed? 
Apparently there were changes to the reclaim algorithms that crippled performance without any sysctls. The problem may be compounded by the introduction of adaptive replacement cache in the shape of the thrash detection patches currently being reviewed. Postgres investigated the use of ARC in the past and ultimately abandoned it. Details are in the archives (http://www.Postgres.org/search/?m=1&q=arc&l=1&d=-1&s=r). I have not read then, just noting they exist for future reference. Sysctls to control VM behaviour are not popular as such tuning parameters are often used as an excuse to not properly fix the problem. Would it be possible to describe a test case that shows 2.6.19 performing well and a modern kernel failing? That would give the VM people a concrete basis to work from to either fix the problem or identify exactly what sysctls are required to make this work. I am confident that any bug related to VM reclaim in this area has been lost. At least, I recall no instances of it being discussed on li
Re: [HACKERS] jsonb and nested hstore
On Mon, Mar 10, 2014 at 1:18 PM, Peter Geoghegan wrote: > * The jsonb_hash_ops non-default GIN opclass is broken. It has its own > idiosyncratic notion of what constitutes containment, that sees it > only return, say, jsonb arrays that have a matching string as their > "leftmost" element (if we ask it if it contains within it another > array with the same string). Because of the limited number of > indexable operators (only @>), I'd put this opclass in the same > category as GiST in terms of my willingness to forgo it for a release, > even if it did receive a loud applause at pgConf.EU. Again, it might > be some disparity between the opertors as they existed in hstore2 at > one time, and as they exist in the core code now, but I doubt it, not > least since the regression tests didn't pick this up, and it's such a > basic thing. Perhaps Oleg and Teodor just need to explain this to me. > I din't get comment about "leftmost" element. There is absolutely no distinguish between array elements. All elements are extracted into same keys independent of their indexes. It seems to have no change since I wrote hstore_hash_ops. Could you share test case to illustrate what you mean? -- With best regards, Alexander Korotkov.
Re: [HACKERS] Row-security on updatable s.b. views
On 03/08/2014 01:56 AM, Tom Lane wrote: > Craig Ringer writes: >> What I'm concerned about is the locking. It looks to me like we're >> causing the user to lock rows that they may not intend to lock, by >> applying a LockRows step *before* the user supplied qual. (I'm going to >> test that tomorrow, it's sleep time in Australia). > > The fact that there are two LockRows nodes seems outright broken. > The one at the top of the plan is correctly placed, but how did the > other one get in there? I initially thought it was the updatable security barrier views code pushing the RowMark down into the generated subquery. But if I remove the pushdown code the inner LockRows node still seems to get emitted. In fact, it's not a new issue. In vanilla 9.3.1: regress=> select version(); version -- PostgreSQL 9.3.1 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.8.1 20130603 (Red Hat 4.8.1-1), 64-bit (1 row) regress=> CREATE TABLE t1(x integer, y integer); CREATE TABLE regress=> INSERT INTO t1(x,y) VALUES (1,1), (2,2), (3,3), (4,4); INSERT 0 4 regress=> CREATE VIEW v1 WITH (security_barrier) AS SELECT x, y FROM t1 WHERE x % 2 = 0; CREATE VIEW regress=> CREATE OR REPLACE FUNCTION user_qual() RETURNS boolean AS $$ BEGIN RETURN TRUE; END; $$ LANGUAGE plpgsql; CREATE FUNCTION regress=> EXPLAIN SELECT * FROM v1 WHERE user_qual() FOR UPDATE; QUERY PLAN --- LockRows (cost=0.00..45.11 rows=4 width=40) -> Subquery Scan on v1 (cost=0.00..45.07 rows=4 width=40) Filter: user_qual() -> LockRows (cost=0.00..42.21 rows=11 width=14) -> Seq Scan on t1 (cost=0.00..42.10 rows=11 width=14) Filter: ((x % 2) = 0) (6 rows) so it looks like security barrier views are locking rows they should not be. 
I can confirm that on 9.3.1 with: CREATE OR REPLACE FUNCTION row_is(integer, integer) RETURNS boolean as $$ begin return (select $1 = $2); end; $$ language plpgsql; then in two sessions: SELECT * FROM v1 WHERE row_is(x, 2) FOR UPDATE; and SELECT * FROM v1 WHERE row_is(x, 4) FOR UPDATE; These should not block each other, but do. So there's a pre-existing bug here. -- Craig Ringer http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [bug fix] pg_ctl always uses the same event source
From: "Amit Kapila" If I understand correctly that objection was on changing Default Event Source name, and the patch now doesn't contain that change, it's just a bug fix for letting pg_ctl know the non-default event source set by user. Please clarify if I misunderstood something, else this should be changed to Ready For Committer. Tom/Andres, please let me know if you have objection for this patch, because as per my understanding all the objectionable part of patch is removed from final patch and it's a defect fix to make pg_ctl aware of Event Source name set in postgresql.conf. If there is no objection, I will again change it to Ready For Committer. Hi, Amit-san, I really appreciate your cooperation. Yes, I removed the default value change that caused objection, so the patch can be marked ready for committer. I understand the patch was marked needs for review by misunderstanding Tom-san's opinion. I remember that I read "silence means no objection, or implicit agreement" somewhere in the community site or ML. So I think it would be no problem to set the status to ready for committer again. Regards MauMau -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] extension_control_path
Peter Eisentraut writes: > Aside from those details, it seems clear that any reasonably complete > move-extensions-elsewhere feature will need some kind of build system > support. I have various ideas on that and would gladly contribute some > of them, but it's not going to happen within two weeks. +1 Note that I am currently working on such a build system, so feel free to send me off-list emails about your thoughs, I'm interested and could integrate them into what I'm building. > At this point I suggest that we work toward the minimum viable product: > the extension_control_path feature as originally proposed (plus the > crash fixes), and let the field work out best practices. As you > describe, you can work around all the other issues by patching various > text files, but you currently cannot move the extension control file in > any way, and that's a real deficiency. (I once experimented with bind > mounts to work around that -- a real mess ;-) ) Please find attached the v2 version of the patch, including fixes for the crash and documentation aspects you've listed before. Regards, -- Dimitri Fontaine06 63 07 10 78 http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support *** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *** *** 6008,6013 dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' --- 6008,6068 + + extension_control_path (string) + +extension_control_path configuration parameter + + extension packaging + + + The command CREATE EXTENSION searches for the extension + control file in order to install it. The value + for extension_control_path is used to search for + the name.control files. + + + + Note that unless using the directory control file + parameter, the extension scripts and auxilliary files are searched in + the extension_control_path too. + + + + The value for extension_control_path must be a list + of absolute directory paths separated by colons (or semi-colons on + Windows). 
If a list element starts with the special + string $extdir, the + compiled-in PostgreSQL package extension + directory is substituted for $extdir; this is where + the extensions provided by the standard + PostgreSQL distribution are installed. + (Use pg_config --extdir to find out the name of + this directory.) For example: + + extension_control_path = '/usr/local/postgresql/extension:/home/my_project:$extdir' + + or, in a Windows environment: + + extension_control_path = 'C:\tools\postgresql;H:\my_project\lib;$extdir' + + + + + The default value for this parameter is '$extdir'. + + + + This parameter can be changed at run time by superusers, but a + setting done that way will only persist until the end of the + client connection, so this method should be reserved for + development purposes. The recommended way to set this parameter + is in the postgresql.conf configuration + file. + + + + gin_fuzzy_search_limit (integer) *** a/src/backend/commands/extension.c --- b/src/backend/commands/extension.c *** *** 25,30 --- 25,31 #include #include + #include #include #include "access/htup_details.h" *** *** 60,71 --- 61,76 bool creating_extension = false; Oid CurrentExtensionObject = InvalidOid; + /* GUC extension_control_path */ + char *Extension_control_path; + /* * Internal data structure to hold the results of parsing a control file */ typedef struct ExtensionControlFile { char *name; /* name of the extension */ + char *filename; /* full path of the extension control file */ char *directory; /* directory for script files */ char *default_version; /* default install target version, if any */ char *module_pathname; /* string to substitute for MODULE_PATHNAME */ *** *** 342,397 is_extension_script_filename(const char *filename) return (extension != NULL) && (strcmp(extension, ".sql") == 0); } static char * ! get_extension_control_directory(void) { char sharepath[MAXPGPATH]; ! char *result; get_share_path(my_exec_path, sharepath); ! result = (char *) palloc(MAXPGPATH); ! 
snprintf(result, MAXPGPATH, "%s/extension", sharepath); ! return result; } static char * ! get_extension_control_filename(const char *extname) { - char sharepath[MAXPGPATH]; char *result; ! get_share_path(my_exec_path, sharepath); result = (char *) palloc(MAXPGPATH); ! snprintf(result, MAXPGPATH, "%s/extension/%s.control", ! sharepath, extname); return
Re: [HACKERS] inherit support for foreign tables
Hello. As a minimal implementation, I made an attempt that emits a NOTICE message when alter table affects foreign tables. It looks like the following, | =# alter table passwd add column added int, add column added2 int; | NOTICE: This command affects foreign relation "cf1" | NOTICE: This command affects foreign relation "cf1" | ALTER TABLE | =# select * from passwd; | ERROR: missing data for column "added" | CONTEXT: COPY cf1, line 1: "root:x:0:0:root:/root:/bin/bash" | =# This seems far better than silently performing the command, except for the duplicated message:( A new bitmap might be required to avoid the duplication.. I made the changes above and below as an attempt in the attached patch foreign_inherit-v4.patch > I think the problem is foreign children in inheritance tables > prevent all members in the inheritance relation from using > parameterized paths, correct? ... > Hmm. I tried a minimal implementation to do that. This omits cost > recalculation but seems to work as expected. This seems enough if > cost recalc is trivial here. Any comments? regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 8ace8bd..b4e53c1 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -258,6 +258,17 @@ CREATE TABLE products ( even if the value came from the default value definition. + + +Note that constraints can be defined on foreign tables too, but such +constraints are not enforced on insert or update. Those constraints are +"assertive", and work only to tell the planner that some kind of optimization +such as constraint exclusion can be considered. This seems useless, but +allows us to use a foreign table as a child table (see +) to off-load to multiple servers. 
+ + + Check Constraints @@ -2017,8 +2028,8 @@ CREATE TABLE capitals ( - In PostgreSQL, a table can inherit from - zero or more other tables, and a query can reference either all + In PostgreSQL, a table or foreign table can + inherit from zero or more other tables, and a query can reference either all rows of a table or all rows of a table plus all of its descendant tables. The latter behavior is the default. For example, the following query finds the names of all cities, diff --git a/doc/src/sgml/ref/alter_foreign_table.sgml b/doc/src/sgml/ref/alter_foreign_table.sgml index 4d8cfc5..f7a382e 100644 --- a/doc/src/sgml/ref/alter_foreign_table.sgml +++ b/doc/src/sgml/ref/alter_foreign_table.sgml @@ -42,6 +42,8 @@ ALTER FOREIGN TABLE [ IF EXISTS ] namecolumn_name SET ( attribute_option = value [, ... ] ) ALTER [ COLUMN ] column_name RESET ( attribute_option [, ... ] ) ALTER [ COLUMN ] column_name OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ]) +INHERIT parent_table +NO INHERIT parent_table OWNER TO new_owner OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ]) @@ -178,6 +180,26 @@ ALTER FOREIGN TABLE [ IF EXISTS ] name +INHERIT parent_table + + + This form adds the target foreign table as a new child of the specified + parent table. The parent table must be a plain table. + + + + + +NO INHERIT parent_table + + + This form removes the target foreign table from the list of children of + the specified parent table. + + + + + OPTIONS ( [ ADD | SET | DROP ] option ['value'] [, ... ] ) @@ -306,6 +328,16 @@ ALTER FOREIGN TABLE [ IF EXISTS ] name + + + parent_name + + +A parent table to associate or de-associate with this foreign table. +The parent table must be a plain table. 
+ + + diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml index 06a7087..cc11dee 100644 --- a/doc/src/sgml/ref/create_foreign_table.sgml +++ b/doc/src/sgml/ref/create_foreign_table.sgml @@ -22,6 +22,7 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name column_name data_type [ OPTIONS ( option 'value' [, ... ] ) ] [ COLLATE collation ] [ column_constraint [ ... ] ] [, ... ] ] ) +[ INHERITS ( parent_table [, ... ] ) ] SERVER server_name [ OPTIONS ( option 'value' [, ... ] ) ] @@ -159,6 +160,18 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name +parent_table + + + The name of an existing table from which the new foreign table + automatically inherits all columns. The specified parent table + must be a plain table. See for the + details of table inheritance. + + + + + server_name diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index f263b42..b4a084c 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -256,6 +256,48 @@ has_subclass(Oid relationId) /* + * Check whether the inheritance tree co
Re: [HACKERS] issue log message to suggest VACUUM FULL if a table is nearly empty
On Mon, Mar 10, 2014 at 4:24 PM, Amit Kapila wrote: > > On Mon, Mar 10, 2014 at 5:58 AM, Wang, Jing wrote: > > Enclosed is the patch to implement the requirement that issue log message to > > suggest VACUUM FULL if a table is nearly empty. > > > > The requirement comes from the Postgresql TODO list. > > > > [Solution details] > > > > A check function is added in the function 'lazy_vacuum_rel' to check if the > > table is large enough and contains large numbers of unused rows. If it is > > then issue a log message suggesting using 'VACUUM FULL' on the table. > > > > The judgement policy is as following: > > > > If the relpage of the table > RELPAGES_VALUES_THRESHOLD(default 1000) then > > the table is considered to be large enough. > > > > If the free_space/total_space > FREESPACE_PERCENTAGE_THRESHOLD(default 0.5) > > then the table is considered to have large numbers of unused rows. > > > > The free_space is calculated by reading the details from the FSM pages. This > > may increase the IO, but expecting very few FSM pages thus it shouldn't > > cause > > I think it would be better if we can use some existing stats to issue warning > message rather than traversing the FSM for all pages. For example after > vacuuming page in lazy_scan_heap(), we update the freespace for page. > You can refer below line in lazy_scan_heap(). > freespace = PageGetHeapFreeSpace(page); > > Now it might be possible that we might not get freespace info easily as > it is not accumulated for previous vacuums. In case there is no viable > way to get it through vacuum stats, we are already updating fsm after > vacuum by FreeSpaceMapVacuum(), where I think it should be possible > to get freespace. Yes, this way it works without extra penalty. But the problem is how to calculate the free space which is left in the skipped pages because of the visibility bit. 
In a normal scenario, if the pages that get skipped during the vacuum process are few in number, then this approach is a good choice. Regards, Hari Babu Fujitsu Australia -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Get more from indices.
Oops! I found a bug in this patch. The previous v8 patch missed the case that build_index_pathkeys() could build partial pathkeys from the index tlist. This causes the situation as follows, === =# \d cu11 Table "public.cu11" Column | Type | Modifiers +-+--- a | integer | not null b | integer | not null c | integer | d | text| Indexes: "cu11_a_b_idx" UNIQUE, btree (a, b) s=# explain (costs off) select * from cu11 order by a, c ,d; QUERY PLAN --- Index Scan using cu11_a_b_idx on cu11 (1 row) === Where the simple ORDER BY a, c, d on the table with index on columns (a, b) results in a simple index scan which cannot produce the requested ordering. The attached v9 patch is fixed by adding a check for the case into index_pathkeys_are_extensible(), and rebased to current HEAD. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index bfb4b9f..ff5c88c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1790,6 +1790,7 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node) WRITE_BOOL_FIELD(predOK); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(immediate); + WRITE_BOOL_FIELD(allnotnull); WRITE_BOOL_FIELD(hypothetical); /* we don't bother with fields copied from the pg_am entry */ } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index a912174..4376e95 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -951,8 +951,11 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, { index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection); - useful_pathkeys = truncate_useless_pathkeys(root, rel, - index_pathkeys); + if (index_pathkeys_are_extensible(root, index, index_pathkeys)) + useful_pathkeys = root->query_pathkeys; + else + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); orderbyclauses = NIL; orderbyclausecols = NIL; } diff --git 
a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 9179c61..01479f4 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -502,6 +502,65 @@ build_index_pathkeys(PlannerInfo *root, } /* + * index_pathkeys_are_extensible + * Check whether the pathkeys are extensible to query_pathkeys. + */ +bool +index_pathkeys_are_extensible(PlannerInfo *root, + IndexOptInfo *index, + List *pathkeys) +{ + bool result; + ListCell *lc1; + + if (root->query_pathkeys == NIL || pathkeys == NIL) + return false; + + /* This index is not suitable for pathkey extension */ + if (!index->unique || !index->immediate || !index->allnotnull) + return false; + + /* pathkeys is a prefixing proper subset of index tlist */ + if (list_length(pathkeys) < list_length(index->indextlist)) + return false; + + if (!pathkeys_contained_in(pathkeys, root->query_pathkeys)) + return false; + + if (list_length(pathkeys) == list_length(root->query_pathkeys)) + return true; + + result = true; + foreach(lc1, root->query_pathkeys) + { + PathKey*pathkey = (PathKey *) lfirst(lc1); + bool found = false; + ListCell *lc2; + + foreach(lc2, pathkey->pk_eclass->ec_members) + { + EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2); + + if (!bms_equal(member->em_relids, index->rel->relids) || +!IsA(member->em_expr, Var)) +continue; + else + { +found = true; +break; + } + } + + if (!found) + { + result = false; + break; + } + } + return result; +} + +/* * build_expression_pathkey * Build a pathkeys list that describes an ordering by a single expression * using the given sort operator. 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 73ba2f6..c61cddb 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -333,6 +333,26 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->immediate = index->indimmediate; info->hypothetical = false; + info->allnotnull = true; + for (i = 0; i < ncolumns; i++) + { +int attrno = info->indexkeys[i]; + +if (attrno == 0) +{ + info->allnotnull = false; + break; +} +else if (attrno > 0) +{ + if (!relation->rd_att->attrs[attrno - 1]->attnotnull) + { + info->allnotnull = false; + break; + } +} + } + /* * Estimate the index size. If it's not a partial index, we lock * the number-of-tuples estimate to equal the parent table; if it diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index c607b36..119bb31 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -525,6 +525,7 @@ ty