[HACKERS] triggers and inheritance tree

2012-03-27 Thread Jaime Casanova
Hi,

I was trying to create triggers that redirect INSERT/UPDATE/DELETE
actions from the parent to its children, but found that UPDATE/DELETE
don't get redirected. In fact, the BEFORE UPDATE and BEFORE DELETE
triggers aren't even fired.

I haven't tried AFTER triggers to see whether they are fired, but I
tested 8.4 through 9.1 and all of them behave the same way.

Attached is a simple self-contained test of this.

PS: I'm hoping this is just me needing sleep

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
drop database if exists test;
create database test;

\c test

create language plpgsql;

create schema schema1;
create schema schema2;

create table t1(i int primary key, t text);
create table schema1.t1() inherits (public.t1);
create table schema2.t1() inherits (public.t1);

create function f_insert_trigger() returns trigger as $$
begin
	raise notice 'trigger on %', TG_OP;

	insert into schema1.t1 values(new.i, new.t);
	return null;
end;
$$ language plpgsql;

create function f_update_trigger() returns trigger as $$
begin
	raise notice 'trigger on %', TG_OP;

	update schema1.t1 set i = new.i, t = new.t where i = old.i and t = old.t;
	return null;
end;
$$ language plpgsql;

create function f_delete_trigger() returns trigger as $$
begin
	raise notice 'trigger on %', TG_OP;

	delete from schema1.t1 where i = old.i and t = old.t;
	return null;
end;
$$ language plpgsql;

create trigger trg_insert before insert on public.t1 for each row execute procedure f_insert_trigger();
create trigger trg_update before update on public.t1 for each row execute procedure f_update_trigger();
create trigger trg_delete before delete on public.t1 for each row execute procedure f_delete_trigger();

-- insert some data
insert into schema1.t1 values(1, 'test');
insert into schema2.t1 values(2, 'test');
insert into schema1.t1 values(3, 'fixed row');

-- test triggers

-- ok, this one is redirected
insert into t1 values(4, 'redirected');
select * from t1;
select * from schema1.t1;

-- bad, update and delete don't respect the triggers

update t1 set t = t || ' updated' where t = 'test';
select * from t1;

delete from t1 where t = 'test updated';
select * from t1;
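
-- For what it's worth, a sketch of why this might happen (assuming the
-- usual inheritance semantics): UPDATE/DELETE on the parent are planned
-- against each child table directly, so row-level triggers fire only on
-- the table that physically holds the row. A hypothetical child-level
-- trigger illustrates this:

create function f_child_update_trigger() returns trigger as $$
begin
	raise notice 'child trigger fired for %', TG_OP;
	return new;
end;
$$ language plpgsql;

create trigger trg_child_update before update on schema1.t1
	for each row execute procedure f_child_update_trigger();

-- this one does raise the notice, because row 3 physically lives in schema1.t1
update t1 set t = t || ' again' where i = 3;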


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] [PATCH] Lazy hashaggregate when no aggregation is needed

2012-03-27 Thread Ants Aasma
A user complained on pgsql-performance that SELECT col FROM table
GROUP BY col LIMIT 2; performs a full table scan. ISTM that it's safe
to return tuples from hash-aggregate as they are found when no
aggregate functions are in use. Attached is a first shot at that. The
planner is modified so that when the optimization applies, the hash
table size check is compared against the limit and the startup cost
comes from the input. The executor is modified so that while the hash
table is not yet filled and the optimization applies, tuples are
returned immediately as new groups are found.

Can somebody poke holes in this? The patch definitely needs some code
cleanup in nodeAgg.c, but otherwise it passes regression tests and
seems to work as intended. It also optimizes the SELECT DISTINCT col
FROM table LIMIT 2; case, but not SELECT DISTINCT ON (col) col FROM
table LIMIT 2 because it is explicitly forced to use sorted
aggregation.
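
To make the cases concrete, a sketch using a hypothetical table tab(col)
(illustration only, not from the patch):

CREATE TABLE tab AS
    SELECT (i % 10) AS col FROM generate_series(1, 100000) i;

-- With the patch these can stop scanning once two hash entries exist:
SELECT col FROM tab GROUP BY col LIMIT 2;
SELECT DISTINCT col FROM tab LIMIT 2;

-- Still a full scan: DISTINCT ON is forced into sorted aggregation.
SELECT DISTINCT ON (col) col FROM tab LIMIT 2;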

Ants Aasma
-- 
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt
Web: http://www.postgresql-support.de
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index c9aa921..53cdd09 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -274,9 +274,10 @@ static Bitmapset *find_unaggregated_cols(AggState *aggstate);
 static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
 static void build_hash_table(AggState *aggstate);
 static AggHashEntry lookup_hash_entry(AggState *aggstate,
-					  TupleTableSlot *inputslot);
+					  TupleTableSlot *inputslot, bool *isnew);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
+static TupleTableSlot *agg_fill_hash_and_retrieve(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
 
@@ -920,12 +921,11 @@ hash_agg_entry_size(int numAggs)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static AggHashEntry
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
+lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot, bool *isnew)
 {
 	TupleTableSlot *hashslot = aggstate->hashslot;
 	ListCell   *l;
 	AggHashEntry entry;
-	bool		isnew;
 
 	/* if first time through, initialize hashslot by cloning input slot */
 	if (hashslot->tts_tupleDescriptor == NULL)
@@ -948,9 +948,9 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 	/* find or create the hashtable entry using the filtered tuple */
 	entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
 												hashslot,
-												&isnew);
+												isnew);
 
-	if (isnew)
+	if (*isnew)
 	{
 		/* initialize aggregates for new tuple group */
 		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
@@ -1004,7 +1004,12 @@ ExecAgg(AggState *node)
 	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 	{
 		if (!node->table_filled)
-			agg_fill_hash_table(node);
+		{
+			if (node->numaggs)
+				agg_fill_hash_table(node);
+			else
+				return agg_fill_hash_and_retrieve(node);
+		}
 		return agg_retrieve_hash_table(node);
 	}
 	else
@@ -1222,6 +1227,7 @@ agg_fill_hash_table(AggState *aggstate)
 	ExprContext *tmpcontext;
 	AggHashEntry entry;
 	TupleTableSlot *outerslot;
+	bool 		isnew;
 
 	/*
 	 * get state info from node
@@ -1243,7 +1249,7 @@ agg_fill_hash_table(AggState *aggstate)
 		tmpcontext->ecxt_outertuple = outerslot;
 
 		/* Find or build hashtable entry for this tuple's group */
-		entry = lookup_hash_entry(aggstate, outerslot);
+		entry = lookup_hash_entry(aggstate, outerslot, &isnew);
 
 		/* Advance the aggregates */
 		advance_aggregates(aggstate, entry->pergroup);
@@ -1258,6 +1264,101 @@ agg_fill_hash_table(AggState *aggstate)
 }
 
 /*
+ * ExecAgg for hashed case: phase 1, read input and build hash table,
+ * returning found tuples immediately.
+ */
+static TupleTableSlot *
+agg_fill_hash_and_retrieve(AggState *aggstate)
+{
+	PlanState  *outerPlan;
+	ExprContext *tmpcontext;
+	AggHashEntry entry;
+	TupleTableSlot *outerslot;
+	bool		isnew;
+	ExprContext *econtext;
+	TupleTableSlot *firstSlot;
+
+	/*
+	 * get state info from node
+	 */
+	outerPlan = outerPlanState(aggstate);
+	/* tmpcontext is the per-input-tuple expression context */
+	tmpcontext = aggstate->tmpcontext;
+
+	econtext = aggstate->ss.ps.ps_ExprContext;
+	firstSlot = aggstate->ss.ss_ScanTupleSlot;
+
+	Assert(aggstate->numaggs == 0);
+
+	/*
+	 * Process each outer-plan tuple, and then fetch the next one, until we
+	 * exhaust the outer plan.
+	 */
+	for (;;)
+	{
+		outerslot = ExecProcNode(outerPlan);
+		if (TupIsNull(outerslot))
+		{
+			aggstate->table_filled = true;
+			/* Initialize to walk the hash table */
+			ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+			return NULL;
+		}
+		/* set up for advance_aggregates call */
+		tmpcontext->ecxt_outertuple = outerslot;
+
+		/* Find or build hashtable entry for t

[HACKERS] Re: [COMMITTERS] pgsql: pg_test_timing utility, to measure clock monotonicity and timing

2012-03-27 Thread Fujii Masao
On Wed, Mar 28, 2012 at 5:17 AM, Robert Haas  wrote:
> pg_test_timing utility, to measure clock monotonicity and timing cost.

When I compiled this, I got a compiler warning. The attached patch
silences the warning.

Also, I found one trivial problem in the pg_test_timing docs; the
patch fixes that as well.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/contrib/pg_test_timing/pg_test_timing.c
--- b/contrib/pg_test_timing/pg_test_timing.c
*************** test_timing(int32 duration)
*** 155,161 ****
  		if (found || histogram[i])
  		{
  			found = 1;
! 			printf("%9ld: %10ld %8.5f%%\n", 1l << i, histogram[i],
  				   (double) histogram[i] * 100 / loop_count);
  		}
  	}
--- 155,161 ----
  		if (found || histogram[i])
  		{
  			found = 1;
! 			printf("%9ld: %10lld %8.5f%%\n", 1l << i, histogram[i],
  				   (double) histogram[i] * 100 / loop_count);
  		}
  	}
*** a/doc/src/sgml/pgtesttiming.sgml
--- b/doc/src/sgml/pgtesttiming.sgml
*************** pg_test_timing [options]
*** 28,35 ****
  
  
   
!   -d
!   --duration

 
  Specifies the test duration, in seconds. Longer durations
--- 28,35 ----
  
  
   
!   -d duration
!   --duration=duration

 
  Specifies the test duration, in seconds. Longer durations

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgsql_fdw, FDW for PostgreSQL server

2012-03-27 Thread Shigeru HANADA
(2012/03/27 20:32), Thom Brown wrote:
> 2012/3/26 Shigeru HANADA:
>> * pgsql_fdw_v17.patch
>> - Adds pgsql_fdw as contrib module
>> * pgsql_fdw_pushdown_v10.patch
>> - Adds WHERE push down capability to pgsql_fdw
>> * pgsql_fdw_analyze_v1.patch
>> - Adds pgsql_fdw_analyze function for updating local stats
> 
> Hmm... I've applied them using the latest Git master, and in the order
> specified, but I can't build the contrib module.  What am I doing
> wrong?

I'm sorry, but I couldn't reproduce the errors with this procedure.

$ git checkout master
$ git pull upstream master  # make master branch up-to-date
$ git clean -fd # remove files for other branches
$ make clean# just in case
$ patch -p1 < /path/to/pgsql_fdw_v17.patch
$ patch -p1 < /path/to/pgsql_fdw_pushdown_v10.patch
$ patch -p1 < /path/to/pgsql_fdw_analyze_v1.patch
$ make  # make core first for libpq et al.
$ cd contrib/pgsql_fdw
$ make  # pgsql_fdw

Please try "git clean" and "make clean", if you have not.
FWIW, I'm using GNU Make 3.82 and gcc 4.6.0 on Fedora 15.

Regards,
-- 
Shigeru HANADA

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Reporting WAL file containing checkpoint's REDO record in pg_controldata's result

2012-03-27 Thread Fujii Masao
On Wed, Mar 28, 2012 at 12:30 AM, Alvaro Herrera
 wrote:
>
> Excerpts from Fujii Masao's message of mar mar 27 06:40:34 -0300 2012:
>
>> Anyway, should I add this patch into the next CF? Or is anyone planning
>> to commit the patch for 9.2?
>
> I think the correct thing to do here is add to next CF, and if some
> committer has enough interest in getting it quickly it can be grabbed
> from there and committed into 9.2.

Yep! I've added it to next CF.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 8:41 PM, Greg Stark  wrote:
> On Tue, Mar 27, 2012 at 7:58 PM, Robert Haas  wrote:
>> I've committed the core of this.  I left out the stats collector
>> stuff, because it's still per-table and I think perhaps we should back
>> off to just per-database.  I changed it so that it does not conflate
>> wait time with I/O time.  Maybe there should be a separate method of
>> measuring wait time, but I don't think it's a good idea for the
>> per-backend stats to measure a different thing than what gets reported
>> up to the stats collector - we should have ONE definition of each
>> counter.  I also tweaked the EXPLAIN output format a bit, and the
>> docs.
>
> Maybe I missed some earlier discussion -- I've been having trouble
> keeping up with the lists.
>
> But was there discussion of why this is a GUC? Why not just another
> parameter to EXPLAIN like the others?
> i.e. EXPLAIN (ANALYZE, BUFFERS, IOTIMING)

Because you want to be able to expose the data even for queries that
aren't explained.  Right now, you can do that with pg_stat_statements;
and the original patch also had per-table counters, but I didn't
commit that part due to some concerns about stats bloat.
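
For example (a sketch, assuming the GUC and pg_stat_statements columns
as committed for 9.2 -- track_io_timing, blk_read_time, blk_write_time --
and a hypothetical pgbench_accounts table):

SET track_io_timing = on;

-- EXPLAIN exposes the timings for an explained query...
EXPLAIN (ANALYZE, BUFFERS) SELECT count(*) FROM pgbench_accounts;

-- ...while pg_stat_statements exposes them for queries that were never
-- explained:
SELECT query, blk_read_time, blk_write_time
  FROM pg_stat_statements
 ORDER BY blk_read_time DESC
 LIMIT 5;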

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Greg Stark
On Tue, Mar 27, 2012 at 7:58 PM, Robert Haas  wrote:
> I've committed the core of this.  I left out the stats collector
> stuff, because it's still per-table and I think perhaps we should back
> off to just per-database.  I changed it so that it does not conflate
> wait time with I/O time.  Maybe there should be a separate method of
> measuring wait time, but I don't think it's a good idea for the
> per-backend stats to measure a different thing than what gets reported
> up to the stats collector - we should have ONE definition of each
> counter.  I also tweaked the EXPLAIN output format a bit, and the
> docs.

Maybe I missed some earlier discussion -- I've been having trouble
keeping up with the lists.

But was there discussion of why this is a GUC? Why not just another
parameter to EXPLAIN like the others?
i.e. EXPLAIN (ANALYZE, BUFFERS, IOTIMING)

-- 
greg

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 3:22 PM, Ants Aasma  wrote:
> On Tue, Mar 27, 2012 at 9:58 PM, Robert Haas  wrote:
>> I've committed the core of this.  I left out the stats collector
>> stuff, because it's still per-table and I think perhaps we should back
>> off to just per-database.  I changed it so that it does not conflate
>> wait time with I/O time.  Maybe there should be a separate method of
>> measuring wait time, but I don't think it's a good idea for the
>> per-backend stats to measure a different thing than what gets reported
>> up to the stats collector - we should have ONE definition of each
>> counter.  I also tweaked the EXPLAIN output format a bit, and the
>> docs.
>
> Thank you for working on this.
>
> Taking a fresh look at it, I agree with you that conflating waiting
> for backend-local IO and IO performed by some other backend might
> paint us into a corner. For most workloads the wait for IO performed
> by others should be the minority anyway.
>
> I won't really miss the per-table stats. But if the pg_stat_statements
> normalisation patch gets committed, it would be really neat to also
> have IO waits there. It would be much easier to quickly diagnose "what
> is causing all this IO" issues.

So, the pg_stat_statements part of this is committed now.  Do you want
to prepare a revised patch to add per-database counters to the
statistics collector?  I think that might be a good idea...

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Daniel Farina
On Tue, Mar 27, 2012 at 1:30 PM, Andrew Dunstan  wrote:
>> Well, that does sort of leave an arguable vulnerability.  Should the
>> same user only be allowed to kill the process from a connection to
>> the same database?
>>
>
> It might be a reasonable restriction in theory, but I doubt it's much of a
> security gain.

If this restriction makes anyone feel better, it doesn't bother/change
anything for me in the slightest (for both terminate and cancel), and
that way no interesting pg_hba.conf trickery will be broken, as far as
I can see.

-- 
fdr

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Another review of URI for libpq, v7 submission

2012-03-27 Thread Alex
Alex  writes:

> Peter Eisentraut  writes:
>>
>> Attached is a patch on top of your v9 with two small fixes:
>>
>> - Don't provide a check target in libpq/Makefile if it's not
>> implemented.
>>
>> - Use the configured port number for running the tests (otherwise it
>> runs against my system installation, for example).
>
> Neat, thank you.

Attached is a gzipped v10, complete with the above fixes and fixes to
bugs found by Heikki.  Documentation and code comments are also finally
updated.

Of all the command-line utilities that can accept a conninfo string,
only psql mentions that, so only its documentation was updated.  Other
utilities, like pg_dump and pg_restore, can also work with either a
conninfo string or a URI; however, this remains undocumented.

--
Alex


libpq-uri-v10.patch.gz
Description: libpq-uri-v10.patch.gz

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Re: pg_stat_statements normalisation without invasive changes to the parser (was: Next steps on pg_stat_statements normalisation)

2012-03-27 Thread Tom Lane
[ just for the archives' sake ]

Peter Geoghegan  writes:
> On 27 March 2012 18:15, Tom Lane  wrote:
>> Now, if what it wants to know about is the parameterization status
>> of the query, things aren't ideal because most of the info is hidden
>> in parse-callback fields that aren't of globally exposed types.  However
>> we could at least duplicate the behavior you have here, because you're
>> only passing canonicalize = true in cases where no parse callback will
>> be registered at all, so pg_stat_statements could equivalently test for
>> pstate->p_paramref_hook == NULL.

> It has been suggested to me before that comparisons with function
> pointers - using them as a flag, in effect - are generally iffy, but
> that particular usage seems reasonable to me.

Well, testing function pointers for null is certainly OK --- note that
all our hook function call sites do that.  It's true that testing for
equality to a particular function's name can fail on some platforms
because of jump table hacks.  Thus for example, if you had a need to
know that parse_variable_parameters parameter management was in use,
it wouldn't do to test whether p_coerce_param_hook ==
variable_coerce_param_hook.  (Not that you could anyway, what with that
being a static function, but exposing it as global wouldn't offer a safe
solution.)

If we had a need to make this information available, I think what we'd
want to do is insist that p_ref_hook_state entries be subclasses of
Node, so that plugins could apply IsA tests on the node tag to figure
out what style of parameter management was in use.  This would also mean
exposing the struct definitions globally, which you'd need anyway else
the plugins couldn't safely access the struct contents.

I don't particularly want to go there without very compelling reasons,
but that would be the direction to head in if we had to.

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Speed dblink using alternate libpq tuple storage

2012-03-27 Thread Marko Kreen
On Sat, Mar 24, 2012 at 02:22:24AM +0200, Marko Kreen wrote:
> Main advantage of including PQgetRow() together with low-level
> rowproc API is that it allows de-emphasizing more complex parts of
> rowproc API (exceptions, early exit, skipresult, custom error msg).
> And drop/undocument them or simply call them postgres-internal-only.

I thought more about exceptions and PQgetRow and found an
interesting pattern:

- Exceptions are problematic if always allowed.  Although PQexec() is
  easy to fix in the current code, trying to keep the promise that
  "exceptions are allowed from everywhere" adds non-trivial
  maintainability overhead to future libpq changes, so instead we should
  simply fix the documentation.  Especially as I cannot see any usage
  scenario that would benefit from such a promise.

- Multiple SELECTs from PQexec() are problematic even without
  exceptions: additional documentation is needed on how to detect
  that rows are coming from a new statement.

Now the interesting one:

- The PQregisterProcessor() API allows changing the callback permanently,
  thus breaking any user code which simply calls PQexec() and expects a
  regular PGresult back.  Again, nothing to fix code-wise; we need to
  document that the callback should be set only for the current query,
  then changed back.

My conclusion is that the row-processor API is a low-level expert API
and quite easy to misuse.  It would be preferable to have something more
robust as the end-user API; PQgetRow() is my suggestion for that.
Thus I see 3 choices:

1) Push row-processor as main API anyway and describe all dangerous
   scenarios in documentation.
2) Have both PQgetRow() and row-processor available in <libpq-fe.h>,
   PQgetRow() as the preferred API and row-processor for expert usage,
   with proper documentation of what works and what does not.
3) Have PQgetRow() in <libpq-fe.h>, move row-processor to <libpq-int.h>.

I guess this needs a committer decision on which way to go?


A second conclusion is that the current dblink row-processor usage is
broken when the user issues multiple SELECTs in one SQL string, as
dblink uses plain PQexec().  The simplest fix would be to use
PQexecParams() instead, but if keeping the old behaviour is important,
then dblink needs to emulate PQexec()'s resultset behaviour with the
row-processor or PQgetRow().
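
To illustrate (a sketch, assuming the stock dblink(text, text) interface):
PQexec() runs the whole string and hands back only the last result set, so
pre-patch dblink returns just the second SELECT's rows here, while a row
processor would see the rows of both statements:

SELECT * FROM dblink('dbname=test',
                     'SELECT 1 AS x; SELECT 2 AS x;') AS t(x int);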

-- 
marko


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 4:05 PM, Andres Freund  wrote:
>> > Looking up oids and such before calling the trigger doesn't seem to be
>> > problematic from my pov. Command triggers are a superuser only facility,
>> > additional information they gain is no problem.
>> > Thinking about that I think we should enforce command trigger functions
>> > to be security definer functions.
>> I don't see any benefit from that restriction.
> The reason I was thinking it might be a good idea is that they might get
> more knowledge passed than the user could get directly otherwise. Especially
> if we extend the scheme to more places/information.

As long as the superuser gets to decide whether or not a given
function is installed as a command trigger, there's no harm in
allowing him to make it either SECURITY INVOKER or SECURITY DEFINER.
I agree that making it SECURITY DEFINER will often be useful and
appropriate; I just see no reason to enforce that restriction
programmatically.

> What are the phases we have:
>
> * after parse
>  * logging
> * after resolving name
> * after checking permissions (intertwined with the former)
>  * override permission check? INSTEAD?
> * after locking (intertwined with the former)
>  * except it needs to be connected to resolving the name/permission check,
> this doesn't seem to be an attractive hook point
> * after validation
>  * additional validation likely want to hook here
> * after execution
>  * replication might want to hook here
>
> Am I missing an interesting phase?

I'd sort of categorize it like this:

- pre-parse
- post-parse
- at permissions-checking time
- post permissions-checking/name-lookup, before starting the main work
of the command, i.e. pre-execution
- "event" type triggers that happen when specific actions are
performed (e.g. CREATE, DROP, etc.) or as subcommands fire (e.g. in
ALTER TABLE, we could fire an alter-table-subcommand trigger every
time we execute a subcommand)
- post-execution

> Allowing all that probably seems to require a substantial refactoring of
> commands/ and catalog/

Probably.  But we don't need to allow it all at once.  What we need to
do is pick one or two things that are relatively easily done,
implement those first, and then build on it.  Pre-parse, post-parse,
CREATE-event, DROP-event, and post-execution hooks are all pretty easy
to implement without major refactoring, I think.  OTOH,
post-permissions-checking-pre-execution is going to be hard to
implement without refactoring, and ALTER-event hooks are going to be
hard, too.

> I think you need a surprisingly high amount of context to know when to ignore
> a trigger. Especially as its not exactly easy to transfer knowledge from one
> to the next.

I'm not convinced, but maybe it would be easier to resolve this in the
context of a specific proposal.

> I don't think creating *new* DDL from the parsetree can really count as
> statement based replication. And again, I don't think implementing that will
> cost that much effort.
> How would it help us to know - as individual events! - which tuples were
> created where? Reassembling that will be a huge mess. I basically fail to see
> *any* use case besides access checking.

I think if you'd said this to me two years ago, I would have believed
you, but poking through this code in the last year or two, I've come
to the conclusion that there are a lot of subtle things that get
worked out after parse time, during execution.  Aside from things like
recursing down the inheritance hierarchy and maybe creating some
indexes or sequences when creating a table, there's also subtle things
like working out exactly what index we're creating: name, access
method, operator class, collation, etc.  And I'm pretty sure that if
we examine the code carefully we'll find there are a bunch more.

> I fear a bit that this discussion is leading to something that's never going
> to materialize because it would require a huge amount of work to get there.

Again, the way to avoid that is to tackle the simple cases first and
then work on the harder cases after that, but I don't think that's
what the current patch is doing.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Andrew Dunstan



On 03/27/2012 03:14 PM, Kevin Grittner wrote:
> Andres Freund  wrote:
>> On Tuesday, March 27, 2012 07:51:59 PM Kevin Grittner wrote:
>>>> Well, I guess if you have different people sharing the same
>>>> user-ID, you probably wouldn't want that.
>>>
>>> As Tom pointed out, if there's another person sharing the user ID
>>> you're using, and you don't trust them, their ability to cancel
>>> your session is likely way down the list of concerns you should
>>> have.
>> Hm. I don't think that is an entirely valid argumentation. The
>> same user could have entirely different databases. They even could
>> have distinct access control via the client's IP.
>> I have seen the same cluster being used for prod/test instances at
>> smaller shops several times.
>>
>> Whether that's a valid use case I have no idea.
>
> Well, that does sort of leave an arguable vulnerability.  Should the
> same user only be allowed to kill the process from a connection to
> the same database?

It might be a reasonable restriction in theory, but I doubt it's much of
a security gain.

cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_test_timing tool for EXPLAIN ANALYZE overhead

2012-03-27 Thread Robert Haas
On Wed, Feb 22, 2012 at 6:53 AM, Greg Smith  wrote:
> A look back on this now that I'm done with it does raise one large question
> though.  I added some examples of how to measure timing overhead using psql.
>  While I like the broken down timing data that this utility provides, I'm
> not sure whether it's worth adding a contrib module just to get it now
> though.  Extension that's packaged on something like PGXN and easy to
> obtain?  Absolutely--but maybe that's a developer only level thing.  Maybe
> the only code worth distributing is the little SQL example of how to measure
> the overhead, along with some reference good/bad numbers.  That plus the
> intro to timer trivia could turn this into a documentation section only, no
> code change.  I've dreamed of running something like this on every system in
> the build farm.  Even if that's a valuable exercise, even then it may only
> be worth doing once, then reverting.

I had similar concerns, but decided to go ahead and commit this.  It
doesn't relate particularly closely to the buffer I/O timings patch,
but I think it's worth having.  We clearly need something that is
integrated with the PostgreSQL sources, because there is more than one
way to access timers, and this code will measure the overhead of doing
what PostgreSQL does, rather than the overhead of reading times in
some other way.  Now, that doesn't preclude shipping this on PGXN, but
we've certainly got other things in contrib that are clearly a lot
less useful than this, and we've discussed before the folly of
shipping a database product without a full set of diagnostic tools.
Since this tool was developed to answer questions about whether
certain PostgreSQL options are safe to enable or whether they'll have
too much overhead, I think that's a sufficient reason to include it in
contrib, especially because you took the time to write some very good
documentation for it.  I wonder whether we should perhaps move some of
this discussion of clock sources into the main documentation
somewhere, since it's not specifically related to this utility, but I
didn't try to do that for the moment and just left it as you had it.
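
The psql-level measurement Greg refers to presumably looks something like
this (a sketch, not the exact example from the docs):

\timing on
-- time a plain aggregate, then the same query under EXPLAIN ANALYZE;
-- the runtime difference approximates the per-row timing overhead
SELECT count(*) FROM generate_series(1, 100000);
EXPLAIN ANALYZE SELECT count(*) FROM generate_series(1, 100000);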

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Andres Freund
Hi,

On Tuesday, March 27, 2012 07:34:46 PM Robert Haas wrote:
On Tue, Mar 27, 2012 at 11:58 AM, Andres Freund wrote:
> > I still think the likeliest way towards that is defining some behaviour
> > we want regarding
> > * naming conflict handling
> > * locking
> > 
> > And then religiously make sure the patch adheres to that. That might need
> > some refactoring of existing code (like putting a part of the utility.c
> > handling of create table into its own function and such), but should be
> > doable.
> > 
> > I think BEFORE command triggers ideally should run
> > * before permission checks
> > * before locking
> > * before internal checks are done (naming conflicts, type checks and
> > such)
> It is possible to do this, and it would actually be much easier and
> less invasive to implement than what Dimitri has done here, but the
> downside is that you won't have done the name lookup either.  See my
> last email to Dimitri for a long explanation of why that restriction
> is not easily circumventable: name lookups, locking, and permission
> checks are necessarily and inextricably intertwined.
Read your other mail. Coming back to it, I think the above might be too
restrictive to be useful for a big part of the use cases. So your idea of
more hook points below has some merit.

> Still, if others
> agree this is useful, I think it would be a lot easier to get right
> than what we have now.
I think it's pretty important to have something that's usable rather easily,
which requires names to be resolved and thus permission checks already
performed and (some) locks already acquired.
I think quite a few of the possible usages need the facility to be as simple
as possible and won't care about already-acquired locks/names.


> Some of what we're now doing as part of parse analysis should really
> be reclassified.  For example, the ProcessUtility branch that handles
> T_CreateStmt and T_CreateForeignTableStmt should really be split out
> as a separate function that lives in tablecmds.c, and I think at least
> some of what's in transformCreateStmt should be moved to tablecmds.c
> as well.  The current arrangement makes it really hard to guarantee
> things like "the name gets looked up just once", which seems obviously
> desirable, since strange things will surely happen if the whole
> command doesn't agree on which OID it's operating on.
Yes, I agree, most of that should move out of utility.c.

> > Looking up oids and such before calling the trigger doesn't seem to be
> > problematic from my pov. Command triggers are a superuser only facility,
> > additional information they gain is no problem.
> > Thinking about that I think we should enforce command trigger functions
> > to be security definer functions.
> I don't see any benefit from that restriction.
The reason I was thinking it might be a good idea is that they might get
more knowledge passed than the user could get directly otherwise. Especially
if we extend the scheme to more places/information.

> >> I actually think that, to really meet all needs here, we may need the
> >> ability to get control in more than one place.
> > Not sure what you mean by that. Before/after permission checks,
> > before/after consistency checks? That sort of thing?
> Yes.  For example, above you proposed a kind of trigger that fires
> really early - basically after parsing but before everything else.
> What Dimitri has implemented is a much later trigger that is still
> before the meat of the command, but not before *everything*.  And then
> there's an AFTER trigger, which could fire either after an individual
> piece or stage of the command, or after the whole command is complete,
> either of which seems potentially useful depending on the
> circumstances.  I almost think that the BEFORE/AFTER name serves to
> confuse rather than to clarify, in this case.  It's really a series of
> specific hook points: whenever we parse a command (but before we
> execute it), after security and sanity checks but before we begin
> executing the command, before or after various subcommands, after the
> whole command is done, and maybe a few others.  When we say BEFORE or
> AFTER, we implicitly assume that we want at most two of the things
> from that list, and I am not at all sure that's what's going to be
> enough.
You might have a point there. Not sure if the complexity of explaining all the 
different hook points is worth the pain.

What are the phases we have:

* after parse
  * logging
* after resolving name
* after checking permissions (intertwined with the former)
  * override permission check? INSTEAD?
* after locking (intertwined with the former)
  * except it needs to be connected to resolving the name/permission check,
this doesn't seem to be an attractive hook point
* after validation
  * additional validation likely want to hook here
* after execution
  * replication might want to hook here

Am I missing an interesting phase?

Allowing all that probably seems to require a substantial refactoring of
commands/ and catalog/

Re: [HACKERS] Re: pg_stat_statements normalisation without invasive changes to the parser (was: Next steps on pg_stat_statements normalisation)

2012-03-27 Thread Peter Geoghegan
On 27 March 2012 20:26, Tom Lane  wrote:
> I've committed the core-backend parts of this, just to get them out of
> the way.  Have yet to look at the pg_stat_statements code itself.

Thanks. I'm glad that we have that out of the way.

> I ended up choosing not to apply that bit.  I remain of the opinion that
> this behavior is fundamentally inconsistent with the general rules for
> assigning parse locations to analyzed constructs, and I see no reason to
> propagate that inconsistency further than we absolutely have to.

Fair enough.

-- 
Peter Geoghegan       http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training and Services

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Re: pg_stat_statements normalisation without invasive changes to the parser (was: Next steps on pg_stat_statements normalisation)

2012-03-27 Thread Tom Lane
Peter Geoghegan  writes:
> I've attached a patch with the required modifications.

I've committed the core-backend parts of this, just to get them out of
the way.  Have yet to look at the pg_stat_statements code itself.

> I restored the location field to the ParamCoerceHook signature, but
> the removal of code to modify the param location remains (again, not
> because I need it, but because I happen to think that it ought to be
> consistent with Const).

I ended up choosing not to apply that bit.  I remain of the opinion that
this behavior is fundamentally inconsistent with the general rules for
assigning parse locations to analyzed constructs, and I see no reason to
propagate that inconsistency further than we absolutely have to.

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Ants Aasma
On Tue, Mar 27, 2012 at 9:58 PM, Robert Haas  wrote:
> I've committed the core of this.  I left out the stats collector
> stuff, because it's still per-table and I think perhaps we should back
> off to just per-database.  I changed it so that it does not conflate
> wait time with I/O time.  Maybe there should be a separate method of
> measuring wait time, but I don't think it's a good idea for the
> per-backend stats to measure a different thing than what gets reported
> up to the stats collector - we should have ONE definition of each
> counter.  I also tweaked the EXPLAIN output format a bit, and the
> docs.

Thank you for working on this.

Taking a fresh look at it, I agree with you that conflating waiting
for backend-local IO and IO performed by some other backend might
paint us into a corner. For most workloads the wait for IO performed
by others should be the minority anyway.

I won't really miss the per-table stats. But if the pg_stat_statements
normalisation patch gets committed, it would be really neat to also
have IO waits there. It would be much easier to quickly diagnose "what
is causing all this IO" issues.

Thanks again,
Ants Aasma
--
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt
Web: http://www.postgresql-support.de

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 2:58 PM, Robert Haas  wrote:
> On Thu, Mar 22, 2012 at 7:38 PM, Ants Aasma  wrote:
>> On Wed, Mar 21, 2012 at 5:01 PM, Robert Haas  wrote:
>>> This seems to have bitrotted again.  :-(
>>>
>>> Can you please rebase again?
>>
>> Rebased.
>
> I've committed the core of this.  I left out the stats collector
> stuff, because it's still per-table and I think perhaps we should back
> off to just per-database.  I changed it so that it does not conflate
> wait time with I/O time.  Maybe there should be a separate method of
> measuring wait time, but I don't think it's a good idea for the
> per-backend stats to measure a different thing than what gets reported
> up to the stats collector - we should have ONE definition of each
> counter.  I also tweaked the EXPLAIN output format a bit, and the
> docs.

And I've now committed the pg_stat_statements code as well, hopefully
not stomping too badly on what Tom's apparently in the midst of doing
with Peter's pg_stat_statements patch.  I committed this almost
exactly as submitted; just a minor code style correction and a few
documentation enhancements.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Another review of URI for libpq, v7 submission

2012-03-27 Thread Alex

Peter Eisentraut  writes:

> On tor, 2012-03-22 at 23:42 +0200, Alex wrote:
>> Okay, at last here's v9, rebased against current master branch.
>
> Attached is a patch on top of your v9 with two small fixes:
>
> - Don't provide a check target in libpq/Makefile if it's not
> implemented.
>
> - Use the configured port number for running the tests (otherwise it
> runs against my system installation, for example).

Neat, thank you.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Kevin Grittner
Andres Freund  wrote:
> On Tuesday, March 27, 2012 07:51:59 PM Kevin Grittner wrote:
>>> Well, I guess if you have different people sharing the same
>>> user-ID, you probably wouldn't want that.
>> 
>>  
>> As Tom pointed out, if there's another person sharing the user ID
>> you're using, and you don't trust them, their ability to cancel
>> your session is likely way down the list of concerns you should
>> have.
> Hm. I don't think that is an entirely valid argumentation. The
> same user could have entirely different databases. They even could
> have distinct access control via the client's IP.
> I have seen the same cluster being used for prod/test instances at
> smaller shops several times. 
> 
> Whether that's a valid use case I have no idea.
 
Well, that does sort of leave an arguable vulnerability.  Should the
same user only be allowed to kill the process from a connection to
the same database?
 
-Kevin

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: add timing of buffer I/O requests

2012-03-27 Thread Robert Haas
On Thu, Mar 22, 2012 at 7:38 PM, Ants Aasma  wrote:
> On Wed, Mar 21, 2012 at 5:01 PM, Robert Haas  wrote:
>> This seems to have bitrotted again.  :-(
>>
>> Can you please rebase again?
>
> Rebased.

I've committed the core of this.  I left out the stats collector
stuff, because it's still per-table and I think perhaps we should back
off to just per-database.  I changed it so that it does not conflate
wait time with I/O time.  Maybe there should be a separate method of
measuring wait time, but I don't think it's a good idea for the
per-backend stats to measure a different thing than what gets reported
up to the stats collector - we should have ONE definition of each
counter.  I also tweaked the EXPLAIN output format a bit, and the
docs.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Andres Freund
On Tuesday, March 27, 2012 07:51:59 PM Kevin Grittner wrote:
> > Well, I guess if you have different people sharing the same
> > user-ID, you probably wouldn't want that.
> 
>  
> As Tom pointed out, if there's another person sharing the user ID
> you're using, and you don't trust them, their ability to cancel your
> session is likely way down the list of concerns you should have.
Hm. I don't think that is an entirely valid argumentation. The same user could 
have entirely different databases. They even could have distinct access 
control via the client's IP.
I have seen the same cluster being used for prod/test instances at smaller 
shops several times. 

Whether that's a valid use case I have no idea.

Andres

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Daniel Farina
On Tue, Mar 27, 2012 at 10:48 AM, Daniel Farina  wrote:
> On Tue, Mar 27, 2012 at 10:46 AM, Alvaro Herrera
>  wrote:
>> Isn't it the case that many web applications run under some common
>> database user regardless of the underlying webapp user?  I wouldn't say
>> that's an unimportant case.  Granted, the webapp user wouldn't have
>> permission to run arbitrary queries in the first place.
>
> I thought as we have cancel_backend implemented (which this is a small
> extension of), the PGPROC roleid must be a spot-on match.

I read your email again.  I thought "common" meant "same base
roleid", not "the same roleid", so I thought role inheritance was
getting into this, which it isn't.  Never mind.

-- 
fdr

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Kevin Grittner
Robert Haas  wrote:
> Daniel Farina  wrote:
 
>> Is there a hypothetical DBA that doesn't want a mere-mortal user
>> to be able to signal one of their own backends to do "cancel
>> query, rollback the transaction, then close the socket"?  If so,
>> why?
 
Setting aside possible bugs in the mechanism, I have trouble
imagining a use-case where it would be undesirable to allow this.
 
> Well, I guess if you have different people sharing the same
> user-ID, you probably wouldn't want that.
 
As Tom pointed out, if there's another person sharing the user ID
you're using, and you don't trust them, their ability to cancel your
session is likely way down the list of concerns you should have.
 
-Kevin

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Daniel Farina
On Tue, Mar 27, 2012 at 10:46 AM, Alvaro Herrera
 wrote:
> Isn't it the case that many web applications run under some common
> database user regardless of the underlying webapp user?  I wouldn't say
> that's an unimportant case.  Granted, the webapp user wouldn't have
> permission to run arbitrary queries in the first place.

I thought as we have cancel_backend implemented (which this is a small
extension of), the PGPROC roleid must be a spot-on match.

-- 
fdr

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Alvaro Herrera

Excerpts from Robert Haas's message of mar mar 27 14:38:47 -0300 2012:
> On Tue, Mar 27, 2012 at 1:26 PM, Daniel Farina  wrote:
> > On Mon, Mar 26, 2012 at 4:53 PM, Robert Haas  wrote:
> >> I think the more important question is a policy question: do we want
> >> it to work like this?  It seems like a policy question that ought to
> >> be left to the DBA, but we have no policy management framework for
> >> DBAs to configure what they do or do not wish to allow.  Still, if
> >> we've decided it's OK to allow cancelling, I don't see any real reason
> >> why this should be treated differently.
> >
> > Is there a hypothetical DBA that doesn't want a mere-mortal user to be
> > able to signal one of their own backends to do "cancel query, rollback
> > the transaction, then close the socket"?  If so, why?
> 
> Well, I guess if you have different people sharing the same user-ID,
> you probably wouldn't want that.
> 
> But maybe that's not an important case.

Isn't it the case that many web applications run under some common
database user regardless of the underlying webapp user?  I wouldn't say
that's an unimportant case.  Granted, the webapp user wouldn't have
permission to run arbitrary queries in the first place.

-- 
Álvaro Herrera 
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 1:46 PM, Alvaro Herrera
 wrote:
> Excerpts from Robert Haas's message of mar mar 27 14:38:47 -0300 2012:
>> On Tue, Mar 27, 2012 at 1:26 PM, Daniel Farina  wrote:
>> > On Mon, Mar 26, 2012 at 4:53 PM, Robert Haas  wrote:
>> >> I think the more important question is a policy question: do we want
>> >> it to work like this?  It seems like a policy question that ought to
>> >> be left to the DBA, but we have no policy management framework for
>> >> DBAs to configure what they do or do not wish to allow.  Still, if
>> >> we've decided it's OK to allow cancelling, I don't see any real reason
>> >> why this should be treated differently.
>> >
>> > Is there a hypothetical DBA that doesn't want a mere-mortal user to be
>> > able to signal one of their own backends to do "cancel query, rollback
>> > the transaction, then close the socket"?  If so, why?
>>
>> Well, I guess if you have different people sharing the same user-ID,
>> you probably wouldn't want that.
>>
>> But maybe that's not an important case.
>
> Isn't it the case that many web applications run under some common
> database user regardless of the underlying webapp user?

Yes.

> I wouldn't say
> that's an unimportant case.  Granted, the webapp user wouldn't have
> permission to run arbitrary queries in the first place.

Right.  That's why I'm thinking maybe it doesn't matter.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 1:26 PM, Daniel Farina  wrote:
> On Mon, Mar 26, 2012 at 4:53 PM, Robert Haas  wrote:
>> I think the more important question is a policy question: do we want
>> it to work like this?  It seems like a policy question that ought to
>> be left to the DBA, but we have no policy management framework for
>> DBAs to configure what they do or do not wish to allow.  Still, if
>> we've decided it's OK to allow cancelling, I don't see any real reason
>> why this should be treated differently.
>
> Is there a hypothetical DBA that doesn't want a mere-mortal user to be
> able to signal one of their own backends to do "cancel query, rollback
> the transaction, then close the socket"?  If so, why?

Well, I guess if you have different people sharing the same user-ID,
you probably wouldn't want that.

But maybe that's not an important case.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Daniel Farina
On Mon, Mar 26, 2012 at 4:53 PM, Robert Haas  wrote:
> I think the more important question is a policy question: do we want
> it to work like this?  It seems like a policy question that ought to
> be left to the DBA, but we have no policy management framework for
> DBAs to configure what they do or do not wish to allow.  Still, if
> we've decided it's OK to allow cancelling, I don't see any real reason
> why this should be treated differently.

Is there a hypothetical DBA that doesn't want a mere-mortal user to be
able to signal one of their own backends to do "cancel query, rollback
the transaction, then close the socket"?  If so, why?
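
Concretely, the capability under discussion would let an unprivileged user
do something like this against their own backends (a sketch; note that
pg_stat_activity's pid column was called procpid before 9.2):

-- terminate all of my own other sessions: cancel the query, roll back
-- the transaction, and close the socket
SELECT pg_terminate_backend(pid)
  FROM pg_stat_activity
 WHERE usename = current_user
   AND pid <> pg_backend_pid();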

-- 
fdr

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] 9.2 commitfest closure (was Command Triggers, v16)

2012-03-27 Thread Robert Haas
On Mon, Mar 26, 2012 at 7:39 PM, Tom Lane  wrote:
>> Fine. What do you propose, specifically?
>
> The end of the month is coming up.  How about we propose to close the
> 'fest on April 1st?  Anything that's not committable by then goes to
> the 9.3 list.  If one week seems too short, how about 2 weeks?

Let's split the difference: how about we close it a week from this
Friday.  That would be April 6, 2012, ten days from today.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 11:58 AM, Andres Freund  wrote:
> I still think the likeliest way towards that is defining some behaviour
> we want regarding
> * naming conflict handling
> * locking
>
> And then religiously make sure the patch adheres to that. That might need some
> refactoring of existing code (like putting a part of the utility.c handling of
> create table into its own function and such), but should be doable.
>
> I think BEFORE command triggers ideally should run
> * before permission checks
> * before locking
> * before internal checks are done (naming conflicts, type checks and such)

It is possible to do this, and it would actually be much easier and
less invasive to implement than what Dimitri has done here, but the
downside is that you won't have done the name lookup either.  See my
last email to Dimitri for a long explanation of why that restriction
is not easily circumventable: name lookups, locking, and permission
checks are necessarily and inextricably intertwined.  Still, if others
agree this is useful, I think it would be a lot easier to get right
than what we have now.

> Obviously some things will be caught before that (parse analysis of some
> commands) and I think we won't be able to fully stop that, but its not totally
> consistent now and while doing some work in the path of this patch seems
> sensible it cannot do-over everything wrt this.

Some of what we're now doing as part of parse analysis should really
be reclassified.  For example, the ProcessUtility branch that handles
T_CreateStmt and T_CreateForeignTableStmt should really be split out
as a separate function that lives in tablecmds.c, and I think at least
some of what's in transformCreateStmt should be moved to tablecmds.c
as well.  The current arrangement makes it really hard to guarantee
things like "the name gets looked up just once", which seems obviously
desirable, since strange things will surely happen if the whole
command doesn't agree on which OID it's operating on.

> Looking up oids and such before calling the trigger doesn't seem to be
> problematic from my pov. Command triggers are a superuser only facility,
> additional information they gain is no problem.
> Thinking about that I think we should enforce command trigger functions to be
> security definer functions.

I don't see any benefit from that restriction.

>> I actually think that, to really meet all needs here, we may need the
>> ability to get control in more than one place.
> Not sure what you mean by that. Before/after permission checks, before/after
> consistency checks? That sort of thing?

Yes.  For example, above you proposed a kind of trigger that fires
really early - basically after parsing but before everything else.
What Dimitri has implemented is a much later trigger that is still
before the meat of the command, but not before *everything*.  And then
there's an AFTER trigger, which could fire either after an individual
piece or stage of the command, or after the whole command is complete,
either of which seems potentially useful depending on the
circumstances.  I almost think that the BEFORE/AFTER name serves to
confuse rather than to clarify, in this case.  It's really a series of
specific hook points: whenever we parse a command (but before we
execute it), after security and sanity checks but before we begin
executing the command, before or after various subcommands, after the
whole command is done, and maybe a few others.  When we say BEFORE or
AFTER, we implicitly assume that we want at most two of the things
from that list, and I am not at all sure that's going to be enough.

One thing I had thought about doing, in the context of sepgsql, and we
may yet do it, is create a hook that gets invoked whenever we need to
decide whether it's OK to perform an action on an object.  For
example, consider ALTER TABLE .. ADD FOREIGN KEY: we'd ask the hook
both "is it OK to add a foreign key to table X?" and also "is it OK to
make a foreign key refer to table Y"?  This doesn't fit into the
command-trigger framework at all, but it's definitely useful for
sepgsql, and I bet it's good for other things, too - maybe not that
specific example, but that kind of thing.
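
A possible shape for such a hook, purely as a sketch (every identifier here is
hypothetical, not an existing PostgreSQL API):

#include <stddef.h>

typedef unsigned int Oid;

/* Hypothetical actions the core might ask about. */
typedef enum
{
    ACTION_ADD_FOREIGN_KEY_ON,    /* is it OK to add an FK to table X? */
    ACTION_BE_REFERENCED_BY_FK    /* is it OK for an FK to refer to Y? */
} ObjectAction;

/* A module would set this from its _PG_init(); NULL means default policy. */
typedef int (*object_action_allowed_type)(Oid objectId, ObjectAction action);
object_action_allowed_type object_action_allowed_hook = NULL;

/* Called by the core at each decision point. */
static int
object_action_allowed(Oid objectId, ObjectAction action)
{
    if (object_action_allowed_hook)
        return object_action_allowed_hook(objectId, action);
    return 1;                     /* fall through to the normal checks */
}

For the ALTER TABLE .. ADD FOREIGN KEY example, the core would call
object_action_allowed() twice, once per table, which is exactly the pair of
questions above.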

>> I think that KaiGai only wants to be able to deny things that would
>> normally be allowed, but I suspect some of those other folks would
>> also like to be able to allow things that would normally be denied.
> Denying seems to be easier than allowing stuff safely.

Yes.

>> For those use cases, you want to get control at permissions-checking
>> time.  However, where Dimitri has the hooks right now, BEFORE triggers
>> for ALTER commands fire after you've already looked up the object that
>> you're manipulating.  That has the advantage of allowing you to use
>> the OID of the object, rather than its name, to make policy decisions;
>> but by that time it's too late to cancel a denial-of-access by the
>> first-line permissions checks.
> Why? Just throw an access denied exception? Unless it's done after the
> locking, that won't be visible by anything but timing?

Re: [HACKERS] Another review of URI for libpq, v7 submission

2012-03-27 Thread Peter Eisentraut
On Thu, 2012-03-22 at 23:42 +0200, Alex wrote:
> Okay, at last here's v9, rebased against current master branch.

Attached is a patch on top of your v9 with two small fixes:

- Don't provide a check target in libpq/Makefile if it's not
implemented.

- Use the configured port number for running the tests (otherwise it
runs against my system installation, for example).

diff --git i/src/interfaces/libpq/Makefile w/src/interfaces/libpq/Makefile
index f19f272..266e3db 100644
--- i/src/interfaces/libpq/Makefile
+++ w/src/interfaces/libpq/Makefile
@@ -121,7 +121,7 @@ install: all installdirs install-lib
 	$(INSTALL_DATA) $(srcdir)/pqexpbuffer.h '$(DESTDIR)$(includedir_internal)'
 	$(INSTALL_DATA) $(srcdir)/pg_service.conf.sample '$(DESTDIR)$(datadir)/pg_service.conf.sample'
 
-check installcheck:
+installcheck:
 	$(MAKE) -C test $@
 
 installdirs: installdirs-lib
diff --git i/src/interfaces/libpq/test/Makefile w/src/interfaces/libpq/test/Makefile
index 964bb20..869a2f0 100644
--- i/src/interfaces/libpq/test/Makefile
+++ w/src/interfaces/libpq/test/Makefile
@@ -5,7 +5,7 @@ include $(top_builddir)/src/Makefile.global
 all: installcheck
 
 installcheck:
-	BINDIR='$(bindir)' SUBDIR='$(subdir)' $(SHELL) ./regress.sh
+	BINDIR='$(bindir)' PGPORT='$(DEF_PGPORT)' SUBDIR='$(subdir)' $(SHELL) ./regress.sh
 
 clean distclean maintainer-clean:
 	rm -f regress.out regress.diff

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Re: pg_stat_statements normalisation without invasive changes to the parser (was: Next steps on pg_stat_statements normalisation)

2012-03-27 Thread Tom Lane
Peter Geoghegan  writes:
>> On 22 March 2012 17:19, Tom Lane  wrote:
>>> Either way, I think we'd be a lot better advised to define a single
>>> hook "post_parse_analysis_hook" and make the core code responsible
>>> for calling it at the appropriate places, rather than supposing that
>>> the contrib module knows exactly which core functions ought to be
>>> the places to do it.

> I have done this too.

The "canonicalize" argument to the proposed hook seems like a bit of a
crock.  You've got the core code magically setting that in a way that
responds to extremely pg_stat_statements-specific concerns, and I am not
very sure it's right even for those concerns.

I am thinking that perhaps a reasonable signature for the hook function
would be

void post_parse_analyze (ParseState *pstate, Query *query);

with the expectation that it could dig whatever it wants to know out
of the ParseState (in particular the sourceText is available there,
and in general this should provide everything that's known at parse
time).

Now, if what it wants to know about is the parameterization status
of the query, things aren't ideal because most of the info is hidden
in parse-callback fields that aren't of globally exposed types.  However
we could at least duplicate the behavior you have here, because you're
only passing canonicalize = true in cases where no parse callback will
be registered at all, so pg_stat_statements could equivalently test for
pstate->p_paramref_hook == NULL.
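
By way of illustration, a module might latch onto the proposed hook roughly
like this (the hook-variable name and the _PG_init() chaining idiom are
assumed here, not settled, and record_query() is a hypothetical helper):

#include "postgres.h"
#include "parser/analyze.h"

/* Hypothetical helper, implemented elsewhere in the module. */
static void record_query(const char *sourceText, Query *query,
                         bool canonicalize);

/* Assumed: core defines the hook variable and this typedef. */
static post_parse_analyze_hook_type prev_hook = NULL;

static void
my_post_parse_analyze(ParseState *pstate, Query *query)
{
    /* Equivalent of canonicalize = true: no parameter-ref callback set. */
    bool        canonicalize = (pstate->p_paramref_hook == NULL);

    /* The source text is available from the ParseState, as Tom notes. */
    record_query(pstate->p_sourcetext, query, canonicalize);

    if (prev_hook)
        prev_hook(pstate, query);       /* keep any other module's hook */
}

void
_PG_init(void)
{
    prev_hook = post_parse_analyze_hook;
    post_parse_analyze_hook = my_post_parse_analyze;
}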

Thoughts, other ideas?

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 11:05 AM, Dimitri Fontaine
 wrote:
>> I agree that it's not a very helpful decision, but I'm not the one who
>> said we wanted command triggers rather than event triggers.  :-)
>
> Color me unconvinced about event triggers. That's not answering my use
> cases.

Could we get a list of those use cases, maybe on a wiki page
somewhere, and add to it all the use cases that KaiGai Kohei or others
who are interested in this are aware of?  Or maybe we can just discuss
via email, but it's going to be hard to agree that we've got something
that meets the requirements or doesn't if we're all imagining
different sets of requirements.  The main use cases I can think of
are:

1. Replication.  That is, after we perform a DDL operation, we call a
trigger and tell it what we did, so that it can make a record of that
information and ship it to another machine, where it will arrange to
do the same thing on the remote side.

2. Redirection.  That is, before we perform a DDL operation, we call a
trigger and tell it what we've been asked to do, so that it can go
execute the request elsewhere (e.g. the master rather than the Hot
Standby slave).

3. Additional security or policy checks.  That is, before we perform a
DDL operation, we call a trigger and tell it what we've been asked to
do, so that it can throw an error if it doesn't like our credentials
or, say, the naming convention we've used for our column names.

4. Relaxation of security checks.  That is, we let a trigger get
control at permissions-checking time and let it make the go-or-no-go
decision in lieu of the usual permissions-checking code.

5. Auditing.  Either when something is attempted (i.e. before) or
after it happens, we log the attempt/event somewhere.

Anything else?

In that list of use cases, it seems to me that you want BEFORE and
AFTER triggers to have somewhat different firing points.  For the
BEFORE cases, you really need the command trigger to fire early, and
once.  For example, if someone says "ALTER TABLE dimitri ADD COLUMN
fontaine INTEGER, DROP COLUMN IF EXISTS haas", permissions on dimitri
should really only get checked once, not once for each subcommand.
That's the point at which you need to get control for #3 or #4, and it
would be workable for #5 as well; I'm less sure about #2.  On the
other hand, for the AFTER cases I've listed here, I think you really
want to know what *actually* happened, not what somebody thought about
doing.  You want to know which tables, sequences, etc. *actually* got
created or dropped, not the ones that the user mentioned.  If the user
mentioned a table but we didn't drop it (and we also didn't error out,
because IF EXISTS is used), none of the AFTER cases really care; if we
dropped other stuff (because of CASCADE) the AFTER cases may very well
care.

Another thing to think about with respect to deciding on the correct
firing points is that you can't easily fire the trigger after we've
identified the object in question but before we've checked permissions
on it, which otherwise seems like an awfully desirable thing to do for
use cases 3, 4, and 5 from the above list, and maybe 2 as well.  We
don't want to try to take locks on objects that the current user
doesn't have permission to access, because then a user with no
permissions whatsoever on an object can interfere with access by
authorized users.  On the flip side, we can't reliably check
permissions before we've locked the object, because somebody else
might rename or drop it after we check permissions and before we get
the lock.  Noah Misch invented a clever technique that I then used to
fix a bunch of these problems in 9.2: the fixed code (sadly, not all
cases are fixed yet, due to the fact that we ran out of time in the
development cycle) looks up the object name, checks permissions
(erroring out if the check fails), and then locks the object.  Once it
gets the lock, it checks whether any shared-invalidation messages have
been processed since the point just before we looked up the object
name.  If so, it redoes the name lookup.  If the referent of the name
has not changed, we're done; if it has, we drop the old lock and
relock the new object and loop around again, not being content until
we're sure that the object we locked is still the referrant of the
name.  This leads to much more logical behavior than the old way of
doing things, and not incidentally gets rid of a lot of errors of the
form "cache lookup failed for relation %u" that users of existing
releases will remember, probably not too fondly.
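
In outline, the loop looks something like the following sketch (lookup_name()
and check_permissions() are stand-ins for the real routines, while
SharedInvalidMessageCounter and the lock calls are actual backend facilities):

/* Sketch of the lookup / permission-check / lock / recheck loop. */
Oid
lookup_and_lock(const char *relname, LOCKMODE lockmode)
{
    for (;;)
    {
        uint64  inval_count = SharedInvalidMessageCounter;
        Oid     relid = lookup_name(relname);   /* name -> OID, no lock yet */

        check_permissions(relid);               /* errors out on failure */
        LockRelationOid(relid, lockmode);       /* only now take the lock */

        /* No invalidation arrived: the name still means what we locked. */
        if (inval_count == SharedInvalidMessageCounter)
            return relid;

        /* The referent may have changed; unlock, look up again, retry. */
        UnlockRelationOid(relid, lockmode);
    }
}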

However, it's got serious implications for triggers that want to relax
security policy, because the scope of what you can do inside that loop
is pretty limited.  You can't really do anything to the relation while
you're checking permissions on it, because you haven't locked it yet.
If you injected a trigger there, it would have to be similarly
limited, and I don't know how we'd enforce that, and it would have to
be prepared to get called multiple times.

Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Andres Freund
Hi,

On Tuesday, March 27, 2012 04:29:58 PM Robert Haas wrote:
> On Tue, Mar 27, 2012 at 9:37 AM, Andres Freund  
wrote:
> > Not necessarily. One use-case is doing something *before* something
> > happens like enforcing naming conventions, data types et al during
> > development/schema migration.
> 
> That is definitely a valid use case.  The only reason why we don't
> have something like that built into the ObjectAccessHook framework is
> because we haven't yet figured out a clean way to make it work.   Most
> of KaiGai's previous attempts involved passing a bunch of garbage
> selected apparently at random into the hook function, and I rejected
> that as unmaintainable.  Dimitri's code here doesn't have that problem
> - it passes in a consistent set of information across the board.  But
> I still think it's unmaintainable, because there's no consistency
> about when triggers get invoked, or whether they get invoked at all.
> We need something that combines the strengths of both approaches.
Yes.

I still think the likeliest way towards that is defining some behaviour we want 
regarding
* naming conflict handling
* locking

And then religiously make sure the patch adheres to that. That might need some 
refactoring of existing code (like putting a part of the utility.c handling of
create table into its own function and such), but should be doable.

I think BEFORE command triggers ideally should run
* before permission checks
* before locking
* before internal checks are done (naming conflicts, type checks and such)

Obviously some things will be caught before that (parse analysis of some
commands) and I think we won't be able to fully stop that, but it's not totally
consistent now, and while doing some work as part of this patch seems
sensible, it can't redo everything in this regard.

Looking up oids and such before calling the trigger doesn't seem to be 
problematic from my pov. Command triggers are a superuser-only facility, so
any additional information they gain is no problem.
Thinking about that, I think we should require command trigger functions to
be security definer functions.

> I actually think that, to really meet all needs here, we may need the
> ability to get control in more than one place.
Not sure what you mean by that. Before/after permission checks, before/after 
consistency checks? That sort of thing?

> For example, one thing
> that KaiGai has wanted to do (and I bet Dimitri would love to be able
> to do it too, and I'm almost sure that Dan Farina would like to do it)
> is override the built-in security policy for particular commands.
Dim definitely seems to want that: https://github.com/dimitri/pgextwlist ;)

> I think that KaiGai only wants to be able to deny things that would
> normally be allowed, but I suspect some of those other folks would
> also like to be able to allow things that would normally be denied.
Denying seems to be easier than allowing stuff safely.

> For those use cases, you want to get control at permissions-checking
> time.  However, where Dimitri has the hooks right now, BEFORE triggers
> for ALTER commands fire after you've already looked up the object that
> you're manipulating.  That has the advantage of allowing you to use
> the OID of the object, rather than its name, to make policy decisions;
> but by that time it's too late to cancel a denial-of-access by the
> first-line permissions checks.
Why? Just throw an access denied exception? Unless it's done after the locking,
that won't be visible by anything but timing?

Additional granting is more complex though; I am definitely with you there.
That will probably need INSTEAD triggers which I don't see for now. Those will 
have their own share of problems.

> Dimitri also mentioned wanting to be able to cancel the actual operation - 
> and presumably, do something else instead, like go execute it on a different 
> node, and I think that's another valid use case for this kind of trigger.  
> It's going to take some work, though, to figure out what the right set of 
> control points is, and it's probably going to require some refactoring of
> the existing code, which is a mess that I have been slowly trying to clean
> up.
I commend your bravery...

> >> In the interest of getting event triggers, you're arguing that we should
>> contort the definition of the word "command" to include sub-commands,
> >> but not necessarily commands that turn out to be a no-op, and maybe
> >> things that are sort of like what commands do even though nobody
> >> actually ever executed a command by that name.  I just don't think
> >> that's a good idea.  We either have triggers on commands, and they
> >> execute once per command, period; or we have triggers on events and
> >> they execute every time that event happens.
> I don't think that's a very helpful definition. What on earth would you
> > want to do with plain passing of
> > CREATE SCHEMA blub CREATE TABLE foo() CREATE TABLE bar();
> So some decomposition seems to be necessary. Getting the level right sure
> ain't totally easy.

Re: [HACKERS] Reporting WAL file containing checkpoint's REDO record in pg_controldata's result

2012-03-27 Thread Alvaro Herrera

Excerpts from Fujii Masao's message of Tue Mar 27 06:40:34 -0300 2012:

> Anyway, should I add this patch into the next CF? Or is anyone planning
> to commit the patch for 9.2?

I think the correct thing to do here is add to next CF, and if some
committer has enough interest in getting it quickly it can be grabbed
from there and committed into 9.2.

-- 
Álvaro Herrera 
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Dimitri Fontaine
Robert Haas  writes:
> I actually think that, to really meet all needs here, we may need the
> ability to get control in more than one place.  For example, one thing
> that KaiGai has wanted to do (and I bet Dimitri would love to be able
> to do it too, and I'm almost sure that Dan Farina would like to do it)
> is override the built-in security policy for particular commands.  I

I had that in a previous version of the patch, and removed it because
you were concerned about our ability to review it in time for 9.2, which
has obviously been the right decision.

That was called INSTEAD OF command triggers, and they could call a
SECURITY DEFINER function.

> I agree that it's not a very helpful decision, but I'm not the one who
> said we wanted command triggers rather than event triggers.  :-)

Color me unconvinced about event triggers. That's not answering my use
cases.

> that.  But how is that any different with Dimitri's approach?  You can
> get a callback AFTER CREATE TABLE, and you'll get the table name.  Now
> what?  If you get the trigger in C you can get the node tree, but
> that's hardly any better.  You're still going to need to do some
> pretty tricky push-ups to get reliable replication.  It's not at all

What you do with the parse tree is rewrite the command. It's possible to
do, but would entail exposing the internal parser state which Tom
objects to. I'm now thinking that can be maintained as a C extension.

> evident to me that the parse-tree is any better a place to start than
> the system catalog representation; in fact, I would argue that it's
> probably much worse, because you'll have to exactly replicate whatever
> the backend did to decide what catalog entries to create, or you'll
> get drift between servers.

Try to build a command string from the catalogs… even if you can store a
snapshot of them before and after the command. Remember that you might
want to “replicate” to things that are NOT a PostgreSQL server.

> ambiguity.  If you say that we're going to have a trigger on the
> CREATE SEQUENCE command, then what happens when the user creates a
> sequence via some other method?  The current patch says that we should
> handle that by calling the CREATE SEQUENCE trigger if it happens to be
> convenient because we're going through the same code path that a
> normal CREATE SEQUENCE would have gone through, but if it uses a
> different code path then let's not bother.  Otherwise, how do you

Yes, the current set of which commands fire which triggers is explained
by how the code is written wrt standard_ProcessUtility() calls. We could
mark re-entrant calls and disable the command trigger feature; it would
not be our first backend global variable in flight.

> Dimitri is not the first or last person to want to get control during
> DDL operations, and KaiGai's already done a lot of work figuring out
> how to make it work reasonably.  Pre-create hooks don't exist in that
> machinery not because nobody wants them, but because it's hard.  This

I've been talking with Kaigai about using the Command Trigger
infrastructure to implement his control hooks, while reviewing one of
his patches, and he said that's not low-level enough for him.

> whole problem is hard.  It would be wrong to paint it as a problem
> that is unsolvable or not valuable, but it would be equally wrong to
> expect that it's easy or that anyone's first attempt (mine, yours,
> Dimitri's, KaiGai's, or Tom Lane's) is going to fall painlessly into
> place without anyone needing to sweat a little blood.

Sweating over that feature is a good summary of a whole lot of my and
some others' time lately.

Regards,
--
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Dimitri Fontaine
Robert Haas  writes:
> I am coming more and more strongly to the conclusion that we're going
> in the wrong direction here.  It seems to me that you're spending an
> enormous amount of energy implementing something that goes by the name
> COMMAND TRIGGER when what you really want is an EVENT TRIGGER.

No.

The following two features are not allowed by what you call an EVENT
TRIGGER, yet they are the very reason why I started working on COMMAND
TRIGGERs:

 - BEFORE COMMAND TRIGGER
 - Having the command string available in the command trigger

Now, because of scheduling, the current patch has been reduced not to
include the second feature yet, which is a good trade-off for now. Yet
it's entirely possible to implement such a feature as an extension once
9.2 is out, given the current COMMAND TRIGGER patch.

> I realize this represents a radical design change from what you have
> right now, but what you have right now is messy and ill-defined and I

That's only because I've not been making the hard choices alone: I wanted
to be able to speak about them here, and the only time that discussion
happens is when serious hands-on code review is being done.

My take?  Let's make the hard decisions together. Mechanisms are
implemented. The plural is what is causing problems here, but that also
means we can indeed implement several policies now.

I've been proposing a non-messy policy in a previous mail, which I
realize the patch is not properly implementing now. I'd think moving the
patch to implement said policy (or another one after discussion) is the
next step.

> don't think you can easily fix it.  You're exposing great gobs of
> implementation details which means that, inevitably, every time anyone
> wants to refactor some code, the semantics of command triggers are
> going to change, or else the developer will have to go to great
> lengths to ensure that they don't.  I don't think either of those
> things is going to make anyone very happy.

I guess you can't really have your cake and eat it too, right?

Regards,
--
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Storage Manager crash at mdwrite()

2012-03-27 Thread Tareq Aljabban
On Fri, Mar 16, 2012 at 8:34 PM, Greg Stark  wrote:

> On Fri, Mar 16, 2012 at 11:29 PM, Jeff Davis  wrote:
> > There is a lot of difference between those two. In particular, it looks
> > like the problem you are seeing is coming from the background writer,
> > which is not running during initdb.
>
> The difference that comes to mind is that the postmaster forks. If the
> library opens any connections prior to forking and then uses them
> after forking that would work at first but it would get confused
> quickly once more than one backend tries to use the same connection.
> The data being sent would all be mixed together and they would see
> responses to requests other processes sent.
> You need to ensure that any network connections are opened up *after*
> the new processes are forked.
>

It's true: it turned out that the cause of the problem is that HDFS has
trouble dealing with forked processes. However, there's no clear
suggestion on how to fix this.
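
For what it's worth, the usual remedy for the failure mode Greg describes is
to make the storage layer connect lazily, once per process; a minimal sketch
of that idea (hdfs_connect() is a placeholder for whatever the client library
actually provides, not a vouched-for call):

#include <stddef.h>
#include <sys/types.h>
#include <unistd.h>

static void  *hdfs_conn = NULL;       /* per-process connection handle */
static pid_t  hdfs_conn_pid = 0;      /* pid that opened it */

static void *
get_hdfs_connection(void)
{
    /* A handle inherited across fork() is unusable: discard and reconnect. */
    if (hdfs_conn == NULL || hdfs_conn_pid != getpid())
    {
        hdfs_conn = hdfs_connect("namenode-host", 8020);  /* placeholder */
        hdfs_conn_pid = getpid();
    }
    return hdfs_conn;
}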
I attached gdb to the writer process and got the following backtrace:

#0  0xb76f0430 in __kernel_vsyscall ()
#1  0xb6b2893d in ___newselect_nocancel () at
../sysdeps/unix/syscall-template.S:82
#2  0x0840ab46 in pg_usleep (microsec=20) at pgsleep.c:43
#3  0x0829ca9a in BgWriterNap () at bgwriter.c:642
#4  0x0829c882 in BackgroundWriterMain () at bgwriter.c:540
#5  0x0811b0ec in AuxiliaryProcessMain (argc=2, argv=0xbf982308) at
bootstrap.c:417
#6  0x082a9af1 in StartChildProcess (type=BgWriterProcess) at
postmaster.c:4427
#7  0x082a75de in reaper (postgres_signal_arg=17) at postmaster.c:2390
#8  
#9  0xb76f0430 in __kernel_vsyscall ()
#10 0xb6b2893d in ___newselect_nocancel () at
../sysdeps/unix/syscall-template.S:82
#11 0x082a5b62 in ServerLoop () at postmaster.c:1391
#12 0x082a53e2 in PostmasterMain (argc=3, argv=0xa525c28) at
postmaster.c:1092
#13 0x0822dfa8 in main (argc=3, argv=0xa525c28) at main.c:188

Any ideas?


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 9:37 AM, Andres Freund  wrote:
> Not necessarily. One use-case is doing something *before* something happens
> like enforcing naming conventions, data types et al during development/schema
> migration.

That is definitely a valid use case.  The only reason why we don't
have something like that built into the ObjectAccessHook framework is
because we haven't yet figured out a clean way to make it work.   Most
of KaiGai's previous attempts involved passing a bunch of garbage
selected apparently at random into the hook function, and I rejected
that as unmaintainable.  Dimitri's code here doesn't have that problem
- it passes in a consistent set of information across the board.  But
I still think it's unmaintainable, because there's no consistency
about when triggers get invoked, or whether they get invoked at all.
We need something that combines the strengths of both approaches.

I actually think that, to really meet all needs here, we may need the
ability to get control in more than one place.  For example, one thing
that KaiGai has wanted to do (and I bet Dimitri would love to be able
to do it too, and I'm almost sure that Dan Farina would like to do it)
is override the built-in security policy for particular commands.  I
think that KaiGai only wants to be able to deny things that would
normally be allowed, but I suspect some of those other folks would
also like to be able to allow things that would normally be denied.
For those use cases, you want to get control at permissions-checking
time.  However, where Dimitri has the hooks right now, BEFORE triggers
for ALTER commands fire after you've already looked up the object that
you're manipulating.  That has the advantage of allowing you to use
the OID of the object, rather than its name, to make policy decisions;
but by that time it's too late to cancel a denial-of-access by the
first-line permissions checks.  Dimitri also mentioned wanting to be
able to cancel the actual operation - and presumably, do something
else instead, like go execute it on a different node, and I think
that's another valid use case for this kind of trigger.  It's going to
take some work, though, to figure out what the right set of control
points is, and it's probably going to require some refactoring of the
existing code, which is a mess that I have been slowly trying to clean
up.

>> In the
>> interest of getting event triggers, you're arguing that we should
>> contort the definition of the word "command" to include sub-commands,
>> but not necessarily commands that turn out to be a no-op, and maybe
>> things that are sort of like what commands do even though nobody
>> actually ever executed a command by that name.  I just don't think
>> that's a good idea.  We either have triggers on commands, and they
>> execute once per command, period; or we have triggers on events and
>> they execute every time that event happens.
> I don't think that's a very helpful definition. What on earth would you
> want to do with plain passing of
> CREATE SCHEMA blub CREATE TABLE foo() CREATE TABLE bar();
>
> So some decomposition seems to be necessary. Getting the level right sure
> ain't totally easy.
> It might be helpful to pass in the context from which something like that
> happened.

I agree that it's not a very helpful decision, but I'm not the one who
said we wanted command triggers rather than event triggers.  :-)

> Which would basically require including pg_dump in the backend to implement
> replication and logging. I don't think librarifying pg_dump is a fair burden
> at all.

I don't either, but that strikes me as a largely unrelated problem.
As you may recall, I've complained that too much of that functionality
is in the client in the past, and I haven't changed my opinion on
that.  But how is that any different with Dimitri's approach?  You can
get a callback AFTER CREATE TABLE, and you'll get the table name.  Now
what?  If you get the trigger in C you can get the node tree, but
that's hardly any better.  You're still going to need to do some
pretty tricky push-ups to get reliable replication.  It's not at all
evident to me that the parse-tree is any better a place to start than
the system catalog representation; in fact, I would argue that it's
probably much worse, because you'll have to exactly replicate whatever
the backend did to decide what catalog entries to create, or you'll
get drift between servers.

> Also I have a *very hard* time imagining how to sensibly implement replicating
> CREATE TABLE or ALTER TABLE ... ADD COLUMN with just object access hooks.
> There is just not enough context. How would you discern between an ADD COLUMN
> and a CREATE TABLE? They look very similar or even identical on a catalog
> level.

That can be fixed using the optional argument to
InvokeObjectAccessHook, similar to what we've done for differentiating
internal drops from other drops.

> I also have the strong feeling that all this would expose implementation
> details *at least* as much as command triggers.

Re: [HACKERS] Another review of URI for libpq, v7 submission

2012-03-27 Thread Alex Shulgin

Heikki Linnakangas  writes:

> On 22.03.2012 23:42, Alex wrote:
>> Okay, at last here's v9, rebased against current master branch.
>
> Some quick comments on this patch:

Heikki, thank you for taking a look at this!

> I see a compiler warning:
> fe-connect.c: In function ‘conninfo_parse’:
> fe-connect.c:4113: warning: unused variable ‘option’

The warning is due to the earlier commit e9ce658b.  I believe the above
line was supposed to go away.

> Docs are missing.

Yes, my plan is to focus on the documentation and code comments while
sorting out any remaining issues with the code.

> I wonder if you should get an error if you try to specify the same option
> multiple times. At least the behavior needs to be documented.

Since conninfo strings may contain duplicate keywords, with the later one
taking precedence, I think we should just do the same with URIs
(which we already do).  I don't see the behavior of conninfo strings
documented anywhere, however.
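
To make the described precedence concrete, a tiny libpq check (assuming a
reachable server; the only point here is which duplicate wins):

#include <stdio.h>
#include <libpq-fe.h>

int main(void)
{
    /* Duplicate keyword: per the precedence described above, the later
     * value should win, i.e. this tries database "bar", not "foo". */
    PGconn *conn = PQconnectdb("dbname=foo dbname=bar");

    if (conn == NULL)
        return 1;
    printf("dbname used: %s\n", PQdb(conn));
    PQfinish(conn);
    return 0;
}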

> Should %00 be forbidden?

Probably yes, good spot.

> The error message is a bit confusing for
> "postgres://localhost?dbname=%XXfoo":
> WARNING: ignoring unrecognized URI query parameter: dbname
> There is in fact nothing wrong with "dbname", it's the %XX in the
> value that's bogus.

Hm, yes, that's a bug.

Looks like conninfo_uri_parse_params needs to be adjusted to properly
pass the error message generated by conninfo_store_uri_encoded_value.  I
wonder if examining the errorMessage buffer to tell if it's a hard error
(it's going to be empty in the case of ignoreMissing=true) is a good
practice.

> Looking at conninfo_uri_decode(), I think it's missing a bounds check,
> and peeks at two bytes beyond the end of string if the input ends in a
> %'.

No, in that case what happens on L4919 is this: we dereference q, which
is pointing at the NUL terminator, and pass the value to the first
get_hexdigit in the "if" condition; the pointer itself is then
incremented and does point beyond the end of the string, but since
get_hexdigit returns FALSE we don't call the second get_hexdigit, so we
never dereference the invalid pointer.

There is a comment right before that "if" which says just that.
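
For readers following along, the short-circuit being described reduces to
this shape (a simplified stand-alone rendition, not the actual libpq code):

#include <stdbool.h>

/* Convert one hex digit; false for any non-hex byte, including NUL. */
static bool
get_hexdigit(char c, int *out)
{
    if (c >= '0' && c <= '9') { *out = c - '0';      return true; }
    if (c >= 'a' && c <= 'f') { *out = c - 'a' + 10; return true; }
    if (c >= 'A' && c <= 'F') { *out = c - 'A' + 10; return true; }
    return false;
}

/* Decode "%XY" at *p (pointing at '%').  If the string ends in '%', the
 * first get_hexdigit() sees the NUL terminator and returns false; q ends
 * up pointing past the end, but && guarantees the second call (and any
 * out-of-bounds dereference) never happens. */
static bool
decode_percent(const char **p, char *out)
{
    const char *q = *p + 1;            /* skip the '%' */
    int hi, lo;

    if (get_hexdigit(*q++, &hi) && get_hexdigit(*q++, &lo))
    {
        *out = (char) ((hi << 4) | lo);
        *p = q;
        return true;
    }
    return false;                       /* bogus or truncated escape */
}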

--
Alex

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Tom Lane
Noah Misch  writes:
> On Mon, Mar 26, 2012 at 07:53:25PM -0400, Robert Haas wrote:
>> I think the more important question is a policy question: do we want
>> it to work like this?

> The DBA can customize policy by revoking public execute permissions on
> pg_catalog.pg_terminate_backend and interposing a security definer function
> implementing his checks.  For the population who will want something different
> here, that's adequate.

I don't particularly trust solutions that involve modifying
system-defined objects.  In this case, a dump and reload would be
sufficient to create a security hole, because the REVOKE would go away.

(Now, I'm not particularly concerned about the issue in the first place.
Just pointing out that for someone who is, the above isn't a great
solution.)

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Odd out of memory problem.

2012-03-27 Thread Andrew Dunstan



On 03/26/2012 01:54 PM, Andrew Dunstan wrote:

On 03/26/2012 01:34 PM, Tom Lane wrote:

Andrew Dunstan  writes:

On 03/26/2012 01:06 PM, Heikki Linnakangas wrote:

Is it possible this job is inserting and then updating (or deleting)
the row it just inserted and doing a large number of such
insert/update operations all within the same transaction? Or perhaps
it's updating the same row over and over again?

It's all in a single transaction. In fact the solution I'm currently
testing and seems to be working involves breaking it up into batches of
a few thousand LOs restored per batch.

Hm.  The test case is just a straight pg_restore of lots and lots of
LOs?

What pg_dump version was the dump made with?

8.4.8, same as the target. We get the same issue whether we restore
direct to the database from pg_restore or via a text dump.

Following this up, the workaround of making small batches of LOs did 
solve that memory issue. Here's what I did:


   pg_restore --list full_export.dmp | grep BLOB > bloblist
   pg_restore --use-list=bloblist full_export.dmp | \
     perl -ne ' $n++ if /lo_open/; ' \
          -e ' print " end; begin;\n" if (/lo_open/ && ($n % 1000 == 0)); ' \
          -e ' print; ' | \
     psql -t -q -v ON_ERROR_STOP "dbname=adtest" > /dev/null


That's a fairly ugly hack to have to use.


cheers

andrew

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Andres Freund
On Tuesday, March 27, 2012 02:55:47 PM Robert Haas wrote:
> On Tue, Mar 27, 2012 at 4:27 AM, Dimitri Fontaine
> 
>  wrote:
> > In the first versions of the patch I did try to have a single point
> > where to integrate the feature and that didn't work out. If you want to
> > publish information such as object id, name and schema you need to have
> > specialized code spread out everywhere.
> 
> [...]
> 
> > That's meant to be a feature of the command trigger system, that's been
> > asked about by a lot of people. Having triggers fire for sub commands is
> > possibly The Right Thing™, or at least the most popular one.
> 
> [...]
> 
> > I will rework LOAD, and ALTER TABLE too, which is more work as you can
> > imagine, because now we have to insinuate the command trigger calling
> > into each branch of what ALTER TABLE is able to implement.
> 
> [...]
> 
> > From last year's cluster hacker meeting then several mails on this list,
> > it appears that we don't want to do it the way you propose, which indeed
> > would be simpler to implement.
> 
> [...]
> 
> > I think that's a feature. You skip calling the commands trigger when you
> > know you won't run the command at all.
> 
> I am coming more and more strongly to the conclusion that we're going
> in the wrong direction here.  It seems to me that you're spending an
> enormous amount of energy implementing something that goes by the name
> COMMAND TRIGGER when what you really want is an EVENT TRIGGER.
> Apparently, you don't want a callback every time someone types CREATE
> TABLE: you want a callback every time a table gets created.
Not necessarily. One use-case is doing something *before* something happens 
like enforcing naming conventions, data types et al during development/schema 
migration.

> In the
> interest of getting event triggers, you're arguing that we should
> contort the definition of the word "command" to include sub-commands,
> but not necessarily commands that turn out to be a no-op, and maybe
> things that are sort of like what commands do even though nobody
> actually ever executed a command by that name.  I just don't think
> that's a good idea.  We either have triggers on commands, and they
> execute once per command, period; or we have triggers on events and
> they execute every time that event happens.
I don't think that's a very helpful definition. What on earth would you want to
do with plain passing of
CREATE SCHEMA blub CREATE TABLE foo() CREATE TABLE bar();

So some decomposition seems to be necessary. Getting the level right sure 
ain't totally easy.
It might be helpful to pass in the context from which something like that 
happened.

> As it turns out, two people have already put quite a bit of work into
> designing and implementing what amounts to an event trigger system for
> PostgreSQL: me, and KaiGai Kohei.  It's called the ObjectAccessHook
> mechanism, and it fires every time we create or drop an object.  It
> passes the type of object created or dropped, the OID of the object,
> and the column number also in the case of a column.  The major
> drawback of this mechanism is that you have to write the code you want
> to execute in C, and you can't arrange for it to be executed via a DDL
> command; instead, you have to set a global variable from your shared
> library's _PG_init() function.  However, I don't think that would be
> very hard to fix.  We could simply replace the
> InvokeObjectAccessHook() macro with a function that calls a list of
> triggers pulled from the catalog.
Which would basically require including pg_dump in the backend to implement 
replication and logging. I don't think librarifying pg_dump is a fair burden 
at all.
Also I have a *very hard* time imagining how to sensibly implement replicating
CREATE TABLE or ALTER TABLE ... ADD COLUMN with just object access hooks. 
There is just not enough context. How would you discern between an ADD COLUMN
and a CREATE TABLE? They look very similar or even identical on a catalog 
level.

I also have the strong feeling that all this would expose implementation 
details *at least* as much as command triggers. A slight change in order of 
catalog modification would be *way* harder to hide via the object hook than
something of a similar scale via command triggers.

> I think there are a couple of advantages of this approach over what
> you've got right now.  First, there are a bunch of tested hook points
> that are already committed.  They have well-defined semantics that are
> easy to understand: every time we create or drop an object, you get a
> callback with these arguments.  Second, KaiGai Kohei is interested in
> adding more hook points in the future to service sepgsql.  If the
> command triggers code and the sepgsql code both use the same set of
> hook points then (1) we won't clutter the code with multiple sets of
> hook points and (2) any features that get added for purposes of
> command triggers will also benefit sepgsql, and vice versa.  Since
> the two of you are trying to solve very similar problems, it would be
> nice if both of you were pulling in the same direction.

Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Noah Misch
On Tue, Mar 27, 2012 at 02:58:26PM +0200, Magnus Hagander wrote:
> On Tue, Mar 27, 2012 at 11:04, Noah Misch  wrote:
> > On Mon, Mar 26, 2012 at 07:53:25PM -0400, Robert Haas wrote:
> >> I think the more important question is a policy question: do we want
>> it to work like this?  It seems like a policy question that ought to
> >> be left to the DBA, but we have no policy management framework for
>> DBAs to configure what they do or do not wish to allow.  Still, if
> >> we've decided it's OK to allow cancelling, I don't see any real reason
> >> why this should be treated differently.
> >
> > The DBA can customize policy by revoking public execute permissions on
> > pg_catalog.pg_terminate_backend and interposing a security definer function
> implementing his checks.  For the population who will want something
> > different
> > here, that's adequate.
> 
> Well, by that argument, we can keep pg_terminate_backend superuser
> only and have the user wrap a security definer function around it to
> *get* it, no?

Yes.  However, if letting users terminate their own backends makes for a
better default, we should still make it so.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Another review of URI for libpq, v7 submission

2012-03-27 Thread Heikki Linnakangas

On 22.03.2012 23:42, Alex wrote:

Okay, at last here's v9, rebased against current master branch.


Some quick comments on this patch:

I see a compiler warning:
fe-connect.c: In function ‘conninfo_parse’:
fe-connect.c:4113: warning: unused variable ‘option’

Docs are missing.

I wonder if you should get an error if you try to specify the same option
multiple times. At least the behavior needs to be documented.


Should %00 be forbidden?

The error message is a bit confusing for 
"postgres://localhost?dbname=%XXfoo":

WARNING: ignoring unrecognized URI query parameter: dbname
There is in fact nothing wrong with "dbname", it's the %XX in the value 
that's bogus.


Looking at conninfo_uri_decode(), I think it's missing a bounds check, 
and peeks at two bytes beyond the end of string if the input ends in a '%'.


--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Magnus Hagander
On Tue, Mar 27, 2012 at 11:04, Noah Misch  wrote:
> On Mon, Mar 26, 2012 at 07:53:25PM -0400, Robert Haas wrote:
>> I think the more important question is a policy question: do we want
>> it to work like this?  It seems like a policy question that ought to
>> be left to the DBA, but we have no policy management framework for
>> DBAs to configure what they do or do not wish to allow.  Still, if
>> we've decided it's OK to allow cancelling, I don't see any real reason
>> why this should be treated differently.
>
> The DBA can customize policy by revoking public execute permissions on
> pg_catalog.pg_terminate_backend and interposing a security definer function
> implementing his checks.  For the population who will want something different
> here, that's adequate.

Well, by that argument, we can keep pg_terminate_backend superuser
only and have the user wrap a security definer function around it to
*get* it, no?


-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 4:27 AM, Dimitri Fontaine
 wrote:
> In the first versions of the patch I did try to have a single point
> where to integrate the feature and that didn't work out. If you want to
> publish information such as object id, name and schema you need to have
> specialized code spread out everywhere.
>
[...]
>
> That's meant to be a feature of the command trigger system, that's been
> asked about by a lot of people. Having triggers fire for sub commands is
> possibly The Right Thing™, or at least the most popular one.
>
[...]
>
> I will rework LOAD, and ALTER TABLE too, which is more work as you can
> imagine, because now we have to insinuate the command trigger calling
> into each branch of what ALTER TABLE is able to implement.
>
[...]
>
> From last year's cluster hacker meeting then several mails on this list,
> it appears that we don't want to do it the way you propose, which indeed
> would be simpler to implement.
>
[...]
>
> I think that's a feature. You skip calling the commands trigger when you
> know you won't run the command at all.

I am coming more and more strongly to the conclusion that we're going
in the wrong direction here.  It seems to me that you're spending an
enormous amount of energy implementing something that goes by the name
COMMAND TRIGGER when what you really want is an EVENT TRIGGER.
Apparently, you don't want a callback every time someone types CREATE
TABLE: you want a callback every time a table gets created.  You don't
want a callback every time someone types DROP FUNCTION; you want a
callback every time a function gets dropped.  If the goal here is to
do replication, you'd more than likely even want such callbacks to
occur when the function is dropped indirectly via CASCADE.  In the
interest of getting event triggers, you're arguing that we should
contort the definition of the word "command" to include sub-commands,
but not necessarily commands that turn out to be a no-op, and maybe
things that are sort of like what commands do even though nobody
actually ever executed a command by that name.  I just don't think
that's a good idea.  We either have triggers on commands, and they
execute once per command, period; or we have triggers on events and
they execute every time that event happens.

As it turns out, two people have already put quite a bit of work into
designing and implementing what amounts to an event trigger system for
PostgreSQL: me, and KaiGai Kohei.  It's called the ObjectAccessHook
mechanism, and it fires every time we create or drop an object.  It
passes the type of object created or dropped, the OID of the object,
and the column number also in the case of a column.  The major
drawback of this mechanism is that you have to write the code you want
to execute in C, and you can't arrange for it to be executed via a DDL
command; instead, you have to set a global variable from your shared
library's _PG_init() function.  However, I don't think that would be
very hard to fix.  We could simply replace the
InvokeObjectAccessHook() macro with a function that calls a list of
triggers pulled from the catalog.
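
Sketched in code, the change might look about like this (the macro shown
first is a simplified rendition of the existing one; the catalog-driven part
and its helper function are the hypothetical pieces):

/* Today, roughly: a macro that calls the single C-level hook, if set. */
#define InvokeObjectAccessHook(access, classId, objectId, subId, arg) \
    do { \
        if (object_access_hook) \
            (*object_access_hook)((access), (classId), (objectId), \
                                  (subId), (arg)); \
    } while (0)

/* Proposed, as a function: also fire triggers defined in a catalog. */
static void
InvokeObjectAccessHookFunction(ObjectAccessType access, Oid classId,
                               Oid objectId, int subId, void *arg)
{
    if (object_access_hook)
        (*object_access_hook)(access, classId, objectId, subId, arg);

    /* Hypothetical: scan a catalog for event triggers matching this
     * access type and object class, and call each trigger function. */
    fire_catalog_event_triggers(access, classId, objectId, subId);
}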

I think there are a couple of advantages of this approach over what
you've got right now.  First, there are a bunch of tested hook points
that are already committed.  They have well-defined semantics that are
easy to understand: every time we create or drop an object, you get a
callback with these arguments.  Second, KaiGai Kohei is interested in
adding more hook points in the future to service sepgsql.  If the
command triggers code and the sepgsql code both use the same set of
hook points then (1) we won't clutter the code with multiple sets of
hook points and (2) any features that get added for purposes of
command triggers will also benefit sepgsql, and vice versa.  Since
the two of you are trying to solve very similar problems, it would be
nice if both of you were pulling in the same direction.  Third, and
most importantly, it seems to match the semantics you want, which is a
callback every time something *happens* rather than a callback every
time someone *types something*.

Specifically, I propose the following plan:

- Rename COMMAND TRIGGER to EVENT TRIGGER.  Rewrite the documentation
to say that we're going to fire triggers every time an *event*
happens.  Rewrite the code to put the firing mechanism inside
InvokeObjectAccessHook, which will become a function rather than a
macro.

- Change the list of supported trigger types to match what the
ObjectAccessHook mechanism understands, which means, at present,
post-create and drop.  It might even make sense to forget about having
separate hooks for every type of object that can be created or dropped
and instead just let people say CREATE EVENT TRIGGER name ON { CREATE
| DROP } EXECUTE PROCEDURE function_name ( arguments ).

- Once that's done, start adding object-access-hook invocations (and
thus, the corresponding command trigger functionality) to whatever
other operations you'd like to have supported.

Re: [HACKERS] Odd out of memory problem.

2012-03-27 Thread Robert Haas
On Tue, Mar 27, 2012 at 3:22 AM, Hitoshi Harada  wrote:
> According to what I've learned in the last couple of months, array_agg
> is not ready for fallback paths like dumping to a tuplestore.  Even
> state merging is not possible for it.  The problem is that the executor
> doesn't know how to serialize/deserialize the internal-type transition
> value.  So in one implementation, the existence of a merge function is a
> flag to switch back to sort grouping instead of hash aggregation, and
> array_agg is one such aggregate function.  That said, if you invent a new
> flag to note that an aggregate is not dump-ready, it'd be worth adding a
> state-merge function to the aggregate infrastructure anyway.
>
> So I can imagine a way without a state-merge function or dumping to a
> tuplestore would be to sort the hash table contents and the rest of the
> input so that we can switch to sort grouping.  Since we have the hash
> table, we can definitely sort its contents in memory, and we can put to
> disk everything that comes later than the fallback and read it after the
> scan finishes.  Now that we have sorted state values and sorted input, we
> can continue the rest of the work.

It's a little bit tricky to make this work - you have to get all of
the values out of the hash-table you've built and stick them into a
Tuplesort object - but I think it can be made to work, and it seems
more elegant than anything else proposed so far.
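
As a toy model of that control flow (plain C, nothing to do with the real
executor): treat each hash-table entry as a partial state, turn the rows that
arrive after the fallback into singleton states, sort everything by key, and
merge runs.  It works trivially here because counts merge; array_agg's
difficulty is precisely that its transition state cannot be serialized and
combined this casually.

#include <stdio.h>
#include <stdlib.h>

typedef struct { int key; long count; } Group;

static int
cmp_group(const void *a, const void *b)
{
    int ka = ((const Group *) a)->key;
    int kb = ((const Group *) b)->key;
    return (ka > kb) - (ka < kb);
}

int
main(void)
{
    /* Pretend the in-memory hash table filled up holding three groups... */
    Group   all[16] = {{7, 4}, {2, 9}, {5, 1}};
    size_t  n = 3;

    /* ...and these rows arrived afterwards (each is a count of 1; in the
     * real plan this tail would be spilled to disk, not kept in memory). */
    int     rest[] = {5, 7, 7, 3};
    for (size_t i = 0; i < sizeof(rest) / sizeof(rest[0]); i++)
        all[n++] = (Group) {rest[i], 1};

    /* "Switch to sort grouping": sort states and rows together by key,
     * then sum each run, exactly as sorted aggregation would. */
    qsort(all, n, sizeof(Group), cmp_group);
    for (size_t i = 0; i < n;)
    {
        size_t  j = i;
        long    total = 0;

        while (j < n && all[j].key == all[i].key)
            total += all[j++].count;
        printf("key %d: count %ld\n", all[i].key, total);
        i = j;
    }
    return 0;
}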

I also agree with you and with Greg Stark that it would be good to
invent a state-merge function.  Although it wouldn't apply to every
case, it would make some very common cases a lot more efficient, both
in run time and in memory.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgsql_fdw, FDW for PostgreSQL server

2012-03-27 Thread Albe Laurenz
Shigeru HANADA wrote:
>> My gut feeling is that planning should be done by the server which
>> will execute the query.
> 
> Agreed, if selectivity of both local filtering and remote filtering were
> available, we can estimate result rows correctly and choose a better
> plan.
> 
> How about getting the row-count estimate by executing EXPLAIN for the
> fully-fledged remote query (IOW, containing the pushed-down WHERE
> clause), and estimating the selectivity of the local filter on the basis
> of statistics generated by the FDW via do_analyze_rel() and an
> FDW-specific sampling function?  In this design, we would be able to use
> a quite correct row estimate because we can consider the filtering done
> on each side separately, though it requires an expensive remote EXPLAIN
> for each possible path.

That sounds nice.
How would that work with a query that has one condition that could be
pushed down and one that has to be filtered locally?
Would you use the (local) statistics for the full table or can you
somehow account for the fact that rows have already been filtered
out remotely, which might influence the distribution?

> You are right, built-in equality and inequality operators don't cause a
> collation problem.  Perhaps allowing them would cover significant cases
> of string comparison, but I'm not sure how to determine whether an
> operator is = or != in a generic way.  We might have to hold a list of
> OIDs for collation-safe operators/functions until we support ROUTINE
> MAPPING or something like that...  Anyway, I'll fix pgsql_fdw to allow =
> and != for character types.

I believe that this covers a significant percentage of real-world cases.
I'd think that every built-in operator with name "=" or "<>" could
be pushed down.

Yours,
Laurenz Albe

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgsql_fdw, FDW for PostgreSQL server

2012-03-27 Thread Thom Brown
2012/3/26 Shigeru HANADA :
> (2012/03/15 23:06), Shigeru HANADA wrote:
>> Although the patches are still WIP, especially in WHERE push-down part,
>> but I'd like to post them so that I can get feedback of the design as
>> soon as possible.
>
> I've implemented pgsql_fdw's own deparser and enhanced some features
> since last post.  Please apply attached patches in the order below:
>
> * pgsql_fdw_v17.patch
>    - Adds pgsql_fdw as contrib module
> * pgsql_fdw_pushdown_v10.patch
>    - Adds WHERE push down capability to pgsql_fdw
> * pgsql_fdw_analyze_v1.patch
>    - Adds pgsql_fdw_analyze function for updating local stats

Hmm... I've applied them using the latest Git master, and in the order
specified, but I can't build the contrib module.  What am I doing
wrong?

It starts off with things like:

../../src/Makefile.global:420: warning: overriding commands for target
`submake-libpq'
../../src/Makefile.global:420: warning: ignoring old commands for
target `submake-libpq'

and later:

pgsql_fdw.c: In function ‘pgsqlGetForeignPlan’:
pgsql_fdw.c:321:2: warning: implicit declaration of function
‘extractRemoteExprs’ [-Wimplicit-function-declaration]
pgsql_fdw.c:321:12: warning: assignment makes pointer from integer
without a cast [enabled by default]
pgsql_fdw.c:362:3: warning: implicit declaration of function
‘deparseWhereClause’ [-Wimplicit-function-declaration]
pgsql_fdw.c: At top level:
pgsql_fdw.c:972:1: error: redefinition of ‘Pg_magic_func’
pgsql_fdw.c:39:1: note: previous definition of ‘Pg_magic_func’ was here

-- 
Thom

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgsql_fdw, FDW for PostgreSQL server

2012-03-27 Thread Shigeru HANADA
Thanks for the comments.

(2012/03/27 18:36), Albe Laurenz wrote:
>> 2) Defer planning stuffs as long as possible to clarify the role of
>> each function.  Currently GetRelSize just estimates result rows from
>> local statistics, and GetPaths adds only one path which represents a
>> SeqScan on the remote side.  As a result of this change, the
>> PgsqlFdwPlanState struct is obsolete.
> 
> I see the advantage of being able to do all this locally, but
> I think there are a lot of downsides too:
> - You have an additional maintenance task if you want to keep
>   statistics for remote tables accurate.  I understand that this may
>   get better in a future release.
> - You depend on the structure of pg_statistic, which means a potential
>   incompatibility between server versions.  You can add cases to
>   pgsql_fdw_analyze to cater for changes, but that is cumbersome and
>   will only help for later PostgreSQL versions connecting to earlier
>   ones.
> - Planning and execution will change (improve, of course) between server
>   versions.  The local planner may choose an inferior plan based on a
>   wrong assumption of how a certain query can be handled on the remote.
> - You have no statistics if the foreign table points to a view on the
>   remote system.

Especially for the 2nd and 4th points, generating pg_statistic records
without calling do_analyze_rel() seems impractical in a multi-version
environment.  As you pointed out, I had missed another
semantic-difference problem here.  We would have to use do_analyze_rel()
and a custom sampling function which returns sample rows from the remote
data source if we want to have statistics for foreign data locally.  This
method would be available for most FDWs, but requires some changes in
core.  [I'll comment on Fujita-san's ANALYZE patch about this issue soon.]

> My gut feeling is that planning should be done by the server which
> will execute the query.

Agreed, if selectivity of both local filtering and remote filtering were
available, we can estimate result rows correctly and choose a better plan.

How about getting the row count estimate by executing EXPLAIN for the
fully-fledged remote query (IOW, one containing the pushed-down WHERE
clause), and estimating the selectivity of the local filter on the basis
of statistics generated by the FDW via do_analyze_rel() and an
FDW-specific sampling function?  In this design we could use quite
accurate row estimates, because the filtering done on each side is
considered separately, though it requires an expensive remote EXPLAIN for
each possible path.
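
The remote part could be done with plain libpq calls; a rough sketch
(get_remote_rows() is only an illustration, not part of the patch):

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "libpq-fe.h"

/*
 * Hypothetical helper: run EXPLAIN for the pushed-down query on the
 * remote server and parse the planner's row estimate out of the top
 * plan line, e.g.
 *     Seq Scan on t1  (cost=0.00..34.00 rows=2400 width=8)
 * Returns -1 if anything goes wrong.
 */
static double
get_remote_rows(PGconn *conn, const char *sql)
{
	char		buf[8192];
	PGresult   *res;
	double		rows = -1.0;

	snprintf(buf, sizeof(buf), "EXPLAIN %s", sql);
	res = PQexec(conn, buf);
	if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) > 0)
	{
		const char *p = strstr(PQgetvalue(res, 0, 0), "rows=");

		if (p != NULL)
			rows = strtod(p + 5, NULL);
	}
	PQclear(res);
	return rows;
}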

>> 3) Implement pgsql_fdw's own deparser which pushes down collation-free
>> and immutable expressions in local WHERE clause.  This means that most
>> of numeric conditions can be pushed down, but conditions using
> character
>> types are not.
> 
> I understand that this is simple and practical, but it is a pity that
> this excludes equality and inequality conditions on strings.
> Correct me if I am wrong, but I thought that these work the same
> regardless of the collation.

You are right, built-in equality and inequality operators don't cause
collation problems.  Perhaps allowing them would cover the significant
cases of string comparison, but I'm not sure how to determine whether an
operator is = or != in a generic way.  We might have to hold a list of
OIDs for collation-safe operators/functions until we support ROUTINE
MAPPING or something like that...  Anyway, I'll fix pgsql_fdw to allow =
and != for character types.
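
One generic test I can think of (untested sketch, not in the patch):
treat an operator as "=" if it is the equality member of some btree
operator family, and as "!=" if its negator is:

#include "postgres.h"

#include "nodes/pg_list.h"
#include "utils/lsyscache.h"

/*
 * Sketch: an operator behaves as "=" if it is the equality member of at
 * least one btree operator family; get_mergejoin_opfamilies() returns
 * exactly those families.
 */
static bool
operator_is_equality(Oid opno)
{
	List	   *opfamilies = get_mergejoin_opfamilies(opno);
	bool		result = (opfamilies != NIL);

	list_free(opfamilies);
	return result;
}

/* Sketch: "!=" can be recognized as the negator of an equality operator. */
static bool
operator_is_inequality(Oid opno)
{
	Oid			negator = get_negator(opno);

	return OidIsValid(negator) && operator_is_equality(negator);
}

Since get_mergejoin_opfamilies() and get_negator() are existing
lsyscache.h routines, this would avoid a hard-coded OID list for the
common cases.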

Regards,
-- 
Shigeru Hanada



Re: [HACKERS] PATCH: pg_basebackup (missing exit on error)

2012-03-27 Thread Fujii Masao
On Thu, Jan 26, 2012 at 11:48 PM, Robert Haas  wrote:
> On Tue, Jan 24, 2012 at 4:39 AM, Thomas Ogrisegg  wrote:
>> While testing a backup script, I noticed that pg_basebackup exits with
>> 0, even if it had errors while writing the backup to disk when the
>> backup file is to be sent to stdout. The attached patch should fix this
>> problem (and some special cases when the last write fails).
>
> This part looks like a typo:
>
> +                                       if (fflush (tarfile) != 0)
> +                                       {
> +                                               fprintf(stderr, _("%s: error flushing stdout: %s\n"),
> +                                                       strerror (errno));
> +                                               disconnect_and_exit(1);
> +                                       }
>
> The error is in flushing the tarfile, not stdout, right?

No, in this case tarfile is set to stdout as follows.

-
	if (strcmp(basedir, "-") == 0)
	{
#ifdef HAVE_LIBZ
		if (compresslevel != 0)
		{
			ztarfile = gzdopen(dup(fileno(stdout)), "wb");
			if (gzsetparams(ztarfile, compresslevel,
							Z_DEFAULT_STRATEGY) != Z_OK)
			{
				fprintf(stderr, _("%s: could not set compression level %d: %s\n"),
						progname, compresslevel, get_gz_error(ztarfile));
				disconnect_and_exit(1);
			}
		}
		else
#endif
			tarfile = stdout;
	}
-

I don't think that pg_basebackup really needs to fflush() stdout for
each file. Right?

-
 #endif
 	if (tarfile != NULL)
-		fclose(tarfile);
+	{
+		if (fclose(tarfile) != 0)
+		{
+			fprintf(stderr, _("%s: error closing file \"%s\": %s\n"),
+					progname, filename, strerror (errno));
+			disconnect_and_exit(1);
+		}
+	}
-

This message doesn't follow the PostgreSQL message style ("could not close file ..." would be conventional).

It's guaranteed that tarfile is not NULL at this point, so the above check
of tarfile is not required; the remaining code of pg_basebackup already
relies on that assumption.

The attached patch removes the fflush() part, changes the log message, and
removes the check of tarfile, as described above.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/bin/pg_basebackup/pg_basebackup.c
--- b/src/bin/pg_basebackup/pg_basebackup.c
*************** ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
*** 584,589 ****
--- 584,590 ----
  				{
  					fprintf(stderr, _("%s: could not write to compressed file \"%s\": %s\n"),
  							progname, filename, get_gz_error(ztarfile));
+ 					disconnect_and_exit(1);
  				}
  			}
  			else
*************** ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
*** 600,606 ****
  			if (strcmp(basedir, "-") == 0)
  			{
  #ifdef HAVE_LIBZ
! 				if (ztarfile)
  					gzclose(ztarfile);
  #endif
  			}
--- 601,607 ----
  			if (strcmp(basedir, "-") == 0)
  			{
  #ifdef HAVE_LIBZ
! 				if (ztarfile != NULL)
  					gzclose(ztarfile);
  #endif
  			}
*************** ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
*** 609,617 ****
  #ifdef HAVE_LIBZ
  				if (ztarfile != NULL)
  					gzclose(ztarfile);
  #endif
! 				if (tarfile != NULL)
! 					fclose(tarfile);
  			}
  
  			break;
--- 610,625 ----
  #ifdef HAVE_LIBZ
  				if (ztarfile != NULL)
  					gzclose(ztarfile);
+ 				else
  #endif
! 				{
! 					if (fclose(tarfile) != 0)
! 					{
! 						fprintf(stderr, _("%s: could not close file \"%s\": %s\n"),
! 								progname, filename, strerror (errno));
! 						disconnect_and_exit(1);
! 					}
! 				}
  			}
  
  			break;
*************** ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
*** 630,635 ****
--- 638,644 ----
  			{
  				fprintf(stderr, _("%s: could not write to compressed file \"%s\": %s\n"),
  						progname, filename, get_gz_error(ztarfile));
+ 				disconnect_and_exit(1);
  			}
  		}
  		else


Re: [HACKERS] Reporting WAL file containing checkpoint's REDO record in pg_controldata's result

2012-03-27 Thread Fujii Masao
On Mon, Mar 26, 2012 at 11:18 PM, Tom Lane  wrote:
> Robert Haas  writes:
>> On Mon, Mar 26, 2012 at 2:50 AM, Magnus Hagander  wrote:
>>>> s/segment/file/g?
>
>>> We're already using "file" to mean something different *internally*,
>>> don't we? And since pg_controldata shows fairly internal information,
>>> I'm not sure this is the best idea.
>>>
>>> Maybe compromise and call it "segment file" - that is both easier to
>>> understand than segment, and not actually using a term that means
>>> something else...
>
>> It's also kind of wordy.  I think "file" is fine.
>
> +1 for "file".  I think the internal usage of "file" to mean "roughly
> 4GB worth of WAL" is going to go away soon anyway, as there won't be
> much reason to worry about the concept once LSN arithmetic is 64-bit.

Agreed. This would mean that lots of log messages like the following would
need to be changed once 64-bit LSNs have been committed.

errmsg("could not fdatasync log file %u, segment %u: %m",
   log, seg)));
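
For instance (just a sketch of the arithmetic, assuming the default 16MB
segments), the (log, seg) pair collapses into a single segment number once
the LSN is a plain 64-bit integer:

#include <stdint.h>

/* hypothetical 64-bit LSN, as discussed above */
typedef uint64_t XLogRecPtr64;

#define XLOG_SEG_SIZE	(16 * 1024 * 1024)

static inline uint64_t
lsn_to_segno(XLogRecPtr64 lsn)
{
	/*
	 * The old code derived "log" from the high 32 bits and "seg" from
	 * the low 32 bits divided by XLOG_SEG_SIZE; one number replaces both.
	 */
	return lsn / XLOG_SEG_SIZE;
}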

Anyway, should I add this patch to the next CF? Or is anyone planning
to commit the patch for 9.2?

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center



Re: [HACKERS] pgsql_fdw, FDW for PostgreSQL server

2012-03-27 Thread Albe Laurenz
Shigeru HANADA wrote:
> I've implemented pgsql_fdw's own deparser and enhanced some features
> since last post.  Please apply attached patches in the order below:

> Changes from previous version
> =============================
> 
> 1) Don't use remote EXPLAIN for cost/rows estimation, so now planner
> estimates result rows and costs on the basis of local statistics such as
> pg_class and pg_statistic.  To update local statistics, I added
> pgsql_fdw_analyze() SQL function which updates local statistics of a
> foreign table by retrieving remote statistics, such as pg_class and
> pg_statistic, via libpq.  This would make the planning of pgsql_fdw
> simple and fast.  This function can be easily modified to handle ANALYZE
> command invoked for a foreign table (Fujita-san is proposing this as
> common feature in another thread).
> 
> 2) Defer planning stuffs as long as possible to clarify the role of each
> function.  Currently GetRelSize just estimates result rows from local
> statistics, and GetPaths adds only one path which represents SeqScan on
> remote side.  As result of this change, PgsqlFdwPlanState struct is
> obsolete.

I see the advantage of being able to do all this locally, but
I think there are a lot of downsides too:
- You have an additional maintenance task if you want to keep
  statistics for remote tables accurate.  I understand that this may
  get better in a future release.
- You depend on the structure of pg_statistic, which means a potential
  incompatibility between server versions.  You can add cases to
  pgsql_fdw_analyze to cater for changes, but that is cumbersome and will
  only help for later PostgreSQL versions connecting to earlier ones.
- Planning and execution will change (improve, of course) between server
  versions.  The local planner may choose an inferior plan based on a
  wrong assumption of how a certain query can be handled on the remote.
- You have no statistics if the foreign table points to a view on the
  remote system.

My gut feeling is that planning should be done by the server which
will execute the query.

> 3) Implement pgsql_fdw's own deparser which pushes down collation-free
> and immutable expressions in local WHERE clause.  This means that most
> of numeric conditions can be pushed down, but conditions using character
> types are not.

I understand that this is simple and practical, but it is a pity that
this excludes equality and inequality conditions on strings.
Correct me if I am wrong, but I thought that these work the same
regardless of the collation.

Yours,
Laurenz Albe



[HACKERS] Improvement of log messages in pg_basebackup

2012-03-27 Thread Fujii Masao
Hi,

>   fprintf(stderr, _("%s: could not identify system: %s\n"),
>   progname, PQerrorMessage(conn));

Since the result of PQerrorMessage() includes a trailing newline, the above
log message in pg_basebackup doesn't need its own trailing \n.  The attached
patch gets rid of that \n.

>   res = PQgetResult(conn);
>   if (PQresultStatus(res) != PGRES_TUPLES_OK)
>   {
>       fprintf(stderr, _("%s: could not get WAL end position from server\n"),
>               progname);

ISTM it's worth including the PQerrorMessage() result in the above log
message, to help diagnose the cause of the error.  The attached patch does that.

Regards,

-- 
Fujii Masao
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
*** a/src/bin/pg_basebackup/pg_basebackup.c
--- b/src/bin/pg_basebackup/pg_basebackup.c
*************** BaseBackup(void)
*** 914,920 ****
  	res = PQexec(conn, "IDENTIFY_SYSTEM");
  	if (PQresultStatus(res) != PGRES_TUPLES_OK)
  	{
! 		fprintf(stderr, _("%s: could not identify system: %s\n"),
  				progname, PQerrorMessage(conn));
  		disconnect_and_exit(1);
  	}
--- 914,920 ----
  	res = PQexec(conn, "IDENTIFY_SYSTEM");
  	if (PQresultStatus(res) != PGRES_TUPLES_OK)
  	{
! 		fprintf(stderr, _("%s: could not identify system: %s"),
  				progname, PQerrorMessage(conn));
  		disconnect_and_exit(1);
  	}
*************** BaseBackup(void)
*** 1049,1056 ****
  	res = PQgetResult(conn);
  	if (PQresultStatus(res) != PGRES_TUPLES_OK)
  	{
! 		fprintf(stderr, _("%s: could not get WAL end position from server\n"),
! 				progname);
  		disconnect_and_exit(1);
  	}
  	if (PQntuples(res) != 1)
--- 1049,1056 ----
  	res = PQgetResult(conn);
  	if (PQresultStatus(res) != PGRES_TUPLES_OK)
  	{
! 		fprintf(stderr, _("%s: could not get WAL end position from server: %s"),
! 				progname, PQerrorMessage(conn));
  		disconnect_and_exit(1);
  	}
  	if (PQntuples(res) != 1)


Re: [HACKERS] Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Noah Misch
On Mon, Mar 26, 2012 at 07:53:25PM -0400, Robert Haas wrote:
> I think the more important question is a policy question: do we want
> it to work like this?  It seems like a policy question that ought to
> be left to the DBA, but we have no policy management framework for
> DBAs to configure what they do or do not wish to allow.  Still, if
> we've decided it's OK to allow cancelling, I don't see any real reason
> why this should be treated differently.

The DBA can customize policy by revoking public execute permission on
pg_catalog.pg_terminate_backend and interposing a security definer function
that implements his checks.  For the population that wants something
different here, that's adequate.



Re: [HACKERS] Command Triggers patch v18

2012-03-27 Thread Dimitri Fontaine
Hi,

First things first, thanks for the review!

I'm working on a new version of the patch to fix all the specific
comments you called "nitpicking" here and in your previous email. This
new patch will also implement a single list of triggers to run in
alphabetical order, not split by ANY/specific command.

Robert Haas  writes:
> I am extremely concerned about the way in which this patch arranges to
> invoke command triggers.  You've got call sites spattered all over the
> place, and I think that's going to be, basically, an unfixable mess
> and a breeding ground for bugs.  On a first read-through:

In the first versions of the patch I did try to integrate the feature at a
single point, but that didn't work out.  If you want to publish information
such as the object's id, name, and schema, you need specialized code spread
out everywhere.

As for where to call the trigger, it's a per-command decision guided by a
general policy.  Your comments here are of two kinds: mostly about having
to guess the policy because it's not explicit, plus some specifics that
are either debatable or not following the policy.

The policy I've been trying to install is that command triggers should
fire once the basic error checking is done.  That's not perfect for
auditing purposes *if you want to log all attempts*, but it's good enough
to audit all commands that had an impact on your system, and you can
still block commands.

Also, in most commands you can't get the object id and name before the
basic error checking is done.

Now, about the IF NOT EXISTS case, with the policy just described
there's no reason to trigger user code, but I can also see your position
here.

> 1. BEFORE ALTER TABLE triggers are fired in AlterTable().  However, an

I used to have that in utility.c, but Andres had me move it out, and it
seems like I should move it back to utility.c for the reasons you mention,
which I had missed.

> 2. BEFORE CREATE TABLE triggers seem to have similar issues; see
> transformCreateStmt.
>
> rhaas=# create table foo (a serial primary key);
> NOTICE:  CREATE TABLE will create implicit sequence "foo_a_seq" for
> serial column "foo.a"
> NOTICE:  snitch: BEFORE CREATE SEQUENCE public.foo_a_seq []
> NOTICE:  snitch: AFTER CREATE SEQUENCE public.foo_a_seq [16392]
> NOTICE:  snitch: BEFORE CREATE TABLE public.foo []
> NOTICE:  snitch: BEFORE CREATE INDEX public.foo_pkey []
> NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index
> "foo_pkey" for table "foo"
> NOTICE:  snitch: AFTER CREATE INDEX public.foo_pkey [16398]
> NOTICE:  snitch: BEFORE ALTER SEQUENCE public.foo_a_seq [16392]
> NOTICE:  snitch: AFTER ALTER SEQUENCE public.foo_a_seq [16392]
> NOTICE:  snitch: AFTER CREATE TABLE public.foo [16394]
> CREATE TABLE

That's meant to be a feature of the command trigger system, one that a lot
of people have asked for.  Having triggers fire for subcommands is possibly
The Right Thing™, or at least the most popular one.

> 3. RemoveRelations() and RemoveObjects() similarly take the position
> that when the object does not exist, command triggers need not fire.
> This seems entirely arbitrary.  CREATE EXTENSION IF NOT EXISTS,
> however, takes the opposite (and probably correct) position that even
> if we decide not to create the extension, we should still fire command
> triggers.  In a similar vein, AlterFunctionOwner_oid() skips firing
> the command triggers if the old and new owners happen to be the same,
> but other forms of ALTER FUNCTION (e.g. ALTER FUNCTION .. COST) fire
> triggers even if the old and new parameters are the same; and
> AlterForeignDataWrapperOwner_internal does NOT skip firing command
> triggers just because the old and new owners are the same.

Here we integrate with the code as it stands; I didn't question the
existing error management.  Do we need to revise that within the scope of
the command triggers patch?

> 4. RemoveRelations() and RemoveObjects() also take the position that a
> statement like "DROP TABLE foo, bar" should fire each relevant BEFORE
> command trigger twice, then drop both objects, then fire each relevant
> AFTER command trigger twice.  I think that's wrong.  It's 100% clear

See above, it's what users are asking us to implement as a feature.

> 5. It seems desirable for BEFORE command triggers to fire at a
> consistent point during command execution, but currently they don't.

The policy should be to fire the triggers once the usual error checking
is done.

> For example, BEFORE DROP VIEW triggers don't fire until we've verified
> that q exists, is a view, and that we have permission to drop it, but
> LOAD triggers fire much earlier, before we've really checked anything
> at all.  And ALTER TABLE is somewhere in the middle: we fire the
> BEFORE trigger after checking permissions on the main table, but
> before all permissions checks are done, viz:

I will rework LOAD, and ALTER TABLE too, which is more work as you can
imagine, because now we have to insinu

Re: [HACKERS] 9.2 commitfest closure (was Command Triggers, v16)

2012-03-27 Thread Simon Riggs
On Tue, Mar 27, 2012 at 12:39 AM, Tom Lane  wrote:

> Thom Brown  writes:
>> This is probably a dumb question but... surely there's more than 2
>> committers able to review?
>
> Able and willing might be two different things.  Alvaro, Heikki, and
> Magnus have all been looking at stuff, but I think they may be getting
> burned out too.

If people are keeping score, add myself and Robert also, and maybe others;
I've not been watching too closely.

On average there appear to be about 10 patches per active committer in
this CF.  Given that the complexity of the patches in the last CF always
seems to be higher, that is a huge number and represents weeks of work.

One of the key problems I see is that few people actually get paid to do
this, so it's fairly hard to allocate time.  I want to make it a policy of
"1 for 1": if you write a patch, you need to review a patch.  That way
sponsors are forced to spend money on review time for stuff they may not
care about, as a trade for getting reviews on stuff they do.  This would
take pressure off the few.

-- 
 Simon Riggs   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services



Re: [HACKERS] Odd out of memory problem.

2012-03-27 Thread Hitoshi Harada
On Mon, Mar 26, 2012 at 5:11 PM, Tom Lane  wrote:
> Greg Stark  writes:
>> On Mon, Mar 26, 2012 at 6:15 PM, Tom Lane  wrote:
>>> Could you give us a brain dump on the sketch?  I've never seen how to
>>> do it without unreasonable overhead.
>
>> Hm. So my original plan was dependent on adding the state-merge
>> function we've talked about in the past. Not all aggregate functions
>> necessarily can support such a function but I think all or nearly all
>> the builtin aggregates can. Certainly min,max, count, sum, avg,
>> stddev, array_agg can which are most of what people do. That would be
>> a function which can take two state variables and produce a new state
>> variable.
>
> I'd rather not invent new requirements for aggregate implementations
> if we can avoid it.
>
>> However now that I've started thinking about it further I think you
>> could solve it with less complexity by cheating in various ways. For
>> example if you limit the hash size to 1/2 of work_mem then you when
>> you reach that limit you could just stuff any tuple that doesn't match
>> a hash entry into a tuplesort with 1/2 of work_mem and do the regular
>> level break logic on the output of that.
>
> Or just start dumping such tuples into a tuplestore, while continuing to
> process tuples that match the hashagg entries that are already in
> existence.  Once the input is exhausted, read out the hashagg entries we
> have, flush the hashagg table, start reading from the tuplestore.
> Repeat as needed.
>
> I like this idea because the only thing you give up is predictability of
> the order of output of aggregated entries, which is something that a
> hashagg isn't guaranteeing anyway.  In particular, we would still have a
> guarantee that any one aggregate evaluation processes the matching
> tuples in arrival order, which is critical for some aggregates.
>
> The main problem I can see is that if we start to flush after work_mem
> is X% full, we're essentially hoping that the state values for the
> existing aggregates won't grow by more than 1-X%, which is safe for many
> common aggregates but fails for some like array_agg().  Ultimately, for
> ones like that, it'd probably be best to never consider hashing at all.
> I guess we could invent an "unsafe for hash aggregation" flag for
> aggregates that have unbounded state-size requirements.
>

According to what I've learned in the last couple of months, array_agg is
not ready for fallback strategies like dumping to a tuplestore.  Even a
state-merge function wouldn't help it.  The problem is that the executor
doesn't know how to serialize/deserialize a transition value of an
internal type.  So in one possible implementation, the existence of a
merge function is the flag that switches back to sort grouping rather than
hash aggregation, and array_agg is one such aggregate function.  That
said, if you invent a new flag to mark an aggregate as not dump-ready, it
would be worth adding a state-merge function to the aggregate
infrastructure anyway.

So, a way that needs neither a state-merge function nor dumping to a
tuplestore would be to sort the hash table contents and the rest of the
input, so that we can switch to sort grouping.  Since the hash table fits
in memory, we can certainly sort its entries there, and we can spill to
disk everything that arrives after the fallback and read it back once the
scan finishes.  With sorted state values and sorted input, we can then
continue the rest of the work.
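
For comparison, here is how I read Tom's flush-to-tuplestore idea, as a
rough sketch; only the tuplestore_* calls and the executor macros are real
APIs, while lookup_hash_entry_noalloc(), hash_table_is_full(),
advance_group() and emit_and_reset_table() are hypothetical stand-ins for
nodeAgg.c internals:

static void
agg_fill_hash_table_with_spill(AggState *aggstate)
{
	Tuplestorestate *spill = tuplestore_begin_heap(false, false, work_mem / 2);
	TupleTableSlot *slot;

	while (!TupIsNull(slot = ExecProcNode(outerPlanState(aggstate))))
	{
		/* only create new groups while the hash table still fits */
		bool		create = !hash_table_is_full(aggstate);
		void	   *entry = lookup_hash_entry_noalloc(aggstate, slot, create);

		if (entry != NULL)
			advance_group(aggstate, entry, slot);	/* group is in memory */
		else
			tuplestore_puttupleslot(spill, slot);	/* defer to next pass */
	}

	emit_and_reset_table(aggstate);
	/* then repeat, reading from 'spill', until it comes up empty */
}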

Anyway, the memory usage problem is not limited to array_agg and hash
aggregation.  Even if you can tell that the hash table exceeds X% of
work_mem, how can you tell that other operators, such as Sort, are not
using the rest of the memory?  One approach I can see is to assign each
operator an arbitrary share of work_mem and account for it locally, but
that approach will occasionally skew and isn't perfect either.


Thanks,
-- 
Hitoshi Harada



Re: [HACKERS] Re: Cross-backend signals and administration (Was: Re: pg_terminate_backend for same-role)

2012-03-27 Thread Martijn van Oosterhout
On Tue, Mar 27, 2012 at 03:17:36AM +0100, Greg Stark wrote:
> I may be forgetting something obvious here but is there even a
> function to send an interrupt signal? That would trigger the same
> behaviour that a user hitting C-c would trigger which would only be
> handled at the next CHECK_FOR_INTERRUPTS which seems like it would be
> non-controversial and iirc we don't currently have a function to do
> this for other connections the user may have if he doesn't have access
> to the original terminal and doesn't have raw shell access to run
> arbitrary commands.

Sure, C-c just sends a SIGINT.  But IIRC the problem wasn't so much
cancelling running queries (SIGINT appears to work fine there) as that a
connection blocked waiting for client input doesn't wake up when sent a
signal.  To fix that you'd need to change the signal mode and teach the
various input layers (like SSL) to handle the EINTR case.
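
The EINTR part itself is mechanical; a minimal sketch, assuming the signal
is delivered without SA_RESTART and that the usual CHECK_FOR_INTERRUPTS()
flag machinery is in place (read_with_interrupts() is just an illustration):

#include "postgres.h"

#include <errno.h>
#include <unistd.h>

#include "miscadmin.h"			/* CHECK_FOR_INTERRUPTS() */

/*
 * Sketch: a caught signal makes a blocking read() fail with EINTR;
 * servicing interrupts at that point lets a backend blocked on client
 * input react to a cancel/terminate request instead of sleeping on.
 */
static ssize_t
read_with_interrupts(int fd, void *buf, size_t len)
{
	for (;;)
	{
		ssize_t		n = read(fd, buf, len);

		if (n < 0 && errno == EINTR)
		{
			CHECK_FOR_INTERRUPTS();	/* may not return */
			continue;				/* otherwise just retry the read */
		}
		return n;
	}
}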

Have a nice day,
-- 
Martijn van Oosterhout  http://svana.org/kleptog/
> He who writes carelessly confesses thereby at the very outset that he does
> not attach much importance to his own thoughts.
   -- Arthur Schopenhauer

