On Sun, Feb 26, 2012 at 7:16 PM, Heikki Linnakangas
<[email protected]> wrote:
> On 24.02.2012 22:55, Simon Riggs wrote:
>>
>> A long time ago, in a galaxy far away, we discussed ways to speed up
>> data loads/COPY.
>> http://archives.postgresql.org/pgsql-hackers/2007-01/msg00470.php
>>
>> In particular, the idea that we could mark tuples as committed while
>> we are still loading them, to avoid negative behaviour for the first
>> reader.
>>
>> Simple patch to implement this is attached, together with test case.
>>
>> ...
>>
>>
>> What exactly does it do? Previously, we optimised COPY when it was
>> loading data into a newly created table or a freshly truncated table.
>> This patch extends that and actually sets the tuple header flag as
>> HEAP_XMIN_COMMITTED during the load. Doing so is simple 2 lines of
>> code. The patch also adds some tests for corner cases that would make
>> that action break MVCC - though those cases are minor and typical data
>> loads will benefit fully from this.
>
>
> This doesn't work with subtransactions:
...
> The query should return the row copied in the same subtransaction.
Thanks for pointing that out.
New patch with corrected logic and test case attached.
>> In the link above, Tom suggested reworking HeapTupleSatisfiesMVCC()
>> and adding current xid to snapshots. That is an invasive change that I
>> would wish to avoid at any time and explains the long delay in
>> tackling this. The way I've implemented it, is just as a short test
>> during XidInMVCCSnapshot() so that we trap the case when the xid ==
>> xmax and so would appear to be running. This is much less invasive and
>> just as performant as Tom's original suggestion.
>
>
> TransactionIdIsCurrentTransactionId() can be fairly expensive if you have a
> lot of subtransactions open...
I've put in something to avoid that cost for the common case - just a boolean.
This seems like the best plan rather than the explicit FREEZE option.
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c910863..3899282 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2056,6 +2056,17 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
tup->t_tableOid = RelationGetRelid(relation);
/*
+ * If we are inserting into a new relation invisible as yet to other
+ * backends and our session has no prior snapshots and no ready portals
+ * then we can also set the hint bit for the rows we are inserting. The
+ * last two restrictions ensure that HeapTupleSatisfiesMVCC() gives
+ * the right answer if the current transaction inspects the data after
+ * we load it.
+ */
+ if (options & HEAP_INSERT_HINT_XMIN)
+ tup->t_data->t_infomask |= HEAP_XMIN_COMMITTED;
+
+ /*
* If the new tuple is too big for storage or contains already toasted
* out-of-line attributes from some other relation, invoke the toaster.
*/
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index e22bdac..de7504c 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -146,6 +146,7 @@ typedef struct TransactionStateData
int prevSecContext; /* previous SecurityRestrictionContext */
bool prevXactReadOnly; /* entry-time xact r/o state */
bool startedInRecovery; /* did we start in recovery? */
+ bool maySeePreHintedTuples; /* did subtrans write pre-hinted rows? */
struct TransactionStateData *parent; /* back link to parent */
} TransactionStateData;
@@ -175,6 +176,7 @@ static TransactionStateData TopTransactionStateData = {
0, /* previous SecurityRestrictionContext */
false, /* entry-time xact r/o state */
false, /* startedInRecovery */
+ false, /* maySeePreHintedTuples */
NULL /* link to parent state block */
};
@@ -704,6 +706,18 @@ TransactionStartedDuringRecovery(void)
return CurrentTransactionState->startedInRecovery;
}
+bool
+TransactionMaySeePreHintedTuples(void)
+{
+ return CurrentTransactionState->maySeePreHintedTuples;
+}
+
+void
+TransactionMayWritePreHintedTuples(void)
+{
+ CurrentTransactionState->maySeePreHintedTuples = true;
+}
+
/*
* CommandCounterIncrement
*/
@@ -1689,6 +1703,7 @@ StartTransaction(void)
s->startedInRecovery = false;
XactReadOnly = DefaultXactReadOnly;
}
+ s->maySeePreHintedTuples = false;
XactDeferrable = DefaultXactDeferrable;
XactIsoLevel = DefaultXactIsoLevel;
forceSyncCommit = false;
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 110480f..1419e33 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -43,6 +43,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/portal.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@@ -1922,6 +1923,13 @@ CopyFrom(CopyState cstate)
hi_options |= HEAP_INSERT_SKIP_FSM;
if (!XLogIsNeeded())
hi_options |= HEAP_INSERT_SKIP_WAL;
+
+ if (ThereAreNoPriorRegisteredSnapshots() &&
+ ThereAreNoReadyPortals())
+ {
+ hi_options |= HEAP_INSERT_HINT_XMIN;
+ TransactionMayWritePreHintedTuples();
+ }
}
/*
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index cfb73c1..24075db 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -1055,3 +1055,22 @@ pg_cursor(PG_FUNCTION_ARGS)
return (Datum) 0;
}
+
+bool
+ThereAreNoReadyPortals(void)
+{
+ HASH_SEQ_STATUS status;
+ PortalHashEnt *hentry;
+
+ hash_seq_init(&status, PortalHashTable);
+
+ while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL)
+ {
+ Portal portal = hentry->portal;
+
+ if (portal->status == PORTAL_READY)
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 5aebbd1..5d9e3bf 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1183,3 +1183,12 @@ DeleteAllExportedSnapshotFiles(void)
FreeDir(s_dir);
}
+
+bool
+ThereAreNoPriorRegisteredSnapshots(void)
+{
+ if (RegisteredSnapshots <= 1)
+ return true;
+
+ return false;
+}
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 31791a7..6cb4152 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -1255,6 +1255,28 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
/* Any xid < xmin is not in-progress */
if (TransactionIdPrecedes(xid, snapshot->xmin))
return false;
+
+ /*
+ * TransactionIdIsCurrentTransactionId() could be expensive so avoid
+ * running it unless we know that the current subtransaction has
+ * decided to write tuples that already have HEAP_XMIN_COMMITTED set.
+ * In that case we end up here when we run HeapTupleSatisfiesMVCC.
+ */
+ if (TransactionMaySeePreHintedTuples())
+ {
+ /*
+ * Make sure current xid is never thought to be in progress on tuples
+ * that are already marked as committed - for use in bulk COPY etc..
+ * We never included the current xid in snapshots, but its possible
+ * that the current xid is equal to or greater than xmax, so we
+ * check for the current xid before we check for xmax.
+ * We could optimise this further by stopping the search as soon
+ * as the xid is less than xmax, but seems like a step too far.
+ */
+ if (TransactionIdIsCurrentTransactionId(xid))
+ return false;
+ }
+
/* Any xid >= xmax is in-progress */
if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
return true;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index fa38803..0381785 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -26,6 +26,7 @@
/* "options" flag bits for heap_insert */
#define HEAP_INSERT_SKIP_WAL 0x0001
#define HEAP_INSERT_SKIP_FSM 0x0002
+#define HEAP_INSERT_HINT_XMIN 0x0004
typedef struct BulkInsertStateData *BulkInsertState;
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 20e344e..7fb0123 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -217,6 +217,8 @@ extern TimestampTz GetCurrentTransactionStopTimestamp(void);
extern void SetCurrentStatementStartTimestamp(void);
extern int GetCurrentTransactionNestLevel(void);
extern bool TransactionIdIsCurrentTransactionId(TransactionId xid);
+extern bool TransactionMaySeePreHintedTuples(void);
+extern void TransactionMayWritePreHintedTuples(void);
extern void CommandCounterIncrement(void);
extern void ForceSyncCommit(void);
extern void StartTransactionCommand(void);
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 4833942..bd2b133 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -219,5 +219,6 @@ extern void PortalDefineQuery(Portal portal,
extern Node *PortalListGetPrimaryStmt(List *stmts);
extern void PortalCreateHoldStore(Portal portal);
extern void PortalHashTableDeleteAll(void);
+extern bool ThereAreNoReadyPortals(void);
#endif /* PORTAL_H */
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index f195981..789b72e 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -46,5 +46,6 @@ extern Datum pg_export_snapshot(PG_FUNCTION_ARGS);
extern void ImportSnapshot(const char *idstr);
extern bool XactHasExportedSnapshots(void);
extern void DeleteAllExportedSnapshotFiles(void);
+extern bool ThereAreNoPriorRegisteredSnapshots(void);
#endif /* SNAPMGR_H */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e2bc0c..7461529 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -239,6 +239,49 @@ a\.
\.b
c\.d
"\."
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a
+---
+ a
+ b
+ c
+(3 rows)
+
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a
+---
+ d
+ e
+ f
+(3 rows)
+
+SAVEPOINT s2;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+SELECT * FROM vistest;
+ a
+---
+ x
+ y
+ z
+(3 rows)
+
+COMMIT;
+SELECT * FROM vistest;
+ a
+---
+ x
+ y
+ z
+(3 rows)
+
DROP TABLE x, y;
DROP FUNCTION fn_x_before();
DROP FUNCTION fn_x_after();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 6322c8f..a1a8d97 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -164,6 +164,34 @@ c\.d
COPY testeoc TO stdout CSV;
+CREATE TABLE vistest (LIKE testeoc);
+BEGIN;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+a
+b
+c
+\.
+SELECT * FROM vistest;
+SAVEPOINT s1;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+d
+e
+f
+\.
+SELECT * FROM vistest;
+SAVEPOINT s2;
+TRUNCATE vistest;
+COPY vistest FROM stdin CSV;
+x
+y
+z
+\.
+SELECT * FROM vistest;
+COMMIT;
+SELECT * FROM vistest;
+
DROP TABLE x, y;
DROP FUNCTION fn_x_before();
DROP FUNCTION fn_x_after();
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers