Hi,

Today, I discovered that when building a btree index, the btree code
uses index_form_tuple() to create an index tuple from the heap tuple,
calls tuplesort_putindextuple() to copy that tuple into the sort's
memory context, and then frees the original one it built.  This seemed
inefficient, so I wrote a patch to eliminate the tuple copying.  It
works by adding a function tuplesort_putindextuplevalues(), which
builds the tuple in the sort's memory context and thus avoids the need
for a separate copy.  I'm not sure if that's the best approach, but
the optimization seems wortwhile.

I tested it by repeatedly executing "REINDEX INDEX
pgbench_accounts_pkey" on a PPC64 machine.  pgbench_accounts contains
10 million records.  With unpatched master as of
b2f7bd72c4d3e80065725c72e85778d5f4bdfd4a, I got times of 6.159s,
6.177s, and 6.201s.  With the attached patch, I got times of 5.787s,
5.972s, and 5.913s, a savings of almost 5%.  Not bad considering the
amount of work involved.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 542ed43..0743474 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -171,28 +171,21 @@ btbuildCallback(Relation index,
 				void *state)
 {
 	BTBuildState *buildstate = (BTBuildState *) state;
-	IndexTuple	itup;
-
-	/* form an index tuple and point it at the heap tuple */
-	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
-	itup->t_tid = htup->t_self;
 
 	/*
 	 * insert the index tuple into the appropriate spool file for subsequent
 	 * processing
 	 */
 	if (tupleIsAlive || buildstate->spool2 == NULL)
-		_bt_spool(itup, buildstate->spool);
+		_bt_spool(buildstate->spool, htup->t_self, values, isnull);
 	else
 	{
 		/* dead tuples are put into spool2 */
 		buildstate->haveDead = true;
-		_bt_spool(itup, buildstate->spool2);
+		_bt_spool(buildstate->spool2, htup->t_self, values, isnull);
 	}
 
 	buildstate->indtuples += 1;
-
-	pfree(itup);
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 9ddc275..8cd3a91 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -185,9 +185,10 @@ _bt_spooldestroy(BTSpool *btspool)
  * spool an index entry into the sort file.
  */
 void
-_bt_spool(IndexTuple itup, BTSpool *btspool)
+_bt_spool(BTSpool *btspool, ItemPointerData self, Datum *values, bool *isnull)
 {
-	tuplesort_putindextuple(btspool->sortstate, itup);
+	tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
+								  self, values, isnull);
 }
 
 /*
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 8b520c1..3b0a06c 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1150,6 +1150,29 @@ tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple)
 	 */
 	COPYTUP(state, &stup, (void *) tuple);
 
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Collect one index tuple while collecting input data for sort, building
+ * it from caller-supplied values.
+ */
+void
+tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
+							  ItemPointerData self, Datum *values,
+                              bool *isnull)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	stup;
+
+	stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
+	((IndexTuple) stup.tuple)->t_tid = self;
+	USEMEM(state, GetMemoryChunkSpace(stup.tuple));
+	/* set up first-column key value */
+	stup.datum1 = index_getattr((IndexTuple) stup.tuple,
+								1,
+								RelationGetDescr(state->indexRel),
+								&stup.isnull1);
 	puttuple_common(state, &stup);
 
 	MemoryContextSwitchTo(oldcontext);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 1a8b16d..5ef1f9b 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -717,7 +717,8 @@ typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
 extern BTSpool *_bt_spoolinit(Relation heap, Relation index,
 			  bool isunique, bool isdead);
 extern void _bt_spooldestroy(BTSpool *btspool);
-extern void _bt_spool(IndexTuple itup, BTSpool *btspool);
+extern void _bt_spool(BTSpool *btspool, ItemPointerData self,
+		  Datum *values, bool *isnull);
 extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
 
 /*
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 05445f0..2b62a98 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -85,6 +85,9 @@ extern void tuplesort_puttupleslot(Tuplesortstate *state,
 					   TupleTableSlot *slot);
 extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
 extern void tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple);
+extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
+							  Relation rel, ItemPointerData self,
+							  Datum *values, bool *isnull);
 extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
 				   bool isNull);
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to