Tom Lane <[EMAIL PROTECTED]> writes:
> Yeah, I was just looking at doing that.
Well I imagine it takes you as long to read my patch as it would for you to
write it. But anyways it's still useful to me as exercises.
> It would also be interesting to prefetch one row from the outer table and fall
> out immediately (without building the hash table) if the outer table is
> empty. This seems to require some contortion of the code though :-(
Why is it any more complicated than just moving the hash build down lower?
There's one small special case needed in ExecHashJoinOuterGetTuple but it's
pretty non-intrusive.
It seems to work for me but I can't test multiple batches easily. I think I've
convinced myself that they would work fine but...
test=# explain analyze select * from a natural join b;
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Hash Join (cost=22.50..345.00 rows=5000 width=4) (actual time=0.005..0.005 rows=0
loops=1)
Hash Cond: ("outer".a = "inner".a)
-> Seq Scan on a (cost=0.00..20.00 rows=1000 width=4) (actual time=0.002..0.002
rows=0 loops=1)
-> Hash (cost=20.00..20.00 rows=1000 width=4) (never executed)
-> Seq Scan on b (cost=0.00..20.00 rows=1000 width=4) (never executed)
Total runtime: 0.070 ms
(6 rows)
Index: backend/executor/nodeHash.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/executor/nodeHash.c,v
retrieving revision 1.86
diff -c -r1.86 nodeHash.c
*** backend/executor/nodeHash.c 29 Aug 2004 04:12:31 -0000 1.86
--- backend/executor/nodeHash.c 22 Sep 2004 18:37:40 -0000
***************
*** 232,237 ****
--- 232,238 ----
hashtable->buckets = NULL;
hashtable->nbatch = nbatch;
hashtable->curbatch = 0;
+ hashtable->ntup = 0;
hashtable->innerBatchFile = NULL;
hashtable->outerBatchFile = NULL;
hashtable->innerBatchSize = NULL;
***************
*** 493,498 ****
--- 494,501 ----
heapTuple->t_len);
hashTuple->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = hashTuple;
+
+ hashtable->ntup ++;
}
else
{
Index: backend/executor/nodeHashjoin.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/executor/nodeHashjoin.c,v
retrieving revision 1.64
diff -c -r1.64 nodeHashjoin.c
*** backend/executor/nodeHashjoin.c 29 Aug 2004 05:06:42 -0000 1.64
--- backend/executor/nodeHashjoin.c 22 Sep 2004 18:37:41 -0000
***************
*** 109,143 ****
ResetExprContext(econtext);
/*
- * if this is the first call, build the hash table for inner relation
- */
- if (!node->hj_hashdone)
- {
- /*
- * create the hash table
- */
- Assert(hashtable == NULL);
- hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
-
node->hj_HashOperators);
- node->hj_HashTable = hashtable;
-
- /*
- * execute the Hash node, to build the hash table
- */
- hashNode->hashtable = hashtable;
- (void) ExecProcNode((PlanState *) hashNode);
-
- /*
- * Open temp files for outer batches, if needed. Note that file
- * buffers are palloc'd in regular executor context.
- */
- for (i = 0; i < hashtable->nbatch; i++)
- hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
-
- node->hj_hashdone = true;
- }
-
- /*
* Now get an outer tuple and probe into the hash table for matches
*/
outerTupleSlot = node->js.ps.ps_OuterTupleSlot;
--- 109,114 ----
***************
*** 163,171 ****
--- 134,180 ----
node->hj_MatchedOuter = false;
/*
+ * if this is the first call, build the hash table for inner
relation
+ */
+ if (!node->hj_hashdone)
+ {
+ /*
+ * create the hash table
+ */
+ Assert(hashtable == NULL);
+ hashtable = ExecHashTableCreate((Hash *)
hashNode->ps.plan,
+
node->hj_HashOperators);
+ node->hj_HashTable = hashtable;
+
+ /*
+ * execute the Hash node, to build the hash table
+ */
+ hashNode->hashtable = hashtable;
+ (void) ExecProcNode((PlanState *) hashNode);
+
+ /* An empty hash table can't return any matches */
+ if (hashtable->nbatch == 0 &&
+ hashtable->ntup == 0 &&
+ node->js.jointype != JOIN_LEFT)
+ {
+ return NULL;
+ }
+
+ /*
+ * Open temp files for outer batches, if needed. Note
that file
+ * buffers are palloc'd in regular executor context.
+ */
+ for (i = 0; i < hashtable->nbatch; i++)
+ hashtable->outerBatchFile[i] =
BufFileCreateTemp(false);
+
+ node->hj_hashdone = true;
+ }
+
+ /*
* now we have an outer tuple, find the corresponding bucket
* for this tuple from the hash table
*/
+
node->hj_CurBucketNo = ExecHashGetBucket(hashtable, econtext,
outerkeys);
node->hj_CurTuple = NULL;
***************
*** 503,511 ****
ExecHashJoinOuterGetTuple(PlanState *node, HashJoinState *hjstate)
{
HashJoinTable hashtable = hjstate->hj_HashTable;
! int curbatch = hashtable->curbatch;
TupleTableSlot *slot;
if (curbatch == 0)
{ /* if it is the first
pass */
slot = ExecProcNode(node);
--- 512,531 ----
ExecHashJoinOuterGetTuple(PlanState *node, HashJoinState *hjstate)
{
HashJoinTable hashtable = hjstate->hj_HashTable;
! int curbatch;
TupleTableSlot *slot;
+ /* The very first tuple of the first batch is done before the hash table is
+ * built to avoid building an unnecessary hash table. If it's not there
+ * there is no need for subsequent batches anyways. If it is the hash table
+ * will get built and we'll handle it normally for subsequent batches.
+ */
+
+ if (!hjstate->hj_hashdone) {
+ return ExecProcNode(node);
+ }
+
+ curbatch = hashtable->curbatch;
if (curbatch == 0)
{ /* if it is the first
pass */
slot = ExecProcNode(node);
Index: include/executor/hashjoin.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/include/executor/hashjoin.h,v
retrieving revision 1.32
diff -c -r1.32 hashjoin.h
*** include/executor/hashjoin.h 29 Aug 2004 04:13:06 -0000 1.32
--- include/executor/hashjoin.h 22 Sep 2004 18:37:54 -0000
***************
*** 57,62 ****
--- 57,64 ----
int nbatch; /* number of batches; 0 means
1-pass join */
int curbatch; /* current batch #, or 0
during 1st pass */
+ int ntup; /* Total number of tuples
hashed in this batch */
+
/*
* all these arrays are allocated for the life of the hash join, but
* only if nbatch > 0:
--
greg
---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?
http://www.postgresql.org/docs/faqs/FAQ.html