"Lawrence, Ramon" <ramon.lawre...@ubc.ca> writes:
> Attached is a patch that will disable the physical-tlist optimization
> for hash join if the number of batches is greater than 1.  The patch and
> performance results were created by Michael Henderson (graduate
> student).

I've applied the attached modified version of this patch.

                        regards, tom lane

Index: src/backend/nodes/outfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v
retrieving revision 1.355
diff -c -r1.355 outfuncs.c
*** src/backend/nodes/outfuncs.c        21 Mar 2009 00:04:39 -0000      1.355
--- src/backend/nodes/outfuncs.c        26 Mar 2009 15:19:43 -0000
***************
*** 1448,1453 ****
--- 1448,1454 ----
        _outJoinPathInfo(str, (JoinPath *) node);
  
        WRITE_NODE_FIELD(path_hashclauses);
+       WRITE_INT_FIELD(num_batches);
  }
  
  static void
Index: src/backend/optimizer/path/costsize.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v
retrieving revision 1.205
diff -c -r1.205 costsize.c
*** src/backend/optimizer/path/costsize.c       21 Mar 2009 00:04:39 -0000      
1.205
--- src/backend/optimizer/path/costsize.c       26 Mar 2009 15:19:43 -0000
***************
*** 1880,1885 ****
--- 1880,1887 ----
                                                        &numbatches,
                                                        &num_skew_mcvs);
        virtualbuckets = (double) numbuckets *(double) numbatches;
+       /* mark the path with estimated # of batches */
+       path->num_batches = numbatches;
  
        /*
         * Determine bucketsize fraction for inner relation.  We use the 
smallest
Index: src/backend/optimizer/plan/createplan.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v
retrieving revision 1.256
diff -c -r1.256 createplan.c
*** src/backend/optimizer/plan/createplan.c     21 Mar 2009 00:04:39 -0000      
1.256
--- src/backend/optimizer/plan/createplan.c     26 Mar 2009 15:19:44 -0000
***************
*** 1910,1915 ****
--- 1910,1919 ----
        /* We don't want any excess columns in the hashed tuples */
        disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
  
+       /* If we expect batching, suppress excess columns in outer tuples too */
+       if (best_path->num_batches > 1)
+               disuse_physical_tlist(outer_plan, 
best_path->jpath.outerjoinpath);
+ 
        /*
         * If there is a single join clause and we can identify the outer
         * variable as a simple column reference, supply its identity for
Index: src/backend/optimizer/util/pathnode.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v
retrieving revision 1.150
diff -c -r1.150 pathnode.c
*** src/backend/optimizer/util/pathnode.c       27 Feb 2009 00:06:27 -0000      
1.150
--- src/backend/optimizer/util/pathnode.c       26 Mar 2009 15:19:44 -0000
***************
*** 1480,1488 ****
        pathnode->jpath.outerjoinpath = outer_path;
        pathnode->jpath.innerjoinpath = inner_path;
        pathnode->jpath.joinrestrictinfo = restrict_clauses;
!       /* A hashjoin never has pathkeys, since its ordering is unpredictable */
        pathnode->jpath.path.pathkeys = NIL;
        pathnode->path_hashclauses = hashclauses;
  
        cost_hashjoin(pathnode, root, sjinfo);
  
--- 1480,1499 ----
        pathnode->jpath.outerjoinpath = outer_path;
        pathnode->jpath.innerjoinpath = inner_path;
        pathnode->jpath.joinrestrictinfo = restrict_clauses;
!       /*
!        * A hashjoin never has pathkeys, since its output ordering is
!        * unpredictable due to possible batching.  XXX If the inner relation is
!        * small enough, we could instruct the executor that it must not batch,
!        * and then we could assume that the output inherits the outer 
relation's
!        * ordering, which might save a sort step.  However there is 
considerable
!        * downside if our estimate of the inner relation size is badly off.
!        * For the moment we don't risk it.  (Note also that if we wanted to 
take
!        * this seriously, joinpath.c would have to consider many more paths for
!        * the outer rel than it does now.)
!        */
        pathnode->jpath.path.pathkeys = NIL;
        pathnode->path_hashclauses = hashclauses;
+       /* cost_hashjoin will fill in pathnode->num_batches */
  
        cost_hashjoin(pathnode, root, sjinfo);
  
Index: src/include/nodes/relation.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/nodes/relation.h,v
retrieving revision 1.170
diff -c -r1.170 relation.h
*** src/include/nodes/relation.h        5 Mar 2009 23:06:45 -0000       1.170
--- src/include/nodes/relation.h        26 Mar 2009 15:19:44 -0000
***************
*** 845,850 ****
--- 845,851 ----
  {
        JoinPath        jpath;
        List       *path_hashclauses;           /* join clauses used for 
hashing */
+       int                     num_batches;                    /* number of 
batches expected */
  } HashPath;
  
  /*
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to