Repository: incubator-hawq
Updated Branches:
  refs/heads/master c72e58946 -> b6391f191


HAWQ-1604. Add a new GUC, hawq_hashjoin_bloomfilter, to indicate whether to 
use a Bloom filter for hash join.
Remove gp_hashjoin_bloomfilter and the Bloom filter in the hash join table; 
this legacy code has been verified not to improve hash join performance.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/b6391f19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/b6391f19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/b6391f19

Branch: refs/heads/master
Commit: b6391f19163e5e332a870c652c2f4327ed861b68
Parents: c72e589
Author: Wen Lin <w...@pivotal.io>
Authored: Sun Apr 8 18:10:12 2018 +0800
Committer: Wen Lin <w...@pivotal.io>
Committed: Sun Apr 8 18:10:12 2018 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbvars.c         |  2 +-
 src/backend/executor/nodeHash.c   | 23 +++--------------------
 src/backend/utils/misc/guc.c      |  8 ++++----
 src/include/cdb/cdbvars.h         |  2 +-
 src/include/executor/hashjoin.h   |  2 +-
 src/test/unit/mock/mock_info.json |  4 ----
 6 files changed, 10 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index c2fca21..d8e8552 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -234,7 +234,7 @@ int                 gp_hashagg_spillbatch_min = 0;
 int            gp_hashagg_spillbatch_max = 0;
 
 /* hash join to use bloom filter: default to 0, means not used */
-int            gp_hashjoin_bloomfilter = 0;
+int            hawq_hashjoin_bloomfilter = 0;
 
 /* Analyzing aid */
 int            gp_motion_slice_noop = 0;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/executor/nodeHash.c
----------------------------------------------------------------------
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index dd63305..a461598 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -77,8 +77,6 @@ void ExecChooseHashTableSize(double ntuples, int tupwidth,
                                                uint64 operatorMemKB
                                                );
 
-#define BLOOMVAL(hk)  (((uint64)1) << (((hk) >> 13) & 0x3f))
-
 /* Amount of metadata memory required per batch */
 #define MD_MEM_PER_BATCH       (sizeof(HashJoinBatchData *) + 
sizeof(HashJoinBatchData))
 
@@ -323,7 +321,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState 
*hjstate, List *hashOper
         */
        hashtable = (HashJoinTable)palloc0(sizeof(HashJoinTableData));
        hashtable->buckets = NULL;
-       hashtable->bloom = NULL;
        hashtable->curbatch = 0;
        hashtable->growEnabled = true;
        hashtable->totalTuples = 0;
@@ -455,9 +452,6 @@ ExecHashTableCreate(HashState *hashState, HashJoinState 
*hjstate, List *hashOper
        hashtable->buckets = (HashJoinTuple *)
                palloc0(nbuckets * sizeof(HashJoinTuple));
 
-       if(gp_hashjoin_bloomfilter!=0)
-               hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
        MemoryContextSwitchTo(oldcxt);
        }
        END_MEMORY_ACCOUNT();
@@ -792,7 +786,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
        {
                HashJoinTuple prevtuple;
                HashJoinTuple tuple;
-               uint64 bloom = 0;
 
                prevtuple = NULL;
                tuple = hashtable->buckets[i];
@@ -812,7 +805,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
                        {
                                /* keep tuple */
                                prevtuple = tuple;
-                               bloom |= BLOOMVAL(tuple->hashvalue);
                        }
                        else
                        {
@@ -846,9 +838,6 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 
                        tuple = nexttuple;
                }
-
-               if(gp_hashjoin_bloomfilter!=0)
-                       hashtable->bloom[i] = bloom;
        }
 
 #ifdef HJDEBUG
@@ -989,9 +978,6 @@ ExecHashTableInsert(HashState *hashState, HashJoinTable 
hashtable,
                hashtable->buckets[bucketno] = hashTuple;
                hashtable->totalTuples += 1;
 
-               if(gp_hashjoin_bloomfilter!=0)
-                       hashtable->bloom[bucketno] |= BLOOMVAL(hashvalue);
-
                /* Double the number of batches when too much data in hash 
table. */
                if (batch->innerspace > hashtable->spaceAllowed ||
                        batch->innertuples > UINT_MAX/2)
@@ -1195,12 +1181,12 @@ ExecScanHashBucket(HashState *hashState, HashJoinState 
*hjstate,
         */
        if (hashTuple == NULL)
        {
-               /* if bloom filter fails, then no match - don't even bother to 
scan */
-               if (gp_hashjoin_bloomfilter == 0 || 0 != 
(hashtable->bloom[hjstate->hj_CurBucketNo] & BLOOMVAL(hashvalue)))
-                       hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+               hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
        }
        else
+       {
                hashTuple = hashTuple->next;
+       }
 
        while (hashTuple != NULL)
        {
@@ -1263,9 +1249,6 @@ ExecHashTableReset(HashState *hashState, HashJoinTable 
hashtable)
        hashtable->buckets = (HashJoinTuple *)
                palloc0(nbuckets * sizeof(HashJoinTuple));
 
-       if(gp_hashjoin_bloomfilter != 0)
-               hashtable->bloom = (uint64*) palloc0(nbuckets * sizeof(uint64));
-
        hashtable->batches[hashtable->curbatch]->innerspace = 0;
        hashtable->batches[hashtable->curbatch]->innertuples = 0;
        hashtable->totalTuples = 0;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ac29d87..64449da 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -5834,13 +5834,13 @@ static struct config_int ConfigureNamesInt[] =
        },
 
        {
-               {"gp_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
+               {"hawq_hashjoin_bloomfilter", PGC_USERSET, GP_ARRAY_TUNING,
                 gettext_noop("Use bloomfilter in hash join"),
-                gettext_noop("Use bloomfilter may speed up hashtable probing"),
+                gettext_noop("Use bloomfilter may speed up hash join 
performance"),
                 GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL | GUC_GPDB_ADDOPT
                },
-               &gp_hashjoin_bloomfilter,
-               1, 0, 1, NULL, NULL
+               &hawq_hashjoin_bloomfilter,
+               0, 0, 1, NULL, NULL
        },
 
        {

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 9f6c3b1..7ce988a 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -937,7 +937,7 @@ extern int gp_hashagg_spillbatch_min;
 extern int gp_hashagg_spillbatch_max;
 
 /* Hashjoin use bloom filter */
-extern int gp_hashjoin_bloomfilter;
+extern int hawq_hashjoin_bloomfilter;
 
 /* Get statistics for partitioned parent from a child */
 extern bool    gp_statistics_pullup_from_child_partition;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/include/executor/hashjoin.h
----------------------------------------------------------------------
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 9776eb0..208aed6 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -167,7 +167,7 @@ typedef struct HashJoinTableData
        int                     nbuckets;               /* # buckets in the 
in-memory hash table */
        /* buckets[i] is head of list of tuples in i'th in-memory bucket */
        struct HashJoinTupleData **buckets;
-       uint64                                    *bloom; /* bloom[i] is 
bloomfilter for buckets[i] */
+
        /* buckets array is per-batch storage, as are all the tuples */
 
        int                     nbatch;                 /* number of batches */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b6391f19/src/test/unit/mock/mock_info.json
----------------------------------------------------------------------
diff --git a/src/test/unit/mock/mock_info.json 
b/src/test/unit/mock/mock_info.json
index 9198cf1..315c351 100644
--- a/src/test/unit/mock/mock_info.json
+++ b/src/test/unit/mock/mock_info.json
@@ -13929,10 +13929,6 @@
             "filename": "src/backend/cdb/cdbvars.c", 
             "header filename": "src/include/cdb/cdbvars.h"
         }, 
-        "gp_hashjoin_bloomfilter": {
-            "filename": "src/backend/cdb/cdbvars.c", 
-            "header filename": "src/include/cdb/cdbvars.h"
-        }, 
         "gp_hashjoin_tuples_per_bucket": {
             "filename": "src/backend/cdb/cdbvars.c", 
             "header filename": "src/include/cdb/cdbvars.h"

Reply via email to