hashing

joemcdonnell Tue, 29 Apr 2025 17:04:22 -0700

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git



The following commit(s) were added to refs/heads/master by this push:
     new 5ddd360cf IMPALA-14015: Remove dead code in be/src/experiments/hashing
5ddd360cf is described below

commit 5ddd360cfbde856e304cfb9a73bf5b6c32b5d5f5
Author: Joe McDonnell <[email protected]>
AuthorDate: Tue Apr 29 10:13:34 2025 -0700

    IMPALA-14015: Remove dead code in be/src/experiments/hashing
    
    The code in be/src/experiments/hashing can't compile, as
    the headers needed by that code were removed 9 years ago
    in https://github.com/apache/impala/commit/866e44715b .
    Nothing uses the code, so this removes be/src/experiments/hashing.
    
    Testing:
     - Ran a build
    
    Change-Id: I439cb7ace46446017abf0ca215b89face42b65d4
    Reviewed-on: http://gerrit.cloudera.org:8080/22832
    Reviewed-by: Yida Wu <[email protected]>
    Reviewed-by: Riza Suminto <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/experiments/hashing/cache-hash-test.cc      | 115 ---------------
 be/src/experiments/hashing/growing-test.cc         | 139 ------------------
 .../hashing/interface/cache-hash-test.cc           | 117 ----------------
 .../experiments/hashing/interface/growing-test.cc  | 144 -------------------
 .../hashing/multilevel/cache-hash-test.cc          | 117 ----------------
 .../experiments/hashing/multilevel/growing-test.cc | 129 -----------------
 .../hashing/prefetch/cache-hash-test.cc            | 116 ---------------
 .../experiments/hashing/prefetch/growing-test.cc   | 138 ------------------
 .../hashing/split-benchmarks/cache-hash-test.cc    | 116 ---------------
 .../hashing/split-benchmarks/growing-test.cc       | 138 ------------------
 .../partitioning-throughput-test.cc                | 155 ---------------------
 .../hashing/streaming/cache-hash-test.cc           | 117 ----------------
 .../experiments/hashing/streaming/growing-test.cc  | 143 -------------------
 13 files changed, 1684 deletions(-)

diff --git a/be/src/experiments/hashing/cache-hash-test.cc 
b/be/src/experiments/hashing/cache-hash-test.cc
deleted file mode 100644
index 572af68de..000000000
--- a/be/src/experiments/hashing/cache-hash-test.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples) {
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.ElapsedTime();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/growing-test.cc 
b/be/src/experiments/hashing/growing-test.cc
deleted file mode 100644
index 4d71e452f..000000000
--- a/be/src/experiments/hashing/growing-test.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
-
-  template <int buffer_size>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
-    HashStore<buffer_size> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
-    free(tuples);
-    return watch.ElapsedTime();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a keyspace of size max_id, with a fanout into num_tables tables.
-  template <int buffer_size>
-  static void SanityCheck(const HashStore<buffer_size> & hs,
-                          uint64_t num_probe_tuples, int max_id, int 
num_tables) {
-    uint64_t total_count = 0;
-    for (int i = 0; i < hs.num_tables_; ++i) {
-      StandardHashTable& ht = hs.tables_[i];
-      for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); 
it.Next()) {
-        total_count += it.GetRow()->count;
-      }
-    }
-    ASSERT_EQ(num_probe_tuples, total_count);
-    ASSERT_EQ(num_tables, hs.num_tables_)
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-    if (buffer_size > 0) {
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
-    }
-  }
-
-  template <int buffer_size>
-  inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int 
num_tables) {
-    uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, build_tuples, 
num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << build_tuples << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " 
tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    // only go up to 2^12 hashtables because after that, there are at least as 
many
-    // buffers as L2 cache lines. We won't let this happen; we'll do 
multi-level fanout.
-    // Note that we should probably allow 2 cache lines per buffer (since a 
write might
-    // span a line break), plus we need some memory aside from the buffers, so 
we'll
-    // actually stop at 2^11, or maybe 2^12. And we might want to keep them in 
L1,
-    // in which case we'll stop at 2^7 or so.
-    for (int num_tables = 1; num_tables <= 1<<12; num_tables *= 2) {
-      // how many total build tuples we'll use to fill num_tables tables
-      // Needs to be comfortably less than the total capacity of num_tables 
tables
-      // (the below expression without the constant factor multiplied by it)
-      // because the hashes will not be spread perfectly.
-      // But should be more than 1/2 of that expression because otherwise if 
the hashes
-      // spread out perfectly, it will fit in num_tables / 2 and not give us 
the fanout
-      // we expect. (A test will fail in this case so that we know.)
-      int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
-      prev = NextTestCase<buffer_size>(build_tuples, prev, num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test without buffers
-  GrowingTest::Test<0>();
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64 >();
-}
diff --git a/be/src/experiments/hashing/interface/cache-hash-test.cc 
b/be/src/experiments/hashing/interface/cache-hash-test.cc
deleted file mode 100644
index 9f2548e87..000000000
--- a/be/src/experiments/hashing/interface/cache-hash-test.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <glog/logging.h>
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples)
-{
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.Ticks();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/interface/growing-test.cc 
b/be/src/experiments/hashing/interface/growing-test.cc
deleted file mode 100644
index ee0372345..000000000
--- a/be/src/experiments/hashing/interface/growing-test.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 10000000; // 10^7
-
-  // how many total build tuples we'll use to fill a table
-  // Needs to be comfortably less than the total capacity of num_tables tables
-  // (the below expression without the constant factor multiplied by it)
-  // because the hashes will not be spread perfectly.
-  // But should be more than 1/2 of that expression because otherwise if the 
hashes
-  // spread out perfectly, it will fit in num_tables / 2 and not give us the 
fanout
-  // we expect. (A test will fail in this case so that we know.)
-  static const int TUPLES_IN_TABLE = (StandardHashTable::NODES / 10) * 8;
-
-
-  template <int buffer_size>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, TUPLES_IN_TABLE, 
num_tables);
-    HashStore<buffer_size> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, num_tables);
-    free(tuples);
-    return watch.ElapsedTime();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a fanout into num_tables tables.
-  template <int buffer_size>
-  static void SanityCheck(const HashStore<buffer_size> & hs,
-                          uint64_t num_probe_tuples, int num_tables) {
-    uint64_t total_count = 0;
-    uint64_t build_tuples = 0;
-    for (int i = 0; i < hs.num_tables_; ++i) {
-      StandardHashTable& ht = hs.tables_[i];
-      for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); 
it.Next()) {
-        total_count += it.GetRow()->count;
-        ++build_tuples;
-      }
-    }
-    ASSERT_EQ(num_probe_tuples, total_count);
-    ASSERT_EQ(num_tables * TUPLES_IN_TABLE, build_tuples);
-    ASSERT_EQ(num_tables, hs.num_tables_)
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-    if (buffer_size > 0) {
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
-    }
-  }
-
-  template <int buffer_size>
-  inline static uint64_t NextTestCase(uint64_t prev, int num_tables) {
-    uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << num_tables * TUPLES_IN_TABLE << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " 
tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    // only go up to 2^12 hashtables because after that, there are at least as 
many
-    // buffers as L2 cache lines. We won't let this happen; we'll do 
multi-level fanout.
-    // Note that we should probably allow 2 cache lines per buffer (since a 
write might
-    // span a line break), plus we need some memory aside from the buffers, so 
we'll
-    // actually stop at 2^11, or maybe 2^12. And we might want to keep them in 
L1,
-    // in which case we'll stop at 2^7 or so.
-    for (int num_tables = 1; num_tables <= 1<<10; num_tables *= 2) {
-      prev = NextTestCase<buffer_size>(prev, num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test without buffers
-  GrowingTest::Test<0>();
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64 >();
-}
diff --git a/be/src/experiments/hashing/multilevel/cache-hash-test.cc 
b/be/src/experiments/hashing/multilevel/cache-hash-test.cc
deleted file mode 100644
index 63b47e896..000000000
--- a/be/src/experiments/hashing/multilevel/cache-hash-test.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <glog/logging.h>
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples)
-{
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.ElapsedTime();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/multilevel/growing-test.cc 
b/be/src/experiments/hashing/multilevel/growing-test.cc
deleted file mode 100644
index 1c470a733..000000000
--- a/be/src/experiments/hashing/multilevel/growing-test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
-
-  template <int buffer_size>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
-    HashStore<buffer_size> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
-    free(tuples);
-    return watch.ElapsedTime();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a keyspace of size max_id, with a fanout into num_tables tables.
-  template <int buffer_size>
-  static void SanityCheck(const HashStore<buffer_size> & hs,
-                          uint64_t num_probe_tuples, int max_id, int 
num_tables) {
-    ASSERT_EQ(num_probe_tuples, hs.TupleCount());
-    // Because CRC hashes contiguous integers nearly perfectly, we should 
always have
-    // exactly num_tables tables, even though we grow on demand.
-    ASSERT_EQ(num_tables, hs.TableCount())
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-#ifdef NDEBUG
-    if (buffer_size > 0) {
-      // Can't check this in debug mode because it won't neccessarily pack the 
struct.
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
-    }
-#endif
-  }
-
-  template <int buffer_size>
-  inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int 
num_tables) {
-    uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, build_tuples, 
num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << build_tuples << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " 
tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    for (int num_tables = 1; num_tables <= 1<<16; num_tables *= 2) {
-      // how many total build tuples we'll use to fill num_tables tables
-      // Needs to be comfortably less than the total capacity of num_tables 
tables
-      // (the below expression without the constant factor multiplied by it)
-      // because the hashes will not be spread perfectly.
-      // But should be more than 1/2 of that expression because otherwise if 
the hashes
-      // spread out perfectly, it will fit in num_tables / 2 and not give us 
the fanout
-      // we expect. (A test will fail in this case so that we know.)
-      int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
-      prev = NextTestCase<buffer_size>(build_tuples, prev, num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64 >();
-}
diff --git a/be/src/experiments/hashing/prefetch/cache-hash-test.cc 
b/be/src/experiments/hashing/prefetch/cache-hash-test.cc
deleted file mode 100644
index fa3180aaa..000000000
--- a/be/src/experiments/hashing/prefetch/cache-hash-test.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <glog/logging.h>
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples)
-{
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.Ticks();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/prefetch/growing-test.cc 
b/be/src/experiments/hashing/prefetch/growing-test.cc
deleted file mode 100644
index 348d4c839..000000000
--- a/be/src/experiments/hashing/prefetch/growing-test.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
-
-  template <int buffer_size>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
-    HashStore<buffer_size> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
-    free(tuples);
-    return watch.Ticks();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a keyspace of size max_id, with a fanout into num_tables tables.
-  template <int buffer_size>
-  static void SanityCheck(const HashStore<buffer_size> & hs,
-                          uint64_t num_probe_tuples, int max_id, int 
num_tables) {
-    uint64_t total_count = 0;
-    for (int i = 0; i < hs.num_tables_; ++i) {
-      StandardHashTable& ht = hs.tables_[i];
-      for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); 
it.Next()) {
-        total_count += it.GetRow()->count;
-      }
-    }
-    ASSERT_EQ(num_probe_tuples, total_count);
-    ASSERT_EQ(num_tables, hs.num_tables_)
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-    if (buffer_size > 0) {
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
-    }
-  }
-
-  template <int buffer_size>
-  inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int 
num_tables) {
-    uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, build_tuples, 
num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << build_tuples << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " 
tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    // only go up to 2^12 hashtables because after that, there are at least as 
many
-    // buffers as L2 cache lines. We won't let this happen; we'll do 
multi-level fanout.
-    // Note that we should probably allow 2 cache lines per buffer (since a 
write might
-    // span a line break), plus we need some memory aside from the buffers, so 
we'll
-    // actually stop at 2^11, or maybe 2^12. And we might want to keep them in 
L1,
-    // in which case we'll stop at 2^7 or so.
-    for (int num_tables = 1; num_tables <= 1<<12; num_tables *= 2) {
-      // how many total build tuples we'll use to fill num_tables tables
-      // Needs to be comfortably less than the total capacity of num_tables 
tables
-      // (the below expression without the constant factor multiplied by it)
-      // because the hashes will not be spread perfectly.
-      // But should be more than 1/2 of that expression because otherwise if 
the hashes
-      // spread out perfectly, it will fit in num_tables / 2 and not give us 
the fanout
-      // we expect. (A test will fail in this case so that we know.)
-      int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
-      prev = NextTestCase<buffer_size>(build_tuples, prev, num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test without buffers
-  GrowingTest::Test<0>();
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64 >();
-}
diff --git a/be/src/experiments/hashing/split-benchmarks/cache-hash-test.cc 
b/be/src/experiments/hashing/split-benchmarks/cache-hash-test.cc
deleted file mode 100644
index fa3180aaa..000000000
--- a/be/src/experiments/hashing/split-benchmarks/cache-hash-test.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <glog/logging.h>
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples)
-{
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.Ticks();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/split-benchmarks/growing-test.cc 
b/be/src/experiments/hashing/split-benchmarks/growing-test.cc
deleted file mode 100644
index 348d4c839..000000000
--- a/be/src/experiments/hashing/split-benchmarks/growing-test.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
-
-  template <int buffer_size>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
-    HashStore<buffer_size> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
-    free(tuples);
-    return watch.Ticks();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a keyspace of size max_id, with a fanout into num_tables tables.
-  template <int buffer_size>
-  static void SanityCheck(const HashStore<buffer_size> & hs,
-                          uint64_t num_probe_tuples, int max_id, int 
num_tables) {
-    uint64_t total_count = 0;
-    for (int i = 0; i < hs.num_tables_; ++i) {
-      StandardHashTable& ht = hs.tables_[i];
-      for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); 
it.Next()) {
-        total_count += it.GetRow()->count;
-      }
-    }
-    ASSERT_EQ(num_probe_tuples, total_count);
-    ASSERT_EQ(num_tables, hs.num_tables_)
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-    if (buffer_size > 0) {
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
-    }
-  }
-
-  template <int buffer_size>
-  inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int 
num_tables) {
-    uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, build_tuples, 
num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << build_tuples << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " 
tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    // only go up to 2^12 hashtables because after that, there are at least as 
many
-    // buffers as L2 cache lines. We won't let this happen; we'll do 
multi-level fanout.
-    // Note that we should probably allow 2 cache lines per buffer (since a 
write might
-    // span a line break), plus we need some memory aside from the buffers, so 
we'll
-    // actually stop at 2^11, or maybe 2^12. And we might want to keep them in 
L1,
-    // in which case we'll stop at 2^7 or so.
-    for (int num_tables = 1; num_tables <= 1<<12; num_tables *= 2) {
-      // how many total build tuples we'll use to fill num_tables tables
-      // Needs to be comfortably less than the total capacity of num_tables 
tables
-      // (the below expression without the constant factor multiplied by it)
-      // because the hashes will not be spread perfectly.
-      // But should be more than 1/2 of that expression because otherwise if 
the hashes
-      // spread out perfectly, it will fit in num_tables / 2 and not give us 
the fanout
-      // we expect. (A test will fail in this case so that we know.)
-      int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
-      prev = NextTestCase<buffer_size>(build_tuples, prev, num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test without buffers
-  GrowingTest::Test<0>();
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64 >();
-}
diff --git 
a/be/src/experiments/hashing/split-benchmarks/partitioning-throughput-test.cc 
b/be/src/experiments/hashing/split-benchmarks/partitioning-throughput-test.cc
deleted file mode 100644
index 559267516..000000000
--- 
a/be/src/experiments/hashing/split-benchmarks/partitioning-throughput-test.cc
+++ /dev/null
@@ -1,155 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <emmintrin.h>
-#include <stdlib.h>
-
-#include <glog/logging.h>
-
-#include "tuple-types.h"
-#include "common/compiler-util.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/stopwatch.h"
-
-#define STREAMING true
-
-using namespace impala;
-
-namespace impala {
-
-// Tests the throughput of simply partitioning tuples from one stream into many
-// with no other processing.
-class PartitioningThroughputTest  {
- public:
-  // There will be 2^FANOUT_BITS buffers
-  static const uint64_t FANOUT_BITS = 6;
-  static const uint64_t NUM_BUFFERS = 1<<FANOUT_BITS;
-  // How many bytes of data to partition
-  static const uint64_t DATA_BYTES = 1<<30; // 2GB
-  static const uint64_t DATA_TUPLES = DATA_BYTES / sizeof(ProbeTuple);
-  // How many bytes each buffer will hold
-  // Twice as much as needed if randomness is perfect.
-  static const uint64_t BUFFER_BYTES = DATA_BYTES / NUM_BUFFERS * 2;
-  static const uint64_t BUFFER_TUPLES = BUFFER_BYTES / sizeof(ProbeTuple);
-
-  static const int STREAMING_BUFFER_TUPLES = 8 * 4; // 4 cache linesp
-
-  struct Buffer {
-    ProbeTuple tuples[BUFFER_TUPLES];
-    uint64_t count;
-    // offset by 7 cache lines
-    uint8_t offset[7 * 64 - sizeof(uint64_t)];
-
-    Buffer() {
-      count = 0;
-    }
-  } __attribute__((__packed__)) __attribute__((aligned(64)));
-
-  struct BufferBuffer {
-    ProbeTuple tuples[STREAMING_BUFFER_TUPLES];
-    int count;
-    uint8_t padding[64 - sizeof(int)];
-
-    BufferBuffer() {
-      count = 0;
-    }
-  } __attribute__((__packed__)) __attribute__((aligned(64)));
-
-#if STREAMING
-  inline void BufferTuple(const ProbeTuple* tuple, Buffer* buffer) {
-    BufferBuffer* buffer_buffer = &buffer_buffers_[tuple->id];
-    DCHECK_LT(buffer_buffer->count, STREAMING_BUFFER_TUPLES);
-    buffer_buffer->tuples[buffer_buffer->count++] = *tuple;
-    if (UNLIKELY(buffer_buffer->count == STREAMING_BUFFER_TUPLES)) {
-      DCHECK_LE(buffer->count + buffer_buffer->count, BUFFER_TUPLES);
-      // Do a streaming write of streaming_tuples
-      __m128i* buffer_write_ptr = (__m128i*)&buffer->tuples[buffer->count];
-      // TODO code very dependent on size of ProbeTuple.
-      DCHECK_EQ(buffer_buffer->count % 2, 0);
-      for (int i = 0; i < buffer_buffer->count; i += 2) {
-        __m128i content = _mm_set_epi64x(*(long long*) (buffer_buffer->tuples 
+ i),
-                                         *(long long*) (buffer_buffer->tuples 
+ i + 1));
-        _mm_stream_si128(buffer_write_ptr + i/2, content);
-      }
-      buffer->count += buffer_buffer->count;
-      buffer_buffer->count = 0;
-    }
-  }
-#endif
-
-  void TestThroughput() {
-    // align allocations.
-    bool fail = posix_memalign((void**)&buffers_, __alignof(*buffers_), 
sizeof(*buffers_) * NUM_BUFFERS);
-    CHECK(!fail);
-    fail = posix_memalign((void**)&buffer_buffers_, 
__alignof(*buffer_buffers_), sizeof(*buffers_) * NUM_BUFFERS);
-    CHECK(!fail);
-    CHECK_EQ(((long)buffers_) % 64, 0);
-    for (int i = 0; i < NUM_BUFFERS; ++i) {
-      new (buffers_ + i) Buffer();
-      new (buffer_buffers_ + i) BufferBuffer();
-    }
-    ProbeTuple* tuples = GenTuples(DATA_TUPLES, NUM_BUFFERS);
-    StopWatch watch;
-    watch.Start();
-    for (uint64_t i = 0; i < DATA_TUPLES; ++i) {
-      const ProbeTuple* tuple = &tuples[i];
-      Buffer* buffer = &buffers_[tuple->id];
-#if STREAMING
-      BufferTuple(tuple, buffer);
-#else
-      buffer->tuples[buffer->count++] = *tuple;
-      DCHECK_LT(buffer->count, BUFFER_TUPLES);
-#endif
-    }
-    watch.Stop();
-    LOG(ERROR) << PrettyPrinter::Print(watch.Ticks(), TUnit::CPU_TICKS);;
-    free(tuples);
-    // Note: destructors not called.
-    free(buffers_);
-    free(buffer_buffers_);
-  }
-
-  void TestRawThroughput() {
-    const int NUM_RECORDS = 1<<27;
-    int64_t* buffer = (int64_t*) malloc(sizeof(long) * NUM_RECORDS);
-    int64_t constant = 0xFA57;
-    StopWatch watch;
-    watch.Start();
-    for (int64_t i = 0; i < NUM_RECORDS; ++i) {
-      buffer[i] = constant;
-    }
-    watch.Stop();
-    LOG(ERROR) << PrettyPrinter::Print(watch.Ticks(), TUnit::CPU_TICKS);;
-    free(buffer);
-  }
-
-  
-  Buffer* buffers_;
-  BufferBuffer* buffer_buffers_;
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-  PartitioningThroughputTest test;
-  test.TestRawThroughput();
-  //test.TestThroughput();
-  return 0;
-}
diff --git a/be/src/experiments/hashing/streaming/cache-hash-test.cc 
b/be/src/experiments/hashing/streaming/cache-hash-test.cc
deleted file mode 100644
index 63b47e896..000000000
--- a/be/src/experiments/hashing/streaming/cache-hash-test.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-
-#include <glog/logging.h>
-#include <vector>
-
-#include "cache-hash-table.h"
-#include "cache-hash-table.inline.h"
-#include "standard-hash-table.h"
-#include "standard-hash-table.inline.h"
-#include "tuple-types.h"
-#include "runtime/mem-pool.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/hash-util.h"
-#include "util/runtime-profile.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-// Very basic hash aggregation prototype and test
-// TODO: Generalize beyond hash aggregation, beyond hashing on the one column, 
etc.
-
-CacheHashTable::CacheHashTable() {
-  num_content_allocated_ = 0;
-}
-
-void CacheHashTable::BucketSizeDistribution() {
-  std::vector<int> bucket_size;
-  for (int i = 0; i < BUCKETS; ++i) {
-    int size = buckets_[i].count;
-    if (size >= bucket_size.size()) {
-      // grow bucket_size to fit this size
-      bucket_size.resize(size + 1, 0);
-    }
-    ++bucket_size[size];
-  }
-
-  std::stringstream distr;
-  for (int i = 0; i < bucket_size.size(); ++i) {
-    distr << i << ": " << bucket_size[i] << "\n";
-  }
-  LOG(INFO) << "Bucket Size Distribution\n" << distr.str();
-}
-
-
-// Update ht, which is doing a COUNT(*) GROUP BY id,
-// by having it process the new tuple probe.
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-inline void Process(T* ht, const ProbeTuple* probe) {
-  BuildTuple *existing = ht->Find(probe);
-  if (existing != NULL) {
-    ++existing->count;
-  } else {
-    BuildTuple build;
-    build.id = probe->id;
-    build.count = 1;
-    ht->Insert(&build);
-  }
-}
-
-// Test ht by aggregating input, which is an array of num_tuples ProbeTuples
-// Templatized on the type of hash table so we can reuse code without virtual 
calls.
-template<typename T>
-uint64_t Test(T* ht, const ProbeTuple* input, uint64_t num_tuples)
-{
-  StopWatch time;
-  time.Start();
-  for (int i = 0; i < num_tuples; ++i) {
-    Process<T>(ht, &input[i]);
-  }
-  time.Stop();
-  return time.ElapsedTime();
-}
-
-int main(int argc, char **argv) {
-  google::InitGoogleLogging(argv[0]);
-  CpuInfo::Init();
-
-  srand(time(NULL));
-
-  const int NUM_TUPLES = 100000000; //10^8
-  const int NUM_BUILD_TUPLES = 4 * CacheHashTable::MaxBuildTuples() / 10;
-
-  CacheHashTable cache_ht;
-  StandardHashTable std_ht;
-
-  ProbeTuple* input = GenTuples(NUM_TUPLES, NUM_BUILD_TUPLES);
-  uint64_t cache_time = Test<CacheHashTable>(&cache_ht, input, NUM_TUPLES);
-  LOG(ERROR) << "Cache-aware time: "
-             << PrettyPrinter::Print(cache_time, TUnit::CPU_TICKS);
-  uint64_t std_time = Test<StandardHashTable>(&std_ht, input, NUM_TUPLES);
-
-  LOG(ERROR) << "Bucket-chained time: "
-             << PrettyPrinter::Print(std_time, TUnit::CPU_TICKS);
-  return 0;
-}
diff --git a/be/src/experiments/hashing/streaming/growing-test.cc 
b/be/src/experiments/hashing/streaming/growing-test.cc
deleted file mode 100644
index 006285777..000000000
--- a/be/src/experiments/hashing/streaming/growing-test.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "hash-store.h"
-#include "standard-hash-table.h"
-#include "tuple-types.h"
-#include "util/cpu-info.h"
-#include "util/debug-util.h"
-#include "util/pretty-printer.h"
-#include "util/stopwatch.h"
-
-using namespace impala;
-
-namespace impala {
-
-// Tests performance of hash aggregation with HashStore, which grows to 
contain multiple
-// hash tables as needed. Allows exploring how buffer size and size of the 
keyspace
-// affect performance.
-class GrowingTest : public testing::Test {
- public:
-  static const uint64_t NUM_PROBE_TUPLES = 100000000; // 10^8
-
-  template <int buffer_size, int streaming_lines>
-  inline static uint64_t AggregateTest(uint64_t num_probe_tuples, int max_id,
-                                       int num_tables) {
-    ProbeTuple* tuples = GenTuples(num_probe_tuples, max_id);
-    HashStore<buffer_size, streaming_lines> hs;
-    StopWatch watch;
-    watch.Start();
-    hs.Aggregate(tuples, num_probe_tuples);
-    watch.Stop();
-    SanityCheck<buffer_size>(hs, num_probe_tuples, max_id, num_tables);
-    free(tuples);
-    return watch.ElapsedTime();
-  }
-
-  // Confirm that hs appears to be the correct result of aggregating 
num_probe_tuples,
-  // with a keyspace of size max_id, with a fanout into num_tables tables.
-  template <int buffer_size, int streaming_lines>
-  static void SanityCheck(const HashStore<buffer_size, streaming_lines> & hs,
-                          uint64_t num_probe_tuples, int max_id, int 
num_tables) {
-    uint64_t total_count = 0;
-    for (int i = 0; i < hs.num_tables_; ++i) {
-      StandardHashTable& ht = hs.tables_[i];
-      for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); 
it.Next()) {
-        total_count += it.GetRow()->count;
-      }
-    }
-    ASSERT_EQ(num_probe_tuples, total_count);
-    ASSERT_EQ(num_tables, hs.num_tables_)
-      << "You do not have the number of tables that you hoped.\n"
-      << "This could mean that there weren't enough probe tuples to fill the 
keyspace, "
-      << "skewed hashing lead a hash table that we expected not to overflow to 
overflow, "
-      << "or a genuine bug.";
-    if (buffer_size > 0) {
-      ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size, 
streaming_lines>::Buffer));
-    }
-  }
-
-  template <int buffer_size, int streaming_lines>
-  inline static uint64_t NextTestCase(int build_tuples, uint64_t prev, int 
num_tables) {
-    uint64_t time = AggregateTest<buffer_size, 
streaming_lines>(NUM_PROBE_TUPLES, build_tuples, num_tables);
-    int64_t delta;
-    if (prev == 0) {
-      // just print 0 for the delta the first time.
-      delta = 0;
-    }
-    else {
-      delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
-    }
-    LOG(ERROR) << build_tuples << "\t"
-               << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
-               << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
-    return time;
-  }
-
-  // Run a test aggregation with buffers of size buffer_size bytes.
-  // Try the full range of working set size (and thus fanout) that we can do 
with
-  // single-level fanout.
-  template <int buffer_size, int streaming_lines>
-  inline static void Test() {
-    LOG(ERROR) << "Buffer size " << HashStore<buffer_size, 
streaming_lines>::BUFFER_SIZE << " tuples ("
-               << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
-               << "):" << streaming_lines << " lines of buffering";
-    LOG(ERROR) << "#BuildTuples\tTime\tdTime";
-    uint64_t prev = 0;
-    // only go up to 2^12 hashtables because after that, there are at least as 
many
-    // buffers as L2 cache lines. We won't let this happen; we'll do 
multi-level fanout.
-    // Note that we should probably allow 2 cache lines per buffer (since a 
write might
-    // span a line break), plus we need some memory aside from the buffers, so 
we'll
-    // actually stop at 2^11, or maybe 2^12. And we might want to keep them in 
L1,
-    // in which case we'll stop at 2^7 or so.
-    for (int num_tables = 1; num_tables <= 1<<10; num_tables *= 2) {
-      // how many total build tuples we'll use to fill num_tables tables
-      // Needs to be comfortably less than the total capacity of num_tables 
tables
-      // (the below expression without the constant factor multiplied by it)
-      // because the hashes will not be spread perfectly.
-      // But should be more than 1/2 of that expression because otherwise if 
the hashes
-      // spread out perfectly, it will fit in num_tables / 2 and not give us 
the fanout
-      // we expect. (A test will fail in this case so that we know.)
-      int build_tuples = (StandardHashTable::NODES * num_tables / 10) * 8;
-      prev = NextTestCase<buffer_size, streaming_lines>(build_tuples, prev, 
num_tables);
-    }
-  }
-};
-
-}
-
-int main(int argc, char** argv) {
-  google::InitGoogleLogging(argv[0]);
-  ::testing::InitGoogleTest(&argc, argv);
-  CpuInfo::Init();
-  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
-  return RUN_ALL_TESTS();
-}
-
-TEST(GrowingTest, All) {
-  // test without buffers
-  //GrowingTest::Test<0>();
-  // test with one of the best buffer sizes
-  // Make it slightly more than a power of 2 cache lines so that they are 
offset.
-  GrowingTest::Test< (1 << 13) + 3 * 64, 1 >();
-  GrowingTest::Test< (1 << 13) + 3 * 64, 2 >();
-  GrowingTest::Test< (1 << 13) + 3 * 64, 4 >();
-  GrowingTest::Test< (1 << 13) + 3 * 64, 8 >();
-  GrowingTest::Test< (1 << 13) + 3 * 64, 16 >();
-}

(impala) branch master updated: IMPALA-14015: Remove dead code in be/src/experiments/hashing

Reply via email to