This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new aa3328d ARROW-1844: [C++] Add initial Unique benchmarks for int64, variable-length strings aa3328d is described below commit aa3328d53dc9aa0447097d0173990ff4ac41ce75 Author: Wes McKinney <wes.mckin...@twosigma.com> AuthorDate: Wed Nov 29 10:43:20 2017 -0500 ARROW-1844: [C++] Add initial Unique benchmarks for int64, variable-length strings I also fixed a bug this surfaced in the hash table resize (unit test coverage was not adequate) Now we have ``` $ ./release/compute-benchmark Run on (8 X 4200.16 MHz CPU s) 2017-11-28 18:33:53 Benchmark Time CPU Iterations ------------------------------------------------------------------------------------------------- BM_BuildDictionary/min_time:1.000 1352 us 1352 us 1038 2.88639GB/s BM_BuildStringDictionary/min_time:1.000 3994 us 3994 us 351 75.5809MB/s BM_UniqueInt64NoNulls/16M/50/min_time:1.000/real_time 35814 us 35816 us 39 3.49023GB/s BM_UniqueInt64NoNulls/16M/1024/min_time:1.000/real_time 119656 us 119660 us 12 1069.73MB/s BM_UniqueInt64NoNulls/16M/10k/min_time:1.000/real_time 174924 us 174930 us 8 731.747MB/s BM_UniqueInt64NoNulls/16M/1024k/min_time:1.000/real_time 448425 us 448440 us 3 285.443MB/s BM_UniqueInt64WithNulls/16M/50/min_time:1.000/real_time 49511 us 49513 us 29 2.52468GB/s BM_UniqueInt64WithNulls/16M/1024/min_time:1.000/real_time 134519 us 134523 us 10 951.541MB/s BM_UniqueInt64WithNulls/16M/10k/min_time:1.000/real_time 191331 us 191336 us 7 668.999MB/s BM_UniqueInt64WithNulls/16M/1024k/min_time:1.000/real_time 533597 us 533613 us 3 239.882MB/s BM_UniqueString10bytes/16M/50/min_time:1.000/real_time 150731 us 150736 us 9 1061.5MB/s BM_UniqueString10bytes/16M/1024/min_time:1.000/real_time 256929 us 256938 us 5 622.739MB/s BM_UniqueString10bytes/16M/10k/min_time:1.000/real_time 414412 us 414426 us 3 386.09MB/s BM_UniqueString10bytes/16M/1024k/min_time:1.000/real_time 1744253 us 1744308 us 1 91.7298MB/s BM_UniqueString100bytes/16M/50/min_time:1.000/real_time 563890 us 563909 us 2 2.77093GB/s BM_UniqueString100bytes/16M/1024/min_time:1.000/real_time 704695 us 704720 us 2 2.21727GB/s BM_UniqueString100bytes/16M/10k/min_time:1.000/real_time 995685 us 995721 us 2 1.56927GB/s BM_UniqueString100bytes/16M/1024k/min_time:1.000/real_time 3584108 us 3584230 us 1 446.415MB/s ``` We can also refactor the hash table implementations without worrying too much about whether we're making things slower Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #1370 from wesm/ARROW-1844 and squashes the following commits: 638f1a11 [Wes McKinney] Decrease resize load factor to 0.5 2885c645 [Wes McKinney] Multiply bytes processed by state.iterations() f7b36194 [Wes McKinney] Add initial Unique benchmarks for int64, strings --- cpp/src/arrow/compute/compute-benchmark.cc | 127 ++++++++++++++++++++++++++++- cpp/src/arrow/compute/compute-test.cc | 4 +- cpp/src/arrow/compute/kernels/hash.cc | 4 +- 3 files changed, 129 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc index 974fffc..aa7d899 100644 --- a/cpp/src/arrow/compute/compute-benchmark.cc +++ b/cpp/src/arrow/compute/compute-benchmark.cc @@ -81,8 +81,131 @@ static void BM_BuildStringDictionary( state.SetBytesProcessed(state.iterations() * total_bytes); } -BENCHMARK(BM_BuildDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildStringDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond); +template <typename Type> +struct HashParams { + using T = typename Type::c_type; + + double null_percent; + + void GenerateTestData(const int64_t length, const int64_t num_unique, + std::shared_ptr<Array>* arr) const { + std::vector<int64_t> draws; + std::vector<T> values; + std::vector<bool> is_valid; + test::randint<int64_t>(length, 0, num_unique, &draws); + for (int64_t draw : draws) { + values.push_back(draw); + } + + if (this->null_percent > 0) { + test::random_is_valid(length, this->null_percent, &is_valid); + ArrayFromVector<Type, T>(is_valid, values, arr); + } else { + ArrayFromVector<Type, T>(values, arr); + } + } + + int64_t GetBytesProcessed(int64_t length) const { return length * sizeof(T); } +}; + +template <> +struct HashParams<StringType> { + double null_percent; + int32_t byte_width; + void GenerateTestData(const int64_t length, const int64_t num_unique, + std::shared_ptr<Array>* arr) const { + std::vector<int64_t> draws; + test::randint<int64_t>(length, 0, num_unique, &draws); + + const int64_t total_bytes = this->byte_width * num_unique; + std::vector<uint8_t> uniques(total_bytes); + const uint32_t seed = 0; + test::random_bytes(total_bytes, seed, uniques.data()); + + std::vector<bool> is_valid; + if (this->null_percent > 0) { + test::random_is_valid(length, this->null_percent, &is_valid); + } + + StringBuilder builder; + for (int64_t i = 0; i < length; ++i) { + if (this->null_percent == 0 || is_valid[i]) { + ABORT_NOT_OK(builder.Append(uniques.data() + this->byte_width * draws[i], + this->byte_width)); + } else { + ABORT_NOT_OK(builder.AppendNull()); + } + } + ABORT_NOT_OK(builder.Finish(arr)); + } + + int64_t GetBytesProcessed(int64_t length) const { return length * byte_width; } +}; + +template <typename ParamType> +void BenchUnique(benchmark::State& state, const ParamType& params, int64_t length, + int64_t num_unique) { + std::shared_ptr<Array> arr; + params.GenerateTestData(length, num_unique, &arr); + + FunctionContext ctx; + while (state.KeepRunning()) { + std::shared_ptr<Array> out; + ABORT_NOT_OK(Unique(&ctx, Datum(arr), &out)); + } + state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length)); +} + +template <typename ParamType> +void BenchDictionaryEncode(benchmark::State& state, const ParamType& params, + int64_t length, int64_t num_unique) { + std::shared_ptr<Array> arr; + params.GenerateTestData(length, num_unique, &arr); + + FunctionContext ctx; + while (state.KeepRunning()) { + Datum out; + ABORT_NOT_OK(DictionaryEncode(&ctx, Datum(arr), &out)); + } + state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length)); +} + +static void BM_UniqueInt64NoNulls(benchmark::State& state) { + BenchUnique(state, HashParams<Int64Type>{0}, state.range(0), state.range(1)); +} + +static void BM_UniqueInt64WithNulls(benchmark::State& state) { + BenchUnique(state, HashParams<Int64Type>{0.05}, state.range(0), state.range(1)); +} + +static void BM_UniqueString10bytes(benchmark::State& state) { + // Byte strings with 10 bytes each + BenchUnique(state, HashParams<StringType>{0.05, 10}, state.range(0), state.range(1)); +} + +static void BM_UniqueString100bytes(benchmark::State& state) { + // Byte strings with 100 bytes each + BenchUnique(state, HashParams<StringType>{0.05, 100}, state.range(0), state.range(1)); +} + +BENCHMARK(BM_BuildDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_BuildStringDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond); + +constexpr int64_t kHashBenchmarkLength = 1 << 24; + +#define ADD_HASH_ARGS(WHAT) \ + WHAT->Args({kHashBenchmarkLength, 50}) \ + ->Args({kHashBenchmarkLength, 1 << 10}) \ + ->Args({kHashBenchmarkLength, 10 * 1 << 10}) \ + ->Args({kHashBenchmarkLength, 1 << 20}) \ + ->MinTime(1.0) \ + ->Unit(benchmark::kMicrosecond) \ + ->UseRealTime() + +ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64NoNulls)); +ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64WithNulls)); +ADD_HASH_ARGS(BENCHMARK(BM_UniqueString10bytes)); +ADD_HASH_ARGS(BENCHMARK(BM_UniqueString100bytes)); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc index c73bfa3..84af8f7 100644 --- a/cpp/src/arrow/compute/compute-test.cc +++ b/cpp/src/arrow/compute/compute-test.cc @@ -869,8 +869,8 @@ TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) { return; } - const int64_t kTotalValues = 10000; - const int64_t kRepeats = 10; + const int64_t kTotalValues = 1000000; + const int64_t kRepeats = 5; vector<T> values; vector<T> uniques; diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc index 66c9073..750f1d3 100644 --- a/cpp/src/arrow/compute/kernels/hash.cc +++ b/cpp/src/arrow/compute/kernels/hash.cc @@ -43,7 +43,7 @@ typedef int32_t hash_slot_t; static constexpr hash_slot_t kHashSlotEmpty = std::numeric_limits<int32_t>::max(); // The maximum load factor for the hash table before resizing. -static constexpr double kMaxHashTableLoad = 0.7; +static constexpr double kMaxHashTableLoad = 0.5; enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; @@ -260,7 +260,7 @@ struct HashDictionary<Type, enable_if_has_c_type<Type>> { COMPUTE_HASH; \ while (kHashSlotEmpty != new_hash_slots[j]) { \ ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ + if (ARROW_PREDICT_FALSE(j == new_size)) { \ j = 0; \ } \ } \ -- To stop receiving notification emails like this one, please contact ['"commits@arrow.apache.org" <commits@arrow.apache.org>'].