Force inlining of BloomFilter::MakeMask. I noticed that this function was showing up in perf top while running TPC-H Q8 locally: it wasn't being inlined into BloomFilter::BucketFindAVX2. Forcing it to be inlined made the query ~5% faster for me locally.
Change-Id: I89282f6c315570bea5ad8a0f854cb6eea0592923 Reviewed-on: http://gerrit.cloudera.org:8080/9214 Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9e887b0a Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9e887b0a Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9e887b0a Branch: refs/heads/2.x Commit: 9e887b0aa0c58d79b3a95a8f9097ee09d33a6d19 Parents: 83f9650 Author: Tim Armstrong <tarmstr...@cloudera.com> Authored: Mon Feb 5 10:57:34 2018 -0800 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Thu Feb 8 07:01:53 2018 +0000 ---------------------------------------------------------------------- be/src/benchmarks/bloom-filter-benchmark.cc | 167 ++++++++++++----------- be/src/util/bloom-filter.h | 3 +- 2 files changed, 86 insertions(+), 84 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/9e887b0a/be/src/benchmarks/bloom-filter-benchmark.cc ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/bloom-filter-benchmark.cc b/be/src/benchmarks/bloom-filter-benchmark.cc index c9eef7e..6728c42 100644 --- a/be/src/benchmarks/bloom-filter-benchmark.cc +++ b/be/src/benchmarks/bloom-filter-benchmark.cc @@ -42,70 +42,71 @@ using namespace impala; // As in bloom-filter.h, ndv refers to the number of unique items inserted into a filter // and fpp is the probability of false positives. 
// -// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz +// +// Machine Info: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz // // initialize: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// ndv 10k fpp 10.0% 5.92e+03 5.98e+03 6.03e+03 1X 1X 1X -// ndv 10k fpp 1.0% 3.17e+03 3.24e+03 3.26e+03 0.535X 0.542X 0.541X -// ndv 10k fpp 0.1% 1.16e+03 1.17e+03 1.18e+03 0.195X 0.195X 0.195X -// ndv 1000k fpp 10.0% 3.85 3.93 3.93 0.000651X 0.000657X 0.000652X -// ndv 1000k fpp 1.0% 2.08 2.12 2.12 0.000351X 0.000354X 0.000351X -// ndv 1000k fpp 0.1% 2.08 2.12 2.12 0.000351X 0.000354X 0.000351X -// ndv 100000k fpp 10.0% 0.0299 0.0304 0.031 5.06e-06X 5.09e-06X 5.14e-06X -// ndv 100000k fpp 1.0% 0.0295 0.0306 0.0311 4.98e-06X 5.12e-06X 5.15e-06X -// ndv 100000k fpp 0.1% 0.0151 0.0153 0.0154 2.55e-06X 2.55e-06X 2.55e-06X +// ndv 10k fpp 10.0% 5.77e+03 5.81e+03 5.84e+03 1X 1X 1X +// ndv 10k fpp 1.0% 3.08e+03 3.1e+03 3.13e+03 0.534X 0.534X 0.536X +// ndv 10k fpp 0.1% 1.24e+03 1.25e+03 1.27e+03 0.216X 0.216X 0.217X +// ndv 1000k fpp 10.0% 4.71 4.71 4.71 0.000816X 0.000811X 0.000805X +// ndv 1000k fpp 1.0% 2.31 2.35 2.35 0.0004X 0.000405X 0.000403X +// ndv 1000k fpp 0.1% 2.35 2.35 2.35 0.000408X 0.000405X 0.000403X +// ndv 100000k fpp 10.0% 0.0926 0.0935 0.0935 1.61e-05X 1.61e-05X 1.6e-05X +// ndv 100000k fpp 1.0% 0.0926 0.0935 0.0935 1.61e-05X 1.61e-05X 1.6e-05X +// ndv 100000k fpp 0.1% 0.0481 0.0481 0.0481 8.33e-06X 8.28e-06X 8.23e-06X // // With AVX2: // // insert: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// ndv 10k fpp 10.0% 1.22e+05 1.23e+05 1.24e+05 1X 1X 1X -// ndv 10k fpp 1.0% 1.22e+05 1.23e+05 1.24e+05 0.998X 1X 1X -// ndv 10k fpp 0.1% 1.22e+05 1.23e+05 
1.24e+05 1X 1X 1X -// ndv 1000k fpp 10.0% 1.16e+05 1.18e+05 1.2e+05 0.95X 0.964X 0.965X -// ndv 1000k fpp 1.0% 1.14e+05 1.15e+05 1.16e+05 0.935X 0.941X 0.939X -// ndv 1000k fpp 0.1% 1.14e+05 1.16e+05 1.17e+05 0.939X 0.945X 0.943X -// ndv 100000k fpp 10.0% 3.35e+04 4.22e+04 5.3e+04 0.275X 0.344X 0.428X -// ndv 100000k fpp 1.0% 3.16e+04 4.77e+04 5.78e+04 0.26X 0.388X 0.466X -// ndv 100000k fpp 0.1% 3e+04 3.7e+04 4.66e+04 0.246X 0.301X 0.376X +// ndv 10k fpp 10.0% 2.1e+05 2.11e+05 2.13e+05 1X 1X 1X +// ndv 10k fpp 1.0% 2.16e+05 2.18e+05 2.19e+05 1.03X 1.03X 1.03X +// ndv 10k fpp 0.1% 2.12e+05 2.14e+05 2.16e+05 1.01X 1.01X 1.01X +// ndv 1000k fpp 10.0% 1.98e+05 1.99e+05 2.01e+05 0.943X 0.942X 0.945X +// ndv 1000k fpp 1.0% 1.96e+05 1.98e+05 1.99e+05 0.935X 0.936X 0.937X +// ndv 1000k fpp 0.1% 1.96e+05 1.97e+05 1.99e+05 0.935X 0.934X 0.936X +// ndv 100000k fpp 10.0% 5.63e+04 5.8e+04 6.18e+04 0.269X 0.274X 0.291X +// ndv 100000k fpp 1.0% 5.64e+04 5.84e+04 6.24e+04 0.269X 0.276X 0.293X +// ndv 100000k fpp 0.1% 5.56e+04 5.75e+04 5.86e+04 0.265X 0.272X 0.275X // // find: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// present ndv 10k fpp 10.0% 1.16e+05 1.17e+05 1.18e+05 1X 1X 1X -// absent ndv 10k fpp 10.0% 1.15e+05 1.17e+05 1.18e+05 0.996X 0.998X 1X -// present ndv 10k fpp 1.0% 1.16e+05 1.17e+05 1.18e+05 0.999X 0.996X 1X -// absent ndv 10k fpp 1.0% 1.16e+05 1.17e+05 1.18e+05 1X 0.998X 0.999X -// present ndv 10k fpp 0.1% 1.16e+05 1.17e+05 1.18e+05 0.999X 0.997X 0.997X -// absent ndv 10k fpp 0.1% 1.16e+05 1.17e+05 1.18e+05 1X 0.996X 0.998X -// present ndv 1000k fpp 10.0% 1.09e+05 1.12e+05 1.14e+05 0.936X 0.958X 0.964X -// absent ndv 1000k fpp 10.0% 1.07e+05 1.14e+05 1.15e+05 0.921X 0.976X 0.976X -// present ndv 1000k fpp 1.0% 1.05e+05 1.1e+05 1.12e+05 0.906X 0.943X 0.946X -// absent ndv 1000k fpp 1.0% 1.11e+05 
1.13e+05 1.14e+05 0.961X 0.966X 0.969X -// present ndv 1000k fpp 0.1% 9.78e+04 1.11e+05 1.12e+05 0.844X 0.944X 0.946X -// absent ndv 1000k fpp 0.1% 1.08e+05 1.13e+05 1.14e+05 0.93X 0.967X 0.97X -// present ndv 100000k fpp 10.0% 3.85e+04 4.53e+04 6.12e+04 0.332X 0.387X 0.518X -// absent ndv 100000k fpp 10.0% 2.54e+04 3.01e+04 3.26e+04 0.219X 0.257X 0.276X -// present ndv 100000k fpp 1.0% 3.3e+04 4.5e+04 6.06e+04 0.284X 0.384X 0.514X -// absent ndv 100000k fpp 1.0% 2.67e+04 3.01e+04 3.2e+04 0.23X 0.257X 0.271X -// present ndv 100000k fpp 0.1% 3.12e+04 4.25e+04 5.15e+04 0.269X 0.362X 0.436X -// absent ndv 100000k fpp 0.1% 2.39e+04 2.69e+04 2.84e+04 0.206X 0.229X 0.24X +// present ndv 10k fpp 10.0% 1.97e+05 1.98e+05 1.99e+05 1X 1X 1X +// absent ndv 10k fpp 10.0% 1.99e+05 2.01e+05 2.03e+05 1.01X 1.01X 1.02X +// present ndv 10k fpp 1.0% 1.97e+05 1.98e+05 2e+05 1X 1X 1X +// absent ndv 10k fpp 1.0% 2e+05 2.01e+05 2.03e+05 1.02X 1.02X 1.02X +// present ndv 10k fpp 0.1% 1.97e+05 1.99e+05 2e+05 1X 1X 1X +// absent ndv 10k fpp 0.1% 2e+05 2.02e+05 2.03e+05 1.02X 1.02X 1.02X +// present ndv 1000k fpp 10.0% 1.75e+05 1.77e+05 1.78e+05 0.891X 0.893X 0.893X +// absent ndv 1000k fpp 10.0% 1.78e+05 1.8e+05 1.81e+05 0.907X 0.907X 0.907X +// present ndv 1000k fpp 1.0% 1.8e+05 1.82e+05 1.83e+05 0.917X 0.917X 0.919X +// absent ndv 1000k fpp 1.0% 1.84e+05 1.86e+05 1.88e+05 0.937X 0.939X 0.941X +// present ndv 1000k fpp 0.1% 1.69e+05 1.7e+05 1.71e+05 0.857X 0.859X 0.858X +// absent ndv 1000k fpp 0.1% 1.7e+05 1.72e+05 1.74e+05 0.866X 0.87X 0.871X +// present ndv 100000k fpp 10.0% 5.34e+04 5.53e+04 7.21e+04 0.271X 0.279X 0.362X +// absent ndv 100000k fpp 10.0% 5.05e+04 5.28e+04 5.52e+04 0.257X 0.267X 0.277X +// present ndv 100000k fpp 1.0% 5.43e+04 5.74e+04 8.65e+04 0.276X 0.29X 0.434X +// absent ndv 100000k fpp 1.0% 5.09e+04 5.42e+04 5.73e+04 0.259X 0.274X 0.288X +// present ndv 100000k fpp 0.1% 5.11e+04 5.24e+04 6.69e+04 0.26X 0.265X 0.336X +// absent ndv 100000k fpp 0.1% 4.93e+04 5.02e+04 
5.1e+04 0.251X 0.254X 0.256X // // union: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// ndv 10k fpp 10.0% 5.43e+03 5.63e+03 5.67e+03 1X 1X 1X -// ndv 10k fpp 1.0% 2.82e+03 2.84e+03 2.87e+03 0.52X 0.505X 0.507X -// ndv 10k fpp 0.1% 780 803 812 0.144X 0.143X 0.143X -// ndv 1000k fpp 10.0% 16.2 16.5 16.7 0.00298X 0.00292X 0.00294X -// ndv 1000k fpp 1.0% 7.75 8.04 8.11 0.00143X 0.00143X 0.00143X -// ndv 1000k fpp 0.1% 7.96 8.11 8.11 0.00147X 0.00144X 0.00143X -// ndv 100000k fpp 10.0% 0.045 0.0472 0.0478 8.29e-06X 8.38e-06X 8.44e-06X -// ndv 100000k fpp 1.0% 0.045 0.0474 0.0478 8.29e-06X 8.42e-06X 8.44e-06X -// ndv 100000k fpp 0.1% 0.023 0.0235 0.0238 4.23e-06X 4.17e-06X 4.2e-06X +// ndv 10k fpp 10.0% 6.76e+05 6.8e+05 6.88e+05 1X 1X 1X +// ndv 10k fpp 1.0% 6.77e+05 6.81e+05 6.87e+05 1X 1X 0.998X +// ndv 10k fpp 0.1% 6.78e+05 6.82e+05 6.86e+05 1X 1X 0.996X +// ndv 1000k fpp 10.0% 6.78e+05 6.82e+05 6.88e+05 1X 1X 1X +// ndv 1000k fpp 1.0% 6.78e+05 6.83e+05 6.89e+05 1X 1X 1X +// ndv 1000k fpp 0.1% 6.77e+05 6.8e+05 6.89e+05 1X 1X 1X +// ndv 100000k fpp 10.0% 6.77e+05 6.81e+05 6.88e+05 1X 1X 0.999X +// ndv 100000k fpp 1.0% 6.77e+05 6.85e+05 6.89e+05 1X 1.01X 1X +// ndv 100000k fpp 0.1% 6.76e+05 6.8e+05 6.88e+05 1X 1X 1X // // // Without AVX or AVX2: @@ -113,51 +114,51 @@ using namespace impala; // insert: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// ndv 10k fpp 10.0% 9.47e+04 9.52e+04 9.6e+04 1X 1X 1X -// ndv 10k fpp 1.0% 9.45e+04 9.53e+04 9.59e+04 0.998X 1X 0.998X -// ndv 10k fpp 0.1% 9.2e+04 9.56e+04 9.64e+04 0.972X 1X 1X -// ndv 1000k fpp 10.0% 9.2e+04 9.46e+04 9.57e+04 0.972X 0.993X 0.997X -// ndv 1000k fpp 1.0% 8.49e+04 9.32e+04 9.45e+04 0.896X 
0.979X 0.984X -// ndv 1000k fpp 0.1% 8.37e+04 9.35e+04 9.47e+04 0.884X 0.981X 0.986X -// ndv 100000k fpp 10.0% 4.03e+04 5.1e+04 5.83e+04 0.425X 0.536X 0.607X -// ndv 100000k fpp 1.0% 3.2e+04 3.95e+04 5.11e+04 0.337X 0.415X 0.532X -// ndv 100000k fpp 0.1% 3.82e+04 4.52e+04 5.19e+04 0.404X 0.474X 0.54X +// ndv 10k fpp 10.0% 9.07e+04 9.12e+04 9.22e+04 1X 1X 1X +// ndv 10k fpp 1.0% 9.08e+04 9.13e+04 9.21e+04 1X 1X 0.999X +// ndv 10k fpp 0.1% 9.04e+04 9.08e+04 9.15e+04 0.997X 0.996X 0.993X +// ndv 1000k fpp 10.0% 8.85e+04 8.92e+04 9e+04 0.976X 0.978X 0.976X +// ndv 1000k fpp 1.0% 8.8e+04 8.89e+04 8.94e+04 0.971X 0.975X 0.97X +// ndv 1000k fpp 0.1% 8.79e+04 8.83e+04 8.92e+04 0.97X 0.968X 0.968X +// ndv 100000k fpp 10.0% 3.64e+04 3.82e+04 4.26e+04 0.401X 0.419X 0.462X +// ndv 100000k fpp 1.0% 3.67e+04 3.94e+04 4.52e+04 0.405X 0.432X 0.491X +// ndv 100000k fpp 0.1% 3.58e+04 3.75e+04 4.58e+04 0.395X 0.411X 0.497X // // find: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) (relative) (relative) // --------------------------------------------------------------------------------------------------------- -// present ndv 10k fpp 10.0% 1.25e+05 1.3e+05 1.31e+05 1X 1X 1X -// absent ndv 10k fpp 10.0% 7.91e+04 7.99e+04 8.06e+04 0.633X 0.614X 0.613X -// present ndv 10k fpp 1.0% 1.26e+05 1.32e+05 1.33e+05 1.01X 1.01X 1.01X -// absent ndv 10k fpp 1.0% 9.99e+04 1.01e+05 1.02e+05 0.799X 0.779X 0.777X -// present ndv 10k fpp 0.1% 1.25e+05 1.29e+05 1.29e+05 0.999X 0.989X 0.985X -// absent ndv 10k fpp 0.1% 1.52e+05 1.66e+05 1.68e+05 1.21X 1.28X 1.28X -// present ndv 1000k fpp 10.0% 9.23e+04 9.61e+04 9.71e+04 0.739X 0.739X 0.739X -// absent ndv 1000k fpp 10.0% 5.77e+04 5.84e+04 5.88e+04 0.462X 0.449X 0.448X -// present ndv 1000k fpp 1.0% 7.25e+04 9.08e+04 9.33e+04 0.581X 0.698X 0.71X -// absent ndv 1000k fpp 1.0% 7.6e+04 8.97e+04 9.08e+04 0.608X 0.69X 0.691X -// present ndv 1000k fpp 0.1% 8.65e+04 9.35e+04 9.43e+04 0.692X 0.719X 0.717X -// absent ndv 1000k fpp 0.1% 
8.33e+04 8.98e+04 9.07e+04 0.667X 0.69X 0.69X -// present ndv 100000k fpp 10.0% 2.74e+04 3.06e+04 3.37e+04 0.219X 0.236X 0.256X -// absent ndv 100000k fpp 10.0% 2.88e+04 2.98e+04 3.03e+04 0.231X 0.229X 0.231X -// present ndv 100000k fpp 1.0% 2.29e+04 2.82e+04 2.95e+04 0.184X 0.217X 0.224X -// absent ndv 100000k fpp 1.0% 2.84e+04 2.94e+04 3.01e+04 0.227X 0.226X 0.229X -// present ndv 100000k fpp 0.1% 2.34e+04 2.72e+04 3.09e+04 0.187X 0.209X 0.235X -// absent ndv 100000k fpp 0.1% 3.3e+04 3.84e+04 3.96e+04 0.264X 0.295X 0.301X +// present ndv 10k fpp 10.0% 1.34e+05 1.35e+05 1.36e+05 1X 1X 1X +// absent ndv 10k fpp 10.0% 7.83e+04 7.87e+04 7.94e+04 0.584X 0.583X 0.584X +// present ndv 10k fpp 1.0% 1.35e+05 1.36e+05 1.37e+05 1.01X 1X 1.01X +// absent ndv 10k fpp 1.0% 8.79e+04 8.84e+04 8.93e+04 0.656X 0.655X 0.657X +// present ndv 10k fpp 0.1% 1.34e+05 1.35e+05 1.36e+05 1X 1X 1X +// absent ndv 10k fpp 0.1% 1.38e+05 1.39e+05 1.4e+05 1.03X 1.03X 1.03X +// present ndv 1000k fpp 10.0% 9.6e+04 9.66e+04 9.77e+04 0.716X 0.716X 0.719X +// absent ndv 1000k fpp 10.0% 5.43e+04 5.47e+04 5.51e+04 0.405X 0.405X 0.406X +// present ndv 1000k fpp 1.0% 9.48e+04 9.56e+04 9.65e+04 0.707X 0.709X 0.711X +// absent ndv 1000k fpp 1.0% 7.95e+04 8.01e+04 8.06e+04 0.593X 0.593X 0.594X +// present ndv 1000k fpp 0.1% 9.47e+04 9.55e+04 9.64e+04 0.707X 0.708X 0.71X +// absent ndv 1000k fpp 0.1% 7.93e+04 7.98e+04 8.05e+04 0.592X 0.592X 0.592X +// present ndv 100000k fpp 10.0% 3.34e+04 3.46e+04 3.81e+04 0.249X 0.257X 0.28X +// absent ndv 100000k fpp 10.0% 3.61e+04 3.81e+04 4.04e+04 0.269X 0.282X 0.298X +// present ndv 100000k fpp 1.0% 3.86e+04 4.19e+04 4.69e+04 0.288X 0.311X 0.346X +// absent ndv 100000k fpp 1.0% 3.6e+04 3.73e+04 4.12e+04 0.268X 0.276X 0.304X +// present ndv 100000k fpp 0.1% 3.59e+04 3.74e+04 3.97e+04 0.268X 0.277X 0.292X +// absent ndv 100000k fpp 0.1% 4.82e+04 4.92e+04 5.11e+04 0.36X 0.365X 0.376X // // union: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile // (relative) 
(relative) (relative) // --------------------------------------------------------------------------------------------------------- -// ndv 10k fpp 10.0% 3.9e+03 3.96e+03 3.99e+03 1X 1X 1X -// ndv 10k fpp 1.0% 1.9e+03 1.95e+03 1.96e+03 0.487X 0.492X 0.491X -// ndv 10k fpp 0.1% 630 638 643 0.161X 0.161X 0.161X -// ndv 1000k fpp 10.0% 15.5 15.8 15.9 0.00397X 0.00399X 0.00399X -// ndv 1000k fpp 1.0% 7.52 7.74 7.88 0.00193X 0.00196X 0.00197X -// ndv 1000k fpp 0.1% 7.46 7.88 7.89 0.00191X 0.00199X 0.00198X -// ndv 100000k fpp 10.0% 0.0452 0.0474 0.0478 1.16e-05X 1.2e-05X 1.2e-05X -// ndv 100000k fpp 1.0% 0.0452 0.0474 0.0478 1.16e-05X 1.2e-05X 1.2e-05X -// ndv 100000k fpp 0.1% 0.0231 0.0235 0.0239 5.92e-06X 5.93e-06X 5.98e-06X - +// ndv 10k fpp 10.0% 6.77e+05 6.81e+05 6.89e+05 1X 1X 1X +// ndv 10k fpp 1.0% 6.77e+05 6.82e+05 6.87e+05 1X 1X 0.998X +// ndv 10k fpp 0.1% 6.77e+05 6.82e+05 6.89e+05 1X 1X 1X +// ndv 1000k fpp 10.0% 6.77e+05 6.8e+05 6.89e+05 0.999X 0.999X 1X +// ndv 1000k fpp 1.0% 6.77e+05 6.8e+05 6.88e+05 0.999X 0.999X 0.998X +// ndv 1000k fpp 0.1% 6.78e+05 6.82e+05 6.87e+05 1X 1X 0.997X +// ndv 100000k fpp 10.0% 6.78e+05 6.82e+05 6.87e+05 1X 1X 0.998X +// ndv 100000k fpp 1.0% 6.77e+05 6.8e+05 6.87e+05 0.999X 0.998X 0.998X +// ndv 100000k fpp 0.1% 6.77e+05 6.82e+05 6.87e+05 0.999X 1X 0.997X +// // Make a random uint32_t, avoiding the absent high bit and the low-entropy low bits // produced by rand(). uint32_t MakeRand() { http://git-wip-us.apache.org/repos/asf/impala/blob/9e887b0a/be/src/util/bloom-filter.h ---------------------------------------------------------------------- diff --git a/be/src/util/bloom-filter.h b/be/src/util/bloom-filter.h index 2e225a2..9628402 100644 --- a/be/src/util/bloom-filter.h +++ b/be/src/util/bloom-filter.h @@ -158,7 +158,8 @@ class BloomFilter { /// A helper function for the AVX2 methods. Turns a 32-bit hash into a 256-bit Bucket /// with 1 single 1-bit set in each 32-bit lane. 
- static __m256i MakeMask(const uint32_t hash) __attribute__((__target__("avx2"))); + static inline ALWAYS_INLINE __m256i MakeMask(const uint32_t hash) + __attribute__((__target__("avx2"))); int64_t directory_size() const { return 1uLL << (log_num_buckets_ + LOG_BUCKET_BYTE_SIZE);