This is an automated email from the ASF dual-hosted git repository. granthenke pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 2e50335112aab5ffc706414c5873400b8177bd5b Author: Todd Lipcon <[email protected]> AuthorDate: Fri Dec 6 15:14:02 2019 -0800 client: use dense_hash_set in Batcher The Batcher implementation keeps an unordered set of in-flight operations. The built-in unordered_set is not very fast. Google's dense_hash_set is noticeably faster. This sped up a client-reactor-bound benchmark by around 27%. Before: Generator report time total : 37293.8 ms time per row: 0.000466172 ms Dropping auto-created table 'default.loadgen_auto_ece2f41beef94a9fa032c77899f7e61c' Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8': 189,125.49 msec task-clock # 5.060 CPUs utilized 29,363 context-switches # 0.155 K/sec 2,043 cpu-migrations # 0.011 K/sec 48,405 page-faults # 0.256 K/sec 772,496,448,279 cycles # 4.085 GHz (83.33%) 129,999,474,226 stalled-cycles-frontend # 16.83% frontend cycles idle (83.36%) 300,049,388,250 stalled-cycles-backend # 38.84% backend cycles idle (83.30%) 414,415,517,571 instructions # 0.54 insn per cycle # 0.72 stalled cycles per insn (83.32%) 76,829,647,882 branches # 406.236 M/sec (83.34%) 352,749,453 branch-misses # 0.46% of all branches (83.35%) 37.376785122 seconds time elapsed 186.834651000 seconds user 2.143945000 seconds sys After: Generator report time total : 29191.6 ms time per row: 0.000364895 ms Dropping auto-created table 'default.loadgen_auto_86e36746a9ce4bf19ef2beee143c09f7' Performance counter stats for './build/thinlto/bin/kudu perf loadgen localhost -num_rows_per_thread=10000000 -num_threads=8': 175,386.48 msec task-clock # 5.993 CPUs utilized 107,552 context-switches # 0.613 K/sec 3,056 cpu-migrations # 0.017 K/sec 49,802 page-faults # 0.284 K/sec 718,173,598,221 cycles # 4.095 GHz (83.34%) 154,338,830,503 stalled-cycles-frontend # 21.49% frontend cycles idle (83.35%) 162,605,327,354 stalled-cycles-backend # 22.64% backend cycles idle (83.33%) 407,408,456,605 instructions # 
0.57 insn per cycle # 0.40 stalled cycles per insn (83.33%) 76,468,794,077 branches # 436.002 M/sec (83.31%) 462,854,805 branch-misses # 0.61% of all branches (83.35%) 29.266327666 seconds time elapsed 171.921196000 seconds user 3.312785000 seconds sys Change-Id: Ieb2d190b9445b49ed752c5991146477fd793099a Reviewed-on: http://gerrit.cloudera.org:8080/14869 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> --- src/kudu/client/batcher.cc | 2 ++ src/kudu/client/batcher.h | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/kudu/client/batcher.cc b/src/kudu/client/batcher.cc index c68e77d..20ff371 100644 --- a/src/kudu/client/batcher.cc +++ b/src/kudu/client/batcher.cc @@ -598,6 +598,8 @@ Batcher::Batcher(KuduClient* client, timeout_(client->default_rpc_timeout()), outstanding_lookups_(0), buffer_bytes_used_(0) { + ops_.set_empty_key(nullptr); + ops_.set_deleted_key(reinterpret_cast<InFlightOp*>(-1)); } void Batcher::Abort() { diff --git a/src/kudu/client/batcher.h b/src/kudu/client/batcher.h index b244b28..7128d71 100644 --- a/src/kudu/client/batcher.h +++ b/src/kudu/client/batcher.h @@ -20,9 +20,11 @@ #include <cstdint> #include <mutex> #include <unordered_map> -#include <unordered_set> +#include <utility> #include <vector> +#include <sparsehash/dense_hash_set> + #include "kudu/client/client.h" #include "kudu/client/shared_ptr.h" #include "kudu/client/write_op.h" @@ -42,11 +44,10 @@ class KuduStatusCallback; namespace internal { -struct InFlightOp; - class ErrorCollector; class RemoteTablet; class WriteRpc; +struct InFlightOp; // A Batcher is the class responsible for collecting row operations, routing them to the // correct tablet server, and possibly batching them together for better efficiency. @@ -202,7 +203,7 @@ class Batcher : public RefCountedThreadSafe<Batcher> { KuduStatusCallback* flush_callback_; // All buffered or in-flight ops. 
- std::unordered_set<InFlightOp*> ops_; + google::dense_hash_set<InFlightOp*> ops_; // Each tablet's buffered ops. typedef std::unordered_map<RemoteTablet*, std::vector<InFlightOp*> > OpsMap; OpsMap per_tablet_ops_;
