This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 36146d56c302374f75d03b22a5076def76b24199 Author: Adar Dembo <[email protected]> AuthorDate: Fri Apr 5 10:35:34 2019 -0700 tool: perf tablet_scan action I found this action useful for benchmarking raw tserver scan performance (i.e. by removing the client from the picture). Not sure it's generally useful, but if it is, burying it in the 'perf' mode makes sense. Change-Id: Id4667dcb2d9a21d77149ebdae2e29b7fce8b460b Reviewed-on: http://gerrit.cloudera.org:8080/12945 Tested-by: Adar Dembo <[email protected]> Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/tools/kudu-tool-test.cc | 25 +++++++- src/kudu/tools/tool_action_perf.cc | 128 ++++++++++++++++++++++++++++++++++--- 2 files changed, 144 insertions(+), 9 deletions(-) diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 7a3760c..50ff0d6 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -1027,7 +1027,8 @@ TEST_F(ToolTest, TestModeHelp) { { const vector<string> kPerfRegexes = { "loadgen.*Run load generation with optional scan afterwards", - "table_scan.*Show row count and scanning time cost of tablets in a table" + "table_scan.*Show row count and scanning time cost of tablets in a table", + "tablet_scan.*Show row count of a local tablet" }; NO_FATALS(RunTestHelp("perf", kPerfRegexes)); } @@ -2204,6 +2205,28 @@ TEST_F(ToolTest, TestPerfTableScan) { NO_FATALS(RunScanTableCheck(kTableName, "", 1, 2000, {}, "perf table_scan")); } +TEST_F(ToolTest, TestPerfTabletScan) { + // Create a table. + const string& kTableName = "perf.tablet_scan"; + NO_FATALS(RunLoadgen(1, { "--keep_auto_table=true" }, kTableName)); + + // Get the list of tablets. + vector<string> tablet_ids; + TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()]; + ASSERT_OK(ListRunningTabletIds(ts, MonoDelta::FromSeconds(30), &tablet_ids)); + + // Scan the tablets using the local tool. + cluster_->Shutdown(); + for (const string& tid : tablet_ids) { + const string args = + Substitute("perf tablet_scan $0 --fs_wal_dir=$1 --fs_data_dirs=$2 --num_iters=2", + tid, cluster_->tablet_server(0)->wal_dir(), + JoinStrings(cluster_->tablet_server(0)->data_dirs(), ",")); + NO_FATALS(RunActionStdoutNone(args)); + NO_FATALS(RunActionStdoutNone(args + " --ordered_scan")); + } +} + // Test 'kudu remote_replica copy' tool when the destination tablet server is online. // 1. Test the copy tool when the destination replica is healthy // 2. Test the copy tool when the destination replica is tombstoned diff --git a/src/kudu/tools/tool_action_perf.cc b/src/kudu/tools/tool_action_perf.cc index 85f96cf..f980d86 100644 --- a/src/kudu/tools/tool_action_perf.cc +++ b/src/kudu/tools/tool_action_perf.cc @@ -186,20 +186,41 @@ #include "kudu/client/schema.h" #include "kudu/client/shared_ptr.h" #include "kudu/client/write_op.h" +#include "kudu/clock/clock.h" +#include "kudu/clock/logical_clock.h" #include "kudu/common/common.pb.h" +#include "kudu/common/iterator.h" #include "kudu/common/partial_row.h" +#include "kudu/common/rowblock.h" #include "kudu/common/schema.h" +#include "kudu/common/timestamp.h" #include "kudu/common/types.h" +#include "kudu/consensus/consensus_meta.h" +#include "kudu/consensus/consensus_meta_manager.h" +#include "kudu/consensus/log.h" +#include "kudu/consensus/log_anchor_registry.h" +#include "kudu/consensus/raft_consensus.h" +#include "kudu/fs/fs_manager.h" #include "kudu/gutil/map-util.h" +#include "kudu/gutil/ref_counted.h" #include "kudu/gutil/stl_util.h" #include "kudu/gutil/strings/strcat.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/rpc/result_tracker.h" +#include "kudu/tablet/rowset.h" +#include "kudu/tablet/tablet.h" +#include "kudu/tablet/tablet_bootstrap.h" +#include "kudu/tablet/tablet_metadata.h" +#include "kudu/tablet/tablet_replica.h" #include "kudu/tools/table_scanner.h" #include "kudu/tools/tool_action.h" #include "kudu/tools/tool_action_common.h" #include "kudu/util/decimal_util.h" +#include "kudu/util/env.h" #include "kudu/util/flag_validators.h" #include "kudu/util/int128.h" +#include "kudu/util/logging.h" +#include "kudu/util/memory/arena.h" #include "kudu/util/oid_generator.h" #include "kudu/util/random.h" #include "kudu/util/status.h" @@ -219,7 +240,16 @@ using kudu::client::KuduSchemaBuilder; using kudu::client::KuduSession; using kudu::client::KuduTable; using kudu::client::KuduTableCreator; -using kudu::client::sp::shared_ptr; +using kudu::clock::Clock; +using kudu::clock::LogicalClock; +using kudu::consensus::ConsensusBootstrapInfo; +using kudu::consensus::ConsensusMetadata; +using kudu::consensus::ConsensusMetadataManager; +using kudu::log::Log; +using kudu::log::LogAnchorRegistry; +using kudu::tablet::RowIteratorOptions; +using kudu::tablet::Tablet; +using kudu::tablet::TabletMetadata; using std::accumulate; using std::cerr; using std::cout; @@ -228,6 +258,7 @@ using std::lock_guard; using std::mutex; using std::numeric_limits; using std::ostringstream; +using std::shared_ptr; using std::string; using std::thread; using std::unique_ptr; @@ -257,11 +288,15 @@ DEFINE_bool(keep_auto_table, false, "has no effect if using already existing table " "(see the '--table_name' flag): neither the existing table " "nor its data is ever dropped/deleted."); +DEFINE_int32(num_iters, 1, + "Number of times to run the scan."); DEFINE_uint64(num_rows_per_thread, 1000, "Number of rows each thread generates and inserts; " "0 means unlimited. All rows generated by a thread are inserted " "in the context of the same session."); DECLARE_int32(num_threads); +DEFINE_bool(ordered_scan, false, + "Whether to run an ordered or unordered scan."); DEFINE_bool(run_scan, false, "Whether to run post-insertion scan to verify that the count of " "the inserted rows matches the expected number. If enabled, " @@ -469,14 +504,14 @@ Status GenerateRowData(Generator* gen, KuduPartialRow* row, mutex cerr_lock; void GeneratorThread( - const shared_ptr<KuduClient>& client, const string& table_name, + const client::sp::shared_ptr<KuduClient>& client, const string& table_name, size_t gen_idx, Status* status, uint64_t* row_count, uint64_t* err_count) { const Generator::Mode gen_mode = FLAGS_use_random ? Generator::MODE_RAND : Generator::MODE_SEQ; const size_t flush_per_n_rows = FLAGS_flush_per_n_rows; const uint64_t gen_seq_start = FLAGS_seq_start; - shared_ptr<KuduSession> session(client->NewSession()); + client::sp::shared_ptr<KuduSession> session(client->NewSession()); uint64_t idx = 0; auto generator = [&]() -> Status { @@ -491,7 +526,7 @@ void GeneratorThread( : KuduSession::MANUAL_FLUSH)); const size_t num_rows_per_gen = FLAGS_num_rows_per_thread; - shared_ptr<KuduTable> table; + client::sp::shared_ptr<KuduTable> table; RETURN_NOT_OK(client->OpenTable(table_name, &table)); const size_t num_columns = table->schema().num_columns(); @@ -536,7 +571,7 @@ void GeneratorThread( } } -Status GenerateInsertRows(const shared_ptr<KuduClient>& client, +Status GenerateInsertRows(const client::sp::shared_ptr<KuduClient>& client, const string& table_name, uint64_t* total_row_count, uint64_t* total_err_count) { @@ -570,7 +605,7 @@ Status GenerateInsertRows(const shared_ptr<KuduClient>& client, // Fetch all rows from the table with the specified name; iterate over them // and output their total count. -Status CountTableRows(const shared_ptr<KuduClient>& client, +Status CountTableRows(const client::sp::shared_ptr<KuduClient>& client, const string& table_name, uint64_t* count) { TableScanner scanner(client, table_name); scanner.SetReadMode(KuduScanner::ReadMode::READ_YOUR_WRITES); @@ -583,7 +618,7 @@ Status CountTableRows(const shared_ptr<KuduClient>& client, } Status TestLoadGenerator(const RunnerContext& context) { - shared_ptr<KuduClient> client; + client::sp::shared_ptr<KuduClient> client; RETURN_NOT_OK(CreateKuduClient(context, &client)); string table_name; @@ -686,7 +721,7 @@ Status TestLoadGenerator(const RunnerContext& context) { return Status::OK(); } -Status TableScan(const RunnerContext &context) { +Status TableScan(const RunnerContext& context) { client::sp::shared_ptr<KuduClient> client; RETURN_NOT_OK(CreateKuduClient(context, &client)); @@ -698,6 +733,72 @@ Status TableScan(const RunnerContext &context) { return scanner.StartScan(); } +Status TabletScan(const RunnerContext& context) { + const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg); + + // Initialize just enough of a tserver to bootstrap the tablet. We must + // bootstrap so that our scan includes data from the WAL segments. + // + // Note: we need a read-write FsManager because bootstrapping will do + // destructive things (e.g. rename the tablet's WAL segment directory). + FsManager fs(Env::Default(), FsManagerOpts()); + RETURN_NOT_OK(fs.Open()); + + scoped_refptr<TabletMetadata> tmeta; + RETURN_NOT_OK(TabletMetadata::Load(&fs, tablet_id, &tmeta)); + + scoped_refptr<ConsensusMetadataManager> cmeta_manager( + new ConsensusMetadataManager(&fs)); + scoped_refptr<ConsensusMetadata> cmeta; + RETURN_NOT_OK(cmeta_manager->Load(tablet_id, &cmeta)); + + scoped_refptr<Clock> clock( + LogicalClock::CreateStartingAt(Timestamp::kInitialTimestamp)); + RETURN_NOT_OK(clock->Init()); + + scoped_refptr<LogAnchorRegistry> registry(new LogAnchorRegistry()); + + // Bootstrap the tablet. + shared_ptr<Tablet> tablet; + scoped_refptr<Log> log; + ConsensusBootstrapInfo cbi; + RETURN_NOT_OK(tablet::BootstrapTablet(std::move(tmeta), + cmeta->CommittedConfig(), + std::move(clock), + /*mem_tracker=*/ nullptr, + /*result_tracker=*/ nullptr, + /*metric_registry=*/ nullptr, + /*tablet_replica=*/ nullptr, + &tablet, + &log, + std::move(registry), + &cbi)); + + // Tablet has been bootstrapped and opened. We can now scan it. + for (int i = 0; i < FLAGS_num_iters; i++) { + LOG_TIMING(INFO, Substitute("scanning tablet (iter $0)", i)) { + Schema projection = tablet->schema()->CopyWithoutColumnIds(); + RowIteratorOptions opts; + opts.projection = &projection; + opts.order = FLAGS_ordered_scan ? ORDERED : UNORDERED; + unique_ptr<RowwiseIterator> iter; + RETURN_NOT_OK(tablet->NewRowIterator(std::move(opts), &iter)); + RETURN_NOT_OK(iter->Init(nullptr)); + Arena arena(1024); + RowBlock block(&projection, 100, &arena); + int64_t rows_scanned = 0; + while (iter->HasNext()) { + arena.Reset(); + RETURN_NOT_OK(iter->NextBlock(&block)); + rows_scanned += block.nrows(); + KLOG_EVERY_N_SECS(INFO, 10) << "scanned " << rows_scanned << " rows"; + } + LOG(INFO) << "scanned " << rows_scanned << " rows"; + } + } + return Status::OK(); +} + } // anonymous namespace unique_ptr<Mode> BuildPerfMode() { @@ -761,10 +862,21 @@ unique_ptr<Mode> BuildPerfMode() { .AddOptionalParameter("tablets") .Build(); + unique_ptr<Action> tablet_scan = + ActionBuilder("tablet_scan", &TabletScan) + .Description("Show row count of a local tablet") + .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc }) + .AddOptionalParameter("fs_data_dirs") + .AddOptionalParameter("fs_metadata_dir") + .AddOptionalParameter("fs_wal_dir") + .AddOptionalParameter("num_iters") + .AddOptionalParameter("ordered_scan") + .Build(); return ModeBuilder("perf") .Description("Measure the performance of a Kudu cluster") .AddAction(std::move(loadgen)) .AddAction(std::move(table_scan)) + .AddAction(std::move(tablet_scan)) .Build(); }
