This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 84e1e8de3da32d3a4c20930272f237e0c35aa735 Author: Adar Dembo <[email protected]> AuthorDate: Tue Mar 12 16:33:10 2019 -0700 tool: fixes for kudu local_replica dump rowset This patch makes several adjustments to 'kudu local_replica dump rowset': - The existing 'metadata_only' and 'nrows' controls were being ignored. - The existing 'rowset_index' control wasn't working properly. - I changed the "what to dump" controls to 'dump_all_columns' and 'dump_metadata'. When 'dump_all_columns' is false, the row keys are dumped in a format that's comparable and ASCII-compatible (currently hex). This functionality helped me dump a tablet's keys (grouped by rowset), which I then used for a series of MergeIterator experiments. Change-Id: Ib50ab4e7b2aa0fec60ce0718d16823945a05cb7f Reviewed-on: http://gerrit.cloudera.org:8080/12976 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> --- src/kudu/tools/kudu-tool-test.cc | 133 ++++++++++++++++++++-------- src/kudu/tools/tool_action_local_replica.cc | 87 ++++++++++++++---- 2 files changed, 163 insertions(+), 57 deletions(-) diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 50ff0d6..2c9a002 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -73,6 +73,7 @@ #include "kudu/gutil/port.h" #include "kudu/gutil/ref_counted.h" #include "kudu/gutil/stl_util.h" +#include "kudu/gutil/stringprintf.h" #include "kudu/gutil/strings/escaping.h" #include "kudu/gutil/strings/join.h" #include "kudu/gutil/strings/numbers.h" @@ -1725,13 +1726,15 @@ TEST_F(ToolTest, TestLocalReplicaOps) { ASSERT_OK(harness.Open()); LocalTabletWriter writer(harness.tablet().get(), &kSchema); KuduPartialRow row(&kSchemaWithIds); - for (int i = 0; i< 10; i++) { - ASSERT_OK(row.SetInt32(0, i)); - ASSERT_OK(row.SetInt32(1, i*10)); - ASSERT_OK(row.SetStringCopy(2, "HelloWorld")); - writer.Insert(row); + for (int num_rowsets = 0; num_rowsets < 3; num_rowsets++) { + for (int i = 0; i < 10; 
i++) { + ASSERT_OK(row.SetInt32(0, num_rowsets * 10 + i)); + ASSERT_OK(row.SetInt32(1, num_rowsets * 10 * 10 + i)); + ASSERT_OK(row.SetStringCopy(2, "HelloWorld")); + writer.Insert(row); + } + harness.tablet()->Flush(); } - harness.tablet()->Flush(); harness.tablet()->Shutdown(); string fs_paths = "--fs_wal_dir=" + kTestDir + " " "--fs_data_dirs=" + kTestDir; @@ -1785,6 +1788,25 @@ TEST_F(ToolTest, TestLocalReplicaOps) { string expected = "Could not find rowset " + SimpleItoa(kRowId) + " in tablet id " + kTestTablet; ASSERT_STR_CONTAINS(stderr, expected); + + NO_FATALS(RunActionStdoutString( + Substitute("local_replica dump rowset --nodump_all_columns " + "--nodump_metadata --nrows=15 $0 $1", + kTestTablet, fs_paths), &stdout)); + + SCOPED_TRACE(stdout); + ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0"); + ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1"); + ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2"); + ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata"); + for (int row_idx = 0; row_idx < 30; row_idx++) { + string row_key = StringPrintf("800000%02x", row_idx); + if (row_idx < 15) { + ASSERT_STR_CONTAINS(stdout, row_key); + } else { + ASSERT_STR_NOT_CONTAINS(stdout, row_key); + } + } } { TabletMetadata* meta = harness.tablet()->metadata(); @@ -1834,22 +1856,38 @@ TEST_F(ToolTest, TestLocalReplicaOps) { KuduTableTestId | ffffffffffffffffffffffffffffffff | 0 | BLOOM | 4.1K KuduTableTestId | ffffffffffffffffffffffffffffffff | 0 | PK | 0B KuduTableTestId | ffffffffffffffffffffffffffffffff | 0 | * | 4.6K - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c10 (key) | 164B - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c11 (int_val) | 113B - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c12 (string_val) | 138B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | c10 (key) | 184B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | c11 (int_val) | 129B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | c12 (string_val) | 
158B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | REDO | 0B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | UNDO | 181B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | BLOOM | 4.1K + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | PK | 0B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 1 | * | 4.7K + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | c10 (key) | 184B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | c11 (int_val) | 129B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | c12 (string_val) | 158B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | REDO | 0B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | UNDO | 181B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | BLOOM | 4.1K + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | PK | 0B + KuduTableTestId | ffffffffffffffffffffffffffffffff | 2 | * | 4.7K + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c10 (key) | 543B + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c11 (int_val) | 364B + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | c12 (string_val) | 472B KuduTableTestId | ffffffffffffffffffffffffffffffff | * | REDO | 0B - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | UNDO | 169B - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | BLOOM | 4.1K + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | UNDO | 492B + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | BLOOM | 12.2K KuduTableTestId | ffffffffffffffffffffffffffffffff | * | PK | 0B - KuduTableTestId | ffffffffffffffffffffffffffffffff | * | * | 4.6K - KuduTableTestId | * | * | c10 (key) | 164B - KuduTableTestId | * | * | c11 (int_val) | 113B - KuduTableTestId | * | * | c12 (string_val) | 138B + KuduTableTestId | ffffffffffffffffffffffffffffffff | * | * | 14.1K + KuduTableTestId | * | * | c10 (key) | 543B + KuduTableTestId | * | * | c11 (int_val) | 364B + KuduTableTestId | * | * 
| c12 (string_val) | 472B KuduTableTestId | * | * | REDO | 0B - KuduTableTestId | * | * | UNDO | 169B - KuduTableTestId | * | * | BLOOM | 4.1K + KuduTableTestId | * | * | UNDO | 492B + KuduTableTestId | * | * | BLOOM | 12.2K KuduTableTestId | * | * | PK | 0B - KuduTableTestId | * | * | * | 4.6K + KuduTableTestId | * | * | * | 14.1K )"; // Preprocess stdout and our expected table so that we are less // sensitive to small variations in encodings, id assignment, etc. @@ -1891,14 +1929,19 @@ TEST_F(ToolTest, TestLocalReplicaOps) { // Test 'kudu fs list' rowset group. { - string stdout; - NO_FATALS(RunActionStdoutString( + vector<string> stdout; + NO_FATALS(RunActionStdoutLines( Substitute("fs list $0 --columns=table,tablet-id,rowset-id --format=csv", fs_paths), &stdout)); SCOPED_TRACE(stdout); - EXPECT_EQ(stdout, "KuduTableTest,ffffffffffffffffffffffffffffffff,0"); + ASSERT_EQ(3, stdout.size()); + for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) { + EXPECT_EQ(stdout[rowset_idx], + Substitute("KuduTableTest,ffffffffffffffffffffffffffffffff,$0", + rowset_idx)); + } } // Test 'kudu fs list' block group. 
{ @@ -1911,12 +1954,19 @@ TEST_F(ToolTest, TestLocalReplicaOps) { &stdout)); SCOPED_TRACE(stdout); - ASSERT_EQ(5, stdout.size()); - EXPECT_EQ(stdout[0], Substitute("KuduTableTest,$0,0,column,key", kTestTablet)); - EXPECT_EQ(stdout[1], Substitute("KuduTableTest,$0,0,column,int_val", kTestTablet)); - EXPECT_EQ(stdout[2], Substitute("KuduTableTest,$0,0,column,string_val", kTestTablet)); - EXPECT_EQ(stdout[3], Substitute("KuduTableTest,$0,0,undo,", kTestTablet)); - EXPECT_EQ(stdout[4], Substitute("KuduTableTest,$0,0,bloom,", kTestTablet)); + ASSERT_EQ(15, stdout.size()); + for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) { + EXPECT_EQ(stdout[rowset_idx * 5 + 0], + Substitute("KuduTableTest,$0,$1,column,key", kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 1], + Substitute("KuduTableTest,$0,$1,column,int_val", kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 2], + Substitute("KuduTableTest,$0,$1,column,string_val", kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 3], + Substitute("KuduTableTest,$0,$1,undo,", kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 4], + Substitute("KuduTableTest,$0,$1,bloom,", kTestTablet, rowset_idx)); + } } // Test 'kudu fs list' cfile group. 
@@ -1931,17 +1981,24 @@ TEST_F(ToolTest, TestLocalReplicaOps) { &stdout)); SCOPED_TRACE(stdout); - ASSERT_EQ(5, stdout.size()); - EXPECT_EQ(stdout[0], - Substitute("KuduTableTest,$0,0,column,key,BIT_SHUFFLE,10", kTestTablet)); - EXPECT_EQ(stdout[1], - Substitute("KuduTableTest,$0,0,column,int_val,BIT_SHUFFLE,10", kTestTablet)); - EXPECT_EQ(stdout[2], - Substitute("KuduTableTest,$0,0,column,string_val,DICT_ENCODING,10", kTestTablet)); - EXPECT_EQ(stdout[3], - Substitute("KuduTableTest,$0,0,undo,,PLAIN_ENCODING,10", kTestTablet)); - EXPECT_EQ(stdout[4], - Substitute("KuduTableTest,$0,0,bloom,,PLAIN_ENCODING,0", kTestTablet)); + ASSERT_EQ(15, stdout.size()); + for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) { + EXPECT_EQ(stdout[rowset_idx * 5 + 0], + Substitute("KuduTableTest,$0,$1,column,key,BIT_SHUFFLE,10", + kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 1], + Substitute("KuduTableTest,$0,$1,column,int_val,BIT_SHUFFLE,10", + kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 2], + Substitute("KuduTableTest,$0,$1,column,string_val,DICT_ENCODING,10", + kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 3], + Substitute("KuduTableTest,$0,$1,undo,,PLAIN_ENCODING,10", + kTestTablet, rowset_idx)); + EXPECT_EQ(stdout[rowset_idx * 5 + 4], + Substitute("KuduTableTest,$0,$1,bloom,,PLAIN_ENCODING,0", + kTestTablet, rowset_idx)); + } } } diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc index 2173acb..038baed 100644 --- a/src/kudu/tools/tool_action_local_replica.cc +++ b/src/kudu/tools/tool_action_local_replica.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include <algorithm> #include <cstddef> #include <cstdint> #include <iostream> @@ -31,7 +32,9 @@ #include <glog/logging.h> #include "kudu/common/common.pb.h" +#include "kudu/common/iterator.h" #include "kudu/common/partition.h" +#include "kudu/common/rowblock.h" #include "kudu/common/schema.h" #include "kudu/common/wire_protocol.h" #include "kudu/consensus/consensus.pb.h" @@ -48,8 +51,10 @@ #include "kudu/fs/block_manager.h" #include "kudu/fs/data_dirs.h" #include "kudu/fs/fs_manager.h" +#include "kudu/fs/io_context.h" #include "kudu/gutil/map-util.h" #include "kudu/gutil/ref_counted.h" +#include "kudu/gutil/strings/escaping.h" #include "kudu/gutil/strings/human_readable.h" #include "kudu/gutil/strings/join.h" #include "kudu/gutil/strings/numbers.h" @@ -60,6 +65,7 @@ #include "kudu/rpc/messenger.h" #include "kudu/tablet/diskrowset.h" #include "kudu/tablet/metadata.pb.h" +#include "kudu/tablet/rowset.h" #include "kudu/tablet/rowset_metadata.h" #include "kudu/tablet/tablet_mem_trackers.h" #include "kudu/tablet/tablet_metadata.h" @@ -70,16 +76,20 @@ #include "kudu/tserver/ts_tablet_manager.h" #include "kudu/util/env.h" #include "kudu/util/env_util.h" +#include "kudu/util/faststring.h" +#include "kudu/util/memory/arena.h" #include "kudu/util/metrics.h" #include "kudu/util/net/net_util.h" #include "kudu/util/pb_util.h" #include "kudu/util/status.h" -DEFINE_bool(dump_data, false, - "Dump the data for each column in the rowset."); -DEFINE_bool(metadata_only, false, - "Only dump the block metadata when printing blocks."); -DEFINE_int64(nrows, 0, "Number of rows to dump"); +DEFINE_bool(dump_all_columns, true, + "If true, dumped rows include all of the columns in the rowset. If " + "false, dumped rows include just the key columns (in a comparable format)."); +DEFINE_bool(dump_metadata, true, + "If true, dumps rowset metadata before dumping data. If false, " + "only dumps the data."); +DEFINE_int64(nrows, -1, "Number of rows to dump. 
If negative, dumps all rows."); DEFINE_bool(list_detail, false, "Print partition info for the local replicas"); DEFINE_int64(rowset_index, -1, @@ -99,6 +109,7 @@ using consensus::ConsensusMetadataManager; using consensus::OpId; using consensus::RaftConfigPB; using consensus::RaftPeerPB; +using fs::IOContext; using fs::ReadableBlock; using log::LogEntryPB; using log::LogEntryReader; @@ -118,6 +129,7 @@ using std::unique_ptr; using std::vector; using strings::Substitute; using tablet::DiskRowSet; +using tablet::RowIteratorOptions; using tablet::RowSetMetadata; using tablet::TabletMetadata; using tablet::TabletDataState; @@ -620,31 +632,64 @@ Status ListLocalReplicas(const RunnerContext& context) { return Status::OK(); } -Status DumpRowSetInternal(const shared_ptr<RowSetMetadata>& rs_meta, - int indent) { +Status DumpRowSetInternal(const IOContext& ctx, + const shared_ptr<RowSetMetadata>& rs_meta, + int indent, + int64_t* rows_left) { tablet::RowSetDataPB pb; rs_meta->ToProtobuf(&pb); - cout << Indent(indent) << "RowSet metadata: " << pb_util::SecureDebugString(pb) - << endl << endl; + if (FLAGS_dump_metadata) { + cout << Indent(indent) << "RowSet metadata: " << pb_util::SecureDebugString(pb) + << endl << endl; + } scoped_refptr<log::LogAnchorRegistry> log_reg(new log::LogAnchorRegistry()); shared_ptr<DiskRowSet> rs; RETURN_NOT_OK(DiskRowSet::Open(rs_meta, log_reg.get(), tablet::TabletMemTrackers(), - nullptr, + &ctx, &rs)); vector<string> lines; - RETURN_NOT_OK(rs->DebugDump(&lines)); - for (const auto& l : lines) { - cout << l << endl; + if (FLAGS_dump_all_columns) { + RETURN_NOT_OK(rs->DebugDump(&lines)); + } else { + Schema key_proj = rs_meta->tablet_schema().CreateKeyProjection(); + RowIteratorOptions opts; + opts.projection = &key_proj; + opts.io_context = &ctx; + unique_ptr<RowwiseIterator> it; + RETURN_NOT_OK(rs->NewRowIterator(opts, &it)); + RETURN_NOT_OK(it->Init(nullptr)); + + Arena arena(1024); + RowBlock block(&key_proj, 100, &arena); + faststring key; + 
while (it->HasNext()) { + RETURN_NOT_OK(it->NextBlock(&block)); + for (int i = 0; i < block.nrows(); i++) { + key_proj.EncodeComparableKey(block.row(i), &key); + lines.emplace_back(strings::b2a_hex(key.ToString())); + } + } + } + + // Respect 'rows_left' when dumping the output. + int64_t limit = *rows_left >= 0 ? + std::min<int64_t>(*rows_left, lines.size()) : lines.size(); + for (int i = 0; i < limit; i++) { + cout << lines[i] << endl; } + if (*rows_left >= 0) { + *rows_left -= limit; + } return Status::OK(); } Status DumpRowSet(const RunnerContext& context) { + const int kIndent = 2; unique_ptr<FsManager> fs_manager; RETURN_NOT_OK(FsInit(&fs_manager)); const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg); @@ -657,11 +702,15 @@ Status DumpRowSet(const RunnerContext& context) { return Status::OK(); } + IOContext ctx; + ctx.tablet_id = meta->tablet_id(); + int64_t rows_left = FLAGS_nrows; + // If rowset index is provided, only dump that rowset. if (FLAGS_rowset_index != -1) { - for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) { + for (const auto& rs_meta : meta->rowsets()) { if (rs_meta->id() == FLAGS_rowset_index) { - return Status::OK(); + return DumpRowSetInternal(ctx, rs_meta, kIndent, &rows_left); } } return Status::InvalidArgument( @@ -671,9 +720,9 @@ Status DumpRowSet(const RunnerContext& context) { // Rowset index not provided, dump all rowsets size_t idx = 0; - for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets()) { + for (const auto& rs_meta : meta->rowsets()) { cout << endl << "Dumping rowset " << idx++ << endl << kSeparatorLine; - RETURN_NOT_OK(DumpRowSetInternal(rs_meta, 2)); + RETURN_NOT_OK(DumpRowSetInternal(ctx, rs_meta, kIndent, &rows_left)); } return Status::OK(); } @@ -734,11 +783,11 @@ unique_ptr<Mode> BuildDumpMode() { ActionBuilder("rowset", &DumpRowSet) .Description("Dump the rowset contents of a local replica") .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc }) - 
.AddOptionalParameter("dump_data") + .AddOptionalParameter("dump_all_columns") + .AddOptionalParameter("dump_metadata") .AddOptionalParameter("fs_data_dirs") .AddOptionalParameter("fs_metadata_dir") .AddOptionalParameter("fs_wal_dir") - .AddOptionalParameter("metadata_only") .AddOptionalParameter("nrows") .AddOptionalParameter("rowset_index") .Build();
