This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 84e1e8de3da32d3a4c20930272f237e0c35aa735
Author: Adar Dembo <[email protected]>
AuthorDate: Tue Mar 12 16:33:10 2019 -0700

    tool: fixes for kudu local_replica dump rowset
    
    This patch makes several adjustments to 'kudu local_replica dump rowset':
    - The existing 'metadata_only' and 'nrows' controls were being ignored.
    - The existing 'rowset_index' control wasn't working properly.
    - I changed the "what to dump" controls to 'dump_all_columns' and
      'dump_metadata'. When 'dump_all_columns' is false, the row keys are dumped
      in a format that's comparable and ASCII-compatible (currently hex).
    
    This functionality helped me dump a tablet's keys (grouped by rowset), which
    I then used for a series of MergeIterator experiments.
    
    Change-Id: Ib50ab4e7b2aa0fec60ce0718d16823945a05cb7f
    Reviewed-on: http://gerrit.cloudera.org:8080/12976
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <[email protected]>
---
 src/kudu/tools/kudu-tool-test.cc            | 133 ++++++++++++++++++++--------
 src/kudu/tools/tool_action_local_replica.cc |  87 ++++++++++++++----
 2 files changed, 163 insertions(+), 57 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 50ff0d6..2c9a002 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -73,6 +73,7 @@
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/stl_util.h"
+#include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/escaping.h"
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/numbers.h"
@@ -1725,13 +1726,15 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
   ASSERT_OK(harness.Open());
   LocalTabletWriter writer(harness.tablet().get(), &kSchema);
   KuduPartialRow row(&kSchemaWithIds);
-  for (int i = 0; i< 10; i++) {
-    ASSERT_OK(row.SetInt32(0, i));
-    ASSERT_OK(row.SetInt32(1, i*10));
-    ASSERT_OK(row.SetStringCopy(2, "HelloWorld"));
-    writer.Insert(row);
+  for (int num_rowsets = 0; num_rowsets < 3; num_rowsets++) {
+    for (int i = 0; i < 10; i++) {
+      ASSERT_OK(row.SetInt32(0, num_rowsets * 10 + i));
+      ASSERT_OK(row.SetInt32(1, num_rowsets * 10 * 10 + i));
+      ASSERT_OK(row.SetStringCopy(2, "HelloWorld"));
+      writer.Insert(row);
+    }
+    harness.tablet()->Flush();
   }
-  harness.tablet()->Flush();
   harness.tablet()->Shutdown();
   string fs_paths = "--fs_wal_dir=" + kTestDir + " "
       "--fs_data_dirs=" + kTestDir;
@@ -1785,6 +1788,25 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
     string expected = "Could not find rowset " + SimpleItoa(kRowId) +
         " in tablet id " + kTestTablet;
     ASSERT_STR_CONTAINS(stderr, expected);
+
+    NO_FATALS(RunActionStdoutString(
+        Substitute("local_replica dump rowset --nodump_all_columns "
+                   "--nodump_metadata --nrows=15 $0 $1",
+                   kTestTablet, fs_paths), &stdout));
+
+    SCOPED_TRACE(stdout);
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 0");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 1");
+    ASSERT_STR_CONTAINS(stdout, "Dumping rowset 2");
+    ASSERT_STR_NOT_CONTAINS(stdout, "RowSet metadata");
+    for (int row_idx = 0; row_idx < 30; row_idx++) {
+      string row_key = StringPrintf("800000%02x", row_idx);
+      if (row_idx < 15) {
+        ASSERT_STR_CONTAINS(stdout, row_key);
+      } else {
+        ASSERT_STR_NOT_CONTAINS(stdout, row_key);
+      }
+    }
   }
   {
     TabletMetadata* meta = harness.tablet()->metadata();
@@ -1834,22 +1856,38 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
  KuduTableTestId | ffffffffffffffffffffffffffffffff | 0         | BLOOM        
    | 4.1K
  KuduTableTestId | ffffffffffffffffffffffffffffffff | 0         | PK           
    | 0B
  KuduTableTestId | ffffffffffffffffffffffffffffffff | 0         | *            
    | 4.6K
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c10 (key)    
    | 164B
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c11 
(int_val)    | 113B
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c12 
(string_val) | 138B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | c10 (key)    
    | 184B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | c11 
(int_val)    | 129B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | c12 
(string_val) | 158B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | REDO         
    | 0B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | UNDO         
    | 181B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | BLOOM        
    | 4.1K
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | PK           
    | 0B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 1         | *            
    | 4.7K
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | c10 (key)    
    | 184B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | c11 
(int_val)    | 129B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | c12 
(string_val) | 158B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | REDO         
    | 0B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | UNDO         
    | 181B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | BLOOM        
    | 4.1K
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | PK           
    | 0B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | 2         | *            
    | 4.7K
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c10 (key)    
    | 543B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c11 
(int_val)    | 364B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | c12 
(string_val) | 472B
  KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | REDO         
    | 0B
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | UNDO         
    | 169B
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | BLOOM        
    | 4.1K
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | UNDO         
    | 492B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | BLOOM        
    | 12.2K
  KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | PK           
    | 0B
- KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | *            
    | 4.6K
- KuduTableTestId | *                                | *         | c10 (key)    
    | 164B
- KuduTableTestId | *                                | *         | c11 
(int_val)    | 113B
- KuduTableTestId | *                                | *         | c12 
(string_val) | 138B
+ KuduTableTestId | ffffffffffffffffffffffffffffffff | *         | *            
    | 14.1K
+ KuduTableTestId | *                                | *         | c10 (key)    
    | 543B
+ KuduTableTestId | *                                | *         | c11 
(int_val)    | 364B
+ KuduTableTestId | *                                | *         | c12 
(string_val) | 472B
  KuduTableTestId | *                                | *         | REDO         
    | 0B
- KuduTableTestId | *                                | *         | UNDO         
    | 169B
- KuduTableTestId | *                                | *         | BLOOM        
    | 4.1K
+ KuduTableTestId | *                                | *         | UNDO         
    | 492B
+ KuduTableTestId | *                                | *         | BLOOM        
    | 12.2K
  KuduTableTestId | *                                | *         | PK           
    | 0B
- KuduTableTestId | *                                | *         | *            
    | 4.6K
+ KuduTableTestId | *                                | *         | *            
    | 14.1K
 )";
     // Preprocess stdout and our expected table so that we are less
     // sensitive to small variations in encodings, id assignment, etc.
@@ -1891,14 +1929,19 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
 
   // Test 'kudu fs list' rowset group.
   {
-    string stdout;
-    NO_FATALS(RunActionStdoutString(
+    vector<string> stdout;
+    NO_FATALS(RunActionStdoutLines(
          Substitute("fs list $0 --columns=table,tablet-id,rowset-id --format=csv",
                      fs_paths),
           &stdout));
 
     SCOPED_TRACE(stdout);
-    EXPECT_EQ(stdout, "KuduTableTest,ffffffffffffffffffffffffffffffff,0");
+    ASSERT_EQ(3, stdout.size());
+    for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) {
+      EXPECT_EQ(stdout[rowset_idx],
+                Substitute("KuduTableTest,ffffffffffffffffffffffffffffffff,$0",
+                           rowset_idx));
+    }
   }
   // Test 'kudu fs list' block group.
   {
@@ -1911,12 +1954,19 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
           &stdout));
 
     SCOPED_TRACE(stdout);
-    ASSERT_EQ(5, stdout.size());
-    EXPECT_EQ(stdout[0], Substitute("KuduTableTest,$0,0,column,key", kTestTablet));
-    EXPECT_EQ(stdout[1], Substitute("KuduTableTest,$0,0,column,int_val", kTestTablet));
-    EXPECT_EQ(stdout[2], Substitute("KuduTableTest,$0,0,column,string_val", kTestTablet));
-    EXPECT_EQ(stdout[3], Substitute("KuduTableTest,$0,0,undo,", kTestTablet));
-    EXPECT_EQ(stdout[4], Substitute("KuduTableTest,$0,0,bloom,", kTestTablet));
+    ASSERT_EQ(15, stdout.size());
+    for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) {
+      EXPECT_EQ(stdout[rowset_idx * 5 + 0],
+                Substitute("KuduTableTest,$0,$1,column,key", kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 1],
+                Substitute("KuduTableTest,$0,$1,column,int_val", kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 2],
+                Substitute("KuduTableTest,$0,$1,column,string_val", kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 3],
+                Substitute("KuduTableTest,$0,$1,undo,", kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 4],
+                Substitute("KuduTableTest,$0,$1,bloom,", kTestTablet, rowset_idx));
+    }
   }
 
   // Test 'kudu fs list' cfile group.
@@ -1931,17 +1981,24 @@ TEST_F(ToolTest, TestLocalReplicaOps) {
           &stdout));
 
     SCOPED_TRACE(stdout);
-    ASSERT_EQ(5, stdout.size());
-    EXPECT_EQ(stdout[0],
-              Substitute("KuduTableTest,$0,0,column,key,BIT_SHUFFLE,10", kTestTablet));
-    EXPECT_EQ(stdout[1],
-              Substitute("KuduTableTest,$0,0,column,int_val,BIT_SHUFFLE,10", kTestTablet));
-    EXPECT_EQ(stdout[2],
-              Substitute("KuduTableTest,$0,0,column,string_val,DICT_ENCODING,10", kTestTablet));
-    EXPECT_EQ(stdout[3],
-              Substitute("KuduTableTest,$0,0,undo,,PLAIN_ENCODING,10", kTestTablet));
-    EXPECT_EQ(stdout[4],
-              Substitute("KuduTableTest,$0,0,bloom,,PLAIN_ENCODING,0", kTestTablet));
+    ASSERT_EQ(15, stdout.size());
+    for (int rowset_idx = 0; rowset_idx < 3; rowset_idx++) {
+      EXPECT_EQ(stdout[rowset_idx * 5 + 0],
+                Substitute("KuduTableTest,$0,$1,column,key,BIT_SHUFFLE,10",
+                           kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 1],
+                Substitute("KuduTableTest,$0,$1,column,int_val,BIT_SHUFFLE,10",
+                           kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 2],
+                Substitute("KuduTableTest,$0,$1,column,string_val,DICT_ENCODING,10",
+                           kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 3],
+                Substitute("KuduTableTest,$0,$1,undo,,PLAIN_ENCODING,10",
+                           kTestTablet, rowset_idx));
+      EXPECT_EQ(stdout[rowset_idx * 5 + 4],
+                Substitute("KuduTableTest,$0,$1,bloom,,PLAIN_ENCODING,0",
+                           kTestTablet, rowset_idx));
+    }
   }
 }
 
diff --git a/src/kudu/tools/tool_action_local_replica.cc b/src/kudu/tools/tool_action_local_replica.cc
index 2173acb..038baed 100644
--- a/src/kudu/tools/tool_action_local_replica.cc
+++ b/src/kudu/tools/tool_action_local_replica.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <algorithm>
 #include <cstddef>
 #include <cstdint>
 #include <iostream>
@@ -31,7 +32,9 @@
 #include <glog/logging.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/iterator.h"
 #include "kudu/common/partition.h"
+#include "kudu/common/rowblock.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/consensus/consensus.pb.h"
@@ -48,8 +51,10 @@
 #include "kudu/fs/block_manager.h"
 #include "kudu/fs/data_dirs.h"
 #include "kudu/fs/fs_manager.h"
+#include "kudu/fs/io_context.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/ref_counted.h"
+#include "kudu/gutil/strings/escaping.h"
 #include "kudu/gutil/strings/human_readable.h"
 #include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/numbers.h"
@@ -60,6 +65,7 @@
 #include "kudu/rpc/messenger.h"
 #include "kudu/tablet/diskrowset.h"
 #include "kudu/tablet/metadata.pb.h"
+#include "kudu/tablet/rowset.h"
 #include "kudu/tablet/rowset_metadata.h"
 #include "kudu/tablet/tablet_mem_trackers.h"
 #include "kudu/tablet/tablet_metadata.h"
@@ -70,16 +76,20 @@
 #include "kudu/tserver/ts_tablet_manager.h"
 #include "kudu/util/env.h"
 #include "kudu/util/env_util.h"
+#include "kudu/util/faststring.h"
+#include "kudu/util/memory/arena.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/status.h"
 
-DEFINE_bool(dump_data, false,
-            "Dump the data for each column in the rowset.");
-DEFINE_bool(metadata_only, false,
-            "Only dump the block metadata when printing blocks.");
-DEFINE_int64(nrows, 0, "Number of rows to dump");
+DEFINE_bool(dump_all_columns, true,
+            "If true, dumped rows include all of the columns in the rowset. If "
+            "false, dumped rows include just the key columns (in a comparable format).");
+DEFINE_bool(dump_metadata, true,
+            "If true, dumps rowset metadata before dumping data. If false, "
+            "only dumps the data.");
+DEFINE_int64(nrows, -1, "Number of rows to dump. If negative, dumps all rows.");
 DEFINE_bool(list_detail, false,
             "Print partition info for the local replicas");
 DEFINE_int64(rowset_index, -1,
@@ -99,6 +109,7 @@ using consensus::ConsensusMetadataManager;
 using consensus::OpId;
 using consensus::RaftConfigPB;
 using consensus::RaftPeerPB;
+using fs::IOContext;
 using fs::ReadableBlock;
 using log::LogEntryPB;
 using log::LogEntryReader;
@@ -118,6 +129,7 @@ using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 using tablet::DiskRowSet;
+using tablet::RowIteratorOptions;
 using tablet::RowSetMetadata;
 using tablet::TabletMetadata;
 using tablet::TabletDataState;
@@ -620,31 +632,64 @@ Status ListLocalReplicas(const RunnerContext& context) {
   return Status::OK();
 }
 
-Status DumpRowSetInternal(const shared_ptr<RowSetMetadata>& rs_meta,
-                          int indent) {
+Status DumpRowSetInternal(const IOContext& ctx,
+                          const shared_ptr<RowSetMetadata>& rs_meta,
+                          int indent,
+                          int64_t* rows_left) {
   tablet::RowSetDataPB pb;
   rs_meta->ToProtobuf(&pb);
 
-  cout << Indent(indent) << "RowSet metadata: " << pb_util::SecureDebugString(pb)
-       << endl << endl;
+  if (FLAGS_dump_metadata) {
+    cout << Indent(indent) << "RowSet metadata: " << pb_util::SecureDebugString(pb)
+         << endl << endl;
+  }
 
   scoped_refptr<log::LogAnchorRegistry> log_reg(new log::LogAnchorRegistry());
   shared_ptr<DiskRowSet> rs;
   RETURN_NOT_OK(DiskRowSet::Open(rs_meta,
                                  log_reg.get(),
                                  tablet::TabletMemTrackers(),
-                                 nullptr,
+                                 &ctx,
                                  &rs));
   vector<string> lines;
-  RETURN_NOT_OK(rs->DebugDump(&lines));
-  for (const auto& l : lines) {
-    cout << l << endl;
+  if (FLAGS_dump_all_columns) {
+    RETURN_NOT_OK(rs->DebugDump(&lines));
+  } else {
+    Schema key_proj = rs_meta->tablet_schema().CreateKeyProjection();
+    RowIteratorOptions opts;
+    opts.projection = &key_proj;
+    opts.io_context = &ctx;
+    unique_ptr<RowwiseIterator> it;
+    RETURN_NOT_OK(rs->NewRowIterator(opts, &it));
+    RETURN_NOT_OK(it->Init(nullptr));
+
+    Arena arena(1024);
+    RowBlock block(&key_proj, 100, &arena);
+    faststring key;
+    while (it->HasNext()) {
+      RETURN_NOT_OK(it->NextBlock(&block));
+      for (int i = 0; i < block.nrows(); i++) {
+        key_proj.EncodeComparableKey(block.row(i), &key);
+        lines.emplace_back(strings::b2a_hex(key.ToString()));
+      }
+    }
+  }
+
+  // Respect 'rows_left' when dumping the output.
+  int64_t limit = *rows_left >= 0 ?
+                  std::min<int64_t>(*rows_left, lines.size()) : lines.size();
+  for (int i = 0; i < limit; i++) {
+    cout << lines[i] << endl;
   }
 
+  if (*rows_left >= 0) {
+    *rows_left -= limit;
+  }
   return Status::OK();
 }
 
 Status DumpRowSet(const RunnerContext& context) {
+  const int kIndent = 2;
   unique_ptr<FsManager> fs_manager;
   RETURN_NOT_OK(FsInit(&fs_manager));
   const string& tablet_id = FindOrDie(context.required_args, kTabletIdArg);
@@ -657,11 +702,15 @@ Status DumpRowSet(const RunnerContext& context) {
     return Status::OK();
   }
 
+  IOContext ctx;
+  ctx.tablet_id = meta->tablet_id();
+  int64_t rows_left = FLAGS_nrows;
+
   // If rowset index is provided, only dump that rowset.
   if (FLAGS_rowset_index != -1) {
-    for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets())  {
+    for (const auto& rs_meta : meta->rowsets())  {
       if (rs_meta->id() == FLAGS_rowset_index) {
-        return Status::OK();
+        return DumpRowSetInternal(ctx, rs_meta, kIndent, &rows_left);
       }
     }
     return Status::InvalidArgument(
@@ -671,9 +720,9 @@ Status DumpRowSet(const RunnerContext& context) {
 
   // Rowset index not provided, dump all rowsets
   size_t idx = 0;
-  for (const shared_ptr<RowSetMetadata>& rs_meta : meta->rowsets())  {
+  for (const auto& rs_meta : meta->rowsets())  {
     cout << endl << "Dumping rowset " << idx++ << endl << kSeparatorLine;
-    RETURN_NOT_OK(DumpRowSetInternal(rs_meta, 2));
+    RETURN_NOT_OK(DumpRowSetInternal(ctx, rs_meta, kIndent, &rows_left));
   }
   return Status::OK();
 }
@@ -734,11 +783,11 @@ unique_ptr<Mode> BuildDumpMode() {
       ActionBuilder("rowset", &DumpRowSet)
       .Description("Dump the rowset contents of a local replica")
       .AddRequiredParameter({ kTabletIdArg, kTabletIdArgDesc })
-      .AddOptionalParameter("dump_data")
+      .AddOptionalParameter("dump_all_columns")
+      .AddOptionalParameter("dump_metadata")
       .AddOptionalParameter("fs_data_dirs")
       .AddOptionalParameter("fs_metadata_dir")
       .AddOptionalParameter("fs_wal_dir")
-      .AddOptionalParameter("metadata_only")
       .AddOptionalParameter("nrows")
       .AddOptionalParameter("rowset_index")
       .Build();

Reply via email to