Repository: kudu Updated Branches: refs/heads/master 25f5c215e -> b7d2780ff
tool: port cfile-dump to 'kudu fs dump_cfile' Some non-cosmetic changes: - I changed the block_id conversion into something nicer than a CHECK. - The block_id parameter is expected in base 10, not base 16. To be honest, cfile-dump should have used base 10 for quite some time, because that's how block IDs are printed in dumped PBs. - I dropped the num_iterations parameter because it didn't seem useful. Change-Id: I30cbaa6552e88348cebbf3059390a4c252eb7f8e Reviewed-on: http://gerrit.cloudera.org:8080/4151 Reviewed-by: Todd Lipcon <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b7d2780f Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b7d2780f Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b7d2780f Branch: refs/heads/master Commit: b7d2780ffee2b5d8b88ff1801d3e40d54c773f06 Parents: 25f5c21 Author: Adar Dembo <[email protected]> Authored: Sun Aug 28 12:18:49 2016 -0700 Committer: Adar Dembo <[email protected]> Committed: Mon Aug 29 22:23:44 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/CMakeLists.txt | 4 -- src/kudu/cfile/cfile-dump.cc | 93 ----------------------------------- src/kudu/cfile/cfile-test-base.h | 6 +-- src/kudu/tools/kudu-tool-test.cc | 64 ++++++++++++++++++++++++ src/kudu/tools/tool_action_fs.cc | 70 +++++++++++++++++++++++--- 5 files changed, 131 insertions(+), 106 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/CMakeLists.txt b/src/kudu/cfile/CMakeLists.txt index 2bd5ca4..ad2a961 100644 --- a/src/kudu/cfile/CMakeLists.txt +++ b/src/kudu/cfile/CMakeLists.txt @@ -65,7 +65,3 @@ ADD_KUDU_TEST(bloomfile-test) ADD_KUDU_TEST(mt-bloomfile-test) 
ADD_KUDU_TEST(block_cache-test) ADD_KUDU_TEST(compression-test) - -# Tools -add_executable(cfile-dump cfile-dump.cc) -target_link_libraries(cfile-dump cfile ${KUDU_BASE_LIBS}) http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/cfile-dump.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-dump.cc b/src/kudu/cfile/cfile-dump.cc deleted file mode 100644 index 2b279d0..0000000 --- a/src/kudu/cfile/cfile-dump.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include <gflags/gflags.h> -#include <glog/logging.h> -#include <iostream> - -#include "kudu/cfile/cfile_reader.h" -#include "kudu/cfile/cfile_util.h" -#include "kudu/fs/block_id.h" -#include "kudu/fs/fs_manager.h" -#include "kudu/util/logging.h" -#include "kudu/util/flags.h" - -DEFINE_bool(print_meta, true, "print the header and footer from the file"); -DEFINE_bool(iterate_rows, true, "iterate each row in the file"); -DEFINE_int32(num_iterations, 1, "number of times to iterate the file"); - -namespace kudu { -namespace cfile { - -using std::string; -using std::cout; -using std::endl; - -Status DumpFile(const string& block_id_str) { - // Allow read-only access to live blocks. - FsManagerOpts fs_opts; - fs_opts.read_only = true; - FsManager fs_manager(Env::Default(), fs_opts); - RETURN_NOT_OK(fs_manager.Open()); - - uint64_t numeric_id; - CHECK(safe_strtou64_base(block_id_str, &numeric_id, 16)); - BlockId block_id(numeric_id); - gscoped_ptr<fs::ReadableBlock> block; - RETURN_NOT_OK(fs_manager.OpenBlock(block_id, &block)); - - gscoped_ptr<CFileReader> reader; - RETURN_NOT_OK(CFileReader::Open(std::move(block), ReaderOptions(), &reader)); - - if (FLAGS_print_meta) { - cout << "Header:\n" << reader->header().DebugString() << endl; - cout << "Footer:\n" << reader->footer().DebugString() << endl; - } - - if (FLAGS_iterate_rows) { - gscoped_ptr<CFileIterator> it; - RETURN_NOT_OK(reader->NewIterator(&it, CFileReader::DONT_CACHE_BLOCK)); - - for (int i = 0; i < FLAGS_num_iterations; i++) { - RETURN_NOT_OK(it->SeekToFirst()); - RETURN_NOT_OK(DumpIterator(*reader, it.get(), &cout, 0, 0)); - } - } - - return Status::OK(); -} - -} // namespace cfile -} // namespace kudu - -int main(int argc, char **argv) { - kudu::ParseCommandLineFlags(&argc, &argv, true); - kudu::InitGoogleLoggingSafe(argv[0]); - if (argc != 2) { - std::cerr << "usage: " << argv[0] - << " -fs_wal_dir <dir> -fs_data_dirs <dirs> <block id>" << std::endl; - return 1; - } - - kudu::Status s = 
kudu::cfile::DumpFile(argv[1]); - if (!s.ok()) { - std::cerr << "Error: " << s.ToString() << std::endl; - return 1; - } - - return 0; -} http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/cfile-test-base.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h index a5e1b9b..7213bfa 100644 --- a/src/kudu/cfile/cfile-test-base.h +++ b/src/kudu/cfile/cfile-test-base.h @@ -388,7 +388,7 @@ SumType FastSum(const Indexable &data, size_t n) { } template<DataType Type, typename SumType> -static void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count) { +void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count) { ScopedColumnBlock<Type> cb(8192); SumType sum = 0; @@ -404,7 +404,7 @@ static void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count } template<DataType Type> -static void ReadBinaryFile(CFileIterator* iter, int* count) { +void ReadBinaryFile(CFileIterator* iter, int* count) { ScopedColumnBlock<Type> cb(100); uint64_t sum_lens = 0; while (iter->HasNext()) { @@ -420,7 +420,7 @@ static void ReadBinaryFile(CFileIterator* iter, int* count) { LOG(INFO) << "Count: " << *count; } -static void TimeReadFile(FsManager* fs_manager, const BlockId& block_id, size_t *count_ret) { +void TimeReadFile(FsManager* fs_manager, const BlockId& block_id, size_t *count_ret) { Status s; gscoped_ptr<fs::ReadableBlock> source; http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/tools/kudu-tool-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index 877439f..637cdf9 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -21,7 +21,12 @@ #include <gtest/gtest.h> #include <glog/stl_logging.h> +#include "kudu/cfile/cfile-test-base.h" +#include "kudu/cfile/cfile_util.h" 
+#include "kudu/cfile/cfile_writer.h" +#include "kudu/fs/block_manager.h" #include "kudu/fs/fs_manager.h" +#include "kudu/gutil/gscoped_ptr.h" #include "kudu/gutil/strings/split.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/util/env.h" @@ -34,6 +39,10 @@ namespace kudu { namespace tools { +using cfile::CFileWriter; +using cfile::StringDataGenerator; +using cfile::WriterOptions; +using fs::WritableBlock; using std::string; using std::vector; using strings::Substitute; @@ -258,5 +267,60 @@ TEST_F(ToolTest, TestPbcDump) { } } +TEST_F(ToolTest, TestFsDumpCFile) { + const int kNumEntries = 8192; + const string kTestDir = GetTestPath("test"); + FsManager fs(env_.get(), kTestDir); + ASSERT_OK(fs.CreateInitialFileSystemLayout()); + ASSERT_OK(fs.Open()); + + gscoped_ptr<WritableBlock> block; + ASSERT_OK(fs.CreateNewBlock(&block)); + BlockId block_id = block->id(); + StringDataGenerator<false> generator("hello %04d"); + WriterOptions opts; + opts.write_posidx = true; + CFileWriter writer(opts, GetTypeInfo(generator.kDataType), + generator.has_nulls(), std::move(block)); + ASSERT_OK(writer.Start()); + generator.Build(kNumEntries); + ASSERT_OK_FAST(writer.AppendEntries(generator.values(), kNumEntries)); + ASSERT_OK(writer.Finish()); + + vector<string> stdout; + { + NO_FATALS(RunTestAction(Substitute( + "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_meta --noprint_rows", + kTestDir, block_id.ToString()), &stdout)); + SCOPED_TRACE(stdout); + ASSERT_TRUE(stdout.empty()); + } + { + NO_FATALS(RunTestAction(Substitute( + "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_rows", + kTestDir, block_id.ToString()), &stdout)); + SCOPED_TRACE(stdout); + ASSERT_GE(stdout.size(), 4); + ASSERT_EQ(stdout[0], "Header:"); + ASSERT_EQ(stdout[3], "Footer:"); + } + { + NO_FATALS(RunTestAction(Substitute( + "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_meta", + kTestDir, block_id.ToString()), &stdout)); + SCOPED_TRACE(stdout); + ASSERT_EQ(kNumEntries, stdout.size()); + } + { + 
NO_FATALS(RunTestAction(Substitute( + "fs dump_cfile --fs_wal_dir=$0 $1", + kTestDir, block_id.ToString()), &stdout)); + SCOPED_TRACE(stdout); + ASSERT_GT(stdout.size(), kNumEntries); + ASSERT_EQ(stdout[0], "Header:"); + ASSERT_EQ(stdout[3], "Footer:"); + } +} + } // namespace tools } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/tools/tool_action_fs.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc index 3b9d269..63fd702 100644 --- a/src/kudu/tools/tool_action_fs.cc +++ b/src/kudu/tools/tool_action_fs.cc @@ -24,20 +24,31 @@ #include <boost/optional/optional.hpp> #include <gflags/gflags.h> +#include "kudu/cfile/cfile_reader.h" +#include "kudu/cfile/cfile_util.h" #include "kudu/fs/fs_manager.h" +#include "kudu/gutil/strings/numbers.h" +#include "kudu/gutil/strings/substitute.h" #include "kudu/util/status.h" +DEFINE_bool(print_meta, true, + "Print the header and footer from the CFile"); +DEFINE_bool(print_rows, true, + "Print each row in the CFile"); +DEFINE_string(uuid, "", + "The uuid to use in the filesystem. If not provided, one is generated"); +namespace kudu { +namespace tools { + +using cfile::CFileReader; +using cfile::CFileIterator; +using cfile::ReaderOptions; using std::cout; using std::endl; using std::string; using std::unique_ptr; using std::vector; - -DEFINE_string(uuid, "", - "The uuid to use in the filesystem. 
If not provided, one is generated"); - -namespace kudu { -namespace tools { +using strings::Substitute; namespace { @@ -59,6 +70,42 @@ Status PrintUuid(const RunnerContext& context) { return Status::OK(); } +Status DumpCFile(const RunnerContext& context) { + string block_id_str = FindOrDie(context.required_args, "block_id"); + uint64_t numeric_id; + if (!safe_strtou64(block_id_str, &numeric_id)) { + return Status::InvalidArgument(Substitute( + "Could not parse $0 as numeric block ID", block_id_str)); + } + BlockId block_id(numeric_id); + + FsManagerOpts fs_opts; + fs_opts.read_only = true; + FsManager fs_manager(Env::Default(), fs_opts); + RETURN_NOT_OK(fs_manager.Open()); + + gscoped_ptr<fs::ReadableBlock> block; + RETURN_NOT_OK(fs_manager.OpenBlock(block_id, &block)); + + gscoped_ptr<CFileReader> reader; + RETURN_NOT_OK(CFileReader::Open(std::move(block), ReaderOptions(), &reader)); + + if (FLAGS_print_meta) { + cout << "Header:\n" << reader->header().DebugString() << endl; + cout << "Footer:\n" << reader->footer().DebugString() << endl; + } + + if (FLAGS_print_rows) { + gscoped_ptr<CFileIterator> it; + RETURN_NOT_OK(reader->NewIterator(&it, CFileReader::DONT_CACHE_BLOCK)); + RETURN_NOT_OK(it->SeekToFirst()); + + RETURN_NOT_OK(DumpIterator(*reader, it.get(), &cout, 0, 0)); + } + + return Status::OK(); +} + } // anonymous namespace unique_ptr<Mode> BuildFsMode() { @@ -77,10 +124,21 @@ unique_ptr<Mode> BuildFsMode() { .AddOptionalParameter("fs_data_dirs") .Build(); + unique_ptr<Action> dump_cfile = + ActionBuilder("dump_cfile", &DumpCFile) + .Description("Dump the contents of a CFile (column file)") + .AddRequiredParameter({ "block_id", "block identifier" }) + .AddOptionalParameter("fs_wal_dir") + .AddOptionalParameter("fs_data_dirs") + .AddOptionalParameter("print_meta") + .AddOptionalParameter("print_rows") + .Build(); + return ModeBuilder("fs") .Description("Operate on a local Kudu filesystem") .AddAction(std::move(format)) .AddAction(std::move(print_uuid)) + 
.AddAction(std::move(dump_cfile)) .Build(); }
