This is an automated email from the ASF dual-hosted git repository.
gaurava pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 4ef1d3eef93 HDFS-16472. Make HDFS setrep tool cross platform (#4130)
4ef1d3eef93 is described below
commit 4ef1d3eef9340b59f132421fba443dc456671ee4
Author: Gautham B A <[email protected]>
AuthorDate: Tue Apr 5 22:59:11 2022 +0530
HDFS-16472. Make HDFS setrep tool cross platform (#4130)
* The source file for hdfs_setrep
uses getopt for parsing the
command-line arguments.
* getopt is available only on Linux
and thus isn't cross-platform.
* We need to replace getopt
with boost::program_options
to make this tool cross-platform.
---
.../native/libhdfspp/tests/tools/CMakeLists.txt | 3 +
.../libhdfspp/tests/tools/hdfs-setrep-mock.cc | 56 ++++++
.../libhdfspp/tests/tools/hdfs-setrep-mock.h | 68 +++++++
.../libhdfspp/tests/tools/hdfs-tool-tests.cc | 24 ++-
.../src/main/native/libhdfspp/tools/CMakeLists.txt | 3 +-
.../libhdfspp/tools/hdfs-setrep/CMakeLists.txt | 27 +++
.../libhdfspp/tools/hdfs-setrep/hdfs-setrep.cc | 220 +++++++++++++++++++++
.../libhdfspp/tools/hdfs-setrep/hdfs-setrep.h | 96 +++++++++
.../native/libhdfspp/tools/hdfs-setrep/main.cc | 52 +++++
.../src/main/native/libhdfspp/tools/hdfs_setrep.cc | 172 ----------------
.../tools/internal/set-replication-state.h | 72 +++++++
11 files changed, 617 insertions(+), 176 deletions(-)
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
index 75c5ad1ff98..bec93439039 100644
---
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt
@@ -38,6 +38,7 @@ add_executable(hdfs_tool_tests
hdfs-get-mock.cc
hdfs-find-mock.cc
hdfs-ls-mock.cc
+ hdfs-setrep-mock.cc
main.cc)
target_include_directories(hdfs_tool_tests PRIVATE
../tools
@@ -60,6 +61,7 @@ target_include_directories(hdfs_tool_tests PRIVATE
../../tools/hdfs-get
../../tools/hdfs-find
../../tools/hdfs-ls
+ ../../tools/hdfs-setrep
../../tools/hdfs-cat)
target_link_libraries(hdfs_tool_tests PRIVATE
gmock_main
@@ -81,5 +83,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE
hdfs_get_lib
hdfs_find_lib
hdfs_ls_lib
+ hdfs_setrep_lib
hdfs_cat_lib)
add_test(hdfs_tool_tests hdfs_tool_tests)
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.cc
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.cc
new file mode 100644
index 00000000000..d33f49b6aec
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.cc
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "hdfs-setrep-mock.h"
+#include "hdfs-tool-tests.h"
+
+namespace hdfs::tools::test {
+SetrepMock::~SetrepMock() = default;
+
+/**
+ * Registers the gmock expectations that correspond to the given test case.
+ *
+ * @param test_case The function defining the test case. It is compared by
+ * address against the test-case templates handled below; a test case that
+ * matches none of them registers no expectation.
+ * @param args The arguments passed to the test case. For
+ * PassPermissionsAndAPath, args[0] is forwarded to HandlePath as the
+ * replication factor and args[1] as the path.
+ */
+void SetrepMock::SetExpectations(
+    std::function<std::unique_ptr<SetrepMock>()> test_case,
+    const std::vector<std::string> &args) const {
+  // Get the pointer to the function that defines the test case
+  const auto test_case_func =
+      test_case.target<std::unique_ptr<SetrepMock> (*)()>();
+  ASSERT_NE(test_case_func, nullptr);
+
+  // Set the expected method calls and their corresponding arguments for each
+  // test case
+  if (*test_case_func == &CallHelp<SetrepMock>) {
+    EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true));
+    return;
+  }
+
+  if (*test_case_func == &PassPermissionsAndAPath<SetrepMock>) {
+    // args defaults to an empty vector, so make sure both elements exist
+    // before indexing; fail the test instead of reading out of bounds.
+    ASSERT_GE(args.size(), 2U);
+    const auto &number = args[0];
+    const auto &path = args[1];
+    EXPECT_CALL(*this, HandlePath(path, number))
+        .Times(1)
+        .WillOnce(testing::Return(true));
+  }
+}
+} // namespace hdfs::tools::test
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.h
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.h
new file mode 100644
index 00000000000..db1e0960ae0
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-setrep-mock.h
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_TOOLS_HDFS_SETREP_MOCK
+#define LIBHDFSPP_TOOLS_HDFS_SETREP_MOCK
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+
+#include "hdfs-setrep.h"
+
+namespace hdfs::tools::test {
+/**
+ * {@class SetrepMock} is a {@class Setrep} in which the HandleHelp and
+ * HandlePath methods are replaced by gmock methods, so that a test can state
+ * which of them is expected to be called and with which arguments.
+ */
+class SetrepMock : public hdfs::tools::Setrep {
+public:
+ /**
+ * {@inheritdoc}
+ */
+ SetrepMock(const int argc, char **argv) : Setrep(argc, argv) {}
+
+ // Abiding to the Rule of 5: the mock can be neither copied nor moved.
+ SetrepMock(const SetrepMock &) = delete;
+ SetrepMock(SetrepMock &&) = delete;
+ SetrepMock &operator=(const SetrepMock &) = delete;
+ SetrepMock &operator=(SetrepMock &&) = delete;
+ // Declared here and defined (as defaulted) in hdfs-setrep-mock.cc.
+ ~SetrepMock() override;
+
+ /**
+ * Defines the methods and the corresponding arguments that are expected
+ * to be called on this instance of {@link HdfsTool} for the given test case.
+ *
+ * @param test_case An {@link std::function} object that points to the
+ * function defining the test case
+ * @param args The arguments that are passed to this test case. Defaults to
+ * an empty list.
+ */
+ void SetExpectations(std::function<std::unique_ptr<SetrepMock>()> test_case,
+ const std::vector<std::string> &args = {}) const;
+
+ // Mocked replacement for Setrep::HandleHelp.
+ MOCK_METHOD(bool, HandleHelp, (), (const, override));
+
+ // Mocked replacement for Setrep::HandlePath; the first argument is the path
+ // and the second is the replication factor.
+ MOCK_METHOD(bool, HandlePath, (const std::string &, const std::string &),
+ (const, override));
+};
+} // namespace hdfs::tools::test
+
+#endif
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
index 97169293f49..1bdf82f9afd 100644
---
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc
@@ -38,6 +38,7 @@
#include "hdfs-move-to-local-mock.h"
#include "hdfs-rename-snapshot-mock.h"
#include "hdfs-rm-mock.h"
+#include "hdfs-setrep-mock.h"
#include "hdfs-tool-test-fixtures.h"
#include "hdfs-tool-tests.h"
@@ -156,6 +157,11 @@ INSTANTIATE_TEST_SUITE_P(
PassMOptPermissionsAndAPath<hdfs::tools::test::FindMock>,
PassNOptAndAPath<hdfs::tools::test::FindMock>));
+INSTANTIATE_TEST_SUITE_P(
+ HdfsSetrep, HdfsToolBasicTest,
+ testing::Values(CallHelp<hdfs::tools::test::SetrepMock>,
+ PassPermissionsAndAPath<hdfs::tools::test::SetrepMock>));
+
// Negative tests
INSTANTIATE_TEST_SUITE_P(
HdfsAllowSnapshot, HdfsToolNegativeTestThrows,
@@ -245,6 +251,20 @@ INSTANTIATE_TEST_SUITE_P(
PassMOpt<hdfs::tools::test::FindMock>,
PassNOpt<hdfs::tools::test::FindMock>));
+INSTANTIATE_TEST_SUITE_P(
+ HdfsChgrp, HdfsToolNegativeTestThrows,
+ testing::Values(PassNOptAndAPath<hdfs::tools::test::ChgrpMock>));
+
+INSTANTIATE_TEST_SUITE_P(
+ HdfsSetrep, HdfsToolNegativeTestThrows,
+ testing::Values(
+ Pass3Paths<hdfs::tools::test::SetrepMock>,
+ PassRecursiveOwnerAndAPath<hdfs::tools::test::SetrepMock>,
+ PassRecursive<hdfs::tools::test::SetrepMock>,
+ PassMPOptsPermissionsAndAPath<hdfs::tools::test::SetrepMock>,
+ PassMOpt<hdfs::tools::test::SetrepMock>,
+ PassNOpt<hdfs::tools::test::SetrepMock>));
+
INSTANTIATE_TEST_SUITE_P(
HdfsRm, HdfsToolNegativeTestNoThrow,
testing::Values(PassRecursive<hdfs::tools::test::RmMock>));
@@ -302,5 +322,5 @@ INSTANTIATE_TEST_SUITE_P(
testing::Values(PassAPath<hdfs::tools::test::ChgrpMock>));
INSTANTIATE_TEST_SUITE_P(
- HdfsChgrp, HdfsToolNegativeTestThrows,
- testing::Values(PassNOptAndAPath<hdfs::tools::test::ChgrpMock>));
+ HdfsSetrep, HdfsToolNegativeTestNoThrow,
+ testing::Values(PassAPath<hdfs::tools::test::SetrepMock>));
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
index 7cbbe49b558..0d9a684c8ca 100644
---
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt
@@ -64,8 +64,7 @@ add_subdirectory(hdfs-copy-to-local)
add_subdirectory(hdfs-move-to-local)
-add_executable(hdfs_setrep hdfs_setrep.cc)
-target_link_libraries(hdfs_setrep tools_common hdfspp_static)
+add_subdirectory(hdfs-setrep)
add_subdirectory(hdfs-allow-snapshot)
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/CMakeLists.txt
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/CMakeLists.txt
new file mode 100644
index 00000000000..a0d8bafa630
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/CMakeLists.txt
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_library(hdfs_setrep_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-setrep.cc)
+target_include_directories(hdfs_setrep_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS})
+target_link_libraries(hdfs_setrep_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static)
+
+add_executable(hdfs_setrep main.cc)
+target_include_directories(hdfs_setrep PRIVATE ../../tools)
+target_link_libraries(hdfs_setrep PRIVATE hdfs_setrep_lib)
+
+install(TARGETS hdfs_setrep RUNTIME DESTINATION bin)
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.cc
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.cc
new file mode 100644
index 00000000000..542659b29f1
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.cc
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <cstdlib>
+#include <future>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "hdfs-setrep.h"
+#include "internal/set-replication-state.h"
+#include "tools_common.h"
+
+namespace hdfs::tools {
+/**
+ * Hands the command-line argument count and vector over to the HdfsTool base
+ * class. The command line itself is parsed later, in Initialize().
+ */
+Setrep::Setrep(const int argc, char **argv) : HdfsTool(argc, argv) {}
+
+/**
+ * Declares the options understood by this tool ("help", "replication-factor"
+ * and "path"), maps the latter two to positional arguments and parses the
+ * command line into opt_val_.
+ *
+ * @return Always true. NOTE(review): parse errors are presumably reported by
+ * boost::program_options through exceptions rather than through this return
+ * value - confirm.
+ */
+bool Setrep::Initialize() {
+ auto add_options = opt_desc_.add_options();
+ add_options("help,h",
+ "Changes the replication factor of a file at PATH. If PATH is a "
+ "directory then the command recursively changes the replication "
+ "factor of all files under the directory tree rooted at PATH.");
+ add_options(
+ "replication-factor", po::value<std::string>(),
+ "The replication factor to set for the given path and its children.");
+ add_options("path", po::value<std::string>(),
+ "The path for which the replication factor needs to be set.");
+
+ // Exactly two positional arguments are accepted, in this order: the
+ // replication factor followed by the path. An exception is thrown if more
+ // positional arguments than that are passed.
+ pos_opt_desc_.add("replication-factor", 1);
+ pos_opt_desc_.add("path", 1);
+
+ // Parse both the named and the positional options into opt_val_.
+ po::store(po::command_line_parser(argc_, argv_)
+ .options(opt_desc_)
+ .positional(pos_opt_desc_)
+ .run(),
+ opt_val_);
+ po::notify(opt_val_);
+ return true;
+}
+
+/**
+ * Checks that the command line has an acceptable shape: either a lone "help"
+ * option, or at least two arguments after the program name.
+ */
+bool Setrep::ValidateConstraints() const {
+  const bool has_single_argument = (argc_ == 2);
+
+  // A lone argument is acceptable only when it is the "help" option; every
+  // other invocation needs at least two arguments after the program name.
+  return has_single_argument ? opt_val_.count("help") > 0 : argc_ >= 3;
+}
+
+/**
+ * Builds the usage text of the tool: synopsis, description, the help option
+ * and two example invocations.
+ */
+std::string Setrep::GetDescription() const {
+  // One entry per line of the usage text; an empty entry is a blank line.
+  const char *const usage_lines[] = {
+      "Usage: hdfs_setrep [OPTION] NUM_REPLICAS PATH",
+      "",
+      "Changes the replication factor of a file at PATH. If PATH is a "
+      "directory then the command",
+      "recursively changes the replication factor of all files under the "
+      "directory tree rooted at PATH.",
+      "",
+      " -h display this help and exit",
+      "",
+      "Examples:",
+      "hdfs_setrep 5 hdfs://localhost.localdomain:8020/dir/file",
+      "hdfs_setrep 3 /dir1/dir2"};
+
+  std::stringstream usage;
+  for (const auto *line : usage_lines) {
+    usage << line << std::endl;
+  }
+  return usage.str();
+}
+
+/**
+ * Entry point of the tool: parses the command line, validates it and then
+ * dispatches to HandleHelp or HandlePath.
+ */
+bool Setrep::Do() {
+  if (!Initialize()) {
+    std::cerr << "Unable to initialize HDFS setrep tool" << std::endl;
+    return false;
+  }
+
+  // A malformed command line prints the usage text and counts as a failure.
+  if (!ValidateConstraints()) {
+    std::cout << GetDescription();
+    return false;
+  }
+
+  const bool help_requested = opt_val_.count("help") > 0;
+  if (help_requested) {
+    return HandleHelp();
+  }
+
+  // Both positional arguments are needed to do any work.
+  const bool has_path = opt_val_.count("path") > 0;
+  const bool has_replication_factor =
+      opt_val_.count("replication-factor") > 0;
+  if (!has_path || !has_replication_factor) {
+    return false;
+  }
+
+  const auto replication_factor =
+      opt_val_["replication-factor"].as<std::string>();
+  const auto path = opt_val_["path"].as<std::string>();
+  return HandlePath(path, replication_factor);
+}
+
+/**
+ * Writes the usage text returned by GetDescription() to standard output.
+ *
+ * @return Always true.
+ */
+bool Setrep::HandleHelp() const {
+ std::cout << GetDescription();
+ return true;
+}
+
+/**
+ * Runs an asynchronous Find rooted at the given path and issues an
+ * asynchronous SetReplication for every regular file that Find returns, then
+ * blocks until all of these requests have completed.
+ *
+ * @param path The path, or URI, whose files get the new replication factor.
+ * @param replication_factor The replication factor, written as a base-10
+ * whole number in the range [1, 65535].
+ *
+ * @return true if the replication factor was valid, the file system could be
+ * reached and neither Find nor any SetReplication request reported an error;
+ * false otherwise. The first error encountered is printed to standard error.
+ */
+bool Setrep::HandlePath(const std::string &path,
+                        const std::string &replication_factor) const {
+  /*
+   * The replication factor is a plain decimal count, so it is parsed in base
+   * 10. Parsing it as octal would turn "10" into 8 replicas and "8" into 0.
+   * Trailing garbage, values below 1 and values that do not fit in uint16_t
+   * are rejected before any connection is made.
+   */
+  char *parse_end = nullptr;
+  const auto parsed_replication =
+      std::strtol(replication_factor.c_str(), &parse_end, 10);
+  if (*parse_end != '\0' || parsed_replication < 1 ||
+      parsed_replication > std::numeric_limits<uint16_t>::max()) {
+    std::cerr << "Invalid replication factor: " << replication_factor
+              << std::endl;
+    return false;
+  }
+  const auto replication = static_cast<uint16_t>(parsed_replication);
+
+  // Building a URI object from the given path.
+  auto uri = hdfs::parse_path_or_exit(path);
+
+  const auto fs = hdfs::doConnect(uri, true);
+  if (!fs) {
+    std::cerr << "Could not connect to the file system." << std::endl;
+    return false;
+  }
+
+  /*
+   * Wrap async FileSystem::SetReplication with promise to make it a blocking
+   * call.
+   */
+  auto promise = std::make_shared<std::promise<hdfs::Status>>();
+  std::future future(promise->get_future());
+  auto handler = [promise](const hdfs::Status &s) { promise->set_value(s); };
+
+  /*
+   * Allocating shared state, which includes:
+   * replication to be set, handler to be called, request counter, and a
+   * boolean to keep track if find is done
+   */
+  auto state =
+      std::make_shared<SetReplicationState>(replication, handler, 0, false);
+
+  /*
+   * Keep requesting more from Find until we process the entire listing. Call
+   * handler when Find is done and request counter is 0. The state is shared
+   * with the SetReplication completion handlers, which may run concurrently
+   * with handler_find, so every access to it below is made under state->lock.
+   */
+  auto handler_find = [fs, state](const hdfs::Status &status_find,
+                                  const std::vector<hdfs::StatInfo> &stat_infos,
+                                  const bool has_more_results) -> bool {
+    /*
+     * For each result returned by Find we call async SetReplication with the
+     * handler below. SetReplication DOES NOT guarantee that the handler will
+     * only be called once at a time, so we DO need locking in
+     * handler_set_replication.
+     */
+    auto handler_set_replication =
+        [state](const hdfs::Status &status_set_replication) {
+          std::lock_guard guard(state->lock);
+
+          if (!status_set_replication.ok() && state->status.ok()) {
+            // We make sure we set state->status only on the first error.
+            state->status = status_set_replication;
+          }
+
+          // Decrement the counter once since we are done with this async
+          // call.
+          state->request_counter--;
+          if (state->request_counter == 0 && state->find_is_done) {
+            state->handler(state->status); // Exit.
+          }
+        };
+
+    // Stop launching new requests once an error has been recorded.
+    bool no_error_so_far = false;
+    {
+      std::lock_guard guard(state->lock);
+      no_error_so_far = state->status.ok();
+    }
+
+    if (no_error_so_far) {
+      for (const auto &stat_info : stat_infos) {
+        // Launch an asynchronous call to SetReplication for every returned
+        // file.
+        if (stat_info.file_type != hdfs::StatInfo::IS_FILE) {
+          continue;
+        }
+
+        /*
+         * Count the request before it is issued, so that its completion
+         * handler never observes a counter that does not include it. The lock
+         * is released again before calling SetReplication: should the
+         * completion handler ever be invoked on this thread, it must be able
+         * to take the lock itself.
+         */
+        {
+          std::lock_guard guard(state->lock);
+          state->request_counter++;
+        }
+        fs->SetReplication(stat_info.full_path, state->replication,
+                           handler_set_replication);
+      }
+    }
+
+    /*
+     * Lock this section because handler_set_replication might be accessing
+     * the same shared variables simultaneously.
+     */
+    std::lock_guard guard(state->lock);
+    if (!status_find.ok() && state->status.ok()) {
+      // We make sure we set state->status only on the first error.
+      state->status = status_find;
+    }
+    if (!has_more_results) {
+      state->find_is_done = true;
+      if (state->request_counter == 0) {
+        state->handler(state->status); // Exit.
+      }
+      return false;
+    }
+    return true;
+  };
+
+  // Asynchronous call to Find.
+  fs->Find(uri.get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(),
+           handler_find);
+
+  // Block until promise is set.
+  const auto status = future.get();
+  if (!status.ok()) {
+    std::cerr << "Error: " << status.ToString() << std::endl;
+    return false;
+  }
+  return true;
+}
+} // namespace hdfs::tools
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.h
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.h
new file mode 100644
index 00000000000..20ee7405b6d
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/hdfs-setrep.h
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_TOOLS_HDFS_SETREP
+#define LIBHDFSPP_TOOLS_HDFS_SETREP
+
+#include <string>
+
+#include <boost/program_options.hpp>
+
+#include "hdfs-tool.h"
+
+namespace hdfs::tools {
+/**
+ * {@class Setrep} is an {@class HdfsTool} that changes the replication factor
+ * of a file at a given path. If the path is a directory, then it recursively
+ * changes the replication factor of all files under the directory tree rooted
+ * at the given path.
+ */
+class Setrep : public HdfsTool {
+public:
+ /**
+ * {@inheritdoc}
+ */
+ Setrep(int argc, char **argv);
+
+ // Abiding to the Rule of 5: copy and move construction are defaulted, while
+ // copy and move assignment are deleted.
+ Setrep(const Setrep &) = default;
+ Setrep(Setrep &&) = default;
+ Setrep &operator=(const Setrep &) = delete;
+ Setrep &operator=(Setrep &&) = delete;
+ ~Setrep() override = default;
+
+ /**
+ * {@inheritdoc}
+ */
+ [[nodiscard]] std::string GetDescription() const override;
+
+ /**
+ * {@inheritdoc}
+ */
+ [[nodiscard]] bool Do() override;
+
+protected:
+ /**
+ * {@inheritdoc}
+ */
+ [[nodiscard]] bool Initialize() override;
+
+ /**
+ * {@inheritdoc}
+ */
+ [[nodiscard]] bool ValidateConstraints() const override;
+
+ /**
+ * {@inheritdoc}
+ */
+ [[nodiscard]] bool HandleHelp() const override;
+
+ /**
+ * Handle the path argument that's passed to this tool.
+ *
+ * @param path The path to the file or directory whose replication factor is
+ * to be changed.
+ * @param replication_factor The replication factor to set to given path and
+ * its children, in its textual form as given on the command line.
+ *
+ * @return A boolean indicating the result of this operation.
+ */
+ [[nodiscard]] virtual bool
+ HandlePath(const std::string &path,
+ const std::string &replication_factor) const;
+
+private:
+ /**
+ * A boost data-structure containing the description of positional arguments
+ * passed to the command-line.
+ */
+ po::positional_options_description pos_opt_desc_;
+};
+} // namespace hdfs::tools
+#endif
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/main.cc
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/main.cc
new file mode 100644
index 00000000000..a3d8399c575
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-setrep/main.cc
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+
+#include <google/protobuf/stubs/common.h>
+
+#include "hdfs-setrep.h"
+
+/**
+ * Entry point of the hdfs_setrep executable. Schedules the protobuf clean-up
+ * for process exit, runs the tool and turns its result into the process exit
+ * status.
+ */
+int main(int argc, char *argv[]) {
+  // Clean up static data on exit and prevent valgrind memory leaks
+  const auto shutdown_protobuf = []() -> void {
+    google::protobuf::ShutdownProtobufLibrary();
+  };
+
+  if (std::atexit(shutdown_protobuf) != 0) {
+    std::cerr << "Error: Unable to schedule clean-up tasks for HDFS setrep "
+                 "tool, exiting"
+              << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+
+  hdfs::tools::Setrep setrep(argc, argv);
+  bool succeeded = false;
+
+  // An exception raised while running the tool is reported and treated like
+  // any other failure.
+  try {
+    succeeded = setrep.Do();
+  } catch (const std::exception &e) {
+    std::cerr << "Error: " << e.what() << std::endl;
+  }
+
+  if (succeeded) {
+    return 0;
+  }
+  std::exit(EXIT_FAILURE);
+}
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_setrep.cc
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_setrep.cc
deleted file mode 100644
index 019e24d63fe..00000000000
---
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_setrep.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
-*/
-
-#include <google/protobuf/stubs/common.h>
-#include <unistd.h>
-#include <future>
-#include "tools_common.h"
-
-void usage(){
- std::cout << "Usage: hdfs_setrep [OPTION] NUM_REPLICAS PATH"
- << std::endl
- << std::endl << "Changes the replication factor of a file at PATH. If
PATH is a directory then the command"
- << std::endl << "recursively changes the replication factor of all files
under the directory tree rooted at PATH."
- << std::endl
- << std::endl << " -h display this help and exit"
- << std::endl
- << std::endl << "Examples:"
- << std::endl << "hdfs_setrep 5
hdfs://localhost.localdomain:8020/dir/file"
- << std::endl << "hdfs_setrep 3 /dir1/dir2"
- << std::endl;
-}
-
-struct SetReplicationState {
- const uint16_t replication;
- const std::function<void(const hdfs::Status &)> handler;
- //The request counter is incremented once every time SetReplication async
call is made
- uint64_t request_counter;
- //This boolean will be set when find returns the last result
- bool find_is_done;
- //Final status to be returned
- hdfs::Status status;
- //Shared variables will need protection with a lock
- std::mutex lock;
- SetReplicationState(const uint16_t replication_, const
std::function<void(const hdfs::Status &)> & handler_,
- uint64_t request_counter_, bool find_is_done_)
- : replication(replication_),
- handler(handler_),
- request_counter(request_counter_),
- find_is_done(find_is_done_),
- status(),
- lock() {
- }
-};
-
-int main(int argc, char *argv[]) {
- //We should have 3 or 4 parameters
- if (argc < 3) {
- usage();
- exit(EXIT_FAILURE);
- }
-
- int input;
-
- //Using GetOpt to read in the values
- opterr = 0;
- while ((input = getopt(argc, argv, "h")) != -1) {
- switch (input)
- {
- case 'h':
- usage();
- exit(EXIT_SUCCESS);
- case '?':
- if (isprint(optopt))
- std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl;
- else
- std::cerr << "Unknown option character `" << (char) optopt << "'." <<
std::endl;
- usage();
- exit(EXIT_FAILURE);
- default:
- exit(EXIT_FAILURE);
- }
- }
- std::string repl = argv[optind];
- std::string uri_path = argv[optind + 1];
-
- //Building a URI object from the given uri_path
- hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);
-
- std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
- if (!fs) {
- std::cerr << "Could not connect the file system. " << std::endl;
- exit(EXIT_FAILURE);
- }
-
- /* wrap async FileSystem::SetReplication with promise to make it a blocking
call */
- std::shared_ptr<std::promise<hdfs::Status>> promise =
std::make_shared<std::promise<hdfs::Status>>();
- std::future<hdfs::Status> future(promise->get_future());
- auto handler = [promise](const hdfs::Status &s) {
- promise->set_value(s);
- };
-
- uint16_t replication = std::stoi(repl.c_str(), NULL, 8);
- //Allocating shared state, which includes:
- //replication to be set, handler to be called, request counter, and a
boolean to keep track if find is done
- std::shared_ptr<SetReplicationState> state =
std::make_shared<SetReplicationState>(replication, handler, 0, false);
-
- // Keep requesting more from Find until we process the entire listing. Call
handler when Find is done and reques counter is 0.
- // Find guarantees that the handler will only be called once at a time so we
do not need locking in handlerFind.
- auto handlerFind = [fs, state](const hdfs::Status &status_find, const
std::vector<hdfs::StatInfo> & stat_infos, bool has_more_results) -> bool {
-
- //For each result returned by Find we call async SetReplication with the
handler below.
- //SetReplication DOES NOT guarantee that the handler will only be called
once at a time, so we DO need locking in handlerSetReplication.
- auto handlerSetReplication = [state](const hdfs::Status
&status_set_replication) {
- std::lock_guard<std::mutex> guard(state->lock);
-
- //Decrement the counter once since we are done with this async call
- if (!status_set_replication.ok() && state->status.ok()){
- //We make sure we set state->status only on the first error.
- state->status = status_set_replication;
- }
- state->request_counter--;
- if(state->request_counter == 0 && state->find_is_done){
- state->handler(state->status); //exit
- }
- };
- if(!stat_infos.empty() && state->status.ok()) {
- for (hdfs::StatInfo const& s : stat_infos) {
- //Launch an asynchronous call to SetReplication for every returned file
- if(s.file_type == hdfs::StatInfo::IS_FILE){
- state->request_counter++;
- fs->SetReplication(s.full_path, state->replication,
handlerSetReplication);
- }
- }
- }
-
- //Lock this section because handlerSetReplication might be accessing the
same
- //shared variables simultaneously
- std::lock_guard<std::mutex> guard(state->lock);
- if (!status_find.ok() && state->status.ok()){
- //We make sure we set state->status only on the first error.
- state->status = status_find;
- }
- if(!has_more_results){
- state->find_is_done = true;
- if(state->request_counter == 0){
- state->handler(state->status); //exit
- }
- return false;
- }
- return true;
- };
-
- //Asynchronous call to Find
- fs->Find(uri.get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(),
handlerFind);
-
- /* block until promise is set */
- hdfs::Status status = future.get();
- if (!status.ok()) {
- std::cerr << "Error: " << status.ToString() << std::endl;
- exit(EXIT_FAILURE);
- }
-
- // Clean up static data and prevent valgrind memory leaks
- google::protobuf::ShutdownProtobufLibrary();
- return 0;
-}
diff --git
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/internal/set-replication-state.h
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/internal/set-replication-state.h
new file mode 100644
index 00000000000..5d432eddbf7
--- /dev/null
+++
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/internal/set-replication-state.h
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_TOOLS_HDFS_SET_REPLICATION_STATE
+#define LIBHDFSPP_TOOLS_HDFS_SET_REPLICATION_STATE
+
+#include <functional>
+#include <mutex>
+
+#include "hdfspp/hdfspp.h"
+
+namespace hdfs::tools {
+/**
+ * {@class SetReplicationState} helps in handling the intermediate results
+ * while running {@link Setrep}.
+ */
+struct SetReplicationState {
+ /**
+ * Stores the given values in the corresponding members; status and lock are
+ * default-constructed.
+ */
+ SetReplicationState(const uint16_t replication,
+ std::function<void(const hdfs::Status &)> handler,
+ const uint64_t request_counter, const bool find_is_done)
+ : replication{replication}, handler{std::move(handler)},
+ request_counter{request_counter}, find_is_done{find_is_done} {}
+
+ /**
+ * The replication factor.
+ */
+ const uint16_t replication;
+
+ /**
+ * Handle the given {@link hdfs::Status}.
+ */
+ const std::function<void(const hdfs::Status &)> handler;
+
+ /**
+ * The request counter is incremented once every time SetReplication async
+ * call is made.
+ */
+ uint64_t request_counter;
+
+ /**
+ * This boolean will be set when find returns the last result.
+ */
+ bool find_is_done;
+
+ /**
+ * Final status to be returned.
+ */
+ hdfs::Status status;
+
+ /**
+ * Shared variables will need protection with a lock.
+ */
+ std::mutex lock;
+};
+} // namespace hdfs::tools
+
+#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]