Copilot commented on code in PR #45998:
URL: https://github.com/apache/arrow/pull/45998#discussion_r3479964258
##########
cpp/src/arrow/filesystem/hdfs.h:
##########
@@ -88,10 +98,48 @@ class ARROW_EXPORT HadoopFileSystem : public FileSystem {
Status DeleteFile(const std::string& path) override;
+ Status MakeDirectory(const std::string& path);
+
+ bool Exists(const std::string& path);
+
+ Status GetPathInfoStatus(const std::string& path, HdfsPathInfo* info);
+
+ Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>*
listing);
Review Comment:
`HdfsPathInfo` is only forward-declared, but it’s used inside
`std::vector<HdfsPathInfo>` in the public `HadoopFileSystem::ListDirectory`
signature. Instantiating `std::vector<T>` requires `T` to be complete at the
point of declaration, so this header will not compile for downstream users.
Either move the `HdfsPathInfo` definition into this public header (and remove
the duplicate definition from `hdfs_internal.h`), or change these APIs to use
an already-public type (e.g. `FileInfo`) and keep `HdfsPathInfo` internal.
##########
cpp/src/arrow/filesystem/hdfs.cc:
##########
@@ -39,28 +38,115 @@ using util::Uri;
namespace fs {
+#define CHECK_FAILURE(RETURN_VALUE, WHAT) \
+ do { \
+ if (RETURN_VALUE == -1) { \
+ return IOErrorFromErrno(errno, "HDFS ", WHAT, " failed"); \
+ } \
+ } while (0)
+
+static constexpr int kDefaultHdfsBufferSize = 1 << 16;
+
using internal::GetAbstractPathParent;
using internal::MakeAbstractPathRelative;
using internal::RemoveLeadingSlash;
+// ----------------------------------------------------------------------
+// HDFS client
+
+// TODO(wesm): this could throw std::bad_alloc in the course of copying strings
+// into the path info object
+static void SetPathInfo(const hdfsFileInfo* input, HdfsPathInfo* out) {
+ out->kind = input->mKind == kObjectKindFile ? arrow::fs::FileType::File
+ : arrow::fs::FileType::Directory;
+ out->name = std::string(input->mName);
+ out->owner = std::string(input->mOwner);
+ out->group = std::string(input->mGroup);
+
+ out->last_access_time = static_cast<int32_t>(input->mLastAccess);
+ out->last_modified_time = static_cast<int32_t>(input->mLastMod);
+ out->size = static_cast<int64_t>(input->mSize);
+
+ out->replication = input->mReplication;
+ out->block_size = input->mBlockSize;
+
+ out->permissions = input->mPermissions;
+}
+
+namespace {
+
+Status GetPathInfoFailed(const std::string& path) {
+ return IOErrorFromErrno(errno, "Calling GetPathInfo for '", path, "'
failed");
+}
+
+} // namespace
+
class HadoopFileSystem::Impl {
public:
Impl(HdfsOptions options, const io::IOContext& io_context)
- : options_(std::move(options)), io_context_(io_context) {}
+ : options_(std::move(options)),
+ io_context_(io_context),
+ driver_(NULLPTR),
+ port_(0),
+ client_(NULLPTR) {}
~Impl() { ARROW_WARN_NOT_OK(Close(), "Failed to disconnect hdfs client"); }
Status Init() {
- io::internal::LibHdfsShim* driver_shim;
- RETURN_NOT_OK(ConnectLibHdfs(&driver_shim));
- RETURN_NOT_OK(io::HadoopFileSystem::Connect(&options_.connection_config,
&client_));
+ const HdfsConnectionConfig* config = &options_.connection_config;
+ RETURN_NOT_OK(ConnectLibHdfs(&driver_));
Review Comment:
`ConnectLibHdfs()` is declared in `arrow::fs::internal` (see
`arrow/filesystem/hdfs_internal.h`), but here it is called unqualified. This
won’t compile because unqualified lookup in `arrow::fs` won’t find names in the
nested `arrow::fs::internal` namespace.
##########
python/pyarrow/io.pxi:
##########
@@ -32,7 +32,7 @@ import warnings
from io import BufferedIOBase, IOBase, TextIOBase, UnsupportedOperation
from queue import Queue, Empty as QueueEmpty
-from pyarrow.lib cimport check_status, HaveLibHdfs
+from pyarrow.lib cimport check_status
Review Comment:
This removes `pyarrow.io.have_libhdfs()`, which was previously part of the
`pyarrow.io` public surface. Even though `pyarrow.have_libhdfs()` is now
provided, existing code importing `have_libhdfs` from `pyarrow.io` will break.
Consider keeping a small compatibility wrapper in `pyarrow.io` (optionally with
a deprecation warning) that forwards to the new location.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]