westonpace commented on code in PR #35440:
URL: https://github.com/apache/arrow/pull/35440#discussion_r1243014753
##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -1870,195 +1731,301 @@ class S3FileSystem::Impl : public
std::enable_shared_from_this<S3FileSystem::Imp
"ListObjectsV2", outcome.GetError());
}
- Status CheckNestingDepth(int32_t nesting_depth) {
- if (nesting_depth >= kMaxNestingDepth) {
- return Status::IOError("S3 filesystem tree exceeds maximum nesting depth
(",
- kMaxNestingDepth, ")");
+ static FileInfo MakeDirectoryInfo(std::string dirname) {
+ FileInfo dir;
+ dir.set_type(FileType::Directory);
+ dir.set_path(dirname);
+ return dir;
+ }
+
+ static std::vector<FileInfo> MakeDirectoryInfos(std::vector<std::string>
dirnames) {
+ std::vector<FileInfo> dir_infos;
+ for (auto& dirname : dirnames) {
+ dir_infos.push_back(MakeDirectoryInfo(std::move(dirname)));
}
- return Status::OK();
+ return dir_infos;
}
- // A helper class for Walk and WalkAsync
- struct FileInfoCollector {
- FileInfoCollector(std::string bucket, std::string key, const FileSelector&
select)
- : bucket(std::move(bucket)),
- key(std::move(key)),
- allow_not_found(select.allow_not_found) {}
+ using FileInfoSink = PushGenerator<std::vector<FileInfo>>::Producer;
+
+ struct FileListerState {
+ FileInfoSink files_queue;
+ bool allow_not_found;
+ int max_recursion;
+ bool include_virtual;
Review Comment:
I've added a comment to `ListAsync`. Imagine there is a file object at path
`A/B/C`. Sometimes there are also empty file objects at `A/` and `A/B/` that
act as directory markers; sometimes there are not. From what I've seen, S3
libraries differ on whether these marker objects should be created. Arrow C++
does create them (in MakeDirectory). However, I don't want to depend on them
existing.
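When no such object exists at `A/` or `A/B/`, the entry is "virtual": it is
implied only by the keys beneath it. Virtual files are included when the user
is listing files (`GetFileInfo`). However, they are not included when the user
is deleting files recursively (since there is no actual file object to delete).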
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]