Copilot commented on code in PR #50128:
URL: https://github.com/apache/arrow/pull/50128#discussion_r3374599906


##########
cpp/src/arrow/filesystem/filesystem.h:
##########
@@ -359,11 +361,34 @@ class ARROW_EXPORT FileSystem
 
 struct FileSystemFactory {
   std::function<Result<std::shared_ptr<FileSystem>>(
-      const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
+      const Uri& uri, const std::vector<std::pair<std::string, std::any>>& 
options,
+      const io::IOContext& io_context, std::string* out_path)>
       function;
   std::string_view file;
   int line;
 
+  /// Construct from an options-aware factory function.
+  FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
+                        const Uri&, const std::vector<std::pair<std::string, 
std::any>>&,
+                        const io::IOContext&, std::string*)>
+                        fn,
+                    std::string_view file, int line)
+      : function(std::move(fn)), file(file), line(line) {}
+
+  /// Construct from a non-options aware factory function maintaining source 
compatibility
+  /// with existing factories.
+  FileSystemFactory(std::function<Result<std::shared_ptr<FileSystem>>(
+                        const Uri&, const io::IOContext&, std::string*)>
+                        fn,
+                    std::string_view file, int line)
+      : function([fn = std::move(fn)](
+                     const Uri& uri,
+                     const std::vector<std::pair<std::string, std::any>>& 
/*ignored*/,
+                     const io::IOContext& ctx,
+                     std::string* out_path) { return fn(uri, ctx, out_path); 
}),

Review Comment:
   The compatibility constructor for non-options-aware factories currently 
ignores the provided options and forwards the call to the legacy factory 
unconditionally. This can silently drop backend-specific options (potentially 
including credentials), which defeats the purpose of the options channel and is 
unsafe/surprising for callers.



##########
cpp/src/arrow/testing/examplefs.cc:
##########
@@ -26,12 +29,31 @@ namespace arrow::fs {
 
 auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM(
     "example",
-    [](const Uri& uri, const io::IOContext& io_context,
+    [](const Uri& uri, const std::vector<std::pair<std::string, std::any>>& 
options,
+       const io::IOContext& io_context,
        std::string* out_path) -> Result<std::shared_ptr<FileSystem>> {
       constexpr std::string_view kScheme = "example";
       EXPECT_EQ(uri.scheme(), kScheme);
       auto local_uri = "file" + uri.ToString().substr(kScheme.size());
-      return FileSystemFromUri(local_uri, io_context, out_path);
+      ARROW_ASSIGN_OR_RAISE(auto fs,
+                            FileSystemFromUri(local_uri, options, io_context, 
out_path));

Review Comment:
   The example filesystem factory forwards the full backend-specific options 
list to the nested "file" filesystem. Options are scheme-specific, so passing 
them to a different scheme can cause unintended failures (once "file" starts 
rejecting options) or accidental option leakage. The example factory should 
consume its own options and call the underlying filesystem with an empty option 
list.



##########
cpp/src/arrow/filesystem/filesystem.h:
##########
@@ -547,6 +572,31 @@ ARROW_EXPORT
 Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
                                                       std::string* out_path = 
NULLPTR);
 
+/// \brief Create a new FileSystem by URI with extended backend-specific 
filesystem
+/// options
+///
+/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
+/// "gs" and "gcs".
+///
+/// Support for other schemes can be added using RegisterFileSystemFactory.
+///
+/// \param[in] uri the URI to give access to
+/// \param[in] options a list of backend-specific filesystem options
+///            Each option is a (name, value) pair.
+///            The expected type is specific to the backend and
+///            option name.
+///            Options are forwarded to schemes dispatched through a registered
+///            FileSystemFactory. A registered factory that does not support 
options
+///            (currently "s3") returns NotImplemented for non-empty options.
+///            Schemes not handled by a registered factory will also return
+///            NotImplemented for non-empty options.

Review Comment:
   The docstring still claims that the registered factory that does not support 
options is "currently \"s3\"", but this PR makes the S3 factory consume 
options. The comment should be updated to avoid misleading API users about 
which schemes accept/reject options.



##########
cpp/src/arrow/filesystem/filesystem.h:
##########
@@ -563,6 +613,32 @@ Result<std::shared_ptr<FileSystem>> 
FileSystemFromUri(const std::string& uri,
                                                       const io::IOContext& 
io_context,
                                                       std::string* out_path = 
NULLPTR);
 
+/// \brief Create a new FileSystem by URI with a custom IO context with 
backend-specific
+/// filesystem options
+///
+/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
+/// "gs" and "gcs".
+///
+/// Support for other schemes can be added using RegisterFileSystemFactory.
+///
+/// \param[in] uri a URI-based path, ex: file:///some/local/path
+/// \param[in] options a list of backend-specific filesystem options
+///            Each option is a (name, value) pair.
+///            The expected type is specific to the backend and
+///            option name.
+///            Options are forwarded to schemes dispatched through a registered
+///            FileSystemFactory. A registered factory that does not support 
options
+///            (currently "s3") returns NotImplemented for non-empty options.
+///            Schemes not handled by a registered factory will also return
+///            NotImplemented for non-empty options.

Review Comment:
   Same as above: this docstring says the only non-options-aware registered 
factory is "currently \"s3\"", which becomes inaccurate once S3 consumes 
options. Update the comment so it describes the general behavior rather than 
naming a specific scheme.



##########
cpp/src/arrow/filesystem/s3fs.cc:
##########
@@ -3603,11 +3649,19 @@ Result<std::string> ResolveS3BucketRegion(const 
std::string& bucket) {
 
 auto kS3FileSystemModule = ARROW_REGISTER_FILESYSTEM(
     "s3",
-    [](const arrow::util::Uri& uri, const io::IOContext& io_context,
+    [](const arrow::util::Uri& uri,
+       const std::vector<std::pair<std::string, std::any>>& options,
+       const io::IOContext& io_context,
        std::string* out_path) -> Result<std::shared_ptr<fs::FileSystem>> {
+      /*if (!options.empty()) {
+        return Status::NotImplemented(
+            "S3 filesystem factory options are not supported yet, got: ", 
options.size(),
+            " options");
+      }*/

Review Comment:
   There is a commented-out NotImplemented block for S3 factory options that is 
now obsolete (the factory uses S3Options::FromUriAndOptions). Leaving dead 
commented code makes the intent unclear and adds noise.



##########
cpp/src/arrow/testing/examplefs.cc:
##########
@@ -26,12 +29,31 @@ namespace arrow::fs {
 
 auto kExampleFileSystemModule = ARROW_REGISTER_FILESYSTEM(
     "example",
-    [](const Uri& uri, const io::IOContext& io_context,
+    [](const Uri& uri, const std::vector<std::pair<std::string, std::any>>& 
options,
+       const io::IOContext& io_context,
        std::string* out_path) -> Result<std::shared_ptr<FileSystem>> {
       constexpr std::string_view kScheme = "example";
       EXPECT_EQ(uri.scheme(), kScheme);
       auto local_uri = "file" + uri.ToString().substr(kScheme.size());
-      return FileSystemFromUri(local_uri, io_context, out_path);
+      ARROW_ASSIGN_OR_RAISE(auto fs,
+                            FileSystemFromUri(local_uri, options, io_context, 
out_path));
+      for (const auto& [key, value] : options) {
+        EXPECT_TRUE(value.has_value());
+        if (key == "example_option_string") {
+          EXPECT_TRUE(value.type() == typeid(std::string));
+          if (out_path != nullptr) {
+            *out_path += "/" + std::any_cast<std::string>(value);
+          }
+        } else if (key == "example_option_int") {
+          EXPECT_TRUE(value.type() == typeid(int));
+          if (out_path != nullptr) {
+            *out_path += "/" + std::to_string(std::any_cast<int>(value));
+          }

Review Comment:
   The option type checks use EXPECT_TRUE, but the code then unconditionally 
calls std::any_cast<>() which will throw (and likely crash the test) if the 
type is wrong. Since these checks are validating input types, they should be 
ASSERT_* (or use std::any_cast<T>(&) and check for nullptr) before any_cast by 
value.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to