westonpace commented on code in PR #34420:
URL: https://github.com/apache/arrow/pull/34420#discussion_r1140217730


##########
cpp/src/arrow/filesystem/filesystem.cc:
##########
@@ -738,8 +738,57 @@ Result<std::shared_ptr<FileSystem>> 
FileSystemFromUriReal(const Uri& uri,
   return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
 }
 
+Status CheckFileSystem(const FileSystem* filesystem,
+                       const std::string& expected_type_name, const 
std::string& uri) {
+  if (filesystem && filesystem->type_name() != expected_type_name) {
+    return Status::Invalid("URI ", uri, " is not compatible with filesystem of 
type ",
+                           filesystem->type_name());
+  }
+  return Status::OK();
+}
+
 }  // namespace
 
+Result<std::string> PathFromUriOrPath(const FileSystem* filesystem,
+                                      const std::string& uri_string) {
+  if (internal::DetectAbsolutePath(uri_string)) {
+    RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+    // Normalize path separators
+    return ToSlashes(uri_string);
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto uri, ParseFileSystemUri(uri_string));
+  const auto scheme = uri.scheme();
+  std::string path;
+  if (scheme == "file") {
+    RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+    RETURN_NOT_OK(LocalFileSystemOptions::FromUri(uri, &path));
+  } else if (scheme == "gs" || scheme == "gcs") {
+    RETURN_NOT_OK(CheckFileSystem(filesystem, "gcs", uri_string));
+#ifdef ARROW_GCS
+    RETURN_NOT_OK(GcsOptions::FromUri(uri, &path));
+#else
+    return Status::NotImplemented("Got GCS URI but Arrow compiled without GCS 
support");
+#endif
+  } else if (scheme == "hdfs" || scheme == "viewfs") {
+    RETURN_NOT_OK(CheckFileSystem(filesystem, "hdfs", uri_string));
+    path = uri.path();
+  } else if (scheme == "s3") {
+    RETURN_NOT_OK(CheckFileSystem(filesystem, "s3", uri_string));
+#ifdef ARROW_S3
+    RETURN_NOT_OK(S3Options::FromUri(uri, &path));

Review Comment:
   For example, `s3://my_bucket/foo?region=us-west-1` and 
`s3://my_bucket/bar?region=us-east-1` shouldn't be compatible with the same 
filesystem.  But would `s3://my_bucket/foo` be compatible with a filesystem 
created earlier with region `us-east-1`?  I would think the answer would be 
yes.  So I don't think we could just call `S3Options::FromUri` and then compare 
the resulting options object with the current.
   
   We would almost need to reimplement `S3Options::FromUri` so that we can 
figure out which parts of the options the URI is actually specifying beyond the 
default and then compare those with the current options.  Would you prefer this 
approach (more code, now potentially need to update two spots for every new 
config, potential to get out of sync) or just go with "yes, these scheme is 
compatible with the filesystem" and live with the potential false positives?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to