westonpace commented on code in PR #34420:
URL: https://github.com/apache/arrow/pull/34420#discussion_r1140217730
##########
cpp/src/arrow/filesystem/filesystem.cc:
##########
@@ -738,8 +738,57 @@ Result<std::shared_ptr<FileSystem>>
FileSystemFromUriReal(const Uri& uri,
return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
}
+Status CheckFileSystem(const FileSystem* filesystem,
+ const std::string& expected_type_name, const
std::string& uri) {
+ if (filesystem && filesystem->type_name() != expected_type_name) {
+ return Status::Invalid("URI ", uri, " is not compatible with filesystem of
type ",
+ filesystem->type_name());
+ }
+ return Status::OK();
+}
+
} // namespace
+Result<std::string> PathFromUriOrPath(const FileSystem* filesystem,
+ const std::string& uri_string) {
+ if (internal::DetectAbsolutePath(uri_string)) {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+ // Normalize path separators
+ return ToSlashes(uri_string);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto uri, ParseFileSystemUri(uri_string));
+ const auto scheme = uri.scheme();
+ std::string path;
+ if (scheme == "file") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+ RETURN_NOT_OK(LocalFileSystemOptions::FromUri(uri, &path));
+ } else if (scheme == "gs" || scheme == "gcs") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "gcs", uri_string));
+#ifdef ARROW_GCS
+ RETURN_NOT_OK(GcsOptions::FromUri(uri, &path));
+#else
+ return Status::NotImplemented("Got GCS URI but Arrow compiled without GCS
support");
+#endif
+ } else if (scheme == "hdfs" || scheme == "viewfs") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "hdfs", uri_string));
+ path = uri.path();
+ } else if (scheme == "s3") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "s3", uri_string));
+#ifdef ARROW_S3
+ RETURN_NOT_OK(S3Options::FromUri(uri, &path));
Review Comment:
For example, `s3://my_bucket/foo?region=us-west-1` and
`s3://my_bucket/bar?region=us-east-1` shouldn't be compatible with the same
filesystem. But would `s3://my_bucket/foo` be compatible with a filesystem
created earlier with region `us-east-1`? I would think the answer would be
yes. So I don't think we could just call `S3Options::FromUri` and then compare
the resulting options object with the current.
We would almost need to reimplement `S3Options::FromUri` so that we can
figure out which parts of the options the URI is actually specifying beyond the
default and then compare those with the current options. Would you prefer this
approach (more code, now potentially need to update two spots for every new
config, potential to get out of sync) or just go with "yes, these scheme is
compatible with the filesystem" and live with the potential false positives?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]