lidavidm commented on code in PR #34420:
URL: https://github.com/apache/arrow/pull/34420#discussion_r1140277541
##########
cpp/src/arrow/filesystem/filesystem.cc:
##########
@@ -738,8 +738,57 @@ Result<std::shared_ptr<FileSystem>>
FileSystemFromUriReal(const Uri& uri,
return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
}
+Status CheckFileSystem(const FileSystem* filesystem,
+ const std::string& expected_type_name, const
std::string& uri) {
+ if (filesystem && filesystem->type_name() != expected_type_name) {
+ return Status::Invalid("URI ", uri, " is not compatible with filesystem of
type ",
+ filesystem->type_name());
+ }
+ return Status::OK();
+}
+
} // namespace
+Result<std::string> PathFromUriOrPath(const FileSystem* filesystem,
+ const std::string& uri_string) {
+ if (internal::DetectAbsolutePath(uri_string)) {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+ // Normalize path separators
+ return ToSlashes(uri_string);
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto uri, ParseFileSystemUri(uri_string));
+ const auto scheme = uri.scheme();
+ std::string path;
+ if (scheme == "file") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "local", uri_string));
+ RETURN_NOT_OK(LocalFileSystemOptions::FromUri(uri, &path));
+ } else if (scheme == "gs" || scheme == "gcs") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "gcs", uri_string));
+#ifdef ARROW_GCS
+ RETURN_NOT_OK(GcsOptions::FromUri(uri, &path));
+#else
+ return Status::NotImplemented("Got GCS URI but Arrow compiled without GCS
support");
+#endif
+ } else if (scheme == "hdfs" || scheme == "viewfs") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "hdfs", uri_string));
+ path = uri.path();
+ } else if (scheme == "s3") {
+ RETURN_NOT_OK(CheckFileSystem(filesystem, "s3", uri_string));
+#ifdef ARROW_S3
+ RETURN_NOT_OK(S3Options::FromUri(uri, &path));
Review Comment:
More the latter - basically, enough to perhaps parse the URI into a path and
validate the filesystem type (as we do here). I think a lot of that other stuff
gets into application-specific behavior fast, and I'd rather not maintain it
here (unless there's a clear need). So I think we should just be clear here
about what we do and do not promise.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]