davisusanibar commented on code in PR #35034:
URL: https://github.com/apache/arrow/pull/35034#discussion_r1163375000


##########
java/dataset/src/main/cpp/jni_wrapper.cc:
##########
@@ -533,6 +535,59 @@ 
Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory(
   JNI_METHOD_END(-1L)
 }
 
+/*
+ * Class:     org_apache_arrow_dataset_file_JniWrapper
+ * Method:    makeFileSystemDatasetFactory
+ * Signature: ([Ljava/lang/String;I)J
+ */
+JNIEXPORT jlong JNICALL
+Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory___3Ljava_lang_String_2I(
+    JNIEnv* env, jobject, jobjectArray uris, jint file_format_id) {
+  JNI_METHOD_START
+
+  using FsPathPair = std::pair<std::shared_ptr<arrow::fs::FileSystem>, 
std::string>;
+
+  std::shared_ptr<arrow::dataset::FileFormat> file_format =
+      JniGetOrThrow(GetFileFormat(file_format_id));
+  arrow::dataset::FileSystemFactoryOptions options;
+
+  std::vector<std::string> uri_vec = ToStringVector(env, uris);
+
+  // If not all URIs, throw exception
+  if (!std::all_of(uri_vec.begin(), uri_vec.end(), 
arrow::fs::internal::IsLikelyUri)) {
+    JniThrow("All sources must be valid URIs.");
+  }
+
+  std::vector<FsPathPair> filesystems;
+  filesystems.reserve(uri_vec.size());
+  std::transform(uri_vec.begin(), uri_vec.end(), 
std::back_inserter(filesystems),
+    [](const auto& s) -> FsPathPair {
+    std::string output_path;
+    auto fs = JniGetOrThrow(arrow::fs::FileSystemFromUri(s, &output_path));
+    return {fs, output_path};
+  });
+
+  // If all URIs, ensure that they all share a FileSystem type
+  if (std::unique(filesystems.begin(), filesystems.end(),
+        [] (const auto& p1, const auto& p2) {
+          return p1.first->type_name() == p2.first->type_name();
+        }) - filesystems.begin() != 1) {
+    JniThrow("Different filesystems are not supported in a multi-file 
dataset.");

Review Comment:
   Would it be possible for the error message to include details about 
which different filesystems the user is trying to use?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to