ianmcook commented on code in PR #35034:
URL: https://github.com/apache/arrow/pull/35034#discussion_r1189162195


##########
java/dataset/src/main/cpp/jni_wrapper.cc:
##########
@@ -533,6 +535,59 @@ 
Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory(
   JNI_METHOD_END(-1L)
 }
 
+/*
+ * Class:     org_apache_arrow_dataset_file_JniWrapper
+ * Method:    makeFileSystemDatasetFactory
+ * Signature: ([Ljava/lang/String;I)J
+ */
+JNIEXPORT jlong JNICALL
+Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory___3Ljava_lang_String_2I(
+    JNIEnv* env, jobject, jobjectArray uris, jint file_format_id) {
+  JNI_METHOD_START
+
+  using FsPathPair = std::pair<std::shared_ptr<arrow::fs::FileSystem>, 
std::string>;
+
+  std::shared_ptr<arrow::dataset::FileFormat> file_format =
+      JniGetOrThrow(GetFileFormat(file_format_id));
+  arrow::dataset::FileSystemFactoryOptions options;
+
+  std::vector<std::string> uri_vec = ToStringVector(env, uris);
+
+  // If not all URIs, throw exception
+  if (!std::all_of(uri_vec.begin(), uri_vec.end(), 
arrow::fs::internal::IsLikelyUri)) {
+    JniThrow("All sources must be valid URIs.");
+  }
+
+  std::vector<FsPathPair> filesystems;
+  filesystems.reserve(uri_vec.size());
+  std::transform(uri_vec.begin(), uri_vec.end(), 
std::back_inserter(filesystems),
+    [](const auto& s) -> FsPathPair {
+    std::string output_path;
+    auto fs = JniGetOrThrow(arrow::fs::FileSystemFromUri(s, &output_path));
+    return {fs, output_path};
+  });
+
+  // If all URIs, ensure that they all share a FileSystem type
+  if (std::unique(filesystems.begin(), filesystems.end(),
+        [] (const auto& p1, const auto& p2) {
+          return p1.first->type_name() == p2.first->type_name();
+        }) - filesystems.begin() != 1) {
+    JniThrow("Different filesystems are not supported in a multi-file 
dataset.");

Review Comment:
   @NoahFournier FYI https://github.com/apache/arrow/pull/34420 is merged now



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to