ianmcook commented on code in PR #35034: URL: https://github.com/apache/arrow/pull/35034#discussion_r1189162195
##########
java/dataset/src/main/cpp/jni_wrapper.cc:
##########
@@ -533,6 +535,59 @@ Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory(
   JNI_METHOD_END(-1L)
 }
 
+/*
+ * Class: org_apache_arrow_dataset_file_JniWrapper
+ * Method: makeFileSystemDatasetFactory
+ * Signature: ([Ljava/lang/String;II)J
+ */
+JNIEXPORT jlong JNICALL
+Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory___3Ljava_lang_String_2I(
+    JNIEnv* env, jobject, jobjectArray uris, jint file_format_id) {
+  JNI_METHOD_START
+
+  using FsPathPair = std::pair<std::shared_ptr<arrow::fs::FileSystem>, std::string>;
+
+  std::shared_ptr<arrow::dataset::FileFormat> file_format =
+      JniGetOrThrow(GetFileFormat(file_format_id));
+  arrow::dataset::FileSystemFactoryOptions options;
+
+  std::vector<std::string> uri_vec = ToStringVector(env, uris);
+
+  // If not all URIs, throw exception
+  if (!std::all_of(uri_vec.begin(), uri_vec.end(), arrow::fs::internal::IsLikelyUri)) {
+    JniThrow("All sources must be valid URIs.");
+  }
+
+  std::vector<FsPathPair> filesystems;
+  filesystems.reserve(uri_vec.size());
+  std::transform(uri_vec.begin(), uri_vec.end(), std::back_inserter(filesystems),
+                 [](const auto& s) -> FsPathPair {
+                   std::string output_path;
+                   auto fs = JniGetOrThrow(arrow::fs::FileSystemFromUri(s, &output_path));
+                   return {fs, output_path};
+                 });
+
+  // If all URIs, ensure that they all share a FileSystem type
+  if (std::unique(filesystems.begin(), filesystems.end(),
+                  [] (const auto& p1, const auto& p2) {
+                    return p1.first->type_name() == p2.first->type_name();
+                  }) - filesystems.begin() != 1) {
+    JniThrow("Different filesystems are not supported in a multi-file dataset.");

Review Comment:
@NoahFournier FYI https://github.com/apache/arrow/pull/34420 is merged now

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org