lidavidm commented on code in PR #34227:
URL: https://github.com/apache/arrow/pull/34227#discussion_r1163972503


##########
java/dataset/src/main/cpp/jni_wrapper.cc:
##########
@@ -261,6 +264,50 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) {
   default_memory_pool_id = -1L;
 }
 
+/// Unpack the named tables passed through JNI.
+///
+/// Named tables are encoded as a string array, where every two elements 
+/// encode (1) the table name and (2) the address of an ArrowArrayStream
+/// containing the table data.  This function will eagerly read all
+/// tables into Tables.
+std::unordered_map<std::string, std::shared_ptr<arrow::Table>> 
LoadNamedTables(JNIEnv* env, jobjectArray& str_array) {
+  std::unordered_map<std::string, std::shared_ptr<arrow::Table>> 
map_table_to_record_batch_reader;
+  int length = env->GetArrayLength(str_array);
+  if (length % 2 != 0) {
+    JniThrow("Can not map odd number of array elements to key/value pairs");
+  }
+  std::shared_ptr<arrow::Table> output_table;
+  for (int pos = 0; pos < length; pos++) {
+    auto j_string_key = 
reinterpret_cast<jstring>(env->GetObjectArrayElement(str_array, pos));
+    pos++;
+    auto j_string_value = 
reinterpret_cast<jstring>(env->GetObjectArrayElement(str_array, pos));
+    long memory_address = 0;
+    try {
+      memory_address = std::stol(JStringToCString(env, j_string_value));
+    } catch (...) {
+      JniThrow("Failed to parse memory address from string value");
+    }
+    auto* arrow_stream_in = 
reinterpret_cast<ArrowArrayStream*>(memory_address);
+    std::shared_ptr<arrow::RecordBatchReader> readerIn = 
JniGetOrThrow(arrow::ImportRecordBatchReader(arrow_stream_in));
+    output_table = JniGetOrThrow(readerIn->ToTable());
+    map_table_to_record_batch_reader[JStringToCString(env, j_string_key)] = 
output_table;
+  }
+  return map_table_to_record_batch_reader;
+}
+
+/// Find the arrow Table associated with a given table name
+std::shared_ptr<arrow::Table> GetTableByName(const std::vector<std::string>& 
names,
+    std::unordered_map<std::string, std::shared_ptr<arrow::Table>> 
map_table_to_reader) {
+  std::shared_ptr<arrow::Table> output_table;
+  for (const auto& name : names) {
+    output_table = map_table_to_reader[name];
+    if (output_table == nullptr) {
+      JniThrow("Table name " + name + " is needed to execute the Substrait 
plan");
+    }
+  }
+  return output_table;
+}

Review Comment:
   I've explained already: the implementation here makes no sense. Why are we 
looping through a list of names and just discarding the lookups? Both this code 
and the test case you linked to are not right. Substrait is trying to represent 
a hierarchical name like "catalog.schema.table". We don't support that, so as I 
already explained, the code should instead check that the names array is 
exactly of length 1, and only do a single lookup. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to