davisusanibar commented on code in PR #35570:
URL: https://github.com/apache/arrow/pull/35570#discussion_r1218432494
##########
java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java:
##########
@@ -204,4 +206,132 @@ public void testRunBinaryQueryNamedTableNation() throws
Exception {
}
}
}
+
+ @Test
+ public void testDeserializeExtendedExpressions() {
+ // Extended Expression 01 (`add` `2` to column `id`): id + 2
+ // Extended Expression 02 (`concatenate` column `name` || column `name`):
name || name
+ // Extended Expression 03 (`filter` 'id' < 20): id < 20
+ // Extended expression result: [add_two_to_column_a, add(FieldPath(0), 2),
+ // concat_column_a_and_b, binary_join_element_wise(FieldPath(1),
FieldPath(1), ""),
+ // filter_one, (FieldPath(0) < 20)]
+ String binaryExtendedExpressions =
"Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwKHggCEhovZnVuY3Rpb25zX2NvbXBhcmlz" +
+
"b24ueWFtbBIRGg8IARoLYWRkOmkzMl9pMzISFBoSCAIQARoMY29uY2F0OnZjaGFyEhIaEAgCEAIaCmx0OmFueV9hbnkaMQoaGhgaBCoCEAE"
+
+
"iCBoGEgQKAhIAIgYaBAoCKAIaE2FkZF90d29fdG9fY29sdW1uX2EaOwoiGiAIARoEYgIQASIKGggSBgoEEgIIASIKGggSBgoEEgIIARoVY2"
+
+
"9uY2F0X2NvbHVtbl9hX2FuZF9iGjcKHBoaCAIaBAoCEAEiCBoGEgQKAhIAIgYaBAoCKBQaF2ZpbHRlcl9pZF9sb3dlcl90aGFuXzIwIhoKA"
+
+ "klECgROQU1FEg4KBCoCEAEKBGICEAEYAg==";
+ // get binary plan
+ byte[] expression = Base64.getDecoder().decode(binaryExtendedExpressions);
+ ByteBuffer substraitExpression =
ByteBuffer.allocateDirect(expression.length);
+ substraitExpression.put(expression);
+ // deserialize extended expression
+ List<String> extededExpressionList =
+ new
AceroSubstraitConsumer(rootAllocator()).runDeserializeExpressions(substraitExpression);
+ assertEquals(3, extededExpressionList.size() / 2);
+ assertEquals("add_two_to_column_a", extededExpressionList.get(0));
+ assertEquals("add(FieldPath(0), 2)", extededExpressionList.get(1));
+ assertEquals("concat_column_a_and_b", extededExpressionList.get(2));
+ assertEquals("binary_join_element_wise(FieldPath(1), FieldPath(1), \"\")",
extededExpressionList.get(3));
+ assertEquals("filter_id_lower_than_20", extededExpressionList.get(4));
+ assertEquals("(FieldPath(0) < 20)", extededExpressionList.get(5));
+ }
+
+ @Test
+ public void testBaseParquetReadWithExtendedExpressionsProjectAndFilter()
throws Exception {
+ // Extended Expression 01 (`add` `2` to column `id`): id + 2
+ // Extended Expression 02 (`concatenate` column `name` || column `name`):
name || name
+ // Extended Expression 03 (`filter` 'id' < 20): id < 20
+ // Extended expression result: [add_two_to_column_a, add(FieldPath(0), 2),
+ // concat_column_a_and_b, binary_join_element_wise(FieldPath(1),
FieldPath(1), ""),
+ // filter_one, (FieldPath(0) < 20)]
+ // Base64.getEncoder().encodeToString(plan.toByteArray()): Generated
throughout Substrait POJO Extended Expressions
+ String binaryExtendedExpressions =
"Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwKHggCEhovZnVuY3Rpb25zX2NvbXBhcmlz" +
+
"b24ueWFtbBIRGg8IARoLYWRkOmkzMl9pMzISFBoSCAIQARoMY29uY2F0OnZjaGFyEhIaEAgCEAIaCmx0OmFueV9hbnkaMQoaGhgaBCoCEAE"
+
+
"iCBoGEgQKAhIAIgYaBAoCKAIaE2FkZF90d29fdG9fY29sdW1uX2EaOwoiGiAIARoEYgIQASIKGggSBgoEEgIIASIKGggSBgoEEgIIARoVY2"
+
+
"9uY2F0X2NvbHVtbl9hX2FuZF9iGjcKHBoaCAIaBAoCEAEiCBoGEgQKAhIAIgYaBAoCKBQaF2ZpbHRlcl9pZF9sb3dlcl90aGFuXzIwIhoKA"
+
+ "klECgROQU1FEg4KBCoCEAEKBGICEAEYAg==";
+ Map<String, String> metadataSchema = new HashMap<>();
+ metadataSchema.put("parquet.avro.schema",
"{\"type\":\"record\",\"name\":\"Users\"," +
+
"\"namespace\":\"org.apache.arrow.dataset\",\"fields\":[{\"name\":\"id\"," +
+
"\"type\":[\"int\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]}]}");
+ metadataSchema.put("writer.model.name", "avro");
Review Comment:
I just discovered this: all Dataset responses attach this schema
metadata to their response messages. This was not detected because only the
fields or the data were compared, but if the whole schema needs to be compared,
we need to add this metadata to the expected messages.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]