davisusanibar commented on code in PR #35570:
URL: https://github.com/apache/arrow/pull/35570#discussion_r1218750453
##########
java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java:
##########
@@ -204,4 +206,132 @@ public void testRunBinaryQueryNamedTableNation() throws
Exception {
}
}
}
+
+ @Test
+ public void testDeserializeExtendedExpressions() {
+ // Extended Expression 01 (`add` `2` to column `id`): id + 2
+ // Extended Expression 02 (`concatenate` column `name` || column `name`):
name || name
+ // Extended Expression 03 (`filter` 'id' < 20): id < 20
+ // Extended expression result: [add_two_to_column_a, add(FieldPath(0), 2),
+ // concat_column_a_and_b, binary_join_element_wise(FieldPath(1),
FieldPath(1), ""),
+ // filter_id_lower_than_20, (FieldPath(0) < 20)]
+ String binaryExtendedExpressions =
"Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwKHggCEhovZnVuY3Rpb25zX2NvbXBhcmlz" +
+
"b24ueWFtbBIRGg8IARoLYWRkOmkzMl9pMzISFBoSCAIQARoMY29uY2F0OnZjaGFyEhIaEAgCEAIaCmx0OmFueV9hbnkaMQoaGhgaBCoCEAE"
+
+
"iCBoGEgQKAhIAIgYaBAoCKAIaE2FkZF90d29fdG9fY29sdW1uX2EaOwoiGiAIARoEYgIQASIKGggSBgoEEgIIASIKGggSBgoEEgIIARoVY2"
+
+
"9uY2F0X2NvbHVtbl9hX2FuZF9iGjcKHBoaCAIaBAoCEAEiCBoGEgQKAhIAIgYaBAoCKBQaF2ZpbHRlcl9pZF9sb3dlcl90aGFuXzIwIhoKA"
+
+ "klECgROQU1FEg4KBCoCEAEKBGICEAEYAg==";
+ // get binary plan
+ byte[] expression = Base64.getDecoder().decode(binaryExtendedExpressions);
+ ByteBuffer substraitExpression =
ByteBuffer.allocateDirect(expression.length);
+ substraitExpression.put(expression);
+ // deserialize extended expression
+ List<String> extededExpressionList =
+ new
AceroSubstraitConsumer(rootAllocator()).runDeserializeExpressions(substraitExpression);
+ assertEquals(3, extededExpressionList.size() / 2);
+ assertEquals("add_two_to_column_a", extededExpressionList.get(0));
+ assertEquals("add(FieldPath(0), 2)", extededExpressionList.get(1));
+ assertEquals("concat_column_a_and_b", extededExpressionList.get(2));
+ assertEquals("binary_join_element_wise(FieldPath(1), FieldPath(1), \"\")",
extededExpressionList.get(3));
+ assertEquals("filter_id_lower_than_20", extededExpressionList.get(4));
+ assertEquals("(FieldPath(0) < 20)", extededExpressionList.get(5));
+ }
Review Comment:
Deleted
##########
java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java:
##########
@@ -204,4 +206,132 @@ public void testRunBinaryQueryNamedTableNation() throws
Exception {
}
}
}
+
+ @Test
+ public void testDeserializeExtendedExpressions() {
+ // Extended Expression 01 (`add` `2` to column `id`): id + 2
+ // Extended Expression 02 (`concatenate` column `name` || column `name`):
name || name
+ // Extended Expression 03 (`filter` 'id' < 20): id < 20
+ // Extended expression result: [add_two_to_column_a, add(FieldPath(0), 2),
+ // concat_column_a_and_b, binary_join_element_wise(FieldPath(1),
FieldPath(1), ""),
+ // filter_id_lower_than_20, (FieldPath(0) < 20)]
+ String binaryExtendedExpressions =
"Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwKHggCEhovZnVuY3Rpb25zX2NvbXBhcmlz" +
+
"b24ueWFtbBIRGg8IARoLYWRkOmkzMl9pMzISFBoSCAIQARoMY29uY2F0OnZjaGFyEhIaEAgCEAIaCmx0OmFueV9hbnkaMQoaGhgaBCoCEAE"
+
+
"iCBoGEgQKAhIAIgYaBAoCKAIaE2FkZF90d29fdG9fY29sdW1uX2EaOwoiGiAIARoEYgIQASIKGggSBgoEEgIIASIKGggSBgoEEgIIARoVY2"
+
+
"9uY2F0X2NvbHVtbl9hX2FuZF9iGjcKHBoaCAIaBAoCEAEiCBoGEgQKAhIAIgYaBAoCKBQaF2ZpbHRlcl9pZF9sb3dlcl90aGFuXzIwIhoKA"
+
+ "klECgROQU1FEg4KBCoCEAEKBGICEAEYAg==";
+ // get binary plan
+ byte[] expression = Base64.getDecoder().decode(binaryExtendedExpressions);
+ ByteBuffer substraitExpression =
ByteBuffer.allocateDirect(expression.length);
+ substraitExpression.put(expression);
+ // deserialize extended expression
+ List<String> extededExpressionList =
+ new
AceroSubstraitConsumer(rootAllocator()).runDeserializeExpressions(substraitExpression);
+ assertEquals(3, extededExpressionList.size() / 2);
+ assertEquals("add_two_to_column_a", extededExpressionList.get(0));
+ assertEquals("add(FieldPath(0), 2)", extededExpressionList.get(1));
+ assertEquals("concat_column_a_and_b", extededExpressionList.get(2));
+ assertEquals("binary_join_element_wise(FieldPath(1), FieldPath(1), \"\")",
extededExpressionList.get(3));
+ assertEquals("filter_id_lower_than_20", extededExpressionList.get(4));
+ assertEquals("(FieldPath(0) < 20)", extededExpressionList.get(5));
+ }
+
+ @Test
+ public void testBaseParquetReadWithExtendedExpressionsProjectAndFilter()
throws Exception {
+ // Extended Expression 01 (`add` `2` to column `id`): id + 2
+ // Extended Expression 02 (`concatenate` column `name` || column `name`):
name || name
+ // Extended Expression 03 (`filter` 'id' < 20): id < 20
+ // Extended expression result: [add_two_to_column_a, add(FieldPath(0), 2),
+ // concat_column_a_and_b, binary_join_element_wise(FieldPath(1),
FieldPath(1), ""),
+ // filter_id_lower_than_20, (FieldPath(0) < 20)]
+ // Base64.getEncoder().encodeToString(plan.toByteArray()): Generated
through Substrait POJO Extended Expressions
+ String binaryExtendedExpressions =
"Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwKHggCEhovZnVuY3Rpb25zX2NvbXBhcmlz" +
+
"b24ueWFtbBIRGg8IARoLYWRkOmkzMl9pMzISFBoSCAIQARoMY29uY2F0OnZjaGFyEhIaEAgCEAIaCmx0OmFueV9hbnkaMQoaGhgaBCoCEAE"
+
+
"iCBoGEgQKAhIAIgYaBAoCKAIaE2FkZF90d29fdG9fY29sdW1uX2EaOwoiGiAIARoEYgIQASIKGggSBgoEEgIIASIKGggSBgoEEgIIARoVY2"
+
+
"9uY2F0X2NvbHVtbl9hX2FuZF9iGjcKHBoaCAIaBAoCEAEiCBoGEgQKAhIAIgYaBAoCKBQaF2ZpbHRlcl9pZF9sb3dlcl90aGFuXzIwIhoKA"
+
+ "klECgROQU1FEg4KBCoCEAEKBGICEAEYAg==";
+ Map<String, String> metadataSchema = new HashMap<>();
+ metadataSchema.put("parquet.avro.schema",
"{\"type\":\"record\",\"name\":\"Users\"," +
+
"\"namespace\":\"org.apache.arrow.dataset\",\"fields\":[{\"name\":\"id\"," +
+
"\"type\":[\"int\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]}]}");
+ metadataSchema.put("writer.model.name", "avro");
Review Comment:
Compare by Fields
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]