This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 994f901c8d3bdeda0eed72aff463544eea171e1a Author: ayush.tripathi <[email protected]> AuthorDate: Tue Nov 19 16:13:01 2024 +0530 [ASTERIXDB-3503][EXT] Tests for Field pushdown and delta all type - user model changes: yes - storage format changes: no - interface changes: no Details: - Tests to check optional flags of Deltalake like "decimal-to-double", "date-to-int" and "timestamp-to-long". - Testing plans for column filter. Ext-ref: MB-63840 Change-Id: I8b9dc57a792f88a28467a791a8d0b678ade7d82d Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19091 Reviewed-by: Murtadha Hubail <[email protected]> Tested-by: Murtadha Hubail <[email protected]> Integration-Tests: Murtadha Hubail <[email protected]> --- .../external_dataset/ExternalDatasetTestUtils.java | 4 + .../deltalake/DeltaAllTypeGenerator.java | 268 +++++++++++++++++++++ .../deltalake-all-type.00.ddl.sqlpp | 50 ++++ .../deltalake-all-type.01.query.sqlpp | 22 ++ .../deltalake-all-type.02.query.sqlpp | 22 ++ .../deltalake-field-access-pushdown.00.ddl.sqlpp | 51 ++++ .../deltalake-field-access-pushdown.01.query.sqlpp | 28 +++ .../deltalake-field-access-pushdown.02.query.sqlpp | 27 +++ .../deltalake-field-access-pushdown.03.query.sqlpp | 26 ++ .../deltalake-field-access-pushdown.04.query.sqlpp | 27 +++ .../deltalake-field-access-pushdown.05.query.sqlpp | 28 +++ .../deltalake-field-access-pushdown.06.query.sqlpp | 28 +++ .../deltalake-field-access-pushdown.07.query.sqlpp | 27 +++ .../deltalake-field-access-pushdown.08.query.sqlpp | 28 +++ .../deltalake-field-access-pushdown.09.query.sqlpp | 27 +++ .../deltalake-field-access-pushdown.10.query.sqlpp | 28 +++ .../deltalake-all-type/deltalake-all-type.01.adm | 5 + .../deltalake-all-type/deltalake-all-type.02.adm | 5 + .../deltalake-field-access-pushdown.01.adm | 5 + .../deltalake-field-access-pushdown.02.plan | 22 ++ .../deltalake-field-access-pushdown.03.adm | 5 + .../deltalake-field-access-pushdown.04.plan | 22 ++ 
.../deltalake-field-access-pushdown.05.adm | 5 + .../deltalake-field-access-pushdown.06.plan | 50 ++++ .../deltalake-field-access-pushdown.07.adm | 5 + .../deltalake-field-access-pushdown.08.plan | 50 ++++ .../deltalake-field-access-pushdown.09.adm | 5 + .../deltalake-field-access-pushdown.10.adm | 46 ++++ .../runtimets/testsuite_external_dataset_s3.xml | 12 + 29 files changed, 928 insertions(+) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java index db1bf5039b..07789b764f 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java @@ -37,6 +37,7 @@ import java.nio.file.Paths; import java.util.Collection; import org.apache.asterix.test.external_dataset.avro.AvroFileConverterUtil; +import org.apache.asterix.test.external_dataset.deltalake.DeltaAllTypeGenerator; import org.apache.asterix.test.external_dataset.deltalake.DeltaTableGenerator; import org.apache.asterix.test.external_dataset.parquet.BinaryFileConverterUtil; import org.apache.asterix.testframework.context.TestCaseContext; @@ -122,6 +123,7 @@ public class ExternalDatasetTestUtils { // cleaning directory BinaryFileConverterUtil.cleanBinaryDirectory(basePath, DELTA_GEN_BASEDIR); DeltaTableGenerator.prepareDeltaTableContainer(new Configuration()); + DeltaAllTypeGenerator.createTableInsertData(new Configuration()); } /** @@ -434,6 +436,8 @@ public class ExternalDatasetTestUtils { loadDeltaDirectory(generatedDataBasePath, "/modified_delta_table/_delta_log", JSON_FILTER, "delta-data/"); loadDeltaDirectory(generatedDataBasePath, "/multiple_file_delta_table", PARQUET_FILTER, "delta-data/"); loadDeltaDirectory(generatedDataBasePath, 
"/multiple_file_delta_table/_delta_log", JSON_FILTER, "delta-data/"); + loadDeltaDirectory(generatedDataBasePath, "/delta_all_type/_delta_log", JSON_FILTER, "delta-data/"); + loadDeltaDirectory(generatedDataBasePath, "/delta_all_type", PARQUET_FILTER, "delta-data/"); } private static void loadDeltaDirectory(String dataBasePath, String rootPath, FilenameFilter filter, diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/deltalake/DeltaAllTypeGenerator.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/deltalake/DeltaAllTypeGenerator.java new file mode 100644 index 0000000000..5c302a5f90 --- /dev/null +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/deltalake/DeltaAllTypeGenerator.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.test.external_dataset.deltalake; + +import static io.delta.kernel.internal.util.Utils.toCloseableIterator; + +import java.io.File; +import java.io.IOException; +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import org.apache.hadoop.conf.Configuration; + +import io.delta.kernel.DataWriteContext; +import io.delta.kernel.Operation; +import io.delta.kernel.Table; +import io.delta.kernel.Transaction; +import io.delta.kernel.TransactionBuilder; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.FilteredColumnarBatch; +import io.delta.kernel.data.Row; +import io.delta.kernel.defaults.engine.DefaultEngine; +import io.delta.kernel.defaults.internal.data.DefaultColumnarBatch; +import io.delta.kernel.engine.Engine; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DecimalType; +import io.delta.kernel.types.DoubleType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructType; +import io.delta.kernel.types.TimestampType; +import io.delta.kernel.utils.CloseableIterable; +import io.delta.kernel.utils.CloseableIterator; +import io.delta.kernel.utils.DataFileStatus; + +public class DeltaAllTypeGenerator { + public static final DecimalType decimal_t = new DecimalType(10, 5); + protected static final StructType exampleTableSchema = new StructType().add("integer_type", IntegerType.INTEGER) + .add("string_type", StringType.STRING).add("decimal_type", decimal_t).add("double_type", DoubleType.DOUBLE) + .add("timestamp_type", TimestampType.TIMESTAMP).add("date_type", DateType.DATE); + public static final String DELTA_ALL_TYPE_TABLE_PATH = + "target" + File.separatorChar + "generated_delta_files" + File.separatorChar + "delta_all_type"; + + public static void 
createTableInsertData(Configuration conf) throws IOException { + Engine engine = DefaultEngine.create(conf); + Table table = Table.forPath(engine, DELTA_ALL_TYPE_TABLE_PATH); + TransactionBuilder txnBuilder = table.createTransactionBuilder(engine, "Examples", Operation.CREATE_TABLE); + txnBuilder = txnBuilder.withSchema(engine, exampleTableSchema); + Transaction txn = txnBuilder.build(engine); + Row txnState = txn.getTransactionState(engine); + ColumnVector[] vectors = new ColumnVector[exampleTableSchema.length()]; + vectors[0] = intVector(Arrays.asList(123, 124, 125, 126, 127)); + vectors[1] = stringVector( + Arrays.asList("FirstPerson", "SecondPerson", "ThirdPerson", "FourthPerson", "FifthPerson")); + vectors[2] = decimalVector(Arrays.asList(new BigDecimal("1.25432"), new BigDecimal("2666.223"), + new BigDecimal("1245.2421"), new BigDecimal("23731.2"), new BigDecimal("80911.222456"))); + vectors[3] = doubleVector(Arrays.asList(100.34d, 200.055d, 300.02d, 400.21014d, 500.219d)); + vectors[4] = timestampVector( + Arrays.asList(1732010400000L, 1732010400330L, 1732010400450L, 1732010403000L, 1732010401200L)); + vectors[5] = dateVector(Arrays.asList(127, 23, 11, 456, 23)); + ColumnarBatch batch = new DefaultColumnarBatch(5, exampleTableSchema, vectors); + FilteredColumnarBatch f1 = new FilteredColumnarBatch(batch, Optional.empty()); + CloseableIterator<FilteredColumnarBatch> data = toCloseableIterator(Arrays.asList(f1).iterator()); + CloseableIterator<FilteredColumnarBatch> physicalData = + Transaction.transformLogicalData(engine, txnState, data, Collections.emptyMap()); + DataWriteContext writeContext = Transaction.getWriteContext(engine, txnState, Collections.emptyMap()); + CloseableIterator<DataFileStatus> dataFiles = engine.getParquetHandler().writeParquetFiles( + writeContext.getTargetDirectory(), physicalData, writeContext.getStatisticsColumns()); + CloseableIterator<Row> dataActions = + Transaction.generateAppendActions(engine, txnState, dataFiles, 
writeContext); + CloseableIterable<Row> dataActionsIterable = CloseableIterable.inMemoryIterable(dataActions); + txn.commit(engine, dataActionsIterable); + + } + + static ColumnVector stringVector(List<String> data) { + return new ColumnVector() { + @Override + public DataType getDataType() { + return StringType.STRING; + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return data.get(rowId) == null; + } + + @Override + public String getString(int rowId) { + return data.get(rowId); + } + }; + } + + static ColumnVector intVector(List<Integer> data) { + return new ColumnVector() { + @Override + public DataType getDataType() { + return IntegerType.INTEGER; + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return false; + } + + @Override + public int getInt(int rowId) { + return data.get(rowId); + } + }; + } + + static ColumnVector doubleVector(List<Double> data) { + return new ColumnVector() { + @Override + public DataType getDataType() { + return DoubleType.DOUBLE; + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return data.get(rowId) == null; + } + + @Override + public double getDouble(int rowId) { + return data.get(rowId); + } + }; + } + + static ColumnVector decimalVector(List<BigDecimal> data) { + return new ColumnVector() { + @Override + public DataType getDataType() { + return decimal_t; // Use the specific DecimalType passed (scale and precision) + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return data.get(rowId) == null; + } + + @Override + public BigDecimal getDecimal(int rowId) { + // Return the 
BigDecimal value directly as Delta Kernel works natively with BigDecimal for decimals + return data.get(rowId); + } + }; + } + + static ColumnVector timestampVector(List<Long> data) { // Assuming timestamp values are stored as microseconds since epoch + return new ColumnVector() { + @Override + public DataType getDataType() { + return TimestampType.TIMESTAMP; + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return data.get(rowId) == null; + } + + @Override + public long getLong(int rowId) { + // Delta Lake often uses microseconds since epoch for timestamps + return data.get(rowId); + } + }; + } + + static ColumnVector dateVector(List<Integer> data) { // Assuming date values are stored as days since epoch + return new ColumnVector() { + @Override + public DataType getDataType() { + return DateType.DATE; + } + + @Override + public int getSize() { + return data.size(); + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return data.get(rowId) == null; + } + + @Override + public int getInt(int rowId) { + // Delta Lake often uses days since epoch for dates + return data.get(rowId); + } + }; + } + +} diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.00.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.00.ddl.sqlpp new file mode 100644 index 0000000000..cdae363774 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.00.ddl.sqlpp @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + +USE test; + + +CREATE TYPE DeltalakeTableType as { + }; + +/* DeltalakeDataset1: delta_all_type with "timestamp-to-long" and "date-to-int" disabled and an explicit "timezone". */ +CREATE EXTERNAL COLLECTION DeltalakeDataset1(DeltalakeTableType) USING %adapter% + ( + %template%, + ("container"="playground"), + ("definition"="delta-data/delta_all_type"), + ("decimal-to-double" = "true"), + ("timestamp-to-long" = "false"), + ("date-to-int" = "false"), + ("timezone" = "PST"), + ("table-format" = "delta") + ); + +/* DeltalakeDataset2: the same table with "timestamp-to-long" and "date-to-int" enabled; no "timezone" option is given. */ +CREATE EXTERNAL COLLECTION DeltalakeDataset2(DeltalakeTableType) USING %adapter% + ( + %template%, + ("container"="playground"), + ("definition"="delta-data/delta_all_type"), + ("decimal-to-double" = "true"), + ("timestamp-to-long" = "true"), + ("date-to-int" = "true"), + ("table-format" = "delta") + ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.01.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.01.query.sqlpp new file mode 100644 index 0000000000..90f80ab159 --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.01.query.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +/* Returns every record of DeltalakeDataset1 unchanged. */ +SELECT element ds FROM DeltalakeDataset1 as ds; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.02.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.02.query.sqlpp new file mode 100644 index 0000000000..b75203b184 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-all-type/deltalake-all-type.02.query.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +/* Returns every record of DeltalakeDataset2 unchanged. */ +SELECT element ds FROM DeltalakeDataset2 as ds; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.00.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.00.ddl.sqlpp new file mode 100644 index 0000000000..4987403348 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.00.ddl.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; + + USE test; + + + CREATE TYPE DeltalakeTableType as { + }; + + /* Two collections over the same delta_all_type table with identical options, so that it can be joined with itself. */ + CREATE EXTERNAL COLLECTION DeltalakeDataset1(DeltalakeTableType) USING %adapter% + ( + %template%, + ("container"="playground"), + ("definition"="delta-data/delta_all_type"), + ("decimal-to-double" = "true"), + ("timestamp-to-long"="false"), + ("date-to-int"="false"), + ("timezone" = "PST"), + ("table-format" = "delta") + ); + + CREATE EXTERNAL COLLECTION DeltalakeDataset2(DeltalakeTableType) USING %adapter% + ( + %template%, + ("container"="playground"), + ("definition"="delta-data/delta_all_type"), + ("decimal-to-double" = "true"), + ("timestamp-to-long"="false"), + ("date-to-int"="false"), + ("timezone" = "PST"), + ("table-format" = "delta") + ); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.query.sqlpp new file mode 100644 index 0000000000..41805993ee --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +USE test; + +SET `compiler.external.field.pushdown` "false"; + + +/* Full-record scan ordered by integer_type, with field pushdown disabled. */ +SELECT VALUE d +FROM DeltalakeDataset1 d +ORDER BY d.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.query.sqlpp new file mode 100644 index 0000000000..d3009591a3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE test; + +SET `compiler.external.field.pushdown` "false"; + +/* Plan of the full-record scan ordered by integer_type, with field pushdown disabled. */ +Explain +SELECT VALUE d +FROM DeltalakeDataset1 d +ORDER BY d.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.query.sqlpp new file mode 100644 index 0000000000..073171cc55 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +/* Full-record scan ordered by integer_type, with field pushdown enabled. */ +SELECT VALUE d +FROM DeltalakeDataset1 d +ORDER BY d.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.query.sqlpp new file mode 100644 index 0000000000..d3009591a3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +/* Plan of the full-record scan ordered by integer_type, with field pushdown enabled (EXPLAIN counterpart of the preceding pushdown-enabled query). */ +Explain +SELECT VALUE d +FROM DeltalakeDataset1 d +ORDER BY d.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.query.sqlpp new file mode 100644 index 0000000000..fa60442b46 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "false"; + + +/* Self-join of the two collections on integer_type with field pushdown disabled; ordered by the join key, which exists in the table, so the row order is deterministic. */ +SELECT d1.integer_type, d2.string_type, d2.timestamp_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.query.sqlpp new file mode 100644 index 0000000000..e1eceddfb3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "false"; + +/* Plan of the self-join on integer_type with field pushdown disabled. NOTE(review): the generated delta_all_type table has no `id` column, so `d2.id` is presumably MISSING for every row and this ORDER BY does not pin the row order - confirm whether `d2.integer_type` was intended (the expected plan would then need regenerating). */ +EXPLAIN +SELECT d1.integer_type, d2.string_type, d2.timestamp_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.query.sqlpp new file mode 100644 index 0000000000..1e440bfb93 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +/* Self-join of the two collections on integer_type with field pushdown enabled; ordered by the join key, which exists in the table, so the row order is deterministic. */ +SELECT d1.integer_type, d2.string_type, d2.timestamp_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.query.sqlpp new file mode 100644 index 0000000000..786618af96 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +Explain +SELECT d1.integer_type, d2.string_type, d2.timestamp_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.query.sqlpp new file mode 100644 index 0000000000..d2c91531fb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +SELECT d1.integer_type, d1.date_type, d2.decimal_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.query.sqlpp new file mode 100644 index 0000000000..bb4701a976 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +USE test; + +SET `compiler.external.field.pushdown` "true"; + +Explain +SELECT d1.integer_type, d1.date_type, d2.decimal_type +FROM DeltalakeDataset1 d1, DeltalakeDataset2 d2 +WHERE d1.integer_type = d2.integer_type +ORDER BY d2.integer_type; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.01.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.01.adm new file mode 100644 index 0000000000..e5f99a3486 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.01.adm @@ -0,0 +1,5 @@ +{ "integer_type": 123, "string_type": "FirstPerson", "decimal_type": 1.25432, "double_type": 100.34, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-05-08") } +{ "integer_type": 124, "string_type": "SecondPerson", "decimal_type": 2666.223, "double_type": 200.055, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-24") } +{ "integer_type": 125, "string_type": "ThirdPerson", "decimal_type": 1245.2421, "double_type": 300.02, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-12") } +{ "integer_type": 126, "string_type": "FourthPerson", "decimal_type": 23731.2, "double_type": 400.21014, "timestamp_type": datetime("1970-01-20T17:06:50.403"), "date_type": date("1971-04-02") } +{ "integer_type": 127, "string_type": "FifthPerson", "decimal_type": 80911.22245, "double_type": 500.219, "timestamp_type": datetime("1970-01-20T17:06:50.401"), "date_type": date("1970-01-24") } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.02.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.02.adm new file mode 100644 index 0000000000..9cc21d6f59 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-all-type/deltalake-all-type.02.adm @@ -0,0 +1,5 @@ +{ "integer_type": 123, "string_type": "FirstPerson", "decimal_type": 1.25432, "double_type": 100.34, "timestamp_type": 1732010400, "date_type": 127 } +{ "integer_type": 124, "string_type": "SecondPerson", "decimal_type": 2666.223, "double_type": 200.055, "timestamp_type": 1732010400, "date_type": 23 } +{ "integer_type": 125, "string_type": "ThirdPerson", "decimal_type": 1245.2421, "double_type": 300.02, "timestamp_type": 1732010400, "date_type": 11 } +{ "integer_type": 126, "string_type": "FourthPerson", "decimal_type": 23731.2, "double_type": 400.21014, "timestamp_type": 1732010403, "date_type": 456 } +{ "integer_type": 127, "string_type": "FifthPerson", "decimal_type": 80911.22245, "double_type": 500.219, "timestamp_type": 1732010401, "date_type": 23 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.adm new file mode 100644 index 0000000000..e5f99a3486 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.01.adm @@ -0,0 +1,5 @@ +{ "integer_type": 123, "string_type": "FirstPerson", "decimal_type": 1.25432, "double_type": 100.34, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-05-08") } +{ "integer_type": 124, "string_type": "SecondPerson", "decimal_type": 2666.223, "double_type": 200.055, "timestamp_type": 
datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-24") } +{ "integer_type": 125, "string_type": "ThirdPerson", "decimal_type": 1245.2421, "double_type": 300.02, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-12") } +{ "integer_type": 126, "string_type": "FourthPerson", "decimal_type": 23731.2, "double_type": 400.21014, "timestamp_type": datetime("1970-01-20T17:06:50.403"), "date_type": date("1971-04-02") } +{ "integer_type": 127, "string_type": "FifthPerson", "decimal_type": 80911.22245, "double_type": 500.219, "timestamp_type": datetime("1970-01-20T17:06:50.401"), "date_type": date("1970-01-24") } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.plan new file mode 100644 index 0000000000..6f01ee7583 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.02.plan @@ -0,0 +1,22 @@ +distribute result [$$d] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$d]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- SORT_MERGE_EXCHANGE [$$14(ASC) ] |PARTITIONED| + order (ASC, $$14) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STABLE_SORT [$$14(ASC)] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + assign [$$14] <- [$$d.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange 
[cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d] <- test.DeltalakeDataset1 [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.adm new file mode 100644 index 0000000000..e5f99a3486 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.03.adm @@ -0,0 +1,5 @@ +{ "integer_type": 123, "string_type": "FirstPerson", "decimal_type": 1.25432, "double_type": 100.34, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-05-08") } +{ "integer_type": 124, "string_type": "SecondPerson", "decimal_type": 2666.223, "double_type": 200.055, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-24") } +{ "integer_type": 125, "string_type": "ThirdPerson", "decimal_type": 1245.2421, "double_type": 300.02, "timestamp_type": datetime("1970-01-20T17:06:50.400"), "date_type": date("1970-01-12") } +{ "integer_type": 126, "string_type": "FourthPerson", "decimal_type": 23731.2, "double_type": 400.21014, "timestamp_type": datetime("1970-01-20T17:06:50.403"), "date_type": date("1971-04-02") } +{ "integer_type": 127, "string_type": "FifthPerson", "decimal_type": 80911.22245, "double_type": 500.219, "timestamp_type": datetime("1970-01-20T17:06:50.401"), "date_type": date("1970-01-24") } diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.plan new file mode 100644 index 0000000000..6f01ee7583 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.04.plan @@ -0,0 +1,22 @@ +distribute result [$$d] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$d]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- SORT_MERGE_EXCHANGE [$$14(ASC) ] |PARTITIONED| + order (ASC, $$14) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STABLE_SORT [$$14(ASC)] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + assign [$$14] <- [$$d.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d] <- test.DeltalakeDataset1 [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.adm new file mode 100644 index 0000000000..1f480e4c12 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.05.adm @@ -0,0 +1,5 @@ +{ "integer_type": 125, "string_type": "ThirdPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } +{ "integer_type": 126, "string_type": "FourthPerson", "timestamp_type": datetime("1970-01-20T17:06:50.403") } +{ "integer_type": 127, "string_type": "FifthPerson", "timestamp_type": datetime("1970-01-20T17:06:50.401") } +{ "integer_type": 124, "string_type": "SecondPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } +{ "integer_type": 123, "string_type": "FirstPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.plan new file mode 100644 index 0000000000..848bc0456a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.06.plan @@ -0,0 +1,50 @@ +distribute result [$$32] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$32]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$32] <- [{"integer_type": $$34, "string_type": $$37, "timestamp_type": $$38}] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + 
project ([$$34, $$37, $$38]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- SORT_MERGE_EXCHANGE [$$36(ASC) ] |PARTITIONED| + order (ASC, $$36) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STABLE_SORT [$$36(ASC)] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$34, $$37, $$38, $$36]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$34, $$35)) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HYBRID_HASH_JOIN [$$34][$$35] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED| + project ([$$34]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$34] <- [$$d1.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d1] <- test.DeltalakeDataset1 [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$35] |PARTITIONED| + project ([$$37, $$38, $$36, $$35]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$38, $$37, $$36, $$35] <- [$$d2.getField("timestamp_type"), $$d2.getField("string_type"), $$d2.getField("id"), $$d2.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 
0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d2] <- test.DeltalakeDataset2 [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.adm new file mode 100644 index 0000000000..1f480e4c12 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.07.adm @@ -0,0 +1,5 @@ +{ "integer_type": 125, "string_type": "ThirdPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } +{ "integer_type": 126, "string_type": "FourthPerson", "timestamp_type": datetime("1970-01-20T17:06:50.403") } +{ "integer_type": 127, "string_type": "FifthPerson", "timestamp_type": datetime("1970-01-20T17:06:50.401") } +{ "integer_type": 124, "string_type": "SecondPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } +{ "integer_type": 123, "string_type": "FirstPerson", "timestamp_type": datetime("1970-01-20T17:06:50.400") } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.plan new file mode 100644 index 0000000000..34671ddc4d --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.08.plan @@ -0,0 +1,50 @@ +distribute result [$$32] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$32]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$32] <- [{"integer_type": $$34, "string_type": $$37, "timestamp_type": $$38}] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + project ([$$34, $$37, $$38]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- SORT_MERGE_EXCHANGE [$$36(ASC) ] |PARTITIONED| + order (ASC, $$36) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STABLE_SORT [$$36(ASC)] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$34, $$37, $$38, $$36]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$34, $$35)) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HYBRID_HASH_JOIN [$$34][$$35] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED| + project ([$$34]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$34] <- [$$d1.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d1] <- test.DeltalakeDataset1 project ({integer_type:any}) [cardinality: 0.0, op-cost: 0.0, 
total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$35] |PARTITIONED| + project ([$$37, $$38, $$36, $$35]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$38, $$37, $$36, $$35] <- [$$d2.getField("timestamp_type"), $$d2.getField("string_type"), $$d2.getField("id"), $$d2.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d2] <- test.DeltalakeDataset2 project ({string_type:any,timestamp_type:any,id:any,integer_type:any}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.adm new file mode 100644 index 0000000000..5e1edce4c3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.09.adm @@ -0,0 +1,5 @@ +{ "integer_type": 123, "date_type": date("1970-05-08"), "decimal_type": 1.25432 } +{ "integer_type": 124, "date_type": date("1970-01-24"), "decimal_type": 2666.223 } +{ "integer_type": 125, 
"date_type": date("1970-01-12"), "decimal_type": 1245.2421 } +{ "integer_type": 126, "date_type": date("1971-04-02"), "decimal_type": 23731.2 } +{ "integer_type": 127, "date_type": date("1970-01-24"), "decimal_type": 80911.22245 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.adm new file mode 100644 index 0000000000..f706ba28dd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/deltalake-field-access-pushdown/deltalake-field-access-pushdown.10.adm @@ -0,0 +1,46 @@ +distribute result [$$32] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] +-- DISTRIBUTE_RESULT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + project ([$$32]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$32] <- [{"integer_type": $$34, "date_type": $$37, "decimal_type": $$38}] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + project ([$$34, $$37, $$38]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- SORT_MERGE_EXCHANGE [$$35(ASC) ] |PARTITIONED| + order (ASC, $$35) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STABLE_SORT [$$35(ASC)] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + join (eq($$34, $$35)) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HYBRID_HASH_JOIN [$$34][$$35] |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$34] |PARTITIONED| + project ([$$34, $$37]) [cardinality: 0.0, op-cost: 
0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$37, $$34] <- [$$d1.getField("date_type"), $$d1.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d1] <- test.DeltalakeDataset1 project ({date_type:any,integer_type:any}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- HASH_PARTITION_EXCHANGE [$$35] |PARTITIONED| + project ([$$38, $$35]) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- STREAM_PROJECT |PARTITIONED| + assign [$$38, $$35] <- [$$d2.getField("decimal_type"), $$d2.getField("integer_type")] [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ASSIGN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + data-scan []<-[$$d2] <- test.DeltalakeDataset2 project ({decimal_type:any,integer_type:any}) [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- DATASOURCE_SCAN |PARTITIONED| + exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0] + -- EMPTY_TUPLE_SOURCE |PARTITIONED| diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 2c7af49eb0..d56c1a46ec 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -320,6 +320,18 @@ 
<output-dir compare="Text">common/deltalake-modified-data</output-dir> </compilation-unit> </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="common/deltalake-all-type"> + <placeholder name="adapter" value="S3" /> + <output-dir compare="Text">common/deltalake-all-type</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset"> + <compilation-unit name="common/deltalake-field-access-pushdown"> + <placeholder name="adapter" value="S3" /> + <output-dir compare="Text">common/deltalake-field-access-pushdown</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="external-dataset"> <compilation-unit name="common/deltalake-invalid-file-format"> <placeholder name="adapter" value="S3" />
