This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new aba3a705a IMPALA-13982: Support regular views for Calcite planner in Impala aba3a705a is described below commit aba3a705a4c624b2d77e0ab92bf8c37de2d338e0 Author: Fang-Yu Rao <fangyu....@cloudera.com> AuthorDate: Mon May 12 16:52:10 2025 -0700 IMPALA-13982: Support regular views for Calcite planner in Impala Before this patch, the Calcite planner in Impala only supported inline views like 'temp' in the following query. select id from ( select * from functional.alltypes ) as temp; Regular views, on the other hand, were not supported. For instance, the Calcite planner in Impala did not support regular views like 'functional.alltypes_view' created via the following statement and hence queries against such regular views like "select id from functional.alltypes_view" were not supported. CREATE VIEW functional.alltypes_view AS SELECT * FROM functional.alltypes; This patch adds the support for regular views to the Calcite planner via adding a ViewTable for each regular view in the given query when populating the Calcite schema. This is similar to how regular views are supported in PlannerTest#testView() at https://github.com/apache/calcite/blob/main/core/src/test/java/org/apache/calcite/tools/PlannerTest.java where the regular view to be tested is added in https://github.com/apache/calcite/blob/main/testkit/src/main/java/org/apache/calcite/test/CalciteAssert.java. We do not have to use or extend ViewTableMacro in Apache Calcite because the information about the data types returned from a regular view is already available in its respective FeTable. Therefore, there is no need to parse the SQL statement representing the regular view and collect the metadata of tables referenced by the regular view as done by ViewTableMacro. The patch supports the following cases, where 'functional.alltypes_view' is a regular view defined as "SELECT * FROM functional.alltypes". 1. select id from functional.alltypes_view. 2. select alltypes_view.id from functional.alltypes_view. 3. select functional.alltypes_view.id from functional.alltypes_view. Joining a regular view with an HDFS table like the following is also supported. select alltypestiny.id from functional.alltypes_view, functional.alltypestiny Note that after this patch, queries against regular views are supported only in the legacy catalog mode but not the local catalog mode. In fact, queries against HDFS tables in the local catalog mode are not supported yet by the Calcite planner either. We will deal with this in IMPALA-14080. Testing: - Added test cases mentioned above to calcite.test. This makes sure the test cases are supported when we start the Impala server with the flag of '--use_calcite_planner=true'. - Manually verified the test cases above are supported if we start the Impala server with the environment variable USE_CALCITE_PLANNER set to true and the query option use_calcite_planner set to 1. Change-Id: I600aae816727ae942fb221fae84c2aac63ae1893 Reviewed-on: http://gerrit.cloudera.org:8080/22883 Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com> --- .../apache/impala/calcite/schema/CalciteDb.java | 40 ++++++- .../apache/impala/calcite/schema/CalciteTable.java | 2 +- .../calcite/service/CalciteAnalysisDriver.java | 9 +- .../calcite/service/CalciteMetadataHandler.java | 17 +-- .../calcite/service/CalciteRelNodeConverter.java | 35 +++++- .../impala/calcite/type/ImpalaTypeSystemImpl.java | 3 + .../queries/QueryTest/calcite.test | 117 +++++++++++++++++++++ 7 files changed, 202 insertions(+), 21 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteDb.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteDb.java index e0f5caf42..a6f4e3474 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteDb.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteDb.java @@ -18,12 +18,23 @@ package org.apache.impala.calcite.schema; import com.google.common.collect.ImmutableMap; +import org.apache.calcite.adapter.java.JavaTypeFactory; import org.apache.calcite.prepare.CalciteCatalogReader; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeImpl; import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.ViewTable; import org.apache.calcite.schema.impl.AbstractSchema; +import org.apache.impala.calcite.type.ImpalaTypeSystemImpl; import org.apache.impala.catalog.FeTable; +import org.apache.impala.catalog.HdfsTable; +import org.apache.impala.catalog.View; import org.apache.impala.common.ImpalaException; +import org.apache.impala.common.UnsupportedFeatureException; +import com.google.common.collect.ImmutableList; + +import java.lang.reflect.Type; import java.util.HashMap; import java.util.Map; @@ -50,10 +61,33 @@ public class CalciteDb extends AbstractSchema { } public Builder addTable(String tableName, FeTable table) throws ImpalaException { - if (!tableMap_.containsKey(tableName)) { - tableMap_.put(tableName.toLowerCase(), new CalciteTable(table, reader_)); + if (tableMap_.containsKey(tableName)) return this; + + if (table instanceof HdfsTable) { + tableMap_.put(tableName.toLowerCase(), new CalciteTable(table, reader_)); + return this; + } + + if (table instanceof View) { + tableMap_.put(tableName.toLowerCase(), createViewTable(table)); + return this; } - return this; + + throw new UnsupportedFeatureException( + "Table " + table.getFullName() + " has unsupported type " + + table.getClass().getSimpleName() + ". The Calcite planner only supports " + + "HdfsTable's and View's."); + } + + private static ViewTable createViewTable(FeTable feTable) throws ImpalaException { + RelDataType rowType = CalciteTable.buildColumnsForRelDataType(feTable); + JavaTypeFactory typeFactory = (JavaTypeFactory) ImpalaTypeSystemImpl.TYPE_FACTORY; + Type elementType = typeFactory.getJavaClass(rowType); + return new ViewTable(elementType, + RelDataTypeImpl.proto(rowType), ((View) feTable).getQueryStmt().toSql(), + /* schemaPath */ ImmutableList.of(), + /* viewPath */ ImmutableList.of(feTable.getDb().getName().toLowerCase(), + feTable.getName().toLowerCase())); } public CalciteDb build() { diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java index bc3697088..c44ff3ec6 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java @@ -93,7 +93,7 @@ public class CalciteTable extends RelOptAbstractTable checkIfTableIsSupported(table); } - private static RelDataType buildColumnsForRelDataType(FeTable table) + public static RelDataType buildColumnsForRelDataType(FeTable table) throws ImpalaException { RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl()); diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteAnalysisDriver.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteAnalysisDriver.java index 85a2258d2..9e950deb6 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteAnalysisDriver.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteAnalysisDriver.java @@ -100,8 +100,15 @@ public class CalciteAnalysisDriver implements AnalysisDriver { try { reader_ = CalciteMetadataHandler.createCalciteCatalogReader(stmtTableCache_, queryCtx_, queryCtx_.session.database); + // When CalciteRelNodeConverter#convert() is called to convert the valid AST into a + // logical plan, ViewTable#expandView() in Apache Calcite would be invoked if a + // regular view is involved in the query. expandView() validates the SQL statement + // defining the view. During the validation, all referenced tables by the regular + // view are required. Thus, we need all the tables in 'stmtTableCache_'. + // Recall that parsedStmt_.getTablesInQuery(null) only contains TableName's in the + // given query but not the underlying tables referenced by a regular view. CalciteMetadataHandler.populateCalciteSchema(reader_, ctx_.getCatalog(), - parsedStmt_.getTablesInQuery(null)); + stmtTableCache_); typeFactory_ = new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl()); sqlValidator_ = SqlValidatorUtil.newValidator( diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java index ff5375e8a..359931216 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java @@ -36,15 +36,11 @@ import org.apache.calcite.sql.util.SqlBasicVisitor; import org.apache.impala.analysis.StmtMetadataLoader; import org.apache.impala.analysis.TableName; import org.apache.impala.calcite.schema.CalciteDb; -import org.apache.impala.calcite.schema.CalciteTable; import org.apache.impala.calcite.schema.ImpalaCalciteCatalogReader; import org.apache.impala.calcite.type.ImpalaTypeSystemImpl; -import org.apache.impala.catalog.Column; import org.apache.impala.catalog.FeCatalog; import org.apache.impala.catalog.FeDb; import org.apache.impala.catalog.FeTable; -import org.apache.impala.catalog.FeView; -import org.apache.impala.catalog.HdfsTable; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.UnsupportedFeatureException; import org.apache.impala.thrift.TQueryCtx; @@ -101,7 +97,7 @@ public class CalciteMetadataHandler implements CompilerStep { // schema needs to contain the columns in the table for validation, which cannot // be done when it's an IncompleteTable List<String> errorTables = populateCalciteSchema(reader_, - queryCtx.getFrontend().getCatalog(), tableVisitor.tableNames_); + queryCtx.getFrontend().getCatalog(), stmtTableCache_); tableVisitor.checkForComplexTable(stmtTableCache_, errorTables, queryCtx); } @@ -129,11 +125,12 @@ public class CalciteMetadataHandler implements CompilerStep { * list of tables in the query that are not found in the database. */ public static List<String> populateCalciteSchema(CalciteCatalogReader reader, - FeCatalog catalog, Set<TableName> tableNames) throws ImpalaException { + FeCatalog catalog, StmtMetadataLoader.StmtTableCache stmtTableCache) + throws ImpalaException { List<String> notFoundTables = new ArrayList<>(); CalciteSchema rootSchema = reader.getRootSchema(); Map<String, CalciteDb.Builder> dbSchemas = new HashMap<>(); - for (TableName tableName : tableNames) { + for (TableName tableName : stmtTableCache.tables.keySet()) { FeDb db = catalog.getDb(tableName.getDb()); // db is not found, this will probably fail in the validation step if (db == null) { @@ -147,12 +144,6 @@ public class CalciteMetadataHandler implements CompilerStep { notFoundTables.add(tableName.toString()); continue; } - if (!(feTable instanceof HdfsTable)) { - throw new UnsupportedFeatureException( - "Table " + feTable.getFullName() + " has unsupported type " + - feTable.getClass().getSimpleName() + ". The Calcite planner only supports " + - "HDFS tables."); - } // populate the dbschema with its table, creating the dbschema if it's the // first instance seen in the query. diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java index 71cb49b1d..73ff79fee 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java @@ -19,6 +19,7 @@ package org.apache.impala.calcite.service; import org.apache.calcite.rel.type.RelDataTypeFactory; import com.google.common.collect.ImmutableList; +import org.apache.calcite.avatica.util.Quoting; import org.apache.calcite.plan.ConventionTraitDef; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCostImpl; @@ -35,13 +36,18 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.rules.CoreRules; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.SqlExplainFormat; import org.apache.calcite.sql.SqlExplainLevel; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql2rel.RelDecorrelator; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.impala.calcite.operators.ImpalaConvertletTable; +import org.apache.calcite.prepare.PlannerImpl; +import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql2rel.StandardConvertletTable; +import org.apache.calcite.tools.FrameworkConfig; +import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.sql.validate.SqlValidator; @@ -60,8 +66,7 @@ public class CalciteRelNodeConverter implements CompilerStep { protected static final Logger LOG = LoggerFactory.getLogger(CalciteRelNodeConverter.class.getName()); - private static final RelOptTable.ViewExpander NOOP_EXPANDER = - (type, query, schema, path) -> null; + private final RelOptTable.ViewExpander viewExpander_; private final RelOptCluster cluster_; @@ -81,6 +86,8 @@ public class CalciteRelNodeConverter implements CompilerStep { planner_.addRelTraitDef(ConventionTraitDef.INSTANCE); cluster_ = RelOptCluster.create(planner_, new RexBuilder(typeFactory_)); + viewExpander_ = createViewExpander( + analysisResult.getSqlValidator().getCatalogReader().getRootSchema().plus()); } public CalciteRelNodeConverter(CalciteValidator validator) { @@ -91,11 +98,33 @@ public class CalciteRelNodeConverter implements CompilerStep { planner_.addRelTraitDef(ConventionTraitDef.INSTANCE); cluster_ = RelOptCluster.create(planner_, new RexBuilder(typeFactory_)); + viewExpander_ = createViewExpander(validator.getCatalogReader() + .getRootSchema().plus()); + } + + private static RelOptTable.ViewExpander createViewExpander(SchemaPlus schemaPlus) { + SqlParser.Config parserConfig = + SqlParser.configBuilder().setCaseSensitive(false).build() + // This makes SqlParser expect identifiers that require quoting to be + // enclosed by backticks. + .withQuoting(Quoting.BACK_TICK); + FrameworkConfig config = Frameworks.newConfigBuilder() + .defaultSchema(schemaPlus) + // This makes 'connectionConfig' in PlannerImpl case-insensitive, which in turn + // makes the CalciteCatalogReader used to validate the view in + // PlannerImpl#expandView() case-insensitive. Otherwise, + // CalciteRelNodeConverter#convert() would fail. + .parserConfig(parserConfig) + // We need to add ConventionTraitDef.INSTANCE to avoid the call to + // table.getStatistic() in LogicalTableScan#create(). + .traitDefs(ConventionTraitDef.INSTANCE) + .build(); + return new PlannerImpl(config); } public RelNode convert(SqlNode validatedNode) { SqlToRelConverter relConverter = new SqlToRelConverter( - NOOP_EXPANDER, + viewExpander_, sqlValidator_, reader_, cluster_, diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeSystemImpl.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeSystemImpl.java index 799d89e75..d6f61c36f 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeSystemImpl.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/type/ImpalaTypeSystemImpl.java @@ -17,6 +17,7 @@ package org.apache.impala.calcite.type; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeSystemImpl; @@ -60,6 +61,8 @@ public class ImpalaTypeSystemImpl extends RelDataTypeSystemImpl { private static final int DEFAULT_FLOAT_PRECISION = 7; private static final int DEFAULT_DOUBLE_PRECISION = 15; + public static final RelDataTypeFactory TYPE_FACTORY = + new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl()); @Override public int getMaxScale(SqlTypeName typeName) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test b/testdata/workloads/functional-query/queries/QueryTest/calcite.test index e90dc03d3..711694664 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test +++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test @@ -943,3 +943,120 @@ row_regex:.*partitions=4/4.* ---- RUNTIME_PROFILE row_regex: .*PlannerType: CalcitePlanner.* ==== +---- QUERY +select count(*) from functional.alltypes_view; +---- RESULTS +7300 +---- TYPES +BIGINT +==== +---- QUERY +select * from functional.alltypes_view order by id limit 10; +---- RESULTS +0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1 +1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1 +2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1 +3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1 +4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1 +5,false,5,5,5,50,5.5,50.5,'01/01/09','5',2009-01-01 00:05:00.100000000,2009,1 +6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/09','6',2009-01-01 00:06:00.150000000,2009,1 +7,false,7,7,7,70,7.699999809265137,70.7,'01/01/09','7',2009-01-01 00:07:00.210000000,2009,1 +8,true,8,8,8,80,8.800000190734863,80.8,'01/01/09','8',2009-01-01 00:08:00.280000000,2009,1 +9,false,9,9,9,90,9.899999618530273,90.89999999999999,'01/01/09','9',2009-01-01 00:09:00.360000000,2009,1 +---- TYPES +INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT +==== +---- QUERY +select id from functional.alltypes_view order by id limit 1; +---- RESULTS +0 +---- TYPES +INT +==== +---- QUERY +select alltypes_view.id from functional.alltypes_view order by id limit 1; +---- RESULTS +0 +---- TYPES +INT +==== +---- QUERY +select functional.alltypes_view.id from functional.alltypes_view order by id limit 1; +---- RESULTS +0 +---- TYPES +INT +==== +---- QUERY +select count(*) from functional.alltypes_view, functional.alltypestiny +where functional.alltypes_view.id = functional.alltypestiny.id; +---- RESULTS +8 +---- TYPES +BIGINT +==== +---- QUERY +select * from functional.alltypes_view, functional.alltypestiny +where functional.alltypes_view.id = functional.alltypestiny.id +order by functional.alltypes_view.id; +---- RESULTS +0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1 +1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1 +2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1,2,true,0,0,0,0,0,0,'02/01/09','0',2009-02-01 00:00:00,2009,2 +3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1,3,false,1,1,1,10,1.100000023841858,10.1,'02/01/09','1',2009-02-01 00:01:00,2009,2 +4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1,4,true,0,0,0,0,0,0,'03/01/09','0',2009-03-01 00:00:00,2009,3 +5,false,5,5,5,50,5.5,50.5,'01/01/09','5',2009-01-01 00:05:00.100000000,2009,1,5,false,1,1,1,10,1.100000023841858,10.1,'03/01/09','1',2009-03-01 00:01:00,2009,3 +6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/09','6',2009-01-01 00:06:00.150000000,2009,1,6,true,0,0,0,0,0,0,'04/01/09','0',2009-04-01 00:00:00,2009,4 +7,false,7,7,7,70,7.699999809265137,70.7,'01/01/09','7',2009-01-01 00:07:00.210000000,2009,1,7,false,1,1,1,10,1.100000023841858,10.1,'04/01/09','1',2009-04-01 00:01:00,2009,4 +---- TYPES +INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT, INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT +==== +---- QUERY +select id from functional.alltypes_view, functional.alltypestiny +where functional.alltypes_view.id = functional.alltypestiny.id +order by functional.alltypes_view.id; +---- CATCH +SqlValidatorException: Column 'ID' is ambiguous +==== +---- QUERY +select alltypestiny.id from functional.alltypes_view, functional.alltypestiny +where functional.alltypes_view.id = functional.alltypestiny.id +order by functional.alltypes_view.id; +---- RESULTS +0 +1 +2 +3 +4 +5 +6 +7 +---- TYPES +INT +==== +---- QUERY +select functional.alltypestiny.id from functional.alltypes_view, functional.alltypestiny +where functional.alltypes_view.id = functional.alltypestiny.id +order by id; +---- RESULTS +0 +1 +2 +3 +4 +5 +6 +7 +---- TYPES +INT +==== +---- QUERY +# This test case makes sure that the definition of a regular view could be correctly +# parsed during view expansion even if database, table, and column names are enclosed in +# backticks. +select * from functional.alltypes_hive_view where id = 0; +---- RESULTS +0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1 +---- TYPES +INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT +====