This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new c9e9f86c0 IMPALA-13571: Calcite Planner: Fix join parsing errors. c9e9f86c0 is described below commit c9e9f86c0536f0991f70613675bbcfbbccc6a324 Author: Steve Carlin <scar...@cloudera.com> AuthorDate: Thu Nov 7 18:46:37 2024 -0800 IMPALA-13571: Calcite Planner: Fix join parsing errors. This commit fixes two parsing errors related to joins. 1) The straight_join hint is now parsed correctly. The hint, however, will be ignored. IMPALA-13574 has been filed for this feature. 2) The anti-join and semi-join will no longer throw parse exceptions but will now throw unsupported exceptions. The jiras IMPALA-13572 and IMPALA-13573 have been filed for these features. IMPALA-13708: Throw unsupported message for complex column syntax. There is special complex column syntax where the column is treated like a table and looks like a table name when parsed. If we get an analysis error for an unfound table, we can see if it is actually a complex column and throw an "unsupported" error instead of giving a cryptic "table not found" message. Also made the support for types of tables to be a bit more generic and check if it's not an HdfsTable rather than only throw an unsupported exception if it's an Iceberg table. Change-Id: Icd0f68441c84b090ed2cb45de96ccee1054deef5 Reviewed-on: http://gerrit.cloudera.org:8080/22412 Reviewed-by: Aman Sinha <amsi...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- .../src/main/codegen/templates/Parser.jj | 4 + .../apache/impala/calcite/schema/CalciteTable.java | 18 ++++- .../impala/calcite/service/CalciteJniFrontend.java | 73 ++++++++++++++++++- .../calcite/service/CalciteMetadataHandler.java | 85 ++++++++++++++++++++-- 4 files changed, 167 insertions(+), 13 deletions(-) diff --git a/java/calcite-planner/src/main/codegen/templates/Parser.jj b/java/calcite-planner/src/main/codegen/templates/Parser.jj index eb72f12f2..09dacd69c 100644 --- a/java/calcite-planner/src/main/codegen/templates/Parser.jj +++ b/java/calcite-planner/src/main/codegen/templates/Parser.jj @@ -1306,6 +1306,8 @@ SqlSelect SqlSelect() : } { <SELECT> { s = span(); } + ( <CLUSTERED> )? + ( <STRAIGHT_JOIN> )? [ <HINT_BEG> AddHint(hints) ( <COMMA> AddHint(hints) )* <COMMENT_END> ] SqlSelectKeywords(keywords) ( @@ -7901,6 +7903,7 @@ SqlPostfixOperator PostfixRowOperator() : | < CLASS_ORIGIN: "CLASS_ORIGIN" > | < CLOB: "CLOB" > | < CLOSE: "CLOSE" > +| < CLUSTERED: "CLUSTERED" > | < COALESCE: "COALESCE" > | < COBOL: "COBOL" > | < COLLATE: "COLLATE" > @@ -8427,6 +8430,7 @@ SqlPostfixOperator PostfixRowOperator() : | < STATIC: "STATIC" > | < STDDEV_POP: "STDDEV_POP" > | < STDDEV_SAMP: "STDDEV_SAMP" > +| < STRAIGHT_JOIN: "STRAIGHT_JOIN" > | < STREAM: "STREAM" > | < STRING_AGG: "STRING_AGG" > | < STRUCTURE: "STRUCTURE" > diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java index 5f70405c8..53f4fd243 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java @@ -50,6 +50,7 @@ import org.apache.impala.analysis.TupleDescriptor; import org.apache.impala.catalog.Column; import org.apache.impala.catalog.FeFsPartition; import org.apache.impala.catalog.FeTable; +import org.apache.impala.catalog.FeView; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.IcebergTable; import org.apache.impala.calcite.rel.util.ImpalaBaseTableRef; @@ -82,10 +83,7 @@ public class CalciteTable extends RelOptAbstractTable this.table_ = (HdfsTable) table; this.qualifiedTableName_ = table.getTableName().toPath(); - if (table instanceof IcebergTable) { - throw new UnsupportedFeatureException("Calcite parser does not support " + - "Iceberg tables yet."); - } + checkIfTableIsSupported(table); } private static RelDataType buildColumnsForRelDataType(FeTable table) @@ -106,6 +104,18 @@ public class CalciteTable extends RelOptAbstractTable return builder.build(); } + private void checkIfTableIsSupported(FeTable table) throws ImpalaException { + if (table instanceof FeView) { + throw new UnsupportedFeatureException("Views are not supported yet."); + } + + if (!(table instanceof HdfsTable)) { + String tableType = table.getClass().getSimpleName().replace("Table", ""); + throw new UnsupportedFeatureException(tableType + " tables are not supported yet."); + } + + } + public BaseTableRef createBaseTableRef(SimplifiedAnalyzer analyzer ) throws ImpalaException { diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java index e5ab7e4ab..6d75a90cb 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java @@ -26,6 +26,7 @@ import org.apache.calcite.sql.SqlExplain; import org.apache.calcite.sql.SqlNode; import org.apache.impala.analysis.Parser; import org.apache.impala.analysis.SelectStmt; +import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache; import org.apache.impala.calcite.functions.FunctionResolver; import org.apache.impala.calcite.operators.ImpalaOperatorTable; import org.apache.impala.calcite.rel.node.NodeWithExprs; @@ -34,6 +35,7 @@ import org.apache.impala.catalog.BuiltinsDb; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.JniUtil; import org.apache.impala.common.ParseException; +import org.apache.impala.common.UnsupportedFeatureException; import org.apache.impala.service.Frontend; import org.apache.impala.service.FrontendProfile; import org.apache.impala.service.JniFrontend; @@ -45,6 +47,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import java.util.List; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; @@ -60,12 +63,30 @@ public class CalciteJniFrontend extends JniFrontend { protected static final Logger LOG = LoggerFactory.getLogger(CalciteJniFrontend.class.getName()); - private static Pattern SEMI_JOIN = Pattern.compile("\\bsemi\\sjoin\\b", + private static Pattern LEFT_SEMI = Pattern.compile(".*\\bleft\\ssemi\\b.*", Pattern.CASE_INSENSITIVE); - private static Pattern ANTI_JOIN = Pattern.compile("\\banti\\sjoin\\b", + private static Pattern RIGHT_SEMI = Pattern.compile(".*\\bright\\ssemi\\b.*", Pattern.CASE_INSENSITIVE); + private static Pattern LEFT_ANTI = Pattern.compile(".*\\bleft\\santi\\b.*", + Pattern.CASE_INSENSITIVE); + + private static Pattern RIGHT_ANTI = Pattern.compile(".*\\bright\\santi\\b.*", + Pattern.CASE_INSENSITIVE); + + private static Pattern INPUT_FILE_NAME = Pattern.compile(".*\\binput__file__name\\b.*", + Pattern.CASE_INSENSITIVE); + + private static Pattern FILE_POSITION = Pattern.compile(".*\\bfile__position\\b.*", + Pattern.CASE_INSENSITIVE); + + private static Pattern TABLE_NOT_FOUND = + Pattern.compile(".*\\bTable '(.*)' not found\\b.*", Pattern.CASE_INSENSITIVE); + + private static Pattern COLUMN_NOT_FOUND = + Pattern.compile(".*\\bColumn '(.*)' not found\\b.*", Pattern.CASE_INSENSITIVE); + public CalciteJniFrontend(byte[] thriftBackendConfig, boolean isBackendTest) throws ImpalaException, TException { super(thriftBackendConfig, isBackendTest); @@ -84,6 +105,8 @@ public class CalciteJniFrontend extends JniFrontend { QueryContext queryCtx = new QueryContext(thriftQueryContext, getFrontend()); + CalciteMetadataHandler mdHandler = null; + if (!optionSupportedInCalcite(queryCtx)) { return runThroughOriginalPlanner(thriftQueryContext, queryCtx); } @@ -99,8 +122,7 @@ public class CalciteJniFrontend extends JniFrontend { markEvent(queryParser, parsedSqlNode, queryCtx, "Parsed query"); // Make sure the metadata cache has all the info for the query. - CalciteMetadataHandler mdHandler = - new CalciteMetadataHandler(parsedSqlNode, queryCtx); + mdHandler = new CalciteMetadataHandler(parsedSqlNode, queryCtx); markEvent(mdHandler, null, queryCtx, "Loaded tables"); boolean isExplain = false; @@ -140,6 +162,7 @@ public class CalciteJniFrontend extends JniFrontend { return serializedRequest; } catch (ParseException e) { + throwUnsupportedIfKnownException(e); // do a quick parse just to make sure it's not a select stmt. If it is // a select statement, we fail the query since all select statements // should be run through the Calcite Planner. @@ -149,6 +172,13 @@ public class CalciteJniFrontend extends JniFrontend { LOG.info("Calcite planner failed to parse query: " + queryCtx.getStmt()); LOG.info("Going to use original Impala planner."); return runThroughOriginalPlanner(thriftQueryContext, queryCtx); + } catch (ImpalaException e) { + if (mdHandler != null) { + throwUnsupportedIfKnownException(e, mdHandler.getStmtTableCache()); + } + throw e; + } catch (Exception e) { + throw e; } } @@ -188,6 +218,41 @@ public class CalciteJniFrontend extends JniFrontend { ImpalaOperatorTable.create(BuiltinsDb.getInstance()); } + private static void throwUnsupportedIfKnownException(Exception e) + throws ImpalaException { + String s = e.toString().replace("\n"," ");; + if (LEFT_ANTI.matcher(s).matches() || RIGHT_ANTI.matcher(s).matches()) { + throw new UnsupportedFeatureException("Anti joins not supported."); + } + if (LEFT_SEMI.matcher(s).matches() || RIGHT_SEMI.matcher(s).matches()) { + throw new UnsupportedFeatureException("Semi joins not supported."); + } + if (INPUT_FILE_NAME.matcher(s).matches() || FILE_POSITION.matcher(s).matches()) { + throw new UnsupportedFeatureException("Virtual columns not supported."); + } + } + + public static void throwUnsupportedIfKnownException(ImpalaException e, + StmtTableCache stmtTableCache) throws ImpalaException { + throwUnsupportedIfKnownException(e); + String s = e.toString().replace("\n"," ");; + Matcher m = TABLE_NOT_FOUND.matcher(s); + if (m.matches()) { + if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache, m.group(1))) { + throw new UnsupportedFeatureException( + "Complex column " + m.group(1) + " not supported."); + } + } + + m = COLUMN_NOT_FOUND.matcher(s); + if (m.matches()) { + if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache, m.group(1))) { + throw new UnsupportedFeatureException( + "Complex column " + m.group(1) + " not supported."); + } + } + } + public static class QueryContext { private final TQueryCtx queryCtx_; private final String stmt_; diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java index 13ff2f678..49fda4bc1 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java @@ -47,6 +47,7 @@ import org.apache.impala.catalog.FeView; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.UnsupportedFeatureException; +import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.Collections; @@ -96,8 +97,10 @@ public class CalciteMetadataHandler implements CompilerStep { // populate calcite schema. This step needs to be done after the loader because the // schema needs to contain the columns in the table for validation, which cannot // be done when it's an IncompleteTable - populateCalciteSchema(reader_, queryCtx.getFrontend().getCatalog(), - tableVisitor.tableNames_); + List<String> errorTables = populateCalciteSchema(reader_, + queryCtx.getFrontend().getCatalog(), tableVisitor.tableNames_); + + tableVisitor.checkForComplexTable(stmtTableCache_, errorTables, queryCtx); } /** @@ -119,22 +122,26 @@ public class CalciteMetadataHandler implements CompilerStep { } /** - * Populate the CalciteSchema with tables being used by this query. + * Populate the CalciteSchema with tables being used by this query. Returns a + * list of tables in the query that are not found in the database. */ - private void populateCalciteSchema(CalciteCatalogReader reader, + private static List<String> populateCalciteSchema(CalciteCatalogReader reader, FeCatalog catalog, Set<TableName> tableNames) throws ImpalaException { + List<String> notFoundTables = new ArrayList<>(); CalciteSchema rootSchema = reader.getRootSchema(); Map<String, CalciteDb.Builder> dbSchemas = new HashMap<>(); for (TableName tableName : tableNames) { FeDb db = catalog.getDb(tableName.getDb()); // db is not found, this will probably fail in the validation step if (db == null) { + notFoundTables.add(tableName.toString()); continue; } // table is not found, this will probably fail in the validation step FeTable feTable = db.getTable(tableName.getTbl()); if (feTable == null) { + notFoundTables.add(tableName.toString()); continue; } if (!(feTable instanceof HdfsTable)) { @@ -156,6 +163,7 @@ public class CalciteMetadataHandler implements CompilerStep { for (String dbName : dbSchemas.keySet()) { rootSchema.add(dbName, dbSchemas.get(dbName.toLowerCase()).build()); } + return notFoundTables; } public StmtMetadataLoader.StmtTableCache getStmtTableCache() { @@ -174,6 +182,11 @@ public class CalciteMetadataHandler implements CompilerStep { private final String currentDb_; public final Set<TableName> tableNames_ = new HashSet<>(); + // Error condition for now. Complex tables are not yet supported + // so if we see a table name that has more than 2 parts, this variable + // will contain that table. + public final List<String> errorTables_ = new ArrayList<>(); + public TableVisitor(String currentDb) { this.currentDb_ = currentDb.toLowerCase(); } @@ -194,13 +207,15 @@ public class CalciteMetadataHandler implements CompilerStep { if (fromNode instanceof SqlIdentifier) { String tableName = fromNode.toString(); List<String> parts = Splitter.on('.').splitToList(tableName); - // TODO: 'complex' tables ignored for now if (parts.size() == 1) { localTableNames.add(new TableName( currentDb_.toLowerCase(), parts.get(0).toLowerCase())); } else if (parts.size() == 2) { localTableNames.add( new TableName(parts.get(0).toLowerCase(), parts.get(1).toLowerCase())); + } else { + errorTables_.add(tableName); + return localTableNames; } } @@ -219,6 +234,66 @@ public class CalciteMetadataHandler implements CompilerStep { } return localTableNames; } + + /** + * Check if the error table is actually a table with a complex column. There is Impala + * syntax where a complex column uses the same syntax in the FROM clause as a table. + * This method is passed in all the tables that are not found and checks to see if + * the table turned out to be a complex column rather than an actual table. If so, + * this throws an unsupported feature exception (for the time being). If it's not + * a table with a complex column, a table not found error will eventually be thrown + * in a different place. + */ + public void checkForComplexTable(StmtMetadataLoader.StmtTableCache stmtTableCache, + List<String> errorTables, CalciteJniFrontend.QueryContext queryCtx) + throws ImpalaException { + List<String> allErrorTables = new ArrayList<>(); + allErrorTables.addAll(errorTables_); + allErrorTables.addAll(errorTables); + for (String errorTable : allErrorTables) { + List<String> parts = Splitter.on('.').splitToList(errorTable); + // if there are 3 parts, then it has to be db.table.column and must be a + // complex column. + if (parts.size() > 2) { + throw new UnsupportedFeatureException("Complex column " + + errorTable + " not supported."); + } + // if there are 2 parts, then it is either a missing db.table or a + // table.column. We look to see if the column can be found in any + // of the tables, in which case, it is a complex column being referenced. + if (parts.size() == 2) { + // first check the already existing cache for the error table. + if (anyTableContainsColumn(stmtTableCache, parts.get(1))) { + throw new UnsupportedFeatureException("Complex column " + + errorTable + " not supported."); + } + // it's possible that the table wasn't loaded yet because this method is + // only called when there is an error finding a table. Try loading the table + // from catalogd just in case, and check to see if it's a complex column. + TableName potentialComplexTable = new TableName( + currentDb_.toLowerCase(), parts.get(0).toLowerCase()); + StmtMetadataLoader errorLoader = new StmtMetadataLoader( + queryCtx.getFrontend(), queryCtx.getCurrentDb(), queryCtx.getTimeline()); + StmtMetadataLoader.StmtTableCache errorCache = + errorLoader.loadTables(Sets.newHashSet(potentialComplexTable)); + if (anyTableContainsColumn(errorCache, parts.get(1))) { + throw new UnsupportedFeatureException("Complex column " + + errorTable + " not supported."); + } + } + } + } + } + + public static boolean anyTableContainsColumn( + StmtMetadataLoader.StmtTableCache stmtTableCache, String columnName) { + String onlyColumnPart = columnName.split("\\.")[0]; + for (FeTable table : stmtTableCache.tables.values()) { + if (table.getColumn(onlyColumnPart) != null) { + return true; + } + } + return false; } @Override