This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new c9e9f86c0 IMPALA-13571: Calcite Planner: Fix join parsing errors.
c9e9f86c0 is described below

commit c9e9f86c0536f0991f70613675bbcfbbccc6a324
Author: Steve Carlin <scar...@cloudera.com>
AuthorDate: Thu Nov 7 18:46:37 2024 -0800

    IMPALA-13571: Calcite Planner: Fix join parsing errors.
    
    This commit fixes two parsing errors related to joins.
    
    1) The straight_join hint is now parsed correctly. The hint, however,
    will be ignored.  IMPALA-13574 has been filed for this feature.
    
    2) The anti-join and semi-join will no longer throw parse exceptions
    but will now throw unsupported exceptions.  The jiras IMPALA-13572
    and IMPALA-13573 have been filed for these features.
    
    IMPALA-13708: Throw unsupported message for complex column syntax.
    
    There is special complex column syntax where the column is treated
    like a table and looks like a table name when parsed. If we get an
    analysis error for an unfound table, we can see if it is actually
    a complex column and throw an "unsupported" error instead of giving
    a cryptic "table not found" message.
    
    Also made the support for types of tables to be a bit more generic
    and check if it's not an HdfsTable rather than only throw an
    unsupported exception if it's an Iceberg table.
    
    Change-Id: Icd0f68441c84b090ed2cb45de96ccee1054deef5
    Reviewed-on: http://gerrit.cloudera.org:8080/22412
    Reviewed-by: Aman Sinha <amsi...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 .../src/main/codegen/templates/Parser.jj           |  4 +
 .../apache/impala/calcite/schema/CalciteTable.java | 18 ++++-
 .../impala/calcite/service/CalciteJniFrontend.java | 73 ++++++++++++++++++-
 .../calcite/service/CalciteMetadataHandler.java    | 85 ++++++++++++++++++++--
 4 files changed, 167 insertions(+), 13 deletions(-)

diff --git a/java/calcite-planner/src/main/codegen/templates/Parser.jj 
b/java/calcite-planner/src/main/codegen/templates/Parser.jj
index eb72f12f2..09dacd69c 100644
--- a/java/calcite-planner/src/main/codegen/templates/Parser.jj
+++ b/java/calcite-planner/src/main/codegen/templates/Parser.jj
@@ -1306,6 +1306,8 @@ SqlSelect SqlSelect() :
 }
 {
     <SELECT> { s = span(); }
+    ( <CLUSTERED> )?
+    ( <STRAIGHT_JOIN> )?
     [ <HINT_BEG> AddHint(hints) ( <COMMA> AddHint(hints) )* <COMMENT_END> ]
     SqlSelectKeywords(keywords)
     (
@@ -7901,6 +7903,7 @@ SqlPostfixOperator PostfixRowOperator() :
 |   < CLASS_ORIGIN: "CLASS_ORIGIN" >
 |   < CLOB: "CLOB" >
 |   < CLOSE: "CLOSE" >
+|   < CLUSTERED: "CLUSTERED" >
 |   < COALESCE: "COALESCE" >
 |   < COBOL: "COBOL" >
 |   < COLLATE: "COLLATE" >
@@ -8427,6 +8430,7 @@ SqlPostfixOperator PostfixRowOperator() :
 |   < STATIC: "STATIC" >
 |   < STDDEV_POP: "STDDEV_POP" >
 |   < STDDEV_SAMP: "STDDEV_SAMP" >
+|   < STRAIGHT_JOIN: "STRAIGHT_JOIN" >
 |   < STREAM: "STREAM" >
 |   < STRING_AGG: "STRING_AGG" >
 |   < STRUCTURE: "STRUCTURE" >
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
index 5f70405c8..53f4fd243 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/schema/CalciteTable.java
@@ -50,6 +50,7 @@ import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.FeFsPartition;
 import org.apache.impala.catalog.FeTable;
+import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.IcebergTable;
 import org.apache.impala.calcite.rel.util.ImpalaBaseTableRef;
@@ -82,10 +83,7 @@ public class CalciteTable extends RelOptAbstractTable
     this.table_ = (HdfsTable) table;
     this.qualifiedTableName_ = table.getTableName().toPath();
 
-    if (table instanceof IcebergTable) {
-      throw new UnsupportedFeatureException("Calcite parser does not support " 
+
-          "Iceberg tables yet.");
-    }
+    checkIfTableIsSupported(table);
   }
 
   private static RelDataType buildColumnsForRelDataType(FeTable table)
@@ -106,6 +104,18 @@ public class CalciteTable extends RelOptAbstractTable
     return builder.build();
   }
 
+  private void checkIfTableIsSupported(FeTable table) throws ImpalaException {
+    if (table instanceof FeView) {
+      throw new UnsupportedFeatureException("Views are not supported yet.");
+    }
+
+    if (!(table instanceof HdfsTable)) {
+      String tableType = table.getClass().getSimpleName().replace("Table", "");
+      throw new UnsupportedFeatureException(tableType + " tables are not 
supported yet.");
+    }
+
+  }
+
   public BaseTableRef createBaseTableRef(SimplifiedAnalyzer analyzer
       ) throws ImpalaException {
 
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
index e5ab7e4ab..6d75a90cb 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteJniFrontend.java
@@ -26,6 +26,7 @@ import org.apache.calcite.sql.SqlExplain;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.impala.analysis.Parser;
 import org.apache.impala.analysis.SelectStmt;
+import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
 import org.apache.impala.calcite.functions.FunctionResolver;
 import org.apache.impala.calcite.operators.ImpalaOperatorTable;
 import org.apache.impala.calcite.rel.node.NodeWithExprs;
@@ -34,6 +35,7 @@ import org.apache.impala.catalog.BuiltinsDb;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.JniUtil;
 import org.apache.impala.common.ParseException;
+import org.apache.impala.common.UnsupportedFeatureException;
 import org.apache.impala.service.Frontend;
 import org.apache.impala.service.FrontendProfile;
 import org.apache.impala.service.JniFrontend;
@@ -45,6 +47,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
 
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.slf4j.Logger;
@@ -60,12 +63,30 @@ public class CalciteJniFrontend extends JniFrontend {
   protected static final Logger LOG =
       LoggerFactory.getLogger(CalciteJniFrontend.class.getName());
 
-  private static Pattern SEMI_JOIN = Pattern.compile("\\bsemi\\sjoin\\b",
+  private static Pattern LEFT_SEMI = Pattern.compile(".*\\bleft\\ssemi\\b.*",
       Pattern.CASE_INSENSITIVE);
 
-  private static Pattern ANTI_JOIN = Pattern.compile("\\banti\\sjoin\\b",
+  private static Pattern RIGHT_SEMI = Pattern.compile(".*\\bright\\ssemi\\b.*",
       Pattern.CASE_INSENSITIVE);
 
+  private static Pattern LEFT_ANTI = Pattern.compile(".*\\bleft\\santi\\b.*",
+      Pattern.CASE_INSENSITIVE);
+
+  private static Pattern RIGHT_ANTI = Pattern.compile(".*\\bright\\santi\\b.*",
+      Pattern.CASE_INSENSITIVE);
+
+  private static Pattern INPUT_FILE_NAME = 
Pattern.compile(".*\\binput__file__name\\b.*",
+      Pattern.CASE_INSENSITIVE);
+
+  private static Pattern FILE_POSITION = 
Pattern.compile(".*\\bfile__position\\b.*",
+      Pattern.CASE_INSENSITIVE);
+
+  private static Pattern TABLE_NOT_FOUND =
+      Pattern.compile(".*\\bTable '(.*)' not found\\b.*", 
Pattern.CASE_INSENSITIVE);
+
+  private static Pattern COLUMN_NOT_FOUND =
+      Pattern.compile(".*\\bColumn '(.*)' not found\\b.*", 
Pattern.CASE_INSENSITIVE);
+
   public CalciteJniFrontend(byte[] thriftBackendConfig, boolean isBackendTest)
       throws ImpalaException, TException {
     super(thriftBackendConfig, isBackendTest);
@@ -84,6 +105,8 @@ public class CalciteJniFrontend extends JniFrontend {
 
     QueryContext queryCtx = new QueryContext(thriftQueryContext, 
getFrontend());
 
+    CalciteMetadataHandler mdHandler = null;
+
     if (!optionSupportedInCalcite(queryCtx)) {
       return runThroughOriginalPlanner(thriftQueryContext, queryCtx);
     }
@@ -99,8 +122,7 @@ public class CalciteJniFrontend extends JniFrontend {
       markEvent(queryParser, parsedSqlNode, queryCtx, "Parsed query");
 
       // Make sure the metadata cache has all the info for the query.
-      CalciteMetadataHandler mdHandler =
-          new CalciteMetadataHandler(parsedSqlNode, queryCtx);
+      mdHandler = new CalciteMetadataHandler(parsedSqlNode, queryCtx);
       markEvent(mdHandler, null, queryCtx, "Loaded tables");
 
       boolean isExplain = false;
@@ -140,6 +162,7 @@ public class CalciteJniFrontend extends JniFrontend {
 
       return serializedRequest;
     } catch (ParseException e) {
+      throwUnsupportedIfKnownException(e);
       // do a quick parse just to make sure it's not a select stmt. If it is
       // a select statement, we fail the query since all select statements
       // should be run through the Calcite Planner.
@@ -149,6 +172,13 @@ public class CalciteJniFrontend extends JniFrontend {
       LOG.info("Calcite planner failed to parse query: " + queryCtx.getStmt());
       LOG.info("Going to use original Impala planner.");
       return runThroughOriginalPlanner(thriftQueryContext, queryCtx);
+    } catch (ImpalaException e) {
+      if (mdHandler != null) {
+        throwUnsupportedIfKnownException(e, mdHandler.getStmtTableCache());
+      }
+      throw e;
+    } catch (Exception e) {
+      throw e;
     }
   }
 
@@ -188,6 +218,41 @@ public class CalciteJniFrontend extends JniFrontend {
     ImpalaOperatorTable.create(BuiltinsDb.getInstance());
   }
 
+  private static void throwUnsupportedIfKnownException(Exception e)
+      throws ImpalaException {
+    String s = e.toString().replace("\n"," ");;
+    if (LEFT_ANTI.matcher(s).matches() || RIGHT_ANTI.matcher(s).matches()) {
+      throw new UnsupportedFeatureException("Anti joins not supported.");
+    }
+    if (LEFT_SEMI.matcher(s).matches() || RIGHT_SEMI.matcher(s).matches()) {
+      throw new UnsupportedFeatureException("Semi joins not supported.");
+    }
+    if (INPUT_FILE_NAME.matcher(s).matches() || 
FILE_POSITION.matcher(s).matches()) {
+      throw new UnsupportedFeatureException("Virtual columns not supported.");
+    }
+  }
+
+  public static void throwUnsupportedIfKnownException(ImpalaException e,
+      StmtTableCache stmtTableCache) throws ImpalaException {
+    throwUnsupportedIfKnownException(e);
+    String s = e.toString().replace("\n"," ");;
+    Matcher m = TABLE_NOT_FOUND.matcher(s);
+    if (m.matches()) {
+      if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache, 
m.group(1))) {
+        throw new UnsupportedFeatureException(
+            "Complex column " + m.group(1) + " not supported.");
+      }
+    }
+
+    m = COLUMN_NOT_FOUND.matcher(s);
+    if (m.matches()) {
+      if (CalciteMetadataHandler.anyTableContainsColumn(stmtTableCache, 
m.group(1))) {
+        throw new UnsupportedFeatureException(
+            "Complex column " + m.group(1) + " not supported.");
+      }
+    }
+  }
+
   public static class QueryContext {
     private final TQueryCtx queryCtx_;
     private final String stmt_;
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
index 13ff2f678..49fda4bc1 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteMetadataHandler.java
@@ -47,6 +47,7 @@ import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.UnsupportedFeatureException;
+import com.google.common.collect.Sets;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -96,8 +97,10 @@ public class CalciteMetadataHandler implements CompilerStep {
     // populate calcite schema.  This step needs to be done after the loader 
because the
     // schema needs to contain the columns in the table for validation, which 
cannot
     // be done when it's an IncompleteTable
-    populateCalciteSchema(reader_, queryCtx.getFrontend().getCatalog(),
-        tableVisitor.tableNames_);
+    List<String> errorTables = populateCalciteSchema(reader_,
+        queryCtx.getFrontend().getCatalog(), tableVisitor.tableNames_);
+
+    tableVisitor.checkForComplexTable(stmtTableCache_, errorTables, queryCtx);
   }
 
   /**
@@ -119,22 +122,26 @@ public class CalciteMetadataHandler implements 
CompilerStep {
   }
 
   /**
-   * Populate the CalciteSchema with tables being used by this query.
+   * Populate the CalciteSchema with tables being used by this query. Returns a
+   * list of tables in the query that are not found in the database.
    */
-  private void populateCalciteSchema(CalciteCatalogReader reader,
+  private static List<String> populateCalciteSchema(CalciteCatalogReader 
reader,
       FeCatalog catalog, Set<TableName> tableNames) throws ImpalaException {
+    List<String> notFoundTables = new ArrayList<>();
     CalciteSchema rootSchema = reader.getRootSchema();
     Map<String, CalciteDb.Builder> dbSchemas = new HashMap<>();
     for (TableName tableName : tableNames) {
       FeDb db = catalog.getDb(tableName.getDb());
       // db is not found, this will probably fail in the validation step
       if (db == null) {
+        notFoundTables.add(tableName.toString());
         continue;
       }
 
       // table is not found, this will probably fail in the validation step
       FeTable feTable = db.getTable(tableName.getTbl());
       if (feTable == null) {
+        notFoundTables.add(tableName.toString());
         continue;
       }
       if (!(feTable instanceof HdfsTable)) {
@@ -156,6 +163,7 @@ public class CalciteMetadataHandler implements CompilerStep 
{
     for (String dbName : dbSchemas.keySet()) {
       rootSchema.add(dbName, dbSchemas.get(dbName.toLowerCase()).build());
     }
+    return notFoundTables;
   }
 
   public StmtMetadataLoader.StmtTableCache getStmtTableCache() {
@@ -174,6 +182,11 @@ public class CalciteMetadataHandler implements 
CompilerStep {
     private final String currentDb_;
     public final Set<TableName> tableNames_ = new HashSet<>();
 
+    // Error condition for now. Complex tables are not yet supported
+    // so if we see a table name that has more than 2 parts, this variable
+    // will contain that table.
+    public final List<String> errorTables_ = new ArrayList<>();
+
     public TableVisitor(String currentDb) {
       this.currentDb_ = currentDb.toLowerCase();
     }
@@ -194,13 +207,15 @@ public class CalciteMetadataHandler implements 
CompilerStep {
       if (fromNode instanceof SqlIdentifier) {
         String tableName = fromNode.toString();
         List<String> parts = Splitter.on('.').splitToList(tableName);
-        // TODO: 'complex' tables ignored for now
         if (parts.size() == 1) {
           localTableNames.add(new TableName(
               currentDb_.toLowerCase(), parts.get(0).toLowerCase()));
         } else if (parts.size() == 2) {
           localTableNames.add(
               new TableName(parts.get(0).toLowerCase(), 
parts.get(1).toLowerCase()));
+        } else {
+          errorTables_.add(tableName);
+          return localTableNames;
         }
       }
 
@@ -219,6 +234,66 @@ public class CalciteMetadataHandler implements 
CompilerStep {
       }
       return localTableNames;
     }
+
+    /**
+     * Check if the error table is actually a table with a complex column. 
There is Impala
+     * syntax where a complex column uses the same syntax in the FROM clause 
as a table.
+     * This method is passed in all the tables that are not found and checks 
to see if
+     * the table turned out to be a complex column rather than an actual 
table. If so,
+     * this throws an unsupported feature exception (for the time being). If 
it's not
+     * a table with a complex column, a table not found error will eventually 
be thrown
+     * in a different place.
+     */
+    public void checkForComplexTable(StmtMetadataLoader.StmtTableCache 
stmtTableCache,
+        List<String> errorTables, CalciteJniFrontend.QueryContext queryCtx)
+        throws ImpalaException {
+      List<String> allErrorTables = new ArrayList<>();
+      allErrorTables.addAll(errorTables_);
+      allErrorTables.addAll(errorTables);
+      for (String errorTable : allErrorTables) {
+        List<String> parts = Splitter.on('.').splitToList(errorTable);
+        // if there are 3 parts, then it has to be db.table.column and must be 
a
+        // complex column.
+        if (parts.size() > 2) {
+          throw new UnsupportedFeatureException("Complex column " +
+              errorTable + " not supported.");
+        }
+        // if there are 2 parts, then it is either a missing db.table or a
+        // table.column.  We look to see if the column can be found in any
+        // of the tables, in which case, it is a complex column being 
referenced.
+        if (parts.size() == 2) {
+          // first check the already existing cache for the error table.
+          if (anyTableContainsColumn(stmtTableCache, parts.get(1))) {
+            throw new UnsupportedFeatureException("Complex column " +
+                errorTable + " not supported.");
+          }
+          // it's possible that the table wasn't loaded yet because this 
method is
+          // only called when there is an error finding a table. Try loading 
the table
+          // from catalogd just in case, and check to see if it's a complex 
column.
+          TableName potentialComplexTable = new TableName(
+              currentDb_.toLowerCase(), parts.get(0).toLowerCase());
+          StmtMetadataLoader errorLoader = new StmtMetadataLoader(
+              queryCtx.getFrontend(), queryCtx.getCurrentDb(), 
queryCtx.getTimeline());
+          StmtMetadataLoader.StmtTableCache errorCache =
+              errorLoader.loadTables(Sets.newHashSet(potentialComplexTable));
+          if (anyTableContainsColumn(errorCache, parts.get(1))) {
+            throw new UnsupportedFeatureException("Complex column " +
+                errorTable + " not supported.");
+          }
+        }
+      }
+    }
+  }
+
+  public static boolean anyTableContainsColumn(
+      StmtMetadataLoader.StmtTableCache stmtTableCache, String columnName) {
+    String onlyColumnPart = columnName.split("\\.")[0];
+    for (FeTable table : stmtTableCache.tables.values()) {
+      if (table.getColumn(onlyColumnPart) != null) {
+        return true;
+      }
+    }
+    return false;
   }
 
   @Override

Reply via email to