http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java deleted file mode 100644 index 5049064..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java +++ /dev/null @@ -1,215 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.HashMap; -import java.util.Map; - -import com.cloudera.impala.analysis.ArithmeticExpr.Operator; -import com.cloudera.impala.catalog.Function.CompareMode; -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.thrift.TExprNode; -import com.cloudera.impala.thrift.TExprNodeType; -import com.google.common.base.Preconditions; - -/** - * Describes the addition and subtraction of time units from timestamps. - * Arithmetic expressions on timestamps are syntactic sugar. - * They are executed as function call exprs in the BE. - */ -public class TimestampArithmeticExpr extends Expr { - - // Time units supported in timestamp arithmetic. - public static enum TimeUnit { - YEAR("YEAR"), - MONTH("MONTH"), - WEEK("WEEK"), - DAY("DAY"), - HOUR("HOUR"), - MINUTE("MINUTE"), - SECOND("SECOND"), - MILLISECOND("MILLISECOND"), - MICROSECOND("MICROSECOND"), - NANOSECOND("NANOSECOND"); - - private final String description_; - - private TimeUnit(String description) { - this.description_ = description; - } - - @Override - public String toString() { - return description_; - } - } - - private static Map<String, TimeUnit> TIME_UNITS_MAP = new HashMap<String, TimeUnit>(); - static { - for (TimeUnit timeUnit : TimeUnit.values()) { - TIME_UNITS_MAP.put(timeUnit.toString(), timeUnit); - TIME_UNITS_MAP.put(timeUnit.toString() + "S", timeUnit); - } - } - - // Set for function call-like arithmetic. - private final String funcName_; - private ArithmeticExpr.Operator op_; - - // Keep the original string passed in the c'tor to resolve - // ambiguities with other uses of IDENT during query parsing. - private final String timeUnitIdent_; - private TimeUnit timeUnit_; - - // Indicates an expr where the interval comes first, e.g., 'interval b year + a'. - private final boolean intervalFirst_; - - // C'tor for function-call like arithmetic, e.g., 'date_add(a, interval b year)'. 
- public TimestampArithmeticExpr(String funcName, Expr e1, Expr e2, - String timeUnitIdent) { - this.funcName_ = funcName.toLowerCase(); - this.timeUnitIdent_ = timeUnitIdent; - this.intervalFirst_ = false; - children_.add(e1); - children_.add(e2); - } - - // C'tor for non-function-call like arithmetic, e.g., 'a + interval b year'. - // e1 always refers to the timestamp to be added/subtracted from, and e2 - // to the time value (even in the interval-first case). - public TimestampArithmeticExpr(ArithmeticExpr.Operator op, Expr e1, Expr e2, - String timeUnitIdent, boolean intervalFirst) { - Preconditions.checkState(op == Operator.ADD || op == Operator.SUBTRACT); - this.funcName_ = null; - this.op_ = op; - this.timeUnitIdent_ = timeUnitIdent; - this.intervalFirst_ = intervalFirst; - children_.add(e1); - children_.add(e2); - } - - /** - * Copy c'tor used in clone(). - */ - protected TimestampArithmeticExpr(TimestampArithmeticExpr other) { - super(other); - funcName_ = other.funcName_; - op_ = other.op_; - timeUnitIdent_ = other.timeUnitIdent_; - timeUnit_ = other.timeUnit_; - intervalFirst_ = other.intervalFirst_; - } - - @Override - public void analyze(Analyzer analyzer) throws AnalysisException { - if (isAnalyzed_) return; - super.analyze(analyzer); - - if (funcName_ != null) { - // Set op based on funcName for function-call like version. - if (funcName_.equals("date_add")) { - op_ = ArithmeticExpr.Operator.ADD; - } else if (funcName_.equals("date_sub")) { - op_ = ArithmeticExpr.Operator.SUBTRACT; - } else { - throw new AnalysisException("Encountered function name '" + funcName_ + - "' in timestamp arithmetic expression '" + toSql() + "'. " + - "Expected function name 'DATE_ADD' or 'DATE_SUB'."); - } - } - - timeUnit_ = TIME_UNITS_MAP.get(timeUnitIdent_.toUpperCase()); - if (timeUnit_ == null) { - throw new AnalysisException("Invalid time unit '" + timeUnitIdent_ + - "' in timestamp arithmetic expression '" + toSql() + "'."); - } - - // The first child must return a timestamp or null. - if (!getChild(0).getType().isTimestamp() && !getChild(0).getType().isNull()) { - throw new AnalysisException("Operand '" + getChild(0).toSql() + - "' of timestamp arithmetic expression '" + toSql() + "' returns type '" + - getChild(0).getType().toSql() + "'. Expected type 'TIMESTAMP'."); - } - - // The second child must be an integer type. - if (!getChild(1).getType().isIntegerType() && - !getChild(1).getType().isNull()) { - throw new AnalysisException("Operand '" + getChild(1).toSql() + - "' of timestamp arithmetic expression '" + toSql() + "' returns type '" + - getChild(1).getType().toSql() + "'. Expected an integer type."); - } - - String funcOpName = String.format("%sS_%s", timeUnit_, - (op_ == ArithmeticExpr.Operator.ADD) ? "ADD" : "SUB"); - // For the month interval, use the invisible special-case implementation. - // "ADD_MONTHS(t, m)" by definition is different from "t + INTERVAL m MONTHS". 
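// For every other unit the name is simply the plural unit plus the op suffix;
// e.g. DAY with Operator.ADD expands to "DAYS_ADD" (builtin "days_add" after
// lowercasing), while MONTH with ADD becomes "MONTHS_ADD_INTERVAL" through the
// special case below. (Illustrative expansion of the String.format above.)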
- if (timeUnit_ == TimeUnit.MONTH) funcOpName += "_INTERVAL"; - - fn_ = getBuiltinFunction(analyzer, funcOpName.toLowerCase(), - collectChildReturnTypes(), CompareMode.IS_NONSTRICT_SUPERTYPE_OF); - castForFunctionCall(false); - - Preconditions.checkNotNull(fn_); - Preconditions.checkState(fn_.getReturnType().isTimestamp()); - type_ = fn_.getReturnType(); - if (hasChildCosts()) evalCost_ = getChildCosts() + TIMESTAMP_ARITHMETIC_COST; - } - - @Override - protected void toThrift(TExprNode msg) { - msg.node_type = TExprNodeType.FUNCTION_CALL; - } - - public String getTimeUnitIdent() { return timeUnitIdent_; } - public TimeUnit getTimeUnit() { return timeUnit_; } - public ArithmeticExpr.Operator getOp() { return op_; } - - @Override - public String toSqlImpl() { - StringBuilder strBuilder = new StringBuilder(); - if (funcName_ != null) { - // Function-call like version. - strBuilder.append(funcName_.toUpperCase() + "("); - strBuilder.append(getChild(0).toSql() + ", "); - strBuilder.append("INTERVAL "); - strBuilder.append(getChild(1).toSql()); - strBuilder.append(" " + timeUnitIdent_); - strBuilder.append(")"); - return strBuilder.toString(); - } - if (intervalFirst_) { - // Non-function-call like version with interval as first operand. - strBuilder.append("INTERVAL "); - strBuilder.append(getChild(1).toSql() + " "); - strBuilder.append(timeUnitIdent_); - strBuilder.append(" " + op_.toString() + " "); - strBuilder.append(getChild(0).toSql()); - } else { - // Non-function-call like version with interval as second operand. - strBuilder.append(getChild(0).toSql()); - strBuilder.append(" " + op_.toString() + " "); - strBuilder.append("INTERVAL "); - strBuilder.append(getChild(1).toSql() + " "); - strBuilder.append(timeUnitIdent_); - } - return strBuilder.toString(); - } - - @Override - public Expr clone() { return new TimestampArithmeticExpr(this); } -}
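A minimal sketch of how the two surface forms normalize during analysis; tsCol and numDays stand in for already-parsed child expressions, and the Analyzer setup is elided (hypothetical placeholder names, not part of the patch):

// Function-call form: DATE_SUB(ts_col, INTERVAL 10 DAYS)
Expr fnForm = new TimestampArithmeticExpr("date_sub", tsCol, numDays, "DAYS");
// Operator form: ts_col - INTERVAL 10 DAYS (interval as second operand)
Expr opForm = new TimestampArithmeticExpr(
    ArithmeticExpr.Operator.SUBTRACT, tsCol, numDays, "DAYS", false);
// After analyze(), both set op_ = SUBTRACT, resolve "DAYS" to TimeUnit.DAY via
// TIME_UNITS_MAP, and bind fn_ to the builtin "days_sub"; toThrift() then emits
// an ordinary FUNCTION_CALL node, so the BE never sees a special expr type.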
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java b/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java deleted file mode 100644 index 851e6ba..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java +++ /dev/null @@ -1,348 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.cloudera.impala.catalog.KuduTable; -import org.antlr.runtime.ANTLRStringStream; -import org.antlr.runtime.Token; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.ql.parse.HiveLexer; - -import com.cloudera.impala.catalog.CatalogException; -import com.cloudera.impala.catalog.Column; -import com.cloudera.impala.catalog.Function; -import com.cloudera.impala.catalog.HBaseTable; -import com.cloudera.impala.catalog.HdfsCompression; -import com.cloudera.impala.catalog.HdfsFileFormat; -import com.cloudera.impala.catalog.RowFormat; -import com.cloudera.impala.catalog.Table; -import com.cloudera.impala.catalog.View; -import com.cloudera.impala.common.PrintUtils; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -/** - * Contains utility methods for creating SQL strings, for example, - * for creating identifier strings that are compatible with Hive or Impala. - */ -public class ToSqlUtils { - // Table properties to hide when generating the toSql() statement - // EXTERNAL and comment are hidden because they are part of the toSql result, e.g., - // "CREATE EXTERNAL TABLE <name> ... COMMENT <comment> ..." - private static final ImmutableSet<String> HIDDEN_TABLE_PROPERTIES = - ImmutableSet.of("EXTERNAL", "comment"); - - /** - * Given an unquoted identifier string, returns an identifier lexable by - * Impala and Hive, possibly by enclosing the original identifier in "`" quotes. - * For example, Hive cannot parse its own auto-generated column - * names "_c0", "_c1" etc. unless they are quoted. Impala and Hive keywords - * must also be quoted. - * - * Impala's lexer recognizes a superset of the unquoted identifiers that Hive can. - * At the same time, Impala's and Hive's list of keywords differ. 
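 * For example (illustrative of the rules below, not an exhaustive list):
 *   getIdentSql("foo")     returns foo        (lexable by both systems)
 *   getIdentSql("select")  returns `select`   (Impala keyword)
 *   getIdentSql("_c0")     returns `_c0`      (Hive cannot lex it unquoted)
 *   getIdentSql("123_tbl") returns `123_tbl`  (leading digit)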
- * This method always returns an identifier that Impala and Hive can recognize, - * although for some identifiers the quotes may not be strictly necessary for - * one or the other system. - */ - public static String getIdentSql(String ident) { - boolean hiveNeedsQuotes = true; - HiveLexer hiveLexer = new HiveLexer(new ANTLRStringStream(ident)); - try { - Token t = hiveLexer.nextToken(); - // Check that the lexer recognizes an identifier and then EOF. - boolean identFound = t.getType() == HiveLexer.Identifier; - t = hiveLexer.nextToken(); - // No enclosing quotes are necessary for Hive. - hiveNeedsQuotes = !(identFound && t.getType() == HiveLexer.EOF); - } catch (Exception e) { - // Ignore exception and just quote the identifier to be safe. - } - boolean isImpalaKeyword = SqlScanner.isKeyword(ident.toUpperCase()); - // Impala's scanner recognizes the ".123" portion of "db.123_tbl" as a decimal, - // so while the quoting is not necessary for the given identifier itself, the quotes - // are needed if this identifier will be preceded by a ".". - boolean startsWithNumber = false; - if (!hiveNeedsQuotes && !isImpalaKeyword) { - try { - Integer.parseInt(ident.substring(0, 1)); - startsWithNumber = true; - } catch (NumberFormatException e) { - // Ignore exception, identifier does not start with number. - } - } - if (hiveNeedsQuotes || isImpalaKeyword || startsWithNumber) return "`" + ident + "`"; - return ident; - } - - public static List<String> getIdentSqlList(List<String> identList) { - List<String> identSqlList = Lists.newArrayList(); - for (String ident: identList) { - identSqlList.add(getIdentSql(ident)); - } - return identSqlList; - } - - public static String getPathSql(List<String> path) { - StringBuilder result = new StringBuilder(); - for (String p: path) { - if (result.length() > 0) result.append("."); - result.append(getIdentSql(p)); - } - return result.toString(); - } - - /** - * Returns the "CREATE TABLE" SQL string corresponding to the given CreateTableStmt - * statement. - */ - public static String getCreateTableSql(CreateTableStmt stmt) { - ArrayList<String> colsSql = Lists.newArrayList(); - for (ColumnDef col: stmt.getColumnDefs()) { - colsSql.add(col.toString()); - } - ArrayList<String> partitionColsSql = Lists.newArrayList(); - for (ColumnDef col: stmt.getPartitionColumnDefs()) { - partitionColsSql.add(col.toString()); - } - // TODO: Pass the correct compression, if applicable. - return getCreateTableSql(stmt.getDb(), stmt.getTbl(), stmt.getComment(), colsSql, - partitionColsSql, stmt.getTblProperties(), stmt.getSerdeProperties(), - stmt.isExternal(), stmt.getIfNotExists(), stmt.getRowFormat(), - HdfsFileFormat.fromThrift(stmt.getFileFormat()), HdfsCompression.NONE, null, - stmt.getLocation()); - } - - /** - * Returns the "CREATE TABLE" SQL string corresponding to the given - * CreateTableAsSelectStmt statement. - */ - public static String getCreateTableSql(CreateTableAsSelectStmt stmt) { - CreateTableStmt innerStmt = stmt.getCreateStmt(); - // Only add partition column labels to output. Table columns must not be specified as - // they are deduced from the select statement. - ArrayList<String> partitionColsSql = Lists.newArrayList(); - for (ColumnDef col: innerStmt.getPartitionColumnDefs()) { - partitionColsSql.add(col.getColName()); - } - // TODO: Pass the correct compression, if applicable. 
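// (Illustrative: for "CREATE TABLE t PARTITIONED BY (ds) AS SELECT id, ds FROM s",
// colsSql is passed as null so no column list is printed, partitionColsSql is
// ["ds"], and the returned string ends with " AS SELECT id, ds FROM s".)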
- String createTableSql = getCreateTableSql(innerStmt.getDb(), innerStmt.getTbl(), - innerStmt.getComment(), null, partitionColsSql, innerStmt.getTblProperties(), - innerStmt.getSerdeProperties(), innerStmt.isExternal(), - innerStmt.getIfNotExists(), innerStmt.getRowFormat(), - HdfsFileFormat.fromThrift(innerStmt.getFileFormat()), HdfsCompression.NONE, null, - innerStmt.getLocation()); - return createTableSql + " AS " + stmt.getQueryStmt().toSql(); - } - - /** - * Returns a "CREATE TABLE" or "CREATE VIEW" statement that creates the specified - * table. - */ - public static String getCreateTableSql(Table table) throws CatalogException { - Preconditions.checkNotNull(table); - if (table instanceof View) return getCreateViewSql((View)table); - org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable(); - HashMap<String, String> properties = Maps.newHashMap(msTable.getParameters()); - boolean isExternal = msTable.getTableType() != null && - msTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString()); - String comment = properties.get("comment"); - for (String hiddenProperty: HIDDEN_TABLE_PROPERTIES) { - properties.remove(hiddenProperty); - } - ArrayList<String> colsSql = Lists.newArrayList(); - ArrayList<String> partitionColsSql = Lists.newArrayList(); - boolean isHbaseTable = table instanceof HBaseTable; - for (int i = 0; i < table.getColumns().size(); i++) { - if (!isHbaseTable && i < table.getNumClusteringCols()) { - partitionColsSql.add(columnToSql(table.getColumns().get(i))); - } else { - colsSql.add(columnToSql(table.getColumns().get(i))); - } - } - RowFormat rowFormat = RowFormat.fromStorageDescriptor(msTable.getSd()); - HdfsFileFormat format = HdfsFileFormat.fromHdfsInputFormatClass( - msTable.getSd().getInputFormat()); - HdfsCompression compression = HdfsCompression.fromHdfsInputFormatClass( - msTable.getSd().getInputFormat()); - String location = isHbaseTable ? null : msTable.getSd().getLocation(); - Map<String, String> serdeParameters = msTable.getSd().getSerdeInfo().getParameters(); - - String storageHandlerClassName = table.getStorageHandlerClassName(); - if (table instanceof KuduTable) { - // Kudu tables don't use LOCATION syntax - location = null; - format = null; - // Kudu tables cannot use the Hive DDL syntax for the storage handler - storageHandlerClassName = null; - } - HdfsUri tableLocation = location == null ? null : new HdfsUri(location); - return getCreateTableSql(table.getDb().getName(), table.getName(), comment, colsSql, - partitionColsSql, properties, serdeParameters, isExternal, false, rowFormat, - format, compression, storageHandlerClassName, tableLocation); - } - - /** - * Returns a "CREATE TABLE" string that creates the table with the specified properties. - * The tableName must not be null. If columnsSql is null, the schema syntax will - * not be generated. 
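 * For example (illustrative), an external PARQUET table with one column and one
 * partition column produces roughly:
 *   CREATE EXTERNAL TABLE db.t (
 *     id INT
 *   )
 *   PARTITIONED BY (
 *     ds STRING
 *   )
 *   STORED AS PARQUET
 *   LOCATION 'hdfs://nn:8020/warehouse/t'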
- */ - public static String getCreateTableSql(String dbName, String tableName, - String tableComment, List<String> columnsSql, List<String> partitionColumnsSql, - Map<String, String> tblProperties, Map<String, String> serdeParameters, - boolean isExternal, boolean ifNotExists, RowFormat rowFormat, - HdfsFileFormat fileFormat, HdfsCompression compression, String storageHandlerClass, - HdfsUri location) { - Preconditions.checkNotNull(tableName); - StringBuilder sb = new StringBuilder("CREATE "); - if (isExternal) sb.append("EXTERNAL "); - sb.append("TABLE "); - if (ifNotExists) sb.append("IF NOT EXISTS "); - if (dbName != null) sb.append(dbName + "."); - sb.append(tableName); - if (columnsSql != null) { - sb.append(" (\n "); - sb.append(Joiner.on(", \n ").join(columnsSql)); - sb.append("\n)"); - } - sb.append("\n"); - if (tableComment != null) sb.append(" COMMENT '" + tableComment + "'\n"); - - if (partitionColumnsSql != null && partitionColumnsSql.size() > 0) { - sb.append(String.format("PARTITIONED BY (\n %s\n)\n", - Joiner.on(", \n ").join(partitionColumnsSql))); - } - - if (rowFormat != null && !rowFormat.isDefault()) { - sb.append("ROW FORMAT DELIMITED"); - if (rowFormat.getFieldDelimiter() != null) { - String fieldDelim = StringEscapeUtils.escapeJava(rowFormat.getFieldDelimiter()); - sb.append(" FIELDS TERMINATED BY '" + fieldDelim + "'"); - } - if (rowFormat.getEscapeChar() != null) { - String escapeChar = StringEscapeUtils.escapeJava(rowFormat.getEscapeChar()); - sb.append(" ESCAPED BY '" + escapeChar + "'"); - } - if (rowFormat.getLineDelimiter() != null) { - String lineDelim = StringEscapeUtils.escapeJava(rowFormat.getLineDelimiter()); - sb.append(" LINES TERMINATED BY '" + lineDelim + "'"); - } - sb.append("\n"); - } - - if (storageHandlerClass == null) { - // TODO: Remove this special case when we have the LZO_TEXT writer - // We must handle LZO_TEXT specially because Impala does not yet support creating - // tables with this row format. In this case, we cannot output "WITH - // SERDEPROPERTIES" because Hive does not support it with "STORED AS". For any - // other HdfsFileFormat we want to output the serdeproperties because it is - // supported by Impala. - if (compression != HdfsCompression.LZO && - compression != HdfsCompression.LZO_INDEX && - serdeParameters != null && !serdeParameters.isEmpty()) { - sb.append( - "WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n"); - } - - if (fileFormat != null) { - sb.append("STORED AS " + fileFormat.toSql(compression) + "\n"); - } - } else { - // If the storageHandlerClass is set, then we will generate the proper Hive DDL - // because we do not yet support creating HBase tables via Impala. 
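// (e.g., for an HBase-backed table this branch emits Hive DDL along the lines of
//  STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
//  WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,cf:c1') -- values illustrative)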
- sb.append("STORED BY '" + storageHandlerClass + "'\n"); - if (serdeParameters != null && !serdeParameters.isEmpty()) { - sb.append( - "WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n"); - } - } - if (location != null) { - sb.append("LOCATION '" + location.toString() + "'\n"); - } - if (tblProperties != null && !tblProperties.isEmpty()) { - sb.append("TBLPROPERTIES " + propertyMapToSql(tblProperties)); - } - return sb.toString(); - } - - public static String getCreateFunctionSql(List<Function> functions) { - Preconditions.checkNotNull(functions); - StringBuilder sb = new StringBuilder(); - for (Function fn: functions) { - sb.append(fn.toSql(false)); - } - return sb.toString(); - } - - public static String getCreateViewSql(View view) { - StringBuffer sb = new StringBuffer(); - sb.append("CREATE VIEW "); - // Use toSql() to ensure that the table name and query statement are normalized - // and identifiers are quoted. - sb.append(view.getTableName().toSql()); - sb.append(" AS\n"); - sb.append(view.getQueryStmt().toSql()); - return sb.toString(); - } - - private static String columnToSql(Column col) { - StringBuilder sb = new StringBuilder(col.getName()); - if (col.getType() != null) sb.append(" " + col.getType().toSql()); - if (!Strings.isNullOrEmpty(col.getComment())) { - sb.append(String.format(" COMMENT '%s'", col.getComment())); - } - return sb.toString(); - } - - private static String propertyMapToSql(Map<String, String> propertyMap) { - List<String> properties = Lists.newArrayList(); - for (Map.Entry<String, String> entry: propertyMap.entrySet()) { - properties.add(String.format("'%s'='%s'", entry.getKey(), - // Properties may contain characters that need to be escaped. - // e.g. If the row format escape delimiter is '\', the map of serde properties - // from the metastore table will contain 'escape.delim' => '\', which is not - // properly escaped. - StringEscapeUtils.escapeJava(entry.getValue()))); - } - return "(" + Joiner.on(", ").join(properties) + ")"; - } - - /** - * Returns a SQL representation of the given list of hints. Uses the end-of-line - * commented plan hint style such that hinted views created by Impala are readable by - * Hive (parsed as a comment by Hive). - */ - public static String getPlanHintsSql(List<String> hints) { - if (hints == null || hints.isEmpty()) return ""; - StringBuilder sb = new StringBuilder(); - sb.append("\n-- +"); - sb.append(Joiner.on(",").join(hints)); - sb.append("\n"); - return sb.toString(); - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TruncateStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TruncateStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/TruncateStmt.java deleted file mode 100644 index 0ea930e..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TruncateStmt.java +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.ArrayList; -import java.util.List; - -import com.cloudera.impala.authorization.Privilege; -import com.cloudera.impala.catalog.HdfsTable; -import com.cloudera.impala.catalog.Table; -import com.cloudera.impala.catalog.Type; -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.thrift.TTableName; -import com.cloudera.impala.thrift.TTruncateParams; -import com.google.common.base.Preconditions; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; - -/** - * Representation of a TRUNCATE statement. - * Acceptable syntax: - * - * TRUNCATE [TABLE] [IF EXISTS] [database.]table - * - */ -public class TruncateStmt extends StatementBase { - private TableName tableName_; - private final boolean ifExists_; - - // Set in analyze(). - private Table table_; - - public TruncateStmt(TableName tableName, boolean ifExists) { - Preconditions.checkNotNull(tableName); - tableName_ = tableName; - table_ = null; - ifExists_ = ifExists; - } - - @Override - public void analyze(Analyzer analyzer) throws AnalysisException { - tableName_ = analyzer.getFqTableName(tableName_); - try { - table_ = analyzer.getTable(tableName_, Privilege.INSERT); - } catch (AnalysisException e) { - if (ifExists_ && analyzer.getMissingTbls().isEmpty()) return; - throw e; - } - // Truncating is currently supported only for HDFS tables. - if (!(table_ instanceof HdfsTable)) { - throw new AnalysisException(String.format( - "TRUNCATE TABLE not supported on non-HDFS table: %s", table_.getFullName())); - } - } - - @Override - public String toSql() { - return "TRUNCATE TABLE " + (ifExists_ ? "IF EXISTS " : "") + tableName_; - } - - public TTruncateParams toThrift() { - TTruncateParams params = new TTruncateParams(); - params.setTable_name(tableName_.toThrift()); - params.setIf_exists(ifExists_); - return params; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java b/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java deleted file mode 100644 index 145a10b..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java +++ /dev/null @@ -1,310 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied.
See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; - -import com.cloudera.impala.catalog.ColumnStats; -import com.cloudera.impala.catalog.HdfsTable; -import com.cloudera.impala.catalog.StructType; -import com.cloudera.impala.catalog.Table; -import com.cloudera.impala.catalog.View; -import com.cloudera.impala.thrift.TTupleDescriptor; -import com.google.common.base.Joiner; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; - -/** - * A collection of slots that are organized in a CPU-friendly memory layout. A slot is - * a typed placeholder for a single value operated on at runtime. A slot can be named or - * anonymous. A named slot corresponds directly to a column or field that can be directly - * referenced in a query by its name. An anonymous slot represents an intermediate value - * produced during query execution, e.g., aggregation output. - * A tuple descriptor has an associated type and a list of slots. Its type is a struct - * that contains as its fields the list of all named slots covered by this tuple. - * The list of slots tracks the named slots that are actually referenced in a query, as - * well as all anonymous slots. Although not required, a tuple descriptor typically - * only has named or anonymous slots and not a mix of both. - * - * For example, every table reference has a corresponding tuple descriptor. The columns - * of the table are represented by the tuple descriptor's type (struct type with one - * field per column). The list of slots tracks which of the table's columns are actually - * referenced. A similar explanation applies for collection references. - * - * A tuple descriptor may be materialized or non-materialized. A non-materialized tuple - * descriptor acts as a placeholder for 'virtual' table references such as inline views, - * and must not be materialized at runtime. - */ -public class TupleDescriptor { - private final TupleId id_; - private final String debugName_; // debug-only - private final ArrayList<SlotDescriptor> slots_ = Lists.newArrayList(); - - // Resolved path to the collection corresponding to this tuple descriptor, if any, - // Only set for materialized tuples. - private Path path_; - - // Type of this tuple descriptor. Used for slot/table resolution in analysis. - private StructType type_; - - // All legal aliases of this tuple. - private String[] aliases_; - - // If true, requires that aliases_.length() == 1. However, aliases_.length() == 1 - // does not imply an explicit alias because nested collection refs have only a - // single implicit alias. - private boolean hasExplicitAlias_; - - // If false, this tuple doesn't need to be materialized. - private boolean isMaterialized_ = true; - - // If true, computeMemLayout() has been called and we can't add any additional slots. 
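// (Worked illustration of computeMemLayout() below: three materialized nullable
// slots of byte sizes 1, 4 and 8 need (3 + 7) / 8 = 1 null-indicator byte, so
// slot offsets start at 1; the 1-byte slot lands at offset 1, padding aligns the
// 4-byte slot to offset 4 and the 8-byte slot to offset 8, giving byteSize_ = 16
// and null-indicator bits 0..2 of byte 0.)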
- private boolean hasMemLayout_ = false; - - private int byteSize_; // of all slots plus null indicators - private int numNullBytes_; - private float avgSerializedSize_; // in bytes; includes serialization overhead - - public TupleDescriptor(TupleId id, String debugName) { - id_ = id; - path_ = null; - debugName_ = debugName; - } - - public void addSlot(SlotDescriptor desc) { - Preconditions.checkState(!hasMemLayout_); - slots_.add(desc); - } - - public TupleId getId() { return id_; } - public ArrayList<SlotDescriptor> getSlots() { return slots_; } - - public ArrayList<SlotDescriptor> getMaterializedSlots() { - ArrayList<SlotDescriptor> result = Lists.newArrayList(); - for (SlotDescriptor slot: slots_) { - if (slot.isMaterialized()) result.add(slot); - } - return result; - } - - public Table getTable() { - if (path_ == null) return null; - return path_.getRootTable(); - } - - public TableName getTableName() { - Table t = getTable(); - return (t == null) ? null : t.getTableName(); - } - - public void setPath(Path p) { - Preconditions.checkNotNull(p); - Preconditions.checkState(p.isResolved()); - Preconditions.checkState(p.destType().isCollectionType()); - path_ = p; - if (p.destTable() != null) { - // Do not use Path.getTypeAsStruct() to only allow implicit path resolutions, - // because this tuple desc belongs to a base table ref. - type_ = (StructType) p.destTable().getType().getItemType(); - } else { - // Also allow explicit path resolutions. - type_ = Path.getTypeAsStruct(p.destType()); - } - } - - public Path getPath() { return path_; } - public void setType(StructType type) { type_ = type; } - public StructType getType() { return type_; } - public int getByteSize() { return byteSize_; } - public float getAvgSerializedSize() { return avgSerializedSize_; } - public boolean isMaterialized() { return isMaterialized_; } - public void setIsMaterialized(boolean value) { isMaterialized_ = value; } - public boolean hasMemLayout() { return hasMemLayout_; } - public void setAliases(String[] aliases, boolean hasExplicitAlias) { - aliases_ = aliases; - hasExplicitAlias_ = hasExplicitAlias; - } - public boolean hasExplicitAlias() { return hasExplicitAlias_; } - public String getAlias() { return (aliases_ != null) ? aliases_[0] : null; } - public TableName getAliasAsName() { - return (aliases_ != null) ? new TableName(null, aliases_[0]) : null; - } - - public TupleDescriptor getRootDesc() { - if (path_ == null) return null; - return path_.getRootDesc(); - } - - public String debugString() { - String tblStr = (getTable() == null ? "null" : getTable().getFullName()); - List<String> slotStrings = Lists.newArrayList(); - for (SlotDescriptor slot : slots_) { - slotStrings.add(slot.debugString()); - } - return Objects.toStringHelper(this) - .add("id", id_.asInt()) - .add("name", debugName_) - .add("tbl", tblStr) - .add("byte_size", byteSize_) - .add("is_materialized", isMaterialized_) - .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]") - .toString(); - } - - /** - * Checks that this tuple is materialized and has a mem layout. Throws if this tuple - * is not executable, i.e., if one of those conditions is not met. 
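 * For example, the non-materialized placeholder tuple of an inline view would
 * fail this check if a plan node attempted to execute against it.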
- */ - public void checkIsExecutable() { - Preconditions.checkState(isMaterialized_, String.format( - "Illegal reference to non-materialized tuple: debugname=%s alias=%s tid=%s", - debugName_, StringUtils.defaultIfEmpty(getAlias(), "n/a"), id_)); - Preconditions.checkState(hasMemLayout_, String.format( - "Missing memory layout for tuple: debugname=%s alias=%s tid=%s", - debugName_, StringUtils.defaultIfEmpty(getAlias(), "n/a"), id_)); - } - - /** - * Materialize all slots. - */ - public void materializeSlots() { - for (SlotDescriptor slot: slots_) { - slot.setIsMaterialized(true); - } - } - - public TTupleDescriptor toThrift() { - TTupleDescriptor ttupleDesc = - new TTupleDescriptor(id_.asInt(), byteSize_, numNullBytes_); - // do not set the table id or tuple path for views - if (getTable() != null && !(getTable() instanceof View)) { - ttupleDesc.setTableId(getTable().getId().asInt()); - Preconditions.checkNotNull(path_); - ttupleDesc.setTuplePath(path_.getAbsolutePath()); - } - return ttupleDesc; - } - - public void computeMemLayout() { - if (hasMemLayout_) return; - hasMemLayout_ = true; - - // sort slots by size - Map<Integer, List<SlotDescriptor>> slotsBySize = - new HashMap<Integer, List<SlotDescriptor>>(); - - // populate slotsBySize; also compute avgSerializedSize - int numNullableSlots = 0; - for (SlotDescriptor d: slots_) { - if (!d.isMaterialized()) continue; - ColumnStats stats = d.getStats(); - if (stats.hasAvgSerializedSize()) { - avgSerializedSize_ += d.getStats().getAvgSerializedSize(); - } else { - // TODO: for computed slots, try to come up with stats estimates - avgSerializedSize_ += d.getType().getSlotSize(); - } - if (!slotsBySize.containsKey(d.getType().getSlotSize())) { - slotsBySize.put(d.getType().getSlotSize(), new ArrayList<SlotDescriptor>()); - } - slotsBySize.get(d.getType().getSlotSize()).add(d); - if (d.getIsNullable()) ++numNullableSlots; - } - // we shouldn't have anything of size <= 0 - Preconditions.checkState(!slotsBySize.containsKey(0)); - Preconditions.checkState(!slotsBySize.containsKey(-1)); - - // assign offsets to slots in order of ascending size - numNullBytes_ = (numNullableSlots + 7) / 8; - int offset = numNullBytes_; - int nullIndicatorByte = 0; - int nullIndicatorBit = 0; - // slotIdx is the index into the resulting tuple struct. The first (smallest) field - // is 0, next is 1, etc. - int slotIdx = 0; - List<Integer> sortedSizes = new ArrayList<Integer>(slotsBySize.keySet()); - Collections.sort(sortedSizes); - for (int slotSize: sortedSizes) { - if (slotsBySize.get(slotSize).isEmpty()) continue; - if (slotSize > 1) { - // insert padding - int alignTo = Math.min(slotSize, 8); - offset = (offset + alignTo - 1) / alignTo * alignTo; - } - - for (SlotDescriptor d: slotsBySize.get(slotSize)) { - Preconditions.checkState(d.isMaterialized()); - d.setByteSize(slotSize); - d.setByteOffset(offset); - d.setSlotIdx(slotIdx++); - offset += slotSize; - - // assign null indicator - if (d.getIsNullable()) { - d.setNullIndicatorByte(nullIndicatorByte); - d.setNullIndicatorBit(nullIndicatorBit); - nullIndicatorBit = (nullIndicatorBit + 1) % 8; - if (nullIndicatorBit == 0) ++nullIndicatorByte; - } else { - // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask - d.setNullIndicatorBit(-1); - d.setNullIndicatorByte(0); - } - } - } - - this.byteSize_ = offset; - } - - /** - * Return true if the slots being materialized are all partition columns. 
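 * (Presumably useful to planners for recognizing scans that touch only
 * partition-key slots of an HDFS table, which can be answered from partition
 * metadata without reading data files.)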
- */ - public boolean hasClusteringColsOnly() { - Table table = getTable(); - if (!(table instanceof HdfsTable) || table.getNumClusteringCols() == 0) return false; - - HdfsTable hdfsTable = (HdfsTable)table; - for (SlotDescriptor slotDesc: getSlots()) { - if (!slotDesc.isMaterialized()) continue; - if (slotDesc.getColumn() == null || - slotDesc.getColumn().getPosition() >= hdfsTable.getNumClusteringCols()) { - return false; - } - } - return true; - } - - /** - * Returns true if tuples of type 'this' can be assigned to tuples of type 'desc' - * (checks that both have the same number of slots and that slots are of the same type) - */ - public boolean isCompatible(TupleDescriptor desc) { - if (slots_.size() != desc.slots_.size()) return false; - for (int i = 0; i < slots_.size(); ++i) { - if (!slots_.get(i).getType().equals(desc.slots_.get(i).getType())) return false; - } - return true; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TupleId.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TupleId.java b/fe/src/main/java/com/cloudera/impala/analysis/TupleId.java deleted file mode 100644 index 918671c..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TupleId.java +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import com.cloudera.impala.common.Id; -import com.cloudera.impala.common.IdGenerator; - -/** - * Tuple identifier unique within a single query. - */ -public class TupleId extends Id<TupleId> { - // Construction only allowed via an IdGenerator. - protected TupleId(int id) { - super(id); - } - - public static IdGenerator<TupleId> createGenerator() { - return new IdGenerator<TupleId>() { - @Override - public TupleId getNextId() { return new TupleId(nextId_++); } - @Override - public TupleId getMaxId() { return new TupleId(nextId_ - 1); } - }; - } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java deleted file mode 100644 index 5a07328..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java +++ /dev/null @@ -1,185 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.List; -import java.util.Set; - -import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.common.InternalException; -import com.cloudera.impala.thrift.TExprNode; -import com.cloudera.impala.thrift.TExprNodeType; -import com.cloudera.impala.thrift.TTupleIsNullPredicate; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -/** - * Internal expr that returns true if all of the given tuples are NULL, otherwise false. - * Used to make exprs originating from an inline view nullable in an outer join. - * The given tupleIds must be materialized but not necessarily nullable at the - * appropriate PlanNode. It is important not to require nullability of the tuples - * because some exprs may be wrapped in a TupleIsNullPredicate that contain - * SlotRefs on non-nullable tuples, e.g., an expr in the On-clause of an outer join - * that refers to an outer-joined inline view (see IMPALA-904). - */ -public class TupleIsNullPredicate extends Predicate { - private final Set<TupleId> tupleIds_; - private Analyzer analyzer_; - - public TupleIsNullPredicate(List<TupleId> tupleIds) { - Preconditions.checkState(tupleIds != null && !tupleIds.isEmpty()); - this.tupleIds_ = Sets.newHashSet(tupleIds); - } - - /** - * Copy c'tor used in clone(). - */ - protected TupleIsNullPredicate(TupleIsNullPredicate other) { - super(other); - tupleIds_ = Sets.newHashSet(other.tupleIds_); - analyzer_ = other.analyzer_; - } - - @Override - public void analyze(Analyzer analyzer) throws AnalysisException { - if (isAnalyzed_) return; - super.analyze(analyzer); - analyzer_ = analyzer; - evalCost_ = tupleIds_.size() * IS_NULL_COST; - } - - @Override - protected void toThrift(TExprNode msg) { - msg.node_type = TExprNodeType.TUPLE_IS_NULL_PRED; - msg.tuple_is_null_pred = new TTupleIsNullPredicate(); - Preconditions.checkNotNull(analyzer_); - for (TupleId tid: tupleIds_) { - // Check that all referenced tuples are materialized. 
- TupleDescriptor tupleDesc = analyzer_.getTupleDesc(tid); - Preconditions.checkNotNull(tupleDesc, "Unknown tuple id: " + tid.toString()); - Preconditions.checkState(tupleDesc.isMaterialized(), - String.format("Illegal reference to non-materialized tuple: tid=%s", tid)); - msg.tuple_is_null_pred.addToTuple_ids(tid.asInt()); - } - } - - @Override - public boolean equals(Object o) { - if (!super.equals(o)) return false; - TupleIsNullPredicate other = (TupleIsNullPredicate) o; - return other.tupleIds_.containsAll(tupleIds_) && - tupleIds_.containsAll(other.tupleIds_); - } - - @Override - protected String toSqlImpl() { return "TupleIsNull()"; } - - public Set<TupleId> getTupleIds() { return tupleIds_; } - - @Override - public boolean isBoundByTupleIds(List<TupleId> tids) { - return tids.containsAll(tupleIds_); - } - - @Override - public boolean isConstant() { return false; } - - /** - * Makes each input expr nullable, if necessary, by wrapping it as follows: - * IF(TupleIsNull(tids), NULL, expr) - * - * The given tids must be materialized. The given inputExprs are expected to be bound - * by tids once fully substituted against base tables. However, inputExprs may not yet - * be fully substituted at this point. - * - * Returns a new list with the nullable exprs. - */ - public static List<Expr> wrapExprs(List<Expr> inputExprs, - List<TupleId> tids, Analyzer analyzer) throws InternalException { - // Assert that all tids are materialized. - for (TupleId tid: tids) { - TupleDescriptor tupleDesc = analyzer.getTupleDesc(tid); - Preconditions.checkState(tupleDesc.isMaterialized()); - } - // Perform the wrapping. - List<Expr> result = Lists.newArrayListWithCapacity(inputExprs.size()); - for (Expr e: inputExprs) { - result.add(wrapExpr(e, tids, analyzer)); - } - return result; - } - - /** - * Returns a new analyzed conditional expr 'IF(TupleIsNull(tids), NULL, expr)', - * if required to make expr nullable. Otherwise, returns expr. - */ - public static Expr wrapExpr(Expr expr, List<TupleId> tids, Analyzer analyzer) - throws InternalException { - if (!requiresNullWrapping(expr, analyzer)) return expr; - List<Expr> params = Lists.newArrayList(); - params.add(new TupleIsNullPredicate(tids)); - params.add(new NullLiteral()); - params.add(expr); - Expr ifExpr = new FunctionCallExpr("if", params); - ifExpr.analyzeNoThrow(analyzer); - return ifExpr; - } - - /** - * Returns true if the given expr evaluates to a non-NULL value if all its contained - * SlotRefs evaluate to NULL, false otherwise. - * Throws an InternalException if expr evaluation in the BE failed. - */ - private static boolean requiresNullWrapping(Expr expr, Analyzer analyzer) - throws InternalException { - Preconditions.checkNotNull(expr); - // If the expr is already wrapped in an IF(TupleIsNull(), NULL, expr) - // then it must definitely be wrapped again at this level. - // Do not try to execute expr because a TupleIsNullPredicate is not constant. - if (expr.contains(TupleIsNullPredicate.class)) return true; - // Wrap expr with an IS NOT NULL predicate. - Expr isNotNullLiteralPred = new IsNullPredicate(expr, true); - // analyze to insert casts, etc. - isNotNullLiteralPred.analyzeNoThrow(analyzer); - return analyzer.isTrueWithNullSlots(isNotNullLiteralPred); - } - - /** - * Recursive function that replaces all 'IF(TupleIsNull(), NULL, e)' exprs in - * 'expr' with e and returns the modified expr. 
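 * For example (illustrative), unwrapExpr(IF(TupleIsNull(tids), NULL, a + b))
 * returns a + b, undoing the wrapping performed by wrapExpr() above; nested
 * wrappings are removed recursively.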
- */ - public static Expr unwrapExpr(Expr expr) { - if (expr instanceof FunctionCallExpr) { - FunctionCallExpr fnCallExpr = (FunctionCallExpr) expr; - List<Expr> params = fnCallExpr.getParams().exprs(); - if (fnCallExpr.getFnName().getFunction().equals("if") && - params.get(0) instanceof TupleIsNullPredicate && - params.get(1) instanceof NullLiteral) { - return unwrapExpr(params.get(2)); - } - } - for (int i = 0; i < expr.getChildren().size(); ++i) { - expr.setChild(i, unwrapExpr(expr.getChild(i))); - } - return expr; - } - - @Override - public Expr clone() { return new TupleIsNullPredicate(this); } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java b/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java deleted file mode 100644 index bbbb1b4..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java +++ /dev/null @@ -1,159 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.util.Set; - -import org.apache.hadoop.hive.metastore.MetaStoreUtils; - -import com.cloudera.impala.catalog.ArrayType; -import com.cloudera.impala.catalog.MapType; -import com.cloudera.impala.catalog.PrimitiveType; -import com.cloudera.impala.catalog.ScalarType; -import com.cloudera.impala.catalog.StructField; -import com.cloudera.impala.catalog.StructType; -import com.cloudera.impala.catalog.Type; -import com.cloudera.impala.common.AnalysisException; -import com.google.common.base.Preconditions; -import com.google.common.collect.Sets; - -/** - * Represents an anonymous type definition, e.g., used in DDL and CASTs. - */ -public class TypeDef implements ParseNode { - private boolean isAnalyzed_; - private final Type parsedType_; - - public TypeDef(Type parsedType) { - parsedType_ = parsedType; - } - - @Override - public void analyze(Analyzer analyzer) throws AnalysisException { - if (isAnalyzed_) return; - // Check the max nesting depth before calling the recursive analyze() to avoid - // a stack overflow. 
- if (parsedType_.exceedsMaxNestingDepth()) { - throw new AnalysisException(String.format( - "Type exceeds the maximum nesting depth of %s:\n%s", - Type.MAX_NESTING_DEPTH, parsedType_.toSql())); - } - analyze(parsedType_, analyzer); - isAnalyzed_ = true; - } - - private void analyze(Type type, Analyzer analyzer) throws AnalysisException { - if (!type.isSupported()) { - throw new AnalysisException("Unsupported data type: " + type.toSql()); - } - if (type.isScalarType()) { - analyzeScalarType((ScalarType) type, analyzer); - } else if (type.isStructType()) { - analyzeStructType((StructType) type, analyzer); - } else if (type.isArrayType()) { - ArrayType arrayType = (ArrayType) type; - analyze(arrayType.getItemType(), analyzer); - } else { - Preconditions.checkState(type.isMapType()); - analyzeMapType((MapType) type, analyzer); - } - } - - private void analyzeScalarType(ScalarType scalarType, Analyzer analyzer) - throws AnalysisException { - PrimitiveType type = scalarType.getPrimitiveType(); - switch (type) { - case CHAR: - case VARCHAR: { - String name; - int maxLen; - if (type == PrimitiveType.VARCHAR) { - name = "Varchar"; - maxLen = ScalarType.MAX_VARCHAR_LENGTH; - } else if (type == PrimitiveType.CHAR) { - name = "Char"; - maxLen = ScalarType.MAX_CHAR_LENGTH; - } else { - Preconditions.checkState(false); - return; - } - int len = scalarType.getLength(); - if (len <= 0) { - throw new AnalysisException(name + " size must be > 0: " + len); - } - if (scalarType.getLength() > maxLen) { - throw new AnalysisException( - name + " size must be <= " + maxLen + ": " + len); - } - break; - } - case DECIMAL: { - int precision = scalarType.decimalPrecision(); - int scale = scalarType.decimalScale(); - if (precision > ScalarType.MAX_PRECISION) { - throw new AnalysisException("Decimal precision must be <= " + - ScalarType.MAX_PRECISION + ": " + precision); - } - if (precision == 0) { - throw new AnalysisException("Decimal precision must be > 0: " + precision); - } - if (scale > precision) { - throw new AnalysisException("Decimal scale (" + scale + ") must be <= " + - "precision (" + precision + ")"); - } - } - default: break; - } - } - - private void analyzeStructType(StructType structType, Analyzer analyzer) - throws AnalysisException { - // Check for duplicate field names. - Set<String> fieldNames = Sets.newHashSet(); - for (StructField f: structType.getFields()) { - analyze(f.getType(), analyzer); - if (!fieldNames.add(f.getName().toLowerCase())) { - throw new AnalysisException( - String.format("Duplicate field name '%s' in struct '%s'", - f.getName(), toSql())); - } - // Check whether the column name meets the Metastore's requirements. 
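// (The Metastore rule is essentially alphanumerics and underscores, so a field
// named e.g. "col-1" would be rejected here; example illustrative.)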
- if (!MetaStoreUtils.validateName(f.getName().toLowerCase())) { - throw new AnalysisException("Invalid struct field name: " + f.getName()); - } - } - } - - private void analyzeMapType(MapType mapType, Analyzer analyzer) - throws AnalysisException { - analyze(mapType.getKeyType(), analyzer); - if (mapType.getKeyType().isComplexType()) { - throw new AnalysisException( - "Map type cannot have a complex-typed key: " + mapType.toSql()); - } - analyze(mapType.getValueType(), analyzer); - } - - public Type getType() { return parsedType_; } - - @Override - public String toString() { return parsedType_.toSql(); } - - @Override - public String toSql() { return parsedType_.toSql(); } -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/TypesUtil.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TypesUtil.java b/fe/src/main/java/com/cloudera/impala/analysis/TypesUtil.java deleted file mode 100644 index 58bb960..0000000 --- a/fe/src/main/java/com/cloudera/impala/analysis/TypesUtil.java +++ /dev/null @@ -1,237 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package com.cloudera.impala.analysis; - -import java.math.BigDecimal; - -import com.cloudera.impala.catalog.ScalarType; -import com.cloudera.impala.catalog.Type; -import com.cloudera.impala.common.AnalysisException; -import com.google.common.base.Preconditions; - -// Utility class for handling types. -public class TypesUtil { - // The sql standard specifies that the scale after division is incremented - // by a system wide constant. Hive picked 4 so we will as well. - // TODO: how did they pick this? - static final int DECIMAL_DIVISION_SCALE_INCREMENT = 4; - - /** - * [1-9] precision -> 4 bytes - * [10-18] precision -> 8 bytes - * [19-38] precision -> 16 bytes - * TODO: Support 12 byte decimal? - * For precision [20-28], we could support a 12 byte decimal but currently a 12 - * byte decimal in the BE is not implemented. - */ - public static int getDecimalSlotSize(ScalarType type) { - Preconditions.checkState(type.isDecimal() && !type.isWildcardDecimal()); - if (type.decimalPrecision() <= 9) return 4; - if (type.decimalPrecision() <= 18) return 8; - return 16; - } - - /** - * Returns the smallest integer type that can store decType without loss - * of precision. decType must have scale == 0. - * In the case where the decimal can be bigger than BIGINT, we return - * BIGINT (and the execution will report it as overflows). 
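 * For example (illustrative of the thresholds below): DECIMAL(2,0) -> TINYINT,
 * DECIMAL(4,0) -> SMALLINT, DECIMAL(9,0) -> INT, DECIMAL(18,0) -> BIGINT.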
- */ - public static ScalarType getContainingIntType(ScalarType decType) { - Preconditions.checkState(decType.isFullySpecifiedDecimal()); - Preconditions.checkState(decType.decimalScale() == 0); - // TINYINT_MAX = 127 - if (decType.decimalPrecision() <= 2) return Type.TINYINT; - // SMALLINT_MAX = 32767 - if (decType.decimalPrecision() <= 4) return Type.SMALLINT; - // INT_MAX = 2147483647 - if (decType.decimalPrecision() <= 9) return Type.INT; - return Type.BIGINT; - } - - /** - * Returns the decimal type that can hold t1 and t2 without loss of precision. - * decimal(10, 2) && decimal(12, 2) -> decimal(12, 2) - * decimal(10, 5) && decimal(12, 3) -> decimal(14, 5) - * Either t1 or t2 can be a wildcard decimal (but not both). - */ - public static ScalarType getDecimalAssignmentCompatibleType( - ScalarType t1, ScalarType t2) { - Preconditions.checkState(t1.isDecimal()); - Preconditions.checkState(t2.isDecimal()); - Preconditions.checkState(!(t1.isWildcardDecimal() && t2.isWildcardDecimal())); - if (t1.isWildcardDecimal()) return t2; - if (t2.isWildcardDecimal()) return t1; - - Preconditions.checkState(t1.isFullySpecifiedDecimal()); - Preconditions.checkState(t2.isFullySpecifiedDecimal()); - if (t1.equals(t2)) return t1; - int s1 = t1.decimalScale(); - int s2 = t2.decimalScale(); - int p1 = t1.decimalPrecision(); - int p2 = t2.decimalPrecision(); - int digitsBefore = Math.max(p1 - s1, p2 - s2); - int digitsAfter = Math.max(s1, s2); - return ScalarType.createDecimalTypeInternal( - digitsBefore + digitsAfter, digitsAfter); - } - - /** - * Returns the necessary result type for t1 op t2. Throws an analysis exception - * if the operation does not make sense for the types. - */ - public static Type getArithmeticResultType(Type t1, Type t2, - ArithmeticExpr.Operator op) throws AnalysisException { - Preconditions.checkState(t1.isNumericType() || t1.isNull()); - Preconditions.checkState(t2.isNumericType() || t2.isNull()); - - if (t1.isNull() && t2.isNull()) return Type.NULL; - - if (t1.isDecimal() || t2.isDecimal()) { - if (t1.isNull()) return t2; - if (t2.isNull()) return t1; - - // For multiplications involving at least one floating point type we cast decimal to - // double in order to prevent decimals from overflowing. - if (op == ArithmeticExpr.Operator.MULTIPLY && - (t1.isFloatingPointType() || t2.isFloatingPointType())) { - return Type.DOUBLE; - } - - t1 = ((ScalarType) t1).getMinResolutionDecimal(); - t2 = ((ScalarType) t2).getMinResolutionDecimal(); - Preconditions.checkState(t1.isDecimal()); - Preconditions.checkState(t2.isDecimal()); - return getDecimalArithmeticResultType(t1, t2, op); - } - - Type type = null; - switch (op) { - case MULTIPLY: - case ADD: - case SUBTRACT: - // If one of the types is null, use the compatible type without promotion. - // Otherwise, promote the compatible type to the next higher resolution type, - // to ensure that a <op> b won't overflow/underflow. - Type compatibleType = - ScalarType.getAssignmentCompatibleType(t1, t2, false); - Preconditions.checkState(compatibleType.isScalarType()); - type = ((ScalarType) compatibleType).getNextResolutionType(); - break; - case MOD: - type = ScalarType.getAssignmentCompatibleType(t1, t2, false); - break; - case DIVIDE: - type = Type.DOUBLE; - break; - default: - throw new AnalysisException("Invalid op: " + op); - } - Preconditions.checkState(type.isValid()); - return type; - } - - /** - * Returns the resulting type of (t1 op t2). - * These rules are mostly taken from the Hive/SQL Server rules with some changes.
-   * http://blogs.msdn.com/b/sqlprogrammability/archive/2006/03/29/564110.aspx
-   *
-   * Changes:
-   *  - Multiply does not need +1 for the result precision.
-   *  - Divide scale truncation is different.
-   */
-  public static ScalarType getDecimalArithmeticResultType(Type t1, Type t2,
-      ArithmeticExpr.Operator op) throws AnalysisException {
-    Preconditions.checkState(t1.isFullySpecifiedDecimal());
-    Preconditions.checkState(t2.isFullySpecifiedDecimal());
-    ScalarType st1 = (ScalarType) t1;
-    ScalarType st2 = (ScalarType) t2;
-    int s1 = st1.decimalScale();
-    int s2 = st2.decimalScale();
-    int p1 = st1.decimalPrecision();
-    int p2 = st2.decimalPrecision();
-    int sMax = Math.max(s1, s2);
-
-    switch (op) {
-      case ADD:
-      case SUBTRACT:
-        return ScalarType.createDecimalTypeInternal(
-            sMax + Math.max(p1 - s1, p2 - s2) + 1, sMax);
-      case MULTIPLY:
-        return ScalarType.createDecimalTypeInternal(p1 + p2, s1 + s2);
-      case DIVIDE:
-        int resultScale = Math.max(DECIMAL_DIVISION_SCALE_INCREMENT, s1 + p2 + 1);
-        int resultPrecision = p1 - s1 + s2 + resultScale;
-        if (resultPrecision > ScalarType.MAX_PRECISION) {
-          // In this case, the desired resulting precision exceeds the maximum and
-          // we need to truncate in some way. We can either remove digits before or
-          // after the decimal point, and there is no single right answer. This is an
-          // implementation detail and different databases will handle this differently.
-          // For simplicity, we will set the resulting scale to be the max of the input
-          // scales and use the maximum precision.
-          resultScale = Math.max(s1, s2);
-          resultPrecision = ScalarType.MAX_PRECISION;
-        }
-        return ScalarType.createDecimalTypeInternal(resultPrecision, resultScale);
-      case MOD:
-        return ScalarType.createDecimalTypeInternal(
-            Math.min(p1 - s1, p2 - s2) + sMax, sMax);
-      default:
-        throw new AnalysisException(
-            "Operation '" + op + "' is not allowed for decimal types.");
-    }
-  }
-
-  /**
-   * Computes the ColumnType that can represent 'v' with no loss of resolution.
-   * The scale/precision in BigDecimal is not compatible with SQL decimal semantics
-   * (it behaves more like significant figures and an exponent).
-   * Returns null if the value cannot be represented.
-   */
-  public static Type computeDecimalType(BigDecimal v) {
-    // toPlainString() returns the string with no exponent. We walk it to compute
-    // the digits before and after the decimal point.
-    // TODO: better way?
-    String str = v.toPlainString();
-    int digitsBefore = 0;
-    int digitsAfter = 0;
-    boolean decimalFound = false;
-    boolean leadingZeros = true;
-    for (int i = 0; i < str.length(); ++i) {
-      char c = str.charAt(i);
-      if (c == '-') continue;
-      if (c == '.') {
-        decimalFound = true;
-        continue;
-      }
-      if (decimalFound) {
-        ++digitsAfter;
-      } else {
-        // Strip out leading 0 before the decimal point. We want "0.1" to
-        // be parsed as ".1" (1 digit instead of 2).
-        if (c == '0' && leadingZeros) continue;
-        leadingZeros = false;
-        ++digitsBefore;
-      }
-    }
-    if (digitsAfter > ScalarType.MAX_SCALE) return null;
-    if (digitsBefore + digitsAfter > ScalarType.MAX_PRECISION) return null;
-    if (digitsBefore == 0 && digitsAfter == 0) digitsBefore = 1;
-    return ScalarType.createDecimalType(digitsBefore + digitsAfter, digitsAfter);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b544f019/fe/src/main/java/com/cloudera/impala/analysis/UnionStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/UnionStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/UnionStmt.java
deleted file mode 100644
index 10758c7..0000000
--- a/fe/src/main/java/com/cloudera/impala/analysis/UnionStmt.java
+++ /dev/null
@@ -1,607 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package com.cloudera.impala.analysis;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.cloudera.impala.catalog.ColumnStats;
-import com.cloudera.impala.common.AnalysisException;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-/**
- * Representation of a union with its list of operands, and optional order by and limit.
- * A union materializes its results, and its resultExprs are slotrefs into the
- * materialized tuple.
- * During analysis, the operands are normalized (separated into a single sequence of
- * DISTINCT followed by a single sequence of ALL operands) and unnested to the extent
- * possible. This also creates the AggregationInfo for DISTINCT operands.
- */
-public class UnionStmt extends QueryStmt {
-  private final static Logger LOG = LoggerFactory.getLogger(UnionStmt.class);
-
-  public static enum Qualifier {
-    ALL,
-    DISTINCT
-  }
-
-  /**
-   * Represents an operand to a union, created by the parser.
-   * Contains a query statement and the all/distinct qualifier
-   * of the union operator (null for the first queryStmt).
-   */
-  public static class UnionOperand {
-    // Qualifier as seen by the parser. Null for the first operand.
-    private final Qualifier originalQualifier_;
-
-    /////////////////////////////////////////
-    // BEGIN: Members that need to be reset()
-
-    private final QueryStmt queryStmt_;
-
-    // Effective qualifier. Possibly different from originalQualifier_ due
-    // to DISTINCT propagation.
-    private Qualifier qualifier_;
-
-    // Analyzer used for this operand. Set in analyze().
-    // We must preserve the conjuncts registered in the analyzer for partition pruning.
-    private Analyzer analyzer_;
-
-    // Map from UnionStmt's result slots to our resultExprs. Used during plan generation.
-    private final ExprSubstitutionMap smap_;
-
-    // END: Members that need to be reset()
-    /////////////////////////////////////////
-
-    public UnionOperand(QueryStmt queryStmt, Qualifier qualifier) {
-      queryStmt_ = queryStmt;
-      originalQualifier_ = qualifier;
-      qualifier_ = qualifier;
-      smap_ = new ExprSubstitutionMap();
-    }
-
-    public void analyze(Analyzer parent) throws AnalysisException {
-      if (isAnalyzed()) return;
-      analyzer_ = new Analyzer(parent);
-      queryStmt_.analyze(analyzer_);
-    }
-
-    public boolean isAnalyzed() { return analyzer_ != null; }
-    public QueryStmt getQueryStmt() { return queryStmt_; }
-    public Qualifier getQualifier() { return qualifier_; }
-    // Used for propagating DISTINCT.
-    public void setQualifier(Qualifier qualifier) { qualifier_ = qualifier; }
-    public Analyzer getAnalyzer() { return analyzer_; }
-    public ExprSubstitutionMap getSmap() { return smap_; }
-
-    public boolean hasAnalyticExprs() {
-      if (queryStmt_ instanceof SelectStmt) {
-        return ((SelectStmt) queryStmt_).hasAnalyticInfo();
-      } else {
-        Preconditions.checkState(queryStmt_ instanceof UnionStmt);
-        return ((UnionStmt) queryStmt_).hasAnalyticExprs();
-      }
-    }
-
-    /**
-     * C'tor for cloning.
-     */
-    private UnionOperand(UnionOperand other) {
-      queryStmt_ = other.queryStmt_.clone();
-      originalQualifier_ = other.originalQualifier_;
-      qualifier_ = other.qualifier_;
-      analyzer_ = other.analyzer_;
-      smap_ = other.smap_.clone();
-    }
-
-    public void reset() {
-      queryStmt_.reset();
-      qualifier_ = originalQualifier_;
-      analyzer_ = null;
-      smap_.clear();
-    }
-
-    @Override
-    public UnionOperand clone() { return new UnionOperand(this); }
-  }
-
-  /////////////////////////////////////////
-  // BEGIN: Members that need to be reset()
-
-  // before analysis, this contains the list of union operands derived verbatim
-  // from the query;
-  // after analysis, this contains all of distinctOperands followed by allOperands
-  protected final List<UnionOperand> operands_;
-
-  // filled during analyze(); contains all operands that need to go through
-  // distinct aggregation
-  protected final List<UnionOperand> distinctOperands_ = Lists.newArrayList();
-
-  // filled during analyze(); contains all operands that can be aggregated with
-  // a simple merge without duplicate elimination (also needs to merge the output
-  // of the DISTINCT operands)
-  protected final List<UnionOperand> allOperands_ = Lists.newArrayList();
-
-  protected AggregateInfo distinctAggInfo_;  // only set if we have DISTINCT ops
-
-  // Single tuple materialized by the union. Set in analyze().
-  protected TupleId tupleId_;
-
-  // set prior to unnesting
-  protected String toSqlString_ = null;
-
-  // true if any of the operands_ references an AnalyticExpr
-  private boolean hasAnalyticExprs_ = false;
-
-  // END: Members that need to be reset()
-  /////////////////////////////////////////
-
-  public UnionStmt(List<UnionOperand> operands,
-      ArrayList<OrderByElement> orderByElements, LimitElement limitElement) {
-    super(orderByElements, limitElement);
-    operands_ = operands;
-  }
-
-  /**
-   * C'tor for cloning.
-   */
-  protected UnionStmt(UnionStmt other) {
-    super(other.cloneOrderByElements(),
-        (other.limitElement_ == null) ? null : other.limitElement_.clone());
-    operands_ = Lists.newArrayList();
-    // If 'other' has been analyzed, clone the normalized operand lists.
-    if (other.analyzer_ != null) {
-      for (UnionOperand o: other.distinctOperands_) distinctOperands_.add(o.clone());
-      for (UnionOperand o: other.allOperands_) allOperands_.add(o.clone());
-      operands_.addAll(distinctOperands_);
-      operands_.addAll(allOperands_);
-    } else {
-      for (UnionOperand operand: other.operands_) operands_.add(operand.clone());
-    }
-    analyzer_ = other.analyzer_;
-    distinctAggInfo_ =
-        (other.distinctAggInfo_ != null) ? other.distinctAggInfo_.clone() : null;
-    tupleId_ = other.tupleId_;
-    toSqlString_ = other.toSqlString_;
-    hasAnalyticExprs_ = other.hasAnalyticExprs_;
-    withClause_ = (other.withClause_ != null) ? other.withClause_.clone() : null;
-  }
-
-  public List<UnionOperand> getOperands() { return operands_; }
-  public List<UnionOperand> getDistinctOperands() { return distinctOperands_; }
-  public boolean hasDistinctOps() { return !distinctOperands_.isEmpty(); }
-  public List<UnionOperand> getAllOperands() { return allOperands_; }
-  public boolean hasAllOps() { return !allOperands_.isEmpty(); }
-  public AggregateInfo getDistinctAggInfo() { return distinctAggInfo_; }
-  public boolean hasAnalyticExprs() { return hasAnalyticExprs_; }
-
-  public void removeAllOperands() {
-    operands_.removeAll(allOperands_);
-    allOperands_.clear();
-  }
-
-  /**
-   * Propagates DISTINCT from right to left, and checks that all
-   * union operands are union compatible, adding implicit casts if necessary.
-   */
-  @Override
-  public void analyze(Analyzer analyzer) throws AnalysisException {
-    if (isAnalyzed()) return;
-    try {
-      super.analyze(analyzer);
-    } catch (AnalysisException e) {
-      if (analyzer.getMissingTbls().isEmpty()) throw e;
-    }
-    Preconditions.checkState(operands_.size() > 0);
-
-    // Propagates DISTINCT from right to left.
-    propagateDistinct();
-
-    // Make sure all operands return an equal number of exprs.
-    QueryStmt firstQuery = operands_.get(0).getQueryStmt();
-
-    try {
-      operands_.get(0).analyze(analyzer);
-    } catch (AnalysisException e) {
-      if (analyzer.getMissingTbls().isEmpty()) throw e;
-    }
-
-    List<List<Expr>> resultExprLists = Lists.newArrayList();
-    List<Expr> firstQueryExprs = firstQuery.getBaseTblResultExprs();
-    resultExprLists.add(firstQueryExprs);
-    for (int i = 1; i < operands_.size(); ++i) {
-      QueryStmt query = operands_.get(i).getQueryStmt();
-      try {
-        operands_.get(i).analyze(analyzer);
-        List<Expr> exprs = query.getBaseTblResultExprs();
-        if (firstQueryExprs.size() != exprs.size()) {
-          throw new AnalysisException("Operands have unequal number of columns:\n" +
-              "'" + queryStmtToSql(firstQuery) + "' has " +
-              firstQueryExprs.size() + " column(s)\n" +
-              "'" + queryStmtToSql(query) + "' has " + exprs.size() + " column(s)");
-        }
-        resultExprLists.add(exprs);
-      } catch (AnalysisException e) {
-        if (analyzer.getMissingTbls().isEmpty()) throw e;
-      }
-    }
-
-    if (!analyzer.getMissingTbls().isEmpty()) {
-      throw new AnalysisException("Found missing tables. Aborting analysis.");
-    }
-
-    // compute hasAnalyticExprs_
-    hasAnalyticExprs_ = false;
-    for (UnionOperand op: operands_) {
-      if (op.hasAnalyticExprs()) {
-        hasAnalyticExprs_ = true;
-        break;
-      }
-    }
-
-    analyzer.castToUnionCompatibleTypes(resultExprLists);
-
-    // Create tuple descriptor materialized by this UnionStmt,
-    // its resultExprs, and its sortInfo if necessary.
-    createMetadata(analyzer);
-    createSortInfo(analyzer);
-    toSqlString_ = toSql();
-
-    unnestOperands(analyzer);
-    if (evaluateOrderBy_) createSortTupleInfo(analyzer);
-    baseTblResultExprs_ = resultExprs_;
-  }
-
-  /**
-   * Marks the baseTblResultExprs of its operands as materialized, based on
-   * which of the output slots have been marked.
-   * Calls materializeRequiredSlots() on the operands themselves.
-   */
-  @Override
-  public void materializeRequiredSlots(Analyzer analyzer) {
-    TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
-    if (!distinctOperands_.isEmpty()) {
-      // to keep things simple we materialize all grouping exprs = output slots,
-      // regardless of what's being referenced externally
-      for (SlotDescriptor slotDesc: tupleDesc.getSlots()) {
-        slotDesc.setIsMaterialized(true);
-      }
-    }
-
-    if (evaluateOrderBy_) {
-      sortInfo_.materializeRequiredSlots(analyzer, null);
-    }
-
-    // collect operands' result exprs
-    List<SlotDescriptor> outputSlots = tupleDesc.getSlots();
-    List<Expr> exprs = Lists.newArrayList();
-    for (int i = 0; i < outputSlots.size(); ++i) {
-      SlotDescriptor slotDesc = outputSlots.get(i);
-      if (!slotDesc.isMaterialized()) continue;
-      for (UnionOperand op: operands_) {
-        exprs.add(op.getQueryStmt().getBaseTblResultExprs().get(i));
-      }
-      if (distinctAggInfo_ != null) {
-        // also mark the corresponding slot in the distinct agg tuple as being
-        // materialized
-        distinctAggInfo_.getOutputTupleDesc().getSlots().get(i).setIsMaterialized(true);
-      }
-    }
-    materializeSlots(analyzer, exprs);
-
-    for (UnionOperand op: operands_) {
-      op.getQueryStmt().materializeRequiredSlots(analyzer);
-    }
-  }
-
-  /**
-   * Fills distinctOperands_/allOperands_ and performs any possible unnesting
-   * of UnionStmt operands in the process.
-   */
-  private void unnestOperands(Analyzer analyzer) throws AnalysisException {
-    if (operands_.size() == 1) {
-      // ValuesStmt for a single row.
-      allOperands_.add(operands_.get(0));
-      setOperandSmap(operands_.get(0), analyzer);
-      return;
-    }
-
-    // find index of first ALL operand
-    int firstUnionAllIdx = operands_.size();
-    for (int i = 1; i < operands_.size(); ++i) {
-      UnionOperand operand = operands_.get(i);
-      if (operand.getQualifier() == Qualifier.ALL) {
-        firstUnionAllIdx = (i == 1 ? 0 : i);
-        break;
-      }
-    }
-    // operands[0] is always implicitly ALL, so operands[1] can't be the
-    // first one
-    Preconditions.checkState(firstUnionAllIdx != 1);
-
-    // unnest DISTINCT operands
-    Preconditions.checkState(distinctOperands_.isEmpty());
-    for (int i = 0; i < firstUnionAllIdx; ++i) {
-      unnestOperand(distinctOperands_, Qualifier.DISTINCT, operands_.get(i));
-    }
-
-    // unnest ALL operands
-    Preconditions.checkState(allOperands_.isEmpty());
-    for (int i = firstUnionAllIdx; i < operands_.size(); ++i) {
-      unnestOperand(allOperands_, Qualifier.ALL, operands_.get(i));
-    }
-
-    operands_.clear();
-    operands_.addAll(distinctOperands_);
-    operands_.addAll(allOperands_);
-
-    // create unnested operands' smaps
-    for (UnionOperand operand: operands_) {
-      setOperandSmap(operand, analyzer);
-    }
-
-    // create distinctAggInfo, if necessary
-    if (!distinctOperands_.isEmpty()) {
-      // Aggregate produces exactly the same tuple as the original union stmt.
-      ArrayList<Expr> groupingExprs = Expr.cloneList(resultExprs_);
-      try {
-        distinctAggInfo_ =
-            AggregateInfo.create(groupingExprs, null,
-              analyzer.getDescTbl().getTupleDesc(tupleId_), analyzer);
-      } catch (AnalysisException e) {
-        // this should never happen
-        throw new AnalysisException("error creating agg info in UnionStmt.analyze()");
-      }
-    }
-  }
-
-  /**
-   * Sets the smap for the given operand. It maps from the output slots of this
-   * union's tuple to the corresponding base table exprs of the operand.
-   */
-  private void setOperandSmap(UnionOperand operand, Analyzer analyzer) {
-    TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
-    // operands' smaps were already set in the operands' analyze()
-    operand.getSmap().clear();
-    for (int i = 0; i < tupleDesc.getSlots().size(); ++i) {
-      SlotDescriptor outputSlot = tupleDesc.getSlots().get(i);
-      operand.getSmap().put(
-          new SlotRef(outputSlot),
-          // TODO: baseTblResultExprs?
-          operand.getQueryStmt().getResultExprs().get(i).clone());
-    }
-  }
-
-  /**
-   * Add a single operand to the target list; if the operand itself is a UnionStmt,
-   * apply unnesting to the extent possible (possibly modifying 'operand' in the process).
-   */
-  private void unnestOperand(
-      List<UnionOperand> target, Qualifier targetQualifier, UnionOperand operand) {
-    QueryStmt queryStmt = operand.getQueryStmt();
-    if (queryStmt instanceof SelectStmt) {
-      target.add(operand);
-      return;
-    }
-
-    Preconditions.checkState(queryStmt instanceof UnionStmt);
-    UnionStmt unionStmt = (UnionStmt) queryStmt;
-    if (unionStmt.hasLimit() || unionStmt.hasOffset()) {
-      // we must preserve the nested Union
-      target.add(operand);
-    } else if (targetQualifier == Qualifier.DISTINCT || !unionStmt.hasDistinctOps()) {
-      // there is no limit in the nested Union and we can absorb all of its
-      // operands as-is
-      target.addAll(unionStmt.getDistinctOperands());
-      target.addAll(unionStmt.getAllOperands());
-    } else {
-      // the nested Union contains some Distinct ops and we're accumulating
-      // into our All ops; unnest only the All ops and leave the rest in place
-      target.addAll(unionStmt.getAllOperands());
-      unionStmt.removeAllOperands();
-      target.add(operand);
-    }
-  }
-
-  /**
-   * String representation of queryStmt used in reporting errors.
-   * Allows subclasses to override this.
-   */
-  protected String queryStmtToSql(QueryStmt queryStmt) {
-    return queryStmt.toSql();
-  }
-
-  /**
-   * Propagates DISTINCT (if present) from right to left.
-   * Implied associativity:
-   * A UNION ALL B UNION DISTINCT C = (A UNION ALL B) UNION DISTINCT C
-   *   = A UNION DISTINCT B UNION DISTINCT C
-   */
-  private void propagateDistinct() {
-    int lastDistinctPos = -1;
-    for (int i = operands_.size() - 1; i > 0; --i) {
-      UnionOperand operand = operands_.get(i);
-      if (lastDistinctPos != -1) {
-        // There is a DISTINCT somewhere to the right.
-        operand.setQualifier(Qualifier.DISTINCT);
-      } else if (operand.getQualifier() == Qualifier.DISTINCT) {
-        lastDistinctPos = i;
-      }
-    }
-  }
-
-  /**
-   * Create a descriptor for the tuple materialized by the union.
-   * Set resultExprs to be slot refs into that tuple.
-   * Also fills the substitution map, such that "order by" can properly resolve
-   * column references from the result of the union.
-   */
-  private void createMetadata(Analyzer analyzer) throws AnalysisException {
-    // Create tuple descriptor for materialized tuple created by the union.
-    TupleDescriptor tupleDesc = analyzer.getDescTbl().createTupleDescriptor("union");
-    tupleDesc.setIsMaterialized(true);
-    tupleId_ = tupleDesc.getId();
-    LOG.trace("UnionStmt.createMetadata: tupleId=" + tupleId_.toString());
-
-    // One slot per expr in the select blocks. Use first select block as representative.
-    List<Expr> firstSelectExprs = operands_.get(0).getQueryStmt().getBaseTblResultExprs();
-
-    // Compute column stats for the materialized slots from the source exprs.
-    List<ColumnStats> columnStats = Lists.newArrayList();
-    for (int i = 0; i < operands_.size(); ++i) {
-      List<Expr> selectExprs = operands_.get(i).getQueryStmt().getBaseTblResultExprs();
-      for (int j = 0; j < selectExprs.size(); ++j) {
-        ColumnStats statsToAdd = ColumnStats.fromExpr(selectExprs.get(j));
-        if (i == 0) {
-          columnStats.add(statsToAdd);
-        } else {
-          columnStats.get(j).add(statsToAdd);
-        }
-      }
-    }
-
-    // Create the slots of the tuple descriptor.
-    for (int i = 0; i < firstSelectExprs.size(); ++i) {
-      Expr expr = firstSelectExprs.get(i);
-      SlotDescriptor slotDesc = analyzer.addSlotDescriptor(tupleDesc);
-      slotDesc.setLabel(getColLabels().get(i));
-      slotDesc.setType(expr.getType());
-      slotDesc.setStats(columnStats.get(i));
-      SlotRef outputSlotRef = new SlotRef(slotDesc);
-      resultExprs_.add(outputSlotRef);
-
-      // Add to aliasSmap_ so that column refs in "order by" can be resolved.
-      if (orderByElements_ != null) {
-        SlotRef aliasRef = new SlotRef(getColLabels().get(i));
-        if (aliasSmap_.containsMappingFor(aliasRef)) {
-          ambiguousAliasList_.add(aliasRef);
-        } else {
-          aliasSmap_.put(aliasRef, outputSlotRef);
-        }
-      }
-
-      // register single-directional value transfers from output slot
-      // to operands' result exprs (if those happen to be slotrefs);
-      // don't do that if the operand computes analytic exprs
-      // (see Planner.createInlineViewPlan() for the reasoning)
-      for (UnionOperand op: operands_) {
-        Expr resultExpr = op.getQueryStmt().getBaseTblResultExprs().get(i);
-        slotDesc.addSourceExpr(resultExpr);
-        if (op.hasAnalyticExprs()) continue;
-        SlotRef slotRef = resultExpr.unwrapSlotRef(true);
-        if (slotRef == null) continue;
-        analyzer.registerValueTransfer(outputSlotRef.getSlotId(), slotRef.getSlotId());
-      }
-    }
-    baseTblResultExprs_ = resultExprs_;
-  }
-
-  public TupleId getTupleId() { return tupleId_; }
-
-  @Override
-  public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) {
-    // Return the sort tuple if there is an evaluated order by.
-    if (evaluateOrderBy_) {
-      tupleIdList.add(sortInfo_.getSortTupleDescriptor().getId());
-    } else {
-      tupleIdList.add(tupleId_);
-    }
-  }
-
-  @Override
-  public void collectTableRefs(List<TableRef> tblRefs) {
-    for (UnionOperand op: operands_) {
-      op.getQueryStmt().collectTableRefs(tblRefs);
-    }
-  }
-
-  @Override
-  public String toSql() {
-    if (toSqlString_ != null) return toSqlString_;
-    StringBuilder strBuilder = new StringBuilder();
-    Preconditions.checkState(operands_.size() > 0);
-
-    if (withClause_ != null) {
-      strBuilder.append(withClause_.toSql());
-      strBuilder.append(" ");
-    }
-
-    strBuilder.append(operands_.get(0).getQueryStmt().toSql());
-    for (int i = 1; i < operands_.size() - 1; ++i) {
-      strBuilder.append(" UNION " +
"ALL " : "")); - if (operands_.get(i).getQueryStmt() instanceof UnionStmt) { - strBuilder.append("("); - } - strBuilder.append(operands_.get(i).getQueryStmt().toSql()); - if (operands_.get(i).getQueryStmt() instanceof UnionStmt) { - strBuilder.append(")"); - } - } - // Determine whether we need parenthesis around the last union operand. - UnionOperand lastOperand = operands_.get(operands_.size() - 1); - QueryStmt lastQueryStmt = lastOperand.getQueryStmt(); - strBuilder.append(" UNION " + - ((lastOperand.getQualifier() == Qualifier.ALL) ? "ALL " : "")); - if (lastQueryStmt instanceof UnionStmt || - ((hasOrderByClause() || hasLimit() || hasOffset()) && - !lastQueryStmt.hasLimit() && !lastQueryStmt.hasOffset() && - !lastQueryStmt.hasOrderByClause())) { - strBuilder.append("("); - strBuilder.append(lastQueryStmt.toSql()); - strBuilder.append(")"); - } else { - strBuilder.append(lastQueryStmt.toSql()); - } - // Order By clause - if (hasOrderByClause()) { - strBuilder.append(" ORDER BY "); - for (int i = 0; i < orderByElements_.size(); ++i) { - strBuilder.append(orderByElements_.get(i).toSql()); - strBuilder.append((i+1 != orderByElements_.size()) ? ", " : ""); - } - } - // Limit clause. - strBuilder.append(limitElement_.toSql()); - return strBuilder.toString(); - } - - @Override - public ArrayList<String> getColLabels() { - Preconditions.checkState(operands_.size() > 0); - return operands_.get(0).getQueryStmt().getColLabels(); - } - - @Override - public UnionStmt clone() { return new UnionStmt(this); } - - @Override - public void reset() { - super.reset(); - for (UnionOperand op: operands_) op.reset(); - distinctOperands_.clear(); - allOperands_.clear(); - distinctAggInfo_ = null; - tupleId_ = null; - toSqlString_ = null; - hasAnalyticExprs_ = false; - } -}

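The decimal result-type rules removed from TypesUtil.java at the top of this message can be exercised in isolation the same way. The sketch below mirrors the precision/scale arithmetic of getDecimalArithmeticResultType(); it assumes MAX_PRECISION = 38 (Impala's maximum decimal precision) and assumes DECIMAL_DIVISION_SCALE_INCREMENT = 4, since that constant's value does not appear in this diff. The class and method names are hypothetical.

    // Hypothetical standalone sketch of the deleted decimal rules: it computes
    // only the (precision, scale) pair. Constants are assumptions, see above.
    public class DecimalResultTypeSketch {
      static final int MAX_PRECISION = 38;                    // assumed
      static final int DECIMAL_DIVISION_SCALE_INCREMENT = 4;  // assumed

      enum Op { ADD, SUBTRACT, MULTIPLY, DIVIDE, MOD }

      // Returns {precision, scale} of decimal(p1,s1) op decimal(p2,s2).
      static int[] resultType(int p1, int s1, int p2, int s2, Op op) {
        int sMax = Math.max(s1, s2);
        switch (op) {
          case ADD:
          case SUBTRACT:
            // One extra integer digit guards against carry, e.g. 9.9 + 9.9 = 19.8.
            return new int[] { sMax + Math.max(p1 - s1, p2 - s2) + 1, sMax };
          case MULTIPLY:
            return new int[] { p1 + p2, s1 + s2 };
          case DIVIDE: {
            int scale = Math.max(DECIMAL_DIVISION_SCALE_INCREMENT, s1 + p2 + 1);
            int precision = p1 - s1 + s2 + scale;
            if (precision > MAX_PRECISION) {
              // Truncate: keep the max input scale and cap the precision.
              scale = Math.max(s1, s2);
              precision = MAX_PRECISION;
            }
            return new int[] { precision, scale };
          }
          case MOD:
            return new int[] { Math.min(p1 - s1, p2 - s2) + sMax, sMax };
          default:
            throw new IllegalArgumentException("Unsupported op: " + op);
        }
      }

      public static void main(String[] args) {
        // decimal(10,2) + decimal(12,3): max integer digits (8 vs 9) + max
        // scale (3) + 1 carry digit -> decimal(13,3).
        int[] add = resultType(10, 2, 12, 3, Op.ADD);
        System.out.println(add[0] + "," + add[1]);  // 13,3

        // decimal(10,2) / decimal(12,3): scale = max(4, 2+12+1) = 15,
        // precision = 10-2+3+15 = 26 -> decimal(26,15).
        int[] div = resultType(10, 2, 12, 3, Op.DIVIDE);
        System.out.println(div[0] + "," + div[1]);  // 26,15
      }
    }

The MAX_PRECISION cap on divide is exactly the implementation detail called out in the deleted comment: once p1 - s1 + s2 + scale exceeds the maximum, the code keeps the larger input scale rather than preserving integer digits, and other databases make a different trade-off here.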