This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
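For context on the mechanism this patch adopts: Hive's DDL analyzers register themselves with DDLSemanticAnalyzerFactory through the @DDLType annotation, keyed by parser token type, and this commit adds that registration for TOK_CREATETABLE by introducing CreateTableAnalyzer. The following is only an illustrative sketch of that dispatch pattern; the names DdlType, AnalyzerRegistry, BaseAnalyzer and the QueryState stub are hypothetical simplifications, not Hive's actual classes.

// Minimal sketch (hypothetical, simplified types) of annotation-keyed analyzer dispatch
// in the spirit of DDLSemanticAnalyzerFactory; not the real implementation.
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
@interface DdlType {
  int[] types(); // parser token types, e.g. HiveParser.TOK_CREATETABLE
}

class QueryState { /* stand-in for org.apache.hadoop.hive.ql.QueryState */ }

abstract class BaseAnalyzer {
  protected final QueryState queryState;
  protected BaseAnalyzer(QueryState queryState) { this.queryState = queryState; }
}

final class AnalyzerRegistry {
  private final Map<Integer, Function<QueryState, BaseAnalyzer>> analyzers = new HashMap<>();

  // Register an analyzer under every token type declared by its @DdlType annotation.
  void register(Class<? extends BaseAnalyzer> clazz, Function<QueryState, BaseAnalyzer> constructor) {
    for (int tokenType : clazz.getAnnotation(DdlType.class).types()) {
      analyzers.put(tokenType, constructor);
    }
  }

  // The factory "handles" a statement iff the root token of its AST was registered.
  boolean handles(int rootTokenType) {
    return analyzers.containsKey(rootTokenType);
  }

  BaseAnalyzer analyzerFor(int rootTokenType, QueryState queryState) {
    return analyzers.get(rootTokenType).apply(queryState);
  }
}

With CreateTableAnalyzer annotated @DDLType(types = HiveParser.TOK_CREATETABLE) in the patch below, the factory can now claim CREATE TABLE statements, which is the gap HIVE-28724 closes.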
The following commit(s) were added to refs/heads/master by this push:
     new dcb1b9e9e1f  HIVE-28724: CREATE TABLE is not handled by DDLSemanticAnalyzerFactory (#5973)
dcb1b9e9e1f is described below

commit dcb1b9e9e1f31f66603ca9cb63db5d5ca95b9f33
Author: ramitg254 <97451167+ramitg...@users.noreply.github.com>
AuthorDate: Thu Jul 31 14:27:56 2025 +0530

    HIVE-28724: CREATE TABLE is not handled by DDLSemanticAnalyzerFactory (#5973)
---
 .../ql/ddl/table/create/CreateTableAnalyzer.java   | 818 +++++++++++++++++++++
 .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java |  42 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  54 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 785 +-------------------
 4 files changed, 913 insertions(+), 786 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java
new file mode 100644
index 00000000000..b4c7f4bec38
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java
@@ -0,0 +1,818 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.ddl.table.create; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.TransactionalValidationListener; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint; +import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory.DDLType; +import org.apache.hadoop.hive.ql.ddl.DDLWork; +import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFieldDesc; +import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFields; +import org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils; +import org.apache.hadoop.hive.ql.ddl.table.convert.AlterTableConvertOperation; +import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc; +import org.apache.hadoop.hive.ql.ddl.table.storage.skewed.SkewedTableUtils; +import org.apache.hadoop.hive.ql.exec.TaskFactory; +import org.apache.hadoop.hive.ql.io.SchemaInferenceUtils; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.PartitionTransform; +import org.apache.hadoop.hive.ql.parse.QB; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.StorageFormat; +import org.apache.hadoop.hive.ql.parse.TableMask; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.session.SessionStateUtil; +import org.apache.hadoop.hive.ql.util.NullOrdering; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.base.Strings; + +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER; +import static 
org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_TYPE;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
+
+@DDLType(types = HiveParser.TOK_CREATETABLE)
+public class CreateTableAnalyzer extends CalcitePlanner {
+
+  public CreateTableAnalyzer(QueryState queryState)
+      throws SemanticException {
+    super(queryState);
+  }
+
+  private static final String[] UPDATED_TBL_PROPS =
+      {hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES,
+       hive_metastoreConstants.TABLE_BUCKETING_VERSION};
+
+  @Override
+  protected ASTNode handlePostCboRewriteContext(PreCboCtx cboCtx, ASTNode newAST)
+      throws SemanticException {
+    if (cboCtx.getType() == PreCboCtx.Type.CTAS) {
+      // CTAS
+      init(false);
+      setAST(newAST);
+      newAST = reAnalyzeCTASAfterCbo(newAST);
+    } else {
+      newAST = super.handlePostCboRewriteContext(cboCtx, newAST);
+    }
+    return newAST;
+  }
+
+  public ASTNode reAnalyzeCTASAfterCbo(ASTNode newAst)
+      throws SemanticException {
+    // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it
+    // here.
+    newAst = analyzeCreateTable(newAst, getQB(), null);
+    if (newAst == null) {
+      LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + " new ast is " + getAST().dump());
+      throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO");
+    }
+    return newAst;
+  }
+
+  @Override
+  protected boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx)
+      throws SemanticException {
+    ASTNode child;
+    this.ast = ast;
+    viewsExpanded = new ArrayList<String>();
+    if ((child = analyzeCreateTable(ast, getQB(), plannerCtx)) == null) {
+      return false;
+    }
+    return analyzeAndResolveChildTree(child, plannerCtx);
+  }
+
+  /**
+   * Checks whether the given partition columns have DEFAULT or CHECK constraints (whether ENABLED or DISABLED),
+   * or NOT NULL constraints (only ENABLED).
+   * @param partCols partition columns
+   * @param defConstraints default constraints
+   * @param notNullConstraints not null constraints
+   * @param checkConstraints CHECK constraints
+   * @return true if any such constraint is present, false otherwise
+   */
+  private boolean hasConstraints(final List<FieldSchema> partCols, final List<SQLDefaultConstraint> defConstraints,
+      final List<SQLNotNullConstraint> notNullConstraints, final List<SQLCheckConstraint> checkConstraints) {
+    for (FieldSchema partFS : partCols) {
+      for (SQLDefaultConstraint dc : defConstraints) {
+        if (dc.getColumn_name().equals(partFS.getName())) {
+          return true;
+        }
+      }
+      for (SQLCheckConstraint cc : checkConstraints) {
+        if (cc.getColumn_name().equals(partFS.getName())) {
+          return true;
+        }
+      }
+      for (SQLNotNullConstraint nc : notNullConstraints) {
+        if (nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  private String getSortOrderJson(ASTNode ast) {
+    List<SortFieldDesc> sortFieldDescList = new ArrayList<>();
+    SortFields sortFields = new SortFields(sortFieldDescList);
+    for (int i = 0; i < ast.getChildCount(); i++) {
+      ASTNode child = (ASTNode) ast.getChild(i);
+      SortFieldDesc.SortDirection sortDirection =
+          child.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEDESC ?
SortFieldDesc.SortDirection.DESC
+              : SortFieldDesc.SortDirection.ASC;
+      child = (ASTNode) child.getChild(0);
+      String name = unescapeIdentifier(child.getChild(0).getText()).toLowerCase();
+      NullOrdering nullOrder = NullOrdering.fromToken(child.getToken().getType());
+      sortFieldDescList.add(new SortFieldDesc(name, sortDirection, nullOrder));
+    }
+    try {
+      return JSON_OBJECT_MAPPER.writer().writeValueAsString(sortFields);
+    } catch (JsonProcessingException e) {
+      LOG.warn("Cannot create write order JSON.", e);
+      return null;
+    }
+  }
+
+  /**
+   * This API is used to determine whether an ACID table should be created or not:
+   * if the database's default table type is set to external, then CREATE TRANSACTIONAL TABLE should still
+   * result in an ACID table, while plain CREATE TABLE should result in an external table.
+   */
+  private boolean isExternalTableChanged(Map<String, String> tblProp, boolean isTransactional, boolean isExt,
+      boolean isTableTypeChanged) {
+    if (isTableTypeChanged && tblProp != null && tblProp.getOrDefault(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL,
+        "false").equalsIgnoreCase("true") || isTransactional) {
+      isExt = false;
+    }
+    return isExt;
+  }
+
+  private static boolean isIcebergTable(Map<String, String> tblProps) {
+    return AlterTableConvertOperation.ConversionFormats.ICEBERG.properties().get(META_TABLE_STORAGE)
+        .equalsIgnoreCase(tblProps.get(META_TABLE_STORAGE));
+  }
+
+  private String getDefaultLocation(String dbName, String tableName, boolean isExt)
+      throws SemanticException {
+    String tblLocation;
+    try {
+      Warehouse wh = new Warehouse(conf);
+      tblLocation = wh.getDefaultTablePath(db.getDatabase(dbName), tableName, isExt).toUri().getPath();
+    } catch (MetaException | HiveException e) {
+      throw new SemanticException(e);
+    }
+    return tblLocation;
+  }
+
+  /**
+   * Add default properties to the table properties. If a default parameter already exists
+   * in tblProp, the value in tblProp is kept.
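+   * Example (assumed configuration value): if the parameter string behind ConfVars.NEW_TABLE_DEFAULT_PARA is
+   * "k1=v1,k2=v2", both pairs are copied into the returned map unless tblProp already defines the same key.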
+   *
+   * @param tblProp property map
+   * @return Modified table property map
+   */
+  private Map<String, String> validateAndAddDefaultProperties(Map<String, String> tblProp, boolean isExt,
+      StorageFormat storageFormat, String qualifiedTableName, List<Order> sortCols, boolean isMaterialization,
+      boolean isTemporaryTable, boolean isTransactional, boolean isManaged, String[] qualifiedTabName,
+      boolean isTableTypeChanged)
+      throws SemanticException {
+    Map<String, String> retValue = Optional.ofNullable(tblProp).orElseGet(HashMap::new);
+
+    String paraString = HiveConf.getVar(conf, HiveConf.ConfVars.NEW_TABLE_DEFAULT_PARA);
+    if (paraString != null && !paraString.isEmpty()) {
+      for (String keyValuePair : paraString.split(",")) {
+        String[] keyValue = keyValuePair.split("=", 2);
+        if (keyValue.length != 2) {
+          continue;
+        }
+        if (!retValue.containsKey(keyValue[0])) {
+          retValue.put(keyValue[0], keyValue[1]);
+        }
+      }
+    }
+    if (!retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) && retValue.containsKey(
+        hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)) {
+      throw new SemanticException(
+          "Cannot specify " + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES + " without "
+              + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+    }
+    isExt = isExternalTableChanged(retValue, isTransactional, isExt, isTableTypeChanged);
+
+    if (isExt && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_EXTERNALTABLE_PURGE_DEFAULT)) {
+      if (retValue.get(MetaStoreUtils.EXTERNAL_TABLE_PURGE) == null) {
+        retValue.put(MetaStoreUtils.EXTERNAL_TABLE_PURGE, "true");
+      }
+    }
+
+    boolean makeInsertOnly =
+        !isTemporaryTable && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY);
+    boolean makeAcid = !isTemporaryTable && makeAcid();
+    // if a managed table is not specified and create.table.as.external is true,
+    // ignore makeInsertOnly and makeAcid.
+    if (!isManaged && HiveConf.getBoolVar(conf, HiveConf.ConfVars.CREATE_TABLE_AS_EXTERNAL)) {
+      makeInsertOnly = false;
+      makeAcid = false;
+    }
+    if ((makeInsertOnly || makeAcid || isTransactional || isManaged) && !isExt && !isMaterialization
+        && StringUtils.isBlank(storageFormat.getStorageHandler())
+        // don't overwrite user choice if transactional attribute is explicitly set
+        && !retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL)) {
+      if (makeInsertOnly || isTransactional) {
+        retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
+        retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES,
+            TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY);
+      }
+      if (makeAcid || isTransactional || (isManaged && !makeInsertOnly)) {
+        retValue = convertToAcidByDefault(storageFormat, qualifiedTableName, sortCols, retValue);
+      }
+    }
+    if (!isExt) {
+      addDbAndTabToOutputs(qualifiedTabName, TableType.MANAGED_TABLE, isTemporaryTable, retValue, storageFormat);
+    } else {
+      addDbAndTabToOutputs(qualifiedTabName, TableType.EXTERNAL_TABLE, isTemporaryTable, retValue, storageFormat);
+    }
+
+    if (isIcebergTable(retValue)) {
+      SessionStateUtil.addResourceOrThrow(conf, SessionStateUtil.DEFAULT_TABLE_LOCATION,
+          getDefaultLocation(qualifiedTabName[0], qualifiedTabName[1], true));
+    }
+    return retValue;
+  }
+
+  /**
+   * Update the default table properties with values fetched from the original table properties. The property names
+   * are defined in {@link #UPDATED_TBL_PROPS}.
+   * @param source properties of source table, must not be null.
+   * @param target properties of target table.
+   * @param skipped a list of properties which should not be overwritten. It can be null or empty.
+   */
+  private void updateDefaultTblProps(Map<String, String> source, Map<String, String> target, List<String> skipped) {
+    if (source == null || target == null) {
+      return;
+    }
+    for (String property : UPDATED_TBL_PROPS) {
+      if ((skipped == null || !skipped.contains(property)) && source.containsKey(property)) {
+        target.put(property, source.get(property));
+      }
+    }
+  }
+
+  /**
+   * Analyze the create table command. If it is a regular create-table or
+   * create-table-like statement, we create a DDLWork and return null. If it is
+   * a create-table-as-select, we get the necessary info such as the SerDe and
+   * Storage Format, put it in the QB, and return the select statement's ASTNode,
+   * indicating that the rest of the semantic analyzer needs to deal with the
+   * select statement with respect to the SerDe and Storage Format.
+   */
+  ASTNode analyzeCreateTable(ASTNode ast, QB qb, PlannerContext plannerCtx)
+      throws SemanticException {
+    TableName qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0));
+    final String dbDotTab = qualifiedTabName.getNotEmptyDbTable();
+
+    String likeTableName = null;
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
+    List<String> partColNames = new ArrayList<>();
+    List<String> bucketCols = new ArrayList<String>();
+    List<SQLPrimaryKey> primaryKeys = new ArrayList<SQLPrimaryKey>();
+    List<SQLForeignKey> foreignKeys = new ArrayList<SQLForeignKey>();
+    List<SQLUniqueConstraint> uniqueConstraints = new ArrayList<>();
+    List<SQLNotNullConstraint> notNullConstraints = new ArrayList<>();
+    List<SQLDefaultConstraint> defaultConstraints = new ArrayList<>();
+    List<SQLCheckConstraint> checkConstraints = new ArrayList<>();
+    List<Order> sortCols = new ArrayList<Order>();
+    int numBuckets = -1;
+    String comment = null;
+    String location = null;
+    Map<String, String> tblProps = null;
+    boolean ifNotExists = false;
+    boolean isExt = false;
+    boolean isTemporary = false;
+    boolean isManaged = false;
+    boolean isMaterialization = false;
+    boolean isTransactional = false;
+    ASTNode selectStmt = null;
+    final int CREATE_TABLE = 0; // regular CREATE TABLE
+    final int CTLT = 1;         // CREATE TABLE LIKE ... (CTLT)
+    final int CTAS = 2;         // CREATE TABLE AS SELECT ... (CTAS)
+    final int CTT = 3;          // CREATE TRANSACTIONAL TABLE
+    final int CTLF = 4;         // CREATE TABLE LIKE FILE
+    int command_type = CREATE_TABLE;
+    List<String> skewedColNames = new ArrayList<String>();
+    List<List<String>> skewedValues = new ArrayList<List<String>>();
+    Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>();
+    boolean storedAsDirs = false;
+    boolean isUserStorageFormat = false;
+    boolean partitionTransformSpecExists = false;
+    String likeFile = null;
+    String likeFileFormat = null;
+    String sortOrder = null;
+    RowFormatParams rowFormatParams = new RowFormatParams();
+    StorageFormat storageFormat = new StorageFormat(conf);
+
+    LOG.info("Creating table " + dbDotTab + " position=" + ast.getCharPositionInLine());
+    int numCh = ast.getChildCount();
+
+    // set storage handler if default handler is provided in config
+    String defaultStorageHandler = HiveConf.getVar(conf, HIVE_DEFAULT_STORAGE_HANDLER);
+    if (defaultStorageHandler != null && !defaultStorageHandler.isEmpty()) {
+      LOG.info("Default storage handler class detected in config. 
Using storage handler class if exists: '{}'",
+          defaultStorageHandler);
+      storageFormat.setStorageHandler(defaultStorageHandler);
+      isUserStorageFormat = true;
+    }
+
+    /*
+     * Check the 1st-level children and do simple semantic checks: 1) CTLT and
+     * CTAS should not coexist. 2) CTLT or CTAS should not coexist with a column
+     * list (target table schema). 3) CTAS does not support partitioning (for
+     * now).
+     */
+    for (int num = 1; num < numCh; num++) {
+      ASTNode child = (ASTNode) ast.getChild(num);
+      if (storageFormat.fillStorageFormat(child)) {
+        isUserStorageFormat = true;
+        continue;
+      }
+      switch (child.getToken().getType()) {
+      case HiveParser.TOK_IFNOTEXISTS:
+        ifNotExists = true;
+        break;
+      case HiveParser.KW_EXTERNAL:
+        isExt = true;
+        break;
+      case HiveParser.KW_MANAGED:
+        isManaged = true;
+        isTransactional = true;
+        break;
+      case HiveParser.KW_TEMPORARY:
+        isTemporary = true;
+        isMaterialization = MATERIALIZATION_MARKER.equals(child.getText());
+        break;
+      case HiveParser.KW_TRANSACTIONAL:
+        isTransactional = true;
+        command_type = CTT;
+        break;
+      case HiveParser.TOK_LIKEFILE:
+        if (cols.size() != 0) {
+          throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg());
+        }
+        likeFileFormat = getUnescapedName((ASTNode) child.getChild(0));
+        likeFile = getUnescapedName((ASTNode) child.getChild(1));
+        command_type = CTLF;
+        break;
+      case HiveParser.TOK_LIKETABLE:
+        if (child.getChildCount() > 0) {
+          likeTableName = getUnescapedName((ASTNode) child.getChild(0));
+          if (likeTableName != null) {
+            if (command_type == CTAS) {
+              throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
+            }
+            if (cols.size() != 0) {
+              throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE.getMsg());
+            }
+          }
+          command_type = CTLT;
+        }
+        break;
+
+      case HiveParser.TOK_QUERY: // CTAS
+        if (command_type == CTLT) {
+          throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
+        }
+        if (cols.size() != 0) {
+          throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg());
+        }
+        if (partCols.size() != 0 || bucketCols.size() != 0) {
+          boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMIC_PARTITIONING);
+          if (dynPart == false) {
+            throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
+          } else {
+            // TODO: support dynamic partition for CTAS
+            throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
+          }
+        }
+        if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_CTAS_EXTERNAL_TABLES) && isExt) {
+          throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg());
+        }
+        command_type = CTAS;
+        if (plannerCtx != null) {
+          plannerCtx.setCTASToken(child);
+        }
+        selectStmt = child;
+        break;
+      case HiveParser.TOK_TABCOLLIST:
+        cols = getColumns(child, true, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, uniqueConstraints,
+            notNullConstraints, defaultConstraints, checkConstraints, conf);
+        break;
+      case HiveParser.TOK_TABLECOMMENT:
+        comment = unescapeSQLString(child.getChild(0).getText());
+        break;
+      case HiveParser.TOK_TABLEPARTCOLS:
+        partCols = getColumns(child, false, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys, uniqueConstraints,
+            notNullConstraints, defaultConstraints, checkConstraints, conf);
+        if (hasConstraints(partCols, defaultConstraints, notNullConstraints, checkConstraints)) {
+          //TODO: these constraints should be supported for partition columns
+          throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg(
+              "NOT NULL, DEFAULT and CHECK constraints are not allowed with " + "partition columns. 
")); + } + break; + case HiveParser.TOK_TABLEPARTCOLSBYSPEC: + SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, + PartitionTransform.getPartitionTransformSpec(child)); + partitionTransformSpecExists = true; + break; + case HiveParser.TOK_TABLEPARTCOLNAMES: + partColNames = getColumnNames(child); + break; + case HiveParser.TOK_ALTERTABLE_BUCKETS: + bucketCols = getColumnNames((ASTNode) child.getChild(0)); + if (child.getChildCount() == 2) { + numBuckets = Integer.parseInt(child.getChild(1).getText()); + } else { + sortCols = getColumnNamesOrder((ASTNode) child.getChild(1)); + numBuckets = Integer.parseInt(child.getChild(2).getText()); + } + break; + case HiveParser.TOK_WRITE_LOCALLY_ORDERED: + sortOrder = getSortOrderJson((ASTNode) child.getChild(0)); + break; + case HiveParser.TOK_TABLEROWFORMAT: + rowFormatParams.analyzeRowFormat(child); + break; + case HiveParser.TOK_TABLELOCATION: + location = unescapeSQLString(child.getChild(0).getText()); + location = EximUtil.relativeToAbsolutePath(conf, location); + inputs.add(toReadEntity(location)); + break; + case HiveParser.TOK_TABLEPROPERTIES: + tblProps = getProps((ASTNode) child.getChild(0)); + addPropertyReadEntry(tblProps, inputs); + break; + case HiveParser.TOK_TABLESERIALIZER: + child = (ASTNode) child.getChild(0); + storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); + if (child.getChildCount() == 2) { + readProps((ASTNode) (child.getChild(1).getChild(0)), storageFormat.getSerdeProps()); + } + break; + case HiveParser.TOK_TABLESKEWED: + /** + * Throw an error if the user tries to use the DDL with + * hive.internal.ddl.list.bucketing.enable set to false. + */ + HiveConf hiveConf = SessionState.get().getConf(); + + // skewed column names + skewedColNames = SkewedTableUtils.analyzeSkewedTableDDLColNames(child); + // skewed value + skewedValues = SkewedTableUtils.analyzeDDLSkewedValues(child); + // stored as directories + storedAsDirs = analyzeStoredAdDirs(child); + + break; + default: + throw new AssertionError("Unknown token: " + child.getToken()); + } + } + + validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists); + + if (command_type == CREATE_TABLE || command_type == CTLT || command_type == CTT || command_type == CTLF) { + queryState.setCommandType(HiveOperation.CREATETABLE); + } else if (command_type == CTAS) { + queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT); + } else { + throw new SemanticException("Unrecognized command."); + } + + if (isExt && ConstraintsUtils.hasEnabledOrValidatedConstraints(notNullConstraints, defaultConstraints, + checkConstraints)) { + throw new SemanticException(ErrorMsg.INVALID_CSTR_SYNTAX.getMsg( + "Constraints are disallowed with External tables. 
" + "Only RELY is allowed.")); + } + if (checkConstraints != null && !checkConstraints.isEmpty()) { + ConstraintsUtils.validateCheckConstraint(cols, checkConstraints, ctx.getConf()); + } + + storageFormat.fillDefaultStorageFormat(isExt, false); + + // check for existence of table + if (ifNotExists) { + try { + Table table = getTable(qualifiedTabName, false); + if (table != null) { // table exists + return null; + } + } catch (HiveException e) { + // should not occur since second parameter to getTableWithQN is false + throw new IllegalStateException("Unexpected Exception thrown: " + e.getMessage(), e); + } + } + + if (isTemporary) { + if (location == null) { + // for temporary tables we set the location to something in the session's scratch dir + // it has the same life cycle as the tmp table + try { + // Generate a unique ID for temp table path. + // This path will be fixed for the life of the temp table. + location = SessionState.generateTempTableLocation(conf); + } catch (MetaException err) { + throw new SemanticException("Error while generating temp table path:", err); + } + } + } + + // Handle different types of CREATE TABLE command + // Note: each branch must call addDbAndTabToOutputs after finalizing table properties. + Database database = getDatabase(qualifiedTabName.getDb()); + boolean isDefaultTableTypeChanged = false; + if (database.getParameters() != null) { + String defaultTableType = database.getParameters().getOrDefault(DEFAULT_TABLE_TYPE, null); + if (defaultTableType != null && defaultTableType.equalsIgnoreCase("external")) { + isExt = true; + isDefaultTableTypeChanged = true; + } else if (defaultTableType != null && defaultTableType.equalsIgnoreCase("acid")) { + isDefaultTableTypeChanged = true; + if (isExt) { // create external table on db with default type as acid + isTransactional = false; + } else { + isTransactional = true; + } + } + } + switch (command_type) { + case CTLF: + try { + if (!SchemaInferenceUtils.doesSupportSchemaInference(conf, likeFileFormat)) { + throw new SemanticException(ErrorMsg.CTLF_UNSUPPORTED_FORMAT.getErrorCodedMsg(likeFileFormat)); + } + } catch (HiveException e) { + throw new SemanticException(e.getMessage(), e); + } + // fall through + case CREATE_TABLE: // REGULAR CREATE TABLE DDL + if (!CollectionUtils.isEmpty(partColNames)) { + throw new SemanticException( + "Partition columns can only declared using their name and types in regular CREATE TABLE statements"); + } + tblProps = + validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, + isTemporary, isTransactional, isManaged, + new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); + isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); + addDbAndTabToOutputs(new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); + if (!Strings.isNullOrEmpty(sortOrder)) { + tblProps.put("default-sort-order", sortOrder); + } + CreateTableDesc crtTblDesc = + new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, + rowFormatParams.getFieldDelim(), rowFormatParams.getFieldEscape(), rowFormatParams.getCollItemDelim(), + rowFormatParams.getMapKeyDelim(), rowFormatParams.getLineDelim(), comment, + storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), + storageFormat.getStorageHandler(), 
storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, + skewedValues, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, + checkConstraints); + crtTblDesc.setStoredAsSubDirectories(storedAsDirs); + crtTblDesc.setNullFormat(rowFormatParams.getNullFormat()); + crtTblDesc.setLikeFile(likeFile); + crtTblDesc.setLikeFileFormat(likeFileFormat); + + crtTblDesc.validate(conf); + // outputs is empty, which means this create table happens in the current + // database. + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblDesc))); + String tblLocation = null; + if (location != null) { + tblLocation = location; + } else { + tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt); + } + try { + HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler()); + if (storageHandler != null) { + storageHandler.addResourcesForCreateTable(tblProps, conf); + } + } catch (HiveException e) { + throw new RuntimeException(e); + } + SessionStateUtil.addResourceOrThrow(conf, META_TABLE_LOCATION, tblLocation); + break; + case CTT: // CREATE TRANSACTIONAL TABLE + if (isExt && !isDefaultTableTypeChanged) { + throw new SemanticException( + qualifiedTabName.getTable() + " cannot be declared transactional because it's an external table"); + } + tblProps = + validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, + isTemporary, isTransactional, isManaged, + new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); + isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); + addDbAndTabToOutputs(new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + TableType.MANAGED_TABLE, false, tblProps, storageFormat); + + CreateTableDesc crtTranTblDesc = + new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, + rowFormatParams.getFieldDelim(), rowFormatParams.getFieldEscape(), rowFormatParams.getCollItemDelim(), + rowFormatParams.getMapKeyDelim(), rowFormatParams.getLineDelim(), comment, + storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), + storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, + skewedValues, primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, + checkConstraints); + crtTranTblDesc.setStoredAsSubDirectories(storedAsDirs); + crtTranTblDesc.setNullFormat(rowFormatParams.getNullFormat()); + + crtTranTblDesc.validate(conf); + // outputs is empty, which means this create table happens in the current + // database. 
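+        // As in the CREATE TABLE branch above, the descriptor is wrapped in a DDLWork and scheduled as a root
+        // task; the transactional table properties themselves were injected earlier by validateAndAddDefaultProperties.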
+ rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTranTblDesc))); + break; + + case CTLT: // create table like <tbl_name> + + tblProps = + validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, + isTemporary, + + isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + isDefaultTableTypeChanged); + tblProps.put(hive_metastoreConstants.TABLE_IS_CTLT, "true"); + isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); + addDbAndTabToOutputs(new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, + TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); + + Table likeTable = getTable(likeTableName, false); + if (likeTable != null) { + if (isTemporary || isExt || isIcebergTable(tblProps)) { + updateDefaultTblProps(likeTable.getParameters(), tblProps, new ArrayList<>( + Arrays.asList(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES))); + } else { + updateDefaultTblProps(likeTable.getParameters(), tblProps, null); + } + } + if (likeTable.getTableType() == TableType.EXTERNAL_TABLE && HiveConf.getBoolVar(conf, + HiveConf.ConfVars.CREATE_TABLE_AS_EXTERNAL)) { + isExt = true; + } + CreateTableLikeDesc crtTblLikeDesc = + new CreateTableLikeDesc(dbDotTab, isExt, isTemporary, storageFormat.getInputFormat(), + storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getSerdeProps(), + tblProps, ifNotExists, likeTableName, isUserStorageFormat); + tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt); + SessionStateUtil.addResource(conf, META_TABLE_LOCATION, tblLocation); + rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc))); + break; + + case CTAS: // create table as select + + if (isTemporary) { + if (!ctx.isExplainSkipExecution() && !isMaterialization) { + SessionState ss = SessionState.get(); + if (ss == null) { + throw new SemanticException( + "No current SessionState, cannot create temporary table " + qualifiedTabName.getNotEmptyDbTable()); + } + Map<String, Table> tables = SessionHiveMetaStoreClient.getTempTablesForDatabase(qualifiedTabName.getDb(), + qualifiedTabName.getTable()); + if (tables != null && tables.containsKey(qualifiedTabName.getTable())) { + throw new SemanticException( + "Temporary table " + qualifiedTabName.getNotEmptyDbTable() + " already exists"); + } + } + } else { + // Verify that the table does not already exist + // dumpTable is only used to check the conflict for non-temporary tables + try { + Table dumpTable = db.newTable(dbDotTab); + if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) + && !ctx.isExplainSkipExecution()) { + throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab)); + } + } catch (HiveException e) { + throw new SemanticException(e); + } + } + + if (location != null && location.length() != 0) { + Path locPath = new Path(location); + FileSystem curFs = null; + FileStatus locStats = null; + try { + curFs = locPath.getFileSystem(conf); + if (curFs != null) { + locStats = curFs.getFileStatus(locPath); + } + if (locStats != null && locStats.isDir()) { + FileStatus[] lStats = curFs.listStatus(locPath); + if (lStats != null && lStats.length != 0) { + // Don't throw an exception if the target location only contains the staging-dirs + for (FileStatus lStat : lStats) { + if 
(!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGING_DIR))) {
+                throw new SemanticException(ErrorMsg.CTAS_LOCATION_NONEMPTY.getMsg(location));
+              }
+            }
+          }
+        }
+      } catch (FileNotFoundException nfe) {
+        // we will create the folder if it does not exist.
+      } catch (IOException ioE) {
+        LOG.debug("Exception when validating folder", ioE);
+      }
+      tblLocation = location;
+    } else {
+      tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt);
+    }
+    SessionStateUtil.addResource(conf, META_TABLE_LOCATION, tblLocation);
+    if (!CollectionUtils.isEmpty(partCols)) {
+      throw new SemanticException("Partition columns can only be declared using their names in CTAS statements");
+    }
+
+    tblProps =
+        validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization,
+            isTemporary, isTransactional, isManaged,
+            new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged);
+    isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged);
+    tblProps.put(TABLE_IS_CTAS, "true");
+    addDbAndTabToOutputs(new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()},
+        TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
+    tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partColNames, bucketCols, sortCols,
+        numBuckets, rowFormatParams.getFieldDelim(), rowFormatParams.getFieldEscape(),
+        rowFormatParams.getCollItemDelim(), rowFormatParams.getMapKeyDelim(), rowFormatParams.getLineDelim(),
+        comment, storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location,
+        storageFormat.getSerde(), storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps,
+        ifNotExists, skewedColNames, skewedValues, true, primaryKeys, foreignKeys, uniqueConstraints,
+        notNullConstraints, defaultConstraints, checkConstraints);
+    tableDesc.setMaterialization(isMaterialization);
+    tableDesc.setStoredAsSubDirectories(storedAsDirs);
+    tableDesc.setNullFormat(rowFormatParams.getNullFormat());
+    qb.setTableDesc(tableDesc);
+
+    return selectStmt;
+
+    default:
+      throw new SemanticException("Unrecognized command.");
+    }
+    return null;
+  }
+}

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 99a00ffd5fd..b2da8195c4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -233,15 +233,39 @@ public boolean isPrepareQuery() {
     return prepareQuery;
   }
 
-  static final class RowFormatParams {
-    String fieldDelim = null;
-    String fieldEscape = null;
-    String collItemDelim = null;
-    String mapKeyDelim = null;
-    String lineDelim = null;
-    String nullFormat = null;
-
-    protected void analyzeRowFormat(ASTNode child) throws SemanticException {
+  public static final class RowFormatParams {
+    private String fieldDelim;
+    private String fieldEscape;
+    private String collItemDelim;
+    private String mapKeyDelim;
+    private String lineDelim;
+    private String nullFormat;
+
+    public String getFieldDelim() {
+      return fieldDelim;
+    }
+
+    public String getFieldEscape() {
+      return fieldEscape;
+    }
+
+    public String getCollItemDelim() {
+      return collItemDelim;
+    }
+
+    public String getMapKeyDelim() {
+      return mapKeyDelim;
+    }
+
+    public String getNullFormat() {
+      return nullFormat;
+    }
+
+    public String getLineDelim() {
+      return
lineDelim; + } + + public void analyzeRowFormat(ASTNode child) throws SemanticException { child = (ASTNode) child.getChild(0); int numChildRowFormat = child.getChildCount(); for (int numC = 0; numC < numChildRowFormat; numC++) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 053e344d329..5150de01baa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -143,6 +143,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -616,21 +617,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept // save the resultSchema before rewriting it originalResultSchema = resultSchema; } - if (cboCtx.type == PreCboCtx.Type.VIEW) { - try { - viewSelect = handleCreateViewDDL(newAST); - } catch (SemanticException e) { - throw new CalciteViewSemanticException(e.getMessage()); - } - } else if (cboCtx.type == PreCboCtx.Type.CTAS) { - // CTAS - init(false); - setAST(newAST); - newAST = reAnalyzeCTASAfterCbo(newAST); - } else { - // All others - init(false); - } + newAST = handlePostCboRewriteContext(cboCtx, newAST); if (oldHints.size() > 0) { if (getQB().getParseInfo().getHints() != null) { LOG.warn("Hints are not null in the optimized tree; " @@ -738,6 +725,21 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept return sinkOp; } + protected ASTNode handlePostCboRewriteContext(PreCboCtx cboCtx, ASTNode newAST) + throws SemanticException { + if (cboCtx.type == PreCboCtx.Type.VIEW) { + try { + viewSelect = handleCreateViewDDL(newAST); + } catch (SemanticException e) { + throw new CalciteViewSemanticException(e.getMessage()); + } + } else { + // All others + init(false); + } + return newAST; + } + private String getOptimizedByCboInfo() { String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, ""); String cboInfo = "Plan optimized by CBO."; @@ -1061,7 +1063,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { createTable.addChild(temporary); createTable.addChild(cte.cteNode); - CalcitePlanner analyzer = new CalcitePlanner(queryState); + CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState); analyzer.initCtx(ctx); analyzer.init(false); @@ -1113,7 +1115,7 @@ String fixCtasColumnName(String colName) { * (currently, this is used for CTAS and insert-as-select). */ protected static class PreCboCtx extends PlannerContext { - enum Type { + public enum Type { NONE, INSERT, MULTI_INSERT, CTAS, VIEW, UNEXPECTED } @@ -1131,8 +1133,12 @@ private void set(Type type, ASTNode ast) { this.nodeOfInterest = ast; } + public Type getType() { + return type; + } + @Override - void setCTASToken(ASTNode child) { + public void setCTASToken(ASTNode child) { set(PreCboCtx.Type.CTAS, child); } @@ -1200,18 +1206,6 @@ protected ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx c } } - ASTNode reAnalyzeCTASAfterCbo(ASTNode newAst) throws SemanticException { - // analyzeCreateTable uses this.ast, but doPhase1 doesn't, so only reset it - // here. 
- newAst = analyzeCreateTable(newAst, getQB(), null); - if (newAst == null) { - LOG.error("analyzeCreateTable failed to initialize CTAS after CBO;" + " new ast is " - + getAST().dump()); - throw new SemanticException("analyzeCreateTable failed to initialize CTAS after CBO"); - } - return newAst; - } - ASTNode reAnalyzeViewAfterCbo(ASTNode newAst) throws SemanticException { // analyzeCreateView uses this.ast, but doPhase1 doesn't, so only reset it // here. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 289224df0c0..d6eba2f1c9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -23,17 +23,13 @@ import static org.apache.hadoop.hive.common.AcidConstants.SOFT_DELETE_TABLE; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMIC_PARTITION_CONVERT; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ARCHIVE_ENABLED; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_STATS_DBCLASS; import static org.apache.hadoop.hive.conf.HiveConf.shouldComputeLineage; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_TYPE; import static org.apache.hadoop.hive.ql.ddl.view.create.AbstractCreateViewAnalyzer.validateTablesUsed; import static org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.NON_FK_FILTERED; -import java.io.FileNotFoundException; import java.io.IOException; import java.security.AccessControlException; import java.util.ArrayDeque; @@ -66,7 +62,6 @@ import java.util.regex.PatternSyntaxException; import java.util.stream.Collectors; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; @@ -106,12 +101,6 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint; -import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint; -import org.apache.hadoop.hive.metastore.api.SQLForeignKey; -import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; -import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; -import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.SourceTable; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; @@ -126,15 +115,9 @@ import org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties; import org.apache.hadoop.hive.ql.ddl.DDLWork; import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc; -import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFieldDesc; -import org.apache.hadoop.hive.ql.ddl.misc.sortoder.SortFields; -import org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils; -import org.apache.hadoop.hive.ql.ddl.table.convert.AlterTableConvertOperation; 
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; -import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc; -import org.apache.hadoop.hive.ql.ddl.table.storage.skewed.SkewedTableUtils; import org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc; import org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; @@ -179,7 +162,6 @@ import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; -import org.apache.hadoop.hive.ql.io.SchemaInferenceUtils; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.SemanticDispatcher; @@ -197,7 +179,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.Optimizer; @@ -330,7 +311,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; -import com.google.common.base.Strings; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -358,7 +338,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { public static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__"; /** Marks the temporary table created for a serialized CTE. The table is scoped to the query. 
*/ - static final String MATERIALIZATION_MARKER = "$MATERIALIZATION"; + protected static final String MATERIALIZATION_MARKER = "$MATERIALIZATION"; private static final String RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM = "RESULTS_CACHE_KEY_PROGRAM"; private Map<TableScanOperator, ExprNodeDesc> opToPartPruner; @@ -393,7 +373,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { protected List<FieldSchema> originalResultSchema; protected CreateMaterializedViewDesc createVwDesc; private MaterializedViewUpdateDesc materializedViewUpdateDesc; - private List<String> viewsExpanded; + protected + List<String> viewsExpanded; protected ASTNode viewSelect; protected final UnparseTranslator unparseTranslator; private final GlobalLimitCtx globalLimitCtx; @@ -449,9 +430,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { */ private boolean rootTasksResolved; - private TableMask tableMask; + protected TableMask tableMask; - CreateTableDesc tableDesc; + protected CreateTableDesc tableDesc; protected AnalyzeRewriteContext analyzeRewrite; @@ -479,15 +460,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { private static final CommonToken DOT_TOKEN = new ImmutableCommonToken(HiveParser.DOT, "."); - private static final String[] UPDATED_TBL_PROPS = { - hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, - hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, - hive_metastoreConstants.TABLE_BUCKETING_VERSION - }; - private int subQueryExpressionAliasCounter = 0; - private static final ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); - static class Phase1Ctx { + protected static final ObjectMapper JSON_OBJECT_MAPPER = new ObjectMapper(); + + public static class Phase1Ctx { String dest; int nextNum; } @@ -1107,7 +1083,7 @@ public static String generateErrorMessage(ASTNode ast, String message) { return sb.toString(); } - ASTNode getAST() { + protected ASTNode getAST() { return this.ast; } @@ -1759,7 +1735,7 @@ private String processLateralView(QB qb, ASTNode lateralView) } @SuppressWarnings({"fallthrough", "nls"}) - boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) + protected boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx) throws SemanticException { return doPhase1(ast, qb, ctx_1, plannerCtx, this.aliasToCTEs); } @@ -2342,7 +2318,7 @@ void getMetaData(QB qb) throws SemanticException { getMetaData(qb, false); } - void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException { + protected void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException { try { Map<String, CTEClause> materializationAliasToCTEs = null; if (enableMaterialization) { @@ -2704,13 +2680,13 @@ private void getMetaData(QB qb, ReadEntity parentInput) switch (child.getToken().getType()) { case HiveParser.TOK_TABLEROWFORMAT: rowFormatParams.analyzeRowFormat(child); - directoryDesc.setFieldDelim(rowFormatParams.fieldDelim); - directoryDesc.setLineDelim(rowFormatParams.lineDelim); - directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim); - directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim); - directoryDesc.setFieldEscape(rowFormatParams.fieldEscape); - directoryDesc.setNullFormat(rowFormatParams.nullFormat); - directoryDescIsSet=true; + directoryDesc.setFieldDelim(rowFormatParams.getFieldDelim()); + directoryDesc.setLineDelim(rowFormatParams.getLineDelim()); + directoryDesc.setCollItemDelim(rowFormatParams.getCollItemDelim()); + directoryDesc.setMapKeyDelim(rowFormatParams.getMapKeyDelim()); + 
directoryDesc.setFieldEscape(rowFormatParams.getFieldEscape()); + directoryDesc.setNullFormat(rowFormatParams.getNullFormat()); + directoryDescIsSet = true; break; case HiveParser.TOK_TABLESERIALIZER: ASTNode serdeChild = (ASTNode) child.getChild(0); @@ -12719,7 +12695,7 @@ private void LVmergeRowResolvers(RowResolver source, RowResolver dest, } @SuppressWarnings("nls") - Phase1Ctx initPhase1Ctx() { + protected Phase1Ctx initPhase1Ctx() { Phase1Ctx ctx_1 = new Phase1Ctx(); ctx_1.nextNum = 0; ctx_1.dest = "reduce"; @@ -12745,9 +12721,9 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { /** * Planner specific stuff goes in here. */ - static class PlannerContext { + public static class PlannerContext { - void setCTASToken(ASTNode child) { + public void setCTASToken(ASTNode child) { } void setViewToken(ASTNode child) { @@ -12980,7 +12956,7 @@ private ParseResult rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast } } - void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException { + protected void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException { int tokenType = ast.getToken().getType(); if (tokenType == HiveParser.TOK_FUNCTION || tokenType == HiveParser.TOK_FUNCTIONDI || @@ -13000,7 +12976,7 @@ void gatherUserSuppliedFunctions(ASTNode ast) throws SemanticException { } } - boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { + protected boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { ASTNode child = ast; this.ast = ast; viewsExpanded = new ArrayList<String>(); @@ -13008,18 +12984,9 @@ boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws Sema // 1. analyze and process the position alias // step processPositionAlias out of genResolvedParseTree - // 2. analyze create table command - if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { - // if it is not CTAS, we don't need to go further and just return - if ((child = analyzeCreateTable(ast, qb, plannerCtx)) == null) { - return false; - } - } else { - // TODO: reiterate on this in HIVE-28750 - queryState.setCommandType(HiveOperation.QUERY); - } + queryState.setCommandType(HiveOperation.QUERY); - // 3. analyze create view command + // 2. analyze create view command if (ast.getToken().getType() == HiveParser.TOK_CREATE_MATERIALIZED_VIEW) { child = analyzeCreateView(ast, qb, plannerCtx); if (child == null) { @@ -13058,6 +13025,12 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { return false; } + // 3. analysis and resolution of child tree + return analyzeAndResolveChildTree(child, plannerCtx); + } + + protected boolean analyzeAndResolveChildTree(ASTNode child, PlannerContext plannerCtx) + throws SemanticException { // masking and filtering should be created here // the basic idea is similar to unparseTranslator. tableMask = new TableMask(this, conf, ctx.isSkipTableMasking()); @@ -13066,7 +13039,7 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { // determine if authorization checks need to occur on the UDFs. gatherUserSuppliedFunctions(child); - // 4. continue analyzing from the child ASTNode. + // Continue analyzing from the child ASTNode Phase1Ctx ctx_1 = initPhase1Ctx(); if (!doPhase1(child, qb, ctx_1, plannerCtx)) { // if phase1Result false return @@ -13074,7 +13047,7 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) { } LOG.info("Completed phase 1 of Semantic Analysis"); - // 5. 
Resolve Parse Tree + // Resolve Parse Tree and Get Metadata // Materialization is allowed if it is not a view definition getMetaData(qb, createVwDesc == null && !forViewCreation); LOG.info("Completed getting MetaData in Semantic Analysis"); @@ -13843,114 +13816,7 @@ private void validate(Task<?> task, boolean reworkMapredWork) } } - /** - * Update the default table properties with values fetch from the original table properties. The property names are - * defined in {@link SemanticAnalyzer#UPDATED_TBL_PROPS}. - * @param source properties of source table, must be not null. - * @param target properties of target table. - * @param skipped a list of properties which should be not overwritten. It can be null or empty. - */ - private void updateDefaultTblProps(Map<String, String> source, Map<String, String> target, List<String> skipped) { - if (source == null || target == null) { - return; - } - for (String property : UPDATED_TBL_PROPS) { - if ((skipped == null || !skipped.contains(property)) && source.containsKey(property)) { - target.put(property, source.get(property)); - } - } - } - - /** - * Add default properties for table property. If a default parameter exists - * in the tblProp, the value in tblProp will be kept. - * - * @param tblProp - * property map - * @return Modified table property map - */ - private Map<String, String> validateAndAddDefaultProperties( - Map<String, String> tblProp, boolean isExt, StorageFormat storageFormat, - String qualifiedTableName, List<Order> sortCols, boolean isMaterialization, - boolean isTemporaryTable, boolean isTransactional, boolean isManaged, String[] qualifiedTabName, boolean isTableTypeChanged) throws SemanticException { - Map<String, String> retValue = Optional.ofNullable(tblProp).orElseGet(HashMap::new); - - String paraString = HiveConf.getVar(conf, ConfVars.NEW_TABLE_DEFAULT_PARA); - if (paraString != null && !paraString.isEmpty()) { - for (String keyValuePair : paraString.split(",")) { - String[] keyValue = keyValuePair.split("=", 2); - if (keyValue.length != 2) { - continue; - } - if (!retValue.containsKey(keyValue[0])) { - retValue.put(keyValue[0], keyValue[1]); - } - } - } - if (!retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) - && retValue.containsKey(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)) { - throw new SemanticException("Cannot specify " - + hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES - + " without " + hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); - } - isExt = isExternalTableChanged(retValue, isTransactional, isExt, isTableTypeChanged); - - if (isExt && HiveConf.getBoolVar(conf, ConfVars.HIVE_EXTERNALTABLE_PURGE_DEFAULT)) { - if (retValue.get(MetaStoreUtils.EXTERNAL_TABLE_PURGE) == null) { - retValue.put(MetaStoreUtils.EXTERNAL_TABLE_PURGE, "true"); - } - } - - boolean makeInsertOnly = !isTemporaryTable && HiveConf.getBoolVar( - conf, ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY); - boolean makeAcid = !isTemporaryTable && makeAcid(); - // if not specify managed table and create.table.as.external is true - // ignore makeInsertOnly and makeAcid. 
- if (!isManaged && HiveConf.getBoolVar(conf, ConfVars.CREATE_TABLE_AS_EXTERNAL)) { - makeInsertOnly = false; - makeAcid = false; - } - if ((makeInsertOnly || makeAcid || isTransactional || isManaged) - && !isExt && !isMaterialization && StringUtils.isBlank(storageFormat.getStorageHandler()) - //don't overwrite user choice if transactional attribute is explicitly set - && !retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL)) { - if (makeInsertOnly || isTransactional) { - retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); - retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, - TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); - } - if (makeAcid || isTransactional || (isManaged && !makeInsertOnly)) { - retValue = convertToAcidByDefault(storageFormat, qualifiedTableName, sortCols, retValue); - } - } - if (!isExt) { - addDbAndTabToOutputs(qualifiedTabName, - TableType.MANAGED_TABLE, isTemporaryTable, retValue, storageFormat); - } else { - addDbAndTabToOutputs(qualifiedTabName, - TableType.EXTERNAL_TABLE, isTemporaryTable, retValue, storageFormat); - } - - if (isIcebergTable(retValue)) { - SessionStateUtil.addResourceOrThrow(conf, SessionStateUtil.DEFAULT_TABLE_LOCATION, - getDefaultLocation(qualifiedTabName[0], qualifiedTabName[1], true)); - } - return retValue; - } - - /** - * This api is used to determine where to create acid tables are not. - * if the default table type is set to external, then create transactional table should result in acid tables, - * else create table should result in external table. - * */ - private boolean isExternalTableChanged (Map<String, String> tblProp, boolean isTransactional, boolean isExt, boolean isTableTypeChanged) { - if (isTableTypeChanged && tblProp != null && tblProp.getOrDefault(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false").equalsIgnoreCase("true") || isTransactional) { - isExt = false; - } - return isExt; - } - - private Map<String, String> convertToAcidByDefault( + protected Map<String, String> convertToAcidByDefault( StorageFormat storageFormat, String qualifiedTableName, List<Order> sortCols, Map<String, String> retValue) { /*for CTAS, TransactionalValidationListener.makeAcid() runs to late to make table Acid @@ -13980,563 +13846,7 @@ private Map<String, String> convertToAcidByDefault( return retValue; } - /** - * Checks to see if given partition columns has DEFAULT or CHECK constraints (whether ENABLED or DISABLED) - * Or has NOT NULL constraints (only ENABLED) - * @param partCols partition columns - * @param defConstraints default constraints - * @param notNullConstraints not null constraints - * @param checkConstraints CHECK constraints - * @return true or false - */ - private boolean hasConstraints(final List<FieldSchema> partCols, final List<SQLDefaultConstraint> defConstraints, - final List<SQLNotNullConstraint> notNullConstraints, - final List<SQLCheckConstraint> checkConstraints) { - for(FieldSchema partFS: partCols) { - for(SQLDefaultConstraint dc:defConstraints) { - if(dc.getColumn_name().equals(partFS.getName())) { - return true; - } - } - for(SQLCheckConstraint cc:checkConstraints) { - if(cc.getColumn_name().equals(partFS.getName())) { - return true; - } - } - for(SQLNotNullConstraint nc:notNullConstraints) { - if(nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) { - return true; - } - } - } - return false; - } - - /** - * Analyze the create table command. 
-  /**
-   * Checks to see if the given partition columns have DEFAULT or CHECK constraints (whether ENABLED or DISABLED),
-   * or have NOT NULL constraints (only ENABLED).
-   * @param partCols partition columns
-   * @param defConstraints default constraints
-   * @param notNullConstraints not null constraints
-   * @param checkConstraints CHECK constraints
-   * @return true or false
-   */
-  private boolean hasConstraints(final List<FieldSchema> partCols, final List<SQLDefaultConstraint> defConstraints,
-                                 final List<SQLNotNullConstraint> notNullConstraints,
-                                 final List<SQLCheckConstraint> checkConstraints) {
-    for(FieldSchema partFS: partCols) {
-      for(SQLDefaultConstraint dc:defConstraints) {
-        if(dc.getColumn_name().equals(partFS.getName())) {
-          return true;
-        }
-      }
-      for(SQLCheckConstraint cc:checkConstraints) {
-        if(cc.getColumn_name().equals(partFS.getName())) {
-          return true;
-        }
-      }
-      for(SQLNotNullConstraint nc:notNullConstraints) {
-        if(nc.getColumn_name().equals(partFS.getName()) && nc.isEnable_cstr()) {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
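The rule hasConstraints enforces is subtle: DEFAULT and CHECK constraints disqualify a partition column whether enabled or disabled, while NOT NULL only does so when enabled. A sketch with simplified stand-in types (Java 17 records; not Hive's metastore classes):

    import java.util.List;

    public class PartColConstraintSketch {
      record Constraint(String columnName, String kind, boolean enabled) {}

      /** DEFAULT and CHECK on a partition column always match; NOT NULL only when enabled. */
      static boolean hasDisallowedConstraint(List<String> partCols, List<Constraint> constraints) {
        for (String col : partCols) {
          for (Constraint c : constraints) {
            if (!c.columnName().equals(col)) {
              continue;
            }
            switch (c.kind()) {
              case "DEFAULT", "CHECK" -> { return true; }           // enabled or disabled
              case "NOT NULL" -> { if (c.enabled()) return true; }  // only when enabled
              default -> { }
            }
          }
        }
        return false;
      }

      public static void main(String[] args) {
        List<Constraint> cs = List.of(new Constraint("ds", "NOT NULL", false));
        System.out.println(hasDisallowedConstraint(List.of("ds"), cs)); // false: disabled NOT NULL is fine
      }
    }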
-
-  /**
-   * Analyze the create table command. If it is a regular create-table or
-   * create-table-like statement, we create a DDLWork and return null. If it is
-   * a create-table-as-select, we get the necessary info such as the SerDe and
-   * Storage Format, put it in QB, and return the SELECT subtree, indicating that
-   * the rest of the semantic analyzer needs to deal with the select statement
-   * with respect to the SerDe and Storage Format.
-   */
-  ASTNode analyzeCreateTable(
-      ASTNode ast, QB qb, PlannerContext plannerCtx) throws SemanticException {
-    TableName qualifiedTabName = getQualifiedTableName((ASTNode) ast.getChild(0));
-    final String dbDotTab = qualifiedTabName.getNotEmptyDbTable();
-
-    String likeTableName = null;
-    List<FieldSchema> cols = new ArrayList<FieldSchema>();
-    List<FieldSchema> partCols = new ArrayList<FieldSchema>();
-    List<String> partColNames = new ArrayList<>();
-    List<String> bucketCols = new ArrayList<String>();
-    List<SQLPrimaryKey> primaryKeys = new ArrayList<SQLPrimaryKey>();
-    List<SQLForeignKey> foreignKeys = new ArrayList<SQLForeignKey>();
-    List<SQLUniqueConstraint> uniqueConstraints = new ArrayList<>();
-    List<SQLNotNullConstraint> notNullConstraints = new ArrayList<>();
-    List<SQLDefaultConstraint> defaultConstraints = new ArrayList<>();
-    List<SQLCheckConstraint> checkConstraints = new ArrayList<>();
-    List<Order> sortCols = new ArrayList<Order>();
-    int numBuckets = -1;
-    String comment = null;
-    String location = null;
-    Map<String, String> tblProps = null;
-    boolean ifNotExists = false;
-    boolean isExt = false;
-    boolean isTemporary = false;
-    boolean isManaged = false;
-    boolean isMaterialization = false;
-    boolean isTransactional = false;
-    ASTNode selectStmt = null;
-    final int CREATE_TABLE = 0; // regular CREATE TABLE
-    final int CTLT = 1;         // CREATE TABLE LIKE ... (CTLT)
-    final int CTAS = 2;         // CREATE TABLE AS SELECT ... (CTAS)
-    final int CTT = 3;          // CREATE TRANSACTIONAL TABLE
-    final int CTLF = 4;         // CREATE TABLE LIKE FILE
-    int command_type = CREATE_TABLE;
-    List<String> skewedColNames = new ArrayList<String>();
-    List<List<String>> skewedValues = new ArrayList<List<String>>();
-    Map<List<String>, String> listBucketColValuesMapping = new HashMap<List<String>, String>();
-    boolean storedAsDirs = false;
-    boolean isUserStorageFormat = false;
-    boolean partitionTransformSpecExists = false;
-    String likeFile = null;
-    String likeFileFormat = null;
-    String sortOrder = null;
-    RowFormatParams rowFormatParams = new RowFormatParams();
-    StorageFormat storageFormat = new StorageFormat(conf);
-
-    LOG.info("Creating table " + dbDotTab + " position=" + ast.getCharPositionInLine());
-    int numCh = ast.getChildCount();
-
-    // set storage handler if default handler is provided in config
-    String defaultStorageHandler = HiveConf.getVar(conf, HIVE_DEFAULT_STORAGE_HANDLER);
-    if (defaultStorageHandler != null && !defaultStorageHandler.isEmpty()) {
-      LOG.info("Default storage handler class detected in config. Using storage handler class if exists: '{}'",
-          defaultStorageHandler);
-      storageFormat.setStorageHandler(defaultStorageHandler);
-      isUserStorageFormat = true;
-    }
-
-    // CREATE TABLE is a DDL and yet it's not handled by DDLSemanticAnalyzerFactory
-    // FIXME: HIVE-28724
-    queryProperties.setQueryType(QueryProperties.QueryType.DDL);
-
-    /*
-     * Check the 1st-level children and do simple semantic checks: 1) CTLT and
-     * CTAS should not coexist. 2) CTLT or CTAS should not coexist with a column
-     * list (target table schema). 3) CTAS does not support partitioning (for
-     * now).
-     */
-    for (int num = 1; num < numCh; num++) {
-      ASTNode child = (ASTNode) ast.getChild(num);
-      if (storageFormat.fillStorageFormat(child)) {
-        isUserStorageFormat = true;
-        continue;
-      }
-      switch (child.getToken().getType()) {
-      case HiveParser.TOK_IFNOTEXISTS:
-        ifNotExists = true;
-        break;
-      case HiveParser.KW_EXTERNAL:
-        isExt = true;
-        break;
-      case HiveParser.KW_MANAGED:
-        isManaged = true;
-        isTransactional = true;
-        break;
-      case HiveParser.KW_TEMPORARY:
-        isTemporary = true;
-        isMaterialization = MATERIALIZATION_MARKER.equals(child.getText());
-        break;
-      case HiveParser.KW_TRANSACTIONAL:
-        isTransactional = true;
-        command_type = CTT;
-        break;
-      case HiveParser.TOK_LIKEFILE:
-        if (cols.size() != 0) {
-          throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE
-              .getMsg());
-        }
-        likeFileFormat = getUnescapedName((ASTNode) child.getChild(0));
-        likeFile = getUnescapedName((ASTNode) child.getChild(1));
-        command_type = CTLF;
-        break;
-      case HiveParser.TOK_LIKETABLE:
-        if (child.getChildCount() > 0) {
-          likeTableName = getUnescapedName((ASTNode) child.getChild(0));
-          if (likeTableName != null) {
-            if (command_type == CTAS) {
-              throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE
-                  .getMsg());
-            }
-            if (cols.size() != 0) {
-              throw new SemanticException(ErrorMsg.CTLT_COLLST_COEXISTENCE
                  .getMsg());
-            }
-          }
-          command_type = CTLT;
-        }
-        break;
-
-      case HiveParser.TOK_QUERY: // CTAS
-        if (command_type == CTLT) {
-          throw new SemanticException(ErrorMsg.CTAS_CTLT_COEXISTENCE.getMsg());
-        }
-        if (cols.size() != 0) {
-          throw new SemanticException(ErrorMsg.CTAS_COLLST_COEXISTENCE.getMsg());
-        }
-        if (partCols.size() != 0 || bucketCols.size() != 0) {
-          boolean dynPart = HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMIC_PARTITIONING);
-          if (dynPart == false) {
-            throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
-          } else {
-            // TODO: support dynamic partition for CTAS
-            throw new SemanticException(ErrorMsg.CTAS_PARCOL_COEXISTENCE.getMsg());
-          }
-        }
-        if (!conf.getBoolVar(ConfVars.HIVE_CTAS_EXTERNAL_TABLES) && isExt) {
-          throw new SemanticException(ErrorMsg.CTAS_EXTTBL_COEXISTENCE.getMsg());
-        }
-        command_type = CTAS;
-        if (plannerCtx != null) {
-          plannerCtx.setCTASToken(child);
-        }
-        selectStmt = child;
-        break;
-      case HiveParser.TOK_TABCOLLIST:
-        cols = getColumns(child, true, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys,
-            uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf);
-        break;
-      case HiveParser.TOK_TABLECOMMENT:
-        comment = unescapeSQLString(child.getChild(0).getText());
-        break;
-      case HiveParser.TOK_TABLEPARTCOLS:
-        partCols = getColumns(child, false, ctx.getTokenRewriteStream(), primaryKeys, foreignKeys,
-            uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints, conf);
-        if(hasConstraints(partCols, defaultConstraints, notNullConstraints, checkConstraints)) {
-          //TODO: these constraints should be supported for partition columns
-          throw new SemanticException(
-              ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("NOT NULL,DEFAULT and CHECK Constraints are not allowed with " +
                  "partition columns. "));
-        }
-        break;
-      case HiveParser.TOK_TABLEPARTCOLSBYSPEC:
-        SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
-            PartitionTransform.getPartitionTransformSpec(child));
-        partitionTransformSpecExists = true;
-        break;
-      case HiveParser.TOK_TABLEPARTCOLNAMES:
-        partColNames = getColumnNames(child);
-        break;
-      case HiveParser.TOK_ALTERTABLE_BUCKETS:
-        bucketCols = getColumnNames((ASTNode) child.getChild(0));
-        if (child.getChildCount() == 2) {
-          numBuckets = Integer.parseInt(child.getChild(1).getText());
-        } else {
-          sortCols = getColumnNamesOrder((ASTNode) child.getChild(1));
-          numBuckets = Integer.parseInt(child.getChild(2).getText());
-        }
-        break;
-      case HiveParser.TOK_WRITE_LOCALLY_ORDERED:
-        sortOrder = getSortOrderJson((ASTNode) child.getChild(0));
-        break;
-      case HiveParser.TOK_TABLEROWFORMAT:
-        rowFormatParams.analyzeRowFormat(child);
-        break;
-      case HiveParser.TOK_TABLELOCATION:
-        location = unescapeSQLString(child.getChild(0).getText());
-        location = EximUtil.relativeToAbsolutePath(conf, location);
-        inputs.add(toReadEntity(location));
-        break;
-      case HiveParser.TOK_TABLEPROPERTIES:
-        tblProps = getProps((ASTNode) child.getChild(0));
-        addPropertyReadEntry(tblProps, inputs);
-        break;
-      case HiveParser.TOK_TABLESERIALIZER:
-        child = (ASTNode) child.getChild(0);
-        storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText()));
-        if (child.getChildCount() == 2) {
-          readProps((ASTNode) (child.getChild(1).getChild(0)),
-              storageFormat.getSerdeProps());
-        }
-        break;
-      case HiveParser.TOK_TABLESKEWED:
-        /**
-         * Throw an error if the user tries to use the DDL with
-         * hive.internal.ddl.list.bucketing.enable set to false.
-         */
-        HiveConf hiveConf = SessionState.get().getConf();
-
-        // skewed column names
-        skewedColNames = SkewedTableUtils.analyzeSkewedTableDDLColNames(child);
-        // skewed values
-        skewedValues = SkewedTableUtils.analyzeDDLSkewedValues(child);
-        // stored as directories
-        storedAsDirs = analyzeStoredAdDirs(child);
-
-        break;
-      default:
-        throw new AssertionError("Unknown token: " + child.getToken());
-      }
-    }
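One easy-to-miss detail in the loop above is the TOK_ALTERTABLE_BUCKETS case: two children mean CLUSTERED BY (...) INTO n BUCKETS, while three mean an extra SORTED BY list sits in the middle. A toy re-implementation over a simplified node type (not Hive's ASTNode; Java 17):

    import java.util.List;

    public class BucketSpecSketch {
      record Node(String text, List<Node> children) {
        Node child(int i) { return children.get(i); }
        int childCount() { return children.size(); }
      }
      record BucketSpec(List<String> bucketCols, List<String> sortCols, int numBuckets) {}

      /** Child 0: bucket columns; optional child 1: sort columns; last child: bucket count. */
      static BucketSpec parse(Node bucketsToken) {
        List<String> bucketCols =
            bucketsToken.child(0).children().stream().map(Node::text).toList();
        if (bucketsToken.childCount() == 2) {
          return new BucketSpec(bucketCols, List.of(),
              Integer.parseInt(bucketsToken.child(1).text()));
        }
        List<String> sortCols =
            bucketsToken.child(1).children().stream().map(Node::text).toList();
        return new BucketSpec(bucketCols, sortCols,
            Integer.parseInt(bucketsToken.child(2).text()));
      }

      public static void main(String[] args) {
        Node cols = new Node("cols", List.of(new Node("id", List.of())));
        Node spec = new Node("TOK_ALTERTABLE_BUCKETS", List.of(cols, new Node("4", List.of())));
        System.out.println(parse(spec)); // BucketSpec[bucketCols=[id], sortCols=[], numBuckets=4]
      }
    }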
")); - } - break; - case HiveParser.TOK_TABLEPARTCOLSBYSPEC: - SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, - PartitionTransform.getPartitionTransformSpec(child)); - partitionTransformSpecExists = true; - break; - case HiveParser.TOK_TABLEPARTCOLNAMES: - partColNames = getColumnNames(child); - break; - case HiveParser.TOK_ALTERTABLE_BUCKETS: - bucketCols = getColumnNames((ASTNode) child.getChild(0)); - if (child.getChildCount() == 2) { - numBuckets = Integer.parseInt(child.getChild(1).getText()); - } else { - sortCols = getColumnNamesOrder((ASTNode) child.getChild(1)); - numBuckets = Integer.parseInt(child.getChild(2).getText()); - } - break; - case HiveParser.TOK_WRITE_LOCALLY_ORDERED: - sortOrder = getSortOrderJson((ASTNode) child.getChild(0)); - break; - case HiveParser.TOK_TABLEROWFORMAT: - rowFormatParams.analyzeRowFormat(child); - break; - case HiveParser.TOK_TABLELOCATION: - location = unescapeSQLString(child.getChild(0).getText()); - location = EximUtil.relativeToAbsolutePath(conf, location); - inputs.add(toReadEntity(location)); - break; - case HiveParser.TOK_TABLEPROPERTIES: - tblProps = getProps((ASTNode) child.getChild(0)); - addPropertyReadEntry(tblProps, inputs); - break; - case HiveParser.TOK_TABLESERIALIZER: - child = (ASTNode) child.getChild(0); - storageFormat.setSerde(unescapeSQLString(child.getChild(0).getText())); - if (child.getChildCount() == 2) { - readProps((ASTNode) (child.getChild(1).getChild(0)), - storageFormat.getSerdeProps()); - } - break; - case HiveParser.TOK_TABLESKEWED: - /** - * Throw an error if the user tries to use the DDL with - * hive.internal.ddl.list.bucketing.enable set to false. - */ - HiveConf hiveConf = SessionState.get().getConf(); - - // skewed column names - skewedColNames = SkewedTableUtils.analyzeSkewedTableDDLColNames(child); - // skewed value - skewedValues = SkewedTableUtils.analyzeDDLSkewedValues(child); - // stored as directories - storedAsDirs = analyzeStoredAdDirs(child); - - break; - default: - throw new AssertionError("Unknown token: " + child.getToken()); - } - } - - validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists); - - if (command_type == CREATE_TABLE || command_type == CTLT || command_type == CTT || command_type == CTLF) { - queryState.setCommandType(HiveOperation.CREATETABLE); - } else if (command_type == CTAS) { - queryState.setCommandType(HiveOperation.CREATETABLE_AS_SELECT); - } else { - throw new SemanticException("Unrecognized command."); - } - - if (isExt && ConstraintsUtils.hasEnabledOrValidatedConstraints(notNullConstraints, defaultConstraints, - checkConstraints)) { - throw new SemanticException( - ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Constraints are disallowed with External tables. 
" - + "Only RELY is allowed.")); - } - if (checkConstraints != null && !checkConstraints.isEmpty()) { - ConstraintsUtils.validateCheckConstraint(cols, checkConstraints, ctx.getConf()); - } - - storageFormat.fillDefaultStorageFormat(isExt, false); - - // check for existence of table - if (ifNotExists) { - try { - Table table = getTable(qualifiedTabName, false); - if (table != null) { // table exists - return null; - } - } catch (HiveException e) { - // should not occur since second parameter to getTableWithQN is false - throw new IllegalStateException("Unexpected Exception thrown: " + e.getMessage(), e); - } - } - - if (isTemporary) { - if (location == null) { - // for temporary tables we set the location to something in the session's scratch dir - // it has the same life cycle as the tmp table - try { - // Generate a unique ID for temp table path. - // This path will be fixed for the life of the temp table. - location = SessionState.generateTempTableLocation(conf); - } catch (MetaException err) { - throw new SemanticException("Error while generating temp table path:", err); - } - } - } - - // Handle different types of CREATE TABLE command - // Note: each branch must call addDbAndTabToOutputs after finalizing table properties. - Database database = getDatabase(qualifiedTabName.getDb()); - boolean isDefaultTableTypeChanged = false; - if(database.getParameters() != null) { - String defaultTableType = database.getParameters().getOrDefault(DEFAULT_TABLE_TYPE, null); - if (defaultTableType != null && defaultTableType.equalsIgnoreCase("external")) { - isExt = true; - isDefaultTableTypeChanged = true; - } else if (defaultTableType != null && defaultTableType.equalsIgnoreCase("acid")) { - isDefaultTableTypeChanged = true; - if (isExt) { // create external table on db with default type as acid - isTransactional = false; - } else { - isTransactional = true; - } - } - } - switch (command_type) { - case CTLF: - try { - if (!SchemaInferenceUtils.doesSupportSchemaInference(conf, likeFileFormat)) { - throw new SemanticException(ErrorMsg.CTLF_UNSUPPORTED_FORMAT.getErrorCodedMsg(likeFileFormat)); - } - } catch (HiveException e) { - throw new SemanticException(e.getMessage(), e); - } - // fall through - case CREATE_TABLE: // REGULAR CREATE TABLE DDL - if (!CollectionUtils.isEmpty(partColNames)) { - throw new SemanticException( - "Partition columns can only declared using their name and types in regular CREATE TABLE statements"); - } - tblProps = validateAndAddDefaultProperties( - tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary, - isTransactional, isManaged, new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged); - isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged); - addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, - TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat); - if (!Strings.isNullOrEmpty(sortOrder)) { - tblProps.put("default-sort-order", sortOrder); - } - CreateTableDesc crtTblDesc = new CreateTableDesc(qualifiedTabName, - isExt, isTemporary, cols, partCols, - bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, - rowFormatParams.fieldEscape, - rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, - comment, - storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location, storageFormat.getSerde(), - storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, 
-    case CTT: // CREATE TRANSACTIONAL TABLE
-      if (isExt && !isDefaultTableTypeChanged) {
-        throw new SemanticException(
-            qualifiedTabName.getTable() + " cannot be declared transactional because it's an external table");
-      }
-      tblProps = validateAndAddDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization,
-          isTemporary, isTransactional, isManaged, new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged);
-      isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged);
-      addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()},
-          TableType.MANAGED_TABLE, false, tblProps, storageFormat);
-
-      CreateTableDesc crtTranTblDesc =
-          new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets,
-              rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim,
-              rowFormatParams.mapKeyDelim, rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(),
-              storageFormat.getOutputFormat(), location, storageFormat.getSerde(), storageFormat.getStorageHandler(),
-              storageFormat.getSerdeProps(), tblProps, ifNotExists, skewedColNames, skewedValues, primaryKeys,
-              foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
-      crtTranTblDesc.setStoredAsSubDirectories(storedAsDirs);
-      crtTranTblDesc.setNullFormat(rowFormatParams.nullFormat);
-
-      crtTranTblDesc.validate(conf);
-      // outputs is empty, which means this create table happens in the current
-      // database.
-      rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTranTblDesc)));
-      break;
-
-    case CTLT: // create table like <tbl_name>
-
-      tblProps = validateAndAddDefaultProperties(
-          tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary,
-          isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged);
-      tblProps.put(hive_metastoreConstants.TABLE_IS_CTLT, "true");
-      isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged);
-      addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()},
-          TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
-
-      Table likeTable = getTable(likeTableName, false);
-      if (likeTable != null) {
-        if (isTemporary || isExt || isIcebergTable(tblProps)) {
-          updateDefaultTblProps(likeTable.getParameters(), tblProps,
-              new ArrayList<>(Arrays.asList(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL,
-                  hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)));
-        } else {
-          updateDefaultTblProps(likeTable.getParameters(), tblProps, null);
-        }
-      }
-      if (likeTable.getTableType() == TableType.EXTERNAL_TABLE &&
-          HiveConf.getBoolVar(conf, ConfVars.CREATE_TABLE_AS_EXTERNAL)) {
-        isExt = true;
-      }
-      CreateTableLikeDesc crtTblLikeDesc = new CreateTableLikeDesc(dbDotTab, isExt, isTemporary,
-          storageFormat.getInputFormat(), storageFormat.getOutputFormat(), location,
-          storageFormat.getSerde(), storageFormat.getSerdeProps(), tblProps, ifNotExists,
-          likeTableName, isUserStorageFormat);
-      tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt);
-      SessionStateUtil.addResource(conf, META_TABLE_LOCATION, tblLocation);
-      rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), crtTblLikeDesc)));
-      break;
-
-    case CTAS: // create table as select
-
-      if (isTemporary) {
-        if (!ctx.isExplainSkipExecution() && !isMaterialization) {
-          SessionState ss = SessionState.get();
-          if (ss == null) {
-            throw new SemanticException("No current SessionState, cannot create temporary table "
-                + qualifiedTabName.getNotEmptyDbTable());
-          }
-          Map<String, Table> tables = SessionHiveMetaStoreClient.
-              getTempTablesForDatabase(qualifiedTabName.getDb(), qualifiedTabName.getTable());
-          if (tables != null && tables.containsKey(qualifiedTabName.getTable())) {
-            throw new SemanticException("Temporary table " + qualifiedTabName.getNotEmptyDbTable()
-                + " already exists");
-          }
-        }
-      } else {
-        // Verify that the table does not already exist
-        // dumpTable is only used to check the conflict for non-temporary tables
-        try {
-          Table dumpTable = db.newTable(dbDotTab);
-          if (null != db.getTable(dumpTable.getDbName(), dumpTable.getTableName(), false) && !ctx.isExplainSkipExecution()) {
-            throw new SemanticException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(dbDotTab));
-          }
-        } catch (HiveException e) {
-          throw new SemanticException(e);
-        }
-      }
-
-      if (location != null && location.length() != 0) {
-        Path locPath = new Path(location);
-        FileSystem curFs = null;
-        FileStatus locStats = null;
-        try {
-          curFs = locPath.getFileSystem(conf);
-          if(curFs != null) {
-            locStats = curFs.getFileStatus(locPath);
-          }
-          if (locStats != null && locStats.isDir()) {
-            FileStatus[] lStats = curFs.listStatus(locPath);
-            if(lStats != null && lStats.length != 0) {
-              // Don't throw an exception if the target location only contains the staging-dirs
-              for (FileStatus lStat : lStats) {
-                if (!lStat.getPath().getName().startsWith(HiveConf.getVar(conf, HiveConf.ConfVars.STAGING_DIR))) {
-                  throw new SemanticException(ErrorMsg.CTAS_LOCATION_NONEMPTY.getMsg(location));
-                }
-              }
-            }
-          }
-        } catch (FileNotFoundException nfe) {
-          // we will create the folder if it does not exist.
-        } catch (IOException ioE) {
-          LOG.debug("Exception when validating folder", ioE);
-        }
-        tblLocation = location;
-      } else {
-        tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt);
-      }
-      SessionStateUtil.addResource(conf, META_TABLE_LOCATION, tblLocation);
-      if (!CollectionUtils.isEmpty(partCols)) {
-        throw new SemanticException(
-            "Partition columns can only be declared using their names in CTAS statements");
-      }
-
-      tblProps = validateAndAddDefaultProperties(
-          tblProps, isExt, storageFormat, dbDotTab, sortCols, isMaterialization, isTemporary,
-          isTransactional, isManaged, new String[]{qualifiedTabName.getDb(), qualifiedTabName.getTable()}, isDefaultTableTypeChanged);
-      isExt = isExternalTableChanged(tblProps, isTransactional, isExt, isDefaultTableTypeChanged);
-      tblProps.put(TABLE_IS_CTAS, "true");
-      addDbAndTabToOutputs(new String[] {qualifiedTabName.getDb(), qualifiedTabName.getTable()},
-          TableType.MANAGED_TABLE, isTemporary, tblProps, storageFormat);
-      tableDesc = new CreateTableDesc(qualifiedTabName, isExt, isTemporary, cols,
-          partColNames, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim,
-          rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim,
-          rowFormatParams.lineDelim, comment, storageFormat.getInputFormat(),
-          storageFormat.getOutputFormat(), location, storageFormat.getSerde(),
-          storageFormat.getStorageHandler(), storageFormat.getSerdeProps(), tblProps, ifNotExists,
-          skewedColNames, skewedValues, true, primaryKeys, foreignKeys,
-          uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
-      tableDesc.setMaterialization(isMaterialization);
-      tableDesc.setStoredAsSubDirectories(storedAsDirs);
-      tableDesc.setNullFormat(rowFormatParams.nullFormat);
-      qb.setTableDesc(tableDesc);
-
-      return selectStmt;
-
-    default:
-      throw new SemanticException("Unrecognized command.");
-    }
-    return null;
-  }
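The CTAS location validation in the case above tolerates a non-empty target directory only when everything in it is a staging directory. A self-contained approximation using java.nio.file instead of Hadoop's FileSystem; the ".hive-staging" prefix below mirrors what I understand HiveConf's STAGING_DIR defaults to, and both the class and method names are made up:

    import java.io.IOException;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class CtasLocationCheckSketch {
      /** True if the directory is missing, empty, or contains only staging dirs. */
      static boolean isUsableCtasTarget(Path location, String stagingPrefix) throws IOException {
        if (!Files.isDirectory(location)) {
          return true;                 // missing path: it will be created later
        }
        try (DirectoryStream<Path> entries = Files.newDirectoryStream(location)) {
          for (Path entry : entries) {
            if (!entry.getFileName().toString().startsWith(stagingPrefix)) {
              return false;            // real data present -> CTAS must fail
            }
          }
        }
        return true;
      }

      public static void main(String[] args) throws IOException {
        Path dir = Files.createTempDirectory("ctas");
        Files.createDirectory(dir.resolve(".hive-staging_123"));
        System.out.println(isUsableCtasTarget(dir, ".hive-staging")); // true: staging only
        Files.createFile(dir.resolve("part-00000"));
        System.out.println(isUsableCtasTarget(dir, ".hive-staging")); // false: real data
      }
    }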
-
-  private String getDefaultLocation(String dbName, String tableName, boolean isExt) throws SemanticException {
-    String tblLocation;
-    try {
-      Warehouse wh = new Warehouse(conf);
-      tblLocation = wh.getDefaultTablePath(db.getDatabase(dbName), tableName, isExt).toUri().getPath();
-    } catch (MetaException | HiveException e) {
-      throw new SemanticException(e);
-    }
-    return tblLocation;
-  }
-
-  private static boolean isIcebergTable(Map<String, String> tblProps) {
-    return AlterTableConvertOperation.ConversionFormats.ICEBERG.properties().get(META_TABLE_STORAGE)
-        .equalsIgnoreCase(tblProps.get(META_TABLE_STORAGE));
-  }
-
-  private void validateStorageFormat(
+  protected void validateStorageFormat(
       StorageFormat storageFormat, Map<String, String> tblProps, boolean partitionTransformSpecExists)
       throws SemanticException {
     HiveStorageHandler handler;
@@ -14568,7 +13878,7 @@ private void validateStorageFormat(
   }
 
   /** Adds entities for create table/create view. */
-  private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type,
+  protected void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type,
       boolean isTemporary, Map<String, String> tblProps, StorageFormat storageFormat) throws SemanticException {
     Database database = getDatabase(qualifiedTabName[0]);
     outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_SHARED));
@@ -14759,7 +14069,7 @@ protected ASTNode analyzeCreateView(ASTNode ast, QB qb, PlannerContext plannerCt
     return selectStmt;
   }
 
-  private boolean makeAcid() {
+  protected boolean makeAcid() {
     return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID) &&
        HiveConf.getBoolVar(conf, ConfVars.HIVE_SUPPORT_CONCURRENCY) &&
        DbTxnManager.class.getCanonicalName().equals(HiveConf.getVar(conf, ConfVars.HIVE_TXN_MANAGER));
@@ -16172,26 +15482,7 @@ protected void addPartitionColsToInsert(List<FieldSchema> partCols,
       rewrittenQueryStr.append(")");
     }
   }
-
-  private String getSortOrderJson(ASTNode ast) {
-    List<SortFieldDesc> sortFieldDescList = new ArrayList<>();
-    SortFields sortFields = new SortFields(sortFieldDescList);
-    for (int i = 0; i < ast.getChildCount(); i++) {
-      ASTNode child = (ASTNode) ast.getChild(i);
-      SortFieldDesc.SortDirection sortDirection = child.getToken()
-          .getType() == HiveParser.TOK_TABSORTCOLNAMEDESC ? SortFieldDesc.SortDirection.DESC : SortFieldDesc.SortDirection.ASC;
-      child = (ASTNode) child.getChild(0);
-      String name = unescapeIdentifier(child.getChild(0).getText()).toLowerCase();
-      NullOrdering nullOrder = NullOrdering.fromToken(child.getToken().getType());
-      sortFieldDescList.add(new SortFieldDesc(name, sortDirection, nullOrder));
-    }
-    try {
-      return JSON_OBJECT_MAPPER.writer().writeValueAsString(sortFields);
-    } catch (JsonProcessingException e) {
-      LOG.warn("Can not create write order json. ", e);
-      return null;
-    }
-  }
+
   @Override
   public WriteEntity getAcidAnalyzeTable() {
     return acidAnalyzeTable;