This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 2197fbca081 HIVE-28595: Not able to insert data with hive.cbo.enable=false (#5651) 2197fbca081 is described below commit 2197fbca081938c1434d8413381e5cd44c8e5b49 Author: Kiran Velumuri <kiranvelumuri...@gmail.com> AuthorDate: Sat Jun 21 15:03:39 2025 +0530 HIVE-28595: Not able to insert data with hive.cbo.enable=false (#5651) --- .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 91 ++++++++++++-- .../test/queries/clientpositive/insert_cbo_false.q | 26 ++++ .../clientpositive/llap/insert_cbo_false.q.out | 133 +++++++++++++++++++++ 4 files changed, 239 insertions(+), 12 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2f56c408fbe..c2a46d01f75 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -97,6 +97,7 @@ minillap.query.files=\ input36.q,\ input38.q,\ input5.q,\ + insert_cbo_false.q,\ insert_dynamic_partitions_notification_log.q,\ insert_into1.q,\ insert_into2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6136d49c7f4..289224df0c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -315,7 +315,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaConstantStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -4976,12 +4978,51 @@ && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) { pos++; } } + Operator output = generateSelectOperator(dest, selExprList, qb, input, exprList, out_rwsch, colList, selectStar, posn); + if (isInTransform) { + output = genScriptPlan(trfm, qb, output); + } + + if (isUDTF) { + output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV); + + if(!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) + && qb.getParseInfo().getDestSchemaForClause(dest) != null) { + List<ExprNodeDesc> expColList = explodeColListForUDTF(colList); + output = generateSelectOperator(dest, selExprList, qb, output, exprList, opParseCtx.get(output).getRowResolver(), expColList, selectStar, posn); + } + } + + LOG.debug("Created Select Plan row schema: {}", out_rwsch); + return output; + } + + /** + * Generates a Select operator for the given query block and destination. + * This method processes the SELECT clause of the query, creating the necessary + * operator to handle the selection of columns or expressions. + * + * @param dest The destination clause identifier. + * @param selExprList The ASTNode representing the SELECT expression list. + * @param qb The query block containing metadata and context for the query. + * @param input The input operator to which the Select operator will be connected. + * @param exprList The ASTNode representing the list of expressions in the SELECT clause. + * @param out_rwsch The RowResolver for the output schema of the Select operator. + * @param colList The list of column expressions to be included in the Select operator. + * @param selectStar A boolean indicating whether the SELECT clause includes a wildcard (*). + * @param posn The position of the current expression in the SELECT clause. + * @return The generated Select operator. + * @throws SemanticException If there is an error during the generation of the operator. + */ + private Operator<?> generateSelectOperator(String dest, ASTNode selExprList, QB qb, Operator<?> input, + ASTNode exprList, RowResolver out_rwsch, List<ExprNodeDesc> colList, + boolean selectStar, int posn) throws SemanticException { selectStar = selectStar && exprList.getChildCount() == posn + 1; out_rwsch = handleInsertStatementSpec(colList, dest, out_rwsch, qb, selExprList); - List<String> columnNames = new ArrayList<String>(); - Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); + List<String> columnNames = new ArrayList<>(); + Map<String, ExprNodeDesc> colExprMap = new HashMap<>(); for (int i = 0; i < colList.size(); i++) { String outputCol = getColumnInternalName(i); colExprMap.put(outputCol, colList.get(i)); @@ -4989,19 +5030,11 @@ && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) { } Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(colList, columnNames, selectStar), new RowSchema( - out_rwsch.getColumnInfos()), input), out_rwsch); + new SelectDesc(colList, columnNames, selectStar), new RowSchema( + out_rwsch.getColumnInfos()), input), out_rwsch); output.setColumnExprMap(colExprMap); - if (isInTransform) { - output = genScriptPlan(trfm, qb, output); - } - - if (isUDTF) { - output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb, output, outerLV); - } - LOG.debug("Created Select Plan row schema: {}", out_rwsch); return output; } @@ -5058,6 +5091,35 @@ private RowResolver getColForInsertStmtSpec(Map<String, ExprNodeDesc> targetCol2 return newOutputRR; } + /** + * Explodes a list of columns represented by a complex datatype for a User-Defined Table Function (UDTF). + * This method takes a list of column descriptors and explodes it into individual + * columns based on the structure of the list's element type. + * + * @param colList A list of column descriptors representing multiple columns + * with a list type like array<struct<col1:string,col2:int>> + * @return A list of exploded column descriptors, where each column corresponds + * to a field in the struct type of the list's element. + */ + List<ExprNodeDesc> explodeColListForUDTF(List<ExprNodeDesc> colList) { + List<ExprNodeDesc> expColList = new ArrayList<>(); + + ListTypeInfo typeInfo = (ListTypeInfo) colList.get(0).getTypeInfo(); + StructTypeInfo elementTypeInfo = (StructTypeInfo) typeInfo.getListElementTypeInfo(); + + List<String> fieldNames = elementTypeInfo.getAllStructFieldNames(); + List<TypeInfo> typeInfos = elementTypeInfo.getAllStructFieldTypeInfos(); + + for (int i = 0; i < fieldNames.size(); i++) { + ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(); + colDesc.setColumn(fieldNames.get(i)); + colDesc.setTypeInfo(typeInfos.get(i)); + expColList.add(colDesc); + } + + return expColList; + } + /** * This modifies the Select projections when the Select is part of an insert statement and * the insert statement specifies a column list for the target table, e.g. @@ -5084,6 +5146,11 @@ RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, return outputRR; } if(targetTableSchema.size() != col_list.size()) { + if(!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED) + && col_list.get(0).getTypeInfo() instanceof ListTypeInfo + && targetTableSchema.size() == explodeColListForUDTF(col_list).size()){ + return outputRR; + } Table target = qb.getMetaData().getDestTableForAlias(dest); Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null; throw new SemanticException(generateErrorMessage(selExprList, diff --git a/ql/src/test/queries/clientpositive/insert_cbo_false.q b/ql/src/test/queries/clientpositive/insert_cbo_false.q new file mode 100644 index 00000000000..3bb9bd0cb44 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_cbo_false.q @@ -0,0 +1,26 @@ +set hive.cbo.enable=false; + +CREATE TABLE students (name VARCHAR(64), age INT, gpa DECIMAL(3,2)); + +INSERT INTO TABLE students VALUES ('s1', 1, 1.23); + +INSERT INTO TABLE students(name, age, gpa) VALUES('s2', 2, 2.34); + +SELECT * FROM students; + +-- Partitioned table +CREATE TABLE people (id int, name string, age int) PARTITIONED BY (country string); + +INSERT INTO people PARTITION (country="Cuba") VALUES (1, "abc", 23); + +INSERT INTO people PARTITION (country="Denmark") (id, name, age) VALUES (2, "bcd", 34); + +INSERT INTO people PARTITION (country) VALUES (3, "cde", 45, "Egypt"); + +INSERT INTO people PARTITION (country) (id, name, age, country) VALUES (4, "def", 56, "Finland"); + +INSERT INTO people VALUES (5, "efg", 67, "Ghana"); + +INSERT INTO people (id, name, age, country) VALUES (6, "fgh", 78, "Hungary"); + +SELECT * FROM people; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/insert_cbo_false.q.out b/ql/src/test/results/clientpositive/llap/insert_cbo_false.q.out new file mode 100644 index 00000000000..9099d1f4f54 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/insert_cbo_false.q.out @@ -0,0 +1,133 @@ +PREHOOK: query: CREATE TABLE students (name VARCHAR(64), age INT, gpa DECIMAL(3,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@students +POSTHOOK: query: CREATE TABLE students (name VARCHAR(64), age INT, gpa DECIMAL(3,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@students +PREHOOK: query: INSERT INTO TABLE students VALUES ('s1', 1, 1.23) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@students +POSTHOOK: query: INSERT INTO TABLE students VALUES ('s1', 1, 1.23) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@students +POSTHOOK: Lineage: students.age SCRIPT [] +POSTHOOK: Lineage: students.gpa SCRIPT [] +POSTHOOK: Lineage: students.name SCRIPT [] +PREHOOK: query: INSERT INTO TABLE students(name, age, gpa) VALUES('s2', 2, 2.34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@students +POSTHOOK: query: INSERT INTO TABLE students(name, age, gpa) VALUES('s2', 2, 2.34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@students +POSTHOOK: Lineage: students.name EXPRESSION [] +PREHOOK: query: SELECT * FROM students +PREHOOK: type: QUERY +PREHOOK: Input: default@students +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM students +POSTHOOK: type: QUERY +POSTHOOK: Input: default@students +POSTHOOK: Output: hdfs://### HDFS PATH ### +s1 1 1.23 +s2 2 2.34 +PREHOOK: query: CREATE TABLE people (id int, name string, age int) PARTITIONED BY (country string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@people +POSTHOOK: query: CREATE TABLE people (id int, name string, age int) PARTITIONED BY (country string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@people +PREHOOK: query: INSERT INTO people PARTITION (country="Cuba") VALUES (1, "abc", 23) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people@country=Cuba +POSTHOOK: query: INSERT INTO people PARTITION (country="Cuba") VALUES (1, "abc", 23) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people@country=Cuba +POSTHOOK: Lineage: people PARTITION(country=Cuba).age SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Cuba).id SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Cuba).name SCRIPT [] +PREHOOK: query: INSERT INTO people PARTITION (country="Denmark") (id, name, age) VALUES (2, "bcd", 34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people@country=Denmark +POSTHOOK: query: INSERT INTO people PARTITION (country="Denmark") (id, name, age) VALUES (2, "bcd", 34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people@country=Denmark +PREHOOK: query: INSERT INTO people PARTITION (country) VALUES (3, "cde", 45, "Egypt") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people +POSTHOOK: query: INSERT INTO people PARTITION (country) VALUES (3, "cde", 45, "Egypt") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people +POSTHOOK: Output: default@people@country=Egypt +POSTHOOK: Lineage: people PARTITION(country=Egypt).age SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Egypt).id SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Egypt).name SCRIPT [] +PREHOOK: query: INSERT INTO people PARTITION (country) (id, name, age, country) VALUES (4, "def", 56, "Finland") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people +POSTHOOK: query: INSERT INTO people PARTITION (country) (id, name, age, country) VALUES (4, "def", 56, "Finland") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people +POSTHOOK: Output: default@people@country=Finland +PREHOOK: query: INSERT INTO people VALUES (5, "efg", 67, "Ghana") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people +POSTHOOK: query: INSERT INTO people VALUES (5, "efg", 67, "Ghana") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people +POSTHOOK: Output: default@people@country=Ghana +POSTHOOK: Lineage: people PARTITION(country=Ghana).age SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Ghana).id SCRIPT [] +POSTHOOK: Lineage: people PARTITION(country=Ghana).name SCRIPT [] +PREHOOK: query: INSERT INTO people (id, name, age, country) VALUES (6, "fgh", 78, "Hungary") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@people +POSTHOOK: query: INSERT INTO people (id, name, age, country) VALUES (6, "fgh", 78, "Hungary") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@people +POSTHOOK: Output: default@people@country=Hungary +PREHOOK: query: SELECT * FROM people +PREHOOK: type: QUERY +PREHOOK: Input: default@people +PREHOOK: Input: default@people@country=Cuba +PREHOOK: Input: default@people@country=Denmark +PREHOOK: Input: default@people@country=Egypt +PREHOOK: Input: default@people@country=Finland +PREHOOK: Input: default@people@country=Ghana +PREHOOK: Input: default@people@country=Hungary +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * FROM people +POSTHOOK: type: QUERY +POSTHOOK: Input: default@people +POSTHOOK: Input: default@people@country=Cuba +POSTHOOK: Input: default@people@country=Denmark +POSTHOOK: Input: default@people@country=Egypt +POSTHOOK: Input: default@people@country=Finland +POSTHOOK: Input: default@people@country=Ghana +POSTHOOK: Input: default@people@country=Hungary +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 abc 23 Cuba +2 bcd 34 Denmark +3 cde 45 Egypt +4 def 56 Finland +5 efg 67 Ghana +6 fgh 78 Hungary