zhangbutao commented on code in PR #4700: URL: https://github.com/apache/hive/pull/4700#discussion_r1331780361
########## ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java: ########## @@ -177,39 +169,41 @@ private void reparseAndSuperAnalyze(ASTNode tree, Table mTable, ASTNode tabNameN ASTNode where = null; int whereIndex = deleting() ? 1 : 2; + if (children.size() > whereIndex) { where = (ASTNode)children.get(whereIndex); assert where.getToken().getType() == HiveParser.TOK_WHERE : "Expected where clause, but found " + where.getName(); + + if (copyOnWriteMode) { + String whereClause = ctx.getTokenRewriteStream().toString( + where.getChild(0).getTokenStartIndex(), where.getChild(0).getTokenStopIndex()); + + rewrittenQueryStr.append(" where "); + // Add the inverted where clause, since we want to hold the records which doesn't satisfy the condition. + rewrittenQueryStr.append(" not(").append(whereClause).append(")"); + // Add the file path filter that matches the delete condition. + rewrittenQueryStr.append(" and FILE__PATH in ("); + rewrittenQueryStr.append(" select `FILE__PATH` from ").append(getFullTableNameForSQL(tabNameNode)); + rewrittenQueryStr.append(" where ").append(whereClause); + rewrittenQueryStr.append(" )"); - if (shouldOverwrite) { - if (where.getChildCount() == 1) { - - // Add isNull check for the where clause condition, since null is treated as false in where condition and - // not null also resolves to false, so we need to explicitly handle this case. - ASTNode isNullFuncNodeExpr = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION, "TOK_FUNCTION")); - isNullFuncNodeExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "isNull"))); - isNullFuncNodeExpr.addChild(where.getChild(0)); - - ASTNode orNodeExpr = new ASTNode(new CommonToken(HiveParser.KW_OR, "OR")); - orNodeExpr.addChild(isNullFuncNodeExpr); - - // Add the inverted where clause condition, since we want to hold the records which doesn't satisfy this - // condition. 
- ASTNode notNodeExpr = new ASTNode(new CommonToken(HiveParser.KW_NOT, "!")); - notNodeExpr.addChild(where.getChild(0)); - orNodeExpr.addChild(notNodeExpr); - where.setChild(0, orNodeExpr); - } else if (where.getChildCount() > 1) { - throw new SemanticException("Overwrite mode not supported with more than 1 children in where clause."); - } + rewrittenQueryStr.append(" union all "); Review Comment: If I delete using a column value that doesn't exist in the data (i.e. the predicate matches no rows), can we skip executing the redundant `union` & `reduce` tasks? This would avoid launching many unnecessary tasks and let the `delete` job finish as soon as possible. E.g.: `create table icetbl (id int, name string) stored by Iceberg stored as orc TBLPROPERTIES('format-version'='2','write.delete.mode'='copy-on-write');` `insert into icetbl values (1, 'ABC'),(2, 'CBS');` Here the delete filters on a value that does not exist in the table: `delete from icetbl where id >100;` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org