[
https://issues.apache.org/jira/browse/HIVE-21160?focusedWorklogId=777470&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-777470
]
ASF GitHub Bot logged work on HIVE-21160:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 02/Jun/22 12:15
Start Date: 02/Jun/22 12:15
Worklog Time Spent: 10m
Work Description: kgyrtkirk commented on code in PR #2855:
URL: https://github.com/apache/hive/pull/2855#discussion_r887876292
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java:
##########
@@ -8292,8 +8294,8 @@ private WriteEntity generateTableWriteEntity(String dest,
Table dest_tab,
if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
output = new WriteEntity(dest_tab, determineWriteType(ltd, dest));
if (!outputs.add(output)) {
- if(!((this instanceof MergeSemanticAnalyzer) &&
- conf.getBoolVar(ConfVars.MERGE_SPLIT_UPDATE))) {
+ if(!((this instanceof MergeSemanticAnalyzer || this instanceof
UpdateDeleteSemanticAnalyzer) &&
Review Comment:
I believe this could be more readable if the condition were placed into
an instance method, which could be overridden by `MergeSemanticAnalyzer` /
`UpdateDeleteSemanticAnalyzer`
##########
itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java:
##########
@@ -251,6 +252,44 @@ public void testExceucteUpdateCounts() throws Exception {
assertEquals("1 row PreparedStatement update", 1, count);
}
+ @Test
+ public void testExceucteMergeCounts() throws Exception {
+ testExceucteMergeCounts(true);
+ }
+
+ @Test
+ public void testExceucteMergeCountsNoSplitUpdate() throws Exception {
+ testExceucteMergeCounts(false);
+ }
+
+ private void testExceucteMergeCounts(boolean splitUpdateEarly) throws
Exception {
+
+ Statement stmt = con.createStatement();
+ stmt.execute("set " + ConfVars.MERGE_SPLIT_UPDATE.varname + "=" +
splitUpdateEarly);
+ stmt.execute("set " + ConfVars.HIVE_SUPPORT_CONCURRENCY.varname + "=true");
Review Comment:
isn't `MERGE_SPLIT_UPDATE` the old option's name, while the new one is
named `SPLIT_UPDATE`?
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -91,23 +110,15 @@ private void analyzeDelete(ASTNode tree) throws
SemanticException {
* The sort by clause is put in there so that records come out in the right
order to enable
* merge on read.
*/
- private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
+ private void reparseAndSuperAnalyze(ASTNode tree, ASTNode tabNameNode, Table
mTable) throws SemanticException {
List<? extends Node> children = tree.getChildren();
- // The first child should be the table we are updating / deleting from
- ASTNode tabName = (ASTNode)children.get(0);
- assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
- "Expected tablename as first child of " + operation + " but found " +
tabName.getName();
- Table mTable = getTargetTable(tabName);
- validateTxnManager(mTable);
- validateTargetTable(mTable);
-
// save the operation type into the query state
SessionStateUtil.addResource(conf,
Context.Operation.class.getSimpleName(), operation.name());
StringBuilder rewrittenQueryStr = new StringBuilder();
rewrittenQueryStr.append("insert into table ");
- rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
+ rewrittenQueryStr.append(getFullTableNameForSQL(tabNameNode));
Review Comment:
it seems like `tabNameNode` is only used as an argument of this method...
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -48,28 +52,43 @@ public class UpdateDeleteSemanticAnalyzer extends
RewriteSemanticAnalyzer {
super(queryState);
}
- protected void analyze(ASTNode tree) throws SemanticException {
+ @Override
+ protected ASTNode getTargetTableNode(ASTNode tree) {
+ // The first child should be the table we are updating / deleting from
+ ASTNode tabName = (ASTNode)tree.getChild(0);
+ assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
+ "Expected tablename as first child of " + operation + " but found
" + tabName.getName();
+ return tabName;
+ }
+
+ protected void analyze(ASTNode tree, Table table, ASTNode tabNameNode)
throws SemanticException {
switch (tree.getToken().getType()) {
case HiveParser.TOK_DELETE_FROM:
- analyzeDelete(tree);
+ analyzeDelete(tree, tabNameNode, table);
break;
case HiveParser.TOK_UPDATE_TABLE:
- analyzeUpdate(tree);
+ analyzeUpdate(tree, tabNameNode, table);
break;
default:
throw new RuntimeException("Asked to parse token " + tree.getName() + "
in " +
"UpdateDeleteSemanticAnalyzer");
}
}
- private void analyzeUpdate(ASTNode tree) throws SemanticException {
+ private void analyzeUpdate(ASTNode tree, ASTNode tabNameNode, Table mTable)
throws SemanticException {
operation = Context.Operation.UPDATE;
- reparseAndSuperAnalyze(tree);
+ boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(mTable);
+
+ if (HiveConf.getBoolVar(queryState.getConf(),
HiveConf.ConfVars.SPLIT_UPDATE) && !nonNativeAcid) {
+ analyzeSplitUpdate(tree, mTable, tabNameNode);
+ } else {
+ reparseAndSuperAnalyze(tree, tabNameNode, mTable);
Review Comment:
can we use one consistent argument order; either `tree, mTable, tabNameNode` or the other
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -48,28 +52,43 @@ public class UpdateDeleteSemanticAnalyzer extends
RewriteSemanticAnalyzer {
super(queryState);
}
- protected void analyze(ASTNode tree) throws SemanticException {
+ @Override
+ protected ASTNode getTargetTableNode(ASTNode tree) {
+ // The first child should be the table we are updating / deleting from
+ ASTNode tabName = (ASTNode)tree.getChild(0);
+ assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
+ "Expected tablename as first child of " + operation + " but found
" + tabName.getName();
+ return tabName;
+ }
+
+ protected void analyze(ASTNode tree, Table table, ASTNode tabNameNode)
throws SemanticException {
Review Comment:
can't we use the `TableName` object instead of the `ASTNode` of the
`tabNameNode` / why don't we have that info in the `table`?
##########
itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java:
##########
@@ -251,6 +252,44 @@ public void testExceucteUpdateCounts() throws Exception {
assertEquals("1 row PreparedStatement update", 1, count);
}
+ @Test
+ public void testExceucteMergeCounts() throws Exception {
+ testExceucteMergeCounts(true);
+ }
+
+ @Test
+ public void testExceucteMergeCountsNoSplitUpdate() throws Exception {
+ testExceucteMergeCounts(false);
+ }
+
+ private void testExceucteMergeCounts(boolean splitUpdateEarly) throws
Exception {
+
+ Statement stmt = con.createStatement();
+ stmt.execute("set " + ConfVars.MERGE_SPLIT_UPDATE.varname + "=" +
splitUpdateEarly);
+ stmt.execute("set " + ConfVars.HIVE_SUPPORT_CONCURRENCY.varname + "=true");
+ stmt.execute("set " + ConfVars.HIVE_TXN_MANAGER.varname +
+ "=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager");
+
+ stmt.execute("drop table if exists transactional_crud");
+ stmt.execute("drop table if exists source");
+
+ stmt.execute("create table transactional_crud (a int, b int) stored as orc
" +
+ "tblproperties('transactional'='true',
'transactional_properties'='default')");
+ stmt.executeUpdate("insert into transactional_crud
values(1,2),(3,4),(5,6),(7,8),(9,10)");
+
+ stmt.execute("create table source (a int, b int) stored as orc " +
+ "tblproperties('transactional'='true',
'transactional_properties'='default')");
+ stmt.executeUpdate("insert into source
values(1,12),(3,14),(9,19),(100,100)");
+
+ int count = stmt.executeUpdate(" MERGE INTO transactional_crud as t
using source as s ON t.a = s.a\n" +
+ " WHEN MATCHED AND s.a > 7 THEN DELETE\n" +
+ " WHEN MATCHED AND s.a <= 8 THEN UPDATE set b = 100\n" +
+ " WHEN NOT MATCHED THEN INSERT VALUES (s.a, s.b)");
+ stmt.close();
+
+ assertEquals("Statement merge", 4, count);
Review Comment:
is this assert sufficient? The input table has 4 rows, and the merge has a
`not matched` part, so the count should be 4 all the time.
Or is the point that the count should not include the number of deleted records?
Issue Time Tracking
-------------------
Worklog Id: (was: 777470)
Time Spent: 3.5h (was: 3h 20m)
> Rewrite Update statement as Multi-insert and do Update split early
> ------------------------------------------------------------------
>
> Key: HIVE-21160
> URL: https://issues.apache.org/jira/browse/HIVE-21160
> Project: Hive
> Issue Type: Sub-task
> Components: Transactions
> Affects Versions: 3.0.0
> Reporter: Eugene Koifman
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 3.5h
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.7#820007)