[ 
https://issues.apache.org/jira/browse/HIVE-25915?focusedWorklogId=724476&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-724476
 ]

ASF GitHub Bot logged work on HIVE-25915:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/Feb/22 13:06
            Start Date: 10/Feb/22 13:06
    Worklog Time Spent: 10m 
      Work Description: deniskuzZ commented on a change in pull request #3000:
URL: https://github.com/apache/hive/pull/3000#discussion_r803654682



##########
File path: 
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
##########
@@ -69,16 +72,147 @@
 import org.apache.tez.dag.history.logging.proto.ProtoMessageReader;
 import org.junit.Assert;
 import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+import org.mockito.internal.util.reflection.FieldSetter;
 
+import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker;
 import static 
org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.execSelectAndDumpData;
 import static 
org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
 import static 
org.apache.hadoop.hive.ql.txn.compactor.CompactorTestUtil.executeStatementOnDriverAndReturnResults;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.*;
 
 @SuppressWarnings("deprecation")
 public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
+  @Test
+  public void testMinorCompactionShouldBeRefusedOnTablesWithOriginalFiles() 
throws Exception {
+    conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true);
+
+    final String dbName = "default";
+    final String tableName = "compaction_test";
+    executeStatementOnDriver("drop table if exists " + tableName, driver);
+    executeStatementOnDriver("CREATE TABLE " + tableName + "(id string, value 
string) CLUSTERED BY(id) " +
+            "INTO 10 BUCKETS STORED AS ORC 
TBLPROPERTIES('transactional'='false')", driver);
+
+    executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values 
('1','one'),('2','two'),('3','three')," +
+            
"('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten'),"
 +
+            
"('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen'),"
 +
+            
"('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty')", 
driver);
+
+    executeStatementOnDriver("alter table " + tableName + " set 
TBLPROPERTIES('transactional'='true')", driver);
+
+    executeStatementOnDriver("insert into " + tableName + " values ('21', 
'value21'),('84', 'value84')," +
+            "('66', 'value66'),('54', 'value54')", driver);
+    executeStatementOnDriver("insert into " + tableName + " values ('22', 
'value22'),('34', 'value34')," +
+            "('35', 'value35')", driver);
+    executeStatementOnDriver("insert into " + tableName + " values ('75', 
'value75'),('99', 'value99')", driver);
+
+    execSelectAndDumpData("select * from " + tableName, driver, "Dumping data 
for " +
+            tableName + " after load:");
+
+    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
+
+    //Prevent initiator from submitting the compaction requests

Review comment:
       why?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 724476)
    Time Spent: 1h 20m  (was: 1h 10m)

> Query based MINOR compaction fails with NPE if the data is loaded into the 
> ACID table
> -------------------------------------------------------------------------------------
>
>                 Key: HIVE-25915
>                 URL: https://issues.apache.org/jira/browse/HIVE-25915
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>            Reporter: László Végh
>            Assignee: László Végh
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> Steps to reproduce:
>  #  Create a table with import:
> {{CREATE TABLE temp_acid(id string, value string) CLUSTERED BY(id) INTO 10 
> BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');}}
>  # {{insert into temp_acid values 
> ('1','one'),('2','two'),('3','three'),('4','four'),('5','five'),('6','six'),('7','seven'),('8','eight'),('9','nine'),('10','ten'),('11','eleven'),('12','twelve'),('13','thirteen'),('14','fourteen'),('15','fifteen'),('16','sixteen'),('17','seventeen'),('18','eighteen'),('19','nineteen'),('20','twenty');}}
> {{export table temp_acid to '/tmp/temp_acid';}}
> {{import table imported from '/tmp/temp_acid';}}
>  # Do some inserts:
> {{insert into imported values ('21', 'value21'),('84', 'value84'),('66', 
> 'value66'),('54', 'value54');
> insert into imported values ('22', 'value22'),('34', 'value34'),('35', 
> 'value35');
> insert into imported values ('75', 'value75'),('99', 'value99');}}
>  # {{Run a minor compaction}}
> If the data is loaded or imported into the table the way it is described 
> above, the rows in the ORC file don't contain the ACID metadata. The 
> query-based MINOR compaction fails on this kind of table, because when the 
> FileSinkOperator tries to read the bucket metadata from the rows, it 
> throws an NPE. However, deleting from and updating a table like this is 
> possible, so the bucketId can somehow be calculated for rows like these.
> The non-query based MINOR compaction works fine on a table like this.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to