[
https://issues.apache.org/jira/browse/HIVE-22977?focusedWorklogId=838346&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-838346
]
ASF GitHub Bot logged work on HIVE-22977:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 10/Jan/23 14:59
Start Date: 10/Jan/23 14:59
Worklog Time Spent: 10m
Work Description: SourabhBadhya commented on code in PR #3801:
URL: https://github.com/apache/hive/pull/3801#discussion_r1065888532
##########
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java:
##########
@@ -2951,4 +2952,170 @@ public void testStatsAfterCompactionPartTbl(boolean
isQueryBased, boolean isAuto
Assert.assertEquals("The number of rows is differing from the expected",
"2", parameters.get("numRows"));
executeStatementOnDriver("drop table if exists " + tblName, driver);
}
+
+ @Test
+ public void testMajorCompactionWithMergeNotPartitionedWithoutBuckets()
throws Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, false, false, null,
Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, true);
+ }
+
+ @Test
+ public void testMajorCompactionWithMergePartitionedWithoutBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, true, false, "ds=today",
Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, true);
+ }
+
+ @Test
+ public void testMajorCompactionWithMergeNotPartitionedWithBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, false, true, null,
Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, true);
+ }
+
+ @Test
+ public void testMajorCompactionWithMergerPartitionedWithBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today",
Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, true);
+ }
+
+ @Test
+ public void testMinorCompactionWithMergeNotPartitionedWithoutBuckets()
throws Exception {
+ testCompactionWithMerge(CompactionType.MINOR, false, false, null,
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"), true,
true);
+ }
+
+ @Test
+ public void testMinorCompactionWithMergePartitionedWithoutBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MINOR, true, false, "ds=today",
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"), true,
true);
+ }
+
+ @Test
+ public void testMinorCompactionWithMergeNotPartitionedWithBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MINOR, false, true, null,
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"), true,
true);
+ }
+
+ @Test
+ public void testMinorCompactionWithMergePartitionedWithBuckets() throws
Exception {
+ testCompactionWithMerge(CompactionType.MINOR, true, true, "ds=today",
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"), true,
true);
+ }
+
+ @Test
+ public void testMajorCompactionAfterMinorWithMerge() throws Exception {
+ testCompactionWithMerge(CompactionType.MINOR, true, true, "ds=today",
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"),true,
false);
+ testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today",
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000003_v0000007",
"delta_0000004_0000004_0000", "delta_0000005_0000005_0000",
+ "delta_0000006_0000006_0000"),
Collections.singletonList("base_0000006_v0000014"), false, true);
+ }
+
+ @Test
+ public void testMinorCompactionAfterMajorWithMerge() throws Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, false, false, null,
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, false);
+ testCompactionWithMerge(CompactionType.MINOR, false, false, null,
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000004_0000004_0000",
"delta_0000005_0000005_0000", "delta_0000006_0000006_0000"),
+ Collections.singletonList("delta_0000001_0000006_v0000014"),
false, true);
+ }
+
+ @Test
+ public void testMultipleMajorCompactionWithMerge() throws Exception {
+ testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today",
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("base_0000003_v0000007"), true, false);
+ testCompactionWithMerge(CompactionType.MAJOR, true, true, "ds=today",
+ Arrays.asList("bucket_00000", "bucket_00001"),
+ Arrays.asList("delta_0000004_0000004_0000",
"delta_0000005_0000005_0000", "delta_0000006_0000006_0000"),
+ Collections.singletonList("base_0000006_v0000014"), false, true);
+ }
+
+ @Test
+ public void testMultipleMinorCompactionWithMerge() throws Exception {
+ testCompactionWithMerge(CompactionType.MINOR, false, false, null,
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000001_0000",
"delta_0000002_0000002_0000", "delta_0000003_0000003_0000"),
+ Collections.singletonList("delta_0000001_0000003_v0000007"), true,
false);
+ testCompactionWithMerge(CompactionType.MINOR, false, false, null,
+ Collections.singletonList("bucket_00000"),
+ Arrays.asList("delta_0000001_0000003_v0000007",
"delta_0000004_0000004_0000", "delta_0000005_0000005_0000",
+ "delta_0000006_0000006_0000"),
+ Collections.singletonList("delta_0000001_0000006_v0000014"),
false, true);
+ }
+
+ private void testCompactionWithMerge(CompactionType compactionType, boolean
isPartitioned, boolean isBucketed,
Review Comment:
Added tests that use only merge compaction, and tests that use mocking to
force a fallback to query-based compaction. Done.
Issue Time Tracking
-------------------
Worklog Id: (was: 838346)
Time Spent: 3h 20m (was: 3h 10m)
> Merge delta files instead of running a query in major/minor compaction
> ----------------------------------------------------------------------
>
> Key: HIVE-22977
> URL: https://issues.apache.org/jira/browse/HIVE-22977
> Project: Hive
> Issue Type: Improvement
> Reporter: László Pintér
> Assignee: Sourabh Badhya
> Priority: Major
> Labels: pull-request-available
> Attachments: HIVE-22977.01.patch, HIVE-22977.02.patch
>
> Time Spent: 3h 20m
> Remaining Estimate: 0h
>
> [Compaction Optimization]
> We should analyse the possibility to move a delta file instead of running a
> major/minor compaction query.
> Please consider the following use cases:
> - full acid table but only insert queries were run. This means that no
> delete delta directories were created. Is it possible to merge the delta
> directory contents without running a compaction query?
> - full acid table, initiating queries through the streaming API. If there
> are no aborted transactions during the streaming, is it possible to merge the
> delta directory contents without running a compaction query?
--
This message was sent by Atlassian Jira
(v8.20.10#820010)