[
https://issues.apache.org/jira/browse/HIVE-26102?focusedWorklogId=753221&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-753221
]
ASF GitHub Bot logged work on HIVE-26102:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 06/Apr/22 07:10
Start Date: 06/Apr/22 07:10
Worklog Time Spent: 10m
Work Description: pvary commented on code in PR #3131:
URL: https://github.com/apache/hive/pull/3131#discussion_r843549581
##########
iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergV2.java:
##########
@@ -228,6 +230,104 @@ public void testReadAndWriteFormatV2Partitioned_PosDelete_RowSupplied() throws I
Assert.assertArrayEquals(new Object[] {2L, "Trudy", "Pink"},
objects.get(3));
}
+ @Test
+ public void testDeleteStatementUnpartitioned() {
+ Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized);
+
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ PartitionSpec.unpartitioned(), fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2);
+ // insert one more batch so that we have multiple data files within the same partition
+
shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+
+ shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'");
+
+ List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name");
+ Assert.assertEquals(6, objects.size());
+ List<Record> expected =
TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
+ .add(1L, "Sharon", "Taylor")
+ .add(2L, "Jake", "Donnel")
+ .add(2L, "Susan", "Morrison")
+ .add(2L, "Bob", "Silver")
+ .add(4L, "Laci", "Zold")
+ .add(5L, "Peti", "Rozsaszin")
+ .build();
+ HiveIcebergTestUtils.validateData(expected,
+
HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
objects), 0);
+ }
+
+ @Test
+ public void testDeleteStatementPartitioned() {
+ Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized);
+ PartitionSpec spec =
PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
+ .identity("last_name").bucket("customer_id", 16).build();
+
+ // create and insert an initial batch of records
+ testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ spec, fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2);
+ // insert one more batch so that we have multiple data files within the same partition
+
shell.executeStatement(testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1,
+ TableIdentifier.of("default", "customers"), false));
+
+ shell.executeStatement("DELETE FROM customers WHERE customer_id=3 or first_name='Joanna'");
+
+ List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name");
+ Assert.assertEquals(6, objects.size());
+ List<Record> expected =
TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
+ .add(1L, "Sharon", "Taylor")
+ .add(2L, "Jake", "Donnel")
+ .add(2L, "Susan", "Morrison")
+ .add(2L, "Bob", "Silver")
+ .add(4L, "Laci", "Zold")
+ .add(5L, "Peti", "Rozsaszin")
+ .build();
+ HiveIcebergTestUtils.validateData(expected,
+
HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
objects), 0);
+ }
+
+ @Test
+ public void testDeleteStatementWithOtherTable() {
+ Assume.assumeFalse("Iceberg DELETEs are only implemented for non-vectorized mode for now", isVectorized);
+ PartitionSpec spec =
PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
+ .identity("last_name").bucket("customer_id", 16).build();
+
+ // create a couple of tables, with an initial batch of records
+ testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ spec, fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, 2);
+ testTables.createTable(shell, "other",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ spec, fileFormat,
HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, 2);
+
+ shell.executeStatement("DELETE FROM customers WHERE customer_id in (select t1.customer_id from customers t1 join " +
+ "other t2 on t1.customer_id = t2.customer_id) or " +
+ "first_name in (select first_name from customers where first_name = 'Bob')");
+
+ List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, last_name");
+ Assert.assertEquals(5, objects.size());
+ List<Record> expected =
TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
+ .add(1L, "Joanna", "Pierce")
+ .add(1L, "Sharon", "Taylor")
+ .add(2L, "Jake", "Donnel")
+ .add(2L, "Susan", "Morrison")
+ .add(2L, "Joanna", "Silver")
+ .build();
+ HiveIcebergTestUtils.validateData(expected,
+
HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
objects), 0);
+ }
+
+ @Test
+ public void testDeleteStatementThrowsIfVectorizationEnabled() {
Review Comment:
Would it be possible to turn off vectorization if we are deleting from an
Iceberg table?
Issue Time Tracking
-------------------
Worklog Id: (was: 753221)
Time Spent: 4h 50m (was: 4h 40m)
> Implement DELETE statements for Iceberg tables
> ----------------------------------------------
>
> Key: HIVE-26102
> URL: https://issues.apache.org/jira/browse/HIVE-26102
> Project: Hive
> Issue Type: New Feature
> Reporter: Marton Bod
> Assignee: Marton Bod
> Priority: Major
> Labels: pull-request-available
> Time Spent: 4h 50m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)