This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 2bdf0cc HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
2bdf0cc is described below
commit 2bdf0ccb94f9555dd0c06131a7fb5defcf8010ed
Author: hsnusonic <[email protected]>
AuthorDate: Fri Dec 10 06:16:04 2021 -0800
HIVE-25773: Column descriptors might not be deleted via direct sql (Yu-Wen Lai reviewed by Peter Vary) (#2843)
---
.../hadoop/hive/metastore/MetaStoreDirectSql.java | 27 +++++++--------
.../hadoop/hive/metastore/TestObjectStore.java | 38 ++++++++++++++++++++--
2 files changed, 49 insertions(+), 16 deletions(-)
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index d28e630..b200608 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -29,12 +29,15 @@ import java.sql.Statement;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
@@ -151,7 +154,7 @@ class MetaStoreDirectSql {
* @return The concatenated list
* @throws MetaException If the list contains wrong data
*/
- public static <T> String getIdListForIn(List<T> objectIds) throws MetaException {
+ public static <T> String getIdListForIn(Collection<T> objectIds) throws MetaException {
return objectIds.stream()
.map(i -> i.toString())
.collect(Collectors.joining(","));
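The hunk above widens getIdListForIn() from List<T> to Collection<T>, so the HashSet of candidate column-descriptor ids introduced later in this patch can be passed in directly. A minimal, self-contained sketch of the same joining logic (the class name IdListSketch and the sample ids are illustrative only, and the MetaException plumbing is omitted):

    import java.util.Arrays;
    import java.util.Collection;
    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.stream.Collectors;

    public class IdListSketch {
      // Same joining logic as getIdListForIn, minus the MetaException declaration.
      static <T> String idListForIn(Collection<T> objectIds) {
        return objectIds.stream()
            .map(i -> i.toString())
            .collect(Collectors.joining(","));
      }

      public static void main(String[] args) {
        Set<Long> ids = new LinkedHashSet<>(Arrays.asList(7L, 8L, 9L));
        // Prints: CD_ID in (7,8,9) -- the string is spliced into a SQL IN clause.
        System.out.println("CD_ID in (" + idListForIn(ids) + ")");
      }
    }

Since the ids are only concatenated for a SQL IN clause, element order does not matter, which is why a Set works as well as a List here.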
@@ -2622,7 +2625,7 @@ class MetaStoreDirectSql {
+ "WHERE " + PARTITIONS + ".\"PART_ID\" in (" + partitionIds + ")";
List<Object> sdIdList = new ArrayList<>(partitionIdList.size());
- List<Object> columnDescriptorIdList = new ArrayList<>(1);
+ List<Long> columnDescriptorIdList = new ArrayList<>(1);
List<Object> serdeIdList = new ArrayList<>(partitionIdList.size());
try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
List<Object[]> sqlResult = MetastoreDirectSqlUtils
@@ -2808,7 +2811,7 @@ class MetaStoreDirectSql {
* @throws MetaException If there is an SQL exception during the execution it converted to
* MetaException
*/
- private void dropDanglingColumnDescriptors(List<Object> columnDescriptorIdList)
+ private void dropDanglingColumnDescriptors(List<Long> columnDescriptorIdList)
throws MetaException {
if (columnDescriptorIdList.isEmpty()) {
return;
@@ -2818,26 +2821,24 @@ class MetaStoreDirectSql {
// Drop column descriptor, if no relation left
queryText =
- "SELECT " + SDS + ".\"CD_ID\", count(1) "
+ "SELECT " + SDS + ".\"CD_ID\" "
+ "from " + SDS + " "
+ "WHERE " + SDS + ".\"CD_ID\" in (" + colIds + ") "
+ "GROUP BY " + SDS + ".\"CD_ID\"";
- List<Object> danglingColumnDescriptorIdList = new ArrayList<>(columnDescriptorIdList.size());
+ Set<Long> danglingColumnDescriptorIdSet = new HashSet<>(columnDescriptorIdList);
try (QueryWrapper query = new QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
- List<Object[]> sqlResult = MetastoreDirectSqlUtils
- .ensureList(executeWithArray(query, null, queryText));
+ List<Long> sqlResult = executeWithArray(query, null, queryText);
if (!sqlResult.isEmpty()) {
- for (Object[] fields : sqlResult) {
- if (MetastoreDirectSqlUtils.extractSqlInt(fields[1]) == 0) {
- danglingColumnDescriptorIdList.add(MetastoreDirectSqlUtils.extractSqlLong(fields[0]));
- }
+ for (Long cdId : sqlResult) {
+ // the returned CD is not dangling, so remove it from the list
+ danglingColumnDescriptorIdSet.remove(cdId);
}
}
}
- if (!danglingColumnDescriptorIdList.isEmpty()) {
+ if (!danglingColumnDescriptorIdSet.isEmpty()) {
try {
- String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdList);
+ String danglingCDIds = getIdListForIn(danglingColumnDescriptorIdSet);
// Drop the columns_v2
queryText = "delete from " + COLUMNS_V2 + " where \"CD_ID\" in (" +
danglingCDIds + ")";
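This hunk is the actual bug fix. The old query selected CD_ID together with count(1) and treated count == 0 as "dangling", but a GROUP BY over the rows that exist in SDS can never yield a zero count: a column descriptor with no remaining references simply does not appear in the result at all, so it was never collected and never deleted. The rewritten code inverts the check: it seeds a set with every candidate CD_ID, removes each id the query returns (those are still referenced), and whatever remains is dangling. A minimal sketch of that set-difference idea (the names candidateCdIds and stillReferencedCdIds are illustrative, not from the patch):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DanglingCdSketch {
      public static void main(String[] args) {
        // Column-descriptor ids whose partitions were just dropped (candidates).
        Set<Long> candidateCdIds = new HashSet<>(Arrays.asList(1L, 2L, 3L));
        // What "SELECT CD_ID FROM SDS WHERE CD_ID IN (1,2,3) GROUP BY CD_ID"
        // would return: ids some storage descriptor still references.
        List<Long> stillReferencedCdIds = Arrays.asList(2L);
        // Ids the query did not return have no remaining reference: dangling.
        candidateCdIds.removeAll(stillReferencedCdIds);
        System.out.println(candidateCdIds); // [1, 3] -> safe to delete
      }
    }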
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
index 37ff22c..4de31fe 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
@@ -563,7 +563,7 @@ public class TestObjectStore {
* Checks if the directSQL partition drop removes every connected data from the RDBMS tables.
*/
@Test
- public void testDirectSQLDropParitionsCleanup() throws Exception {
+ public void testDirectSQLDropPartitionsCleanup() throws Exception {
createPartitionedTable(true, true);
@@ -583,8 +583,8 @@ public class TestObjectStore {
checkBackendTableSize("SERDES", 4); // Table has a serde
// drop the partitions
- try(AutoCloseable c =deadline()) {
- objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+ try (AutoCloseable c = deadline()) {
+ objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false);
}
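Both the whitespace fix above and the new test below wrap metastore calls in try (AutoCloseable c = deadline()) so the deadline timer is started and reliably stopped even if the call throws. A rough sketch of that guard pattern (the deadline() body here is a hypothetical stand-in, not the test class's actual helper):

    public class DeadlineGuardSketch {
      // Hypothetical stand-in for the test's deadline() helper: "start" a timer
      // on creation and "stop" it when the try-with-resources block closes.
      static AutoCloseable deadline() {
        System.out.println("deadline timer started");
        return () -> System.out.println("deadline timer stopped");
      }

      public static void main(String[] args) throws Exception {
        try (AutoCloseable c = deadline()) {
          System.out.println("metastore call running under the deadline");
        }
      }
    }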
@@ -605,6 +605,38 @@ public class TestObjectStore {
}
@Test
+ public void testDirectSQLCDsCleanup() throws Exception {
+ createPartitionedTable(true, true);
+ // Checks there is only one CD before altering partition
+ checkBackendTableSize("PARTITIONS", 3);
+ checkBackendTableSize("CDS", 1);
+ checkBackendTableSize("COLUMNS_V2", 5);
+ // Alters a partition to create a new column descriptor
+ List<String> partVals = Arrays.asList("a0");
+ try (AutoCloseable c = deadline()) {
+ Partition part = objectStore.getPartition(DEFAULT_CATALOG_NAME, DB1, TABLE1, partVals);
+ StorageDescriptor newSd = part.getSd().deepCopy();
+ newSd.addToCols(new FieldSchema("test_add_col", "int", null));
+ Partition newPart = part.deepCopy();
+ newPart.setSd(newSd);
+ objectStore.alterPartition(DEFAULT_CATALOG_NAME, DB1, TABLE1, partVals, newPart, null);
+ }
+ // Checks now there is one more column descriptor
+ checkBackendTableSize("PARTITIONS", 3);
+ checkBackendTableSize("CDS", 2);
+ checkBackendTableSize("COLUMNS_V2", 11);
+ // drop the partitions
+ try (AutoCloseable c = deadline()) {
+ objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+ Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false);
+ }
+ // Checks if the data connected to the partitions is dropped
+ checkBackendTableSize("PARTITIONS", 0);
+ checkBackendTableSize("CDS", 1); // Table has a CD
+ checkBackendTableSize("COLUMNS_V2", 5);
+ }
+
+ @Test
public void testGetPartitionStatistics() throws Exception {
createPartitionedTable(true, true);