kasakrisz commented on code in PR #4672:
URL: https://github.com/apache/hive/pull/4672#discussion_r1324679981
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java:
##########
@@ -338,4 +373,42 @@ private void addStatTask(ASTNode root, Table table, Path
oldPartitionLocation, P
moveTask.addDependentTask(statTask);
}
}
+
+ public StringBuilder constructDeleteQuery(Table table, Map<String, String>
partitionSpec) throws SemanticException {
+ StringBuilder sb = new StringBuilder().append("delete from
").append(table.getTableName())
+ .append(" where ");
+ List<String> keyList = new ArrayList<String>(partitionSpec.keySet());
+ Deserializer deserializer = table.getDeserializer();
+ Map<String, PrimitiveObjectInspector.PrimitiveCategory> stringTypeInfoMap
= new HashMap<>();
+ try {
+ ObjectInspector objectInspector = deserializer.getObjectInspector();
+ if (objectInspector.getCategory() == ObjectInspector.Category.STRUCT) {
+ StructObjectInspector structObjectInspector = (StructObjectInspector)
objectInspector;
+ List<? extends StructField> structFields =
structObjectInspector.getAllStructFieldRefs();
+ for (int index = 0;index < structFields.size();index++) {
+ StructField structField = structFields.get(index);
Review Comment:
nit.: Is it possible to use foreach here?
```
for (StructField structField : structFields) {
...
```
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java:
##########
@@ -597,12 +600,49 @@ public void
rollbackAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTab
}
@Override
- public void preTruncateTable(org.apache.hadoop.hive.metastore.api.Table
table, EnvironmentContext context)
+ public void preTruncateTable(org.apache.hadoop.hive.metastore.api.Table
table, EnvironmentContext context,
+ List<String> partNames)
throws MetaException {
this.catalogProperties = getCatalogProperties(table);
this.icebergTable = Catalogs.loadTable(conf, catalogProperties);
+ Map<String, PartitionField> partitionFieldMap = Maps.newHashMap();
+ for (PartitionField partField : icebergTable.spec().fields()) {
+ partitionFieldMap.put(partField.name(), partField);
+ }
+ Expression finalExp = Expressions.alwaysTrue();
+ if (partNames != null && !partNames.isEmpty()) {
+ for (String partName : partNames) {
+ String[] partColPairs = partName.split("/");
+ Expression subExp = Expressions.alwaysTrue();
+ for (String partColPair : partColPairs) {
+ String[] partColNameValue = partColPair.split("=");
+ assert partColNameValue.length == 2;
+ String partColName = partColNameValue[0];
+ String partColValue = partColNameValue[1];
+ if (partitionFieldMap.containsKey(partColName)) {
+ PartitionField partitionField = partitionFieldMap.get(partColName);
+ Type resultType =
partitionField.transform().getResultType(icebergTable.schema()
+ .findField(partitionField.sourceId()).type());
+ TransformSpec.TransformType transformType =
IcebergTableUtil.getTransformType(partitionField.transform());
+ Object value = Conversions.fromPartitionString(resultType,
partColValue);
+ Iterable iterable = () ->
Collections.singletonList(value).iterator();
+ if (transformType.equals(TransformSpec.TransformType.IDENTITY)) {
Review Comment:
Enum values can be compared using the `==` operator.
If you prefer `equals`, then please swap the operands like
```
TransformSpec.TransformType.IDENTITY.equals(transformType)
```
for `null` safety
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java:
##########
@@ -338,4 +373,42 @@ private void addStatTask(ASTNode root, Table table, Path
oldPartitionLocation, P
moveTask.addDependentTask(statTask);
}
}
+
+ public StringBuilder constructDeleteQuery(Table table, Map<String, String>
partitionSpec) throws SemanticException {
+ StringBuilder sb = new StringBuilder().append("delete from
").append(table.getTableName())
Review Comment:
This may have to be unescaped
```
table.getTableName()
```
to support quoted table names with special characters
https://github.com/apache/hive/blob/88bc8269a64d31eee372bf3602933c75283c686b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java#L15838C20-L15843
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -1644,4 +1647,68 @@ public void
validatePartSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
}
}
}
+
+ // Metadata delete or a positional delete
+ @Override
+ public boolean shouldTruncate(org.apache.hadoop.hive.ql.metadata.Table
hmsTable, Map<String, String> partitionSpec)
Review Comment:
Could you please add a javadoc describing the criteria for deciding
when to truncate vs. delete?
Plus an example if possible.
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java:
##########
@@ -338,4 +373,42 @@ private void addStatTask(ASTNode root, Table table, Path
oldPartitionLocation, P
moveTask.addDependentTask(statTask);
}
}
+
+ public StringBuilder constructDeleteQuery(Table table, Map<String, String>
partitionSpec) throws SemanticException {
+ StringBuilder sb = new StringBuilder().append("delete from
").append(table.getTableName())
+ .append(" where ");
+ List<String> keyList = new ArrayList<String>(partitionSpec.keySet());
+ Deserializer deserializer = table.getDeserializer();
+ Map<String, PrimitiveObjectInspector.PrimitiveCategory> stringTypeInfoMap
= new HashMap<>();
+ try {
+ ObjectInspector objectInspector = deserializer.getObjectInspector();
+ if (objectInspector.getCategory() == ObjectInspector.Category.STRUCT) {
+ StructObjectInspector structObjectInspector = (StructObjectInspector)
objectInspector;
+ List<? extends StructField> structFields =
structObjectInspector.getAllStructFieldRefs();
+ for (int index = 0;index < structFields.size();index++) {
+ StructField structField = structFields.get(index);
+ if (structField.getFieldObjectInspector().getCategory() ==
ObjectInspector.Category.PRIMITIVE) {
Review Comment:
Is it possible to have categories other than primitive here? If yes, how
should they be handled?
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java:
##########
@@ -338,4 +373,42 @@ private void addStatTask(ASTNode root, Table table, Path
oldPartitionLocation, P
moveTask.addDependentTask(statTask);
}
}
+
+ public StringBuilder constructDeleteQuery(Table table, Map<String, String>
partitionSpec) throws SemanticException {
+ StringBuilder sb = new StringBuilder().append("delete from
").append(table.getTableName())
+ .append(" where ");
+ List<String> keyList = new ArrayList<String>(partitionSpec.keySet());
+ Deserializer deserializer = table.getDeserializer();
+ Map<String, PrimitiveObjectInspector.PrimitiveCategory> stringTypeInfoMap
= new HashMap<>();
+ try {
+ ObjectInspector objectInspector = deserializer.getObjectInspector();
+ if (objectInspector.getCategory() == ObjectInspector.Category.STRUCT) {
+ StructObjectInspector structObjectInspector = (StructObjectInspector)
objectInspector;
+ List<? extends StructField> structFields =
structObjectInspector.getAllStructFieldRefs();
+ for (int index = 0;index < structFields.size();index++) {
+ StructField structField = structFields.get(index);
+ if (structField.getFieldObjectInspector().getCategory() ==
ObjectInspector.Category.PRIMITIVE) {
+ PrimitiveObjectInspector primitiveObjectInspector =
(PrimitiveObjectInspector) structField.getFieldObjectInspector();
+ stringTypeInfoMap.put(structField.getFieldName(),
+ primitiveObjectInspector.getTypeInfo().getPrimitiveCategory());
+ }
+ }
+ }
+ } catch (SerDeException e) {
+ throw new SemanticException(String.format("Unable to get object
inspector due to: %s", e));
+ }
+ for (int index = 0;index < keyList.size();index++) {
+ String key = keyList.get(index);
+ PrimitiveObjectInspector.PrimitiveCategory category =
stringTypeInfoMap.get(key);
+ String value = partitionSpec.get(key);
+ boolean shouldEncloseQuotes =
TypeInfoUtils.shouldEncloseQuotes(category);
+ sb.append(index == 0 ? "" : " and ").append(key).append(" = ");
+ if (shouldEncloseQuotes) {
+ sb.append("'").append(value).append("'");
Review Comment:
Does `'` have to be escaped?
```
table's_partition
```
```
pkey = 'table''s_partition'
```
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -1644,4 +1647,68 @@ public void
validatePartSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
}
}
}
+
+ // Metadata delete or a positional delete
+ @Override
+ public boolean shouldTruncate(org.apache.hadoop.hive.ql.metadata.Table
hmsTable, Map<String, String> partitionSpec)
+ throws SemanticException {
+ Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ if (MapUtils.isEmpty(partitionSpec) || !isPartitionEvolution(table)) {
+ return true;
+ }
+
+ Map<String, PartitionField> partitionFieldMap = Maps.newHashMap();
+ for (PartitionField partField : table.spec().fields()) {
+ partitionFieldMap.put(partField.name(), partField);
+ }
+ Expression finalExp = Expressions.alwaysTrue();
+ for (Map.Entry<String, String> entry : partitionSpec.entrySet()) {
+ String partColName = entry.getKey();
+ if (partitionFieldMap.containsKey(partColName)) {
+ PartitionField partitionField = partitionFieldMap.get(partColName);
+ Type resultType =
partitionField.transform().getResultType(table.schema()
+ .findField(partitionField.sourceId()).type());
+ TransformSpec.TransformType transformType =
IcebergTableUtil.getTransformType(partitionField.transform());
+ Object value = Conversions.fromPartitionString(resultType,
entry.getValue());
+ Iterable iterable = () -> Collections.singletonList(value).iterator();
+ if (transformType.equals(TransformSpec.TransformType.IDENTITY)) {
+ Expression boundPredicate = Expressions.in(partitionField.name(),
iterable);
+ finalExp = Expressions.and(finalExp, boundPredicate);
+ } else {
+ throw new SemanticException(
+ String.format("Partition transforms are not supported via
truncate operation: %s", partColName));
+ }
+ } else {
+ throw new SemanticException(String.format("No partition
column/transform by the name: %s", partColName));
+ }
+ }
+ FindFiles.Builder builder = new
FindFiles.Builder(table).withRecordsMatching(finalExp).includeColumnStats();
+ Set<DataFile> dataFiles =
Sets.newHashSet(Iterables.transform(builder.collect(), file -> file));
+ boolean result = true;
+ for (DataFile dataFile : dataFiles) {
+ PartitionData partitionData = (PartitionData) dataFile.partition();
+ Expression residual = ResidualEvaluator.of(table.spec(), finalExp, false)
+ .residualFor(partitionData);
+ StrictMetricsEvaluator strictMetricsEvaluator = new
StrictMetricsEvaluator(table.schema(), residual);
+ if (!strictMetricsEvaluator.eval(dataFile)) {
+ result = false;
+ }
+ }
+
+ boolean isV2Table = hmsTable.getParameters() != null &&
+
"2".equals(hmsTable.getParameters().get(TableProperties.FORMAT_VERSION));
+ if (!result && !isV2Table) {
+ throw new SemanticException("Truncate conversion to delete is not
possible since its not an Iceberg V2 table." +
+ " Consider converting the table to Iceberg's V2 format
specification.");
+ }
Review Comment:
Maybe this check should be done at the beginning of the method body. WDYT?
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -1644,4 +1647,68 @@ public void
validatePartSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
}
}
}
+
+ // Metadata delete or a positional delete
+ @Override
+ public boolean shouldTruncate(org.apache.hadoop.hive.ql.metadata.Table
hmsTable, Map<String, String> partitionSpec)
+ throws SemanticException {
+ Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ if (MapUtils.isEmpty(partitionSpec) || !isPartitionEvolution(table)) {
+ return true;
+ }
+
+ Map<String, PartitionField> partitionFieldMap = Maps.newHashMap();
Review Comment:
Is it possible to pass the initial size? It should be
`table.spec().fields().size()` I guess.
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java:
##########
@@ -597,12 +600,49 @@ public void
rollbackAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTab
}
@Override
- public void preTruncateTable(org.apache.hadoop.hive.metastore.api.Table
table, EnvironmentContext context)
+ public void preTruncateTable(org.apache.hadoop.hive.metastore.api.Table
table, EnvironmentContext context,
+ List<String> partNames)
throws MetaException {
this.catalogProperties = getCatalogProperties(table);
this.icebergTable = Catalogs.loadTable(conf, catalogProperties);
+ Map<String, PartitionField> partitionFieldMap = Maps.newHashMap();
+ for (PartitionField partField : icebergTable.spec().fields()) {
+ partitionFieldMap.put(partField.name(), partField);
+ }
+ Expression finalExp = Expressions.alwaysTrue();
+ if (partNames != null && !partNames.isEmpty()) {
Review Comment:
nit.: `&& !partNames.isEmpty()` is not necessary, since the for loop has 0
iterations when `partNames` is empty.
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/misc/truncate/TruncateTableAnalyzer.java:
##########
@@ -338,4 +373,42 @@ private void addStatTask(ASTNode root, Table table, Path
oldPartitionLocation, P
moveTask.addDependentTask(statTask);
}
}
+
+ public StringBuilder constructDeleteQuery(Table table, Map<String, String>
partitionSpec) throws SemanticException {
+ StringBuilder sb = new StringBuilder().append("delete from
").append(table.getTableName())
+ .append(" where ");
+ List<String> keyList = new ArrayList<String>(partitionSpec.keySet());
+ Deserializer deserializer = table.getDeserializer();
+ Map<String, PrimitiveObjectInspector.PrimitiveCategory> stringTypeInfoMap
= new HashMap<>();
+ try {
+ ObjectInspector objectInspector = deserializer.getObjectInspector();
+ if (objectInspector.getCategory() == ObjectInspector.Category.STRUCT) {
+ StructObjectInspector structObjectInspector = (StructObjectInspector)
objectInspector;
+ List<? extends StructField> structFields =
structObjectInspector.getAllStructFieldRefs();
+ for (int index = 0;index < structFields.size();index++) {
+ StructField structField = structFields.get(index);
+ if (structField.getFieldObjectInspector().getCategory() ==
ObjectInspector.Category.PRIMITIVE) {
+ PrimitiveObjectInspector primitiveObjectInspector =
(PrimitiveObjectInspector) structField.getFieldObjectInspector();
+ stringTypeInfoMap.put(structField.getFieldName(),
+ primitiveObjectInspector.getTypeInfo().getPrimitiveCategory());
+ }
+ }
+ }
+ } catch (SerDeException e) {
+ throw new SemanticException(String.format("Unable to get object
inspector due to: %s", e));
+ }
+ for (int index = 0;index < keyList.size();index++) {
+ String key = keyList.get(index);
Review Comment:
What is the purpose of `keyList`?
How about
```
for (String key : partitionSpec.keySet()) {
...
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]