marton-bod commented on a change in pull request #2701:
URL: https://github.com/apache/hive/pull/2701#discussion_r724113786
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
##########
@@ -2366,7 +2366,9 @@ private void getMetaData(QB qb, ReadEntity parentInput)
"Inconsistent data structure detected: we are writing to " +
ts.tableHandle + " in " +
name + " but it's not in isInsertIntoTable() or
getInsertOverwriteTables()";
// Disallow update and delete on non-acid tables
- boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle);
+ boolean isFullAcid =
Review comment:
There is a check right at the beginning that allows running
DELETE/UPDATE queries only on ACID tables
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
##########
@@ -11410,7 +11413,7 @@ private Operator genTablePlan(String alias, QB qb)
throws SemanticException {
Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
// use a list for easy cumtomize
List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
- if(!tab.isNonNative()) {
+ if(!tab.isNonNative() || tab.getStorageHandler().alwaysUnpartitioned()) {
Review comment:
Virtual columns are only added for managed tables unless we change this check
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
##########
@@ -102,7 +102,11 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws
SemanticException {
rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
- rewrittenQueryStr.append(" select ROW__ID");
+ if (mTable.getStorageHandler() != null &&
mTable.getStorageHandler().alwaysUnpartitioned()) {
+ rewrittenQueryStr.append(" select POS__DELETE");
Review comment:
For Iceberg, insert the POS__DELETE virtual column (<file_path, pos>)
into the table instead of the ROW__ID
##########
File path:
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
##########
@@ -428,7 +439,16 @@ private static Schema readSchema(Configuration conf,
Schema tableSchema, boolean
return tableSchema;
}
- return caseSensitive ? tableSchema.select(selectedColumns) :
tableSchema.caseInsensitiveSelect(selectedColumns);
+ readSchema = caseSensitive ? tableSchema.select(selectedColumns) :
+ tableSchema.caseInsensitiveSelect(selectedColumns);
+
+ if (conf.get("hive.query.string").startsWith("DELETE")) {
Review comment:
When deleting, append the row_position metadata column to the end of the
schema
##########
File path:
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
##########
@@ -214,18 +218,27 @@ public void initialize(InputSplit split,
TaskAttemptContext newContext) {
this.reuseContainers =
conf.getBoolean(InputFormatConfig.REUSE_CONTAINERS, false);
this.inMemoryDataModel =
conf.getEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL,
InputFormatConfig.InMemoryDataModel.GENERIC);
- this.currentIterator = open(tasks.next(), expectedSchema).iterator();
+ // save the scanTask to retrieve the file path
+ this.currentScanTask = tasks.next();
+ this.currentIterator = open(currentScanTask, expectedSchema).iterator();
}
@Override
public boolean nextKeyValue() throws IOException {
while (true) {
if (currentIterator.hasNext()) {
current = currentIterator.next();
+ Object position = ((GenericRecord)
current).getField(MetadataColumns.ROW_POSITION.name());
+ if (position != null) {
+ // hacky way to propagate the data to
MapOperator#populateVirtualColumnValues
+ System.setProperty("delete_file_path",
currentScanTask.file().path().toString());
Review comment:
I told you it's super hacky at this point 😛
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]