[
https://issues.apache.org/jira/browse/HIVE-26102?focusedWorklogId=754621&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-754621
]
ASF GitHub Bot logged work on HIVE-26102:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 08/Apr/22 13:55
Start Date: 08/Apr/22 13:55
Worklog Time Spent: 10m
Work Description: marton-bod commented on code in PR #3131:
URL: https://github.com/apache/hive/pull/3131#discussion_r846137472
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergAcidUtil.java:
##########
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.mr.hive;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import org.apache.iceberg.MetadataColumns;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.deletes.PositionDelete;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.StructProjection;
+
+public class IcebergAcidUtil {
+
+ private IcebergAcidUtil() {
+ }
+
+ private static final Types.NestedField PARTITION_STRUCT_META_COL = null; // placeholder value in the map
+ private static final Map<Types.NestedField, Integer> DELETE_FILE_READ_META_COLS = Maps.newLinkedHashMap();
+
+ static {
+ DELETE_FILE_READ_META_COLS.put(MetadataColumns.SPEC_ID, 0);
+ DELETE_FILE_READ_META_COLS.put(PARTITION_STRUCT_META_COL, 1);
+ DELETE_FILE_READ_META_COLS.put(MetadataColumns.FILE_PATH, 2);
+ DELETE_FILE_READ_META_COLS.put(MetadataColumns.ROW_POSITION, 3);
+ }
+
+ private static final Types.NestedField PARTITION_HASH_META_COL = Types.NestedField.required(
+ MetadataColumns.PARTITION_COLUMN_ID, MetadataColumns.PARTITION_COLUMN_NAME, Types.LongType.get());
+ private static final Map<Types.NestedField, Integer> DELETE_SERDE_META_COLS = Maps.newLinkedHashMap();
+
+ static {
+ DELETE_SERDE_META_COLS.put(MetadataColumns.SPEC_ID, 0);
+ DELETE_SERDE_META_COLS.put(PARTITION_HASH_META_COL, 1);
+ DELETE_SERDE_META_COLS.put(MetadataColumns.FILE_PATH, 2);
+ DELETE_SERDE_META_COLS.put(MetadataColumns.ROW_POSITION, 3);
+ }
+
+ /**
+ * @param dataCols The columns of the original file read schema
+ * @param table The table object - it is used for populating the partition struct meta column
+ * @return The schema for reading files, extended with metadata columns needed for deletes
+ */
+ public static Schema createFileReadSchemaForDelete(List<Types.NestedField> dataCols, Table table) {
+ List<Types.NestedField> cols = Lists.newArrayListWithCapacity(dataCols.size() + DELETE_FILE_READ_META_COLS.size());
+ DELETE_FILE_READ_META_COLS.forEach((metaCol, index) -> {
+ if (metaCol == PARTITION_STRUCT_META_COL) {
+ cols.add(MetadataColumns.metadataColumn(table, MetadataColumns.PARTITION_COLUMN_NAME));
+ } else {
+ cols.add(metaCol);
+ }
+ });
+ cols.addAll(dataCols);
+ return new Schema(cols);
+ }
+
+ /**
+ * @param dataCols The columns of the serde projection schema
+ * @return The schema for SerDe operations, extended with metadata columns needed for deletes
+ */
+ public static Schema createSerdeSchemaForDelete(List<Types.NestedField> dataCols) {
+ List<Types.NestedField> cols = Lists.newArrayListWithCapacity(dataCols.size() + DELETE_SERDE_META_COLS.size());
+ DELETE_SERDE_META_COLS.forEach((metaCol, index) -> cols.add(metaCol));
+ cols.addAll(dataCols);
+ return new Schema(cols);
+ }
+
+ public static PositionDelete<Record> getPositionDelete(Schema schema, Record rec) {
+ PositionDelete<Record> positionDelete = PositionDelete.create();
+ String filePath =
rec.get(DELETE_SERDE_META_COLS.get(MetadataColumns.FILE_PATH), String.class);
+ long filePosition =
rec.get(DELETE_SERDE_META_COLS.get(MetadataColumns.ROW_POSITION), Long.class);
+
+ int dataOffset = DELETE_SERDE_META_COLS.size(); // position in the rec where the actual row data begins
+ Record rowData = GenericRecord.create(schema);
Review Comment:
We could use a Caffeine cache, but I'm not sure how much cost is associated with a potential cache miss.
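For reference only, not part of the patch above: a minimal sketch of what the Caffeine-based cache discussed here might look like, assuming the goal is to reuse the GenericRecord template built from a given Schema instead of recreating it for every row. The class name RecordTemplateCache, the dependency on com.github.benmanes.caffeine, the cache size, and keying by Schema instance are all assumptions for illustration; whether a cache lookup actually beats GenericRecord.create is exactly the open question raised in the comment.

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;

public final class RecordTemplateCache {

  // Caches one GenericRecord template per Schema. Note: Schema does not override
  // equals/hashCode, so this keys on object identity, which only helps if the same
  // Schema instance is passed on every call (typically true within a single task).
  private static final LoadingCache<Schema, GenericRecord> TEMPLATES = Caffeine.newBuilder()
      .maximumSize(100)
      .build(GenericRecord::create);

  private RecordTemplateCache() {
  }

  // Returns a fresh Record for the given schema by copying the cached template,
  // so callers can mutate the result without affecting other rows.
  public static Record newRecord(Schema schema) {
    return TEMPLATES.get(schema).copy();
  }
}

If bounding the cache size is not a concern, a plain ConcurrentHashMap with computeIfAbsent would serve the same purpose without the extra dependency.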
Issue Time Tracking
-------------------
Worklog Id: (was: 754621)
Time Spent: 14.5h (was: 14h 20m)
> Implement DELETE statements for Iceberg tables
> ----------------------------------------------
>
> Key: HIVE-26102
> URL: https://issues.apache.org/jira/browse/HIVE-26102
> Project: Hive
> Issue Type: New Feature
> Reporter: Marton Bod
> Assignee: Marton Bod
> Priority: Major
> Labels: pull-request-available
> Time Spent: 14.5h
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)