Blazer-007 commented on code in PR #4058: URL: https://github.com/apache/gobblin/pull/4058#discussion_r1774528239
########## gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/iceberg/IcebergReplacePartitionsStep.java: ########## @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.data.management.copy.iceberg; + +import java.io.IOException; +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.ExecutionException; + +import org.apache.iceberg.DataFile; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.util.SerializationUtil; + +import com.github.rholder.retry.Attempt; +import com.github.rholder.retry.RetryException; +import com.github.rholder.retry.RetryListener; +import com.github.rholder.retry.Retryer; +import com.google.common.collect.ImmutableMap; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + +import lombok.extern.slf4j.Slf4j; + +import org.apache.gobblin.commit.CommitStep; +import org.apache.gobblin.util.retry.RetryerFactory; + +import static org.apache.gobblin.util.retry.RetryerFactory.RETRY_INTERVAL_MS; +import static org.apache.gobblin.util.retry.RetryerFactory.RETRY_TIMES; +import static org.apache.gobblin.util.retry.RetryerFactory.RETRY_TYPE; +import static org.apache.gobblin.util.retry.RetryerFactory.RetryType; + +/** + * Commit step for replacing partitions in an Iceberg table. + * <p> + * This class implements the {@link CommitStep} interface and provides functionality to replace + * partitions in the destination Iceberg table using serialized data files. + * </p> + */ +@Slf4j +public class IcebergReplacePartitionsStep implements CommitStep { + private final String destTableIdStr; + private final Properties properties; + private final byte[] serializedDataFiles; + public static final String REPLACE_PARTITIONS_RETRYER_CONFIG_PREFIX = IcebergDatasetFinder.ICEBERG_DATASET_PREFIX + ".catalog.replace.partitions.retries"; + private static final Config RETRYER_FALLBACK_CONFIG = ConfigFactory.parseMap(ImmutableMap.of( + RETRY_INTERVAL_MS, TimeUnit.SECONDS.toMillis(3L), + RETRY_TIMES, 3, + RETRY_TYPE, RetryType.FIXED_ATTEMPT.name())); + + /** + * Constructs an {@code IcebergReplacePartitionsStep} with the specified parameters. + * + * @param destTableIdStr the identifier of the destination table as a string + * @param serializedDataFiles the serialized data files to be used for replacing partitions + * @param properties the properties containing configuration + */ + public IcebergReplacePartitionsStep(String destTableIdStr, byte[] serializedDataFiles, Properties properties) { + this.destTableIdStr = destTableIdStr; + this.serializedDataFiles = serializedDataFiles; + this.properties = properties; + } + + @Override + public boolean isCompleted() { + return false; + } + + /** + * Executes the partition replacement in the destination Iceberg table. + * Also, have retry mechanism as done in {@link IcebergRegisterStep#execute()} + * + * @throws IOException if an I/O error occurs during execution + */ + @Override + public void execute() throws IOException { Review Comment: I will keep them as separate as in replace partition commit step there is no way to pass current metadata, it internally validates it, so checking them will be not that useful. Also given that we might need RegisterStep as well if schema evolves in some cases, then storing currentDestMetadata will not make any sense atleast in replace partition commit step. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@gobblin.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org