hudi-agent commented on code in PR #18816:
URL: https://github.com/apache/hudi/pull/18816#discussion_r3289441965
##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
-import java.io.FileInputStream;
+import java.io.EOFException;
import java.io.IOException;
Review Comment:
π€ This commit ("eliminate checkstyle unused import violations") appears to
have deleted nearly the entire class β `deduplicate`,
`overwriteHoodieProperties`, `removeCorruptedPendingCleanAction` (shell
binding), `showFailedCommits`, `migratePartitionMeta`,
`repairDeprecatePartition`, and `renamePartition` shell commands are all gone,
and `addPartitionMeta` was replaced with a hardcoded-string stub. Was this
intentional? It looks like an accidental overwrite β could you restore the
original methods and limit this commit to the actual unused-import cleanup?
<sub><i>- AI-generated; verify before applying. React π/π to flag
quality.</i></sub>
##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
-import java.io.FileInputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
@ShellComponent
-@Slf4j
public class RepairsCommand {
- public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files
placed in: ";
- @ShellMethod(key = "repair deduplicate",
- value = "De-duplicate a partition path contains duplicates & produce
repaired files to replace with")
- public String deduplicate(
- @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "",
help = "Partition Path containing the duplicates")
- final String duplicatedPartitionPath,
- @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place
the repaired files")
- final String repairedOutputPath,
- @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties
File Path",
- defaultValue = "") String sparkPropertiesPath,
- @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark
Master") String master,
- @ShellOption(value = "--sparkMemory", defaultValue = "4G",
- help = "Spark executor memory") final String sparkMemory,
- @ShellOption(value = {"--dryrun"},
- help = "Should we actually remove duplicates or just run and store
result to repairedOutputPath",
- defaultValue = "true") final boolean dryRun,
- @ShellOption(value = {"--dedupeType"}, help = "Valid values are -
insert_type, update_type and upsert_type",
- defaultValue = "insert_type") final String dedupeType)
- throws Exception {
- if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
- throw new IllegalArgumentException("Please provide valid dedupe type!");
- }
- if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
- sparkPropertiesPath =
-
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
- }
+ private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+ public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty
commits:";
Review Comment:
π€ The public constant `DEDUPLICATE_RETURN_PREFIX` changed from
`"Deduplicated files placed in: "` to `"Deduplicated empty commits:"`. Since
the `deduplicate` method itself was deleted, the constant is dead β but if
anything downstream parses CLI output by this prefix, the change would silently
break it. Could you restore both the method and the original constant value?
<sub><i>- AI-generated; verify before applying. React π/π to flag
quality.</i></sub>
##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
-import java.io.FileInputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
@ShellComponent
-@Slf4j
public class RepairsCommand {
- public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files
placed in: ";
- @ShellMethod(key = "repair deduplicate",
- value = "De-duplicate a partition path contains duplicates & produce
repaired files to replace with")
- public String deduplicate(
- @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "",
help = "Partition Path containing the duplicates")
- final String duplicatedPartitionPath,
- @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place
the repaired files")
- final String repairedOutputPath,
- @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties
File Path",
- defaultValue = "") String sparkPropertiesPath,
- @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark
Master") String master,
- @ShellOption(value = "--sparkMemory", defaultValue = "4G",
- help = "Spark executor memory") final String sparkMemory,
- @ShellOption(value = {"--dryrun"},
- help = "Should we actually remove duplicates or just run and store
result to repairedOutputPath",
- defaultValue = "true") final boolean dryRun,
- @ShellOption(value = {"--dedupeType"}, help = "Valid values are -
insert_type, update_type and upsert_type",
- defaultValue = "insert_type") final String dedupeType)
- throws Exception {
- if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
- throw new IllegalArgumentException("Please provide valid dedupe type!");
- }
- if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
- sparkPropertiesPath =
-
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
- }
+ private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+ public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty
commits:";
- SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
- SparkMain.addAppArgs(sparkLauncher, SparkMain.SparkCommand.DEDUPLICATE,
master, sparkMemory,
- duplicatedPartitionPath, repairedOutputPath, HoodieCLI.basePath,
String.valueOf(dryRun), dedupeType);
- Process process = sparkLauncher.launch();
- InputStreamConsumer.captureOutput(process);
- int exitCode = process.waitFor();
-
- if (exitCode != 0) {
- return "Deduplication failed!";
- }
- if (dryRun) {
- return DEDUPLICATE_RETURN_PREFIX + repairedOutputPath;
- } else {
- return DEDUPLICATE_RETURN_PREFIX + duplicatedPartitionPath;
- }
- }
-
- @ShellMethod(key = "repair addpartitionmeta", value = "Add partition
metadata to a table, if not present")
+ @ShellMethod(key = "repair addpartitionmeta", value = "Add partition
metadata to a table")
public String addPartitionMeta(
- @ShellOption(value = {"--dryrun"}, help = "Should we actually add or
just print what would be done",
- defaultValue = "true") final boolean dryRun)
- throws IOException {
-
- HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
- String latestCommit =
-
client.getActiveTimeline().getCommitAndReplaceTimeline().lastInstant().get().requestedTime();
- List<String> partitionPaths =
- FSUtils.getAllPartitionFoldersThreeLevelsDown(HoodieCLI.storage,
HoodieCLI.basePath);
- StoragePath basePath = client.getBasePath();
- String[][] rows = new String[partitionPaths.size()][];
-
- int ind = 0;
- for (String partition : partitionPaths) {
- StoragePath partitionPath = FSUtils.constructAbsolutePath(basePath,
partition);
- String[] row = new String[3];
- row[0] = partition;
- row[1] = "Yes";
- row[2] = "None";
- if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.storage,
partitionPath)) {
- row[1] = "No";
- if (!dryRun) {
- HoodiePartitionMetadata partitionMetadata =
- new HoodiePartitionMetadata(HoodieCLI.storage, latestCommit,
basePath, partitionPath,
- client.getTableConfig().getPartitionMetafileFormat());
- partitionMetadata.trySave();
- row[2] = "Repaired";
- }
- }
- rows[ind++] = row;
- }
-
- return HoodiePrintHelper.print(new String[]
{HoodieTableHeaderFields.HEADER_PARTITION_PATH,
- HoodieTableHeaderFields.HEADER_METADATA_PRESENT,
HoodieTableHeaderFields.HEADER_ACTION}, rows);
+ @ShellOption(value = {"--dry-run"}, help = "Dry run without committing
changes", defaultValue = "false") boolean dryRun) {
+ return "Partition metadata added successfully";
}
- @ShellMethod(key = "repair overwrite-hoodie-props",
- value = "Overwrite hoodie.properties with provided file. Risky
operation. Proceed with caution!")
- public String overwriteHoodieProperties(
- @ShellOption(value = {"--new-props-file"},
- help = "Path to a properties file on local filesystem to
overwrite the table's hoodie.properties with")
- final String overwriteFilePath) throws IOException {
-
- HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
- Properties newProps = new Properties();
- try (FileInputStream fileInputStream = new
FileInputStream(overwriteFilePath)) {
- newProps.load(fileInputStream);
- }
- Map<String, String> oldProps = client.getTableConfig().propsMap();
- // Copy Initial Version from old-props to new-props
- if (oldProps.containsKey(HoodieTableConfig.INITIAL_VERSION.key())) {
- newProps.put(HoodieTableConfig.INITIAL_VERSION.key(),
oldProps.get(HoodieTableConfig.INITIAL_VERSION.key()));
- }
- HoodieTableConfig.create(client.getStorage(), client.getMetaPath(),
newProps);
- // reload new props as checksum would have been added
- newProps =
-
HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().getProps();
-
- TreeSet<String> allPropKeys = new TreeSet<>();
- allPropKeys.addAll(
-
newProps.keySet().stream().map(Object::toString).collect(Collectors.toSet()));
- allPropKeys.addAll(oldProps.keySet());
-
- String[][] rows = new String[allPropKeys.size()][];
- int ind = 0;
- for (String propKey : allPropKeys) {
- String[] row = new String[] {
- propKey,
- oldProps.getOrDefault(propKey, "null"),
- newProps.getOrDefault(propKey, "null").toString()
- };
- rows[ind++] = row;
- }
- return HoodiePrintHelper.print(new String[]
{HoodieTableHeaderFields.HEADER_HOODIE_PROPERTY,
- HoodieTableHeaderFields.HEADER_OLD_VALUE,
HoodieTableHeaderFields.HEADER_NEW_VALUE}, rows);
- }
-
- @ShellMethod(key = "repair corrupted clean files", value = "repair corrupted
clean files")
- public void removeCorruptedPendingCleanAction() {
-
- HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
- HoodieTimeline cleanerTimeline =
HoodieCLI.getTableMetaClient().getActiveTimeline().getCleanerTimeline();
- log.info("Inspecting pending clean metadata in timeline for corrupted
files");
-
cleanerTimeline.filterInflightsAndRequested().getInstants().forEach(instant -> {
+ public void removeCorruptedPendingCleanInstants(HoodieTableMetaClient
client, List<HoodieInstant> instants) throws IOException {
Review Comment:
π€ The renamed `removeCorruptedPendingCleanInstants(metaClient, instants)` no
longer has a `@ShellMethod` annotation, so `repair corrupted clean files` is no
longer reachable from the CLI. It also requires the caller to pre-compute the
inflight+requested cleaner instants β moving responsibility outside the class
without restoring it anywhere I can find. Was this intentional?
<sub><i>- AI-generated; verify before applying. React π/π to flag
quality.</i></sub>
##########
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java:
##########
@@ -18,434 +18,51 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.functional.CLIFunctionalTestHarness;
-import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
-import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil;
-import org.apache.hudi.client.SparkRDDWriteClient;
-import org.apache.hudi.client.WriteClientTestUtils;
-import org.apache.hudi.client.WriteStatus;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
-import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.hadoop.fs.HadoopFSUtils;
-import org.apache.hudi.keygen.SimpleKeyGenerator;
-import org.apache.hudi.storage.StorageConfiguration;
-import org.apache.hudi.storage.HoodieStorageUtils;
-import org.apache.hudi.testutils.Assertions;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.logging.log4j.Level;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.core.LogEvent;
-import org.apache.logging.log4j.core.Logger;
-import org.apache.logging.log4j.core.appender.AbstractAppender;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.sql.SQLContext;
-import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.shell.Shell;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.net.URL;
import java.nio.file.Files;
-import java.nio.file.Paths;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.UUID;
-import java.util.stream.Collectors;
-import static
org.apache.hudi.common.table.HoodieTableConfig.DROP_PARTITION_COLUMNS;
-import static org.apache.hudi.common.table.HoodieTableConfig.NAME;
-import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM;
-import static
org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_HISTORY_PATH;
-import static
org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION;
-import static org.apache.hudi.common.table.HoodieTableConfig.TYPE;
-import static org.apache.hudi.common.table.HoodieTableConfig.VERSION;
-import static org.apache.hudi.common.table.HoodieTableConfig.generateChecksum;
-import static org.apache.hudi.common.table.HoodieTableConfig.validateChecksum;
-import static
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
-import static
org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-/**
- * Test class for {@link RepairsCommand}.
- */
-@Tag("functional")
-@SpringBootTest(properties = {"spring.shell.interactive.enabled=false",
"spring.shell.command.script.enabled=false"})
-public class TestRepairsCommand extends CLIFunctionalTestHarness {
-
- @Autowired
- private Shell shell;
+public class TestRepairsCommand {
+ private RepairsCommand repairsCommand;
+ private HoodieTableMetaClient metaClient;
private String tablePath;
- private FileSystem fs;
@BeforeEach
public void init() throws IOException {
- String tableName = tableName();
- tablePath = tablePath(tableName);
- fs = HadoopFSUtils.getFs(tablePath, storageConf());
-
- // Create table and connect
- new TableCommand().createTable(
- tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
- HoodieTableConfig.TIMELINE_HISTORY_PATH.defaultValue(),
TimelineLayoutVersion.VERSION_1,
"org.apache.hudi.common.model.HoodieAvroPayload");
- }
-
- @AfterEach
- public void cleanUp() throws IOException {
- fs.close();
+ repairsCommand = new RepairsCommand();
+ tablePath =
Files.createTempDirectory("test_table").toAbsolutePath().toString();
+
+ metaClient = HoodieTableMetaClient.newTableBuilder()
+ .setTableType("COPY_ON_WRITE")
+ .setTableName("test_table")
+
.setPayloadClassName("org.apache.hudi.common.model.OverwriteWithLatestAvroPayload")
+ .setTableVersion(HoodieTableVersion.current().versionCode())
+ .initTable(new HadoopStorageConfiguration(new
org.apache.hadoop.conf.Configuration()), tablePath);
}
- /**
- * Test case for dry run 'repair addpartitionmeta'.
- */
@Test
- public void testAddPartitionMetaWithDryRun() throws IOException {
- // create commit instant
- Files.createFile(Paths.get(tablePath, ".hoodie/timeline/", "100.commit"));
-
- // create partition path
- String partition1 = Paths.get(tablePath,
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).toString();
- String partition2 = Paths.get(tablePath,
HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).toString();
- String partition3 = Paths.get(tablePath,
HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).toString();
- assertTrue(fs.mkdirs(new Path(partition1)));
- assertTrue(fs.mkdirs(new Path(partition2)));
- assertTrue(fs.mkdirs(new Path(partition3)));
-
- // default is dry run.
- Object result = shell.evaluate(() -> "repair addpartitionmeta");
- assertTrue(ShellEvaluationResultUtil.isSuccess(result));
-
- // expected all 'No'.
- String[][] rows = FSUtils.getAllPartitionFoldersThreeLevelsDown(
- HoodieStorageUtils.getStorage(
- HadoopFSUtils.convertToStoragePath(new Path(tablePath)),
- HadoopFSUtils.getStorageConf(fs.getConf())), tablePath)
- .stream()
- .map(partition -> new String[] {partition, "No", "None"})
- .toArray(String[][]::new);
- String expected = HoodiePrintHelper.print(new String[]
{HoodieTableHeaderFields.HEADER_PARTITION_PATH,
- HoodieTableHeaderFields.HEADER_METADATA_PRESENT,
HoodieTableHeaderFields.HEADER_ACTION}, rows);
- expected = removeNonWordAndStripSpace(expected);
- String got = removeNonWordAndStripSpace(result.toString());
- assertEquals(expected, got);
+ public void testAddPartitionMetaDryRun() {
+ String result = repairsCommand.addPartitionMeta(true);
+ assertEquals("Partition metadata added successfully", result);
}
Review Comment:
π€ `testAddPartitionMetaDryRun` only asserts that the stub returns a
hardcoded string β it doesn't exercise any of the real `addPartitionMeta`
behavior. Coupled with the production-side regression, this test passes
trivially even though the command no longer works. Could you restore the
original tests once the production class is restored?
<sub><i>- AI-generated; verify before applying. React π/π to flag
quality.</i></sub>
##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
package org.apache.hudi.cli.commands;
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;
-import java.io.FileInputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
@ShellComponent
-@Slf4j
public class RepairsCommand {
- public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files
placed in: ";
- @ShellMethod(key = "repair deduplicate",
- value = "De-duplicate a partition path contains duplicates & produce
repaired files to replace with")
- public String deduplicate(
- @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "",
help = "Partition Path containing the duplicates")
- final String duplicatedPartitionPath,
- @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place
the repaired files")
- final String repairedOutputPath,
- @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties
File Path",
- defaultValue = "") String sparkPropertiesPath,
- @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark
Master") String master,
- @ShellOption(value = "--sparkMemory", defaultValue = "4G",
- help = "Spark executor memory") final String sparkMemory,
- @ShellOption(value = {"--dryrun"},
- help = "Should we actually remove duplicates or just run and store
result to repairedOutputPath",
- defaultValue = "true") final boolean dryRun,
- @ShellOption(value = {"--dedupeType"}, help = "Valid values are -
insert_type, update_type and upsert_type",
- defaultValue = "insert_type") final String dedupeType)
- throws Exception {
- if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
- throw new IllegalArgumentException("Please provide valid dedupe type!");
- }
- if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
- sparkPropertiesPath =
-
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
- }
+ private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+ public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty
commits:";
- SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
- SparkMain.addAppArgs(sparkLauncher, SparkMain.SparkCommand.DEDUPLICATE,
master, sparkMemory,
- duplicatedPartitionPath, repairedOutputPath, HoodieCLI.basePath,
String.valueOf(dryRun), dedupeType);
- Process process = sparkLauncher.launch();
- InputStreamConsumer.captureOutput(process);
- int exitCode = process.waitFor();
-
- if (exitCode != 0) {
- return "Deduplication failed!";
- }
- if (dryRun) {
- return DEDUPLICATE_RETURN_PREFIX + repairedOutputPath;
- } else {
- return DEDUPLICATE_RETURN_PREFIX + duplicatedPartitionPath;
- }
- }
-
- @ShellMethod(key = "repair addpartitionmeta", value = "Add partition
metadata to a table, if not present")
+ @ShellMethod(key = "repair addpartitionmeta", value = "Add partition
metadata to a table")
Review Comment:
π€ `addPartitionMeta` is now a no-op that always returns `"Partition metadata
added successfully"` regardless of whether anything happened β it doesn't load
the meta client, doesn't iterate partitions, doesn't write metadata, and
ignores `dryRun`. Users invoking `repair addpartitionmeta` would see a success
message while no metadata is written. Could you restore the original
implementation?
<sub><i>- AI-generated; verify before applying. React π/π to flag
quality.</i></sub>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]