This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-1.1.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 90a886a153ade08461e567a1c8f05de199affa2e Author: Vamshi Krishna Kyatham <[email protected]> AuthorDate: Sun Oct 26 07:18:29 2025 -0700 fix: updating error messages thrown to end users (#14115) --------- Co-authored-by: sivabalan <[email protected]> --- .../hudi/common/table/HoodieTableVersion.java | 9 ++++++++- .../hudi/io/hadoop/HoodieBaseParquetWriter.java | 4 ++++ .../main/java/org/apache/hudi/DataSourceUtils.java | 15 +++++++++++++- .../hudi/hive/util/IMetaStoreClientUtil.java | 23 ++++++++++++++++++++-- .../apache/hudi/sync/common/HoodieSyncClient.java | 19 ++++++++++++++++-- .../apache/hudi/utilities/sources/InputBatch.java | 7 ++++++- .../deltastreamer/TestHoodieDeltaStreamer.java | 6 +++++- .../hudi/utilities/sources/TestInputBatch.java | 6 +++++- 8 files changed, 80 insertions(+), 9 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java index 08ec668f20b1..945ed2366607 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java @@ -78,7 +78,14 @@ public enum HoodieTableVersion { public static HoodieTableVersion fromVersionCode(int versionCode) { return Arrays.stream(HoodieTableVersion.values()) .filter(v -> v.versionCode == versionCode).findAny() - .orElseThrow(() -> new HoodieException("Unknown table versionCode:" + versionCode)); + .orElseThrow(() -> new HoodieException( + String.format( + "Unsupported table version code: %d.%n%n" + + "This table version is not recognized by this Hudi version.%n%n" + + "This can happen if:%n" + + " (1) The table was created with a newer version of Hudi (upgrade your readers),%n" + + " (2) The table metadata is corrupted.%n%n" + + "See: https://hudi.apache.org/docs/migration_guide for more information", versionCode))); } public static HoodieTableVersion fromReleaseVersion(String releaseVersion) { diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java index ed20f0bca114..24ad77a6c135 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java @@ -150,6 +150,10 @@ public abstract class HoodieBaseParquetWriter<R> implements Closeable { writtenRecordCount.incrementAndGet(); } + private static boolean isRequiredFieldNullError(String errorMessage) { + return errorMessage.contains("null") && errorMessage.contains("required"); + } + protected long getWrittenRecordCount() { return writtenRecordCount.get(); } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java index db32fb13f6b7..fe05c4281761 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -323,7 +323,20 @@ public class DataSourceUtils { if (totalErroredRecords > 0) { hasErrored.set(true); ValidationUtils.checkArgument(writeStatusesOpt.isPresent(), "RDD <WriteStatus> expected to be present when there are errors"); - LOG.error("{} failed with errors", writeOperationType); + long errorCount = HoodieJavaRDD.getJavaRDD(writeStatusesOpt.get()) + .filter(WriteStatus::hasErrors) + .count(); + + String errorSummary = String.format( + "%s operation failed with %d error(s).%n%n" + + "Total write statuses with errors: %d%n%n" + + "Check the driver logs for error stacktraces which provide more information on the failure.", + writeOperationType, + totalErroredRecords, + errorCount); + + LOG.error(errorSummary); + if (LOG.isTraceEnabled()) { LOG.trace("Printing out the top 100 errors"); diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java index c2cadd5b1159..875d7ab388ef 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hudi.exception.HoodieException; import java.lang.reflect.InvocationTargetException; @@ -39,8 +40,26 @@ public class IMetaStoreClientUtil { try { metaStoreClient = ((Hive) Hive.class.getMethod("getWithoutRegisterFns", HiveConf.class).invoke(null, hiveConf)).getMSC(); } catch (NoSuchMethodException | IllegalAccessException | IllegalArgumentException - | InvocationTargetException ex) { - metaStoreClient = Hive.get(hiveConf).getMSC(); + | InvocationTargetException ex) { + try { + metaStoreClient = Hive.get(hiveConf).getMSC(); + } catch (RuntimeException e) { + if (e.getMessage() != null && e.getMessage().contains("not org.apache.hudi.org.apache.hadoop")) { + throw new HoodieException( + String.format( + "Hive Metastore compatibility issue detected. This usually happens due to:%n" + + " (1) Hive version mismatch,%n" + + " (2) Conflicting Hive libraries in classpath,%n" + + " (3) Incompatible hudi-spark-bundle version.%n%n" + + "To resolve:%n" + + " - For Hive 2.x use hudi-spark-bundle with 'hive2' classifier,%n" + + " - For Hive 3.x use hudi-spark-bundle with 'hive3' classifier,%n" + + " - Ensure no conflicting Hive jars in Spark classpath.%n%n" + + "Check: https://hudi.apache.org/docs/syncing_metastore.%n%n" + + "Technical details: %s", e.getMessage()), e); + } + throw e; + } } return metaStoreClient; } diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java index be5b76764783..c696ba222c64 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java @@ -112,7 +112,7 @@ public abstract class HoodieSyncClient implements HoodieMetaSyncOperations, Auto try { return tableSchemaResolver.getTableParquetSchema(); } catch (Exception e) { - throw new HoodieSyncException("Failed to read schema from storage.", e); + throw new HoodieSyncException(buildSchemaReadErrorMessage(e), e); } } @@ -121,10 +121,25 @@ public abstract class HoodieSyncClient implements HoodieMetaSyncOperations, Auto try { return tableSchemaResolver.getTableParquetSchema(includeMetadataField); } catch (Exception e) { - throw new HoodieSyncException("Failed to read schema from storage.", e); + throw new HoodieSyncException(buildSchemaReadErrorMessage(e), e); } } + private String buildSchemaReadErrorMessage(Exception e) { + String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getName(); + if (e instanceof java.io.FileNotFoundException) { + return String.format( + "Cannot read Hudi table schema.%n%n" + + "Required data file missing .%n%n" + + "This indicates:%n" + + " 1. Aggressive cleaner retention compared to query run times\n" + + " 2. Manual file deletions (timeline files or data files)\n" + + " 3. Concurrent writers without proper locking or configurations set\n\n" + + "Original error: %s", errorMessage); + } + return String.format("Failed to read schema from storage.%nError: %s", errorMessage); + } + /** * Gets all relative partitions paths in the Hudi table on storage. * diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java index 54879f72b7bb..32f11cb5b818 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java @@ -63,7 +63,12 @@ public class InputBatch<T> { public SchemaProvider getSchemaProvider() { if (batch.isPresent() && schemaProvider == null) { - throw new HoodieException("Please provide a valid schema provider class!"); + throw new HoodieException( + "Schema provider is required for this operation and for the source of interest. " + + "Please set '--schemaprovider-class' in the top level HoodieStreamer config for the source of interest. " + + "Based on the schema provider class chosen, additional configs might be required. " + + "For eg, if you choose 'org.apache.hudi.utilities.schema.SchemaRegistryProvider', " + + "you may need to set configs like 'hoodie.streamer.schemaprovider.registry.url'."); } return Option.ofNullable(schemaProvider).orElseGet(NullSchemaProvider::getInstance); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index 89781741e39f..4d6729bd2da4 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -2102,7 +2102,11 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase { new HoodieDeltaStreamer(cfg, jsc, fs, hiveServer.getHiveConf()).sync(); }, "Should error out when schema provider is not provided"); LOG.debug("Expected error during reading data from source ", e); - assertTrue(e.getMessage().contains("Please provide a valid schema provider class!")); + assertTrue(e.getMessage().contains("Schema provider is required for this operation and for the source of interest. " + + "Please set '--schemaprovider-class' in the top level HoodieStreamer config for the source of interest. " + + "Based on the schema provider class chosen, additional configs might be required. " + + "For eg, if you choose 'org.apache.hudi.utilities.schema.SchemaRegistryProvider', " + + "you may need to set configs like 'hoodie.streamer.schemaprovider.registry.url'.")); } @Test diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java index d5d03e246959..f7bb36076a7a 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java @@ -36,7 +36,11 @@ public class TestInputBatch { public void getSchemaProviderShouldThrowException() { final InputBatch<String> inputBatch = new InputBatch<>(Option.of("foo"), (String) null, null); Throwable t = assertThrows(HoodieException.class, inputBatch::getSchemaProvider); - assertEquals("Please provide a valid schema provider class!", t.getMessage()); + assertEquals("Schema provider is required for this operation and for the source of interest. " + + "Please set '--schemaprovider-class' in the top level HoodieStreamer config for the source of interest. " + + "Based on the schema provider class chosen, additional configs might be required. " + + "For eg, if you choose 'org.apache.hudi.utilities.schema.SchemaRegistryProvider', " + + "you may need to set configs like 'hoodie.streamer.schemaprovider.registry.url'.", t.getMessage()); } @Test
