This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 90a886a153ade08461e567a1c8f05de199affa2e
Author: Vamshi Krishna Kyatham 
<[email protected]>
AuthorDate: Sun Oct 26 07:18:29 2025 -0700

    fix: updating error messages thrown to end users (#14115)
    
    
    ---------
    
    Co-authored-by: sivabalan <[email protected]>
---
 .../hudi/common/table/HoodieTableVersion.java      |  9 ++++++++-
 .../hudi/io/hadoop/HoodieBaseParquetWriter.java    |  4 ++++
 .../main/java/org/apache/hudi/DataSourceUtils.java | 15 +++++++++++++-
 .../hudi/hive/util/IMetaStoreClientUtil.java       | 23 ++++++++++++++++++++--
 .../apache/hudi/sync/common/HoodieSyncClient.java  | 19 ++++++++++++++++--
 .../apache/hudi/utilities/sources/InputBatch.java  |  7 ++++++-
 .../deltastreamer/TestHoodieDeltaStreamer.java     |  6 +++++-
 .../hudi/utilities/sources/TestInputBatch.java     |  6 +++++-
 8 files changed, 80 insertions(+), 9 deletions(-)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java
index 08ec668f20b1..945ed2366607 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java
@@ -78,7 +78,14 @@ public enum HoodieTableVersion {
   public static HoodieTableVersion fromVersionCode(int versionCode) {
     return Arrays.stream(HoodieTableVersion.values())
         .filter(v -> v.versionCode == versionCode).findAny()
-        .orElseThrow(() -> new HoodieException("Unknown table versionCode:" + 
versionCode));
+        .orElseThrow(() -> new HoodieException(
+            String.format(
+                "Unsupported table version code: %d.%n%n"
+                    + "This table version is not recognized by this Hudi 
version.%n%n"
+                    + "This can happen if:%n"
+                    + "  (1) The table was created with a newer version of 
Hudi (upgrade your readers),%n"
+                    + "  (2) The table metadata is corrupted.%n%n"
+                    + "See: https://hudi.apache.org/docs/migration_guide for 
more information", versionCode)));
   }
 
   public static HoodieTableVersion fromReleaseVersion(String releaseVersion) {
diff --git 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java
 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java
index ed20f0bca114..24ad77a6c135 100644
--- 
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java
+++ 
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java
@@ -150,6 +150,10 @@ public abstract class HoodieBaseParquetWriter<R> 
implements Closeable {
     writtenRecordCount.incrementAndGet();
   }
 
+  private static boolean isRequiredFieldNullError(String errorMessage) {
+    return errorMessage.contains("null") && errorMessage.contains("required");
+  }
+
   protected long getWrittenRecordCount() {
     return writtenRecordCount.get();
   }
diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
 
b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
index db32fb13f6b7..fe05c4281761 100644
--- 
a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ 
b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -323,7 +323,20 @@ public class DataSourceUtils {
       if (totalErroredRecords > 0) {
         hasErrored.set(true);
         ValidationUtils.checkArgument(writeStatusesOpt.isPresent(), "RDD 
<WriteStatus> expected to be present when there are errors");
-        LOG.error("{} failed with errors", writeOperationType);
+        long errorCount = HoodieJavaRDD.getJavaRDD(writeStatusesOpt.get())
+            .filter(WriteStatus::hasErrors)
+            .count();
+
+        String errorSummary = String.format(
+            "%s operation failed with %d error(s).%n%n"
+                + "Total write statuses with errors: %d%n%n"
+                + "Check the driver logs for error stacktraces which provide 
more information on the failure.",
+            writeOperationType,
+            totalErroredRecords,
+            errorCount);
+
+        LOG.error(errorSummary);
+
         if (LOG.isTraceEnabled()) {
           LOG.trace("Printing out the top 100 errors");
 
diff --git 
a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java
 
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java
index c2cadd5b1159..875d7ab388ef 100644
--- 
a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java
+++ 
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/IMetaStoreClientUtil.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hudi.exception.HoodieException;
 
 import java.lang.reflect.InvocationTargetException;
 
@@ -39,8 +40,26 @@ public class IMetaStoreClientUtil {
     try {
       metaStoreClient = ((Hive) Hive.class.getMethod("getWithoutRegisterFns", 
HiveConf.class).invoke(null, hiveConf)).getMSC();
     } catch (NoSuchMethodException | IllegalAccessException | 
IllegalArgumentException
-      | InvocationTargetException ex) {
-      metaStoreClient = Hive.get(hiveConf).getMSC();
+        | InvocationTargetException ex) {
+      try {
+        metaStoreClient = Hive.get(hiveConf).getMSC();
+      } catch (RuntimeException e) {
+        if (e.getMessage() != null && e.getMessage().contains("not 
org.apache.hudi.org.apache.hadoop")) {
+          throw new HoodieException(
+              String.format(
+                  "Hive Metastore compatibility issue detected. This usually 
happens due to:%n"
+                      + "  (1) Hive version mismatch,%n"
+                      + "  (2) Conflicting Hive libraries in classpath,%n"
+                      + "  (3) Incompatible hudi-spark-bundle version.%n%n"
+                      + "To resolve:%n"
+                      + "  - For Hive 2.x use hudi-spark-bundle with 'hive2' 
classifier,%n"
+                      + "  - For Hive 3.x use hudi-spark-bundle with 'hive3' 
classifier,%n"
+                      + "  - Ensure no conflicting Hive jars in Spark 
classpath.%n%n"
+                      + "Check: https://hudi.apache.org/docs/syncing_metastore.%n%n"
+                      + "Technical details: %s", e.getMessage()), e);
+        }
+        throw e;
+      }
     }
     return metaStoreClient;
   }
diff --git 
a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
 
b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
index be5b76764783..c696ba222c64 100644
--- 
a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
+++ 
b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java
@@ -112,7 +112,7 @@ public abstract class HoodieSyncClient implements 
HoodieMetaSyncOperations, Auto
     try {
       return tableSchemaResolver.getTableParquetSchema();
     } catch (Exception e) {
-      throw new HoodieSyncException("Failed to read schema from storage.", e);
+      throw new HoodieSyncException(buildSchemaReadErrorMessage(e), e);
     }
   }
 
@@ -121,10 +121,25 @@ public abstract class HoodieSyncClient implements 
HoodieMetaSyncOperations, Auto
     try {
       return tableSchemaResolver.getTableParquetSchema(includeMetadataField);
     } catch (Exception e) {
-      throw new HoodieSyncException("Failed to read schema from storage.", e);
+      throw new HoodieSyncException(buildSchemaReadErrorMessage(e), e);
     }
   }
 
+  private String buildSchemaReadErrorMessage(Exception e) {
+    String errorMessage = e.getMessage() != null ? e.getMessage() : 
e.getClass().getName();
+    if (e instanceof java.io.FileNotFoundException) {
+      return String.format(
+          "Cannot read Hudi table schema.%n%n"
+              + "Required data file missing .%n%n"
+              + "This indicates:%n"
+              + "  1. Aggressive cleaner retention compared to query run 
times\n"
+              + "  2. Manual file deletions (timeline files or data files)\n"
+              + "  3. Concurrent writers without proper locking or 
configurations set\n\n"
+              + "Original error: %s", errorMessage);
+    }
+    return String.format("Failed to read schema from storage.%nError: %s", 
errorMessage);
+  }
+
   /**
    * Gets all relative partitions paths in the Hudi table on storage.
    *
diff --git 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java
 
b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java
index 54879f72b7bb..32f11cb5b818 100644
--- 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java
+++ 
b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java
@@ -63,7 +63,12 @@ public class InputBatch<T> {
 
   public SchemaProvider getSchemaProvider() {
     if (batch.isPresent() && schemaProvider == null) {
-      throw new HoodieException("Please provide a valid schema provider 
class!");
+      throw new HoodieException(
+          "Schema provider is required for this operation and for the source 
of interest. "
+              + "Please set '--schemaprovider-class' in the top level 
HoodieStreamer config for the source of interest. "
+              + "Based on the schema provider class chosen, additional configs 
might be required. "
+              + "For eg, if you choose 
'org.apache.hudi.utilities.schema.SchemaRegistryProvider', "
+              + "you may need to set configs like 
'hoodie.streamer.schemaprovider.registry.url'.");
     }
     return 
Option.ofNullable(schemaProvider).orElseGet(NullSchemaProvider::getInstance);
   }
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java
index 89781741e39f..4d6729bd2da4 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java
@@ -2102,7 +2102,11 @@ public class TestHoodieDeltaStreamer extends 
HoodieDeltaStreamerTestBase {
       new HoodieDeltaStreamer(cfg, jsc, fs, hiveServer.getHiveConf()).sync();
     }, "Should error out when schema provider is not provided");
     LOG.debug("Expected error during reading data from source ", e);
-    assertTrue(e.getMessage().contains("Please provide a valid schema provider 
class!"));
+    assertTrue(e.getMessage().contains("Schema provider is required for this 
operation and for the source of interest. "
+        + "Please set '--schemaprovider-class' in the top level HoodieStreamer 
config for the source of interest. "
+        + "Based on the schema provider class chosen, additional configs might 
be required. "
+        + "For eg, if you choose 
'org.apache.hudi.utilities.schema.SchemaRegistryProvider', "
+        + "you may need to set configs like 
'hoodie.streamer.schemaprovider.registry.url'."));
   }
 
   @Test
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java
index d5d03e246959..f7bb36076a7a 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestInputBatch.java
@@ -36,7 +36,11 @@ public class TestInputBatch {
   public void getSchemaProviderShouldThrowException() {
     final InputBatch<String> inputBatch = new InputBatch<>(Option.of("foo"), 
(String) null, null);
     Throwable t = assertThrows(HoodieException.class, 
inputBatch::getSchemaProvider);
-    assertEquals("Please provide a valid schema provider class!", 
t.getMessage());
+    assertEquals("Schema provider is required for this operation and for the 
source of interest. "
+        + "Please set '--schemaprovider-class' in the top level HoodieStreamer 
config for the source of interest. "
+        + "Based on the schema provider class chosen, additional configs might 
be required. "
+        + "For eg, if you choose 
'org.apache.hudi.utilities.schema.SchemaRegistryProvider', "
+        + "you may need to set configs like 
'hoodie.streamer.schemaprovider.registry.url'.", t.getMessage());
   }
 
   @Test

Reply via email to