sunchao commented on a change in pull request #34308:
URL: https://github.com/apache/spark/pull/34308#discussion_r732051021



##########
File path: 
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
 
 public final class ParquetDictionary implements Dictionary {
   private org.apache.parquet.column.Dictionary dictionary;
+  private String  file;

Review comment:
       nit: extra space

##########
File path: 
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
 
 public final class ParquetDictionary implements Dictionary {
   private org.apache.parquet.column.Dictionary dictionary;
+  private String  file;
   private boolean needTransform = false;
 
-  public ParquetDictionary(org.apache.parquet.column.Dictionary dictionary, 
boolean needTransform) {
+  public ParquetDictionary(
+      org.apache.parquet.column.Dictionary dictionary,
+      String file,
+      boolean needTransform) {
     this.dictionary = dictionary;
+    this.file = file;
     this.needTransform = needTransform;
   }
 
   @Override
   public int decodeToInt(int id) {
-    if (needTransform) {
-      return (int) dictionary.decodeToLong(id);
-    } else {
-      return dictionary.decodeToInt(id);
+    try {
+      if (needTransform) {
+        return (int) dictionary.decodeToLong(id);
+      } else {
+        return dictionary.decodeToInt(id);
+      }
+    } catch (UnsupportedOperationException e) {
+      throw new UnsupportedOperationException("Decoding to Int failed when 
reading file " +
+        file + "using " + e.getMessage(), e.getCause());
     }
   }
 
   @Override
   public long decodeToLong(int id) {
-    if (needTransform) {
-      // For unsigned int32, it stores as dictionary encoded signed int32 in 
Parquet
-      // whenever dictionary is available.
-      // Here we lazily decode it to the original signed int value then 
convert to long(unit32).
-      return Integer.toUnsignedLong(dictionary.decodeToInt(id));
-    } else {
-      return dictionary.decodeToLong(id);
+    try {
+      if (needTransform) {
+        // For unsigned int32, it stores as dictionary encoded signed int32 in 
Parquet
+        // whenever dictionary is available.
+        // Here we lazily decode it to the original signed int value then 
convert to long(unit32).
+        return Integer.toUnsignedLong(dictionary.decodeToInt(id));
+      } else {
+        return dictionary.decodeToLong(id);
+      }
+    } catch (UnsupportedOperationException e) {
+      throw new UnsupportedOperationException("Decoding to Long failed when 
reading file " +
+        file + "using " + e.getMessage(), e.getCause());
     }
   }
 
   @Override
   public float decodeToFloat(int id) {
-    return dictionary.decodeToFloat(id);
+    try {
+      return dictionary.decodeToFloat(id);
+    } catch (UnsupportedOperationException e) {
+      throw new UnsupportedOperationException("Decoding to Float failed when 
reading file " +

Review comment:
       ditto

##########
File path: 
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java
##########
@@ -350,7 +350,8 @@ private void checkEndOfRowGroup() throws IOException {
         pages.getRowIndexes().orElse(null),
         convertTz,
         datetimeRebaseMode,
-        int96RebaseMode);
+        int96RebaseMode,
+        file == null? "null": file.toString());

Review comment:
       hm when `file` will be null?

##########
File path: 
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
 
 public final class ParquetDictionary implements Dictionary {
   private org.apache.parquet.column.Dictionary dictionary;
+  private String  file;
   private boolean needTransform = false;
 
-  public ParquetDictionary(org.apache.parquet.column.Dictionary dictionary, 
boolean needTransform) {
+  public ParquetDictionary(
+      org.apache.parquet.column.Dictionary dictionary,
+      String file,
+      boolean needTransform) {
     this.dictionary = dictionary;
+    this.file = file;
     this.needTransform = needTransform;
   }
 
   @Override
   public int decodeToInt(int id) {
-    if (needTransform) {
-      return (int) dictionary.decodeToLong(id);
-    } else {
-      return dictionary.decodeToInt(id);
+    try {
+      if (needTransform) {
+        return (int) dictionary.decodeToLong(id);
+      } else {
+        return dictionary.decodeToInt(id);
+      }
+    } catch (UnsupportedOperationException e) {
+      throw new UnsupportedOperationException("Decoding to Int failed when 
reading file " +

Review comment:
       nit: maybe change this to `"Decoding to Int is not supported by " + 
dictionary.getClass().getSimpleName() + " while reading file " + file"`
   
   Also maybe it's better to add this to `QueryExecutionErrors`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to