sunchao commented on a change in pull request #34308:
URL: https://github.com/apache/spark/pull/34308#discussion_r732051021
##########
File path:
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
public final class ParquetDictionary implements Dictionary {
private org.apache.parquet.column.Dictionary dictionary;
+ private String file;
Review comment:
nit: extra space
##########
File path:
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
public final class ParquetDictionary implements Dictionary {
private org.apache.parquet.column.Dictionary dictionary;
+ private String file;
private boolean needTransform = false;
- public ParquetDictionary(org.apache.parquet.column.Dictionary dictionary,
boolean needTransform) {
+ public ParquetDictionary(
+ org.apache.parquet.column.Dictionary dictionary,
+ String file,
+ boolean needTransform) {
this.dictionary = dictionary;
+ this.file = file;
this.needTransform = needTransform;
}
@Override
public int decodeToInt(int id) {
- if (needTransform) {
- return (int) dictionary.decodeToLong(id);
- } else {
- return dictionary.decodeToInt(id);
+ try {
+ if (needTransform) {
+ return (int) dictionary.decodeToLong(id);
+ } else {
+ return dictionary.decodeToInt(id);
+ }
+ } catch (UnsupportedOperationException e) {
+ throw new UnsupportedOperationException("Decoding to Int failed when
reading file " +
+ file + "using " + e.getMessage(), e.getCause());
}
}
@Override
public long decodeToLong(int id) {
- if (needTransform) {
- // For unsigned int32, it stores as dictionary encoded signed int32 in
Parquet
- // whenever dictionary is available.
- // Here we lazily decode it to the original signed int value then
convert to long(unit32).
- return Integer.toUnsignedLong(dictionary.decodeToInt(id));
- } else {
- return dictionary.decodeToLong(id);
+ try {
+ if (needTransform) {
+ // For unsigned int32, it stores as dictionary encoded signed int32 in
Parquet
+ // whenever dictionary is available.
+ // Here we lazily decode it to the original signed int value then
convert to long(unit32).
+ return Integer.toUnsignedLong(dictionary.decodeToInt(id));
+ } else {
+ return dictionary.decodeToLong(id);
+ }
+ } catch (UnsupportedOperationException e) {
+ throw new UnsupportedOperationException("Decoding to Long failed when
reading file " +
+ file + "using " + e.getMessage(), e.getCause());
}
}
@Override
public float decodeToFloat(int id) {
- return dictionary.decodeToFloat(id);
+ try {
+ return dictionary.decodeToFloat(id);
+ } catch (UnsupportedOperationException e) {
+ throw new UnsupportedOperationException("Decoding to Float failed when
reading file " +
Review comment:
ditto
##########
File path:
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java
##########
@@ -350,7 +350,8 @@ private void checkEndOfRowGroup() throws IOException {
pages.getRowIndexes().orElse(null),
convertTz,
datetimeRebaseMode,
- int96RebaseMode);
+ int96RebaseMode,
+ file == null? "null": file.toString());
Review comment:
hm when `file` will be null?
##########
File path:
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetDictionary.java
##########
@@ -23,54 +23,85 @@
public final class ParquetDictionary implements Dictionary {
private org.apache.parquet.column.Dictionary dictionary;
+ private String file;
private boolean needTransform = false;
- public ParquetDictionary(org.apache.parquet.column.Dictionary dictionary,
boolean needTransform) {
+ public ParquetDictionary(
+ org.apache.parquet.column.Dictionary dictionary,
+ String file,
+ boolean needTransform) {
this.dictionary = dictionary;
+ this.file = file;
this.needTransform = needTransform;
}
@Override
public int decodeToInt(int id) {
- if (needTransform) {
- return (int) dictionary.decodeToLong(id);
- } else {
- return dictionary.decodeToInt(id);
+ try {
+ if (needTransform) {
+ return (int) dictionary.decodeToLong(id);
+ } else {
+ return dictionary.decodeToInt(id);
+ }
+ } catch (UnsupportedOperationException e) {
+ throw new UnsupportedOperationException("Decoding to Int failed when
reading file " +
Review comment:
nit: maybe change this to `"Decoding to Int is not supported by " +
dictionary.getClass().getSimpleName() + " while reading file " + file"`
Also maybe it's better to add this to `QueryExecutionErrors`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]