EnricoMi commented on code in PR #38312:
URL: https://github.com/apache/spark/pull/38312#discussion_r1095704158
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala:
##########
@@ -49,24 +49,28 @@ import org.apache.spark.sql.types._
  * @param caseSensitive Whether use case sensitive analysis when comparing Spark catalyst read
  *                      schema with Parquet schema.
  * @param inferTimestampNTZ Whether TimestampNTZType type is enabled.
+ * @param nanosAsLong Whether timestamps with nanos are converted to long.
  */
 class ParquetToSparkSchemaConverter(
     assumeBinaryIsString: Boolean = SQLConf.PARQUET_BINARY_AS_STRING.defaultValue.get,
     assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get,
     caseSensitive: Boolean = SQLConf.CASE_SENSITIVE.defaultValue.get,
     inferTimestampNTZ: Boolean = SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.defaultValue.get) {
+    nanosAsLong: Boolean = SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.defaultValue.get) {
Review Comment:
```suggestion
    inferTimestampNTZ: Boolean = SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.defaultValue.get,
    nanosAsLong: Boolean = SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.defaultValue.get) {
```
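For reference, once the trailing comma is fixed the new flag is wired from the session conf the same way as the existing parameters. A minimal sketch, mirroring the `ParquetFileFormat` change later in this PR; `sparkSession` is assumed to be an in-scope `SparkSession`, and `sessionState`/`SQLConf` are Spark-internal APIs, so this is illustrative of code inside the Spark code base rather than a user-facing call:

```scala
// Sketch only: wiring every constructor argument from the active SQLConf,
// including the new nanosAsLong flag added by this PR.
val conf = sparkSession.sessionState.conf
val converter = new ParquetToSparkSchemaConverter(
  assumeBinaryIsString = conf.isParquetBinaryAsString,
  assumeInt96IsTimestamp = conf.isParquetINT96AsTimestamp,
  caseSensitive = conf.caseSensitiveAnalysis,
  inferTimestampNTZ = conf.parquetInferTimestampNTZEnabled,
  nanosAsLong = conf.legacyParquetNanosAsLong)
```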
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala:
##########
@@ -65,12 +68,14 @@ abstract class ParquetSchemaTest extends ParquetTest with SharedSparkSession {
       caseSensitive: Boolean = false,
       inferTimestampNTZ: Boolean = true,
       sparkReadSchema: Option[StructType] = None,
-      expectedParquetColumn: Option[ParquetColumn] = None): Unit = {
+      expectedParquetColumn: Option[ParquetColumn] = None,
+      nanosAsLong: Boolean = false): Unit = {
     val converter = new ParquetToSparkSchemaConverter(
       assumeBinaryIsString = binaryAsString,
       assumeInt96IsTimestamp = int96AsTimestamp,
       caseSensitive = caseSensitive,
       inferTimestampNTZ = inferTimestampNTZ)
+      nanosAsLong = nanosAsLong)
Review Comment:
```suggestion
      inferTimestampNTZ = inferTimestampNTZ,
      nanosAsLong = nanosAsLong)
```
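To make the new `nanosAsLong = false` default concrete, here is a rough sketch of what a test exercising the flag might look like. The Parquet schema string and the expected `LongType` result are inferences from the `@param nanosAsLong` documentation above, not taken from this diff, so treat them as assumptions:

```scala
import org.apache.parquet.schema.MessageTypeParser
import org.apache.spark.sql.types.{LongType, StructField, StructType}

// Assumed example: a Parquet file schema with a nanosecond-precision timestamp column.
val parquetSchema = MessageTypeParser.parseMessageType(
  """message root {
    |  optional int64 event_time (TIMESTAMP(NANOS,true));
    |}
    |""".stripMargin)

// With the legacy flag on, the nanos timestamp is expected to map to a plain long
// instead of failing the conversion; all other parameters keep their SQLConf defaults.
val converter = new ParquetToSparkSchemaConverter(nanosAsLong = true)
val sparkSchema: StructType = converter.convert(parquetSchema)
assert(sparkSchema == StructType(Seq(StructField("event_time", LongType, nullable = true))))
```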
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala:
##########
@@ -454,13 +459,15 @@ object ParquetFileFormat extends Logging {
     val assumeBinaryIsString = sparkSession.sessionState.conf.isParquetBinaryAsString
     val assumeInt96IsTimestamp = sparkSession.sessionState.conf.isParquetINT96AsTimestamp
     val inferTimestampNTZ = sparkSession.sessionState.conf.parquetInferTimestampNTZEnabled
+    val nanosAsLong = sparkSession.sessionState.conf.legacyParquetNanosAsLong
     val reader = (files: Seq[FileStatus], conf: Configuration, ignoreCorruptFiles: Boolean) => {
       // Converter used to convert Parquet `MessageType` to Spark SQL `StructType`
       val converter = new ParquetToSparkSchemaConverter(
         assumeBinaryIsString = assumeBinaryIsString,
         assumeInt96IsTimestamp = assumeInt96IsTimestamp,
         inferTimestampNTZ = inferTimestampNTZ)
+        nanosAsLong = nanosAsLong)
Review Comment:
```suggestion
        inferTimestampNTZ = inferTimestampNTZ,
        nanosAsLong = nanosAsLong)
```
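At the user level the whole feature is toggled through the legacy SQL config rather than the converter constructor. A hedged end-to-end sketch, assuming an active `SparkSession` named `spark`; the config key string is an assumption inferred from the constant name `SQLConf.LEGACY_PARQUET_NANOS_AS_LONG` (verify against `SQLConf`), and the file path is hypothetical:

```scala
// Assumed config key, derived from LEGACY_PARQUET_NANOS_AS_LONG.
spark.conf.set("spark.sql.legacy.parquet.nanosAsLong", "true")

// With the flag enabled, Parquet files containing nanosecond timestamps should load,
// exposing those columns as longs rather than failing schema conversion.
val df = spark.read.parquet("/path/to/parquet-with-nanos")  // hypothetical path
df.printSchema()
```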
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.