Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19769#discussion_r155703855
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
 ---
    @@ -363,9 +370,25 @@ class ParquetFileFormat
               fileSplit.getLocations,
               null)
     
    +      val sharedConf = broadcastedHadoopConf.value.value
    +      // PARQUET_INT96_TIMESTAMP_CONVERSION says to apply timezone 
conversions to int96 timestamps'
    +      // *only* if the file was created by something other than 
"parquet-mr", so check the actual
    +      // writer here for this file.  We have to do this per-file, as each 
file in the table may
    +      // have different writers.
    +      def isCreatedByParquetMr(): Boolean = {
    +        val footer = ParquetFileReader.readFooter(sharedConf, 
fileSplit.getPath, SKIP_ROW_GROUPS)
    +        footer.getFileMetaData().getCreatedBy().startsWith("parquet-mr")
    +      }
    +      val convertTz =
    +        if (timestampConversion && !isCreatedByParquetMr()) {
    +          
Some(DateTimeUtils.getTimeZone(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key)))
    +        } else {
    +          None
    +        }
    +
           val attemptId = new TaskAttemptID(new TaskID(new JobID(), 
TaskType.MAP, 0), 0)
           val hadoopAttemptContext =
    -        new TaskAttemptContextImpl(broadcastedHadoopConf.value.value, 
attemptId)
    +        new TaskAttemptContextImpl(broadcastedHadoopConf.value.value, 
attemptId);
    --- End diff --
    
    nit: we don't need the trailing `;` at the end of the new `TaskAttemptContextImpl(...)` line.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to