MaxGekk commented on a change in pull request #34471:
URL: https://github.com/apache/spark/pull/34471#discussion_r779825155
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala
##########
@@ -134,33 +134,35 @@ abstract class ParquetRebaseDatetimeSuite
tsOutputType: String = "TIMESTAMP_MICROS",
inWriteConf: String = SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key,
inReadConf: String = SQLConf.PARQUET_REBASE_MODE_IN_READ.key): Unit = {
- withTempPaths(2) { paths =>
- paths.foreach(_.delete())
+ withAllParquetWriters {
+ withTempPaths(2) { paths =>
+ paths.foreach(_.delete())
val oldPath = getResourceParquetFilePath("test-data/" + fileName)
val path3_x = paths(0).getCanonicalPath
val path3_x_rebase = paths(1).getCanonicalPath
- val df = Seq.tabulate(N)(rowFunc).toDF("dict", "plain")
- .select($"dict".cast(catalystType), $"plain".cast(catalystType))
- withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> tsOutputType)
{
+ val df = Seq.tabulate(N)(rowFunc).toDF("dict", "plain")
+ .select($"dict".cast(catalystType), $"plain".cast(catalystType))
+ withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key ->
tsOutputType) {
Review comment:
Something is wrong with the indentation here and below:
<img width="642" alt="Screenshot 2022-01-06 at 22 56 02"
src="https://user-images.githubusercontent.com/1580697/148443305-f6890111-26c2-4538-9abe-53a28144c46b.png">
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRebaseDatetimeSuite.scala
##########
@@ -219,60 +221,62 @@ abstract class ParquetRebaseDatetimeSuite
test("SPARK-31159, SPARK-37705: rebasing timestamps in write") {
val N = 8
Seq(false, true).foreach { dictionaryEncoding =>
- Seq(
- (
- "TIMESTAMP_MILLIS",
- "1001-01-01 01:02:03.123",
- "1001-01-07 01:09:05.123",
- SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key,
- SQLConf.PARQUET_REBASE_MODE_IN_READ.key),
- (
- "TIMESTAMP_MICROS",
- "1001-01-01 01:02:03.123456",
- "1001-01-07 01:09:05.123456",
- SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key,
- SQLConf.PARQUET_REBASE_MODE_IN_READ.key),
- (
- "INT96",
- "1001-01-01 01:02:03.123456",
- "1001-01-07 01:09:05.123456",
- SQLConf.PARQUET_INT96_REBASE_MODE_IN_WRITE.key,
- SQLConf.PARQUET_INT96_REBASE_MODE_IN_READ.key
- )
- ).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf) =>
+ withAllParquetWriters {
+ Seq(
+ (
+ "TIMESTAMP_MILLIS",
+ "1001-01-01 01:02:03.123",
+ "1001-01-07 01:09:05.123",
+ SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key,
+ SQLConf.PARQUET_REBASE_MODE_IN_READ.key),
+ (
+ "TIMESTAMP_MICROS",
+ "1001-01-01 01:02:03.123456",
+ "1001-01-07 01:09:05.123456",
+ SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key,
+ SQLConf.PARQUET_REBASE_MODE_IN_READ.key),
+ (
+ "INT96",
+ "1001-01-01 01:02:03.123456",
+ "1001-01-07 01:09:05.123456",
+ SQLConf.PARQUET_INT96_REBASE_MODE_IN_WRITE.key,
+ SQLConf.PARQUET_INT96_REBASE_MODE_IN_READ.key
+ )
+ ).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf)
=>
// Ignore the default JVM time zone and use the session time zone
instead of it in rebasing.
DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.JST) {
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key ->
DateTimeTestUtils.LA.getId) {
- withClue(s"output type $outType") {
- withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key ->
outType) {
- withTempPath { dir =>
- val path = dir.getAbsolutePath
- withSQLConf(inWriteConf -> LEGACY.toString) {
- Seq.tabulate(N)(_ => tsStr).toDF("tsS")
- .select($"tsS".cast("timestamp").as("ts"))
- .repartition(1)
- .write
- .option("parquet.enable.dictionary", dictionaryEncoding)
- .parquet(path)
- }
+ withClue(s"output type $outType") {
+ withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) {
+ withTempPath { dir =>
+ val path = dir.getAbsolutePath
+ withSQLConf(inWriteConf -> LEGACY.toString) {
+ Seq.tabulate(N)(_ => tsStr).toDF("tsS")
+ .select($"tsS".cast("timestamp").as("ts"))
+ .repartition(1)
+ .write
+ .option("parquet.enable.dictionary", dictionaryEncoding)
+ .parquet(path)
+ }
- withAllParquetReaders {
+ withAllParquetReaders {
// The file metadata indicates if it needs rebase or not,
so we can always get
// the correct result regardless of the "rebase mode"
config.
- runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) {
options =>
- checkAnswer(
+ runInMode(inReadConf, Seq(LEGACY, CORRECTED, EXCEPTION)) {
options =>
+ checkAnswer(
spark.read.options(options).parquet(path).select($"ts".cast("string")),
Seq.tabulate(N)(_ => Row(tsStr)))
- }
+ }
- // Force to not rebase to prove the written datetime
values are rebased
- // and we will get wrong result if we don't rebase while
reading.
- withSQLConf("spark.test.forceNoRebase" -> "true") {
- checkAnswer(
+ // Force to not rebase to prove the written datetime values
are rebased
+ // and we will get wrong result if we don't rebase while
reading.
+ withSQLConf("spark.test.forceNoRebase" -> "true") {
+ checkAnswer(
spark.read.parquet(path).select($"ts".cast("string")),
Seq.tabulate(N)(_ => Row(nonRebased)))
}
}
+ }
Review comment:
The indentation is wrong here too.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]