Bruce Robbins created SPARK-31557:
-------------------------------------

             Summary: Legacy parser incorrectly interprets pre-Gregorian dates
                 Key: SPARK-31557
                 URL: https://issues.apache.org/jira/browse/SPARK-31557
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 3.0.0, 3.1.0
            Reporter: Bruce Robbins


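With CSV, when the legacy time parser policy is enabled, pre-Gregorian dates are read back shifted by several days (the "actual" column should match "expected"; only 1800-01-01 does):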
{noformat}
scala> sql("set spark.sql.legacy.timeParserPolicy=LEGACY")
res0: org.apache.spark.sql.DataFrame = [key: string, value: string]

scala> val seq = Seq("0002-01-01", "1000-01-01", "1500-01-01", 
"1800-01-01").map(x => s"$x,$x")
seq: Seq[String] = List(0002-01-01,0002-01-01, 1000-01-01,1000-01-01, 
1500-01-01,1500-01-01, 1800-01-01,1800-01-01)

scala> val ds = seq.toDF("value").as[String]
ds: org.apache.spark.sql.Dataset[String] = [value: string]

scala> spark.read.schema("expected STRING, actual DATE").csv(ds).show
+----------+----------+
|  expected|    actual|
+----------+----------+
|0002-01-01|0001-12-30|
|1000-01-01|1000-01-06|
|1500-01-01|1500-01-10|
|1800-01-01|1800-01-01|
+----------+----------+

scala> 
{noformat}
Similarly, with JSON:
{noformat}
scala> sql("set spark.sql.legacy.timeParserPolicy=LEGACY")
res0: org.apache.spark.sql.DataFrame = [key: string, value: string]

scala> val seq = Seq("0002-01-01", "1000-01-01", "1500-01-01", 
"1800-01-01").map { x =>
  s"""{"expected": "$x", "actual": "$x"}"""
}

     |      | seq: Seq[String] = List({"expected": "0002-01-01", "actual": 
"0002-01-01"}, {"expected": "1000-01-01", "actual": "1000-01-01"}, {"expected": 
"1500-01-01", "actual": "1500-01-01"}, {"expected": "1800-01-01", "actual": 
"1800-01-01"})

scala> 
scala> val ds = seq.toDF("value").as[String]
ds: org.apache.spark.sql.Dataset[String] = [value: string]

scala> spark.read.schema("expected STRING, actual DATE").json(ds).show
+----------+----------+
|  expected|    actual|
+----------+----------+
|0002-01-01|0001-12-30|
|1000-01-01|1000-01-06|
|1500-01-01|1500-01-10|
|1800-01-01|1800-01-01|
+----------+----------+

scala> 
{noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to