ryan-johnson-databricks commented on code in PR #43403:
URL: https://github.com/apache/spark/pull/43403#discussion_r1362362661
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala:
##########
@@ -65,4 +66,36 @@ object TimeTravelSpec {
      None
    }
  }
+
+  def fromOptions(
+      options: CaseInsensitiveStringMap,
+      timestampKey: String,
+      versionKey: String,
+      sessionLocalTimeZone: String): Option[TimeTravelSpec] = {
+    val timestampStr = options.get(timestampKey)
+    val versionStr = options.get(versionKey)
+    if (timestampStr != null && versionStr != null) {
Review Comment:
Seems like `null` is a Scala antipattern... would it be cleaner to do this instead?
```scala
(Option(options.get(timestampKey)), Option(options.get(versionKey))) match {
  case (Some(_), Some(_)) =>
    throw QueryCompilationErrors.invalidTimeTravelSpecError()
  case (Some(timestampString), None) =>
    ...
    Some(AsOfTimestamp(...))
  case (None, Some(versionStr)) =>
    Some(AsOfVersion(versionStr))
  case (None, None) =>
    None
}
```
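For illustration, here is one way the elided branches could be filled in, reusing the `Cast`-based conversion from the hunk below. This is a rough sketch only (assuming the file's existing imports), not compiled against this PR, and the real change would still need to handle an unparseable timestamp:
```scala
// Rough sketch, not compiled against this PR -- handling of an unparseable
// timestamp (eval() returning null) is deliberately left as a TODO.
def fromOptions(
    options: CaseInsensitiveStringMap,
    timestampKey: String,
    versionKey: String,
    sessionLocalTimeZone: String): Option[TimeTravelSpec] = {
  (Option(options.get(timestampKey)), Option(options.get(versionKey))) match {
    case (Some(_), Some(_)) =>
      throw QueryCompilationErrors.invalidTimeTravelSpecError()
    case (Some(timestampStr), None) =>
      // Same lenient (non-ANSI) cast as in the proposed change below.
      val micros = Cast(
        Literal(timestampStr), TimestampType, Some(sessionLocalTimeZone), ansiEnabled = false)
        .eval()
      // TODO: micros == null means the string did not parse as a timestamp;
      // this is where a dedicated invalid-timestamp-option error would go.
      Some(AsOfTimestamp(micros.asInstanceOf[Long]))
    case (None, Some(versionStr)) =>
      Some(AsOfVersion(versionStr))
    case (None, None) =>
      None
  }
}
```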
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -1122,7 +1122,10 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
      case r @ RelationTimeTravel(u: UnresolvedRelation, timestamp, version)
          if timestamp.forall(ts => ts.resolved && !SubqueryExpression.hasSubquery(ts)) =>
-        resolveRelation(u, TimeTravelSpec.create(timestamp, version, conf)).getOrElse(r)
+        resolveRelation(
+          u,
+          TimeTravelSpec.create(timestamp, version, conf.sessionLocalTimeZone)
+        ).getOrElse(r)
Review Comment:
```suggestion
        val timeTravelSpec = TimeTravelSpec.create(timestamp, version, conf.sessionLocalTimeZone)
        resolveRelation(u, timeTravelSpec).getOrElse(r)
```
##########
common/utils/src/main/resources/error/error-classes.json:
##########
@@ -2196,6 +2208,13 @@
},
"sqlState" : "42K0E"
},
+ "INVALID_TIME_TRAVEL_TIMESTAMP_OPTION" : {
Review Comment:
Should this be a subclass of the existing
`INVALID_TIME_TRAVEL_TIMESTAMP_EXPR`?
(unless `TIMESTAMP AS OF 'yyyy-MM-dd HH:mm:ss[.us][zone_id]'` is invalid and
options are special?)
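(For context, if it did become a sub-class, the throw site would just reference it as `PARENT.SUBCLASS`; the sub-class name and message parameter below are made up, only the addressing scheme is real:)
```scala
// Hypothetical sketch -- "OPTION" is an invented sub-class name and "expr" is
// an illustrative message parameter; not what this PR actually does.
throw new AnalysisException(
  errorClass = "INVALID_TIME_TRAVEL_TIMESTAMP_EXPR.OPTION",
  messageParameters = Map("expr" -> timestampStr))
```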
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -1255,17 +1258,27 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
   private def resolveRelation(
       u: UnresolvedRelation,
       timeTravelSpec: Option[TimeTravelSpec] = None): Option[LogicalPlan] = {
-    resolveTempView(u.multipartIdentifier, u.isStreaming, timeTravelSpec.isDefined).orElse {
+    val timeTravelSpecFromOptions = TimeTravelSpec.fromOptions(
+      u.options,
+      conf.getConf(SQLConf.TIME_TRAVEL_TIMESTAMP_KEY),
+      conf.getConf(SQLConf.TIME_TRAVEL_VERSION_KEY),
Review Comment:
If it's part of `SQLConf`, does that mean somebody might try to force time
travel for all table accesses in their session? Wouldn't that be a weird
conflict, if somebody used SQL `AS OF` or normal options?
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TimeTravelSpec.scala:
##########
@@ -65,4 +66,36 @@ object TimeTravelSpec {
      None
    }
  }
+
+  def fromOptions(
+      options: CaseInsensitiveStringMap,
+      timestampKey: String,
+      versionKey: String,
+      sessionLocalTimeZone: String): Option[TimeTravelSpec] = {
+    val timestampStr = options.get(timestampKey)
+    val versionStr = options.get(versionKey)
+    if (timestampStr != null && versionStr != null) {
+      throw QueryCompilationErrors.invalidTimeTravelSpecError()
+    }
+
+    if (timestampStr != null) {
+      val timestampValue = Cast(
+        Literal(timestampStr),
+        TimestampType,
+        Some(sessionLocalTimeZone),
+        ansiEnabled = false
Review Comment:
Shouldn't ANSI enablement normally come from the session conf?
Or are we relying specifically on non-ANSI so that an invalid timestamp gives back null instead of raising an exception?
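If it's the latter, a short code comment would help. The behavioral difference I'm thinking of is roughly this (untested sketch; `"UTC"` is used only to make it standalone):
```scala
// Lenient (non-ANSI) cast: an unparseable timestamp folds to null, so the
// caller still gets a chance to raise a tailored time-travel error afterwards.
val lenient = Cast(Literal("not-a-timestamp"), TimestampType, Some("UTC"), ansiEnabled = false)
assert(lenient.eval() == null)

// ANSI cast: the same input makes Cast.eval() itself throw a generic cast
// error before the time-travel code ever sees it.
val strict = Cast(Literal("not-a-timestamp"), TimestampType, Some("UTC"), ansiEnabled = true)
// strict.eval()  // throws instead of returning null
```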
##########
common/utils/src/main/resources/error/error-classes.json:
##########
@@ -869,6 +869,12 @@
],
"sqlState" : "42710"
},
+ "DUPLICATED_TIME_TRAVEL_SPEC" : {
Review Comment:
nit:
```suggestion
"MULTIPLE_TIME_TRAVEL_SPEC" : {
```
(merely "duplicated" could in theory be ok, in the sense that both specs are
identical and thus don't conflict -- the real problem comes if the provided
specs disagree with each other... but it doesn't seem worth the trouble to
actually compare the two in order to raise a `CONFLICTING_TIME_TRAVEL` error --
better to require a single spec and be done with it)
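For completeness, the "compare and only complain on disagreement" variant would look something like the sketch below (`fromSql` / `fromOptions` are just placeholder names for the two candidate specs), but again, probably not worth it:
```scala
// Placeholder sketch -- not proposing this; requiring a single spec is simpler.
(fromSql, fromOptions) match {
  case (Some(a), Some(b)) if a != b =>
    // a dedicated CONFLICTING_TIME_TRAVEL error class would go here
    throw QueryCompilationErrors.invalidTimeTravelSpecError()
  case (specFromSql, specFromOptions) => specFromSql.orElse(specFromOptions)
}
```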
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala:
##########
@@ -2928,28 +2932,34 @@ class DataSourceV2SQLSuiteV1Filter
sql(s"INSERT INTO $t4 VALUES (7)")
sql(s"INSERT INTO $t4 VALUES (8)")
- assert(sql("SELECT * FROM t TIMESTAMP AS OF '2019-01-29
00:37:58'").collect
- === Array(Row(5), Row(6)))
- assert(sql("SELECT * FROM t TIMESTAMP AS OF '2021-01-29
00:00:00'").collect
- === Array(Row(7), Row(8)))
- assert(sql(s"SELECT * FROM t TIMESTAMP AS OF $ts1InSeconds").collect
- === Array(Row(5), Row(6)))
- assert(sql(s"SELECT * FROM t TIMESTAMP AS OF $ts2InSeconds").collect
- === Array(Row(7), Row(8)))
- assert(sql(s"SELECT * FROM t FOR SYSTEM_TIME AS OF
$ts1InSeconds").collect
- === Array(Row(5), Row(6)))
- assert(sql(s"SELECT * FROM t FOR SYSTEM_TIME AS OF
$ts2InSeconds").collect
- === Array(Row(7), Row(8)))
- assert(sql("SELECT * FROM t TIMESTAMP AS OF make_date(2021, 1,
29)").collect
- === Array(Row(7), Row(8)))
- assert(sql("SELECT * FROM t TIMESTAMP AS OF to_timestamp('2021-01-29
00:00:00')").collect
- === Array(Row(7), Row(8)))
Review Comment:
This seems like a spurious change/refactor?
Even if the refactor is important, it's arguably best isolated in its own PR
to not mix feature+refactor.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala:
##########
@@ -1255,17 +1258,27 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
   private def resolveRelation(
       u: UnresolvedRelation,
       timeTravelSpec: Option[TimeTravelSpec] = None): Option[LogicalPlan] = {
-    resolveTempView(u.multipartIdentifier, u.isStreaming, timeTravelSpec.isDefined).orElse {
+    val timeTravelSpecFromOptions = TimeTravelSpec.fromOptions(
+      u.options,
+      conf.getConf(SQLConf.TIME_TRAVEL_TIMESTAMP_KEY),
+      conf.getConf(SQLConf.TIME_TRAVEL_VERSION_KEY),
Review Comment:
Update: The conf lookup is just fetching the _names_ of the time travel
keys, not their values.
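i.e., the `SQLConf` entries presumably look something like the following (config names, defaults, and version below are hypothetical):
```scala
// Hypothetical shape -- the conf value is the *name* of the read option that
// carries the timestamp/version, not a timestamp or version value itself,
// so setting it cannot force time travel for every table access.
val TIME_TRAVEL_TIMESTAMP_KEY = buildConf("spark.sql.timeTravelTimestampKey")
  .doc("Name of the read option that carries a time-travel timestamp.")
  .version("4.0.0")
  .stringConf
  .createWithDefault("timestampAsOf")

val TIME_TRAVEL_VERSION_KEY = buildConf("spark.sql.timeTravelVersionKey")
  .doc("Name of the read option that carries a time-travel version.")
  .version("4.0.0")
  .stringConf
  .createWithDefault("versionAsOf")
```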
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]