yaooqinn commented on a change in pull request #26491: [SPARK-29870][SQL] Unify
the logic of multi-units interval string to CalendarInterval
URL: https://github.com/apache/spark/pull/26491#discussion_r345701486
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
##########
@@ -2075,22 +2071,21 @@ class AstBuilder(conf: SQLConf) extends
SqlBaseBaseVisitor[AnyRef] with Logging
*/
override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext):
CalendarInterval = {
withOrigin(ctx) {
- val units = ctx.intervalUnit().asScala.map { unit =>
- val u = unit.getText.toLowerCase(Locale.ROOT)
- // Handle plural forms, e.g:
yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
- if (u.endsWith("s")) u.substring(0, u.length - 1) else u
- }.map(IntervalUtils.IntervalUnit.withName).toArray
-
- val values = ctx.intervalValue().asScala.map { value =>
- if (value.STRING() != null) {
- string(value.STRING())
- } else {
- value.getText
- }
- }.toArray
-
+ val units = ctx.intervalUnit().asScala
+ val values = ctx.intervalValue().asScala
try {
- IntervalUtils.fromUnitStrings(units, values)
+ assert(units.length == values.length)
+ val kvs = units.indices.map { i =>
+ val u = units(i).getText
+ val v = if (values(i).STRING() != null) {
+ string(values(i).STRING())
+ } else {
+ values(i).getText
+ }
+ v + " " + u
+ }
+ val str = kvs.mkString(" ")
+ IntervalUtils.fromString(str)
Review comment:
I ran a specially modified `IntervalBenchmark` test that mimics the type
constructor logic; the only difference is that it calls
`IntervalUtils.fromString` directly.
```scala
private def addCase(benchmark: Benchmark, cardinality: Long, units:
Seq[String]): Unit = {
Seq(true, false).foreach { withPrefix =>
val expr = buildString(withPrefix, units).cast("interval")
val note = if (withPrefix) "w/ interval" else "w/o interval"
benchmark.addCase(s"${units.length + 1} units $note", numIters = 3) {
_ =>
// doBenchmark(cardinality, expr)
(0L until cardinality).foreach(_ =>
IntervalUtils.fromString(units.mkString(" ")))
}
}
}
```
We can see a huge performance improvement here. Anyway, this is only used to
parse typed literals, so it is not a big deal actually.
```scala
[info] Running case: 1 units w/ interval
[info] Stopped after 3 iterations, 98544 ms
[info] Running case: 1 units w/o interval
[info] Stopped after 3 iterations, 78871 ms
[info] Running case: 2 units w/ interval
[info] Stopped after 3 iterations, 72469 ms
[info] Running case: 2 units w/o interval
[info] Stopped after 3 iterations, 78753 ms
```
```scala
[info] Running case: 1 units w/ interval
[info] Stopped after 3 iterations, 8926 ms
[info] Running case: 1 units w/o interval
[info] Stopped after 3 iterations, 8881 ms
[info] Running case: 2 units w/ interval
[info] Stopped after 3 iterations, 8773 ms
[info] Running case: 2 units w/o interval
[info] Stopped after 3 iterations, 8815 ms
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]