yaooqinn commented on a change in pull request #28284:
URL: https://github.com/apache/spark/pull/28284#discussion_r412621357
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
##########
@@ -2033,108 +1991,26 @@ case class MakeTimestamp(
override def prettyName: String = "make_timestamp"
}
-case class Millennium(child: Expression) extends UnaryExpression with
ImplicitCastInputTypes {
-
- override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
-
- override def dataType: DataType = IntegerType
-
- override protected def nullSafeEval(date: Any): Any = {
- DateTimeUtils.getMillennium(date.asInstanceOf[Int])
- }
-
- override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
- val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
- defineCodeGen(ctx, ev, c => s"$dtu.getMillennium($c)")
- }
-}
-
-case class Century(child: Expression) extends UnaryExpression with
ImplicitCastInputTypes {
-
- override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
-
- override def dataType: DataType = IntegerType
-
- override protected def nullSafeEval(date: Any): Any = {
- DateTimeUtils.getCentury(date.asInstanceOf[Int])
- }
-
- override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
- val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
- defineCodeGen(ctx, ev, c => s"$dtu.getCentury($c)")
- }
-}
-
-case class Decade(child: Expression) extends UnaryExpression with
ImplicitCastInputTypes {
-
- override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
-
- override def dataType: DataType = IntegerType
-
- override protected def nullSafeEval(date: Any): Any = {
- DateTimeUtils.getDecade(date.asInstanceOf[Int])
- }
-
- override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
- val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
- defineCodeGen(ctx, ev, c => s"$dtu.getDecade($c)")
- }
-}
-
-case class Epoch(child: Expression, timeZoneId: Option[String] = None)
- extends UnaryExpression with ImplicitCastInputTypes with
TimeZoneAwareExpression {
-
- override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
- // DecimalType is used to not lose precision while converting microseconds to
- // the fractional part of seconds. Scale 6 is taken to have all microseconds
as
- // the fraction. The precision 20 should cover whole valid range of years
[1, 9999]
- // plus negative years that can be used in some cases though are not
officially supported.
- override def dataType: DataType = DecimalType(20, 6)
- override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
- copy(timeZoneId = Option(timeZoneId))
-
- override protected def nullSafeEval(timestamp: Any): Any = {
- DateTimeUtils.getEpoch(timestamp.asInstanceOf[Long], zoneId)
- }
-
- override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
- val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
- val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
- defineCodeGen(ctx, ev, c => s"$dtu.getEpoch($c, $zid)")
- }
-}
-
object DatePart {
def parseExtractField(
extractField: String,
source: Expression,
errorHandleFunc: => Nothing): Expression =
extractField.toUpperCase(Locale.ROOT) match {
- case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => Millennium(source)
- case "CENTURY" | "CENTURIES" | "C" | "CENT" => Century(source)
- case "DECADE" | "DECADES" | "DEC" | "DECS" => Decade(source)
case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source)
- case "ISOYEAR" => IsoYear(source)
+ case "YEAROFWEEK" => YearOfWeek(source)
case "QUARTER" | "QTR" => Quarter(source)
case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source)
case "WEEK" | "W" | "WEEKS" => WeekOfYear(source)
case "DAY" | "D" | "DAYS" => DayOfMonth(source)
case "DAYOFWEEK" | "DOW" => DayOfWeek(source)
- case "ISODOW" => Add(WeekDay(source), Literal(1))
+ case "DAYOFWEEK_ISO" | "DOW_ISO" => Add(WeekDay(source), Literal(1))
Review comment:
Hi @dongjoon-hyun, for historical reasons, we have [`dayofweek`, `dow`]
implemented to represent the non-ISO day of the week-based year, plus a newly
added `isodow` from PostgreSQL for the ISO day of the week-based year. Many
other systems support only one week-numbering system, so things in Spark become
a little bit complicated.
1. Because `isodow` already exists, we need to add an `iso` prefix to
`dayofweek` to make a pair for it as well, giving [`dayofweek`, `isodayofweek`,
`dow` and `isodow`].
2. Because `iso`-prefixed names are rare and more systems choose the
`iso`-suffixed style, we may instead end up with [`dayofweek`, `dayofweekiso`,
`dow`, `dowiso`].
3. `dayofweekiso` looks nice and has use cases in the platforms listed
above, e.g. Snowflake, but `dowiso` looks weird and has no known use cases.
4. After a discussion with @cloud-fan, we both agreed that an underscore
before `iso` looks much better, because `isodow` is new and there is no
standard for `iso`-style names; this keeps things simple and clear for
end users.
Thus, we finally end up with [`dayofweek`, `dow`] for the non-ISO
week-numbering system and [`dayofweek_iso`, `dow_iso`] for the ISO system.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]