[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/20015 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157925994 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,181 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val instant: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * @param input internalRow (time) + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc function: (time, level) => time + */ + protected def evalHelper(input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => Any): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } -if (level == -1) { - // 
unknown format +if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) { + // unknown format or too large level null } else { - val d = date.eval(input) - if (d == null) { + val t = instant.eval(input) + if (t == null) { null } else { -DateTimeUtils.truncDate(d.asInstanceOf[Int], level) +truncFunc(t, level) } } } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + protected def codeGenHelper( + ctx: CodegenContext, + ev: ExprCode, + maxLevel: Int, + orderReversed: Boolean = false)( + truncFunc: (String, String) => String) +: ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (format.foldable) { - if (truncLevel == -1) { + if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > maxLevel) { ev.copy(code = s""" boolean ${ev.isNull} = true; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") } else { -val d = date.genCode(ctx) +val t = instant.genCode(ctx) +val truncFuncStr = truncFunc(t.value, truncLevel.toString) ev.copy(code = s""" - ${d.code} - boolean ${ev.isNull} = ${d.isNull}; + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { -${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); +${ev.value} = $dtu.$truncFuncStr; }""") } } else { - nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { + nullSafeCodeGen(ctx, ev, (left, right) => { val form = ctx.freshName("form") +val (dateVal, fmt) = if (orderReversed) { + (right, left) +} else { + (left, right) +} +val truncFuncStr = truncFunc(dateVal, form) s""" int $form = $dtu.parseTruncLevel($fmt); - if ($form == -1) { + if ($form == -1 || $form > $maxLevel) { ${ev.isNull} = true; } else { -${ev.value} = $dtu.truncDate($dateVal, $form); +${ev.value} = $dtu.$truncFuncStr } """ }) } } } +/** + * Returns date truncated to the u
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157926202 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala --- @@ -563,6 +563,76 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } + test("truncTimestamp") { +def test( --- End diff -- `test` -> `testTrunc` ? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157926055 --- Diff: python/pyspark/sql/functions.py --- @@ -,6 +,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): +""" +Returns timestamp truncated to the unit specified by the format. + +:param format: 'year', '', 'yy', 'month', 'mon', 'mm', +'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter' + +>>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['d']) --- End diff -- `d` -> `t` or `ts`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157908622 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input internalRow (time) + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc function: (time, level) => time + * @return --- End diff -- Remove `@return` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157908653 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * --- End diff -- Remove this line. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157896273 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala --- @@ -964,7 +981,62 @@ object DateTimeUtils { } /** - * Returns the truncate level, could be TRUNC_YEAR, TRUNC_MONTH, or TRUNC_INVALID, + * Returns the trunc date time from original date time and trunc level. + * Trunc level should be generated using `parseTruncLevel()`, should be between 1 and 8 + */ + def truncTimestamp(d: SQLTimestamp, level: Int, timeZone: TimeZone): SQLTimestamp = { --- End diff -- nit: d -> ts or t --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157905770 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,183 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression --- End diff -- Maybe, `time` -> `instant`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157902164 --- Diff: python/pyspark/sql/functions.py --- @@ -,6 +,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): +""" +Returns timestamp truncated to the unit specified by the format. + +:param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm', --- End diff -- Also update the original `trunc` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157902098 --- Diff: python/pyspark/sql/functions.py --- @@ -,6 +,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): +""" +Returns timestamp truncated to the unit specified by the format. + +:param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm', --- End diff -- Nit: `YYYY` -> `yyyy` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157821540 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => T): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } -if (level == -1) { +if (level == 
DateTimeUtils.TRUNC_INVALID || level > maxLevel) { // unknown format null } else { - val d = date.eval(input) + val d = time.eval(input) if (d == null) { null } else { -DateTimeUtils.truncDate(d.asInstanceOf[Int], level) +truncFunc(d, level) } } } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + protected def codeGenHelper[T]( + ctx: CodegenContext, + ev: ExprCode, + maxLevel: Int, + orderReversed: Boolean = false)( + truncFunc: (String, String) => String) +: ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (format.foldable) { - if (truncLevel == -1) { + if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > maxLevel) { ev.copy(code = s""" boolean ${ev.isNull} = true; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") } else { -val d = date.genCode(ctx) +val d = time.genCode(ctx) +val truncFuncStr = truncFunc(d.value, truncLevel.toString) ev.copy(code = s""" ${d.code} boolean ${ev.isNull} = ${d.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { -${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); +${ev.value} = $dtu.$truncFuncStr; }""") } } else { - nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { + nullSafeCodeGen(ctx, ev, (left, right) => { val form = ctx.freshName("form") +val (dateVal, fmt) = if (orderReversed) { + (right, left) +} else { + (left, right) +} +val truncFuncStr = truncFunc(dateVal, form) s""" int $form = $dtu.parseTruncLevel($fmt); - if ($form == -1) { + if ($form == -1 || $form > $maxLevel) { ${ev.isNull} = true; } else { -${ev.value} = $dtu.truncDate($dateVal, $form); +${ev.value} = $dtu.$truncFuncStr } """ }) } } } +/** + * Returns date truncated to the unit specified by the format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(date, fmt) - Returns `d
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157686437 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => T): Any = { --- End diff -- Maybe `truncFunc: (Any, Int) => Any` is enough? So we don't need to use the `T`, but I'm not sure if this is better... 
--- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157676669 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => T): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } -if (level == -1) { +if (level == 
DateTimeUtils.TRUNC_INVALID || level > maxLevel) { --- End diff -- `// unknown format or too small level`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157678588 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => T): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } -if (level == -1) { +if (level == 
DateTimeUtils.TRUNC_INVALID || level > maxLevel) { // unknown format null } else { - val d = date.eval(input) + val d = time.eval(input) if (d == null) { null } else { -DateTimeUtils.truncDate(d.asInstanceOf[Int], level) +truncFunc(d, level) } } } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + protected def codeGenHelper[T]( --- End diff -- Why do we need a type parameter `T`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157674840 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( +truncFunc: (Any, Int) => T): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } -if (level == -1) { +if (level == 
DateTimeUtils.TRUNC_INVALID || level > maxLevel) { // unknown format null } else { - val d = date.eval(input) + val d = time.eval(input) --- End diff -- nit: Since this is a time, it can be `val t = ...` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157680290 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala --- @@ -944,9 +954,16 @@ object DateTimeUtils { date + daysToMonthEnd } - private val TRUNC_TO_YEAR = 1 - private val TRUNC_TO_MONTH = 2 - private val TRUNC_INVALID = -1 + // Visible for testing. + val TRUNC_TO_YEAR = 1 + val TRUNC_TO_MONTH = 2 + val TRUNC_TO_DAY = 3 + val TRUNC_TO_HOUR = 4 + val TRUNC_TO_MINUTE = 5 + val TRUNC_TO_SECOND = 6 + val TRUNC_TO_WEEK = 7 + val TRUNC_TO_QUARTER = 8 + val TRUNC_INVALID = -1 --- End diff -- Can we bring quarter and week forward, maybe to 3 and 4? Then it conforms better to the order of time granularity, and the max-level design is not affected. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157673626 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input --- End diff -- Seems `input` and `truncFunc` descriptions missing. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157677136 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala --- @@ -2797,6 +2797,21 @@ object functions { TruncDate(date.expr, Literal(format)) } + /** + * Returns timestamp truncated to the unit specified by the format. + * + * @param format: 'year', '', 'yy' for truncate by year, + * 'month', 'mon', 'mm' for truncate by month, --- End diff -- nit: one space each more. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157677311 --- Diff: python/pyspark/sql/functions.py --- @@ -,6 +,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): +""" +Returns timestamp truncated to the unit specified by the format. + +:param format: 'year', '', 'yy', 'month', 'mon', 'mm', +'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER' --- End diff -- Could we make those lowercased too? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157677400 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/functions.scala --- @@ -2797,6 +2797,21 @@ object functions { TruncDate(date.expr, Literal(format)) } + /** + * Returns timestamp truncated to the unit specified by the format. + * + * @param format: 'year', '', 'yy' for truncate by year, + * 'month', 'mon', 'mm' for truncate by month, + * 'day', 'dd' for truncate by day, + * Other options are: second, minute, hour, week, month, quarter --- End diff -- Maybe, `'second', 'minute', 'hour', 'week', 'month' and 'quarter'` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157673559 --- Diff: python/pyspark/sql/functions.py --- @@ -,6 +,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): +""" +Returns timestamp truncated to the unit specified by the format. + +:param format: 'year', '', 'yy', 'month', 'mon', 'mm', +'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER' + +>>> df = spark.createDataFrame([('1997-02-28',)], ['d']) --- End diff -- Can we use a timestamp string like `1997-02-28 05:02:11` to show the difference from `trunc` a bit more clearly? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #20015: [SPARK-22829] Add new built-in function date_trun...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20015#discussion_r157675835 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala --- @@ -1295,87 +1295,184 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ -Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { --- End diff -- Maybe `TruncInstant`? I received this advice before and I liked it too. Not a big deal tho. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org