uros-b commented on code in PR #56498:
URL: https://github.com/apache/spark/pull/56498#discussion_r3420874219
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -1701,6 +1730,91 @@ case class StringInstr(str: Expression, substr:
Expression)
newLeft: Expression, newRight: Expression): StringInstr = copy(str =
newLeft, substr = newRight)
}
+/**
+ * A function that returns the position of the specified occurrence of
`substr` in the given
+ * string, starting the search from position `start`. If `start` is positive,
the search proceeds
+ * forward; if `start` is negative, the search proceeds backward. `start` = 0
returns 0. If
+ * `start` is not specified, it defaults to 1. If `occurrence` is specified,
it determines which
+ * occurrence of `substr` to return; `occurrence` must be a positive integer
and defaults to 1.
+ *
+ * Returns null if either of the arguments are null and
+ * returns 0 if substr could not be found in str.
+ *
+ * NOTE: that this is not zero based, but 1-based index. The first character
in str has index 1.
+ */
+case class StringInstr4(
+ str: Expression,
+ sub: Expression,
+ start: Expression,
+ occurrence: Expression,
+ failOnError: Boolean = SQLConf.get.ansiEnabled)
+ extends QuaternaryExpression with ImplicitCastInputTypes {
+
+ def this(str: Expression, sub: Expression) =
+ this(str, sub, Literal(1), Literal(1), SQLConf.get.ansiEnabled)
+
+ def this(str: Expression, sub: Expression, start: Expression) =
+ this(str, sub, start, Literal(1), SQLConf.get.ansiEnabled)
+
+ def this(str: Expression, sub: Expression, start: Expression, occurrence:
Expression) =
+ this(str, sub, start, occurrence, SQLConf.get.ansiEnabled)
+
+ override def nullable: Boolean = true
+ override def nullIntolerant: Boolean = true
+ final lazy val collationId: Int =
first.dataType.asInstanceOf[StringType].collationId
+
+ override def first: Expression = str
+ override def second: Expression = sub
+ override def third: Expression = start
+ override def fourth: Expression = occurrence
+ override def dataType: DataType = IntegerType
+ override def inputTypes: Seq[AbstractDataType] =
+ Seq(
+ StringTypeNonCSAICollation(supportsTrimCollation = true),
+ StringTypeNonCSAICollation(supportsTrimCollation = true),
+ IntegerType,
+ IntegerType
+ )
+
+ override def contextIndependentFoldable: Boolean =
super.contextIndependentFoldable
+
+ override def nullSafeEval(string: Any, sub: Any, start: Any, occurrence:
Any): Any = {
+ val occ = occurrence.asInstanceOf[Int]
+ if (occ <= 0) {
Review Comment:
ANSI-gated error for occurrence <= 0 is inconsistent with convention.
INVALID_PARAMETER_VALUE.* errors are usage errors and are conventionally thrown
unconditionally, not gated on `spark.sql.ansi.enabled` (see e.g.
BIT_POSITION_RANGE, PATTERN, etc.). Returning null in non-ANSI mode is
surprising and differs from how a clearly-invalid argument is normally treated.
I'd recommend always throwing. cc @cloud-fan @MaxGekk @srielau for
this design decision.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]