uros-db commented on code in PR #45643: URL: https://github.com/apache/spark/pull/45643#discussion_r1565484403
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala: ########## @@ -1346,20 +1350,24 @@ case class StringTrimRight(srcStr: Expression, trimStr: Option[Expression] = Non case class StringInstr(str: Expression, substr: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { + final lazy val collationId: Int = left.dataType.asInstanceOf[StringType].collationId + override def left: Expression = str override def right: Expression = substr override def dataType: DataType = IntegerType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) override def nullSafeEval(string: Any, sub: Any): Any = { - string.asInstanceOf[UTF8String].indexOf(sub.asInstanceOf[UTF8String], 0) + 1 + CollationSupport.IndexOf. + exec(string.asInstanceOf[UTF8String], sub.asInstanceOf[UTF8String], 0, collationId) + 1 } override def prettyName: String = "instr" override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (l, r) => - s"($l).indexOf($r, 0) + 1") + defineCodeGen(ctx, ev, (l, r) => + CollationSupport.IndexOf.genCode(l, r, 0, collationId) + " + 1") Review Comment: Perhaps we should use "string" and "sub" (instead of "l" and "r") as the lambda parameter names in doGenCode, to preserve the naming used in nullSafeEval. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org