Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/18416#discussion_r123920728 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala --- @@ -992,6 +1123,128 @@ case class ExternalMapToCatalyst private( } } +object ExternalSetToCatalystArray { + private val curId = new java.util.concurrent.atomic.AtomicInteger() + + def apply( + inputSet: Expression, + elementType: DataType, + elementConverter: Expression => Expression, + elementNullable: Boolean): ExternalSetToCatalystArray = { + val id = curId.getAndIncrement() + val elementName = "ExternalSetToCatalystArray_element" + id + val elementIsNull = "ExternalSetToCatalystArray_element_isNull" + id + + ExternalSetToCatalystArray( + elementName, + elementIsNull, + elementType, + elementConverter(LambdaVariable(elementName, elementIsNull, elementType, elementNullable)), + inputSet + ) + } +} + +/** + * Converts a Scala/Java set object into catalyst array format, by applying the converter when + * iterate the set. + * + * @param element the name of the set element variable that used when iterate the set, and used as + * input for the `elementConverter` + * @param elementIsNull the nullability of the element variable that used when iterate the set, and + * used as input for the `elementConverter` + * @param elementType the data type of the element variable that used when iterate the set, and + * used as input for the `elementConverter` + * @param elementConverter A function that take the `element` as input, and converts it to catalyst + * array format. + * @param child An expression that when evaluated returns the input set object. 
+ */ +case class ExternalSetToCatalystArray private( + element: String, + elementIsNull: String, + elementType: DataType, + elementConverter: Expression, + child: Expression) + extends UnaryExpression with NonSQLExpression { + + override def foldable: Boolean = false + + override def dataType: ArrayType = ArrayType( + elementType = elementConverter.dataType, containsNull = elementConverter.nullable) + + override def eval(input: InternalRow): Any = + throw new UnsupportedOperationException("Only code-generated evaluation is supported") + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val inputSet = child.genCode(ctx) + val genElementConverter = elementConverter.genCode(ctx) + val length = ctx.freshName("length") + val index = ctx.freshName("index") + + val iter = ctx.freshName("iter") + val (defineIterator, defineElement) = child.dataType match { + case ObjectType(cls) if classOf[java.util.Set[_]].isAssignableFrom(cls) => + val javaIteratorCls = classOf[java.util.Iterator[_]].getName --- End diff -- I'd prefer to leave Java set support to another PR.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org