Github user JoshRosen commented on a diff in the pull request:
https://github.com/apache/spark/pull/6660#discussion_r31858853
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -33,21 +32,167 @@ class IntegerHashSet extends
org.apache.spark.util.collection.OpenHashSet[Int]
class LongHashSet extends
org.apache.spark.util.collection.OpenHashSet[Long]
/**
+ * Java source for evaluating an [[Expression]] given a [[Row]] of input.
+ *
+ * @param code The sequence of statements required to evaluate the
expression.
+ * @param nullTerm A term that holds a boolean value representing whether
the expression evaluated
+ * to null.
+ * @param primitiveTerm A term for a possible primitive value of the
result of the evaluation. Not
+ * valid if `nullTerm` is set to `true`.
+ */
+case class GeneratedExpressionCode(var code: Code, var nullTerm: Term, var
primitiveTerm: Term)
+
+/**
+ * A context for codegen, which is used to keep track of the expressions
that are not supported
+ * by codegen and are therefore evaluated directly. Each unsupported
expression is appended at the
+ * end of `references`; its position is kept in the code and used to
access and evaluate it.
+ */
+class CodeGenContext {
+
+ /**
+ * Holds all the expressions that do not support codegen; they will be
evaluated directly.
+ */
+ val references: mutable.ArrayBuffer[Expression] = new
mutable.ArrayBuffer[Expression]()
+
+ val stringType = classOf[UTF8String].getName
+ val decimalType = classOf[Decimal].getName
+
+ private val curId = new java.util.concurrent.atomic.AtomicInteger()
+
+ /**
+ * Returns a term name that is unique within this instance of a
`CodeGenContext`.
+ *
+ * (Since we aren't in a macro context we do not seem to have access to
the built in `freshName`
+ * function.)
+ */
+ def freshName(prefix: String): Term = {
+ s"$prefix${curId.getAndIncrement}"
+ }
+
+ /**
+ * Return the code to access a column for given DataType
+ */
+ def getColumn(dataType: DataType, ordinal: Int): Code = {
+ if (isNativeType(dataType)) {
+ s"i.${accessorForType(dataType)}($ordinal)"
+ } else {
+ s"(${boxedType(dataType)})i.apply($ordinal)"
+ }
+ }
+
+ /**
+ * Return the code to update a column in Row for given DataType
+ */
+ def setColumn(dataType: DataType, ordinal: Int, value: Term): Code = {
+ if (isNativeType(dataType)) {
+ s"${mutatorForType(dataType)}($ordinal, $value)"
+ } else {
+ s"update($ordinal, $value)"
+ }
+ }
+
+ /**
+ * Return the name of accessor in Row for a DataType
+ */
+ def accessorForType(dt: DataType): Term = dt match {
+ case IntegerType => "getInt"
+ case other => s"get${boxedType(dt)}"
+ }
+
+ /**
+ * Return the name of mutator in Row for a DataType
+ */
+ def mutatorForType(dt: DataType): Term = dt match {
+ case IntegerType => "setInt"
+ case other => s"set${boxedType(dt)}"
+ }
+
+ /**
+ * Return the primitive type for a DataType
--- End diff --
Maybe say "Java primitive type"?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to have it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]