Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/19811#discussion_r154041665
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
---
@@ -177,11 +190,67 @@ class CodegenContext {
* the list of default imports available.
* Also, generic type arguments are accepted but ignored.
* @param variableName Name of the field.
- * @param initCode The statement(s) to put into the init() method to
initialize this field.
+ * @param codeFunctions Function includes statement(s) to put into the
init() method to
+ * initialize this field. An argument is the name of the
mutable state variable
* If left blank, the field will be default-initialized.
+ * @param inline whether the declaration and initialization code may be
inlined rather than
+ * compacted. If true, the name is not changed
+ * @return the name of the mutable state variable, which is either the
original name if the
+ * variable is inlined to the outer class, or an array access if
the variable is to be
+ * stored in an array of variables of the same type and
initialization.
+ * primitive type variables will be inlined into outer class
when the total number of
+ * mutable variables is less than
`CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD`
+ * the max size of an array for compaction is given by
+ * `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`.
*/
- def addMutableState(javaType: String, variableName: String, initCode:
String = ""): Unit = {
- mutableStates += ((javaType, variableName, initCode))
+ def addMutableState(
+ javaType: String,
+ variableName: String,
+ codeFunctions: String => String = _ => "",
+ inline: Boolean = false): String = {
+ val varName = if (!inline) freshName(variableName) else variableName
+ val initCode = codeFunctions(varName)
+
+ if (inline ||
+ // want to put a primitive type variable at outerClass for
performance
+ isPrimitiveType(javaType) &&
+ (mutableStates.length <
CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD) ||
+ // identify multi-dimensional array or no simply-assigned object
+ !isPrimitiveType(javaType) &&
+ (javaType.contains("[][]") ||
+ !initCode.matches("(^[\\w_]+\\d+\\s*=\\s*null;|"
+ + "^[\\w_]+\\d+\\s*=\\s*new\\s*[\\w\\.]+\\(\\);$|"
+ + "^$)"))) {
+ // primitive type or non-simply-assigned state is declared inline to
the outer class
+ mutableStates += ((javaType, varName, initCode))
+ varName
+ } else {
+ // Create an initialization code agnostic to the actual variable
name which we can key by
+ val initCodeKey = initCode.replaceAll(varName, "*VALUE*")
+
+ val arrayName = mutableStateArrayCurrentNames.getOrElse((javaType,
initCodeKey), "")
+ val prevIdx = mutableStateArrayIdx.getOrElse((javaType, arrayName),
-1)
+ if (0 <= prevIdx && prevIdx <
CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT - 1) {
+ // a mutableStateArray for the given type and initialization has
already been declared,
+ // update the max index of the array and return an array element
+ val idx = prevIdx + 1
+ mutableStateArrayIdx.update((javaType, arrayName), idx)
+ s"$arrayName[$idx]"
+ } else {
+ // mutableStateArray has not been declared yet for the given type
and initialization code.
+ // Create a new name for the array, and add an entry to keep track
of current array name
+ // for type and initialized code. In addition, type, array name,
and qualified initialized
+ // code is stored for code generation
+ val arrayName = freshName("mutableStateArray")
+ val qualifiedInitCode = initCode.replaceAll(
--- End diff --
I am concerned about the side effects of calling codeFunctions("") a second
time. codeFunctions may return a different result from the one returned by
the first call. Thus, I want to call codeFunctions() only once.
WDYT?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]