Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/19811#discussion_r153976761 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala --- @@ -177,11 +190,67 @@ class CodegenContext { * the list of default imports available. * Also, generic type arguments are accepted but ignored. * @param variableName Name of the field. - * @param initCode The statement(s) to put into the init() method to initialize this field. + * @param codeFunctions Function includes statement(s) to put into the init() method to + * initialize this field. An argument is the name of the mutable state variable * If left blank, the field will be default-initialized. + * @param inline whether the declaration and initialization code may be inlined rather than + * compacted. If true, the name is not changed + * @return the name of the mutable state variable, which is either the original name if the + * variable is inlined to the outer class, or an array access if the variable is to be + * stored in an array of variables of the same type and initialization. + * primitive type variables will be inlined into outer class when the total number of + * mutable variables is less than `CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD` + * the max size of an array for compaction is given by + * `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`. 
*/ - def addMutableState(javaType: String, variableName: String, initCode: String = ""): Unit = { - mutableStates += ((javaType, variableName, initCode)) + def addMutableState( + javaType: String, + variableName: String, + codeFunctions: String => String = _ => "", + inline: Boolean = false): String = { + val varName = if (!inline) freshName(variableName) else variableName + val initCode = codeFunctions(varName) + + if (inline || + // want to put a primitive type variable at outerClass for performance + isPrimitiveType(javaType) && + (mutableStates.length < CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD) || + // identify multi-dimensional array or no simply-assigned object + !isPrimitiveType(javaType) && + (javaType.contains("[][]") || + !initCode.matches("(^[\\w_]+\\d+\\s*=\\s*null;|" + + "^[\\w_]+\\d+\\s*=\\s*new\\s*[\\w\\.]+\\(\\);$|" + + "^$)"))) { + // primitive type or non-simply-assigned state is declared inline to the outer class + mutableStates += ((javaType, varName, initCode)) + varName + } else { + // Create an initialization code agnostic to the actual variable name which we can key by + val initCodeKey = initCode.replaceAll(varName, "*VALUE*") + + val arrayName = mutableStateArrayCurrentNames.getOrElse((javaType, initCodeKey), "") + val prevIdx = mutableStateArrayIdx.getOrElse((javaType, arrayName), -1) + if (0 <= prevIdx && prevIdx < CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT - 1) { + // a mutableStateArray for the given type and initialization has already been declared, + // update the max index of the array and return an array element + val idx = prevIdx + 1 + mutableStateArrayIdx.update((javaType, arrayName), idx) + s"$arrayName[$idx]" + } else { + // mutableStateArray has not been declared yet for the given type and initialization code. + // Create a new name for the array, and add an entry to keep track of current array name + // for type and initialized code. 
In addition, type, array name, and qualified initialized + // code is stored for code generation + val arrayName = freshName("mutableStateArray") + val qualifiedInitCode = initCode.replaceAll( --- End diff -- ditto
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org