GitHub user kiszk opened a pull request:
https://github.com/apache/spark/pull/18641
[SPARK-21413][SQL] Fix 64KB JVM bytecode limit problem in multiple
projections with CASE WHEN
## What changes were proposed in this pull request?
This PR changes the code generation for `CaseWhen` to place the generated code for the
condition and `then` expressions into separate methods if their size could be large.
When such a method is newly generated, the variables for `isNull` and `value` are
declared as instance variables.
Before this PR
```java
/* 005 */ class SpecificMutableProjection extends
org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
...
/* 034 */ public java.lang.Object apply(java.lang.Object _i) {
/* 035 */ InternalRow i = (InternalRow) _i;
/* 036 */
/* 037 */
/* 038 */
/* 039 */ boolean isNull = true;
/* 040 */ int value = -1;
/* 041 */
/* 042 */
/* 043 */ boolean isNull1 = true;
/* 044 */ boolean value1 = false;
/* 045 */
/* 046 */ boolean isNull2 = true;
/* 047 */ int value2 = -1;
/* 048 */
/* 049 */
/* 050 */ boolean isNull3 = true;
/* 051 */ boolean value3 = false;
/* 052 */
/* 053 */ boolean isNull4 = true;
/* 054 */ int value4 = -1;
/* 055 */
/* 056 */
/* 057 */ boolean isNull5 = true;
/* 058 */ boolean value5 = false;
/* 059 */
/* 060 */ boolean isNull6 = true;
/* 061 */ int value6 = -1;
/* 062 */
/* 063 */
/* 064 */ boolean isNull7 = true;
/* 065 */ boolean value7 = false;
/* 066 */
/* 067 */ boolean isNull8 = true;
/* 068 */ int value8 = -1;
/* 069 */
/* 070 */
/* 071 */ boolean isNull9 = true;
/* 072 */ boolean value9 = false;
/* 073 */
/* 074 */ boolean isNull10 = true;
/* 075 */ int value10 = -1;
/* 076 */
/* 077 */
/* 078 */ boolean isNull11 = true;
/* 079 */ boolean value11 = false;
/* 080 */
/* 081 */ boolean isNull12 = true;
/* 082 */ int value12 = -1;
/* 083 */
/* 084 */
/* 085 */ boolean isNull13 = true;
/* 086 */ boolean value13 = false;
/* 087 */
/* 088 */ boolean isNull14 = true;
/* 089 */ int value14 = -1;
/* 090 */
/* 091 */
/* 092 */ boolean isNull15 = true;
/* 093 */ boolean value15 = false;
/* 094 */
/* 095 */ boolean isNull16 = true;
/* 096 */ int value16 = -1;
/* 097 */
/* 098 */
/* 099 */ boolean isNull17 = true;
/* 100 */ boolean value17 = false;
/* 101 */
/* 102 */ boolean isNull18 = true;
/* 103 */ int value18 = -1;
/* 104 */
/* 105 */
/* 106 */ boolean isNull19 = true;
/* 107 */ boolean value19 = false;
/* 108 */
/* 109 */ boolean isNull20 = i.isNullAt(0);
/* 110 */ int value20 = isNull20 ? -1 : (i.getInt(0));
/* 111 */ if (!isNull20) {
/* 112 */
/* 113 */
/* 114 */ isNull19 = false; // resultCode could change nullability.
/* 115 */ value19 = value20 == 0;
/* 116 */
/* 117 */ }
/* 118 */ if (!isNull19 && value19) {
/* 119 */
/* 120 */ isNull18 = false;
/* 121 */ value18 = -1;
/* 122 */ }
/* 123 */
/* 124 */ else {
/* 125 */
/* 126 */
/* 127 */ boolean isNull23 = i.isNullAt(0);
/* 128 */ int value23 = isNull23 ? -1 : (i.getInt(0));
/* 129 */ isNull18 = isNull23;
/* 130 */ value18 = value23;
/* 131 */ }
...
```
After this PR
```java
/* 005 */ class SpecificMutableProjection extends
org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
...
/* 263 */ private boolean isNull1409;
/* 264 */ private boolean value1409;
...
/* 519 */ private boolean isNull2815;
/* 520 */ private boolean value2815;
...
/* 1073 */ public java.lang.Object apply(java.lang.Object _i) {
/* 1074 */ InternalRow i = (InternalRow) _i;
/* 1075 */
/* 1076 */
/* 1077 */
/* 1078 */ boolean isNull = true;
/* 1079 */ int value = -1;
/* 1080 */
/* 1081 */ caseWhenCondExpr255(i);
/* 1082 */ if (!isNull2815 && value2815) {
/* 1083 */
/* 1084 */ isNull = false;
/* 1085 */ value = -1;
/* 1086 */ }
/* 1087 */
/* 1088 */ else {
/* 1089 */
/* 1090 */
/* 1091 */ boolean isNull2816 = true;
/* 1092 */ int value2816 = -1;
/* 1093 */
/* 1094 */ caseWhenCondExpr383(i);
/* 1095 */ if (!isNull4223 && value4223) {
/* 1096 */
/* 1097 */ isNull2816 = false;
/* 1098 */ value2816 = -1;
/* 1099 */ }
...
/* 30171 */ private void caseWhenCondExpr255(InternalRow i) {
/* 30172 */ boolean isNull1 = true;
/* 30173 */ boolean value1 = false;
/* 30174 */
/* 30175 */ boolean isNull2 = true;
/* 30176 */ int value2 = -1;
/* 30177 */
/* 30178 */ caseWhenCondExpr127(i);
/* 30179 */ if (!isNull1409 && value1409) {
/* 30180 */
/* 30181 */ isNull2 = false;
/* 30182 */ value2 = -1;
/* 30183 */ }
/* 30184 */
/* 30185 */ else {
/* 30186 */
/* 30187 */
/* 30188 */ boolean isNull1410 = true;
/* 30189 */ int value1410 = -1;
/* 30190 */
/* 30191 */ caseWhenCondExpr191(i);
/* 30192 */ if (!isNull2113 && value2113) {
/* 30193 */
/* 30194 */ isNull1410 = false;
/* 30195 */ value1410 = -1;
/* 30196 */ }
...
```
## How was this patch tested?
Added new tests to `CodeGenerationSuite` and `DataFrameSuite`.
You can merge this pull request into a Git repository by running:
$ git pull https://github.com/kiszk/spark SPARK-21413
Alternatively you can review and apply these changes as the patch at:
https://github.com/apache/spark/pull/18641.patch
To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:
This closes #18641
----
commit 19ae0dce58f468e672cb03a6c94d0eab54504473
Author: Kazuaki Ishizaki <[email protected]>
Date: 2017-07-15T08:25:20Z
initial commit
----
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]