GitHub user hvanhovell commented on the issue:
https://github.com/apache/spark/pull/13065
Here are the code dumps for all three generator paths: explode over an array, explode over a map, and json_tuple.
##### Setup
```scala
val df = spark.range(1 << 20).selectExpr(
  "id as key",
  "array(rand(), rand(), rand(), rand(), rand()) as values",
  "map('a', rand(), 'b', rand(), 'c', rand(), 'd', rand(), 'e', rand()) pairs",
  "concat('{\"key\": ', id, ', \"value\": \"v_', id, '\"}') json")
df.createTempView("df")
```
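(For anyone reproducing these dumps: instead of `explain codegen` you can also print the whole-stage generated code through the debug helpers. A minimal sketch, assuming this Spark build ships the `org.apache.spark.sql.execution.debug` package:)
```scala
// Assumed alternative to "explain codegen": dump the generated code
// via the debug package, if it is available in this build.
import org.apache.spark.sql.execution.debug._
sql("select key, explode(values) as value from df").debugCodegen()
```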
##### explode(array)
```java
> println(sql("explain codegen select key, explode(values) as value from
df").collect()(0))
/* ... */
/* 099 */ protected void processNext() throws java.io.IOException {
/* 100 */ // initialize Range
/* 101 */ if (!range_initRange) {
/* 102 */ range_initRange = true;
/* 103 */ initRange(partitionIndex);
/* 104 */ }
/* 105 */
/* 106 */ while (!range_overflow && range_number < range_partitionEnd) {
/* 107 */ long range_value = range_number;
/* 108 */ range_number += 1L;
/* 109 */ if (range_number < range_value ^ 1L < 0) {
/* 110 */ range_overflow = true;
/* 111 */ }
/* 112 */
/* 113 */ final boolean project_isNull1 = false;
/* 114 */ this.project_values = new Object[5];
/* 115 */ final double project_value2 = project_rng.nextDouble();
/* 116 */ if (false) {
/* 117 */ project_values[0] = null;
/* 118 */ } else {
/* 119 */ project_values[0] = project_value2;
/* 120 */ }
/* 121 */
/* 122 */ final double project_value3 = project_rng1.nextDouble();
/* 123 */ if (false) {
/* 124 */ project_values[1] = null;
/* 125 */ } else {
/* 126 */ project_values[1] = project_value3;
/* 127 */ }
/* 128 */
/* 129 */ final double project_value4 = project_rng2.nextDouble();
/* 130 */ if (false) {
/* 131 */ project_values[2] = null;
/* 132 */ } else {
/* 133 */ project_values[2] = project_value4;
/* 134 */ }
/* 135 */
/* 136 */ final double project_value5 = project_rng3.nextDouble();
/* 137 */ if (false) {
/* 138 */ project_values[3] = null;
/* 139 */ } else {
/* 140 */ project_values[3] = project_value5;
/* 141 */ }
/* 142 */
/* 143 */ final double project_value6 = project_rng4.nextDouble();
/* 144 */ if (false) {
/* 145 */ project_values[4] = null;
/* 146 */ } else {
/* 147 */ project_values[4] = project_value6;
/* 148 */ }
/* 149 */
/* 150 */ final ArrayData project_value1 = new
org.apache.spark.sql.catalyst.util.GenericArrayData(project_values);
/* 151 */ this.project_values = null;
/* 152 */
/* 153 */ int generate_numElements = project_isNull1 ? 0 :
project_value1.numElements();
/* 154 */ for (int generate_index = 0; generate_index <
generate_numElements; generate_index++) {
/* 155 */ generate_numOutputRows.add(1);
/* 156 */
/* 157 */ double generate_col =
project_value1.getDouble(generate_index);
/* 158 */ project_rowWriter1.write(0, range_value);
/* 159 */
/* 160 */ project_rowWriter1.write(1, generate_col);
/* 161 */ append(project_result1.copy());
/* 162 */
/* 163 */ }
/* 164 */
/* 165 */ if (shouldStop()) return;
/* 166 */ }
/* 167 */ }
/* 168 */ }
```
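As the dump shows, the array path compiles `explode` into a tight for-loop over `numElements()`, appending a copy of the unsafe row per element. For reference, a DataFrame-API sketch that should hit the same path (names here are illustrative, assuming the `df` from the setup above):
```scala
import org.apache.spark.sql.functions._
// Same query as the SQL above, via the DataFrame API.
val exploded = df.select(col("key"), explode(col("values")).as("value"))
exploded.explain() // should show the same whole-stage-codegen'd Generate
```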
##### explode(map)
```java
> println(sql("explain codegen select key, explode(pairs) as (k, v) from
df").collect()(0))
/* 105 */ protected void processNext() throws java.io.IOException {
/* 106 */ // initialize Range
/* 107 */ if (!range_initRange) {
/* 108 */ range_initRange = true;
/* 109 */ initRange(partitionIndex);
/* 110 */ }
/* 111 */
/* 112 */ while (!range_overflow && range_number < range_partitionEnd) {
/* 113 */ long range_value = range_number;
/* 114 */ range_number += 1L;
/* 115 */ if (range_number < range_value ^ 1L < 0) {
/* 116 */ range_overflow = true;
/* 117 */ }
/* 118 */
/* 119 */ final boolean project_isNull1 = false;
/* 120 */ project_keyArray = new Object[5];
/* 121 */ project_valueArray = new Object[5];
/* 122 */
/* 123 */ Object project_obj = ((Expression)
references[1]).eval(null);
/* 124 */ UTF8String project_value2 = (UTF8String) project_obj;
/* 125 */ if (false) {
/* 126 */ throw new RuntimeException("Cannot use null as map key!");
/* 127 */ } else {
/* 128 */ project_keyArray[0] = project_value2;
/* 129 */ }
/* 130 */
/* 131 */ Object project_obj1 = ((Expression)
references[2]).eval(null);
/* 132 */ UTF8String project_value3 = (UTF8String) project_obj1;
/* 133 */ if (false) {
/* 134 */ throw new RuntimeException("Cannot use null as map key!");
/* 135 */ } else {
/* 136 */ project_keyArray[1] = project_value3;
/* 137 */ }
/* 138 */
/* 139 */ Object project_obj2 = ((Expression)
references[3]).eval(null);
/* 140 */ UTF8String project_value4 = (UTF8String) project_obj2;
/* 141 */ if (false) {
/* 142 */ throw new RuntimeException("Cannot use null as map key!");
/* 143 */ } else {
/* 144 */ project_keyArray[2] = project_value4;
/* 145 */ }
/* 146 */
/* 147 */ Object project_obj3 = ((Expression)
references[4]).eval(null);
/* 148 */ UTF8String project_value5 = (UTF8String) project_obj3;
/* 149 */ if (false) {
/* 150 */ throw new RuntimeException("Cannot use null as map key!");
/* 151 */ } else {
/* 152 */ project_keyArray[3] = project_value5;
/* 153 */ }
/* 154 */
/* 155 */ Object project_obj4 = ((Expression)
references[5]).eval(null);
/* 156 */ UTF8String project_value6 = (UTF8String) project_obj4;
/* 157 */ if (false) {
/* 158 */ throw new RuntimeException("Cannot use null as map key!");
/* 159 */ } else {
/* 160 */ project_keyArray[4] = project_value6;
/* 161 */ }
/* 162 */
/* 163 */ final double project_value7 = project_rng.nextDouble();
/* 164 */ if (false) {
/* 165 */ project_valueArray[0] = null;
/* 166 */ } else {
/* 167 */ project_valueArray[0] = project_value7;
/* 168 */ }
/* 169 */
/* 170 */ final double project_value8 = project_rng1.nextDouble();
/* 171 */ if (false) {
/* 172 */ project_valueArray[1] = null;
/* 173 */ } else {
/* 174 */ project_valueArray[1] = project_value8;
/* 175 */ }
/* 176 */
/* 177 */ final double project_value9 = project_rng2.nextDouble();
/* 178 */ if (false) {
/* 179 */ project_valueArray[2] = null;
/* 180 */ } else {
/* 181 */ project_valueArray[2] = project_value9;
/* 182 */ }
/* 183 */
/* 184 */ final double project_value10 = project_rng3.nextDouble();
/* 185 */ if (false) {
/* 186 */ project_valueArray[3] = null;
/* 187 */ } else {
/* 188 */ project_valueArray[3] = project_value10;
/* 189 */ }
/* 190 */
/* 191 */ final double project_value11 = project_rng4.nextDouble();
/* 192 */ if (false) {
/* 193 */ project_valueArray[4] = null;
/* 194 */ } else {
/* 195 */ project_valueArray[4] = project_value11;
/* 196 */ }
/* 197 */
/* 198 */ final MapData project_value1 = new
org.apache.spark.sql.catalyst.util.ArrayBasedMapData(new
org.apache.spark.sql.catalyst.util.GenericArrayData(project_keyArray), new
org.apache.spark.sql.catalyst.util.GenericArrayData(project_valueArray));
/* 199 */ this.project_keyArray = null;
/* 200 */ this.project_valueArray = null;
/* 201 */
/* 202 */ ArrayData generate_keyArray = project_isNull1 ? null :
project_value1.keyArray();
/* 203 */ ArrayData generate_valueArray = project_isNull1 ? null :
project_value1.valueArray();
/* 204 */
/* 205 */ int generate_numElements = project_isNull1 ? 0 :
project_value1.numElements();
/* 206 */ for (int generate_index = 0; generate_index <
generate_numElements; generate_index++) {
/* 207 */ generate_numOutputRows.add(1);
/* 208 */
/* 209 */ UTF8String generate_key =
generate_keyArray.getUTF8String(generate_index);
/* 210 */ double generate_value2 =
generate_valueArray.getDouble(generate_index);
/* 211 */ project_holder1.reset();
/* 212 */
/* 213 */ project_rowWriter1.write(0, range_value);
/* 214 */
/* 215 */ project_rowWriter1.write(1, generate_key);
/* 216 */
/* 217 */ project_rowWriter1.write(2, generate_value2);
/* 218 */ project_result1.setTotalSize(project_holder1.totalSize());
/* 219 */ append(project_result1.copy());
/* 220 */
/* 221 */ }
/* 222 */
/* 223 */ if (shouldStop()) return;
/* 224 */ }
/* 225 */ }
/* 226 */ }
```
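The map path is the same loop, but walks `keyArray()` and `valueArray()` in lock-step. A DataFrame-API sketch of the same query (assuming the multi-column `as(Seq(...))` alias is available in this version):
```scala
import org.apache.spark.sql.functions._
// Same query as the SQL above; as(Seq(...)) names the two generated columns.
val explodedMap = df.select(col("key"), explode(col("pairs")).as(Seq("k", "v")))
```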
##### json_tuple
```java
> println(sql("explain codegen select key, json_tuple(json, 'key', 'value')
as (k, v) from df").collect()(0))
/* 087 */ protected void processNext() throws java.io.IOException {
/* 088 */ // initialize Range
/* 089 */ if (!range_initRange) {
/* 090 */ range_initRange = true;
/* 091 */ initRange(partitionIndex);
/* 092 */ }
/* 093 */
/* 094 */ while (!range_overflow && range_number < range_partitionEnd) {
/* 095 */ long range_value = range_number;
/* 096 */ range_number += 1L;
/* 097 */ if (range_number < range_value ^ 1L < 0) {
/* 098 */ range_overflow = true;
/* 099 */ }
/* 100 */
/* 101 */ Object project_obj = ((Expression)
references[1]).eval(null);
/* 102 */ UTF8String project_value2 = (UTF8String) project_obj;
/* 103 */
/* 104 */ boolean project_isNull3 = false;
/* 105 */ UTF8String project_value3 = null;
/* 106 */ if (!false) {
/* 107 */ project_value3 =
UTF8String.fromString(String.valueOf(range_value));
/* 108 */ }
/* 109 */
/* 110 */ Object project_obj1 = ((Expression)
references[2]).eval(null);
/* 111 */ UTF8String project_value5 = (UTF8String) project_obj1;
/* 112 */
/* 113 */ boolean project_isNull6 = false;
/* 114 */ UTF8String project_value6 = null;
/* 115 */ if (!false) {
/* 116 */ project_value6 =
UTF8String.fromString(String.valueOf(range_value));
/* 117 */ }
/* 118 */
/* 119 */ Object project_obj2 = ((Expression)
references[3]).eval(null);
/* 120 */ UTF8String project_value8 = (UTF8String) project_obj2;
/* 121 */ boolean project_isNull1 = false;
/* 122 */ UTF8String project_value1 = UTF8String.concat(false ? null
: project_value2, project_isNull3 ? null : project_value3, false ? null :
project_value5, project_isNull6 ? null : project_value6, false ? null :
project_value8);
/* 123 */ if (project_value1 == null) {
/* 124 */ project_isNull1 = true;
/* 125 */ }
/* 126 */
/* 127 */ boolean generate_isNull = false;
/* 128 */ InternalRow generate_row = generate_nullRow;
/* 129 */ if (!(project_isNull1)) {
/* 130 */ Object[] generate_raw =
org.apache.spark.sql.catalyst.expressions.JsonTuple.extractTuple(project_value1,
generate_fieldNames);
/* 131 */ generate_row = generate_raw != null ? new
org.apache.spark.sql.catalyst.expressions.GenericInternalRow(generate_raw) :
generate_nullRow;
/* 132 */ }
/* 133 */ scala.collection.Iterator<InternalRow> generate_value =
scala.collection.Iterator$.MODULE$.single(generate_row);
/* 134 */ scala.collection.Iterator<InternalRow> generate_iterator =
generate_value.toIterator();
/* 135 */ for (boolean generate_hasNext =
generate_iterator.hasNext(); generate_hasNext; generate_hasNext =
generate_iterator.hasNext()) {
/* 136 */ generate_numOutputRows.add(1);
/* 137 */ InternalRow generate_row1 = (InternalRow)
generate_iterator.next();
/* 138 */
/* 139 */ boolean generate_isNull2 = generate_row1.isNullAt(0);
/* 140 */ UTF8String generate_c0 = generate_isNull2 ? null :
generate_row1.getUTF8String(0);
/* 141 */ boolean generate_isNull3 = generate_row1.isNullAt(1);
/* 142 */ UTF8String generate_c1 = generate_isNull3 ? null :
generate_row1.getUTF8String(1);
/* 143 */ project_holder1.reset();
/* 144 */
/* 145 */ project_rowWriter1.zeroOutNullBytes();
/* 146 */
/* 147 */ project_rowWriter1.write(0, range_value);
/* 148 */
/* 149 */ if (generate_isNull2) {
/* 150 */ project_rowWriter1.setNullAt(1);
/* 151 */ } else {
/* 152 */ project_rowWriter1.write(1, generate_c0);
/* 153 */ }
/* 154 */
/* 155 */ if (generate_isNull3) {
/* 156 */ project_rowWriter1.setNullAt(2);
/* 157 */ } else {
/* 158 */ project_rowWriter1.write(2, generate_c1);
/* 159 */ }
/* 160 */ project_result1.setTotalSize(project_holder1.totalSize());
/* 161 */ append(project_result1.copy());
/* 162 */
/* 163 */ }
/* 164 */
/* 165 */ if (shouldStop()) return;
/* 166 */ }
/* 167 */ }
/* 168 */ }
```
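Unlike the two explode paths, json_tuple falls back to evaluating `JsonTuple.extractTuple` and draining a single-row iterator inside the generated loop. A DataFrame-API sketch of the same query, assuming `json_tuple` from `functions`:
```scala
import org.apache.spark.sql.functions._
// Same query as the SQL above, via the DataFrame API.
val tupled = df.select(col("key"), json_tuple(col("json"), "key", "value").as(Seq("k", "v")))
```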