rednaxelafx commented on issue #20965: [SPARK-21870][SQL] Split aggregation code into small functions URL: https://github.com/apache/spark/pull/20965#issuecomment-527779804 LGTM! Looks really good now, thanks a lot for all the efforts @maropu and all the other reviewers! For my own curiosity, I've built Spark from this PR and tried a few scenarios. 1. `select sum(id + id) from range(10)` The generated code is not split, which is good: it's pretty short and not really worth splitting ```java /* 116 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 117 */ // do aggregate /* 118 */ // common sub-expressions /* 119 */ /* 120 */ // evaluate aggregate functions and update aggregation buffers /* 121 */ /* 122 */ // do aggregate for sum /* 123 */ // evaluate aggregate function /* 124 */ agg_agg_isNull_3_0 = true; /* 125 */ long agg_value_3 = -1L; /* 126 */ do { /* 127 */ if (!agg_bufIsNull_0) { /* 128 */ agg_agg_isNull_3_0 = false; /* 129 */ agg_value_3 = agg_bufValue_0; /* 130 */ continue; /* 131 */ } /* 132 */ /* 133 */ boolean agg_isNull_5 = false; /* 134 */ long agg_value_5 = -1L; /* 135 */ if (!false) { /* 136 */ agg_value_5 = (long) 0; /* 137 */ } /* 138 */ if (!agg_isNull_5) { /* 139 */ agg_agg_isNull_3_0 = false; /* 140 */ agg_value_3 = agg_value_5; /* 141 */ continue; /* 142 */ } /* 143 */ /* 144 */ } while (false); /* 145 */ long agg_value_7 = -1L; /* 146 */ /* 147 */ agg_value_7 = agg_expr_0_0 + agg_expr_0_0; /* 148 */ long agg_value_2 = -1L; /* 149 */ /* 150 */ agg_value_2 = agg_value_3 + agg_value_7; /* 151 */ // update aggregation buffers /* 152 */ agg_bufIsNull_0 = false; /* 153 */ agg_bufValue_0 = agg_value_2; /* 154 */ /* 155 */ } ``` 2. `select avg(id + id) from range(10)` The generated code is split out, which is...hmm, I don't know. If I hand-wrote this code I wouldn't have split it, since a single `avg` isn't going to go beyond the 8KB bytecode size. ```java /* 125 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 126 */ // do aggregate /* 127 */ // common sub-expressions /* 128 */ long agg_value_4 = -1L; /* 129 */ /* 130 */ agg_value_4 = agg_expr_0_0 + agg_expr_0_0; /* 131 */ // evaluate aggregate functions and update aggregation buffers /* 132 */ agg_doAggregate_avg_0(agg_value_4); /* 133 */ /* 134 */ } /* 135 */ /* 136 */ private void agg_doAggregate_avg_0(long agg_value_4) throws java.io.IOException { /* 137 */ // do aggregate for avg /* 138 */ // evaluate aggregate function /* 139 */ boolean agg_isNull_7 = true; /* 140 */ double agg_value_7 = -1.0; /* 141 */ /* 142 */ if (!agg_bufIsNull_0) { /* 143 */ agg_agg_isNull_9_0 = true; /* 144 */ double agg_value_9 = -1.0; /* 145 */ do { /* 146 */ boolean agg_isNull_10 = false; /* 147 */ double agg_value_10 = -1.0; /* 148 */ if (!false) { /* 149 */ agg_value_10 = (double) agg_value_4; /* 150 */ } /* 151 */ if (!agg_isNull_10) { /* 152 */ agg_agg_isNull_9_0 = false; /* 153 */ agg_value_9 = agg_value_10; /* 154 */ continue; /* 155 */ } /* 156 */ /* 157 */ boolean agg_isNull_11 = false; /* 158 */ double agg_value_11 = -1.0; /* 159 */ if (!false) { /* 160 */ agg_value_11 = (double) 0; /* 161 */ } /* 162 */ if (!agg_isNull_11) { /* 163 */ agg_agg_isNull_9_0 = false; /* 164 */ agg_value_9 = agg_value_11; /* 165 */ continue; /* 166 */ } /* 167 */ /* 168 */ } while (false); /* 169 */ /* 170 */ agg_isNull_7 = false; // resultCode could change nullability. /* 171 */ /* 172 */ agg_value_7 = agg_bufValue_0 + agg_value_9; /* 173 */ /* 174 */ } /* 175 */ boolean agg_isNull_13 = false; /* 176 */ long agg_value_13 = -1L; /* 177 */ if (!false && false) { /* 178 */ agg_isNull_13 = agg_bufIsNull_1; /* 179 */ agg_value_13 = agg_bufValue_1; /* 180 */ } else { /* 181 */ boolean agg_isNull_16 = true; /* 182 */ long agg_value_16 = -1L; /* 183 */ /* 184 */ if (!agg_bufIsNull_1) { /* 185 */ agg_isNull_16 = false; // resultCode could change nullability. /* 186 */ /* 187 */ agg_value_16 = agg_bufValue_1 + 1L; /* 188 */ /* 189 */ } /* 190 */ agg_isNull_13 = agg_isNull_16; /* 191 */ agg_value_13 = agg_value_16; /* 192 */ } /* 193 */ // update aggregation buffers /* 194 */ agg_bufIsNull_0 = agg_isNull_7; /* 195 */ agg_bufValue_0 = agg_value_7; /* 196 */ /* 197 */ agg_bufIsNull_1 = agg_isNull_13; /* 198 */ agg_bufValue_1 = agg_value_13; /* 199 */ } ``` 3. `select sum(id * 2), sum(id * 3), avg(id * 2), avg(id * 3) from range(10)` The generated code is split and the arguments passed to the split off functions are at a minimal. This is very nice! Exactly what I had asked for. ```java /* 247 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 248 */ // do aggregate /* 249 */ // common sub-expressions /* 250 */ long agg_value_9 = -1L; /* 251 */ /* 252 */ agg_value_9 = agg_expr_0_0 * 3L; /* 253 */ long agg_value_12 = -1L; /* 254 */ /* 255 */ agg_value_12 = agg_expr_0_0 * 2L; /* 256 */ // evaluate aggregate functions and update aggregation buffers /* 257 */ agg_doAggregate_sum_0(agg_value_12); /* 258 */ agg_doAggregate_sum_1(agg_value_9); /* 259 */ agg_doAggregate_avg_0(agg_value_12); /* 260 */ agg_doAggregate_avg_1(agg_value_9); /* 261 */ /* 262 */ } /* 050 */ private void agg_doAggregate_sum_0(long agg_value_12) throws java.io.IOException { /* 051 */ // do aggregate for sum /* 052 */ // evaluate aggregate function /* 053 */ agg_agg_isNull_16_0 = true; /* 054 */ long agg_value_16 = -1L; /* 055 */ do { /* 056 */ if (!agg_bufIsNull_0) { /* 057 */ agg_agg_isNull_16_0 = false; /* 058 */ agg_value_16 = agg_bufValue_0; /* 059 */ continue; /* 060 */ } /* 061 */ /* 062 */ boolean agg_isNull_18 = false; /* 063 */ long agg_value_18 = -1L; /* 064 */ if (!false) { /* 065 */ agg_value_18 = (long) 0; /* 066 */ } /* 067 */ if (!agg_isNull_18) { /* 068 */ agg_agg_isNull_16_0 = false; /* 069 */ agg_value_16 = agg_value_18; /* 070 */ continue; /* 071 */ } /* 072 */ /* 073 */ } while (false); /* 074 */ /* 075 */ long agg_value_15 = -1L; /* 076 */ /* 077 */ agg_value_15 = agg_value_16 + agg_value_12; /* 078 */ // update aggregation buffers /* 079 */ agg_bufIsNull_0 = false; /* 080 */ agg_bufValue_0 = agg_value_15; /* 081 */ } /* 264 */ private void agg_doAggregate_sum_1(long agg_value_9) throws java.io.IOException { /* 265 */ // do aggregate for sum /* 266 */ // evaluate aggregate function /* 267 */ agg_agg_isNull_21_0 = true; /* 268 */ long agg_value_21 = -1L; /* 269 */ do { /* 270 */ if (!agg_bufIsNull_1) { /* 271 */ agg_agg_isNull_21_0 = false; /* 272 */ agg_value_21 = agg_bufValue_1; /* 273 */ continue; /* 274 */ } /* 275 */ /* 276 */ boolean agg_isNull_23 = false; /* 277 */ long agg_value_23 = -1L; /* 278 */ if (!false) { /* 279 */ agg_value_23 = (long) 0; /* 280 */ } /* 281 */ if (!agg_isNull_23) { /* 282 */ agg_agg_isNull_21_0 = false; /* 283 */ agg_value_21 = agg_value_23; /* 284 */ continue; /* 285 */ } /* 286 */ /* 287 */ } while (false); /* 288 */ /* 289 */ long agg_value_20 = -1L; /* 290 */ /* 291 */ agg_value_20 = agg_value_21 + agg_value_9; /* 292 */ // update aggregation buffers /* 293 */ agg_bufIsNull_1 = false; /* 294 */ agg_bufValue_1 = agg_value_20; /* 295 */ } /* 297 */ private void agg_doAggregate_avg_0(long agg_value_12) throws java.io.IOException { /* 298 */ // do aggregate for avg /* 299 */ // evaluate aggregate function /* 300 */ boolean agg_isNull_25 = true; /* 301 */ double agg_value_25 = -1.0; /* 302 */ /* 303 */ if (!agg_bufIsNull_2) { /* 304 */ agg_agg_isNull_27_0 = true; /* 305 */ double agg_value_27 = -1.0; /* 306 */ do { /* 307 */ boolean agg_isNull_28 = false; /* 308 */ double agg_value_28 = -1.0; /* 309 */ if (!false) { /* 310 */ agg_value_28 = (double) agg_value_12; /* 311 */ } /* 312 */ if (!agg_isNull_28) { /* 313 */ agg_agg_isNull_27_0 = false; /* 314 */ agg_value_27 = agg_value_28; /* 315 */ continue; /* 316 */ } /* 317 */ /* 318 */ boolean agg_isNull_29 = false; /* 319 */ double agg_value_29 = -1.0; /* 320 */ if (!false) { /* 321 */ agg_value_29 = (double) 0; /* 322 */ } /* 323 */ if (!agg_isNull_29) { /* 324 */ agg_agg_isNull_27_0 = false; /* 325 */ agg_value_27 = agg_value_29; /* 326 */ continue; /* 327 */ } /* 328 */ /* 329 */ } while (false); /* 330 */ /* 331 */ agg_isNull_25 = false; // resultCode could change nullability. /* 332 */ /* 333 */ agg_value_25 = agg_bufValue_2 + agg_value_27; /* 334 */ /* 335 */ } /* 336 */ boolean agg_isNull_31 = false; /* 337 */ long agg_value_31 = -1L; /* 338 */ if (!false && false) { /* 339 */ agg_isNull_31 = agg_bufIsNull_3; /* 340 */ agg_value_31 = agg_bufValue_3; /* 341 */ } else { /* 342 */ boolean agg_isNull_34 = true; /* 343 */ long agg_value_34 = -1L; /* 344 */ /* 345 */ if (!agg_bufIsNull_3) { /* 346 */ agg_isNull_34 = false; // resultCode could change nullability. /* 347 */ /* 348 */ agg_value_34 = agg_bufValue_3 + 1L; /* 349 */ /* 350 */ } /* 351 */ agg_isNull_31 = agg_isNull_34; /* 352 */ agg_value_31 = agg_value_34; /* 353 */ } /* 354 */ // update aggregation buffers /* 355 */ agg_bufIsNull_2 = agg_isNull_25; /* 356 */ agg_bufValue_2 = agg_value_25; /* 357 */ /* 358 */ agg_bufIsNull_3 = agg_isNull_31; /* 359 */ agg_bufValue_3 = agg_value_31; /* 360 */ } /* 144 */ private void agg_doAggregate_avg_1(long agg_value_9) throws java.io.IOException { /* 145 */ // do aggregate for avg /* 146 */ // evaluate aggregate function /* 147 */ boolean agg_isNull_37 = true; /* 148 */ double agg_value_37 = -1.0; /* 149 */ /* 150 */ if (!agg_bufIsNull_4) { /* 151 */ agg_agg_isNull_39_0 = true; /* 152 */ double agg_value_39 = -1.0; /* 153 */ do { /* 154 */ boolean agg_isNull_40 = false; /* 155 */ double agg_value_40 = -1.0; /* 156 */ if (!false) { /* 157 */ agg_value_40 = (double) agg_value_9; /* 158 */ } /* 159 */ if (!agg_isNull_40) { /* 160 */ agg_agg_isNull_39_0 = false; /* 161 */ agg_value_39 = agg_value_40; /* 162 */ continue; /* 163 */ } /* 164 */ /* 165 */ boolean agg_isNull_41 = false; /* 166 */ double agg_value_41 = -1.0; /* 167 */ if (!false) { /* 168 */ agg_value_41 = (double) 0; /* 169 */ } /* 170 */ if (!agg_isNull_41) { /* 171 */ agg_agg_isNull_39_0 = false; /* 172 */ agg_value_39 = agg_value_41; /* 173 */ continue; /* 174 */ } /* 175 */ /* 176 */ } while (false); /* 177 */ /* 178 */ agg_isNull_37 = false; // resultCode could change nullability. /* 179 */ /* 180 */ agg_value_37 = agg_bufValue_4 + agg_value_39; /* 181 */ /* 182 */ } /* 183 */ boolean agg_isNull_43 = false; /* 184 */ long agg_value_43 = -1L; /* 185 */ if (!false && false) { /* 186 */ agg_isNull_43 = agg_bufIsNull_5; /* 187 */ agg_value_43 = agg_bufValue_5; /* 188 */ } else { /* 189 */ boolean agg_isNull_46 = true; /* 190 */ long agg_value_46 = -1L; /* 191 */ /* 192 */ if (!agg_bufIsNull_5) { /* 193 */ agg_isNull_46 = false; // resultCode could change nullability. /* 194 */ /* 195 */ agg_value_46 = agg_bufValue_5 + 1L; /* 196 */ /* 197 */ } /* 198 */ agg_isNull_43 = agg_isNull_46; /* 199 */ agg_value_43 = agg_value_46; /* 200 */ } /* 201 */ // update aggregation buffers /* 202 */ agg_bufIsNull_4 = agg_isNull_37; /* 203 */ agg_bufValue_4 = agg_value_37; /* 204 */ /* 205 */ agg_bufIsNull_5 = agg_isNull_43; /* 206 */ agg_bufValue_5 = agg_value_43; /* 207 */ } ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
