[
https://issues.apache.org/jira/browse/SPARK-10953?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Xiangrui Meng resolved SPARK-10953.
-----------------------------------
Resolution: Done
Fix Version/s: 1.6.0
> Benchmark codegen vs. hand-written code for univariate statistics
> -----------------------------------------------------------------
>
> Key: SPARK-10953
> URL: https://issues.apache.org/jira/browse/SPARK-10953
> Project: Spark
> Issue Type: Sub-task
> Components: SQL
> Reporter: Xiangrui Meng
> Assignee: Jihong MA
> Fix For: 1.6.0
>
>
> I checked the generated code for a simple stddev_pop call:
> {code}
> val df = sqlContext.range(100)
> df.select(stddev_pop(col("id"))).show()
> {code}
> This is the generated code for the merge part, which is very long and
> complex. I'm not sure whether we get any benefit from code generation for
> univariate statistics. We should benchmark it against a hand-written Scala
> implementation.
> {code}
> 15/10/06 10:10:57 DEBUG GenerateMutableProjection: code for if
> (isnull(input[1, DoubleType])) cast(0 as double) else input[1, DoubleType],if
> (isnull(input[1, DoubleType])) input[6, DoubleType] else if (isnull(input[6,
> DoubleType])) input[1, DoubleType] else (input[1, DoubleType] + input[6,
> DoubleType]),if (isnull(input[3, DoubleType])) cast(0 as double) else
> input[3, DoubleType],if (isnull(input[3, DoubleType])) input[8, DoubleType]
> else if (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3,
> DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6,
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])),if
> (isnull(input[4, DoubleType])) input[9, DoubleType] else if (isnull(input[9,
> DoubleType])) input[4, DoubleType] else ((input[4, DoubleType] + input[9,
> DoubleType]) + ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8,
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6,
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))):
> public Object generate(org.apache.spark.sql.catalyst.expressions.Expression[]
> expr) {
> return new SpecificMutableProjection(expr);
> }
> class SpecificMutableProjection extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
> private org.apache.spark.sql.catalyst.expressions.Expression[] expressions;
> private org.apache.spark.sql.catalyst.expressions.MutableRow mutableRow;
> public
> SpecificMutableProjection(org.apache.spark.sql.catalyst.expressions.Expression[]
> expr) {
> expressions = expr;
> mutableRow = new
> org.apache.spark.sql.catalyst.expressions.GenericMutableRow(5);
> }
> public
> org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection
> target(org.apache.spark.sql.catalyst.expressions.MutableRow row) {
> mutableRow = row;
> return this;
> }
> /* Provide immutable access to the last projected row. */
> public InternalRow currentValue() {
> return (InternalRow) mutableRow;
> }
> public Object apply(Object _i) {
> InternalRow i = (InternalRow) _i;
> /* if (isnull(input[1, DoubleType])) cast(0 as double) else input[1,
> DoubleType] */
> /* isnull(input[1, DoubleType]) */
> /* input[1, DoubleType] */
> boolean isNull4 = i.isNullAt(1);
> double primitive5 = isNull4 ? -1.0 : (i.getDouble(1));
> boolean isNull0 = false;
> double primitive1 = -1.0;
> if (!false && isNull4) {
> /* cast(0 as double) */
> /* 0 */
> boolean isNull6 = false;
> double primitive7 = -1.0;
> if (!false) {
> primitive7 = (double) 0;
> }
> isNull0 = isNull6;
> primitive1 = primitive7;
> } else {
> /* input[1, DoubleType] */
> boolean isNull10 = i.isNullAt(1);
> double primitive11 = isNull10 ? -1.0 : (i.getDouble(1));
> isNull0 = isNull10;
> primitive1 = primitive11;
> }
> if (isNull0) {
> mutableRow.setNullAt(0);
> } else {
> mutableRow.setDouble(0, primitive1);
> }
> /* if (isnull(input[1, DoubleType])) input[6, DoubleType] else if
> (isnull(input[6, DoubleType])) input[1, DoubleType] else (input[1,
> DoubleType] + input[6, DoubleType]) */
> /* isnull(input[1, DoubleType]) */
> /* input[1, DoubleType] */
> boolean isNull16 = i.isNullAt(1);
> double primitive17 = isNull16 ? -1.0 : (i.getDouble(1));
> boolean isNull12 = false;
> double primitive13 = -1.0;
> if (!false && isNull16) {
> /* input[6, DoubleType] */
> boolean isNull18 = i.isNullAt(6);
> double primitive19 = isNull18 ? -1.0 : (i.getDouble(6));
> isNull12 = isNull18;
> primitive13 = primitive19;
> } else {
> /* if (isnull(input[6, DoubleType])) input[1, DoubleType] else
> (input[1, DoubleType] + input[6, DoubleType]) */
> /* isnull(input[6, DoubleType]) */
> /* input[6, DoubleType] */
> boolean isNull24 = i.isNullAt(6);
> double primitive25 = isNull24 ? -1.0 : (i.getDouble(6));
> boolean isNull20 = false;
> double primitive21 = -1.0;
> if (!false && isNull24) {
> /* input[1, DoubleType] */
> boolean isNull26 = i.isNullAt(1);
> double primitive27 = isNull26 ? -1.0 : (i.getDouble(1));
> isNull20 = isNull26;
> primitive21 = primitive27;
> } else {
> /* (input[1, DoubleType] + input[6, DoubleType]) */
> /* input[1, DoubleType] */
> boolean isNull30 = i.isNullAt(1);
> double primitive31 = isNull30 ? -1.0 : (i.getDouble(1));
> boolean isNull28 = isNull30;
> double primitive29 = -1.0;
> if (!isNull28) {
> /* input[6, DoubleType] */
> boolean isNull32 = i.isNullAt(6);
> double primitive33 = isNull32 ? -1.0 : (i.getDouble(6));
> if (!isNull32) {
> primitive29 = primitive31 + primitive33;
> } else {
> isNull28 = true;
> }
> }
> isNull20 = isNull28;
> primitive21 = primitive29;
> }
> isNull12 = isNull20;
> primitive13 = primitive21;
> }
> if (isNull12) {
> mutableRow.setNullAt(1);
> } else {
> mutableRow.setDouble(1, primitive13);
> }
> /* if (isnull(input[3, DoubleType])) cast(0 as double) else input[3,
> DoubleType] */
> /* isnull(input[3, DoubleType]) */
> /* input[3, DoubleType] */
> boolean isNull38 = i.isNullAt(3);
> double primitive39 = isNull38 ? -1.0 : (i.getDouble(3));
> boolean isNull34 = false;
> double primitive35 = -1.0;
> if (!false && isNull38) {
> /* cast(0 as double) */
> /* 0 */
> boolean isNull40 = false;
> double primitive41 = -1.0;
> if (!false) {
> primitive41 = (double) 0;
> }
> isNull34 = isNull40;
> primitive35 = primitive41;
> } else {
> /* input[3, DoubleType] */
> boolean isNull44 = i.isNullAt(3);
> double primitive45 = isNull44 ? -1.0 : (i.getDouble(3));
> isNull34 = isNull44;
> primitive35 = primitive45;
> }
> if (isNull34) {
> mutableRow.setNullAt(2);
> } else {
> mutableRow.setDouble(2, primitive35);
> }
> /* if (isnull(input[3, DoubleType])) input[8, DoubleType] else if
> (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3,
> DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] * input[6,
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
> /* isnull(input[3, DoubleType]) */
> /* input[3, DoubleType] */
> boolean isNull50 = i.isNullAt(3);
> double primitive51 = isNull50 ? -1.0 : (i.getDouble(3));
> boolean isNull46 = false;
> double primitive47 = -1.0;
> if (!false && isNull50) {
> /* input[8, DoubleType] */
> boolean isNull52 = i.isNullAt(8);
> double primitive53 = isNull52 ? -1.0 : (i.getDouble(8));
> isNull46 = isNull52;
> primitive47 = primitive53;
> } else {
> /* if (isnull(input[8, DoubleType])) input[3, DoubleType] else
> (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] *
> input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
> /* isnull(input[8, DoubleType]) */
> /* input[8, DoubleType] */
> boolean isNull58 = i.isNullAt(8);
> double primitive59 = isNull58 ? -1.0 : (i.getDouble(8));
> boolean isNull54 = false;
> double primitive55 = -1.0;
> if (!false && isNull58) {
> /* input[3, DoubleType] */
> boolean isNull60 = i.isNullAt(3);
> double primitive61 = isNull60 ? -1.0 : (i.getDouble(3));
> isNull54 = isNull60;
> primitive55 = primitive61;
> } else {
> /* (((input[3, DoubleType] * input[0, DoubleType]) + (input[8,
> DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6,
> DoubleType])) */
> /* (input[0, DoubleType] + input[6, DoubleType]) */
> /* input[0, DoubleType] */
> boolean isNull80 = i.isNullAt(0);
> double primitive81 = isNull80 ? -1.0 : (i.getDouble(0));
> boolean isNull78 = isNull80;
> double primitive79 = -1.0;
> if (!isNull78) {
> /* input[6, DoubleType] */
> boolean isNull82 = i.isNullAt(6);
> double primitive83 = isNull82 ? -1.0 : (i.getDouble(6));
> if (!isNull82) {
> primitive79 = primitive81 + primitive83;
> } else {
> isNull78 = true;
> }
> }
> boolean isNull62 = false;
> double primitive63 = -1.0;
> if (isNull78 || primitive79 == 0) {
> isNull62 = true;
> } else {
> /* ((input[3, DoubleType] * input[0, DoubleType]) + (input[8,
> DoubleType] * input[6, DoubleType])) */
> /* (input[3, DoubleType] * input[0, DoubleType]) */
> /* input[3, DoubleType] */
> boolean isNull68 = i.isNullAt(3);
> double primitive69 = isNull68 ? -1.0 : (i.getDouble(3));
> boolean isNull66 = isNull68;
> double primitive67 = -1.0;
> if (!isNull66) {
> /* input[0, DoubleType] */
> boolean isNull70 = i.isNullAt(0);
> double primitive71 = isNull70 ? -1.0 : (i.getDouble(0));
> if (!isNull70) {
> primitive67 = primitive69 * primitive71;
> } else {
> isNull66 = true;
> }
> }
> boolean isNull64 = isNull66;
> double primitive65 = -1.0;
> if (!isNull64) {
> /* (input[8, DoubleType] * input[6, DoubleType]) */
> /* input[8, DoubleType] */
> boolean isNull74 = i.isNullAt(8);
> double primitive75 = isNull74 ? -1.0 : (i.getDouble(8));
> boolean isNull72 = isNull74;
> double primitive73 = -1.0;
> if (!isNull72) {
> /* input[6, DoubleType] */
> boolean isNull76 = i.isNullAt(6);
> double primitive77 = isNull76 ? -1.0 : (i.getDouble(6));
> if (!isNull76) {
> primitive73 = primitive75 * primitive77;
> } else {
> isNull72 = true;
> }
> }
> if (!isNull72) {
> primitive65 = primitive67 + primitive73;
> } else {
> isNull64 = true;
> }
> }
> if (isNull64) {
> isNull62 = true;
> } else {
> primitive63 = (double)(primitive65 / primitive79);
> }
> }
> isNull54 = isNull62;
> primitive55 = primitive63;
> }
> isNull46 = isNull54;
> primitive47 = primitive55;
> }
> if (isNull46) {
> mutableRow.setNullAt(3);
> } else {
> mutableRow.setDouble(3, primitive47);
> }
> /* if (isnull(input[4, DoubleType])) input[9, DoubleType] else if
> (isnull(input[9, DoubleType])) input[4, DoubleType] else ((input[4,
> DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - input[2,
> DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0,
> DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6,
> DoubleType]))) */
> /* isnull(input[4, DoubleType]) */
> /* input[4, DoubleType] */
> boolean isNull88 = i.isNullAt(4);
> double primitive89 = isNull88 ? -1.0 : (i.getDouble(4));
> boolean isNull84 = false;
> double primitive85 = -1.0;
> if (!false && isNull88) {
> /* input[9, DoubleType] */
> boolean isNull90 = i.isNullAt(9);
> double primitive91 = isNull90 ? -1.0 : (i.getDouble(9));
> isNull84 = isNull90;
> primitive85 = primitive91;
> } else {
> /* if (isnull(input[9, DoubleType])) input[4, DoubleType] else
> ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] -
> input[2, DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) *
> (input[0, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] +
> input[6, DoubleType]))) */
> /* isnull(input[9, DoubleType]) */
> /* input[9, DoubleType] */
> boolean isNull96 = i.isNullAt(9);
> double primitive97 = isNull96 ? -1.0 : (i.getDouble(9));
> boolean isNull92 = false;
> double primitive93 = -1.0;
> if (!false && isNull96) {
> /* input[4, DoubleType] */
> boolean isNull98 = i.isNullAt(4);
> double primitive99 = isNull98 ? -1.0 : (i.getDouble(4));
> isNull92 = isNull98;
> primitive93 = primitive99;
> } else {
> /* ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8,
> DoubleType] - input[2, DoubleType]) * (input[8, DoubleType] - input[2,
> DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) / (input[0,
> DoubleType] + input[6, DoubleType]))) */
> /* (input[4, DoubleType] + input[9, DoubleType]) */
> /* input[4, DoubleType] */
> boolean isNull104 = i.isNullAt(4);
> double primitive105 = isNull104 ? -1.0 : (i.getDouble(4));
> boolean isNull102 = isNull104;
> double primitive103 = -1.0;
> if (!isNull102) {
> /* input[9, DoubleType] */
> boolean isNull106 = i.isNullAt(9);
> double primitive107 = isNull106 ? -1.0 : (i.getDouble(9));
> if (!isNull106) {
> primitive103 = primitive105 + primitive107;
> } else {
> isNull102 = true;
> }
> }
> boolean isNull100 = isNull102;
> double primitive101 = -1.0;
> if (!isNull100) {
> /* ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8,
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6,
> DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
> /* (input[0, DoubleType] + input[6, DoubleType]) */
> /* input[0, DoubleType] */
> boolean isNull134 = i.isNullAt(0);
> double primitive135 = isNull134 ? -1.0 : (i.getDouble(0));
> boolean isNull132 = isNull134;
> double primitive133 = -1.0;
> if (!isNull132) {
> /* input[6, DoubleType] */
> boolean isNull136 = i.isNullAt(6);
> double primitive137 = isNull136 ? -1.0 : (i.getDouble(6));
> if (!isNull136) {
> primitive133 = primitive135 + primitive137;
> } else {
> isNull132 = true;
> }
> }
> boolean isNull108 = false;
> double primitive109 = -1.0;
> if (isNull132 || primitive133 == 0) {
> isNull108 = true;
> } else {
> /* (((input[8, DoubleType] - input[2, DoubleType]) * (input[8,
> DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6,
> DoubleType])) */
> /* ((input[8, DoubleType] - input[2, DoubleType]) * (input[8,
> DoubleType] - input[2, DoubleType])) */
> /* (input[8, DoubleType] - input[2, DoubleType]) */
> /* input[8, DoubleType] */
> boolean isNull116 = i.isNullAt(8);
> double primitive117 = isNull116 ? -1.0 : (i.getDouble(8));
> boolean isNull114 = isNull116;
> double primitive115 = -1.0;
> if (!isNull114) {
> /* input[2, DoubleType] */
> boolean isNull118 = i.isNullAt(2);
> double primitive119 = isNull118 ? -1.0 : (i.getDouble(2));
> if (!isNull118) {
> primitive115 = primitive117 - primitive119;
> } else {
> isNull114 = true;
> }
> }
> boolean isNull112 = isNull114;
> double primitive113 = -1.0;
> if (!isNull112) {
> /* (input[8, DoubleType] - input[2, DoubleType]) */
> /* input[8, DoubleType] */
> boolean isNull122 = i.isNullAt(8);
> double primitive123 = isNull122 ? -1.0 : (i.getDouble(8));
> boolean isNull120 = isNull122;
> double primitive121 = -1.0;
> if (!isNull120) {
> /* input[2, DoubleType] */
> boolean isNull124 = i.isNullAt(2);
> double primitive125 = isNull124 ? -1.0 : (i.getDouble(2));
> if (!isNull124) {
> primitive121 = primitive123 - primitive125;
> } else {
> isNull120 = true;
> }
> }
> if (!isNull120) {
> primitive113 = primitive115 * primitive121;
> } else {
> isNull112 = true;
> }
> }
> boolean isNull110 = isNull112;
> double primitive111 = -1.0;
> if (!isNull110) {
> /* (input[0, DoubleType] * input[6, DoubleType]) */
> /* input[0, DoubleType] */
> boolean isNull128 = i.isNullAt(0);
> double primitive129 = isNull128 ? -1.0 : (i.getDouble(0));
> boolean isNull126 = isNull128;
> double primitive127 = -1.0;
> if (!isNull126) {
> /* input[6, DoubleType] */
> boolean isNull130 = i.isNullAt(6);
> double primitive131 = isNull130 ? -1.0 : (i.getDouble(6));
> if (!isNull130) {
> primitive127 = primitive129 * primitive131;
> } else {
> isNull126 = true;
> }
> }
> if (!isNull126) {
> primitive111 = primitive113 * primitive127;
> } else {
> isNull110 = true;
> }
> }
> if (isNull110) {
> isNull108 = true;
> } else {
> primitive109 = (double)(primitive111 / primitive133);
> }
> }
> if (!isNull108) {
> primitive101 = primitive103 + primitive109;
> } else {
> isNull100 = true;
> }
> }
> isNull92 = isNull100;
> primitive93 = primitive101;
> }
> isNull84 = isNull92;
> primitive85 = primitive93;
> }
> if (isNull84) {
> mutableRow.setNullAt(4);
> } else {
> mutableRow.setDouble(4, primitive85);
> }
> return mutableRow;
> }
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org