Repository: spark
Updated Branches:
  refs/heads/master e5a1b301f -> 4a7636f2d


[SPARK-13844] [SQL] Generate better code for filters with a non-nullable column

## What changes were proposed in this pull request?

This PR simplifies generated code with a non-nullable column. This PR addresses 
three items:
1. Generate simplified code for and / or
2. Generate better code for divide and remainder with non-zero dividend
3. Pass nullable information into BoundReference at WholeStageCodegen

I have attached the generated code with and without this PR

## How was this patch tested?

Tested by existing test suites in sql/core

Here is a motivating example
````
(0 to 6).map(i => (i.toString, i.toInt)).toDF("k", "v")
  .filter("v % 2 == 0").filter("v <= 4").filter("v > 1").show()
````

Generated code without this PR
````java
/* 032 */   protected void processNext() throws java.io.IOException {
/* 033 */     /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 034 */
/* 035 */     /*** PRODUCE: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 0)) 
&& ((_2#1 <= 4) && (_2#1 > 1))) */
/* 036 */
/* 037 */     /*** PRODUCE: INPUT */
/* 038 */
/* 039 */     while (!shouldStop() && inputadapter_input.hasNext()) {
/* 040 */       InternalRow inputadapter_row = (InternalRow) 
inputadapter_input.next();
/* 041 */       /*** CONSUME: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 
0)) && ((_2#1 <= 4) && (_2#1 > 1))) */
/* 042 */       /* input[1, int] */
/* 043 */       int filter_value1 = inputadapter_row.getInt(1);
/* 044 */
/* 045 */       /* isnotnull((input[1, int] % 2)) */
/* 046 */       /* (input[1, int] % 2) */
/* 047 */       boolean filter_isNull3 = false;
/* 048 */       int filter_value3 = -1;
/* 049 */       if (false || 2 == 0) {
/* 050 */         filter_isNull3 = true;
/* 051 */       } else {
/* 052 */         if (false) {
/* 053 */           filter_isNull3 = true;
/* 054 */         } else {
/* 055 */           filter_value3 = (int)(filter_value1 % 2);
/* 056 */         }
/* 057 */       }
/* 058 */       if (!(!(filter_isNull3))) continue;
/* 059 */
/* 060 */       /* ((input[1, int] % 2) = 0) */
/* 061 */       boolean filter_isNull6 = true;
/* 062 */       boolean filter_value6 = false;
/* 063 */       /* (input[1, int] % 2) */
/* 064 */       boolean filter_isNull7 = false;
/* 065 */       int filter_value7 = -1;
/* 066 */       if (false || 2 == 0) {
/* 067 */         filter_isNull7 = true;
/* 068 */       } else {
/* 069 */         if (false) {
/* 070 */           filter_isNull7 = true;
/* 071 */         } else {
/* 072 */           filter_value7 = (int)(filter_value1 % 2);
/* 073 */         }
/* 074 */       }
/* 075 */       if (!filter_isNull7) {
/* 076 */         filter_isNull6 = false; // resultCode could change 
nullability.
/* 077 */         filter_value6 = filter_value7 == 0;
/* 078 */
/* 079 */       }
/* 080 */       if (filter_isNull6 || !filter_value6) continue;
/* 081 */
/* 082 */       /* (input[1, int] <= 4) */
/* 083 */       boolean filter_value11 = false;
/* 084 */       filter_value11 = filter_value1 <= 4;
/* 085 */       if (!filter_value11) continue;
/* 086 */
/* 087 */       /* (input[1, int] > 1) */
/* 088 */       boolean filter_value14 = false;
/* 089 */       filter_value14 = filter_value1 > 1;
/* 090 */       if (!filter_value14) continue;
/* 091 */
/* 092 */       filter_metricValue.add(1);
/* 093 */
/* 094 */       /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 095 */
/* 096 */       /* input[0, string] */
/* 097 */       /* input[0, string] */
/* 098 */       boolean filter_isNull = inputadapter_row.isNullAt(0);
/* 099 */       UTF8String filter_value = filter_isNull ? null : 
(inputadapter_row.getUTF8String(0));
/* 100 */       project_holder.reset();
/* 101 */
/* 102 */       project_rowWriter.zeroOutNullBytes();
/* 103 */
/* 104 */       if (filter_isNull) {
/* 105 */         project_rowWriter.setNullAt(0);
/* 106 */       } else {
/* 107 */         project_rowWriter.write(0, filter_value);
/* 108 */       }
/* 109 */
/* 110 */       project_rowWriter.write(1, filter_value1);
/* 111 */       project_result.setTotalSize(project_holder.totalSize());
/* 112 */       append(project_result.copy());
/* 113 */     }
/* 114 */   }
/* 115 */ }
````

Generated code with this PR
````java
/* 032 */   protected void processNext() throws java.io.IOException {
/* 033 */     /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 034 */
/* 035 */     /*** PRODUCE: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 > 
1))) */
/* 036 */
/* 037 */     /*** PRODUCE: INPUT */
/* 038 */
/* 039 */     while (!shouldStop() && inputadapter_input.hasNext()) {
/* 040 */       InternalRow inputadapter_row = (InternalRow) 
inputadapter_input.next();
/* 041 */       /*** CONSUME: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 
> 1))) */
/* 042 */       /* input[1, int] */
/* 043 */       int filter_value1 = inputadapter_row.getInt(1);
/* 044 */
/* 045 */       /* ((input[1, int] % 2) = 0) */
/* 046 */       /* (input[1, int] % 2) */
/* 047 */       int filter_value3 = (int)(filter_value1 % 2);
/* 048 */
/* 049 */       boolean filter_value2 = false;
/* 050 */       filter_value2 = filter_value3 == 0;
/* 051 */       if (!filter_value2) continue;
/* 052 */
/* 053 */       /* (input[1, int] <= 5) */
/* 054 */       boolean filter_value7 = false;
/* 055 */       filter_value7 = filter_value1 <= 5;
/* 056 */       if (!filter_value7) continue;
/* 057 */
/* 058 */       /* (input[1, int] > 1) */
/* 059 */       boolean filter_value10 = false;
/* 060 */       filter_value10 = filter_value1 > 1;
/* 061 */       if (!filter_value10) continue;
/* 062 */
/* 063 */       filter_metricValue.add(1);
/* 064 */
/* 065 */       /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 066 */
/* 067 */       /* input[0, string] */
/* 068 */       /* input[0, string] */
/* 069 */       boolean filter_isNull = inputadapter_row.isNullAt(0);
/* 070 */       UTF8String filter_value = filter_isNull ? null : 
(inputadapter_row.getUTF8String(0));
/* 071 */       project_holder.reset();
/* 072 */
/* 073 */       project_rowWriter.zeroOutNullBytes();
/* 074 */
/* 075 */       if (filter_isNull) {
/* 076 */         project_rowWriter.setNullAt(0);
/* 077 */       } else {
/* 078 */         project_rowWriter.write(0, filter_value);
/* 079 */       }
/* 080 */
/* 081 */       project_rowWriter.write(1, filter_value1);
/* 082 */       project_result.setTotalSize(project_holder.totalSize());
/* 083 */       append(project_result.copy());
/* 084 */     }
/* 085 */   }
/* 086 */ }
````

Author: Kazuaki Ishizaki <[email protected]>

Closes #11684 from kiszk/SPARK-13844.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4a7636f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4a7636f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4a7636f2

Branch: refs/heads/master
Commit: 4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f
Parents: e5a1b30
Author: Kazuaki Ishizaki <[email protected]>
Authored: Mon Mar 28 10:35:48 2016 -0700
Committer: Davies Liu <[email protected]>
Committed: Mon Mar 28 10:35:48 2016 -0700

----------------------------------------------------------------------
 .../sql/catalyst/expressions/arithmetic.scala   | 72 ++++++++++++------
 .../sql/catalyst/expressions/predicates.scala   | 78 +++++++++++++-------
 2 files changed, 102 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4a7636f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index ed812e0..1e9c971 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -237,21 +237,35 @@ case class Divide(left: Expression, right: Expression) 
extends BinaryArithmetic
     } else {
       s"($javaType)(${eval1.value} $symbol ${eval2.value})"
     }
-    s"""
-      ${eval2.code}
-      boolean ${ev.isNull} = false;
-      $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
-      if (${eval2.isNull} || $isZero) {
-        ${ev.isNull} = true;
-      } else {
-        ${eval1.code}
-        if (${eval1.isNull}) {
+    if (!left.nullable && !right.nullable) {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if ($isZero) {
           ${ev.isNull} = true;
         } else {
+          ${eval1.code}
           ${ev.value} = $divide;
         }
-      }
-    """
+      """
+    } else {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if (${eval2.isNull} || $isZero) {
+          ${ev.isNull} = true;
+        } else {
+          ${eval1.code}
+          if (${eval1.isNull}) {
+            ${ev.isNull} = true;
+          } else {
+            ${ev.value} = $divide;
+          }
+        }
+      """
+    }
   }
 }
 
@@ -299,21 +313,35 @@ case class Remainder(left: Expression, right: Expression) 
extends BinaryArithmet
     } else {
       s"($javaType)(${eval1.value} $symbol ${eval2.value})"
     }
-    s"""
-      ${eval2.code}
-      boolean ${ev.isNull} = false;
-      $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
-      if (${eval2.isNull} || $isZero) {
-        ${ev.isNull} = true;
-      } else {
-        ${eval1.code}
-        if (${eval1.isNull}) {
+    if (!left.nullable && !right.nullable) {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if ($isZero) {
           ${ev.isNull} = true;
         } else {
+          ${eval1.code}
           ${ev.value} = $remainder;
         }
-      }
-    """
+      """
+    } else {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if (${eval2.isNull} || $isZero) {
+          ${ev.isNull} = true;
+        } else {
+          ${eval1.code}
+          if (${eval1.isNull}) {
+            ${ev.isNull} = true;
+          } else {
+            ${ev.value} = $remainder;
+          }
+        }
+      """
+    }
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4a7636f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 20818bf..e23ad55 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -274,22 +274,35 @@ case class And(left: Expression, right: Expression) 
extends BinaryOperator with
     val eval2 = right.gen(ctx)
 
     // The result should be `false`, if any of them is `false` whenever the 
other is null or not.
-    s"""
-      ${eval1.code}
-      boolean ${ev.isNull} = false;
-      boolean ${ev.value} = false;
+    if (!left.nullable && !right.nullable) {
+      ev.isNull = "false"
+      s"""
+        ${eval1.code}
+        boolean ${ev.value} = false;
 
-      if (!${eval1.isNull} && !${eval1.value}) {
-      } else {
-        ${eval2.code}
-        if (!${eval2.isNull} && !${eval2.value}) {
-        } else if (!${eval1.isNull} && !${eval2.isNull}) {
-          ${ev.value} = true;
+        if (${eval1.value}) {
+          ${eval2.code}
+          ${ev.value} = ${eval2.value};
+        }
+      """
+    } else {
+      s"""
+        ${eval1.code}
+        boolean ${ev.isNull} = false;
+        boolean ${ev.value} = false;
+
+        if (!${eval1.isNull} && !${eval1.value}) {
         } else {
-          ${ev.isNull} = true;
+          ${eval2.code}
+          if (!${eval2.isNull} && !${eval2.value}) {
+          } else if (!${eval1.isNull} && !${eval2.isNull}) {
+            ${ev.value} = true;
+          } else {
+            ${ev.isNull} = true;
+          }
         }
-      }
-     """
+      """
+    }
   }
 }
 
@@ -325,22 +338,35 @@ case class Or(left: Expression, right: Expression) 
extends BinaryOperator with P
     val eval2 = right.gen(ctx)
 
     // The result should be `true`, if any of them is `true` whenever the 
other is null or not.
-    s"""
-      ${eval1.code}
-      boolean ${ev.isNull} = false;
-      boolean ${ev.value} = true;
+    if (!left.nullable && !right.nullable) {
+      ev.isNull = "false"
+      s"""
+        ${eval1.code}
+        boolean ${ev.value} = true;
 
-      if (!${eval1.isNull} && ${eval1.value}) {
-      } else {
-        ${eval2.code}
-        if (!${eval2.isNull} && ${eval2.value}) {
-        } else if (!${eval1.isNull} && !${eval2.isNull}) {
-          ${ev.value} = false;
+        if (!${eval1.value}) {
+          ${eval2.code}
+          ${ev.value} = ${eval2.value};
+        }
+      """
+    } else {
+      s"""
+        ${eval1.code}
+        boolean ${ev.isNull} = false;
+        boolean ${ev.value} = true;
+
+        if (!${eval1.isNull} && ${eval1.value}) {
         } else {
-          ${ev.isNull} = true;
+          ${eval2.code}
+          if (!${eval2.isNull} && ${eval2.value}) {
+          } else if (!${eval1.isNull} && !${eval2.isNull}) {
+            ${ev.value} = false;
+          } else {
+            ${ev.isNull} = true;
+          }
         }
-      }
-     """
+      """
+    }
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to