Github user cloud-fan commented on the pull request:
https://github.com/apache/spark/pull/12061#issuecomment-206320506
The generated code for a single filter
```
case class Data(l: Long, s: String)
Seq(Data(1, "a")).toDS().filter(d => d.l % 2 == 0)
```
is:
```
protected void processNext() throws java.io.IOException {
  /*** PRODUCE: Filter <function1>.apply */

  /*** PRODUCE: INPUT */

  while (inputadapter_input.hasNext()) {
    InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
    /*** CONSUME: Filter <function1>.apply */

    /* input[0, bigint] */
    long inputadapter_value = inputadapter_row.getLong(0);
    /* input[1, string] */
    boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
    UTF8String inputadapter_value1 = inputadapter_isNull1 ? null : (inputadapter_row.getUTF8String(1));

    /* <function1>.apply */
    /* <function1> */
    /* expression: <function1> */
    Object filter_obj = ((Expression) references[1]).eval(null);
    scala.Function1 filter_value1 = (scala.Function1) filter_obj;
    /* newInstance(class org.apache.spark.sql.execution.Data) */
    /* assertnotnull(input[0, bigint], - field (class: "scala.Long", name: "l"), - root class: "org.apache.spark.sql.execution.Data") */
    if (false) {
      throw new RuntimeException((String) references[2]);
    }
    /* input[1, string].toString */
    java.lang.String filter_value5 = inputadapter_isNull1 ? null : (java.lang.String) inputadapter_value1.toString();
    boolean filter_isNull5 = filter_value5 == null;

    final org.apache.spark.sql.execution.Data filter_value2 = new org.apache.spark.sql.execution.Data(inputadapter_value, filter_value5);
    final boolean filter_isNull2 = false;
    boolean filter_value = false ? false : (boolean) ((java.lang.Boolean) filter_value1.apply(filter_value2)).booleanValue();
    if (false || !filter_value) continue;

    filter_metricValue.add(1);

    /*** CONSUME: WholeStageCodegen */

    filter_holder.reset();

    filter_rowWriter.zeroOutNullBytes();

    filter_rowWriter.write(0, inputadapter_value);

    if (inputadapter_isNull1) {
      filter_rowWriter.setNullAt(1);
    } else {
      filter_rowWriter.write(1, inputadapter_value1);
    }
    filter_result.setTotalSize(filter_holder.totalSize());
    append(filter_result);
    if (shouldStop()) return;
  }
}
```
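In the single-filter case, each iteration decodes the input row into a fresh `Data` object, fetches the user closure from the `references` array, and invokes it through `scala.Function1.apply`, boxing the result. To reproduce a dump like the one above, something along these lines should work (a minimal sketch, assuming a `spark` session in scope and the `debugCodegen()` helper from `org.apache.spark.sql.execution.debug`):
```scala
import org.apache.spark.sql.execution.debug._  // provides debugCodegen()
import spark.implicits._

case class Data(l: Long, s: String)

// Prints the generated Java source for each whole-stage-codegen subtree.
Seq(Data(1, "a")).toDS()
  .filter(d => d.l % 2 == 0)
  .debugCodegen()
```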
The generated code for back-to-back filters
```
case class Data(l: Long, s: String)
Seq(Data(1, "a")).toDS().filter(d => d.l % 2 == 0).filter(d => d.l % 2 == 0)
```
is:
```
protected void processNext() throws java.io.IOException {
  /*** PRODUCE: SerializeFromObject [input[0, object].l AS l#20L,staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt... */

  /*** PRODUCE: Project [obj#15 AS obj#19] */

  /*** PRODUCE: Filter (<function1>.apply && <function1>.apply) */

  /*** PRODUCE: DeserializeToObject newInstance(class org.apache.spark.sql.execution.Data) AS obj#15 */

  /*** PRODUCE: INPUT */

  while (inputadapter_input.hasNext()) {
    InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
    /*** CONSUME: DeserializeToObject newInstance(class org.apache.spark.sql.execution.Data) AS obj#15 */
    /* input[0, bigint] */
    long inputadapter_value = inputadapter_row.getLong(0);
    /* input[1, string] */
    boolean inputadapter_isNull1 = inputadapter_row.isNullAt(1);
    UTF8String inputadapter_value1 = inputadapter_isNull1 ? null : (inputadapter_row.getUTF8String(1));

    /*** CONSUME: Filter (<function1>.apply && <function1>.apply) */

    /* newInstance(class org.apache.spark.sql.execution.Data) */
    /* assertnotnull(input[0, bigint], - field (class: "scala.Long", name: "l"), - root class: "org.apache.spark.sql.execution.Data") */
    if (false) {
      throw new RuntimeException((String) references[0]);
    }
    /* input[1, string].toString */
    java.lang.String deserializetoobject_value3 = inputadapter_isNull1 ? null : (java.lang.String) inputadapter_value1.toString();
    boolean deserializetoobject_isNull3 = deserializetoobject_value3 == null;

    final org.apache.spark.sql.execution.Data deserializetoobject_value = new org.apache.spark.sql.execution.Data(inputadapter_value, deserializetoobject_value3);
    final boolean deserializetoobject_isNull = false;

    /* <function1>.apply */
    /* <function1> */
    /* expression: <function1> */
    Object filter_obj = ((Expression) references[2]).eval(null);
    scala.Function1 filter_value1 = (scala.Function1) filter_obj;

    boolean filter_value = false ? false : (boolean) ((java.lang.Boolean) filter_value1.apply(deserializetoobject_value)).booleanValue();
    if (false || !filter_value) continue;
    /* <function1>.apply */
    /* <function1> */
    /* expression: <function1> */
    Object filter_obj1 = ((Expression) references[3]).eval(null);
    scala.Function1 filter_value4 = (scala.Function1) filter_obj1;

    boolean filter_value3 = false ? false : (boolean) ((java.lang.Boolean) filter_value4.apply(deserializetoobject_value)).booleanValue();
    if (false || !filter_value3) continue;

    filter_metricValue.add(1);

    /*** CONSUME: Project [obj#15 AS obj#19] */

    /*** CONSUME: SerializeFromObject [input[0, object].l AS l#20L,staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromSt... */

    /*** CONSUME: WholeStageCodegen */

    /* input[0, object].l */
    long serializefromobject_value = deserializetoobject_isNull ? -1L : (long) deserializetoobject_value.l();
    /* staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, object].s, true) */
    /* input[0, object].s */
    java.lang.String serializefromobject_value3 = deserializetoobject_isNull ? null : (java.lang.String) deserializetoobject_value.s();
    boolean serializefromobject_isNull3 = serializefromobject_value3 == null;

    boolean serializefromobject_isNull2 = !!(serializefromobject_isNull3);
    UTF8String serializefromobject_value2 = null;

    if (!(serializefromobject_isNull3)) {
      serializefromobject_value2 = org.apache.spark.unsafe.types.UTF8String.fromString(serializefromobject_value3);
      serializefromobject_isNull2 = serializefromobject_value2 == null;
    }
    serializefromobject_holder.reset();

    serializefromobject_rowWriter.zeroOutNullBytes();

    if (deserializetoobject_isNull) {
      serializefromobject_rowWriter.setNullAt(0);
    } else {
      serializefromobject_rowWriter.write(0, serializefromobject_value);
    }

    if (serializefromobject_isNull2) {
      serializefromobject_rowWriter.setNullAt(1);
    } else {
      serializefromobject_rowWriter.write(1, serializefromobject_value2);
    }

    serializefromobject_result.setTotalSize(serializefromobject_holder.totalSize());
    append(serializefromobject_result);
    if (shouldStop()) return;
  }
}
```
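Note that the two `filter` calls show up as a single `Filter (<function1>.apply && <function1>.apply)` node: the row is deserialized into `deserializetoobject_value` once, both closures are applied to that same object, and only then is it serialized back, so the serialize/deserialize round trip between the two filters disappears. Conceptually the fused plan behaves like one filter over the conjunction of the two predicates (a sketch of equivalent user code, not the planner's actual rewrite):
```scala
val p1: Data => Boolean = d => d.l % 2 == 0
val p2: Data => Boolean = d => d.l % 2 == 0

// One DeserializeToObject, two apply() calls, one SerializeFromObject,
// matching what the fused .filter(p1).filter(p2) plan above executes.
Seq(Data(1, "a")).toDS().filter(d => p1(d) && p2(d))
```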