GitHub user NarineK commented on the issue:
https://github.com/apache/spark/pull/12836
Hi @shivaram, hi @sun-rui,
Surprisingly, `dataframe.queryExecution.toString` for both dapply and gapply is
prepended by a huge array, which I'm not able to make sense of. It seems that
recent commits caused this.
I've added the following code snippet in `mapPartitionsInR`:
```
println(df.queryExecution)
println("this was dapply")
```
And this is what I see :(
```
[ ... 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 1, 44, 0, 4, 0, 9, 0, 0, 0,
0, 0, 4, 0, 9, 0, 0, 0, 6, 115, 99, 104, 101, 109, 97, 0, 4, 0, 9, 0, 0, 0, 1,
41, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0,
4, 0, 9, 0, 0, 0, 36, 35, 32, 78, 117, 109, 98, 101, 114, 32, 111, 102, 32,
112, 97, 114, 116, 105, 116, 105, 111, 110, 115, 32, 105, 115, 32, 101, 113,
117, 97, 108, 32, 116, 111, 32, 50, 0, 4, 0, 9, 0, 0, 0, 12, 101, 120, 112,
101, 99, 116, 95, 101, 113, 117, 97, 108, 0, 4, 0, 9, 0, 0, 0, 1, 40, 0, 4, 0,
9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 4, 110, 114, 111, 119, 0, 4, 0, 9, 0, 0, 0,
1, 40, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 3, 100, 102, 49, 0, 4, 0,
9, 0, 0, 0, 1, 41, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9,
0, 0, 0, 1, 44, 0, 4, 0, 9, 0, 0, 0, 1, 50, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9,
0, 0, 0, 1, 41, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 1, 125, 0, 4, 0,
9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 1, 41, 0, 4, 0, 9, 0, 0
, 0, 0, 0, 4, 0, 9, 0, 0, 0, 6, 117, 110, 108, 105, 110, 107, 0, 4, 0, 9, 0,
0, 0, 1, 40, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 11, 112, 97, 114,
113, 117, 101, 116, 80, 97, 116, 104, 0, 4, 0, 9, 0, 0, 0, 1, 41, 0, 4, 0, 9,
0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 6, 117, 110, 108, 105,
110, 107, 0, 4, 0, 9, 0, 0, 0, 1, 40, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0,
0, 8, 106, 115, 111, 110, 80, 97, 116, 104, 0, 4, 0, 9, 0, 0, 0, 1, 41, 0, 4,
0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 6, 117, 110,
108, 105, 110, 107, 0, 4, 0, 9, 0, 0, 0, 1, 40, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4,
0, 9, 0, 0, 0, 10, 106, 115, 111, 110, 80, 97, 116, 104, 78, 97, 0, 4, 0, 9, 0,
0, 0, 1, 41, 0, 4, 0, 9, 0, 0, 0, 0, 0, 4, 0, 9, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0,
9, -1, 0, 0, 0, 16, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 9, 112, 97, 114, 115, 101,
68, 97, 116, 97, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 4, 2, 0, 0, 9, -1, 0, 0, 0,
16, 0, 0, 0, 2, 0, 4, 0, 9, 0, 0, 0, 11, 115, 114, 99, 102
, 105, 108, 101, 99, 111, 112, 121, 0, 4, 0, 9, 0, 0, 0, 7, 115, 114, 99, 102,
105, 108, 101, 0, 0, 0, -2, 0, 0, 4, 2, 0, 0, 9, -1, 0, 0, 0, 16, 0, 0, 0, 1,
0, 4, 0, 9, 0, 0, 0, 6, 115, 114, 99, 114, 101, 102, 0, 0, 0, -2, 0, 0, 0, -2,
0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, -2, 0, 0, 0, 19, 0, 0, 0, 29, 0,
0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0,
0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0,
0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0,
0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0,
0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0,
0, 4, 2, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1, 120, 0, 0, 0, -5, 0, 0, 0, -2, 0,
0, 2, 6, 0, 0, 4, 2, 0, 0, 1, -1, 0, 0, 0, 19, 0, 0, 0, 2, 0, 0, 3, 13, 0, 0,
0, 8, 0, 0, 8, 84, 0, 0, 0, 17, 0, 0, 8, 84, 0, 0, 0, 17, 0, 0, 0, 17, 0, 0, 0,
17, 0, 0, 8, 84, 0, 0, 8, 84, 0, 0, 4, 2, 0, 0, 2, -1,
0, 0, 3, -1, 0, 0, 4, 2, 0, 0, 9, -1, 0, 0, 0, 16, 0, 0, 0, 1, 0, 4, 0, 9, 0,
0, 0, 6, 115, 114, 99, 114, 101, 102, 0, 0, 0, -2, 0, 0, 3, 13, 0, 0, 0, 8, 0,
0, 8, 85, 0, 0, 0, 7, 0, 0, 8, 85, 0, 0, 0, 42, 0, 0, 0, 7, 0, 0, 0, 42, 0, 0,
8, 85, 0, 0, 8, 85, 0, 0, 4, 2, 0, 0, 2, -1, 0, 0, 3, -1, 0, 0, 4, 2, 0, 0, 9,
-1, 0, 0, 0, 16, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 6, 115, 114, 99, 114, 101,
102, 0, 0, 0, -2, 0, 0, 4, 2, 0, 0, 2, -1, 0, 0, 3, -1, 0, 0, 4, 2, 0, 0, 0, 1,
0, 4, 0, 9, 0, 0, 0, 11, 119, 104, 111, 108, 101, 83, 114, 99, 114, 101, 102,
0, 0, 3, 13, 0, 0, 0, 8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 8, 86, 0, 0, 0, 5, 0, 0,
0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 8, 86, 0, 0, 4, 2, 0, 0, 2, -1, 0, 0, 3,
-1, 0, 0, 4, 2, 0, 0, 9, -1, 0, 0, 0, 16, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 6,
115, 114, 99, 114, 101, 102, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, 1, 0, 4, 0, 9,
0, 0, 0, 1, 123, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 2,
60, 45, 0, 0, 0, 2, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1,
121, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1, 40, 0, 0, 0,
2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 10, 100, 97, 116, 97, 46, 102,
114, 97, 109, 101, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1,
91, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1, 36, 0, 0, 0, 2,
0, 0, 17, -1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 1, 97, 0, 0, 0, -2,
0, 0, 0, 2, 0, 0, 0, 14, 0, 0, 0, 1, 63, -16, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0,
0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 4, 109, 101, 97, 110, 0,
0, 0, 2, 0, 0, 0, 6, 0, 0, 25, -1, 0, 0, 0, 2, 0, 0, 17, -1, 0, 0, 0, 2, 0, 0,
0, 1, 0, 4, 0, 9, 0, 0, 0, 1, 98, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0, 0, -2, 0, 0,
0, -2, 0, 0, 0, -2, 0, 0, 0, -2], [88, 10, 0, 0, 0, 2, 0, 3, 2, 3, 0, 2, 3, 0,
0, 0, 0, 16, 0, 0, 0, 1, 0, 4, 0, 9, 0, 0, 0, 4, 112, 108, 121, 114],
[StructField(a,IntegerType,true), StructField(b,DoubleType,true),
StructField(c,StringType,true), StructField(d,DoubleType,true)],
[StructField(a,IntegerType,true), StructField(avg,DoubleType,true)], obj#13648:
org.apache.spark.sql.Row
+- DeserializeToObject createexternalrow(a#13589, b#13590, c#13591.toString, d#13592, StructField(a,IntegerType,true), StructField(b,DoubleType,true), StructField(c,StringType,true), StructField(d,DoubleType,true)), obj#13645: org.apache.spark.sql.Row
   +- RepartitionByExpression [a#13589]
      +- LogicalRDD [a#13589, b#13590, c#13591, d#13592]
== Physical Plan ==
*SerializeFromObject [if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, a, IntegerType) AS a#13651, if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, avg, DoubleType) AS avg#13652]
+- MapPartitions <function1>, obj#13648: org.apache.spark.sql.Row
   +- DeserializeToObject createexternalrow(a#13589, b#13590, c#13591.toString, d#13592, StructField(a,IntegerType,true), StructField(b,DoubleType,true), StructField(c,StringType,true), StructField(d,DoubleType,true)), obj#13645: org.apache.spark.sql.Row
      +- Exchange hashpartitioning(a#13589, 200)
         +- Scan ExistingRDD[a#13589,b#13590,c#13591,d#13592]
this was dapply.
```
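One observation: if I decode the byte values above as ASCII, I get fragments
like `schema`, `# Number of partitions is equal to 2`, `expect_equal`, `nrow`,
`unlink`, `parquetPath`, `jsonPath`, `srcfilecopy`, `srcref`, `data.frame`,
`mean`, and `plyr`, and the second array starts with `88, 10` (`X\n`, the R
serialization header). So my guess is that this is the serialized R function
(plus its srcrefs/environment) being rendered byte by byte into the plan string
as one of the MapPartitions node's arguments. Here is a minimal, self-contained
sketch of how that could happen; `FakeNode`, its fields, and `argString` are
made up for illustration and are not Spark's actual classes:
```
// Illustrative only -- FakeNode is not a Spark class. It mimics a
// TreeNode-style argString that expands every collection found among
// the constructor arguments, so a serialized R closure stored as
// Array[Byte] gets printed element by element.
case class FakeNode(func: Array[Byte], name: String) {
  def argString: String = productIterator.map {
    case bytes: Array[_] => bytes.mkString("[", ", ", "]") // the "huge array"
    case other           => other.toString
  }.mkString(", ")

  // One possible fix: summarize binary fields instead of expanding them.
  def argStringElided: String = productIterator.map {
    case bytes: Array[_] => s"<${bytes.length} serialized bytes>"
    case other           => other.toString
  }.mkString(", ")
}

object Demo extends App {
  val node = FakeNode("f <- function(x) x".getBytes("UTF-8"), "dapply")
  println(node.argString)       // [102, 32, 60, 45, ...], dapply
  println(node.argStringElided) // <18 serialized bytes>, dapply
}
```
If that's what is going on, giving the node that carries the serialized
function a string form that elides its byte arrays (as in the second method
above) should make the dapply/gapply plans readable again.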