rk.shuffle.consolidateFiles=true", and Spark 1.5.1.
>
> Thanks a lot!
>
> - Without persist:
>
> (200) MapPartitionsRDD[26] at javaToPython at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[25] at javaToPython at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsWithPreparationRDD[22] at toPandas at
> :25 []
> | MapPartitionsWithPreparationRDD[21] at toPandas at
> :25 []
> | MapPartitionsRDD[20] at toPandas at :25 []
> | ZippedPartitionsRDD2[19] at toPandas at :25 []
> | MapPartitionsWithPreparationRDD[9] at toPandas at
> :25 []
> | ShuffledRowRDD[8] at toPandas at :25 []
> +-(2) MapPartitionsRDD[7] at toPandas at :25 []
> | MapPartitionsRDD[6] at toPandas at :25 []
> | MapPartitionsRDD[5] at toPandas at :25 []
> | MapPartitionsRDD[4] at applySchemaToPythonRDD at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[3] at map at SerDeUtil.scala:100 []
> | MapPartitionsRDD[2] at mapPartitions at SerDeUtil.scala:147 []
> | PythonRDD[1] at RDD at PythonRDD.scala:43 []
> | ParallelCollectionRDD[0] at parallelize at PythonRDD.scala:423 []
> | MapPartitionsWithPreparationRDD[18] at toPandas at
> :25 []
> | ShuffledRowRDD[17] at toPandas at :25 []
> +-(200) MapPartitionsRDD[16] at toPandas at :25 []
> | MapPartitionsRDD[15] at toPandas at :25 []
> | MapPartitionsWithPreparationRDD[14] at toPandas at
> :25 []
> | ShuffledRowRDD[13] at toPandas at :25 []
> +-(2) MapPartitionsRDD[12] at toPandas at :25 []
> | MapPartitionsWithPreparationRDD[11] at toPandas at
> :25 []
> | MapPartitionsRDD[10] at toPandas at :25 []
> | MapPartitionsRDD[4] at applySchemaToPythonRDD at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[3] at map at SerDeUtil.scala:100 []
> | MapPartitionsRDD[2] at mapPartitions at SerDeUtil.scala:147 []
> | PythonRDD[1] at RDD at PythonRDD.scala:43 []
> | ParallelCollectionRDD[0] at parallelize at PythonRDD.scala:423
> []
>
> - With persist:
>
> (200) MapPartitionsRDD[52] at javaToPython at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[51] at javaToPython at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsWithPreparationRDD[48] at toPandas at
> :25 []
> | ShuffledRowRDD[47] at toPandas at :25 []
> +-(200) MapPartitionsRDD[46] at toPandas at :25 []
> | MapPartitionsWithPreparationRDD[45] at toPandas at
> :25 []
> | MapPartitionsRDD[44] at toPandas at :25 []
> | MapPartitionsRDD[43] at toPandas at :25 []
> | TungstenProject [id#0,value#1,id_name#3]
> SortMergeOuterJoin [id#0], [id_join#2], LeftOuter, None
> TungstenSort [id#0 ASC], false, 0
> TungstenExchange hashpartitioning(id#0)
> ConvertToUnsafe
> Scan PhysicalRDD[id#0,value#1]
> TungstenSort [id_join#2 ASC], false, 0
> TungstenExchange hashpartitioning(id_join#2)
> TungstenProject [id#0 AS id_join#2,xxx AS id_name#3]
> TungstenAggregate(key=[id#0], functions=[], output=[id#0])
> TungstenExchange hashpartitioning(id#0)
> TungstenAggregate(key=[id#0], functions=[], output=[id#0])
> TungstenProject [id#0]
> Scan PhysicalRDD[id#0,value#1]
> MapPartitionsRDD[42] at persist at NativeMethodAccessorImpl.java:-2 []
> | CachedPartitions: 200; MemorySize: 54.0 KB;
> ExternalBlockStoreSize: 0.0 B; DiskSize: 0.0 B
> | MapPartitionsRDD[41] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | ZippedPartitionsRDD2[40] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsWithPreparationRDD[30] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | ShuffledRowRDD[29] at persist at NativeMethodAccessorImpl.java:-2
> []
> +-(2) MapPartitionsRDD[28] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[27] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[4] at applySchemaToPythonRDD at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[3] at map at SerDeUtil.scala:100 []
> | MapPartitionsRDD[2] at mapPartitions at SerDeUtil.scala:147 []
> | PythonRDD[1] at RDD at PythonRDD.scala:43 []
> | ParallelCollectionRDD[0] at parallelize at PythonRDD.scala:423
> []
> | MapPartitionsWithPreparationRDD[39] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | ShuffledRowRDD[38] at persist at NativeMethodAccessorImpl.java:-2
> []
> +-(200) MapPartitionsRDD[37] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[36] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsWithPreparationRDD[35] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | ShuffledRowRDD[34] at persist at
> NativeMethodAccessorImpl.java:-2 []
> +-(2) MapPartitionsRDD[33] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsWithPreparationRDD[32] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[31] at persist at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[4] at applySchemaToPythonRDD at
> NativeMethodAccessorImpl.java:-2 []
> | MapPartitionsRDD[3] at map at SerDeUtil.scala:100 []
> | MapPartitionsRDD[2] at mapPartitions at SerDeUtil.scala:147
> []
> | PythonRDD[1] at RDD at PythonRDD.scala:43 []
> | ParallelCollectionRDD[0] at parallelize at
> PythonRDD.scala:423 []
>
>
>
>
>
>
> --
> View this message in context:
> http://apache-spark-user-list.1001560.n3.nabble.com/pyspark-results-differ-based-on-whether-persist-has-been-called-tp25121.html
> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>
> -
> To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
> For additional commands, e-mail: user-h...@spark.apache.org
>
-
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org