[
https://issues.apache.org/jira/browse/SPARK-14246?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jim Powers updated SPARK-14246:
-------------------------------
Description:
Attached are two scripts. The problem appears only with Spark 1.6.0, 1.6.1,
and 2.0.0 built against Scala 2.11; Scala 2.10 builds are unaffected.
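The attached scripts are not inlined in this report. For orientation, a
minimal sketch consistent with the types inferred in the transcripts below
(a hypothetical reconstruction of reproduce_transient_npe.scala; the member
bodies are guesses) would be:
{noformat}
// Hypothetical reconstruction of reproduce_transient_npe.scala;
// only the member signatures are taken from the transcripts below.
import org.apache.spark.rdd.RDD

var X = new Serializable {
  val cf = scala.util.Random.nextDouble           // some coefficient
  def getArray(n: Int): Array[Double] =
    Array.fill(n)(scala.util.Random.nextDouble)   // n random doubles
  def multiplySum(x: Double, v: RDD[Double]): Double =
    v.map(_ * x * cf).sum()                       // forces a Spark job
}
{noformat}
The {{var}} is what makes the later {{X = null}} reassignment legal, and the
anonymous {{Serializable}} subclass is what produces the structural type and
the {{$anon$1@...}} values in the transcripts.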
With the regular Scala 2.11(.7) REPL, reloading the script re-binds {{X}} as expected:
{noformat}
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@4149c063
scala> X
res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@4149c063
scala> val a = X.getArray(10)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.1701063617079236, 0.17570862034857437,
0.6065851472098629, 0.4683069994589304, 0.35194859652378363,
0.04033043823203897, 0.11917887149548367, 0.540367871104426,
0.18544859881040276, 0.7236380062803334)
scala> X = null
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@5860f3d7
scala> X
res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@5860f3d7
{noformat}
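(The feature warning in these transcripts is presumably the standard
reflective-call warning for invoking members of a structural type; it is
unrelated to this issue and can be silenced by adding the following to the
script:)
{noformat}
import scala.language.reflectiveCalls
{noformat}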
However, from within the Spark shell (Spark 1.6.0, Scala 2.11.7), the value assigned by the reload is not visible to subsequent lines; note {{res1}} at the end of this transcript:
{noformat}
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@750e2d33
scala> val a = X.getArray(100)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.6330055191546612, 0.017865502179445936,
0.6334775064489349, 0.9053929733525056, 0.7648311134918273, 0.5423177955113584,
0.5164344368587143, 0.420054677669768, 0.7842112717076851, 0.2098345684721057,
0.7925640951404774, 0.5604706596425998, 0.8104403239147542, 0.7567005967624031,
0.5221119883682028, 0.15766763970350484, 0.18693986227881698,
0.7475065360095031, 0.7766720862129398, 0.7844069968816826,
0.27481855935245014, 0.8498855383673198, 0.7496017097461324, 0.448373036252237,
0.7372969840779748, 0.26381835654323815, 0.7919478212349927, 0.773136240932345,
0.7441046289586369, 0.8774372628866844, 0.567152428053003, 0.7256375989728348,
0.654839959050646, 0.858953671296855, 0.47581286359760067,
0.039760801375546495, 0.7764165909218748, 0.6882803110041462, 0.8660302...
scala> X = null
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
scala> X
res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} =
$anon$1@48da64f2
scala> X
res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
{noformat}
The {{:load}} output shows a fresh instance ({{$anon$1@48da64f2}}), yet
evaluating {{X}} on the next line still yields {{null}}: the reassignment
performed by the reloaded script is not seen by later REPL lines. However, if
the loaded script does not refer to an {{RDD}}, the reload works fine (a
sketch matching {{Null.scala}}'s inferred type follows the transcript):
{noformat}
scala> :load Null.scala
Loading Null.scala...
X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@987a0bb
scala> val a = X.getArray(100)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.1383741239912808, 0.9648059677260219,
0.9189575875974628, 0.41397368933686096, 0.22201144446192966,
0.44243794397063774, 0.8784983685464675, 0.1340277408843078, 0.706263786679972,
0.7950404663404447, 0.24430810245881607, 0.5770760096607244,
0.2525706003922249, 0.28184231631420364, 0.008730677363379735,
0.81095065419385, 0.846743885175591, 0.9332265324673933, 0.7179553831600355,
0.8136413098595938, 0.815645757370769, 0.6841618927812075, 0.2543696773107338,
0.1307824382653785, 0.21866878494759168, 0.3565351406594982,
0.9395305162439264, 0.9817882504819025, 0.8848012359685327, 0.1256685393081879,
0.9907437397885274, 0.7316579278629144, 0.960786505005683, 0.05259590461101904,
0.22459289042641883, 0.482387624551172, 0.2118621194069078, 0.2412102388775842,
0.0423595...
scala> X = null
X: Serializable{def getArray(n: Int): Array[Double]} = null
scala> X
res0: Serializable{def getArray(n: Int): Array[Double]} = null
scala> :load Null.scala
Loading Null.scala...
X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41
scala> X
res1: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41
{noformat}
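Again, {{Null.scala}} is attached rather than inlined; a minimal script
matching its inferred type (a hypothetical reconstruction) would be:
{noformat}
// Hypothetical reconstruction of Null.scala; note there is no
// reference to RDD or any other Spark class.
var X = new Serializable {
  def getArray(n: Int): Array[Double] =
    Array.fill(n)(scala.util.Random.nextDouble)
}
{noformat}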
> vars not updated after Scala script reload
> ------------------------------------------
>
> Key: SPARK-14246
> URL: https://issues.apache.org/jira/browse/SPARK-14246
> Project: Spark
> Issue Type: Bug
> Components: Spark Shell
> Affects Versions: 1.6.0, 1.6.1, 2.0.0
> Reporter: Jim Powers
> Attachments: Null.scala, reproduce_transient_npe.scala
>
>