[ 
https://issues.apache.org/jira/browse/SPARK-14246?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Jim Powers updated SPARK-14246:
-------------------------------
    Description: 
Attached are two scripts.  The problem only manifests with Spark 1.6.0, 
1.6.1, and 2.0.0 using Scala 2.11; Scala 2.10 does not exhibit it.  
With the regular Scala 2.11.7 REPL, reloading the script after setting {{X}} 
to null rebinds {{X}} as expected:

{noformat}
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@4149c063

scala> X
res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@4149c063

scala> val a = X.getArray(10)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.1701063617079236, 0.17570862034857437, 
0.6065851472098629, 0.4683069994589304, 0.35194859652378363, 
0.04033043823203897, 0.11917887149548367, 0.540367871104426, 
0.18544859881040276, 0.7236380062803334)

scala> X = null
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null

scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@5860f3d7

scala> X
res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@5860f3d7
{noformat}

However, from within the Spark shell (Spark 1.6.0, Scala 2.11.7), {{X}} is 
still null after the script is reloaded:

{noformat}
scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@750e2d33

scala> val a = X.getArray(100)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.6330055191546612, 0.017865502179445936, 
0.6334775064489349, 0.9053929733525056, 0.7648311134918273, 0.5423177955113584, 
0.5164344368587143, 0.420054677669768, 0.7842112717076851, 0.2098345684721057, 
0.7925640951404774, 0.5604706596425998, 0.8104403239147542, 0.7567005967624031, 
0.5221119883682028, 0.15766763970350484, 0.18693986227881698, 
0.7475065360095031, 0.7766720862129398, 0.7844069968816826, 
0.27481855935245014, 0.8498855383673198, 0.7496017097461324, 0.448373036252237, 
0.7372969840779748, 0.26381835654323815, 0.7919478212349927, 0.773136240932345, 
0.7441046289586369, 0.8774372628866844, 0.567152428053003, 0.7256375989728348, 
0.654839959050646, 0.858953671296855, 0.47581286359760067, 
0.039760801375546495, 0.7764165909218748, 0.6882803110041462, 0.8660302...
scala> X = null
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null

scala> X
res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null

scala> :load reproduce_transient_npe.scala
Loading reproduce_transient_npe.scala...
X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
$anon$1@48da64f2

scala> X
res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
{noformat}

However, if the script being loaded does not refer to an {{RDD}}, the 
reload seems to work fine in the Spark shell as well:

{noformat}
scala> :load Null.scala
Loading Null.scala...
X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@987a0bb

scala> val a = X.getArray(100)
warning: there was one feature warning; re-run with -feature for details
a: Array[Double] = Array(0.1383741239912808, 0.9648059677260219, 
0.9189575875974628, 0.41397368933686096, 0.22201144446192966, 
0.44243794397063774, 0.8784983685464675, 0.1340277408843078, 0.706263786679972, 
0.7950404663404447, 0.24430810245881607, 0.5770760096607244, 
0.2525706003922249, 0.28184231631420364, 0.008730677363379735, 
0.81095065419385, 0.846743885175591, 0.9332265324673933, 0.7179553831600355, 
0.8136413098595938, 0.815645757370769, 0.6841618927812075, 0.2543696773107338, 
0.1307824382653785, 0.21866878494759168, 0.3565351406594982, 
0.9395305162439264, 0.9817882504819025, 0.8848012359685327, 0.1256685393081879, 
0.9907437397885274, 0.7316579278629144, 0.960786505005683, 0.05259590461101904, 
0.22459289042641883, 0.482387624551172, 0.2118621194069078, 0.2412102388775842, 
0.0423595...
scala> X = null
X: Serializable{def getArray(n: Int): Array[Double]} = null

scala> X
res0: Serializable{def getArray(n: Int): Array[Double]} = null

scala> :load Null.scala
Loading Null.scala...
X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41

scala> X
res1: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41
{noformat}

> vars not updated after Scala script reload
> ------------------------------------------
>
>                 Key: SPARK-14246
>                 URL: https://issues.apache.org/jira/browse/SPARK-14246
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Shell
>    Affects Versions: 1.6.0, 1.6.1, 2.0.0
>            Reporter: Jim Powers
>         Attachments: Null.scala, reproduce_transient_npe.scala
>
>
> Attached are two scripts.  The problem only exhibits itself with Spark 1.6.0, 
> 1.6.1, and 2.0.0 using Scala 2.11.  Scala 2.10 does not exhibit this problem. 
>  With the Regular Scala 2.11(.7) REPL:
> {noformat}
> scala> :load reproduce_transient_npe.scala
> Loading reproduce_transient_npe.scala...
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@4149c063
> scala> X
> res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@4149c063
> scala> val a = X.getArray(10)
> warning: there was one feature warning; re-run with -feature for details
> a: Array[Double] = Array(0.1701063617079236, 0.17570862034857437, 
> 0.6065851472098629, 0.4683069994589304, 0.35194859652378363, 
> 0.04033043823203897, 0.11917887149548367, 0.540367871104426, 
> 0.18544859881040276, 0.7236380062803334)
> scala> X = null
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
> scala> :load reproduce_transient_npe.scala
> Loading reproduce_transient_npe.scala...
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@5860f3d7
> scala> X
> res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@5860f3d7
> {noformat}
> However, from within the Spark shell (Spark 1.6.0, Scala 2.11.7):
> {noformat}
> scala> :load reproduce_transient_npe.scala
> Loading reproduce_transient_npe.scala...
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@750e2d33
> scala> val a = X.getArray(100)
> warning: there was one feature warning; re-run with -feature for details
> a: Array[Double] = Array(0.6330055191546612, 0.017865502179445936, 
> 0.6334775064489349, 0.9053929733525056, 0.7648311134918273, 
> 0.5423177955113584, 0.5164344368587143, 0.420054677669768, 
> 0.7842112717076851, 0.2098345684721057, 0.7925640951404774, 
> 0.5604706596425998, 0.8104403239147542, 0.7567005967624031, 
> 0.5221119883682028, 0.15766763970350484, 0.18693986227881698, 
> 0.7475065360095031, 0.7766720862129398, 0.7844069968816826, 
> 0.27481855935245014, 0.8498855383673198, 0.7496017097461324, 
> 0.448373036252237, 0.7372969840779748, 0.26381835654323815, 
> 0.7919478212349927, 0.773136240932345, 0.7441046289586369, 
> 0.8774372628866844, 0.567152428053003, 0.7256375989728348, 0.654839959050646, 
> 0.858953671296855, 0.47581286359760067, 0.039760801375546495, 
> 0.7764165909218748, 0.6882803110041462, 0.8660302...
> scala> X = null
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
> scala> X
> res0: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
> scala> :load reproduce_transient_npe.scala
> Loading reproduce_transient_npe.scala...
> X: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = 
> $anon$1@48da64f2
> scala> X
> res1: Serializable{val cf: Double; def getArray(n: Int): Array[Double]; def 
> multiplySum(x: Double,v: org.apache.spark.rdd.RDD[Double]): Double} = null
> {noformat}
> However, if the script being loaded does not refer to an {{RDD}} then the 
> reload seems to work fine:
> {noformat}
> scala> :load Null.scala
> Loading Null.scala...
> X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@987a0bb
> scala> val a = X.getArray(100)
> warning: there was one feature warning; re-run with -feature for details
> a: Array[Double] = Array(0.1383741239912808, 0.9648059677260219, 
> 0.9189575875974628, 0.41397368933686096, 0.22201144446192966, 
> 0.44243794397063774, 0.8784983685464675, 0.1340277408843078, 
> 0.706263786679972, 0.7950404663404447, 0.24430810245881607, 
> 0.5770760096607244, 0.2525706003922249, 0.28184231631420364, 
> 0.008730677363379735, 0.81095065419385, 0.846743885175591, 
> 0.9332265324673933, 0.7179553831600355, 0.8136413098595938, 
> 0.815645757370769, 0.6841618927812075, 0.2543696773107338, 
> 0.1307824382653785, 0.21866878494759168, 0.3565351406594982, 
> 0.9395305162439264, 0.9817882504819025, 0.8848012359685327, 
> 0.1256685393081879, 0.9907437397885274, 0.7316579278629144, 
> 0.960786505005683, 0.05259590461101904, 0.22459289042641883, 
> 0.482387624551172, 0.2118621194069078, 0.2412102388775842, 0.0423595...
> scala> X = null
> X: Serializable{def getArray(n: Int): Array[Double]} = null
> scala> X
> res0: Serializable{def getArray(n: Int): Array[Double]} = null
> scala> :load Null.scala
> Loading Null.scala...
> X: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41
> scala> X
> res1: Serializable{def getArray(n: Int): Array[Double]} = $anon$1@bb12f41
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to