GitHub user ManchesterUnited16 commented on the issue:
https://github.com/apache/spark/pull/19687
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.{ALS, ALSModel}
import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
// $example off$
import org.apache.spark.sql.SparkSession

object ALSExample {
  // $example on$
  case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

  def parseRating(str: String): Rating = {
    val fields = str.split("::")
    assert(fields.size == 4)
    Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
  }
  // $example off$

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("local")
      .appName("ALSExample")
      .getOrCreate()
    import spark.implicits._

    // $example on$
    val ratings: DataFrame = spark.read
      .textFile("D:\\xcar\\Spark_MLib\\ml_2.11.1\\src\\data\\mllib\\als\\sample_movielens_ratings.txt")
      .map(parseRating)
      .toDF()
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    // Build the recommendation model using ALS on the training data
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model: ALSModel = als.fit(training)

    // Evaluate the model by computing the RMSE on the test data.
    // Note we set cold start strategy to 'drop' to ensure we don't get
    // NaN evaluation metrics.
    model.setColdStartStrategy("drop")
    val predictions: DataFrame = model.transform(test)
    val evaluator: RegressionEvaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")

    // Generate top 10 movie recommendations for each user
    val userRecs: DataFrame = model.recommendForAllUsers(10)
    // Generate top 10 user recommendations for each movie
    val movieRecs: DataFrame = model.recommendForAllItems(10)
    // Generate top 10 movie recommendations for a specified set of users
    // val users = ratings.select(als.getUserCol).distinct().limit(3)
    // val userSubsetRecs = model.recommendForUserSubset(users, 10)
    // Generate top 10 user recommendations for a specified set of movies
    // val movies = ratings.select(als.getItemCol).distinct().limit(3)
    // val movieSubSetRecs = model.recommendForItemSubset(movies, 10)
    // $example off$

    userRecs.show()
    // movieRecs.show()
    // userSubsetRecs.show()
    // movieSubSetRecs.show()
    spark.stop()
  }
}
This is my code, and when I run the line "userRecs.show()" I get this error:

java.io.NotSerializableException: scala.reflect.api.TypeTags$PredefTypeCreator
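
A commonly suggested workaround for this exception (an assumption, not something confirmed in this thread) is to move the Rating case class to the top level, outside the enclosing object, so that the encoder derived via spark.implicits._ does not capture TypeTag machinery from the enclosing scope. A minimal sketch under that assumption (object name ALSExampleTopLevel is hypothetical):

import org.apache.spark.sql.SparkSession

// Hypothetical workaround sketch: Rating is defined at the top level,
// outside any object or method, so encoder derivation does not close
// over an outer scope.
case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

object ALSExampleTopLevel {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .master("local")
      .appName("ALSExampleTopLevel")
      .getOrCreate()
    import spark.implicits._

    // A tiny in-memory Dataset, just to show the Rating encoder resolves.
    val ratings = Seq(Rating(0, 1, 4.0f, 0L), Rating(0, 2, 3.0f, 1L)).toDF()
    ratings.show()

    spark.stop()
  }
}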
At 2017-11-09 05:29:55, "Shixiong Zhu" <[email protected]> wrote:
@zsxwing commented on this pull request.
In sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala:

> @@ -441,4 +443,28 @@ class ExpressionEncoderSuite extends PlanTest with AnalysisTest {
>      }
>    }
>  }
> +
> +  /**
> +   * Verify the size of scala.reflect.runtime.JavaUniverse.undoLog before and after `func` to
> +   * ensure we don't leak Scala reflection garbage.
> +   *
> +   * @see org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects
> +   */
> +  private def verifyNotLeakingReflectionObjects[T](func: => T): T = {
> +    def undoLogSize: Int = {
> +      import scala.reflect.runtime.{JavaUniverse, universe}
No special reason. I changed to use the fully qualified class name now.
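
For context, a minimal sketch of what that helper might look like with the fully qualified name, based only on the doc comment quoted above (the undoLog.log member access is an assumption about scala.reflect internals, not a verbatim copy of the final patch):

private def verifyNotLeakingReflectionObjects[T](func: => T): T = {
  // Size of the Scala reflection undo log; `undoLog.log` as the backing
  // list is an assumption about scala.reflect.runtime internals.
  def undoLogSize: Int =
    scala.reflect.runtime.universe
      .asInstanceOf[scala.reflect.runtime.JavaUniverse]
      .undoLog.log.size

  val previousUndoLogSize = undoLogSize
  val result = func
  // If the log grew while running `func`, reflection objects leaked.
  assert(undoLogSize == previousUndoLogSize)
  result
}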