spark git commit: [SPARK-10234] [MLLIB] update since version in mllib.clustering

meng Tue, 25 Aug 2015 22:34:17 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-1.5 b7766699a -> be0c9915c



[SPARK-10234] [MLLIB] update since version in mllib.clustering

Same as #8421 but for `mllib.clustering`.

cc feynmanliang yu-iskw

Author: Xiangrui Meng <m...@databricks.com>

Closes #8435 from mengxr/SPARK-10234.

(cherry picked from commit d703372f86d6a59383ba8569fcd9d379849cffbf)
Signed-off-by: Xiangrui Meng <m...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be0c9915
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be0c9915
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be0c9915

Branch: refs/heads/branch-1.5
Commit: be0c9915c0084a187933f338e51e606dc68e93af
Parents: b776669
Author: Xiangrui Meng <m...@databricks.com>
Authored: Tue Aug 25 22:33:48 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Aug 25 22:33:55 2015 -0700

----------------------------------------------------------------------
 .../mllib/clustering/GaussianMixture.scala      |  1 +
 .../mllib/clustering/GaussianMixtureModel.scala |  8 +++---
 .../apache/spark/mllib/clustering/KMeans.scala  |  1 +
 .../spark/mllib/clustering/KMeansModel.scala    |  4 +--
 .../spark/mllib/clustering/LDAModel.scala       | 28 +++++++++++++++-----
 .../clustering/PowerIterationClustering.scala   | 10 ++++---
 .../mllib/clustering/StreamingKMeans.scala      | 15 ++++++-----
 7 files changed, 44 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index daa947e..f82bd82 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -53,6 +53,7 @@ import org.apache.spark.util.Utils
  * @param maxIterations The maximum number of iterations to perform
  */
 @Experimental
+@Since("1.3.0")
 class GaussianMixture private (
     private var k: Int,
     private var convergenceTol: Double,

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 1a10a8b..7f6163e 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -46,9 +46,9 @@ import org.apache.spark.sql.{SQLContext, Row}
  */
 @Since("1.3.0")
 @Experimental
-class GaussianMixtureModel(
-  val weights: Array[Double],
-  val gaussians: Array[MultivariateGaussian]) extends Serializable with 
Saveable {
+class GaussianMixtureModel @Since("1.3.0") (
+  @Since("1.3.0") val weights: Array[Double],
+  @Since("1.3.0") val gaussians: Array[MultivariateGaussian]) extends 
Serializable with Saveable {
 
   require(weights.length == gaussians.length, "Length of weight and Gaussian 
arrays must match")
 
@@ -178,7 +178,7 @@ object GaussianMixtureModel extends 
Loader[GaussianMixtureModel] {
           (weight, new MultivariateGaussian(mu, sigma))
       }.unzip
 
-      return new GaussianMixtureModel(weights.toArray, gaussians.toArray)
+      new GaussianMixtureModel(weights.toArray, gaussians.toArray)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 3e9545a..46920ff 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -37,6 +37,7 @@ import org.apache.spark.util.random.XORShiftRandom
  * This is an iterative algorithm that will make multiple passes over the 
data, so any RDDs given
  * to it should be cached by the user.
  */
+@Since("0.8.0")
 class KMeans private (
     private var k: Int,
     private var maxIterations: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index 13fc4a8..45021f4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -37,8 +37,8 @@ import org.apache.spark.sql.Row
  * A clustering model for K-means. Each point belongs to the cluster with the 
closest center.
  */
 @Since("0.8.0")
-class KMeansModel (
-    val clusterCenters: Array[Vector]) extends Saveable with Serializable with 
PMMLExportable {
+class KMeansModel @Since("1.1.0") (@Since("1.0.0") val clusterCenters: 
Array[Vector])
+  extends Saveable with Serializable with PMMLExportable {
 
   /**
    * A Java-friendly constructor that takes an Iterable of Vectors.

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 432bbed..15129e0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -43,12 +43,15 @@ import org.apache.spark.util.BoundedPriorityQueue
  * including local and distributed data structures.
  */
 @Experimental
+@Since("1.3.0")
 abstract class LDAModel private[clustering] extends Saveable {
 
   /** Number of topics */
+  @Since("1.3.0")
   def k: Int
 
   /** Vocabulary size (number of terms or terms in the vocabulary) */
+  @Since("1.3.0")
   def vocabSize: Int
 
   /**
@@ -57,6 +60,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    *
    * This is the parameter to a Dirichlet distribution.
    */
+  @Since("1.5.0")
   def docConcentration: Vector
 
   /**
@@ -68,6 +72,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * Note: The topics' distributions over terms are called "beta" in the 
original LDA paper
    * by Blei et al., but are called "phi" in many later papers such as 
Asuncion et al., 2009.
    */
+  @Since("1.5.0")
   def topicConcentration: Double
 
   /**
@@ -81,6 +86,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * This is a matrix of size vocabSize x k, where each column is a topic.
    * No guarantees are given about the ordering of the topics.
    */
+  @Since("1.3.0")
   def topicsMatrix: Matrix
 
   /**
@@ -91,6 +97,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    *          (term indices, term weights in topic).
    *          Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])]
 
   /**
@@ -102,6 +109,7 @@ abstract class LDAModel private[clustering] extends 
Saveable {
    *          (term indices, term weights in topic).
    *          Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(): Array[(Array[Int], Array[Double])] = 
describeTopics(vocabSize)
 
   /* TODO (once LDA can be trained with Strings or given a dictionary)
@@ -185,10 +193,11 @@ abstract class LDAModel private[clustering] extends 
Saveable {
  * @param topics Inferred topics (vocabSize x k matrix).
  */
 @Experimental
+@Since("1.3.0")
 class LocalLDAModel private[clustering] (
-    val topics: Matrix,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val topics: Matrix,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel with Serializable {
 
@@ -376,6 +385,7 @@ class LocalLDAModel private[clustering] (
 }
 
 @Experimental
+@Since("1.5.0")
 object LocalLDAModel extends Loader[LocalLDAModel] {
 
   private object SaveLoadV1_0 {
@@ -479,13 +489,14 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
  * than the [[LocalLDAModel]].
  */
 @Experimental
+@Since("1.3.0")
 class DistributedLDAModel private[clustering] (
     private[clustering] val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
     private[clustering] val globalTopicTotals: LDA.TopicCounts,
-    val k: Int,
-    val vocabSize: Int,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val vocabSize: Int,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     private[spark] val iterationTimes: Array[Double],
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel {
@@ -603,6 +614,7 @@ class DistributedLDAModel private[clustering] (
    *         (term indices, topic indices).  Note that terms will be omitted 
if not present in
    *         the document.
    */
+  @Since("1.5.0")
   lazy val topicAssignments: RDD[(Long, Array[Int], Array[Int])] = {
     // For reference, compare the below code with the core part of 
EMLDAOptimizer.next().
     val eta = topicConcentration
@@ -634,6 +646,7 @@ class DistributedLDAModel private[clustering] (
   }
 
   /** Java-friendly version of [[topicAssignments]] */
+  @Since("1.5.0")
   lazy val javaTopicAssignments: JavaRDD[(java.lang.Long, Array[Int], 
Array[Int])] = {
     topicAssignments.asInstanceOf[RDD[(java.lang.Long, Array[Int], 
Array[Int])]].toJavaRDD()
   }
@@ -770,6 +783,7 @@ class DistributedLDAModel private[clustering] (
 
 
 @Experimental
+@Since("1.5.0")
 object DistributedLDAModel extends Loader[DistributedLDAModel] {
 
   private object SaveLoadV1_0 {

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index 396b36f..da234bd 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -42,9 +42,10 @@ import org.apache.spark.{Logging, SparkContext, 
SparkException}
  */
 @Since("1.3.0")
 @Experimental
-class PowerIterationClusteringModel(
-    val k: Int,
-    val assignments: RDD[PowerIterationClustering.Assignment]) extends 
Saveable with Serializable {
+class PowerIterationClusteringModel @Since("1.3.0") (
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val assignments: RDD[PowerIterationClustering.Assignment])
+  extends Saveable with Serializable {
 
   @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
@@ -56,6 +57,8 @@ class PowerIterationClusteringModel(
 
 @Since("1.4.0")
 object PowerIterationClusteringModel extends 
Loader[PowerIterationClusteringModel] {
+
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String): 
PowerIterationClusteringModel = {
     PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
   }
@@ -120,6 +123,7 @@ object PowerIterationClusteringModel extends 
Loader[PowerIterationClusteringMode
  * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering 
(Wikipedia)]]
  */
 @Experimental
+@Since("1.3.0")
 class PowerIterationClustering private[clustering] (
     private var k: Int,
     private var maxIterations: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/be0c9915/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index 41f2668..1d50ffe 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -66,9 +66,10 @@ import org.apache.spark.util.random.XORShiftRandom
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeansModel(
-    override val clusterCenters: Array[Vector],
-    val clusterWeights: Array[Double]) extends KMeansModel(clusterCenters) 
with Logging {
+class StreamingKMeansModel @Since("1.2.0") (
+    @Since("1.2.0") override val clusterCenters: Array[Vector],
+    @Since("1.2.0") val clusterWeights: Array[Double])
+  extends KMeansModel(clusterCenters) with Logging {
 
   /**
    * Perform a k-means update on a batch of data.
@@ -168,10 +169,10 @@ class StreamingKMeansModel(
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeans(
-    var k: Int,
-    var decayFactor: Double,
-    var timeUnit: String) extends Logging with Serializable {
+class StreamingKMeans @Since("1.2.0") (
+    @Since("1.2.0") var k: Int,
+    @Since("1.2.0") var decayFactor: Double,
+    @Since("1.2.0") var timeUnit: String) extends Logging with Serializable {
 
   @Since("1.2.0")
   def this() = this(2, 1.0, StreamingKMeans.BATCHES)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-10234] [MLLIB] update since version in mllib.clustering

Reply via email to