Repository: spark
Updated Branches:
  refs/heads/master 11ed2b180 -> 2a6590e51


[SPARK-9981] [ML] Made labels public for StringIndexerModel

Also added unit test for integration between StringIndexerModel and 
IndexToString

CC: holdenk We realized we should have left in your unit test (to catch the 
issue with removing the inverse() method), so this adds it back.  mengxr

Author: Joseph K. Bradley <jos...@databricks.com>

Closes #8211 from jkbradley/stridx-labels.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a6590e5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a6590e5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a6590e5

Branch: refs/heads/master
Commit: 2a6590e510aba3bfc6603d280023128b3f5ac702
Parents: 11ed2b1
Author: Joseph K. Bradley <jos...@databricks.com>
Authored: Fri Aug 14 14:05:03 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Fri Aug 14 14:05:03 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/ml/feature/StringIndexer.scala   |  5 ++++-
 .../spark/ml/feature/StringIndexerSuite.scala     | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2a6590e5/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 6347578..24250e4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -97,14 +97,17 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
 /**
  * :: Experimental ::
  * Model fitted by [[StringIndexer]].
+ *
  * NOTE: During transformation, if the input column does not exist,
  * [[StringIndexerModel.transform]] would return the input dataset unmodified.
  * This is a temporary fix for the case when target labels do not exist during prediction.
+ *
+ * @param labels  Ordered list of labels, corresponding to indices to be assigned
  */
 @Experimental
 class StringIndexerModel (
     override val uid: String,
-    labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {
+    val labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {
 
   def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2a6590e5/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
index 0b4c8ba..05e05bd 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
@@ -147,4 +147,22 @@ class StringIndexerSuite extends SparkFunSuite with MLlibTestSparkContext {
         assert(actual === expected)
     }
   }
+
+  test("StringIndexer, IndexToString are inverses") {
+    val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2)
+    val df = sqlContext.createDataFrame(data).toDF("id", "label")
+    val indexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("labelIndex")
+      .fit(df)
+    val transformed = indexer.transform(df)
+    val idx2str = new IndexToString()
+      .setInputCol("labelIndex")
+      .setOutputCol("sameLabel")
+      .setLabels(indexer.labels)
+    idx2str.transform(transformed).select("label", "sameLabel").collect().foreach {
+      case Row(a: String, b: String) =>
+        assert(a === b)
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to