srowen commented on a change in pull request #20146: [SPARK-11215][ML] Add 
multiple columns support to StringIndexer
URL: https://github.com/apache/spark/pull/20146#discussion_r245677527
 
 

 ##########
 File path: mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
 ##########
 @@ -310,11 +470,25 @@ object StringIndexerModel extends 
MLReadable[StringIndexerModel] {
     override def load(path: String): StringIndexerModel = {
       val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
       val dataPath = new Path(path, "data").toString
-      val data = sparkSession.read.parquet(dataPath)
-        .select("labels")
-        .head()
-      val labels = data.getAs[Seq[String]](0).toArray
-      val model = new StringIndexerModel(metadata.uid, labels)
+
+      // We support to load old `StringIndexerModel` saved by previous Spark 
versions.
+      // Previous model has `labels`, but new model has `labelsArray`.
+      val (majorVersion, minorVersion) = 
majorMinorVersion(metadata.sparkVersion)
+      val labelsArray = if (majorVersion < 3) {
+        // Spark 2.4 and before.
 
 Review comment:
   Don't we normally handle this with model versions rather than tying it to
Spark versions?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to