Repository: spark Updated Branches: refs/heads/branch-2.0 557eee5b6 -> 3d8d95644
[SPARK-16133][ML] model loading backward compatibility for ml.feature ## What changes were proposed in this pull request? model loading backward compatibility for ml.feature, ## How was this patch tested? existing ut and manual test for loading 1.6 models. Author: Yuhao Yang <yuhao.y...@intel.com> Author: Yuhao Yang <hhb...@gmail.com> Closes #13844 from hhbyyh/featureComp. (cherry picked from commit cc6778ee0bf4fa7a78abd30542c4a6f80ea371c5) Signed-off-by: Xiangrui Meng <m...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d8d9564 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d8d9564 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d8d9564 Branch: refs/heads/branch-2.0 Commit: 3d8d956448fd3b7ae8d380e655bfa245b11c4ea0 Parents: 557eee5 Author: Yuhao Yang <yuhao.y...@intel.com> Authored: Thu Jun 23 21:50:25 2016 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Thu Jun 23 21:50:32 2016 -0700 ---------------------------------------------------------------------- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++- .../scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 9 ++++++--- .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3d8d9564/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 02d4e6a..5d6287f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -27,6 +27,7 @@ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors} +import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ @@ -180,9 +181,9 @@ object IDFModel extends MLReadable[IDFModel] { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath) + val Row(idf: Vector) = MLUtils.convertVectorColumnsToML(data, "idf") .select("idf") .head() - val idf = data.getAs[Vector](0) val model = new IDFModel(metadata.uid, new feature.IDFModel(OldVectors.fromML(idf))) DefaultParamsReader.getAndSetParams(model, metadata) model http://git-wip-us.apache.org/repos/asf/spark/blob/3d8d9564/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 562b3f3..d5ad5ab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -28,6 +28,7 @@ import org.apache.spark.ml.util._ import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors} import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.stat.Statistics +import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ @@ -232,9 +233,11 @@ object MinMaxScalerModel extends MLReadable[MinMaxScalerModel] { override def load(path: String): MinMaxScalerModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString - val Row(originalMin: Vector, originalMax: Vector) = sparkSession.read.parquet(dataPath) - .select("originalMin", "originalMax") - .head() + val data = sparkSession.read.parquet(dataPath) + val Row(originalMin: Vector, originalMax: Vector) = + MLUtils.convertVectorColumnsToML(data, "originalMin", "originalMax") + .select("originalMin", "originalMax") + .head() val model = new MinMaxScalerModel(metadata.uid, originalMin, originalMax) DefaultParamsReader.getAndSetParams(model, metadata) model http://git-wip-us.apache.org/repos/asf/spark/blob/3d8d9564/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index be58dc2..b4be954 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -28,6 +28,7 @@ import org.apache.spark.ml.util._ import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors} import org.apache.spark.mllib.linalg.VectorImplicits._ +import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ @@ -211,7 +212,8 @@ object StandardScalerModel extends MLReadable[StandardScalerModel] { override def load(path: String): StandardScalerModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString - val Row(std: Vector, mean: Vector) = sparkSession.read.parquet(dataPath) + val data = sparkSession.read.parquet(dataPath) + val Row(std: Vector, mean: Vector) = MLUtils.convertVectorColumnsToML(data, "std", "mean") .select("std", "mean") .head() val model = new StandardScalerModel(metadata.uid, std, mean) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org