Github user sethah commented on a diff in the pull request:
https://github.com/apache/spark/pull/14834#discussion_r76463186
--- Diff:
mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
---
@@ -676,39 +936,54 @@ object LogisticRegressionModel extends
MLReadable[LogisticRegressionModel] {
private case class Data(
numClasses: Int,
numFeatures: Int,
- intercept: Double,
- coefficients: Vector)
+ interceptVector: Vector,
+ coefficientMatrix: Matrix,
+ isMultinomial: Boolean)
override protected def saveImpl(path: String): Unit = {
// Save metadata and Params
DefaultParamsWriter.saveMetadata(instance, path, sc)
// Save model data: numClasses, numFeatures, intercept, coefficients
- val data = Data(instance.numClasses, instance.numFeatures,
instance.intercept,
- instance.coefficients)
+ val data = Data(instance.numClasses, instance.numFeatures,
instance.interceptVector,
+ instance.coefficientMatrix, instance.isMultinomial)
val dataPath = new Path(path, "data").toString
sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath)
}
}
- private class LogisticRegressionModelReader
- extends MLReader[LogisticRegressionModel] {
+ private class LogisticRegressionModelReader extends
MLReader[LogisticRegressionModel] {
/** Checked against metadata when loading model */
private val className = classOf[LogisticRegressionModel].getName
override def load(path: String): LogisticRegressionModel = {
val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
+ val versionRegex = "([0-9]+)\\.([0-9]+)\\.(.+)".r
+ val versionRegex(major, minor, _) = metadata.sparkVersion
val dataPath = new Path(path, "data").toString
val data = sparkSession.read.format("parquet").load(dataPath)
- // We will need numClasses, numFeatures in the future for
multinomial logreg support.
- // TODO: remove numClasses and numFeatures fields?
- val Row(numClasses: Int, numFeatures: Int, intercept: Double,
coefficients: Vector) =
- MLUtils.convertVectorColumnsToML(data, "coefficients")
- .select("numClasses", "numFeatures", "intercept", "coefficients")
- .head()
- val model = new LogisticRegressionModel(metadata.uid, coefficients,
intercept)
+ val model = if (major.toInt < 2 || (major.toInt == 2 && minor.toInt
== 0)) {
--- End diff --
I did an offline test to make sure that we can successfully load old models
into the new API.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org