zhengruifeng created SPARK-34356:
------------------------------------

             Summary: OVR transform avoid potential column conflict
                 Key: SPARK-34356
                 URL: https://issues.apache.org/jira/browse/SPARK-34356
             Project: Spark
          Issue Type: Improvement
          Components: ML
    Affects Versions: 3.2.0
            Reporter: zhengruifeng


{code:java}
import org.apache.spark.ml.classification._
val df = spark.read.format("libsvm").load("/d0/Dev/Opensource/spark/data/mllib/sample_multiclass_classification_data.txt").withColumn("probability", lit(0.0))
val classifier = new LogisticRegression().setMaxIter(1).setTol(1E-6).setFitIntercept(true)
val ovr = new OneVsRest().setClassifier(classifier)
val ovrm = ovr.fit(df)
ovrm.transform(df)
java.lang.IllegalArgumentException: requirement failed: Column probability already exists.
  at scala.Predef$.require(Predef.scala:281)
  at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:106)
  at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:96)
  at org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema(ProbabilisticClassifier.scala:38)
  at org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema$(ProbabilisticClassifier.scala:33)
  at org.apache.spark.ml.classification.LogisticRegressionModel.org$apache$spark$ml$classification$LogisticRegressionParams$$super$validateAndTransformSchema(LogisticRegression.scala:917)
  at org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema(LogisticRegression.scala:268)
  at org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema$(LogisticRegression.scala:255)
  at org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema(LogisticRegression.scala:917)
  at org.apache.spark.ml.PredictionModel.transformSchema(Predictor.scala:222)
  at org.apache.spark.ml.classification.ClassificationModel.transformSchema(Classifier.scala:182)
  at org.apache.spark.ml.classification.ProbabilisticClassificationModel.transformSchema(ProbabilisticClassifier.scala:88)
  at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:71)
  at org.apache.spark.ml.classification.ProbabilisticClassificationModel.transform(ProbabilisticClassifier.scala:107)
  at org.apache.spark.ml.classification.OneVsRestModel.$anonfun$transform$4(OneVsRest.scala:215)
  at scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60)
  at scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68)
  at scala.collection.mutable.ArrayOps$ofRef.foldLeft(ArrayOps.scala:198)
  at org.apache.spark.ml.classification.OneVsRestModel.transform(OneVsRest.scala:203)
  ... 49 elided {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to