Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/5820#discussion_r29802362
  
    --- Diff: mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala ---
    @@ -161,43 +172,24 @@ class Pipeline extends Estimator[PipelineModel] {
     @AlphaComponent
     class PipelineModel private[ml] (
         override val parent: Pipeline,
    -    override val fittingParamMap: ParamMap,
    -    private[ml] val stages: Array[Transformer])
    +    val stages: Array[Transformer])
       extends Model[PipelineModel] with Logging {
     
    -  override def validate(paramMap: ParamMap): Unit = {
    -    val map = fittingParamMap ++ extractParamMap(paramMap)
    -    stages.foreach(_.validate(map))
    +  override def validateParams(): Unit = {
    +    super.validateParams()
    +    stages.foreach(_.validateParams())
       }
     
    -  /**
    -   * Gets the model produced by the input estimator. Throws an 
NoSuchElementException is the input
    -   * estimator does not exist in the pipeline.
    -   */
    -  def getModel[M <: Model[M]](stage: Estimator[M]): M = {
    -    val matched = stages.filter {
    -      case m: Model[_] => m.parent.eq(stage)
    -      case _ => false
    -    }
    -    if (matched.isEmpty) {
    -      throw new NoSuchElementException(s"Cannot find stage $stage from the 
pipeline.")
    -    } else if (matched.length > 1) {
    -      throw new IllegalStateException(s"Cannot have duplicate estimators 
in the sample pipeline.")
    -    } else {
    -      matched.head.asInstanceOf[M]
    -    }
    +  override def transform(dataset: DataFrame): DataFrame = {
    +    transformSchema(dataset.schema, logging = true)
    +    stages.foldLeft(dataset)((cur, transformer) => 
transformer.transform(cur))
       }
     
    -  override def transform(dataset: DataFrame, paramMap: ParamMap): 
DataFrame = {
    -    // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap
    -    val map = fittingParamMap ++ extractParamMap(paramMap)
    -    transformSchema(dataset.schema, map, logging = true)
    -    stages.foldLeft(dataset)((cur, transformer) => 
transformer.transform(cur, map))
    +  override def transformSchema(schema: StructType): StructType = {
    +    stages.foldLeft(schema)((cur, transformer) => 
transformer.transformSchema(cur))
       }
     
    -  override def transformSchema(schema: StructType, paramMap: ParamMap): 
StructType = {
    -    // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap
    -    val map = fittingParamMap ++ extractParamMap(paramMap)
    -    stages.foldLeft(schema)((cur, transformer) => 
transformer.transformSchema(cur, map))
    +  override def copy(extra: ParamMap): PipelineModel = {
    --- End diff --
    
    This method should use the `extra` ParamMap and also copy the model's stages.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to