cloud-fan commented on code in PR #52120:
URL: https://github.com/apache/spark/pull/52120#discussion_r2354442816
########## sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/APITest.scala: ##########
@@ -0,0 +1,599 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.pipelines.utils
+
+import scala.concurrent.duration._
+import scala.concurrent.duration.Duration
+
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
+// scalastyle:off
+import org.scalatest.funsuite.AnyFunSuite
+// scalastyle:on
+import org.scalatest.matchers.should.Matchers
+
+import org.apache.spark.sql.QueryTest.checkAnswer
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+
+/**
+ * Representation of a pipeline specification.
+ * @param catalog
+ *   the catalog the pipeline publishes data to
+ * @param database
+ *   the database the pipeline publishes data to
+ * @param include
+ *   the list of source files to include in the pipeline spec
+ */
+case class TestPipelineSpec(
+    catalog: Option[String] = None,
+    database: Option[String] = None,
+    include: Seq[String])
+
+/**
+ * Available configurations for running a test pipeline.
+ *
+ * @param pipelineSpec
+ *   the pipeline specification to use. The remaining fields mirror CLI options that affect
+ *   execution; by default, all datasets are updated incrementally.
+ * @param dryRun
+ *   if true, the pipeline will be validated but not executed
+ * @param fullRefreshAll
+ *   if true, perform a full graph reset and recompute
+ * @param fullRefreshSelection
+ *   if non-empty, only reset and recompute the specified subset of datasets
+ * @param refreshSelection
+ *   if non-empty, only update the specified subset of datasets
+ */
+case class TestPipelineConfiguration(
+    pipelineSpec: TestPipelineSpec,
+    dryRun: Boolean = false,
+    fullRefreshAll: Boolean = false,
+    fullRefreshSelection: Seq[String] = Seq.empty,
+    refreshSelection: Seq[String] = Seq.empty)
+
+/**
+ * Logical representation of a source file to be included in the pipeline spec.
+ */
+case class PipelineSourceFile(name: String, contents: String)
+
+/**
+ * Extendable traits for PipelineReference and UpdateReference that allow implementations at
+ * different levels to store pipeline-execution- and update-execution-specific information.
+ */
+trait PipelineReference {}
+
+trait APITest
+    extends AnyFunSuite // scalastyle:ignore funsuite

Review Comment:
   why can't we extend `SparkFunSuite`?
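[Editor's note] For context, a minimal sketch of what the reviewer's suggestion could look like. It assumes `org.apache.spark.SparkFunSuite` (Spark's shared test base, an abstract class that itself extends `AnyFunSuite` with the `BeforeAndAfter*` traits) is reachable from this module's test classpath, which the PR thread does not confirm:

    package org.apache.spark.sql.pipelines.utils

    import org.scalatest.matchers.should.Matchers

    import org.apache.spark.SparkFunSuite

    // SparkFunSuite already mixes in AnyFunSuite, BeforeAndAfterAll and
    // BeforeAndAfterEach, so the direct AnyFunSuite import and its scalastyle
    // suppression would no longer be needed. A Scala trait may extend a class;
    // doing so only restricts what the trait can later be mixed into.
    trait APITest extends SparkFunSuite with Matchers {
      // helper members from the original trait would carry over unchanged
    }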
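[Editor's note] Separately, since the scaladoc above describes the refresh knobs only in prose, here is a hypothetical usage sketch of the two case classes; all catalog, database, file, and dataset names are invented for illustration:

    // Point the pipeline at a catalog/database and a set of source files.
    val spec = TestPipelineSpec(
      catalog = Some("spark_catalog"),     // hypothetical catalog
      database = Some("test_db"),          // hypothetical database
      include = Seq("definitions.sql"))    // hypothetical source file

    // Validate the pipeline without executing it.
    val dryRun = TestPipelineConfiguration(pipelineSpec = spec, dryRun = true)

    // Incrementally update only two datasets.
    val partial = TestPipelineConfiguration(
      pipelineSpec = spec,
      refreshSelection = Seq("bronze_events", "silver_events"))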
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org