Github user marmbrus commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12246#discussion_r59764187
  
    --- Diff: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
 ---
    @@ -0,0 +1,379 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.catalyst.analysis
    +
    +import org.apache.spark.SparkFunSuite
    +import org.apache.spark.sql.AnalysisException
    +import org.apache.spark.sql.catalyst.dsl.expressions._
    +import org.apache.spark.sql.catalyst.dsl.plans._
    +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
    +import org.apache.spark.sql.catalyst.expressions.AttributeReference
    +import org.apache.spark.sql.catalyst.plans._
    +import org.apache.spark.sql.catalyst.plans.logical._
    +import org.apache.spark.sql.types.IntegerType
    +
    +class UnsupportedOperationsSuite extends SparkFunSuite {
    +
    +  val batchRelation = LocalRelation(AttributeReference("a", IntegerType, 
nullable = true)())
    +
    +  val streamRelation = new LocalRelation(
    +    Seq(AttributeReference("a", IntegerType, nullable = true)())) {
    +    override def isStreaming: Boolean = true
    +  }
    +
    +  /*
    +    
=======================================================================================
    +                                     BATCH QUERIES
    +    
=======================================================================================
    +   */
    +
    +  assertSupportedForBatch("local relation", batchRelation)
    +
    +  assertNotSupportedForBatch(
    +    "streaming source",
    +    streamRelation,
    +    Seq("with streaming source", "startStream"))
    +
    +  assertNotSupportedForBatch(
    +    "select on streaming source",
    +    streamRelation.select($"count(*)"),
    +    Seq("with streaming source", "startStream"))
    +
    +
    +  /*
    +    
=======================================================================================
    +                                     STREAMING QUERIES
    +    
=======================================================================================
    +   */
    +
    +  // Batch plan in streaming query
    +  testError("batch source", Seq("without streaming source", 
"startStream")) {
    +    
UnsupportedOperationChecker.checkForStreaming(batchRelation.select($"count(*)"),
 Append)
    +  }
    +
    +  // Commands
    +  assertNotSupportedForStreaming(
    +    "commmands",
    +    DescribeFunction("func", true),
    +    outputMode = Append,
    +    expectedMsgs = "commands" :: Nil)
    +
    +  // Aggregates: Not supported on streams in Append mode
    +  assertSupportedForStreaming(
    +    "aggregate - stream with update output mode",
    +    batchRelation.groupBy("a")("count(*)"),
    +    outputMode = Update)
    +
    +  assertSupportedForStreaming(
    +    "aggregate - batch with update output mode",
    +    streamRelation.groupBy("a")("count(*)"),
    +    outputMode = Update)
    +
    +  assertSupportedForStreaming(
    +    "aggregate - batch with append output mode",
    +    batchRelation.groupBy("a")("count(*)"),
    +    outputMode = Append)
    +
    +  assertNotSupportedForStreaming(
    +    "aggregate - stream with append output mode",
    +    streamRelation.groupBy("a")("count(*)"),
    +    outputMode = Append,
    +    Seq("aggregation", "append output mode"))
    +
    +  // Inner joins: Stream-stream not supported
    +  testBinaryOperationForStreaming(
    +    "inner join",
    +    _.join(_, joinType = Inner),
    +    streamStreamSupported = false)
    +
    +  // Full outer joins: only batch-batch is allowed
    +  testBinaryOperationForStreaming(
    +    "full outer join",
    +    _.join(_, joinType = FullOuter),
    +    streamStreamSupported = false,
    +    batchStreamSupported = false,
    +    streamBatchSupported = false)
    +
    +  // Left outer joins: *-stream not allowed
    +  testBinaryOperationForStreaming(
    +    "left outer join",
    +    _.join(_, joinType = LeftOuter),
    +    streamStreamSupported = false,
    +    batchStreamSupported = false,
    +    expectedMsg = "left outer/semi/anti joins")
    +
    +  // Left semi joins: *-stream not allowed
    +  testBinaryOperationForStreaming(
    +    "left semi join",
    +    _.join(_, joinType = LeftSemi),
    +    streamStreamSupported = false,
    +    batchStreamSupported = false,
    +    expectedMsg = "left outer/semi/anti joins")
    +
    +  // Left anti joins: *-stream not allowed
    +  testBinaryOperationForStreaming(
    +    "left anti join",
    +    _.join(_, joinType = LeftAnti),
    +    streamStreamSupported = false,
    +    batchStreamSupported = false,
    +    expectedMsg = "left outer/semi/anti joins")
    +
    +  // Right outer joins: stream-* not allowed
    +  testBinaryOperationForStreaming(
    +    "right outer join",
    +    _.join(_, joinType = RightOuter),
    +    streamStreamSupported = false,
    +    streamBatchSupported = false)
    +
    +  // Cogroup: only batch-batch is allowed
    +  testBinaryOperationForStreaming(
    +    "cogroup",
    +    genCogroup,
    +    streamStreamSupported = false,
    +    batchStreamSupported = false,
    +    streamBatchSupported = false)
    +
    +  def genCogroup(left: LogicalPlan, right: LogicalPlan): LogicalPlan = {
    +    def func(k: Int, left: Iterator[Int], right: Iterator[Int]): 
Iterator[Int] = {
    +      Iterator.empty
    +    }
    +    implicit val intEncoder = ExpressionEncoder[Int]
    +    CoGroup[Int, Int, Int, Int](
    +      func,
    +      AppendColumns[Int, Int]((x: Int) => x, left).newColumns,
    +      AppendColumns[Int, Int]((x: Int) => x, right).newColumns,
    +      left.output,
    +      right.output,
    +      left,
    +      right
    +    )
    +  }
    +
    +  // Union: Mixing between stream and batch not supported
    +  testBinaryOperationForStreaming(
    +    "union",
    +    _.union(_),
    +    streamBatchSupported = false,
    +    batchStreamSupported = false)
    +
    +  // Except: *-stream not supported
    +  testBinaryOperationForStreaming(
    +    "except",
    +    _.except(_),
    +    streamStreamSupported = false,
    +    batchStreamSupported = false)
    +
    +  // Intersect: stream-stream not supported
    +  testBinaryOperationForStreaming(
    +    "intersect",
    +    _.intersect(_),
    +    streamStreamSupported = false)
    +
    +
    +  // Unary operations
    +  testUnaryOperatorForStreaming("sort", Sort(Nil, true, _))
    +  testUnaryOperatorForStreaming("sort partitions", SortPartitions(Nil, _), 
expectedMsg = "sort")
    +  testUnaryOperatorForStreaming("sample", Sample(0.1, 1, true, 1L, _)(), 
expectedMsg = "sampling")
    +  testUnaryOperatorForStreaming(
    +    "window", Window(Nil, Nil, Nil, _), expectedMsg = "non-time-based 
windows")
    +
    +
    +  /*
    +    
=======================================================================================
    +                                     TESTING FUNCTIONS
    +    
=======================================================================================
    +   */
    +
    +  /**
    +   * Test that a unary operator correctly fails support check when it has 
a streaming child plan,
    +   * but not when it has batch child plan. There can be batch sub-plans 
inside a streaming plan,
    +   * so it is valid for the operator to have a batch child plan.
    +   *
    +   * This test wraps the logical plan in a fake operator that makes the 
whole plan look like
    +   * a streaming plan even if the child plan is a batch plan. This is to 
test that the operator
    +   * supports having a batch child plan, forming a batch subplan inside a 
streaming plan.
    +   */
    +  def testUnaryOperatorForStreaming(
    +    operationName: String,
    +    logicalPlanGenerator: LogicalPlan => LogicalPlan,
    +    outputMode: OutputMode = Append,
    +    expectedMsg: String = ""): Unit = {
    +
    +    val expectedMsgs = if (expectedMsg.isEmpty) Seq(operationName) else 
Seq(expectedMsg)
    +
    +    assertNotSupportedForStreaming(
    +      s"$operationName with stream relation",
    +      wrapInStreaming(logicalPlanGenerator(streamRelation)),
    +      outputMode,
    +      expectedMsgs)
    +
    +    assertSupportedForStreaming(
    +      s"$operationName with batch relation",
    +      wrapInStreaming(logicalPlanGenerator(batchRelation)),
    +      outputMode)
    +  }
    +
    +
    +  /**
    +   * Test that a binary operator correctly fails support check when it has 
combinations of
    +   * streaming and batch child plans. There can be batch sub-plans inside 
a streaming plan,
    +   * so it is valid for the operator to have a batch child plan.
    +   */
    +  def testBinaryOperationForStreaming(
    +      operationName: String,
    +      planGenerator: (LogicalPlan, LogicalPlan) => LogicalPlan,
    +      outputMode: OutputMode = Append,
    +      streamStreamSupported: Boolean = true,
    +      streamBatchSupported: Boolean = true,
    +      batchStreamSupported: Boolean = true,
    +      expectedMsg: String = ""): Unit = {
    +
    +    val expectedMsgs = if (expectedMsg.isEmpty) Seq(operationName) else 
Seq(expectedMsg)
    +
    +    if (streamStreamSupported) {
    +      assertSupportedForStreaming(
    +        s"$operationName with stream-stream relations",
    +        planGenerator(streamRelation, streamRelation),
    +        outputMode)
    +    } else {
    +      assertNotSupportedForStreaming(
    +        s"$operationName with stream-stream relations",
    +        planGenerator(streamRelation, streamRelation),
    +        outputMode,
    +        expectedMsgs)
    +    }
    +
    +    if (streamBatchSupported) {
    +      assertSupportedForStreaming(
    +        s"$operationName with stream-batch relations",
    +        planGenerator(streamRelation, batchRelation),
    +        outputMode)
    +    } else {
    +      assertNotSupportedForStreaming(
    +        s"$operationName with stream-batch relations",
    +        planGenerator(streamRelation, batchRelation),
    +        outputMode,
    +        expectedMsgs)
    +    }
    +
    +    if (batchStreamSupported) {
    +      assertSupportedForStreaming(
    +        s"$operationName with batch-stream relations",
    +        planGenerator(batchRelation, streamRelation),
    +        outputMode)
    +    } else {
    +      assertNotSupportedForStreaming(
    +        s"$operationName with batch-stream relations",
    +        planGenerator(batchRelation, streamRelation),
    +        outputMode,
    +        expectedMsgs)
    +    }
    +
    +    assertSupportedForStreaming(
    +      s"$operationName with batch-batch relations",
    +      planGenerator(batchRelation, batchRelation),
    +      outputMode)
    +  }
    +
    +  /**
    +   * Assert that the logical plan is supported as a subplan inside a 
streaming plan.
    +   *
    +   * To test this correctly, the given logical plan is wrapped in a fake 
operator that makes the
    +   * whole plan look like a streaming plan. Otherwise, a batch plan may 
throw not supported
    +   * exception simply for not being a streaming plan, even though that 
plan could exist as a batch
    +   * subplan inside some streaming plan.
    +   */
    +  def assertSupportedForStreaming(name: String, plan: LogicalPlan, 
outputMode: OutputMode): Unit = {
    +    test(s"streaming plan - $name: supported") {
    +      UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), 
outputMode)
    +    }
    +  }
    +
    +  /**
    +   * Assert that the logical plan is not supported inside a streaming plan.
    +   *
    +   * To test this correctly, the given logical plan is wrapped in a fake 
operator that makes the
    +   * whole plan look like a streaming plan. Otherwise, a batch plan may 
throw not supported
    +   * exception simply for not being a streaming plan, even though that 
plan could exist as a batch
    +   * subplan inside some streaming plan.
    +   */
    +  def assertNotSupportedForStreaming(
    +      name: String,
    +      plan: LogicalPlan,
    +      outputMode: OutputMode,
    +      expectedMsgs: Seq[String]): Unit = {
    +    testError(
    +      s"streaming plan - $name: not supported",
    +      expectedMsgs :+ "streaming" :+ "DataFrame" :+ "Dataset" :+ "not 
supported") {
    +      UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), 
outputMode)
    +    }
    +  }
    +
    +  /** Assert that the logical plan is supported as a batch plan */
    +  def assertSupportedForBatch(name: String, plan: LogicalPlan): Unit = {
    +    test(s"batch plan - $name: supported") {
    +      UnsupportedOperationChecker.checkForBatch(plan)
    +    }
    +  }
    +
    +  /** Assert that the logical plan is not supported as a batch plan */
    +  def assertNotSupportedForBatch(
    +      name: String,
    +      plan: LogicalPlan,
    +      expectedMsgs: Seq[String]): Unit = {
    +    testError(s"batch plan - $name: not supported", expectedMsgs) {
    +      UnsupportedOperationChecker.checkForBatch(plan)
    +    }
    +  }
    +
    +  /**
    +   * Test whether the body of code will fail. If it does fail, then check 
if it has expected
    +   * messages.
    +   */
    +  def testError(testName: String, expectedMsgs: Seq[String])(testBody: => 
Unit): Unit = {
    +
    +    val e = intercept[AnalysisException] {
    +      testBody
    +    }
    +
    +    if 
(!expectedMsgs.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
    +      fail(
    +        s"""Exception message should contain the following substrings:
    +          |
    +          |  ${expectedMsgs.mkString("\n  ")}
    +          |
    +          |Actual exception message:
    +          |
    +          |  ${e.getMessage}
    +          """.stripMargin)
    +    }
    +  }
    +
    +  def wrapInStreaming(plan: LogicalPlan): LogicalPlan = {
    +    new StreamingPlanWrapper(plan)
    +  }
    +
    +  class StreamingPlanWrapper(plan: LogicalPlan) extends MapPartitions(
    --- End diff --
    
    Same concern about extending a case class.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to