dohongdayi commented on a change in pull request #34311:
URL: https://github.com/apache/spark/pull/34311#discussion_r738266697
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
##########
@@ -104,6 +105,7 @@ case class RowDataSourceScanExec(
filters: Set[Filter],
handledFilters: Set[Filter],
aggregation: Option[Aggregation],
+ sample: Option[TableSample],
Review comment:
So many pushdown related parameters, would it be better if they were
wrapped by some parent case class?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala
##########
@@ -357,3 +366,14 @@ private[sql] object SortValue {
None
}
}
+
+private[sql] final case class TableSampleValue(
+ methodName: String,
+ lowerBound: Double,
+ upperBound: Double,
+ withReplacement: Boolean,
+ seed: Long) extends TableSample {
+
+ override def describe(): String = s"$methodName $lowerBound $lowerBound
$upperBound" +
Review comment:
two `lowerBound`s ?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala
##########
@@ -154,4 +155,21 @@ private object PostgresDialect extends JdbcDialect {
val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL"
s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)}
$nullable"
}
+
+ override def supportsTableSample: Boolean = true
+
+ override def getTableSample(sample: Option[TableSample]): String = {
+ if (sample.nonEmpty) {
+ val method = if (sample.get.methodName.isEmpty) {
Review comment:
If many of the dialects have default sample methods, would
`Option[String]` be better type for `TableSample.methodName`?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
##########
@@ -336,6 +336,7 @@ object DataSourceStrategy
Set.empty,
Set.empty,
None,
+ null,
Review comment:
I think here should be `None`
##########
File path:
external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
##########
@@ -284,4 +285,32 @@ private[v2] trait V2JDBCTest extends SharedSparkSession
with DockerIntegrationFu
testIndexUsingSQL(s"$catalogName.new_table")
}
}
+
+ def supportsTableSample: Boolean = false
+
+ test("Test TABLESAMPLE") {
+ withTable(s"$catalogName.new_table") {
+ sql(s"CREATE TABLE $catalogName.new_table (col1 INT, col2 INT)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (1, 2)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (3, 4)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (5, 6)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (7, 8)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (9, 10)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (11, 12)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (13, 14)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (15, 16)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (17, 18)")
+ sql(s"INSERT INTO TABLE $catalogName.new_table values (19, 20)")
+ if (supportsTableSample) {
Review comment:
If `supportsTableSample` was false, it would be no need to create
testing table or insert testing data at all
##########
File path: sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
##########
@@ -358,6 +358,10 @@ abstract class JdbcDialect extends Serializable with
Logging{
def classifyException(message: String, e: Throwable): AnalysisException = {
new AnalysisException(message, cause = Some(e))
}
+
+ def supportsTableSample: Boolean = false
Review comment:
Would `supportsTableSample()` need parameter `methodName:
Option[String]`, for the dialect might not support the specified sample method
or not support any sample method at all ?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
##########
@@ -104,6 +105,7 @@ case class RowDataSourceScanExec(
filters: Set[Filter],
handledFilters: Set[Filter],
aggregation: Option[Aggregation],
+ sample: TableSample,
Review comment:
Would `Option[TableSample]` be better type for sample might not be
provided ?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala
##########
@@ -225,6 +226,27 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan]
with PredicateHelper {
withProjection
}
+ def applySample(plan: LogicalPlan): LogicalPlan = plan.transform {
+ case sample @ Sample(_, _, _, _, child) => child match {
+ case ScanOperation(_, _, sHolder: ScanBuilderHolder) =>
+ val tableSample = LogicalExpressions.tableSample(
+ "",
Review comment:
I didn't see any other possible value of `TableSample.methodName` beside
`""` here, so I'm not sure that`TableSample.methodName` was important ?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]