MaxGekk commented on a change in pull request #21834: [SPARK-22814][SQL]
Support Date/Timestamp in a JDBC partition column
URL: https://github.com/apache/spark/pull/21834#discussion_r248474331
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
##########
@@ -109,23 +143,58 @@ private[sql] object JDBCRelation extends Logging {
ans += JDBCPartition(whereClause, i)
i = i + 1
}
- ans.toArray
+ val partitions = ans.toArray
+ logInfo(s"Number of partitions: $numPartitions, WHERE clauses of these
partitions: " +
+ partitions.map(_.asInstanceOf[JDBCPartition].whereClause).mkString(", "))
+ partitions
}
- // Verify column name based on the JDBC resolved schema
- private def verifyAndGetNormalizedColumnName(
+ // Verify column name and type based on the JDBC resolved schema
+ private def verifyAndGetNormalizedPartitionColumn(
schema: StructType,
columnName: String,
resolver: Resolver,
- jdbcOptions: JDBCOptions): String = {
+ jdbcOptions: JDBCOptions): (String, DataType) = {
val dialect = JdbcDialects.get(jdbcOptions.url)
- schema.map(_.name).find { fieldName =>
- resolver(fieldName, columnName) ||
- resolver(dialect.quoteIdentifier(fieldName), columnName)
- }.map(dialect.quoteIdentifier).getOrElse {
+ val column = schema.find { f =>
+ resolver(f.name, columnName) ||
resolver(dialect.quoteIdentifier(f.name), columnName)
+ }.getOrElse {
throw new AnalysisException(s"User-defined partition column $columnName
not " +
s"found in the JDBC relation:
${schema.simpleString(Utils.maxNumToStringFields)}")
}
+ column.dataType match {
+ case _: NumericType | DateType | TimestampType =>
+ case _ =>
+ throw new AnalysisException(
+ s"Partition column type should be ${NumericType.simpleString}, " +
+ s"${DateType.catalogString}, or ${TimestampType.catalogString},
but " +
+ s"${column.dataType.catalogString} found.")
+ }
+ (dialect.quoteIdentifier(column.name), column.dataType)
+ }
+
+ private def toInternalBoundValue(value: String, columnType: DataType): Long
= columnType match {
+ case _: NumericType => value.toLong
+ case DateType => DateTimeUtils.fromJavaDate(Date.valueOf(value)).toLong
+ case TimestampType =>
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(value))
Review comment:
@maropu The `Timestamp.valueOf` method expects a timestamp in the format
`yyyy-[m]m-[d]d hh:mm:ss[.f...]`. Was it selected intentionally, or just because
it is the default pattern for `Timestamp`?
For example, unlike `Cast`, it cannot parse a timestamp with a time zone offset:
```
Timestamp.valueOf("1973-02-27 02:30:00.102030+01:00")
```
```
Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
java.lang.IllegalArgumentException: Timestamp format must be yyyy-mm-dd
hh:mm:ss[.fffffffff]
at java.sql.Timestamp.valueOf(Timestamp.java:251)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]