[GitHub] spark pull request #12601: [SPARK-14525][SQL] Make DataFrameWrite.save work ...

JustinPihony Wed, 07 Sep 2016 22:05:08 -0700

Github user JustinPihony commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12601#discussion_r77946299
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala
 ---
    @@ -17,39 +17,113 @@
     
     package org.apache.spark.sql.execution.datasources.jdbc
     
    +import java.sql.SQLException
     import java.util.Properties
     
    -import org.apache.spark.sql.SQLContext
    -import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, 
RelationProvider}
    +import org.apache.spark.sql.{DataFrame, SaveMode, SQLContext}
    +import org.apache.spark.sql.sources.{BaseRelation, 
CreatableRelationProvider, DataSourceRegister, RelationProvider, 
SchemaRelationProvider}
    +import org.apache.spark.sql.types.StructType
     
    -class JdbcRelationProvider extends RelationProvider with 
DataSourceRegister {
    +class JdbcRelationProvider extends CreatableRelationProvider
    +  with SchemaRelationProvider with RelationProvider with 
DataSourceRegister {
     
       override def shortName(): String = "jdbc"
     
    -  /** Returns a new base relation with the given parameters. */
       override def createRelation(
           sqlContext: SQLContext,
           parameters: Map[String, String]): BaseRelation = {
    +    createRelation(sqlContext, parameters, null)
    +  }
    +
    +  /** Returns a new base relation with the given parameters. */
    +  override def createRelation(
    +      sqlContext: SQLContext,
    +      parameters: Map[String, String],
    +      schema: StructType): BaseRelation = {
         val jdbcOptions = new JDBCOptions(parameters)
    -    if (jdbcOptions.partitionColumn != null
    -      && (jdbcOptions.lowerBound == null
    -        || jdbcOptions.upperBound == null
    -        || jdbcOptions.numPartitions == null)) {
    -      sys.error("Partitioning incompletely specified")
    -    }
    +    val partitionColumn = jdbcOptions.partitionColumn
    +    val lowerBound = jdbcOptions.lowerBound
    +    val upperBound = jdbcOptions.upperBound
    +    val numPartitions = jdbcOptions.numPartitions
     
    -    val partitionInfo = if (jdbcOptions.partitionColumn == null) {
    -      null
    -    } else {
    +    val partitionInfo = if (partitionColumn == null) null
    +    else {
           JDBCPartitioningInfo(
    -        jdbcOptions.partitionColumn,
    -        jdbcOptions.lowerBound.toLong,
    -        jdbcOptions.upperBound.toLong,
    -        jdbcOptions.numPartitions.toInt)
    +        partitionColumn, lowerBound.toLong, upperBound.toLong, 
numPartitions.toInt)
         }
         val parts = JDBCRelation.columnPartition(partitionInfo)
         val properties = new Properties() // Additional properties that we 
will pass to getConnection
         parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    -    JDBCRelation(jdbcOptions.url, jdbcOptions.table, parts, 
properties)(sqlContext.sparkSession)
    +    JDBCRelation(jdbcOptions.url, jdbcOptions.table, parts, properties,
    +      Option(schema))(sqlContext.sparkSession)
    +  }
    +
    +  /*
    +   * The following structure applies to this code:
    +   *                 |    tableExists            |          !tableExists
    +   
*------------------------------------------------------------------------------------
    +   * Ignore          | BaseRelation              | CreateTable, saveTable, 
BaseRelation
    +   * ErrorIfExists   | ERROR                     | CreateTable, saveTable, 
BaseRelation
    +   * Overwrite*      | (DropTable, CreateTable,) | CreateTable, saveTable, 
BaseRelation
    +   *                 | saveTable, BaseRelation   |
    +   * Append          | saveTable, BaseRelation   | CreateTable, saveTable, 
BaseRelation
    +   *
    +   * *Overwrite & tableExists with truncate, will not drop & create, but 
instead truncate
    +   */
    +  override def createRelation(
    +      sqlContext: SQLContext,
    +      mode: SaveMode,
    +      parameters: Map[String, String],
    +      data: DataFrame): BaseRelation = {
    +    val jdbcOptions = new JDBCOptions(parameters)
    +    val url = jdbcOptions.url
    +    val table = jdbcOptions.table
    +
    +    import collection.JavaConverters._
    +    val props = new Properties()
    +    props.putAll(parameters.asJava)
    +    val conn = JdbcUtils.createConnectionFactory(url, props)()
    +
    +    try {
    +      val tableExists = JdbcUtils.tableExists(conn, url, table)
    +
    +      val (doCreate, doSave) = (mode, tableExists) match {
    --- End diff --
    
    Your way results in the need for a `return`, which can lead to problems and 
is [generally discouraged](https://tpolecat.github.io/2014/05/09/return.html). 
In the current implementation you could just have it do nothing, and the next 
`if` block would be skipped anyway, but that leaves a lot of room for error in 
further code changes. This way, by contrast, is very explicit about the rules 
and what each combination will yield.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #12601: [SPARK-14525][SQL] Make DataFrameWrite.save work ...

Reply via email to