Github user liancheng commented on a diff in the pull request:
https://github.com/apache/spark/pull/6747#discussion_r32298245
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala ---
@@ -219,82 +455,116 @@ private[sql] class SQLConf extends Serializable with CatalystConf {
    * in joins.
    */
   private[spark] def defaultSizeInBytes: Long =
-    getConf(DEFAULT_SIZE_IN_BYTES, (autoBroadcastJoinThreshold + 1).toString).toLong
+    getConf(DEFAULT_SIZE_IN_BYTES, autoBroadcastJoinThreshold + 1L)

   /**
    * When set to true, we always treat byte arrays in Parquet files as strings.
    */
-  private[spark] def isParquetBinaryAsString: Boolean =
-    getConf(PARQUET_BINARY_AS_STRING, "false").toBoolean
+  private[spark] def isParquetBinaryAsString: Boolean = getConf(PARQUET_BINARY_AS_STRING)

   /**
    * When set to true, we always treat INT96Values in Parquet files as timestamp.
    */
-  private[spark] def isParquetINT96AsTimestamp: Boolean =
-    getConf(PARQUET_INT96_AS_TIMESTAMP, "true").toBoolean
+  private[spark] def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)

   /**
    * When set to true, partition pruning for in-memory columnar tables is enabled.
    */
-  private[spark] def inMemoryPartitionPruning: Boolean =
-    getConf(IN_MEMORY_PARTITION_PRUNING, "false").toBoolean
+  private[spark] def inMemoryPartitionPruning: Boolean = getConf(IN_MEMORY_PARTITION_PRUNING)

-  private[spark] def columnNameOfCorruptRecord: String =
-    getConf(COLUMN_NAME_OF_CORRUPT_RECORD, "_corrupt_record")
+  private[spark] def columnNameOfCorruptRecord: String = getConf(COLUMN_NAME_OF_CORRUPT_RECORD)

   /**
    * Timeout in seconds for the broadcast wait time in hash join
    */
-  private[spark] def broadcastTimeout: Int =
-    getConf(BROADCAST_TIMEOUT, (5 * 60).toString).toInt
+  private[spark] def broadcastTimeout: Int = getConf(BROADCAST_TIMEOUT)

-  private[spark] def defaultDataSourceName: String =
-    getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet")
+  private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME)

-  private[spark] def partitionDiscoveryEnabled() =
-    getConf(SQLConf.PARTITION_DISCOVERY_ENABLED, "true").toBoolean
+  private[spark] def partitionDiscoveryEnabled(): Boolean =
+    getConf(SQLConf.PARTITION_DISCOVERY_ENABLED)

-  private[spark] def partitionColumnTypeInferenceEnabled() =
-    getConf(SQLConf.PARTITION_COLUMN_TYPE_INFERENCE, "true").toBoolean
+  private[spark] def partitionColumnTypeInferenceEnabled(): Boolean =
+    getConf(SQLConf.PARTITION_COLUMN_TYPE_INFERENCE)

   // Do not use a value larger than 4000 as the default value of this property.
   // See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information.
-  private[spark] def schemaStringLengthThreshold: Int =
-    getConf(SCHEMA_STRING_LENGTH_THRESHOLD, "4000").toInt
+  private[spark] def schemaStringLengthThreshold: Int = getConf(SCHEMA_STRING_LENGTH_THRESHOLD)

-  private[spark] def dataFrameEagerAnalysis: Boolean =
-    getConf(DATAFRAME_EAGER_ANALYSIS, "true").toBoolean
+  private[spark] def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS)

   private[spark] def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
-    getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY, "true").toBoolean
+    getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)

-  private[spark] def dataFrameRetainGroupColumns: Boolean =
-    getConf(DATAFRAME_RETAIN_GROUP_COLUMNS, "true").toBoolean
+  private[spark] def dataFrameRetainGroupColumns: Boolean = getConf(DATAFRAME_RETAIN_GROUP_COLUMNS)

   /** ********************** SQLConf functionality methods ************ */

   /** Set Spark SQL configuration properties. */
   def setConf(props: Properties): Unit = settings.synchronized {
-    props.foreach { case (k, v) => settings.put(k, v) }
+    props.foreach { case (k, v) => setConfString(k, v) }
   }

-  /** Set the given Spark SQL configuration property. */
-  def setConf(key: String, value: String): Unit = {
+  /** Set the given Spark SQL configuration property using a `string` value. */
+  def setConfString(key: String, value: String): Unit = {
     require(key != null, "key cannot be null")
     require(value != null, s"value cannot be null for key: $key")
+    val entry = sqlConfEntries.get(key)
+    if (entry != null) {
+      // Only verify configs in the SQLConf object
+      entry.valueConverter(value)
+    }
     settings.put(key, value)
   }

+  /** Set the given Spark SQL configuration property. */
+  def setConf[T](entry: SQLConfEntry[T], value: T): Unit = {
+    require(entry != null, "entry cannot be null")
+    require(value != null, s"value cannot be null for key: ${entry.key}")
+    require(sqlConfEntries.get(entry.key) == entry, s"$entry is not registered")
+    settings.put(entry.key, entry.stringConverter(value))
+  }
+
   /** Return the value of Spark SQL configuration property for the given key. */
-  def getConf(key: String): String = {
-    Option(settings.get(key)).getOrElse(throw new NoSuchElementException(key))
+  def getConfString(key: String): String = {
+    Option(settings.get(key)).
+      orElse {
+        // Try to use the default value
+        Option(sqlConfEntries.get(key)).map(_.defaultValueString)
+      }.
+      getOrElse(throw new NoSuchElementException(key))
--- End diff ---
The indentation and newlines are off here.
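For example, the chain could be laid out like this (just a sketch; the leading-dot style here is only one possible layout, and it relies on the `settings` and `sqlConfEntries` members shown in the diff above):

```scala
  /** Return the value of Spark SQL configuration property for the given key. */
  def getConfString(key: String): String = {
    // Suggested layout only: leading dots, one level of extra indentation per chained call
    Option(settings.get(key))
      .orElse {
        // Fall back to the registered entry's default value, if the key is known
        Option(sqlConfEntries.get(key)).map(_.defaultValueString)
      }
      .getOrElse(throw new NoSuchElementException(key))
  }
```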