Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/17847#discussion_r114683317
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
---
@@ -483,35 +483,42 @@ case class DataSource(
object DataSource {
+ private val jdbc = classOf[JdbcRelationProvider].getCanonicalName
+ private val json = classOf[JsonFileFormat].getCanonicalName
+ private val parquet = classOf[ParquetFileFormat].getCanonicalName
+ private val csv = classOf[CSVFileFormat].getCanonicalName
+ private val libsvm = "org.apache.spark.ml.source.libsvm.LibSVMFileFormat"
+ private val orc = "org.apache.spark.sql.hive.orc.OrcFileFormat"
+
/** A map to maintain backward compatibility in case we move data
sources around. */
- private val backwardCompatibilityMap: Map[String, String] = {
- val jdbc = classOf[JdbcRelationProvider].getCanonicalName
- val json = classOf[JsonFileFormat].getCanonicalName
- val parquet = classOf[ParquetFileFormat].getCanonicalName
- val csv = classOf[CSVFileFormat].getCanonicalName
- val libsvm = "org.apache.spark.ml.source.libsvm.LibSVMFileFormat"
- val orc = "org.apache.spark.sql.hive.orc.OrcFileFormat"
-
- Map(
- "org.apache.spark.sql.jdbc" -> jdbc,
- "org.apache.spark.sql.jdbc.DefaultSource" -> jdbc,
- "org.apache.spark.sql.execution.datasources.jdbc.DefaultSource" ->
jdbc,
- "org.apache.spark.sql.execution.datasources.jdbc" -> jdbc,
- "org.apache.spark.sql.json" -> json,
- "org.apache.spark.sql.json.DefaultSource" -> json,
- "org.apache.spark.sql.execution.datasources.json" -> json,
- "org.apache.spark.sql.execution.datasources.json.DefaultSource" ->
json,
- "org.apache.spark.sql.parquet" -> parquet,
- "org.apache.spark.sql.parquet.DefaultSource" -> parquet,
- "org.apache.spark.sql.execution.datasources.parquet" -> parquet,
- "org.apache.spark.sql.execution.datasources.parquet.DefaultSource"
-> parquet,
- "org.apache.spark.sql.hive.orc.DefaultSource" -> orc,
- "org.apache.spark.sql.hive.orc" -> orc,
- "org.apache.spark.ml.source.libsvm.DefaultSource" -> libsvm,
- "org.apache.spark.ml.source.libsvm" -> libsvm,
- "com.databricks.spark.csv" -> csv
- )
- }
+ private val backwardCompatibilityMap: Map[String, String] = Map(
+ "org.apache.spark.sql.jdbc" -> jdbc,
+ "org.apache.spark.sql.jdbc.DefaultSource" -> jdbc,
+ "org.apache.spark.sql.execution.datasources.jdbc.DefaultSource" ->
jdbc,
+ "org.apache.spark.sql.execution.datasources.jdbc" -> jdbc,
+ "org.apache.spark.sql.json" -> json,
+ "org.apache.spark.sql.json.DefaultSource" -> json,
+ "org.apache.spark.sql.execution.datasources.json" -> json,
+ "org.apache.spark.sql.execution.datasources.json.DefaultSource" ->
json,
+ "org.apache.spark.sql.parquet" -> parquet,
+ "org.apache.spark.sql.parquet.DefaultSource" -> parquet,
+ "org.apache.spark.sql.execution.datasources.parquet" -> parquet,
+ "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" ->
parquet,
+ "org.apache.spark.sql.hive.orc.DefaultSource" -> orc,
+ "org.apache.spark.sql.hive.orc" -> orc,
+ "org.apache.spark.ml.source.libsvm.DefaultSource" -> libsvm,
+ "org.apache.spark.ml.source.libsvm" -> libsvm,
+ "com.databricks.spark.csv" -> csv
+ )
+
+ private val builtinShortNamesMap: Map[String, String] = Map(
--- End diff --
It would probably be nicer to add a small comment explaining why this is
needed, i.e., why the shortened names of internal data sources should be mapped
to their fully qualified names.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]