imback82 commented on a change in pull request #29437:
URL: https://github.com/apache/spark/pull/29437#discussion_r471830558
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala
##########
@@ -120,7 +120,7 @@ object TextInputJsonDataSource extends JsonDataSource {
sparkSession,
paths = inputPaths.map(_.getPath.toString),
className = classOf[TextFileFormat].getName,
- options = parsedOptions.parameters
+ options = parsedOptions.parameters.originalMap
Review comment:
ditto
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
##########
@@ -155,7 +155,7 @@ object TextInputCSVDataSource extends CSVDataSource {
sparkSession,
paths = paths,
className = classOf[TextFileFormat].getName,
- options = options.parameters
+ options = options.parameters.originalMap
Review comment:
This is not related to this PR, but I think this should be case-sensitive
for Hadoop configs, so I am fixing it here.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
##########
@@ -34,13 +35,21 @@ import org.apache.spark.sql.util.SchemaUtils
abstract class FileTable(
sparkSession: SparkSession,
- options: CaseInsensitiveStringMap,
+ originalOptions: CaseInsensitiveStringMap,
paths: Seq[String],
userSpecifiedSchema: Option[StructType])
extends Table with SupportsRead with SupportsWrite {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+ // Options without path-related options from `originalOptions`.
+ protected final lazy val options: CaseInsensitiveStringMap = {
+ val caseInsensitiveMap =
CaseInsensitiveMap(originalOptions.asCaseSensitiveMap.asScala.toMap)
+ val caseInsensitiveMapWithoutPaths = caseInsensitiveMap - "paths" - "path"
+ new CaseInsensitiveStringMap(
+
caseInsensitiveMapWithoutPaths.asInstanceOf[CaseInsensitiveMap[String]].originalMap.asJava)
+ }
Review comment:
@cloud-fan Should I move `FileDataSourceV2.getPaths` to this class so
that the operations with path-related options are in one place? (Wanted to make
sure before making a bigger change).
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
##########
@@ -34,13 +35,21 @@ import org.apache.spark.sql.util.SchemaUtils
abstract class FileTable(
sparkSession: SparkSession,
- options: CaseInsensitiveStringMap,
+ originalOptions: CaseInsensitiveStringMap,
paths: Seq[String],
userSpecifiedSchema: Option[StructType])
extends Table with SupportsRead with SupportsWrite {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+ // Options without path-related options from `originalOptions`.
+ protected final lazy val options: CaseInsensitiveStringMap = {
Review comment:
Note that this can still be masked:
```scala
class DummyFileTable(
sparkSession: SparkSession,
options: CaseInsensitiveStringMap, // could be unintentional
paths: Seq[String],
expectedDataSchema: StructType,
userSpecifiedSchema: Option[StructType])
extends FileTable(sparkSession, options, paths, userSpecifiedSchema) {
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]