Github user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/20894#discussion_r188961210 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala --- @@ -206,24 +280,33 @@ object MultiLineCSVDataSource extends CSVDataSource { conf: Configuration, file: PartitionedFile, parser: UnivocityParser, - schema: StructType): Iterator[InternalRow] = { + requiredSchema: StructType, + dataSchema: StructType, + caseSensitive: Boolean): Iterator[InternalRow] = { + def checkHeader(header: Array[String]): Unit = { + CSVDataSource.checkHeaderColumnNames(dataSchema, header, file.filePath, + checkHeaderFlag = !parser.options.enforceSchema, caseSensitive) + } + UnivocityParser.parseStream( CodecStreams.createInputStreamWithCloseResource(conf, new Path(new URI(file.filePath))), - parser.options.headerFlag, - parser, - schema) + parser.options.headerFlag, parser, requiredSchema, checkHeader) } override def infer( sparkSession: SparkSession, inputPaths: Seq[FileStatus], parsedOptions: CSVOptions): StructType = { val csv = createBaseRdd(sparkSession, inputPaths, parsedOptions) + // The header is not checked because there is no schema against which it could be checked + def checkHeader(header: Array[String]): Unit = () --- End diff -- removed
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org