Github user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/20894#discussion_r188615023 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala --- @@ -202,28 +263,33 @@ object TextInputCSVDataSource extends CSVDataSource { object MultiLineCSVDataSource extends CSVDataSource { override val isSplitable: Boolean = false - override def readFile( - conf: Configuration, - file: PartitionedFile, - parser: UnivocityParser, - schema: StructType): Iterator[InternalRow] = { + override def readFile(conf: Configuration, file: PartitionedFile, parser: UnivocityParser, + schema: StructType, dataSchema: StructType, + caseSensitive: Boolean): Iterator[InternalRow] = { + def checkHeader(header: Array[String]): Unit = { + CSVDataSource.checkHeaderColumnNames(dataSchema, header, file.filePath, + checkHeaderFlag = !parser.options.enforceSchema, caseSensitive) + } + UnivocityParser.parseStream( CodecStreams.createInputStreamWithCloseResource(conf, new Path(new URI(file.filePath))), - parser.options.headerFlag, - parser, - schema) + parser.options.headerFlag, parser, schema, checkHeader) } override def infer( sparkSession: SparkSession, inputPaths: Seq[FileStatus], parsedOptions: CSVOptions): StructType = { val csv = createBaseRdd(sparkSession, inputPaths, parsedOptions) + // The header is not checked because there is no schema against with it could be check + def checkHeader(header: Array[String]): Unit = () + csv.flatMap { lines => val path = new Path(lines.getPath()) UnivocityParser.tokenizeStream( CodecStreams.createInputStreamWithCloseResource(lines.getConfiguration, path), - shouldDropHeader = false, + dropFirstRecord = false, + checkHeader, --- End diff -- It is defined here because `header` is a CSV-level entity. The stream tokenizer (in UnivocityParser) works at a lower level.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org