Github user MaxGekk commented on a diff in the pull request:
https://github.com/apache/spark/pull/20937#discussion_r178434446
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala
---
@@ -175,33 +185,43 @@ object MultiLineJsonDataSource extends JsonDataSource
{
.values
}
- private def createParser(jsonFactory: JsonFactory, record:
PortableDataStream): JsonParser = {
+ private def createParser(
+ jsonFactory: JsonFactory,
+ record: PortableDataStream,
+ charset: Option[String] = None): JsonParser = {
val path = new Path(record.getPath())
CreateJacksonParser.inputStream(
jsonFactory,
-
CodecStreams.createInputStreamWithCloseResource(record.getConfiguration, path))
+
CodecStreams.createInputStreamWithCloseResource(record.getConfiguration, path),
+ charset
+ )
}
override def readFile(
conf: Configuration,
file: PartitionedFile,
parser: JacksonParser,
schema: StructType): Iterator[InternalRow] = {
+ def createInputStream() = {
--- End diff --
I'm not sure, but for me the code is easier to read when I see the same name,
rather than having to compare two expressions and recognize that they are the
same. I believe the version below is more complicated:
```
Utils.tryWithResource(CodecStreams.createInputStreamWithCloseResource(conf,
new Path(new URI(file.filePath)))) { is =>
UTF8String.fromBytes(ByteStreams.toByteArray(is))
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]