Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20849#discussion_r175283216 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala --- @@ -2063,4 +2063,178 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { ) } } + + def testFile(fileName: String): String = { + Thread.currentThread().getContextClassLoader.getResource(fileName).toString + } + + test("json in UTF-16 with BOM") { + val fileName = "json-tests/utf16WithBOM.json" + val schema = new StructType().add("firstName", StringType).add("lastName", StringType) + val jsonDF = spark.read.schema(schema) + // The mode filters null rows produced because new line delimiter + // for UTF-8 is used by default. --- End diff -- @MaxGekk, see what happens in the test code here now: the input is split into lines using the UTF-8 newline delimiter, and then each record is parsed with a different encoding.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org