Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/19224#discussion_r161800635
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
---
@@ -361,3 +361,78 @@ class JacksonParser(
}
}
}
+
+object JacksonParser {
+ private[spark] def splitDocuments(input: InputStream) = new
Iterator[String] {
+
+ private implicit class JsonCharacter(char: Char) {
+ def isJsonObjectFinished(endToken: Option[Char]): Boolean = {
+ endToken match {
+ case None => char == '}' || char == ']'
+ case Some(x) => char == x
+ }
+ }
+ }
+ private var currentChar: Char = input.read().toChar
+ private var previousToken: Option[Char] = None
+ private var nextRecord = readNext
+
+ override def hasNext: Boolean = nextRecord.isDefined
+
+ override def next(): String = {
+ if (!hasNext) {
+ throw new NoSuchElementException("End of stream")
+ }
+ val curRecord = nextRecord.get
+ nextRecord = readNext
+ curRecord
+ }
+
+ private def moveToNextChar() = {
+ if (!currentChar.isWhitespace) {
+ previousToken = Some(currentChar)
+ }
+ currentChar = input.read().toChar
+ }
+
+ private def readJsonObject: Option[String] = {
+ val endToken = currentChar match {
+ case '{' => Some('}')
+ case '[' => Some(']')
+ case _ => None
+ }
+
+ val sb = new StringBuilder()
+ sb.append(currentChar)
+ while (!currentChar.isJsonObjectFinished(endToken) &&
input.available() > 0) {
+ moveToNextChar()
+ currentChar match {
+ case '{' | '[' =>
--- End diff --
Yes, you are right. Unfortunately, it is very hard to properly support
permissive mode as well. I am closing this PR. I'll open a new one if I manage
to find a better way.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]