GitHub user attilapiros commented on a diff in the pull request: https://github.com/apache/spark/pull/19224#discussion_r161807543 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala --- @@ -361,3 +361,78 @@ class JacksonParser( } } } + +object JacksonParser { + private[spark] def splitDocuments(input: InputStream) = new Iterator[String] { + + private implicit class JsonCharacter(char: Char) { + def isJsonObjectFinished(endToken: Option[Char]): Boolean = { + endToken match { + case None => char == '}' || char == ']' + case Some(x) => char == x + } + } + } + private var currentChar: Char = input.read().toChar + private var previousToken: Option[Char] = None + private var nextRecord = readNext + + override def hasNext: Boolean = nextRecord.isDefined + + override def next(): String = { + if (!hasNext) { + throw new NoSuchElementException("End of stream") + } + val curRecord = nextRecord.get + nextRecord = readNext + curRecord + } + + private def moveToNextChar() = { + if (!currentChar.isWhitespace) { + previousToken = Some(currentChar) + } + currentChar = input.read().toChar + } + + private def readJsonObject: Option[String] = { + val endToken = currentChar match { + case '{' => Some('}') + case '[' => Some(']') + case _ => None + } + + val sb = new StringBuilder() + sb.append(currentChar) + while (!currentChar.isJsonObjectFinished(endToken) && input.available() > 0) { + moveToNextChar() + currentChar match { + case '{' | '[' => --- End diff -- It is quite sad, as you probably put a lot of work into it. I hope you can reuse some parts of it in your next attempt.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org