Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19224#discussion_r161858020
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
 ---
    @@ -361,3 +361,78 @@ class JacksonParser(
         }
       }
     }
    +
    +object JacksonParser {
    +  private[spark] def splitDocuments(input: InputStream) = new 
Iterator[String] {
    +
    +    private implicit class JsonCharacter(char: Char) {
    +      def isJsonObjectFinished(endToken: Option[Char]): Boolean = {
    +        endToken match {
    +          case None => char == '}' || char == ']'
    +          case Some(x) => char == x
    +        }
    +      }
    +    }
    +    private var currentChar: Char = input.read().toChar
    +    private var previousToken: Option[Char] = None
    +    private var nextRecord = readNext
    +
    +    override def hasNext: Boolean = nextRecord.isDefined
    +
    +    override def next(): String = {
    +      if (!hasNext) {
    +        throw new NoSuchElementException("End of stream")
    +      }
    +      val curRecord = nextRecord.get
    +      nextRecord = readNext
    +      curRecord
    +    }
    +
    +    private def moveToNextChar() = {
    +      if (!currentChar.isWhitespace) {
    +        previousToken = Some(currentChar)
    +      }
    +      currentChar = input.read().toChar
    +    }
    +
    +    private def readJsonObject: Option[String] = {
    +      val endToken = currentChar match {
    +        case '{' => Some('}')
    +        case '[' => Some(']')
    +        case _ => None
    +      }
    +
    +      val sb = new StringBuilder()
    +      sb.append(currentChar)
    +      while (!currentChar.isJsonObjectFinished(endToken) && 
input.available() > 0) {
    +        moveToNextChar()
    +        currentChar match {
    +          case '{' | '[' =>
    --- End diff --
    
    Yes, but then escapes would have to be taken into account, etc. Then we 
would nearly have to rewrite the Jackson library's logic, which is not 
desirable. This is the reason why I said that. Sure, I would be happy if we 
can find a solution together, but I think it is hard with this approach.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to