Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19224#discussion_r161811468
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
 ---
    @@ -361,3 +361,78 @@ class JacksonParser(
         }
       }
     }
    +
    +object JacksonParser {
    +  private[spark] def splitDocuments(input: InputStream) = new 
Iterator[String] {
    +
    +    private implicit class JsonCharacter(char: Char) {
    +      def isJsonObjectFinished(endToken: Option[Char]): Boolean = {
    +        endToken match {
    +          case None => char == '}' || char == ']'
    +          case Some(x) => char == x
    +        }
    +      }
    +    }
    +    private var currentChar: Char = input.read().toChar
    +    private var previousToken: Option[Char] = None
    +    private var nextRecord = readNext
    +
    +    override def hasNext: Boolean = nextRecord.isDefined
    +
    +    override def next(): String = {
    +      if (!hasNext) {
    +        throw new NoSuchElementException("End of stream")
    +      }
    +      val curRecord = nextRecord.get
    +      nextRecord = readNext
    +      curRecord
    +    }
    +
    +    private def moveToNextChar() = {
    +      if (!currentChar.isWhitespace) {
    +        previousToken = Some(currentChar)
    +      }
    +      currentChar = input.read().toChar
    +    }
    +
    +    private def readJsonObject: Option[String] = {
    +      val endToken = currentChar match {
    +        case '{' => Some('}')
    +        case '[' => Some(']')
    +        case _ => None
    +      }
    +
    +      val sb = new StringBuilder()
    +      sb.append(currentChar)
    +      while (!currentChar.isJsonObjectFinished(endToken) && 
input.available() > 0) {
    +        moveToNextChar()
    +        currentChar match {
    +          case '{' | '[' =>
    --- End diff --
    
    The main issue is that I would have to subclass the Jackson parsers to 
make this work with the current approach, since when a JSON document is not 
valid, Jackson doesn't let us retrieve the string which caused the problem. And 
we are using many of them, so it's going to be a very big effort. If you have 
better ideas, please feel free to submit a PR. Thanks.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to