Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23052#discussion_r237210777
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala
 ---
    @@ -169,13 +169,18 @@ private[csv] class CsvOutputWriter(
         context: TaskAttemptContext,
         params: CSVOptions) extends OutputWriter with Logging {
     
    -  private val charset = Charset.forName(params.charset)
    +  private var univocityGenerator: Option[UnivocityGenerator] = None
     
    -  private val writer = CodecStreams.createOutputStreamWriter(context, new 
Path(path), charset)
    -
    -  private val gen = new UnivocityGenerator(dataSchema, writer, params)
    +  override def write(row: InternalRow): Unit = {
    +    val gen = univocityGenerator.getOrElse {
    +      val charset = Charset.forName(params.charset)
    +      val os = CodecStreams.createOutputStreamWriter(context, new 
Path(path), charset)
    +      new UnivocityGenerator(dataSchema, os, params)
    +    }
    +    univocityGenerator = Some(gen)
    --- End diff --
    
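    Doesn't this assignment (`univocityGenerator = Some(gen)`) need to be inside 
the getOrElse block? It doesn't matter for correctness, but as written it sets 
the field to the same generator on every write() call, and maybe that's a 
little bit of overhead worth avoiding.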


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to