This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c170af41ba GH-39552: [Go] inclusion of option to use replacer when 
creating csv strings with go library (#39576)
c170af41ba is described below

commit c170af41ba0c30b80aa4172da0b3637206368cf2
Author: Jânio <[email protected]>
AuthorDate: Wed Jan 17 14:00:39 2024 -0300

    GH-39552: [Go] inclusion of option to use replacer when creating csv 
strings with go library (#39576)
    
    Rationale for this change
    Make it possible to remove unwanted characters from strings
    
    What changes are included in this PR?
    Add a new option function to optionally set up a replacer used by the csv Writer's Write method
    
    Are these changes tested?
    Yes
    
    Are there any user-facing changes?
    Adds an optional method (WithStringsReplacer).
    
    * Closes: #39552
    
    Lead-authored-by: Jânio <[email protected]>
    Co-authored-by: janiodev <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/arrow/csv/common.go      | 14 ++++++++++++++
 go/arrow/csv/transformer.go | 12 ++++++------
 go/arrow/csv/writer.go      | 24 +++++++++++++-----------
 go/arrow/csv/writer_test.go |  6 ++++--
 4 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 99dac29f4d..31ca61f323 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -21,6 +21,7 @@ package csv
 import (
        "errors"
        "fmt"
+       "strings"
 
        "github.com/apache/arrow/go/v15/arrow"
        "github.com/apache/arrow/go/v15/arrow/memory"
@@ -223,6 +224,19 @@ func WithIncludeColumns(cols []string) Option {
        }
 }
 
+// WithStringsReplacer receives a replacer to be applied in the string fields
+// of the CSV. This is useful to remove unwanted characters from the string.
+func WithStringsReplacer(replacer *strings.Replacer) Option {
+       return func(cfg config) {
+               switch cfg := cfg.(type) {
+               case *Writer:
+                       cfg.stringReplacer = replacer.Replace
+               default:
+                       panic(fmt.Errorf("arrow/csv: unknown config type %T", 
cfg))
+               }
+       }
+}
+
 func validate(schema *arrow.Schema) {
        for i, f := range schema.Fields() {
                switch ft := f.Type.(type) {
diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go
index 0f0181520b..78b16446d4 100644
--- a/go/arrow/csv/transformer.go
+++ b/go/arrow/csv/transformer.go
@@ -29,7 +29,7 @@ import (
        "github.com/apache/arrow/go/v15/arrow/array"
 )
 
-func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array) 
[]string {
+func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, 
stringsReplacer func(string)string) []string {
        res := make([]string, col.Len())
        switch typ.(type) {
        case *arrow.BooleanType:
@@ -144,7 +144,7 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                arr := col.(*array.String)
                for i := 0; i < arr.Len(); i++ {
                        if arr.IsValid(i) {
-                               res[i] = arr.Value(i)
+                               res[i] = stringsReplacer(arr.Value(i))
                        } else {
                                res[i] = w.nullValue
                        }
@@ -153,7 +153,7 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                arr := col.(*array.LargeString)
                for i := 0; i < arr.Len(); i++ {
                        if arr.IsValid(i) {
-                               res[i] = arr.Value(i)
+                               res[i] = stringsReplacer(arr.Value(i))
                        } else {
                                res[i] = w.nullValue
                        }
@@ -224,7 +224,7 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                                var b bytes.Buffer
                                b.Write([]byte{'{'})
                                writer := csv.NewWriter(&b)
-                               
writer.Write(w.transformColToStringArr(list.DataType(), list))
+                               
writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer))
                                writer.Flush()
                                b.Truncate(b.Len() - 1)
                                b.Write([]byte{'}'})
@@ -243,7 +243,7 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                                var b bytes.Buffer
                                b.Write([]byte{'{'})
                                writer := csv.NewWriter(&b)
-                               
writer.Write(w.transformColToStringArr(list.DataType(), list))
+                               
writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer))
                                writer.Flush()
                                b.Truncate(b.Len() - 1)
                                b.Write([]byte{'}'})
@@ -262,7 +262,7 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                                var b bytes.Buffer
                                b.Write([]byte{'{'})
                                writer := csv.NewWriter(&b)
-                               
writer.Write(w.transformColToStringArr(list.DataType(), list))
+                               
writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer))
                                writer.Flush()
                                b.Truncate(b.Len() - 1)
                                b.Write([]byte{'}'})
diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go
index a672008b58..b939b72984 100644
--- a/go/arrow/csv/writer.go
+++ b/go/arrow/csv/writer.go
@@ -27,12 +27,13 @@ import (
 
 // Writer wraps encoding/csv.Writer and writes arrow.Record based on a schema.
 type Writer struct {
-       boolFormatter func(bool) string
-       header        bool
-       nullValue     string
-       once          sync.Once
-       schema        *arrow.Schema
-       w             *csv.Writer
+       boolFormatter  func(bool) string
+       header         bool
+       nullValue      string
+       stringReplacer func(string) string
+       once           sync.Once
+       schema         *arrow.Schema
+       w              *csv.Writer
 }
 
 // NewWriter returns a writer that writes arrow.Records to the CSV file
@@ -45,10 +46,11 @@ func NewWriter(w io.Writer, schema *arrow.Schema, opts 
...Option) *Writer {
        validate(schema)
 
        ww := &Writer{
-               boolFormatter: strconv.FormatBool, // override by passing 
WithBoolWriter() as an option
-               nullValue:     "NULL",             // override by passing 
WithNullWriter() as an option
-               schema:        schema,
-               w:             csv.NewWriter(w),
+               boolFormatter:  strconv.FormatBool,                 // override 
by passing WithBoolWriter() as an option
+               nullValue:      "NULL",                             // override 
by passing WithNullWriter() as an option
+               stringReplacer: func(x string) string { return x }, // override 
by passing WithStringsReplacer() as an option
+               schema:         schema,
+               w:              csv.NewWriter(w),
        }
        for _, opt := range opts {
                opt(ww)
@@ -81,7 +83,7 @@ func (w *Writer) Write(record arrow.Record) error {
        }
 
        for j, col := range record.Columns() {
-               rows := w.transformColToStringArr(w.schema.Field(j).Type, col)
+               rows := w.transformColToStringArr(w.schema.Field(j).Type, col, 
w.stringReplacer)
                for i, row := range rows {
                        recs[i][j] = row
                }
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index 644cae0933..b1bd3251c5 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -23,6 +23,7 @@ import (
        "fmt"
        "io"
        "log"
+       "strings"
        "testing"
 
        "github.com/apache/arrow/go/v15/arrow"
@@ -250,8 +251,8 @@ func testCSVWriter(t *testing.T, data [][]string, 
writeHeader bool, fmtr func(bo
        
b.Field(9).(*array.Float16Builder).AppendValues([]float16.Num{float16.New(0.0), 
float16.New(0.1), float16.New(0.2)}, nil)
        b.Field(10).(*array.Float32Builder).AppendValues([]float32{0.0, 0.1, 
0.2}, nil)
        b.Field(11).(*array.Float64Builder).AppendValues([]float64{0.0, 0.1, 
0.2}, nil)
-       b.Field(12).(*array.StringBuilder).AppendValues([]string{"str-0", 
"str-1", "str-2"}, nil)
-       b.Field(13).(*array.LargeStringBuilder).AppendValues([]string{"str-0", 
"str-1", "str-2"}, nil)
+       b.Field(12).(*array.StringBuilder).AppendValues([]string{"str_0", 
"str-1", "str-2"}, nil)
+       b.Field(13).(*array.LargeStringBuilder).AppendValues([]string{"str_0", 
"str-1", "str-2"}, nil)
        
b.Field(14).(*array.TimestampBuilder).AppendValues(genTimestamps(arrow.Second), 
nil)
        b.Field(15).(*array.Date32Builder).AppendValues([]arrow.Date32{17304, 
19304, 20304}, nil)
        
b.Field(16).(*array.Date64Builder).AppendValues([]arrow.Date64{1840400000000, 
1940400000000, 2040400000000}, nil)
@@ -300,6 +301,7 @@ func testCSVWriter(t *testing.T, data [][]string, 
writeHeader bool, fmtr func(bo
                csv.WithHeader(writeHeader),
                csv.WithNullWriter(nullVal),
                csv.WithBoolWriter(fmtr),
+               csv.WithStringsReplacer(strings.NewReplacer("_", "-")),
        )
        err := w.Write(rec)
        if err != nil {

Reply via email to