This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 600013ade9 GH-36623: [Go] NullType support for csv (#36624)
600013ade9 is described below

commit 600013ade91f95cdeab9c6429371e8977d620ffd
Author: George Godik <[email protected]>
AuthorDate: Wed Jul 12 11:35:13 2023 -0400

    GH-36623: [Go] NullType support for csv (#36624)
    
    ### Rationale for this change
    
    ### What changes are included in this PR?
    
    * adding `NullType` to `validate`
    * handling `NullType` in `Writer.transformColToStringArr` by writing the 
writer's `nullValue` for every row
    
    ### Are these changes tested?
    
    Yes. Unit tests are included in this change: `writer_test.go` adds a `null` column to the CSV writer test data.
    
    ### Are there any user-facing changes?
    
    No
    * Closes: #36623
    
    Authored-by: ggodik <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/arrow/csv/common.go      |  1 +
 go/arrow/csv/transformer.go |  4 ++++
 go/arrow/csv/writer_test.go | 22 ++++++++++++----------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 48224d7ad2..ccdf964eae 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -225,6 +225,7 @@ func validate(schema *arrow.Schema) {
                case *arrow.ListType, *arrow.LargeListType, 
*arrow.FixedSizeListType:
                case *arrow.BinaryType, *arrow.LargeBinaryType, 
*arrow.FixedSizeBinaryType:
                case arrow.ExtensionType:
+               case *arrow.NullType:
                default:
                        panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid 
data type %T", i, f.Name, ft))
                }
diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go
index 886282d49f..3eeb44cfe6 100644
--- a/go/arrow/csv/transformer.go
+++ b/go/arrow/csv/transformer.go
@@ -308,6 +308,10 @@ func (w *Writer) transformColToStringArr(typ 
arrow.DataType, col arrow.Array) []
                                res[i] = arr.ValueStr(i)
                        }
                }
+       case *arrow.NullType:
+               for i := 0; i < col.Len(); i++ {
+                       res[i] = w.nullValue
+               }
        default:
                panic(fmt.Errorf("arrow/csv: field has unsupported data type 
%s", typ.String()))
        }
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index cfce4dd0a6..7216eb8cbd 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -134,18 +134,18 @@ func Example_writer() {
 
 var (
        fullData = [][]string{
-               {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", 
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128", 
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary", 
"large_binary", "fixed_size_binary", "uuid"},
-               {"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", 
"0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", 
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC", 
"00000000-0000-0000-0000-000000000001"},
-               {"false", "0", "0", "0", "0", "1", "1", "1", "1", 
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05", 
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF", 
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002"},
-               {"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.19995117", 
"0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", "2025-08-04", 
"2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", "{7,8,9}", "", "", 
"AAAA", "00000000-0000-0000-0000-000000000003"},
-               {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal},
+               {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", 
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128", 
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary", 
"large_binary", "fixed_size_binary", "uuid", "null"},
+               {"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", 
"0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", 
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC", 
"00000000-0000-0000-0000-000000000001", nullVal},
+               {"false", "0", "0", "0", "0", "1", "1", "1", "1", 
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05", 
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF", 
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002", nullVal},
+               {"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.19995117", 
"0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", "2025-08-04", 
"2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", "{7,8,9}", "", "", 
"AAAA", "00000000-0000-0000-0000-000000000003", nullVal},
+               {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal},
        }
        bananaData = [][]string{
-               {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", 
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128", 
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary", 
"large_binary", "fixed_size_binary", "uuid"},
-               {"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", 
"0", "0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", 
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC", 
"00000000-0000-0000-0000-000000000001"},
-               {"MANGO", "0", "0", "0", "0", "1", "1", "1", "1", 
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05", 
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF", 
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002"},
-               {"BANANA", "1", "1", "1", "1", "2", "2", "2", "2", 
"0.19995117", "0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", 
"2025-08-04", "2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", 
"{7,8,9}", "", "", "AAAA", "00000000-0000-0000-0000-000000000003"},
-               {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal},
+               {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", 
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128", 
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary", 
"large_binary", "fixed_size_binary", "uuid", "null"},
+               {"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", 
"0", "0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26", 
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC", 
"00000000-0000-0000-0000-000000000001", nullVal},
+               {"MANGO", "0", "0", "0", "0", "1", "1", "1", "1", 
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05", 
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF", 
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002", nullVal},
+               {"BANANA", "1", "1", "1", "1", "2", "2", "2", "2", 
"0.19995117", "0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", 
"2025-08-04", "2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", 
"{7,8,9}", "", "", "AAAA", "00000000-0000-0000-0000-000000000003", nullVal},
+               {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, 
nullVal, nullVal, nullVal, nullVal},
        }
 )
 
@@ -230,6 +230,7 @@ func testCSVWriter(t *testing.T, data [][]string, 
writeHeader bool, fmtr func(bo
                        {Name: "large_binary", Type: 
arrow.BinaryTypes.LargeBinary},
                        {Name: "fixed_size_binary", Type: 
&arrow.FixedSizeBinaryType{ByteWidth: 3}},
                        {Name: "uuid", Type: types.NewUUIDType()},
+                       {Name: "null", Type: arrow.Null},
                },
                nil,
        )
@@ -284,6 +285,7 @@ func testCSVWriter(t *testing.T, data [][]string, 
writeHeader bool, fmtr func(bo
        b.Field(23).(*array.BinaryBuilder).AppendValues([][]byte{{0, 1, 2}, {3, 
4, 5}, {}}, nil)
        b.Field(24).(*array.FixedSizeBinaryBuilder).AppendValues([][]byte{{0, 
1, 2}, {3, 4, 5}, {}}, nil)
        
b.Field(25).(*types.UUIDBuilder).AppendValues([]uuid.UUID{uuid.MustParse("00000000-0000-0000-0000-000000000001"),
 uuid.MustParse("00000000-0000-0000-0000-000000000002"), 
uuid.MustParse("00000000-0000-0000-0000-000000000003")}, nil)
+       b.Field(26).(*array.NullBuilder).AppendEmptyValues(3)
 
        for _, field := range b.Fields() {
                field.AppendNull()

Reply via email to