This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 600013ade9 GH-36623: [Go] NullType support for csv (#36624)
600013ade9 is described below
commit 600013ade91f95cdeab9c6429371e8977d620ffd
Author: George Godik <[email protected]>
AuthorDate: Wed Jul 12 11:35:13 2023 -0400
GH-36623: [Go] NullType support for csv (#36624)
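### Rationale for this change
The csv package did not support `NullType` columns: both `validate` and
`Writer.transformColToStringArr` panic on any data type they do not
explicitly list, and `NullType` was not among them.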
### What changes are included in this PR?
* adding `NullType` to `validate`
* handling `NullType` in `Writer.transformColToStringArr` by writing the
writer's `nullValue` for every row
### Are these changes tested?
Unit tests included in change
### Are there any user-facing changes?
No
* Closes: #36623
Authored-by: ggodik <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
go/arrow/csv/common.go | 1 +
go/arrow/csv/transformer.go | 4 ++++
go/arrow/csv/writer_test.go | 22 ++++++++++++----------
3 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 48224d7ad2..ccdf964eae 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -225,6 +225,7 @@ func validate(schema *arrow.Schema) {
case *arrow.ListType, *arrow.LargeListType,
*arrow.FixedSizeListType:
case *arrow.BinaryType, *arrow.LargeBinaryType,
*arrow.FixedSizeBinaryType:
case arrow.ExtensionType:
+ case *arrow.NullType:
default:
panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid
data type %T", i, f.Name, ft))
}
diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go
index 886282d49f..3eeb44cfe6 100644
--- a/go/arrow/csv/transformer.go
+++ b/go/arrow/csv/transformer.go
@@ -308,6 +308,10 @@ func (w *Writer) transformColToStringArr(typ
arrow.DataType, col arrow.Array) []
res[i] = arr.ValueStr(i)
}
}
+ case *arrow.NullType:
+ for i := 0; i < col.Len(); i++ {
+ res[i] = w.nullValue
+ }
default:
panic(fmt.Errorf("arrow/csv: field has unsupported data type
%s", typ.String()))
}
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index cfce4dd0a6..7216eb8cbd 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -134,18 +134,18 @@ func Example_writer() {
var (
fullData = [][]string{
- {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64",
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128",
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary",
"large_binary", "fixed_size_binary", "uuid"},
- {"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0",
"0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26",
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC",
"00000000-0000-0000-0000-000000000001"},
- {"false", "0", "0", "0", "0", "1", "1", "1", "1",
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05",
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF",
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002"},
- {"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.19995117",
"0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", "2025-08-04",
"2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", "{7,8,9}", "", "",
"AAAA", "00000000-0000-0000-0000-000000000003"},
- {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal},
+ {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64",
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128",
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary",
"large_binary", "fixed_size_binary", "uuid", "null"},
+ {"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0",
"0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26",
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC",
"00000000-0000-0000-0000-000000000001", nullVal},
+ {"false", "0", "0", "0", "0", "1", "1", "1", "1",
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05",
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF",
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002", nullVal},
+ {"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.19995117",
"0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05", "2025-08-04",
"2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}", "{7,8,9}", "", "",
"AAAA", "00000000-0000-0000-0000-000000000003", nullVal},
+ {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal},
}
bananaData = [][]string{
- {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64",
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128",
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary",
"large_binary", "fixed_size_binary", "uuid"},
- {"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0",
"0", "0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26",
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC",
"00000000-0000-0000-0000-000000000001"},
- {"MANGO", "0", "0", "0", "0", "1", "1", "1", "1",
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05",
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF",
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002"},
- {"BANANA", "1", "1", "1", "1", "2", "2", "2", "2",
"0.19995117", "0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05",
"2025-08-04", "2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}",
"{7,8,9}", "", "", "AAAA", "00000000-0000-0000-0000-000000000003"},
- {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal},
+ {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64",
"f16", "f32", "f64", "str", "large_str", "ts_s", "d32", "d64", "dec128",
"dec256", "list(i64)", "large_list(i64)", "fixed_size_list(i64)", "binary",
"large_binary", "fixed_size_binary", "uuid", "null"},
+ {"BANANA", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0",
"0", "0", "str-0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26",
"-123.45", "-123.45", "{1,2,3}", "{1,2,3}", "{1,2,3}", "AAEC", "AAEC", "AAEC",
"00000000-0000-0000-0000-000000000001", nullVal},
+ {"MANGO", "0", "0", "0", "0", "1", "1", "1", "1",
"0.099975586", "0.1", "0.1", "str-1", "str-1", "2016-09-08 15:04:05",
"2022-11-08", "2031-06-28", "0", "0", "{4,5,6}", "{4,5,6}", "{4,5,6}", "AwQF",
"AwQF", "AwQF", "00000000-0000-0000-0000-000000000002", nullVal},
+ {"BANANA", "1", "1", "1", "1", "2", "2", "2", "2",
"0.19995117", "0.2", "0.2", "str-2", "str-2", "2021-09-18 15:04:05",
"2025-08-04", "2034-08-28", "123.45", "123.45", "{7,8,9}", "{7,8,9}",
"{7,8,9}", "", "", "AAAA", "00000000-0000-0000-0000-000000000003", nullVal},
+ {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal,
nullVal, nullVal, nullVal, nullVal},
}
)
@@ -230,6 +230,7 @@ func testCSVWriter(t *testing.T, data [][]string,
writeHeader bool, fmtr func(bo
{Name: "large_binary", Type:
arrow.BinaryTypes.LargeBinary},
{Name: "fixed_size_binary", Type:
&arrow.FixedSizeBinaryType{ByteWidth: 3}},
{Name: "uuid", Type: types.NewUUIDType()},
+ {Name: "null", Type: arrow.Null},
},
nil,
)
@@ -284,6 +285,7 @@ func testCSVWriter(t *testing.T, data [][]string,
writeHeader bool, fmtr func(bo
b.Field(23).(*array.BinaryBuilder).AppendValues([][]byte{{0, 1, 2}, {3,
4, 5}, {}}, nil)
b.Field(24).(*array.FixedSizeBinaryBuilder).AppendValues([][]byte{{0,
1, 2}, {3, 4, 5}, {}}, nil)
b.Field(25).(*types.UUIDBuilder).AppendValues([]uuid.UUID{uuid.MustParse("00000000-0000-0000-0000-000000000001"),
uuid.MustParse("00000000-0000-0000-0000-000000000002"),
uuid.MustParse("00000000-0000-0000-0000-000000000003")}, nil)
+ b.Field(26).(*array.NullBuilder).AppendEmptyValues(3)
for _, field := range b.Fields() {
field.AppendNull()