This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 15ee52111b GH-36696: [Go] Improve the MapOf and ListOf helpers (#36697)
15ee52111b is described below
commit 15ee52111b5ee6c1623e8d1826b850b54c6de6a5
Author: Mark Wolfe <[email protected]>
AuthorDate: Fri Jul 21 05:02:24 2023 +1000
GH-36696: [Go] Improve the MapOf and ListOf helpers (#36697)
### Rationale for this change
The aim is to improve the MapOf and ListOf helper functions without
breaking anything. I have added a `ListOfWithName` which matches the `MapOf`
function in that it takes a name, rather than deriving it from the elements
name, which should actually be `element`.
This just seems clearer to me as an interface, and makes construction a bit
more obvious.
### What changes are included in this PR?
* Removed references to panics I can't find
* Updated error messages for list and map to be clearer with validation
errors
* Added a ListOfWithName to provide a clearer matching method to MapOf
which takes a name
Closes #36696
### Are these changes tested?
Yes, I added a test for the new `ListOfWithName` function.
### Are there any user-facing changes?
* Closes: #36696
Authored-by: Mark Wolfe <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
go/parquet/schema/helpers.go | 62 ++++++++++++++++++++++++++-------------
go/parquet/schema/helpers_test.go | 19 ++++++++++++
2 files changed, 60 insertions(+), 21 deletions(-)
diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go
index 7cc89efca6..1198b0b926 100644
--- a/go/parquet/schema/helpers.go
+++ b/go/parquet/schema/helpers.go
@@ -24,43 +24,62 @@ import (
// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
-// <list-repetition> group <name> (LIST) {
-// repeated group list {
-// <element-repetition> <element-type> element;
-// }
-// }
+// <list-repetition> group <name> (LIST) {
+// repeated group list {
+// <element-repetition> <element-type> element;
+// }
+// }
//
-// <list-repetition> can only be optional or required. panics if repeated.
-// <element-repetition> can only be optional or required. panics if repeated.
+// <list-repetition> can only be optional or required.
+// <element-repetition> can only be optional or required.
func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error)
{
- if rep == parquet.Repetitions.Repeated || n.RepetitionType() ==
parquet.Repetitions.Repeated {
- return nil, xerrors.New("parquet: listof repetition and element
repetition must not be repeated.")
+ return ListOfWithName(n.Name(), n, rep, fieldID)
+}
+
+// ListOf is a convenience helper function to create a properly structured
+// list structure according to the Parquet Spec.
+//
+// <list-repetition> group <name> (LIST) {
+// repeated group list {
+// <element-repetition> <element-type> element;
+// }
+// }
+//
+// <list-repetition> can only be optional or required.
+// <element-repetition> can only be optional or required.
+func ListOfWithName(listName string, element Node, rep parquet.Repetition,
fieldID int32) (*GroupNode, error) {
+ if rep == parquet.Repetitions.Repeated {
+ return nil, xerrors.Errorf("parquet: listof repetition must not
be repeated, got :%s", rep)
}
- listName := n.Name()
- switch n := n.(type) {
+ if element.RepetitionType() == parquet.Repetitions.Repeated {
+ return nil, xerrors.Errorf("parquet: element repetition must
not be repeated, got: %s", element.RepetitionType())
+ }
+
+ switch n := element.(type) {
case *PrimitiveNode:
n.name = "element"
case *GroupNode:
n.name = "element"
}
- list, err := NewGroupNode("list" /* name */,
parquet.Repetitions.Repeated, FieldList{n}, -1 /* fieldID */)
+ list, err := NewGroupNode("list" /* name */,
parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */)
if err != nil {
return nil, err
}
+
return NewGroupNodeLogical(listName, rep, FieldList{list},
ListLogicalType{}, fieldID)
}
// MapOf is a convenience helper function to create a properly structured
// parquet map node setup according to the Parquet Spec.
//
-// <map-repetition> group <name> (MAP) {
-// repeated group key_value {
-// required <key-type> key;
-// <value-repetition> <value-type> value;
-// }
-// }
+// <map-repetition> group <name> (MAP) {
+// repeated group key_value {
+// required <key-type> key;
+// <value-repetition> <value-type> value;
+// }
+// }
//
// key node will be renamed to "key", value node if not nil will be renamed to
"value"
//
@@ -69,14 +88,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32)
(*GroupNode, error) {
// the key node *must* be required repetition. panics if optional or repeated
//
// value node can be nil (omitted) or have a repetition of required or
optional *only*.
-// panics if value node is not nil and has a repetition of repeated.
func MapOf(name string, key Node, value Node, mapRep parquet.Repetition,
fieldID int32) (*GroupNode, error) {
if mapRep == parquet.Repetitions.Repeated {
- return nil, xerrors.New("parquet: map repetition cannot be
Repeated")
+ return nil, xerrors.Errorf("parquet: map repetition cannot be
Repeated, got: %s", mapRep)
}
+
if key.RepetitionType() != parquet.Repetitions.Required {
- return nil, xerrors.New("parquet: map key repetition must be
Required")
+ return nil, xerrors.Errorf("parquet: map key repetition must be
Required, got: %s", key.RepetitionType())
}
+
if value != nil {
if value.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map value cannot have
repetition Repeated")
diff --git a/go/parquet/schema/helpers_test.go
b/go/parquet/schema/helpers_test.go
index 055fe7f46d..b4f0b68400 100644
--- a/go/parquet/schema/helpers_test.go
+++ b/go/parquet/schema/helpers_test.go
@@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) {
}`, strings.TrimSpace(buf.String()))
}
+func TestListOfWithNameNested(t *testing.T) {
+ n, err := schema.ListOfWithName("arrays",
schema.NewInt32Node("element", parquet.Repetitions.Required, -1),
parquet.Repetitions.Required, -1)
+ assert.NoError(t, err)
+ final, err := schema.ListOf(n, parquet.Repetitions.Required, -1)
+ assert.NoError(t, err)
+
+ var buf bytes.Buffer
+ schema.PrintSchema(final, &buf, 4)
+ assert.Equal(t,
+ `required group field_id=-1 arrays (List) {
+ repeated group field_id=-1 list {
+ required group field_id=-1 element (List) {
+ repeated group field_id=-1 list {
+ required int32 field_id=-1 element;
+ }
+ }
+ }
+}`, strings.TrimSpace(buf.String()))
+}
func TestMapOfNestedTypes(t *testing.T) {
n, err := schema.NewGroupNode("student", parquet.Repetitions.Required,
schema.FieldList{
schema.NewByteArrayNode("name", parquet.Repetitions.Required,
-1),