This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 41441ea9 fix(arrow/cdata): Avoid calling unsafe.Slice on zero-length
pointers (#513)
41441ea9 is described below
commit 41441ea911bf4d54d27c918ff0061ed71dca8a39
Author: Orson Peters <[email protected]>
AuthorDate: Mon Sep 22 21:40:50 2025 +0200
fix(arrow/cdata): Avoid calling unsafe.Slice on zero-length pointers (#513)
### Rationale for this change
Slices from FFI may have an arbitrary pointer when the length is zero,
but this is not allowed in Go, where the pointer must always be valid. I
believe this fixes https://github.com/apache/arrow-go/issues/28.
### What changes are included in this PR?
I changed the instances of `unsafe.Slice` in `arrow/cdata` I could find
to be robust when used with length zero.
### Are these changes tested?
No, I don't have a Go setup at all.
### Are there any user-facing changes?
No.
---------
Co-authored-by: Matt Topol <[email protected]>
---
arrow/cdata/cdata.go | 20 +++++++++++---------
arrow/cdata/cdata_fulltest.c | 30 +++++++++++++++++++++++++++++-
arrow/cdata/cdata_test.go | 14 ++++++++++++++
arrow/cdata/cdata_test_framework.go | 7 +++++++
4 files changed, 61 insertions(+), 10 deletions(-)
diff --git a/arrow/cdata/cdata.go b/arrow/cdata/cdata.go
index b38ceb5f..f006e494 100644
--- a/arrow/cdata/cdata.go
+++ b/arrow/cdata/cdata.go
@@ -370,10 +370,10 @@ func (imp *cimporter) importChild(parent *cimporter, src
*CArrowArray) error {
// import any child arrays for lists, structs, and so on.
func (imp *cimporter) doImportChildren() error {
- children := unsafe.Slice(imp.arr.children, imp.arr.n_children)
-
- if len(children) > 0 {
- imp.children = make([]cimporter, len(children))
+ var children []*CArrowArray
+ if imp.arr.n_children > 0 {
+ children = unsafe.Slice(imp.arr.children, imp.arr.n_children)
+ imp.children = make([]cimporter, imp.arr.n_children)
}
// handle the cases
@@ -711,10 +711,12 @@ func (imp *cimporter) importBinaryViewLike() (err error) {
return
}
- dataBufferSizes :=
unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])),
len(buffers)-2)
- for i, size := range dataBufferSizes {
- if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1,
size); err != nil {
- return
+ if len(buffers) > 2 {
+ dataBufferSizes :=
unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])),
len(buffers)-2)
+ for i, size := range dataBufferSizes {
+ if buffers[i+2], err =
imp.importVariableValuesBuffer(i+2, 1, size); err != nil {
+ return
+ }
}
}
@@ -866,7 +868,7 @@ func (imp *cimporter) checkNumBuffers(n int64) error {
func (imp *cimporter) importBuffer(bufferID int, sz int64) (*memory.Buffer,
error) {
// this is not a copy, we're just having a slice which points at the
data
// it's still owned by the C.ArrowArray object and its backing C++
object.
- if imp.cbuffers[bufferID] == nil {
+ if imp.cbuffers[bufferID] == nil || sz == 0 {
if sz != 0 {
return nil, errors.New("invalid buffer")
}
diff --git a/arrow/cdata/cdata_fulltest.c b/arrow/cdata/cdata_fulltest.c
index 8fd724d4..b4d94132 100644
--- a/arrow/cdata/cdata_fulltest.c
+++ b/arrow/cdata/cdata_fulltest.c
@@ -335,8 +335,13 @@ void export_int32_array(const int32_t*, int64_t, struct
ArrowArray*);
static void release_str_array(struct ArrowArray* array) {
assert(array->n_buffers == 3);
+ if (array->buffers[0] != NULL) {
+ free((void*) array->buffers[0]);
+ }
free((void*) array->buffers[1]);
- free((void*) array->buffers[2]);
+ if (array->buffers[2] != NULL && array->buffers[2] != (void*)0x1) {
+ free((void*) array->buffers[2]);
+ }
free(array->buffers);
array->release = NULL;
}
@@ -361,6 +366,29 @@ void export_str_array(const char* data, const int32_t*
offsets, int64_t nitems,
out->buffers[2] = data;
}
+void export_str_array_with_nulls(int64_t nitems, struct ArrowArray* out) {
+ *out = (struct ArrowArray) {
+ .length = nitems,
+ .offset = 0,
+ .null_count = nitems,
+ .n_buffers = 3,
+ .n_children = 0,
+ .children = NULL,
+ .dictionary = NULL,
+ // bookkeeping
+ .release = &release_str_array
+ };
+
+ out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers);
+ assert(out->buffers != NULL);
+ int64_t bitmap_nbytes = (nitems + 7) / 8;
+ out->buffers[0] = malloc(bitmap_nbytes);
+ memset((void*)out->buffers[0], 0, bitmap_nbytes);
+ out->buffers[1] = malloc((nitems + 1) * sizeof(int32_t));
+ memset((void*)out->buffers[1], 0, (nitems + 1) * sizeof(int32_t));
+ out->buffers[2] = (void*)0x1;
+}
+
static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) {
struct streamcounter* cnter = (struct streamcounter*)(st->private_data);
if (cnter->n == cnter->max) {
diff --git a/arrow/cdata/cdata_test.go b/arrow/cdata/cdata_test.go
index f504ce99..4db1abd5 100644
--- a/arrow/cdata/cdata_test.go
+++ b/arrow/cdata/cdata_test.go
@@ -737,6 +737,20 @@ func TestNestedArrays(t *testing.T) {
}
}
+func TestStrArrayAllNulls(t *testing.T) {
+ arr := exportStrArrayWithNulls(1000)
+ carr, err := ImportCArrayWithType(&arr, arrow.BinaryTypes.String)
+ require.NoError(t, err)
+ defer carr.Release()
+
+ buffer := carr.Data().Buffers()[2]
+ assert.NotNil(t, buffer)
+ bs := buffer.Bytes()
+ assert.Equal(t, 1000, carr.Len())
+ assert.Equal(t, 1000, carr.NullN())
+ assert.Empty(t, bs)
+}
+
func TestRecordBatch(t *testing.T) {
mem := mallocator.NewMallocator()
defer mem.AssertSize(t, 0)
diff --git a/arrow/cdata/cdata_test_framework.go
b/arrow/cdata/cdata_test_framework.go
index 581cc149..fef18e5d 100644
--- a/arrow/cdata/cdata_test_framework.go
+++ b/arrow/cdata/cdata_test_framework.go
@@ -53,6 +53,7 @@ package cdata
// }
// void export_int32_type(struct ArrowSchema* schema);
// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*);
+// void export_str_array_with_nulls(int64_t nitems, struct ArrowArray* out);
// int test1_is_released();
// void test_primitive(struct ArrowSchema* schema, const char* fmt);
// void free_malloced_schemas(struct ArrowSchema**);
@@ -98,6 +99,12 @@ func exportInt32TypeSchema() CArrowSchema {
return s
}
+func exportStrArrayWithNulls(nitems int64) CArrowArray {
+ var arr CArrowArray
+ C.export_str_array_with_nulls(C.int64_t(nitems), &arr)
+ return arr
+}
+
func schemaIsReleased(s *CArrowSchema) bool {
return C.ArrowSchemaIsReleased(s) == 1
}