paleolimbot commented on code in PR #325:
URL: https://github.com/apache/arrow-nanoarrow/pull/325#discussion_r1409383744
##########
src/nanoarrow/nanoarrow_testing.hpp:
##########
@@ -1053,6 +1081,390 @@ class TestingJSONReader {
return NANOARROW_OK;
}
+ ArrowErrorCode SetArrayColumn(const json& value, ArrowArrayView* array_view,
+ ArrowArray* array, ArrowError* error,
+ const std::string& parent_error_prefix = "") {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_object(), error, "Expected Column to be a JSON
object"));
+
+ // Check + resolve name early to generate better error messages
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("name"), error, "Column missing key 'name'"));
+
+ const auto& name = value["name"];
+ NANOARROW_RETURN_NOT_OK(Check(name.is_null() || name.is_string(), error,
+ "Column name must be string or null"));
+
+ std::string error_prefix;
+ if (name.is_string()) {
+ error_prefix = parent_error_prefix + "-> Column '" +
name.get<std::string>() + "' ";
+ } else {
+ error_prefix = parent_error_prefix + "-> Column <name is null> ";
+ }
+
+ // Check, resolve, and recurse children
+ NANOARROW_RETURN_NOT_OK(
+ Check(array_view->n_children == 0 || value.contains("children"), error,
+ error_prefix + "missing key children"));
+
+ if (value.contains("children")) {
+ const auto& children = value["children"];
+ NANOARROW_RETURN_NOT_OK(
+ Check(children.is_array(), error, error_prefix + "children must be
array"));
+ NANOARROW_RETURN_NOT_OK(Check(children.size() == array_view->n_children,
error,
+ error_prefix + "children has incorrect
size"));
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(SetArrayColumn(children[i],
array_view->children[i],
+ array->children[i], error,
error_prefix));
+ }
+ }
+
+ // Build buffers
+ for (int i = 0; i < 3; i++) {
+ NANOARROW_RETURN_NOT_OK(
+ PrefixError(SetArrayColumnBuffers(value, array_view, array, i,
error), error,
+ error_prefix));
+ }
+
+ // Check + resolve count
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("count"), error, error_prefix + "missing key
'count'"));
+ const auto& count = value["count"];
+ NANOARROW_RETURN_NOT_OK(
+ Check(count.is_number_integer(), error, error_prefix + "count must be
integer"));
+ array_view->length = count.get<int64_t>();
+
+ // Set ArrayView buffer views. This is because ArrowArrayInitFromSchema()
doesn't
+ // support custom type ids for unions but the ArrayView does (otherwise
+ // ArrowArrayFinishBuilding() would work).
+ for (int i = 0; i < 3; i++) {
+ ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ ArrowBufferView* buffer_view = array_view->buffer_views + i;
+ buffer_view->data.as_uint8 = buffer->data;
+ buffer_view->size_bytes = buffer->size_bytes;
+ }
+
+ // Validate the array view
+ NANOARROW_RETURN_NOT_OK(PrefixError(
+ ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL,
error), error,
+ error_prefix + "failed to validate: "));
+
+ // Flush length and buffer pointers to the Array
+ array->length = array_view->length;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE,
nullptr), error);
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView*
array_view,
+ ArrowArray* array, int buffer_i,
+ ArrowError* error) {
+ ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+ switch (array_view->layout.buffer_type[buffer_i]) {
+ case NANOARROW_BUFFER_TYPE_VALIDITY: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("VALIDITY"), error, "missing key
'VALIDITY'"));
+ const auto& validity = value["VALIDITY"];
+ NANOARROW_RETURN_NOT_OK(
+ SetBufferBitmap(validity, ArrowArrayValidityBitmap(array), error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_TYPE_ID: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("TYPE_ID"), error, "missing key 'TYPE_ID'"));
+ const auto& type_id = value["TYPE_ID"];
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(type_id, buffer, error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+ const auto& offset = value["OFFSET"];
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+ const auto& offset = value["OFFSET"];
+
+ if (array_view->layout.element_size_bits[buffer_i] == 32) {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer,
error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int64_t>(offset, buffer,
error));
+ }
+ break;
+ }
+
+ case NANOARROW_BUFFER_TYPE_DATA: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("DATA"), error, "missing key 'DATA'"));
+ const auto& data = value["DATA"];
+
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_BOOL: {
+ nanoarrow::UniqueBitmap bitmap;
+ NANOARROW_RETURN_NOT_OK(SetBufferBitmap(data, bitmap.get(),
error));
+ ArrowBufferMove(&bitmap->buffer, buffer);
+ return NANOARROW_OK;
+ }
+ case NANOARROW_TYPE_INT8:
+ return SetBufferInt<int8_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT8:
+ return SetBufferInt<uint8_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT16:
+ return SetBufferInt<int16_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT16:
+ return SetBufferInt<uint16_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT32:
+ return SetBufferInt<int32_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT32:
+ return SetBufferInt<uint32_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT64:
+ return SetBufferInt<int64_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT64:
+ return SetBufferInt<uint64_t, uint64_t>(data, buffer, error);
+
+ case NANOARROW_TYPE_FLOAT:
+ return SetBufferFloatingPoint<float>(data, buffer, error);
+ case NANOARROW_TYPE_DOUBLE:
+ return SetBufferFloatingPoint<double>(data, buffer, error);
+
+ case NANOARROW_TYPE_STRING:
+ return SetBuffersString<int32_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_LARGE_STRING:
+ return SetBuffersString<int64_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_BINARY:
+ return SetBuffersBinary<int32_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_LARGE_BINARY:
+ return SetBuffersBinary<int64_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ return SetBuffersBinary<int64_t>(
+ data, nullptr, buffer, error,
+ array_view->layout.element_size_bits[buffer_i] / 8);
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]