This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new b10756e [C] Use nanoarrow to improve validation suite (#63)
b10756e is described below
commit b10756e5e225110d904c65629f5a301448e25629
Author: David Li <[email protected]>
AuthorDate: Fri Aug 12 15:44:59 2022 -0400
[C] Use nanoarrow to improve validation suite (#63)
* [C] Use nanoarrow to improve validation suite
* Fix embarrassingly blatant use-after-free
---
.gitattributes | 1 +
.pre-commit-config.yaml | 2 +
c/driver_manager/CMakeLists.txt | 3 +
c/driver_manager/adbc_driver_manager_test.cc | 1 +
c/drivers/sqlite/CMakeLists.txt | 3 +
c/drivers/sqlite/sqlite.cc | 66 +-
c/drivers/sqlite/sqlite_test.cc | 1 +
c/validation/adbc_validation.c | 227 ++++++-
c/validation/adbc_validation.h | 1 +
c/vendor/nanoarrow/allocator.c | 51 ++
c/vendor/nanoarrow/array.c | 230 +++++++
c/vendor/nanoarrow/array_inline.h | 77 +++
c/vendor/nanoarrow/bitmap_inline.h | 323 ++++++++++
c/vendor/nanoarrow/buffer_inline.h | 186 ++++++
c/vendor/nanoarrow/build-and-test.yaml | 121 ++++
c/vendor/nanoarrow/error.c | 42 ++
c/vendor/nanoarrow/metadata.c | 257 ++++++++
c/vendor/nanoarrow/nanoarrow.c | 23 +
c/vendor/nanoarrow/nanoarrow.h | 579 ++++++++++++++++++
c/vendor/nanoarrow/schema.c | 475 ++++++++++++++
c/vendor/nanoarrow/schema_view.c | 679 +++++++++++++++++++++
c/vendor/nanoarrow/typedefs_inline.h | 253 ++++++++
.../nanoarrow/utils_inline.h} | 34 +-
.gitattributes => c/vendor/vendor_nanoarrow.sh | 21 +-
24 files changed, 3575 insertions(+), 81 deletions(-)
diff --git a/.gitattributes b/.gitattributes
index 7aad2f3..2a3bd0c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -15,5 +15,6 @@
# specific language governing permissions and limitations
# under the License.
+c/vendor/* linguist-vendored
python/adbc_driver_manager/poetry.lock linguist-generated=true
python/adbc_driver_manager/requirements-dev.txt linguist-generated=true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6f1c611..3b9a137 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -69,3 +69,5 @@ repos:
hooks:
- id: isort
types_or: [python]
+
+exclude: "^c/vendor/.*"
diff --git a/c/driver_manager/CMakeLists.txt b/c/driver_manager/CMakeLists.txt
index b8a3a13..a64d8ad 100644
--- a/c/driver_manager/CMakeLists.txt
+++ b/c/driver_manager/CMakeLists.txt
@@ -55,8 +55,11 @@ if(ADBC_BUILD_TESTS)
SOURCES
adbc_driver_manager_test.cc
../validation/adbc_validation.c
+ ${REPOSITORY_ROOT}/c/vendor/nanoarrow/nanoarrow.c
EXTRA_LINK_LIBS
${TEST_LINK_LIBS})
+ target_include_directories(adbc-driver-manager-test SYSTEM
+ PRIVATE ${REPOSITORY_ROOT}/c/vendor/nanoarrow/)
endif()
validate_config()
diff --git a/c/driver_manager/adbc_driver_manager_test.cc b/c/driver_manager/adbc_driver_manager_test.cc
index 3f59b82..c6dc713 100644
--- a/c/driver_manager/adbc_driver_manager_test.cc
+++ b/c/driver_manager/adbc_driver_manager_test.cc
@@ -319,6 +319,7 @@ TEST_F(DriverManager, ValidationSuite) {
AdbcValidateConnectionAutocommit(&ctx);
AdbcValidateStatementNewRelease(&ctx);
AdbcValidateStatementSqlExecute(&ctx);
+ AdbcValidateStatementSqlIngest(&ctx);
AdbcValidateStatementSqlPrepare(&ctx);
ASSERT_EQ(ctx.failed, 0);
ASSERT_EQ(ctx.total, ctx.passed);
diff --git a/c/drivers/sqlite/CMakeLists.txt b/c/drivers/sqlite/CMakeLists.txt
index bcff24e..2aab6e0 100644
--- a/c/drivers/sqlite/CMakeLists.txt
+++ b/c/drivers/sqlite/CMakeLists.txt
@@ -60,8 +60,11 @@ if(ADBC_BUILD_TESTS)
SOURCES
sqlite_test.cc
../../validation/adbc_validation.c
+ ${REPOSITORY_ROOT}/c/vendor/nanoarrow/nanoarrow.c
EXTRA_LINK_LIBS
${TEST_LINK_LIBS})
+ target_include_directories(adbc-driver-sqlite-test SYSTEM
+ PRIVATE ${REPOSITORY_ROOT}/c/vendor/nanoarrow/)
endif()
validate_config()
diff --git a/c/drivers/sqlite/sqlite.cc b/c/drivers/sqlite/sqlite.cc
index 9616ae5..6198f68 100644
--- a/c/drivers/sqlite/sqlite.cc
+++ b/c/drivers/sqlite/sqlite.cc
@@ -461,48 +461,44 @@ class SqliteStatementReader : public arrow::RecordBatchReader {
for (int64_t row = 0; row < kBatchSize; row++) {
if (rc_ != SQLITE_DONE) {
for (int col = 0; col < schema_->num_fields(); col++) {
- const auto& field = schema_->field(col);
- switch (field->type()->id()) {
- case arrow::Type::DOUBLE: {
- // TODO: handle null values
- const double value = sqlite3_column_double(stmt_, col);
-
ARROW_RETURN_NOT_OK(dynamic_cast<arrow::DoubleBuilder*>(builders[col].get())
- ->Append(value));
- break;
- }
- case arrow::Type::INT64: {
- // TODO: handle null values
- const sqlite3_int64 value = sqlite3_column_int64(stmt_, col);
- ARROW_RETURN_NOT_OK(
-
dynamic_cast<arrow::Int64Builder*>(builders[col].get())->Append(value));
- break;
- }
- case arrow::Type::NA: {
- // TODO: handle null values
- ARROW_RETURN_NOT_OK(
-
dynamic_cast<arrow::NullBuilder*>(builders[col].get())->AppendNull());
- break;
- }
- case arrow::Type::STRING: {
- const char* value =
- reinterpret_cast<const char*>(sqlite3_column_text(stmt_,
col));
- if (!value) {
- // TODO: check field nullability
+ if (sqlite3_column_type(stmt_, col) == SQLITE_NULL) {
+ ARROW_RETURN_NOT_OK(builders[col]->AppendNull());
+ } else {
+ const auto& field = schema_->field(col);
+ switch (field->type()->id()) {
+ case arrow::Type::DOUBLE: {
+ const double value = sqlite3_column_double(stmt_, col);
ARROW_RETURN_NOT_OK(
- dynamic_cast<arrow::StringBuilder*>(builders[col].get())
- ->AppendNull());
- } else {
+ dynamic_cast<arrow::DoubleBuilder*>(builders[col].get())
+ ->Append(value));
+ break;
+ }
+ case arrow::Type::INT64: {
+ const sqlite3_int64 value = sqlite3_column_int64(stmt_, col);
+ ARROW_RETURN_NOT_OK(
+ dynamic_cast<arrow::Int64Builder*>(builders[col].get())
+ ->Append(value));
+ break;
+ }
+ case arrow::Type::NA: {
+ ARROW_RETURN_NOT_OK(
+
dynamic_cast<arrow::NullBuilder*>(builders[col].get())->AppendNull());
+ break;
+ }
+ case arrow::Type::STRING: {
+ const char* value =
+ reinterpret_cast<const char*>(sqlite3_column_text(stmt_,
col));
const arrow::util::string_view view(value, std::strlen(value));
ARROW_RETURN_NOT_OK(
dynamic_cast<arrow::StringBuilder*>(builders[col].get())
->Append(value));
+ break;
}
- break;
+ default:
+ return Status::NotImplemented("[SQLite3] Cannot read field '",
+ field->name(), "' of type ",
+ field->type()->ToString());
}
- default:
- return Status::NotImplemented("[SQLite3] Cannot read field '",
- field->name(), "' of type ",
- field->type()->ToString());
}
}
num_rows++;
diff --git a/c/drivers/sqlite/sqlite_test.cc b/c/drivers/sqlite/sqlite_test.cc
index 5af85ac..d60e354 100644
--- a/c/drivers/sqlite/sqlite_test.cc
+++ b/c/drivers/sqlite/sqlite_test.cc
@@ -908,6 +908,7 @@ TEST_F(Sqlite, ValidationSuite) {
AdbcValidateConnectionAutocommit(&ctx);
AdbcValidateStatementNewRelease(&ctx);
AdbcValidateStatementSqlExecute(&ctx);
+ AdbcValidateStatementSqlIngest(&ctx);
AdbcValidateStatementSqlPrepare(&ctx);
ASSERT_EQ(ctx.failed, 0);
ASSERT_EQ(ctx.total, ctx.passed);
diff --git a/c/validation/adbc_validation.c b/c/validation/adbc_validation.c
index 8bb415b..165d9e6 100644
--- a/c/validation/adbc_validation.c
+++ b/c/validation/adbc_validation.c
@@ -22,6 +22,7 @@
#include <string.h>
#include <adbc.h>
+#include <nanoarrow.h>
#define ADBCV_STRINGIFY(s) #s
const char* AdbcValidateStatusCodeMessage(AdbcStatusCode code) {
@@ -77,14 +78,20 @@ void AdbcValidatePass(struct AdbcValidateTestContext* ctx) {
void AdbcValidateFail(struct AdbcValidateTestContext* ctx, const char* file,
int lineno,
struct AdbcError* error) {
ctx->failed++;
- printf("FAIL\n");
- printf("%s:%d\n", file, lineno);
+ printf("\n%s:%d: FAIL\n", file, lineno);
if (error && error->release) {
printf("%s\n", error->message);
error->release(error);
}
}
+int AdbcValidationIsSet(struct ArrowArray* array, int64_t i) {
+ // TODO: unions
+ if (array->n_buffers == 0) return 0;
+ if (!array->buffers[0]) return 1;
+ return ArrowBitGet((const uint8_t*)array->buffers[0], i);
+}
+
#define ADBCV_CONCAT(a, b) a##b
#define ADBCV_NAME(a, b) ADBCV_CONCAT(a, b)
#define ADBCV_ASSERT_FAILS_WITH_IMPL(STATUS, ERROR, NAME, EXPR)
\
@@ -92,35 +99,55 @@ void AdbcValidateFail(struct AdbcValidateTestContext* ctx, const char* file, int
AdbcValidateStatusCodeMessage(ADBC_STATUS_##STATUS)); \
AdbcStatusCode NAME = (EXPR);
\
if (ADBC_STATUS_##STATUS != NAME) {
\
- printf("(was %s) ", AdbcValidateStatusCodeMessage(NAME));
\
+ printf("\nActual value: %s\n", AdbcValidateStatusCodeMessage(NAME));
\
AdbcValidateFail(adbc_context, __FILE__, __LINE__, ERROR);
\
return;
\
}
\
AdbcValidatePass(adbc_context);
#define ADBCV_ASSERT_FAILS_WITH(STATUS, ERROR, EXPR) \
ADBCV_ASSERT_FAILS_WITH_IMPL(STATUS, ERROR, ADBCV_NAME(adbc_status_,
__COUNTER__), EXPR)
-#define ADBCV_ASSERT_OK(ERROR, EXPR) \
- AdbcValidateBeginAssert(adbc_context, "%s == %s", #EXPR, \
- AdbcValidateStatusCodeMessage(ADBC_STATUS_OK)); \
- if (ADBC_STATUS_OK != (EXPR)) { \
- AdbcValidateFail(adbc_context, __FILE__, __LINE__, ERROR); \
- return; \
- } \
+#define ADBCV_ASSERT_OK(ERROR, EXPR) ADBCV_ASSERT_FAILS_WITH(OK, ERROR, EXPR)
+#define ADBCV_ASSERT_EQ(EXPECTED, ACTUAL) \
+ AdbcValidateBeginAssert(adbc_context, "%s == %s", #ACTUAL, #EXPECTED); \
+ if ((EXPECTED) != (ACTUAL)) { \
+ AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
+ return; \
+ } \
AdbcValidatePass(adbc_context);
-#define ADBCV_ASSERT_EQ(EXPECTED, ACTUAL) \
- AdbcValidateBeginAssert(adbc_context, "%s == %s: ", #ACTUAL, #EXPECTED); \
- if ((EXPECTED) != (ACTUAL)) { \
- AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
- return; \
- } \
+#define ADBCV_ASSERT_NE(EXPECTED, ACTUAL) \
+ AdbcValidateBeginAssert(adbc_context, "%s == %s", #ACTUAL, #EXPECTED); \
+ if ((EXPECTED) == (ACTUAL)) { \
+ AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
+ return; \
+ } \
AdbcValidatePass(adbc_context);
-#define ADBCV_ASSERT_NE(EXPECTED, ACTUAL) \
- AdbcValidateBeginAssert(adbc_context, "%s == %s: ", #ACTUAL, #EXPECTED); \
- if ((EXPECTED) == (ACTUAL)) { \
- AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
- return; \
- } \
+#define ADBCV_ASSERT_TRUE(ACTUAL) \
+ AdbcValidateBeginAssert(adbc_context, "%s is true", #ACTUAL); \
+ if (!(ACTUAL)) { \
+ AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
+ return; \
+ } \
AdbcValidatePass(adbc_context);
+#define ADBCV_ASSERT_FALSE(ACTUAL) \
+ AdbcValidateBeginAssert(adbc_context, "%s is false", #ACTUAL); \
+ if (ACTUAL) { \
+ AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
+ return; \
+ } \
+ AdbcValidatePass(adbc_context);
+
+#define NA_ASSERT_OK_IMPL(ERROR_NAME, EXPR) \
+ do { \
+ AdbcValidateBeginAssert(adbc_context, "%s is OK (0)", #EXPR); \
+ ArrowErrorCode ERROR_NAME = (EXPR); \
+ if (ERROR_NAME) { \
+ AdbcValidateFail(adbc_context, __FILE__, __LINE__, NULL); \
+ return; \
+ } \
+ AdbcValidatePass(adbc_context); \
+ } while (0)
+
+#define NA_ASSERT_OK(EXPR) NA_ASSERT_OK_IMPL(ADBCV_NAME(na_status_,
__COUNTER__), EXPR)
void AdbcValidateDatabaseNewRelease(struct AdbcValidateTestContext*
adbc_context) {
struct AdbcError error;
@@ -325,14 +352,137 @@ void AdbcValidateStatementSqlExecute(struct AdbcValidateTestContext* adbc_contex
AdbcValidateBeginCase(adbc_context, "StatementSql", "execute");
ADBCV_ASSERT_OK(&error, AdbcStatementNew(&connection, &statement, &error));
- ADBCV_ASSERT_OK(&error, AdbcStatementSetSqlQuery(&statement, "SELECT 1",
&error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementSetSqlQuery(&statement, "SELECT 42",
&error));
ADBCV_ASSERT_OK(&error, AdbcStatementExecute(&statement, &error));
ADBCV_ASSERT_OK(&error, AdbcStatementGetStream(&statement, &out, &error));
ADBCV_ASSERT_NE(NULL, out.release);
+
+ struct ArrowSchema schema;
+ struct ArrowSchemaView schema_view;
+ ADBCV_ASSERT_EQ(0, out.get_schema(&out, &schema));
+ ADBCV_ASSERT_EQ(1, schema.n_children);
+ ADBCV_ASSERT_EQ(0, ArrowSchemaViewInit(&schema_view, schema.children[0],
NULL));
+ ADBCV_ASSERT_EQ(NANOARROW_TYPE_INT64, schema_view.data_type);
+
+ struct ArrowArray array;
+ ADBCV_ASSERT_EQ(0, out.get_next(&out, &array));
+ ADBCV_ASSERT_NE(NULL, array.release);
+
+ ADBCV_ASSERT_TRUE(AdbcValidationIsSet(array.children[0], 0));
+ ADBCV_ASSERT_EQ(42, ((int64_t*)array.children[0]->buffers[1])[0]);
+
+ array.release(&array);
+ ADBCV_ASSERT_EQ(0, out.get_next(&out, &array));
+ ADBCV_ASSERT_EQ(NULL, array.release);
+
+ schema.release(&schema);
out.release(&out);
ADBCV_ASSERT_OK(&error, AdbcStatementRelease(&statement, &error));
- AdbcValidateBeginCase(adbc_context, "StatementSql", "Teardown");
+ AdbcValidateBeginCase(adbc_context, "StatementSql", "teardown");
+ ADBCV_ASSERT_OK(&error, AdbcConnectionRelease(&connection, &error));
+ ADBCV_ASSERT_OK(&error, AdbcDatabaseRelease(&database, &error));
+ if (error.release) error.release(&error);
+}
+
+void AdbcValidateStatementSqlIngest(struct AdbcValidateTestContext*
adbc_context) {
+ struct AdbcError error;
+ struct AdbcDatabase database;
+ struct AdbcConnection connection;
+ struct AdbcStatement statement;
+ struct ArrowArrayStream out;
+ memset(&error, 0, sizeof(error));
+ memset(&database, 0, sizeof(database));
+ memset(&connection, 0, sizeof(connection));
+ memset(&statement, 0, sizeof(statement));
+
+ AdbcValidateBeginCase(adbc_context, "StatementSqlIngest", "setup");
+ ADBCV_ASSERT_OK(&error, AdbcDatabaseNew(&database, &error));
+ if (adbc_context->setup_database) {
+ ADBCV_ASSERT_OK(&error, adbc_context->setup_database(&database, &error));
+ }
+ ADBCV_ASSERT_OK(&error, AdbcDatabaseInit(&database, &error));
+ ADBCV_ASSERT_OK(&error, AdbcConnectionNew(&connection, &error));
+ ADBCV_ASSERT_OK(&error, AdbcConnectionInit(&connection, &database, &error));
+
+ AdbcValidateBeginCase(adbc_context, "StatementSqlIngest", "ingest int64");
+
+ struct ArrowSchema export_schema;
+ NA_ASSERT_OK(ArrowSchemaInit(&export_schema, NANOARROW_TYPE_STRUCT));
+ NA_ASSERT_OK(ArrowSchemaAllocateChildren(&export_schema, 1));
+ NA_ASSERT_OK(ArrowSchemaInit(export_schema.children[0],
NANOARROW_TYPE_INT64));
+ NA_ASSERT_OK(ArrowSchemaSetName(export_schema.children[0], "int64"));
+
+ struct ArrowArray export_array;
+ NA_ASSERT_OK(ArrowArrayInit(&export_array, NANOARROW_TYPE_STRUCT));
+ NA_ASSERT_OK(ArrowArrayAllocateChildren(&export_array, 1));
+ NA_ASSERT_OK(ArrowArrayInit(export_array.children[0], NANOARROW_TYPE_INT64));
+
+ struct ArrowBitmap* bitmap =
ArrowArrayValidityBitmap(export_array.children[0]);
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(export_array.children[0], 1);
+ NA_ASSERT_OK(ArrowBitmapReserve(bitmap, 5));
+ NA_ASSERT_OK(ArrowBufferReserve(buffer, 5 * sizeof(int64_t)));
+ ArrowBitmapAppendInt8Unsafe(bitmap, (int8_t[]){1, 1, 0, 0, 1}, 5);
+ NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 16));
+ NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, -1));
+ NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 0));
+ NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 0));
+ NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 42));
+ NA_ASSERT_OK(ArrowArrayFinishBuilding(export_array.children[0], 0));
+ NA_ASSERT_OK(ArrowArrayFinishBuilding(&export_array, 0));
+ export_array.children[0]->length = 5;
+ export_array.length = 5;
+
+ ADBCV_ASSERT_OK(&error, AdbcStatementNew(&connection, &statement, &error));
+ ADBCV_ASSERT_OK(&error,
+ AdbcStatementSetOption(&statement,
ADBC_INGEST_OPTION_TARGET_TABLE,
+ "bulk_insert", &error));
+ ADBCV_ASSERT_OK(&error,
+ AdbcStatementBind(&statement, &export_array, &export_schema,
&error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementExecute(&statement, &error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementRelease(&statement, &error));
+
+ AdbcValidateBeginCase(adbc_context, "StatementSqlIngest", "read back data");
+ ADBCV_ASSERT_OK(&error, AdbcStatementNew(&connection, &statement, &error));
+ ADBCV_ASSERT_OK(
+ &error, AdbcStatementSetSqlQuery(&statement, "SELECT * FROM
bulk_insert", &error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementExecute(&statement, &error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementGetStream(&statement, &out, &error));
+
+ struct ArrowSchema schema;
+ struct ArrowSchemaView schema_view;
+
+ NA_ASSERT_OK(out.get_schema(&out, &schema));
+ ADBCV_ASSERT_EQ(1, schema.n_children);
+ NA_ASSERT_OK(ArrowSchemaViewInit(&schema_view, schema.children[0], NULL));
+ ADBCV_ASSERT_EQ(NANOARROW_TYPE_INT64, schema_view.data_type);
+
+ struct ArrowArray array;
+ NA_ASSERT_OK(out.get_next(&out, &array));
+ ADBCV_ASSERT_NE(NULL, array.release);
+
+ ADBCV_ASSERT_EQ(5, array.length);
+ const int64_t* data = ((const int64_t*)array.children[0]->buffers[1]);
+ ADBCV_ASSERT_TRUE(AdbcValidationIsSet(array.children[0], 0));
+ ADBCV_ASSERT_TRUE(AdbcValidationIsSet(array.children[0], 1));
+ ADBCV_ASSERT_FALSE(AdbcValidationIsSet(array.children[0], 2));
+ ADBCV_ASSERT_FALSE(AdbcValidationIsSet(array.children[0], 3));
+ ADBCV_ASSERT_TRUE(AdbcValidationIsSet(array.children[0], 4));
+ ADBCV_ASSERT_EQ(16, data[0]);
+ ADBCV_ASSERT_EQ(-1, data[1]);
+ ADBCV_ASSERT_EQ(42, data[4]);
+
+ array.release(&array);
+ NA_ASSERT_OK(out.get_next(&out, &array));
+ ADBCV_ASSERT_EQ(NULL, array.release);
+
+ ADBCV_ASSERT_NE(NULL, schema.release);
+ schema.release(&schema);
+ ADBCV_ASSERT_NE(NULL, out.release);
+ out.release(&out);
+ ADBCV_ASSERT_OK(&error, AdbcStatementRelease(&statement, &error));
+
+ AdbcValidateBeginCase(adbc_context, "StatementSqlIngest", "teardown");
ADBCV_ASSERT_OK(&error, AdbcConnectionRelease(&connection, &error));
ADBCV_ASSERT_OK(&error, AdbcDatabaseRelease(&database, &error));
if (error.release) error.release(&error);
@@ -359,15 +509,38 @@ void AdbcValidateStatementSqlPrepare(struct AdbcValidateTestContext* adbc_contex
ADBCV_ASSERT_OK(&error, AdbcConnectionInit(&connection, &database, &error));
struct ArrowSchema schema;
- memset(&schema, 0, sizeof(schema));
+ struct ArrowSchemaView schema_view;
+
AdbcValidateBeginCase(adbc_context, "StatementSql", "prepare");
ADBCV_ASSERT_OK(&error, AdbcStatementNew(&connection, &statement, &error));
- ADBCV_ASSERT_OK(&error, AdbcStatementSetSqlQuery(&statement, "SELECT 1",
&error));
+ ADBCV_ASSERT_OK(&error, AdbcStatementSetSqlQuery(&statement, "SELECT 42",
&error));
ADBCV_ASSERT_OK(&error, AdbcStatementPrepare(&statement, &error));
+
ADBCV_ASSERT_OK(&error, AdbcStatementGetParameterSchema(&statement, &schema,
&error));
+ ADBCV_ASSERT_EQ(0, schema.n_children);
schema.release(&schema);
+
ADBCV_ASSERT_OK(&error, AdbcStatementExecute(&statement, &error));
ADBCV_ASSERT_OK(&error, AdbcStatementGetStream(&statement, &out, &error));
+
+ NA_ASSERT_OK(out.get_schema(&out, &schema));
+ ADBCV_ASSERT_EQ(1, schema.n_children);
+ NA_ASSERT_OK(ArrowSchemaViewInit(&schema_view, schema.children[0], NULL));
+ ADBCV_ASSERT_EQ(NANOARROW_TYPE_INT64, schema_view.data_type);
+
+ struct ArrowArray array;
+ NA_ASSERT_OK(out.get_next(&out, &array));
+ ADBCV_ASSERT_NE(NULL, array.release);
+
+ ADBCV_ASSERT_TRUE(AdbcValidationIsSet(array.children[0], 0));
+ ADBCV_ASSERT_EQ(42, ((int64_t*)array.children[0]->buffers[1])[0]);
+
+ array.release(&array);
+ NA_ASSERT_OK(out.get_next(&out, &array));
+ ADBCV_ASSERT_EQ(NULL, array.release);
+
+ ADBCV_ASSERT_NE(NULL, schema.release);
+ schema.release(&schema);
ADBCV_ASSERT_NE(NULL, out.release);
out.release(&out);
ADBCV_ASSERT_OK(&error, AdbcStatementRelease(&statement, &error));
@@ -381,7 +554,7 @@ void AdbcValidateStatementSqlPrepare(struct AdbcValidateTestContext* adbc_contex
ADBCV_ASSERT_EQ(NULL, out.release);
ADBCV_ASSERT_OK(&error, AdbcStatementRelease(&statement, &error));
- AdbcValidateBeginCase(adbc_context, "StatementSql", "Teardown");
+ AdbcValidateBeginCase(adbc_context, "StatementSql", "teardown");
ADBCV_ASSERT_OK(&error, AdbcConnectionRelease(&connection, &error));
ADBCV_ASSERT_OK(&error, AdbcDatabaseRelease(&database, &error));
if (error.release) error.release(&error);
diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h
index 2d702ae..f5883be 100644
--- a/c/validation/adbc_validation.h
+++ b/c/validation/adbc_validation.h
@@ -37,6 +37,7 @@ void AdbcValidateConnectionNewRelease(struct AdbcValidateTestContext* adbc_conte
void AdbcValidateConnectionAutocommit(struct AdbcValidateTestContext*
adbc_context);
void AdbcValidateStatementNewRelease(struct AdbcValidateTestContext*
adbc_context);
void AdbcValidateStatementSqlExecute(struct AdbcValidateTestContext*
adbc_context);
+void AdbcValidateStatementSqlIngest(struct AdbcValidateTestContext*
adbc_context);
void AdbcValidateStatementSqlPrepare(struct AdbcValidateTestContext*
adbc_context);
#ifdef __cplusplus
diff --git a/c/vendor/nanoarrow/allocator.c b/c/vendor/nanoarrow/allocator.c
new file mode 100644
index 0000000..8495037
--- /dev/null
+++ b/c/vendor/nanoarrow/allocator.c
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "nanoarrow.h"
+
+void* ArrowMalloc(int64_t size) { return malloc(size); }
+
+void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); }
+
+void ArrowFree(void* ptr) { free(ptr); }
+
+static uint8_t* ArrowBufferAllocatorMallocAllocate(struct
ArrowBufferAllocator* allocator,
+ int64_t size) {
+ return ArrowMalloc(size);
+}
+
+static uint8_t* ArrowBufferAllocatorMallocReallocate(
+ struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
+ int64_t new_size) {
+ return ArrowRealloc(ptr, new_size);
+}
+
+static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator*
allocator,
+ uint8_t* ptr, int64_t size) {
+ ArrowFree(ptr);
+}
+
+static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
+ &ArrowBufferAllocatorMallocAllocate, &ArrowBufferAllocatorMallocReallocate,
+ &ArrowBufferAllocatorMallocFree, NULL};
+
+struct ArrowBufferAllocator* ArrowBufferAllocatorDefault() {
+ return &ArrowBufferAllocatorMalloc;
+}
diff --git a/c/vendor/nanoarrow/array.c b/c/vendor/nanoarrow/array.c
new file mode 100644
index 0000000..89e6cac
--- /dev/null
+++ b/c/vendor/nanoarrow/array.c
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+static void ArrowArrayRelease(struct ArrowArray* array) {
+ // Release buffers held by this array
+ struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
+ if (data != NULL) {
+ ArrowBitmapReset(&data->bitmap);
+ ArrowBufferReset(&data->buffers[0]);
+ ArrowBufferReset(&data->buffers[1]);
+ ArrowFree(data);
+ }
+
+ // This object owns the memory for all the children, but those
+ // children may have been generated elsewhere and might have
+ // their own release() callback.
+ if (array->children != NULL) {
+ for (int64_t i = 0; i < array->n_children; i++) {
+ if (array->children[i] != NULL) {
+ if (array->children[i]->release != NULL) {
+ array->children[i]->release(array->children[i]);
+ }
+
+ ArrowFree(array->children[i]);
+ }
+ }
+
+ ArrowFree(array->children);
+ }
+
+ // This object owns the memory for the dictionary but it
+ // may have been generated somewhere else and have its own
+ // release() callback.
+ if (array->dictionary != NULL) {
+ if (array->dictionary->release != NULL) {
+ array->dictionary->release(array->dictionary);
+ }
+
+ ArrowFree(array->dictionary);
+ }
+
+ // Mark released
+ array->release = NULL;
+}
+
+ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
+ enum ArrowType storage_type) {
+ switch (storage_type) {
+ case NANOARROW_TYPE_UNINITIALIZED:
+ case NANOARROW_TYPE_NA:
+ array->n_buffers = 0;
+ break;
+
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST:
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ case NANOARROW_TYPE_STRUCT:
+ case NANOARROW_TYPE_MAP:
+ case NANOARROW_TYPE_SPARSE_UNION:
+ array->n_buffers = 1;
+ break;
+
+ case NANOARROW_TYPE_BOOL:
+ case NANOARROW_TYPE_UINT8:
+ case NANOARROW_TYPE_INT8:
+ case NANOARROW_TYPE_UINT16:
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_UINT32:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_UINT64:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_HALF_FLOAT:
+ case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ case NANOARROW_TYPE_DENSE_UNION:
+ array->n_buffers = 2;
+ break;
+
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ array->n_buffers = 3;
+ break;
+
+ default:
+ return EINVAL;
+ }
+
+ struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
+ data->storage_type = storage_type;
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType
storage_type) {
+ array->length = 0;
+ array->null_count = -1;
+ array->offset = 0;
+ array->n_buffers = 0;
+ array->n_children = 0;
+ array->buffers = NULL;
+ array->children = NULL;
+ array->dictionary = NULL;
+ array->release = &ArrowArrayRelease;
+ array->private_data = NULL;
+
+ struct ArrowArrayPrivateData* data =
+ (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct
ArrowArrayPrivateData));
+ if (data == NULL) {
+ array->release = NULL;
+ return ENOMEM;
+ }
+
+ ArrowBitmapInit(&data->bitmap);
+ ArrowBufferInit(&data->buffers[0]);
+ ArrowBufferInit(&data->buffers[1]);
+ data->buffer_data[0] = NULL;
+ data->buffer_data[1] = NULL;
+ data->buffer_data[2] = NULL;
+
+ array->private_data = data;
+ array->buffers = (const void**)(&data->buffer_data);
+
+ int result = ArrowArraySetStorageType(array, storage_type);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t
n_children) {
+ if (array->children != NULL) {
+ return EINVAL;
+ }
+
+ if (n_children == 0) {
+ return NANOARROW_OK;
+ }
+
+ array->children =
+ (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct
ArrowArray*));
+ if (array->children == NULL) {
+ return ENOMEM;
+ }
+
+ for (int64_t i = 0; i < n_children; i++) {
+ array->children[i] = NULL;
+ }
+
+ for (int64_t i = 0; i < n_children; i++) {
+ array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct
ArrowArray));
+ if (array->children[i] == NULL) {
+ return ENOMEM;
+ }
+ array->children[i]->release = NULL;
+ }
+
+ array->n_children = n_children;
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) {
+ if (array->dictionary != NULL) {
+ return EINVAL;
+ }
+
+ array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct
ArrowArray));
+ if (array->dictionary == NULL) {
+ return ENOMEM;
+ }
+
+ array->dictionary->release = NULL;
+ return NANOARROW_OK;
+}
+
+void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap*
bitmap) {
+ struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
+ ArrowBufferMove(&bitmap->buffer, &data->bitmap.buffer);
+ data->bitmap.size_bits = bitmap->size_bits;
+ bitmap->size_bits = 0;
+ data->buffer_data[0] = data->bitmap.buffer.data;
+}
+
+ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
+ struct ArrowBuffer* buffer) {
+ struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
+
+ switch (i) {
+ case 0:
+ ArrowBufferMove(buffer, &data->bitmap.buffer);
+ data->buffer_data[i] = data->bitmap.buffer.data;
+ break;
+ case 1:
+ case 2:
+ ArrowBufferMove(buffer, &data->buffers[i - 1]);
+ data->buffer_data[i] = data->buffers[i - 1].data;
+ break;
+ default:
+ return EINVAL;
+ }
+
+ return NANOARROW_OK;
+}
diff --git a/c/vendor/nanoarrow/array_inline.h b/c/vendor/nanoarrow/array_inline.h
new file mode 100644
index 0000000..7b74534
--- /dev/null
+++ b/c/vendor/nanoarrow/array_inline.h
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
+#define NANOARROW_ARRAY_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "bitmap_inline.h"
+#include "buffer_inline.h"
+#include "typedefs_inline.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray*
array) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ return &private_data->bitmap;
+}
+
+static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array,
int64_t i) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ switch (i) {
+ case 0:
+ return &private_data->bitmap.buffer;
+ default:
+ return private_data->buffers + i - 1;
+ }
+}
+
+static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+ char shrink_to_fit) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ // Make sure the value we get with array->buffers[i] is set to the actual
+ // pointer (which may have changed from the original due to reallocation)
+ int result;
+ for (int64_t i = 0; i < 3; i++) {
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ if (shrink_to_fit) {
+ result = ArrowBufferResize(buffer, buffer->size_bytes, shrink_to_fit);
+ if (result != NANOARROW_OK) {
+ return result;
+ }
+ }
+
+ private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
+ }
+
+ return NANOARROW_OK;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/vendor/nanoarrow/bitmap_inline.h b/c/vendor/nanoarrow/bitmap_inline.h
new file mode 100644
index 0000000..763da2a
--- /dev/null
+++ b/c/vendor/nanoarrow/bitmap_inline.h
@@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_BITMAP_INLINE_H_INCLUDED
+#define NANOARROW_BITMAP_INLINE_H_INCLUDED
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "buffer_inline.h"
+#include "typedefs_inline.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};  // [i] has only bit i set
+static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223,
191, 127};  // [i] has every bit except bit i set
+static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};  // [i] has the i bits below bit i set
+static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224,
192, 128};  // [i] has bit i and every higher bit set
+
+static const uint8_t _ArrowkBytePopcount[] = {  // [b] is the number of set bits in byte value b
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2,
3, 3, 4, 3,
+ 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4,
4, 5, 3, 4,
+ 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3,
4, 3, 4, 4,
+ 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5,
+ 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
4, 4, 5, 2,
+ 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
4, 5, 4, 5,
+ 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4,
5, 3, 4, 4,
+ 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5,
4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+// Rounds value up to the nearest multiple of 8; multiples of 8 are returned
+// unchanged.
+static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) {
+  const int64_t low_bits = 7;
+  const int64_t bumped = value + low_bits;
+  return bumped & ~low_bits;
+}
+
+// Rounds value to a multiple of 8 using truncating integer division, i.e.
+// toward zero.
+static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) {
+  const int64_t whole_groups = value / 8;
+  return whole_groups * 8;
+}
+
+// Returns the number of bytes needed to hold the given number of bits: the
+// whole bytes plus one more when there is a partial byte.
+static inline int64_t _ArrowBytesForBits(int64_t bits) {
+  const int64_t whole_bytes = bits >> 3;
+  const int has_partial_byte = (bits & 7) != 0;
+  return whole_bytes + has_partial_byte;
+}
+
+// Packs eight int8_t flags into one byte: bit i of *out is set when
+// values[i] is non-zero.
+//
+// Each value is normalized with != 0 before shifting so that flags other
+// than 0/1 (for example 2 or -1) cannot leak into neighbouring bits. This
+// matches ArrowBitSetTo(), which also treats any non-zero flag as "set".
+static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
+  *out = (uint8_t)((values[0] != 0) | ((values[1] != 0) << 1) | ((values[2] != 0) << 2) |
+                   ((values[3] != 0) << 3) | ((values[4] != 0) << 4) |
+                   ((values[5] != 0) << 5) | ((values[6] != 0) << 6) |
+                   ((values[7] != 0) << 7));
+}
+
+// Packs eight int32_t flags into one byte: bit i of *out is set when
+// values[i] is non-zero.
+//
+// Each value is normalized with != 0 before shifting so that flags other
+// than 0/1 cannot leak into neighbouring bits or be lost when the result is
+// narrowed to a byte (for example 256 would otherwise pack as 0).
+static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) {
+  *out = (uint8_t)((values[0] != 0) | ((values[1] != 0) << 1) | ((values[2] != 0) << 2) |
+                   ((values[3] != 0) << 3) | ((values[4] != 0) << 4) |
+                   ((values[5] != 0) << 5) | ((values[6] != 0) << 6) |
+                   ((values[7] != 0) << 7));
+}
+
+// Returns bit i (0 or 1) of the packed bit array, where bit 0 is the least
+// significant bit of bits[0].
+static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
+  const uint8_t containing_byte = bits[i >> 3];
+  const int shift = (int)(i & 0x07);
+  return (int8_t)((containing_byte >> shift) & 1);
+}
+
+// Sets bit i of the packed bit array to 1, leaving all other bits untouched.
+static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
+  uint8_t* target = bits + i / 8;
+  *target = (uint8_t)(*target | _ArrowkBitmask[i % 8]);
+}
+
+// Clears bit i of the packed bit array to 0, leaving all other bits
+// untouched.
+static inline void ArrowBitClear(uint8_t* bits, int64_t i) {
+  uint8_t* target = bits + i / 8;
+  *target = (uint8_t)(*target & _ArrowkFlippedBitmask[i % 8]);
+}
+
+// Sets bit i of the packed bit array to 1 when bit_is_set is non-zero and to
+// 0 otherwise, leaving all other bits untouched.
+static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) {
+  uint8_t* target = &bits[i / 8];
+  const uint8_t mask = _ArrowkBitmask[i % 8];
+  if (bit_is_set != 0) {
+    *target = (uint8_t)(*target | mask);
+  } else {
+    *target = (uint8_t)(*target & (uint8_t)~mask);
+  }
+}
+
+// Sets the length bits starting at bit start_offset to 1 (bits_are_set
+// non-zero) or 0 (bits_are_set zero). Bits outside the range are preserved.
+//
+// A length of zero (or less) is a no-op. Without this guard a zero-length
+// range starting on a byte boundary would overwrite the whole byte at
+// start_offset / 8, which may lie past the end of the allocation.
+static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length,
+                                  uint8_t bits_are_set) {
+  if (length <= 0) {
+    return;
+  }
+
+  const int64_t i_begin = start_offset;
+  const int64_t i_end = start_offset + length;
+  // Any non-zero flag means "set"; negating the raw flag would give a wrong
+  // fill pattern for values other than 1.
+  const uint8_t fill_byte = bits_are_set ? 0xFF : 0x00;
+
+  const int64_t bytes_begin = i_begin / 8;
+  const int64_t bytes_end = i_end / 8 + 1;
+
+  // Both masks select the bits that must be preserved.
+  const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
+  const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
+
+  if (bytes_end == bytes_begin + 1) {
+    // set bits within a single byte
+    const uint8_t only_byte_mask =
+        i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
+    bits[bytes_begin] &= only_byte_mask;
+    bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
+    return;
+  }
+
+  // set/clear trailing bits of first byte
+  bits[bytes_begin] &= first_byte_mask;
+  bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);
+
+  if (bytes_end - bytes_begin > 2) {
+    // set/clear whole bytes
+    memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2));
+  }
+
+  // The range ends exactly on a byte boundary: there is no partial last byte
+  // and bits[bytes_end - 1] is outside the range.
+  if (i_end % 8 == 0) {
+    return;
+  }
+
+  // set/clear leading bits of last byte
+  bits[bytes_end - 1] &= last_byte_mask;
+  bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask);
+}
+
+// Counts the bits that are set in the range [start_offset, start_offset +
+// length) of the packed bit array. Returns 0 when length is zero or negative.
+//
+// Only bytes that contain at least one bit of the range are read: the last
+// byte is located from the last valid bit (i_end - 1), so a range ending on a
+// byte boundary never touches the byte after it.
+static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset,
+                                       int64_t length) {
+  if (length <= 0) {
+    return 0;
+  }
+
+  const int64_t i_begin = start_offset;
+  const int64_t i_end = start_offset + length;
+
+  const int64_t bytes_begin = i_begin / 8;
+  const int64_t bytes_last = (i_end - 1) / 8;
+
+  // Both masks select the bits OUTSIDE the range; they are inverted before
+  // being applied. When the range ends on a byte boundary the whole last
+  // byte is inside the range, so nothing is excluded from it.
+  const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
+  const uint8_t last_byte_mask =
+      (i_end % 8 == 0) ? (uint8_t)0 : _ArrowkTrailingBitmask[i_end % 8];
+
+  if (bytes_begin == bytes_last) {
+    // count bits within a single byte
+    const uint8_t outside_mask = (uint8_t)(first_byte_mask | last_byte_mask);
+    return _ArrowkBytePopcount[bits[bytes_begin] & (uint8_t)~outside_mask];
+  }
+
+  int64_t count = 0;
+
+  // first byte
+  count += _ArrowkBytePopcount[bits[bytes_begin] & (uint8_t)~first_byte_mask];
+
+  // middle bytes
+  for (int64_t i = bytes_begin + 1; i < bytes_last; i++) {
+    count += _ArrowkBytePopcount[bits[i]];
+  }
+
+  // last byte
+  count += _ArrowkBytePopcount[bits[bytes_last] & (uint8_t)~last_byte_mask];
+
+  return count;
+}
+
+// Puts a bitmap into the empty state: zero bits and a freshly initialized
+// underlying buffer.
+static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) {
+  bitmap->size_bits = 0;
+  ArrowBufferInit(&bitmap->buffer);
+}
+
+// Ensures the bitmap's buffer can hold additional_size_bits more bits than
+// its current size_bits. Does nothing when the capacity is already
+// sufficient.
+//
+// Returns NANOARROW_OK on success or the error from ArrowBufferReserve().
+static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
+                                                int64_t additional_size_bits) {
+  const int64_t capacity_bits = bitmap->buffer.capacity_bytes * 8;
+  const int64_t needed_bits = bitmap->size_bits + additional_size_bits;
+
+  if (needed_bits > capacity_bits) {
+    const int64_t additional_bytes = _ArrowBytesForBits(additional_size_bits);
+    const int status = ArrowBufferReserve(&bitmap->buffer, additional_bytes);
+    if (status != NANOARROW_OK) {
+      return status;
+    }
+
+    // Zero the final byte of the (possibly grown) allocation.
+    bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Resizes the bitmap's buffer to hold new_capacity_bits bits, delegating to
+// ArrowBufferResize() with the equivalent byte count. size_bits is reduced
+// when it exceeds the new capacity.
+//
+// Returns EINVAL for a negative capacity, the error from ArrowBufferResize()
+// on failure, and NANOARROW_OK otherwise.
+static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
+                                               int64_t new_capacity_bits,
+                                               char shrink_to_fit) {
+  if (new_capacity_bits < 0) {
+    return EINVAL;
+  }
+
+  const int status = ArrowBufferResize(
+      &bitmap->buffer, _ArrowBytesForBits(new_capacity_bits), shrink_to_fit);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  if (bitmap->size_bits > new_capacity_bits) {
+    bitmap->size_bits = new_capacity_bits;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Appends length bits, all set or all cleared according to bits_are_set,
+// reserving space first.
+//
+// Returns NANOARROW_OK on success or the error from ArrowBitmapReserve(), in
+// which case nothing is appended.
+static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
+                                               uint8_t bits_are_set, int64_t length) {
+  const int status = ArrowBitmapReserve(bitmap, length);
+  if (status == NANOARROW_OK) {
+    ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length);
+  }
+
+  return status;
+}
+
+// Appends length bits, all set or all cleared according to bits_are_set,
+// without reserving space: the caller must have made room beforehand.
+// Updates both size_bits and the buffer's size_bytes.
+static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
+                                           uint8_t bits_are_set, int64_t length) {
+  const int64_t first_new_bit = bitmap->size_bits;
+  ArrowBitsSetTo(bitmap->buffer.data, first_new_bit, length, bits_are_set);
+  bitmap->size_bits = first_new_bit + length;
+  bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits);
+}
+
+// Appends n_values bits to the bitmap, one per element of values: a non-zero
+// element appends a set bit, zero appends a cleared bit. No space is
+// reserved; the caller must have made room beforehand.
+//
+// The input is consumed in three phases: bits completing a partially filled
+// byte, whole bytes packed eight values at a time, and any leftover bits.
+static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
+                                               const int8_t* values, int64_t n_values) {
+  if (n_values <= 0) {
+    return;
+  }
+
+  const int8_t* values_cursor = values;
+  int64_t n_remaining = n_values;
+  int64_t out_i_cursor = bitmap->size_bits;
+  uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
+
+  // First byte
+  if ((out_i_cursor % 8) != 0) {
+    int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
+    // Never consume more values than were supplied: without this clamp a
+    // short append would read past the end of values and leave n_remaining
+    // negative.
+    if (n_partial_bits > n_remaining) {
+      n_partial_bits = n_remaining;
+    }
+
+    for (int64_t i = 0; i < n_partial_bits; i++) {
+      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i] != 0);
+    }
+
+    out_cursor++;
+    values_cursor += n_partial_bits;
+    n_remaining -= n_partial_bits;
+  }
+
+  // Middle bytes
+  int64_t n_full_bytes = n_remaining / 8;
+  for (int64_t i = 0; i < n_full_bytes; i++) {
+    _ArrowBitmapPackInt8(values_cursor, out_cursor);
+    values_cursor += 8;
+    out_cursor++;
+  }
+
+  // Last byte
+  out_i_cursor += n_full_bytes * 8;
+  n_remaining -= n_full_bytes * 8;
+  if (n_remaining > 0) {
+    for (int64_t i = 0; i < n_remaining; i++) {
+      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i] != 0);
+    }
+    out_cursor++;
+  }
+
+  bitmap->size_bits += n_values;
+  bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
+}
+
+// Appends n_values bits to the bitmap, one per element of values: a non-zero
+// element appends a set bit, zero appends a cleared bit. No space is
+// reserved; the caller must have made room beforehand.
+//
+// The input is consumed in three phases: bits completing a partially filled
+// byte, whole bytes packed eight values at a time, and any leftover bits.
+static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
+                                                const int32_t* values, int64_t n_values) {
+  if (n_values <= 0) {
+    return;
+  }
+
+  const int32_t* values_cursor = values;
+  int64_t n_remaining = n_values;
+  int64_t out_i_cursor = bitmap->size_bits;
+  uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
+
+  // First byte
+  if ((out_i_cursor % 8) != 0) {
+    int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
+    // Never consume more values than were supplied: without this clamp a
+    // short append would read past the end of values and leave n_remaining
+    // negative.
+    if (n_partial_bits > n_remaining) {
+      n_partial_bits = n_remaining;
+    }
+
+    for (int64_t i = 0; i < n_partial_bits; i++) {
+      // Compare against zero before narrowing: passing the int32_t directly
+      // would truncate it to uint8_t and turn values such as 256 into 0.
+      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i] != 0);
+    }
+
+    out_cursor++;
+    values_cursor += n_partial_bits;
+    n_remaining -= n_partial_bits;
+  }
+
+  // Middle bytes
+  int64_t n_full_bytes = n_remaining / 8;
+  for (int64_t i = 0; i < n_full_bytes; i++) {
+    _ArrowBitmapPackInt32(values_cursor, out_cursor);
+    values_cursor += 8;
+    out_cursor++;
+  }
+
+  // Last byte
+  out_i_cursor += n_full_bytes * 8;
+  n_remaining -= n_full_bytes * 8;
+  if (n_remaining > 0) {
+    for (int64_t i = 0; i < n_remaining; i++) {
+      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i] != 0);
+    }
+    out_cursor++;
+  }
+
+  bitmap->size_bits += n_values;
+  bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
+}
+
+// Resets the bitmap to the empty state: size_bits becomes zero and the
+// underlying buffer is reset via ArrowBufferReset().
+static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) {
+  bitmap->size_bits = 0;
+  ArrowBufferReset(&bitmap->buffer);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/vendor/nanoarrow/buffer_inline.h
b/c/vendor/nanoarrow/buffer_inline.h
new file mode 100644
index 0000000..5400a1b
--- /dev/null
+++ b/c/vendor/nanoarrow/buffer_inline.h
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED
+#define NANOARROW_BUFFER_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "typedefs_inline.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Picks the capacity to grow to: twice the current capacity when that
+// exceeds the requested capacity, otherwise the requested capacity itself.
+static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) {
+  const int64_t doubled = current_capacity * 2;
+  return (doubled > new_capacity) ? doubled : new_capacity;
+}
+
+// Initializes a buffer to the empty state (no data, zero size and capacity)
+// using the allocator returned by ArrowBufferAllocatorDefault().
+static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
+  buffer->allocator = ArrowBufferAllocatorDefault();
+  buffer->capacity_bytes = 0;
+  buffer->size_bytes = 0;
+  buffer->data = NULL;
+}
+
+// Replaces the buffer's allocator. This is only permitted while the buffer
+// holds no data; otherwise EINVAL is returned and the buffer is unchanged.
+static inline ArrowErrorCode ArrowBufferSetAllocator(
+    struct ArrowBuffer* buffer, struct ArrowBufferAllocator* allocator) {
+  if (buffer->data != NULL) {
+    return EINVAL;
+  }
+
+  buffer->allocator = allocator;
+  return NANOARROW_OK;
+}
+
+// Releases the buffer's data (if any) through its allocator's free callback
+// and returns the buffer to the empty state. The allocator itself is kept.
+static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
+  const int has_allocation = buffer->data != NULL;
+  if (has_allocation) {
+    struct ArrowBufferAllocator* allocator = buffer->allocator;
+    allocator->free(allocator, (uint8_t*)buffer->data, buffer->capacity_bytes);
+    buffer->data = NULL;
+  }
+
+  buffer->size_bytes = 0;
+  buffer->capacity_bytes = 0;
+}
+
+// Transfers the contents of buffer into buffer_out and leaves buffer empty.
+// buffer_out is overwritten without being reset first.
+static inline void ArrowBufferMove(struct ArrowBuffer* buffer,
+                                   struct ArrowBuffer* buffer_out) {
+  *buffer_out = *buffer;
+
+  // Drop the source's pointer before resetting so the data now referenced by
+  // buffer_out is not freed.
+  buffer->data = NULL;
+  ArrowBufferReset(buffer);
+}
+
+// Changes the buffer's capacity to new_capacity_bytes. The allocation is
+// only touched when growing or when shrink_to_fit is non-zero; size_bytes is
+// always clamped to the requested capacity.
+//
+// Returns EINVAL for a negative capacity and ENOMEM when the allocator
+// returns NULL for a non-zero request (the buffer is then left with NULL
+// data and zero size and capacity), otherwise NANOARROW_OK.
+//
+// NOTE(review): on failure the previous data pointer is overwritten with
+// NULL. Whether the old allocation is leaked depends on what the allocator's
+// reallocate callback does on failure -- confirm against the allocator.
+static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
+                                               int64_t new_capacity_bytes,
+                                               char shrink_to_fit) {
+  if (new_capacity_bytes < 0) {
+    return EINVAL;
+  }
+
+  const int must_reallocate =
+      shrink_to_fit || new_capacity_bytes > buffer->capacity_bytes;
+  if (must_reallocate) {
+    buffer->data = buffer->allocator->reallocate(
+        buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
+    if (new_capacity_bytes > 0 && buffer->data == NULL) {
+      buffer->size_bytes = 0;
+      buffer->capacity_bytes = 0;
+      return ENOMEM;
+    }
+
+    buffer->capacity_bytes = new_capacity_bytes;
+  }
+
+  // Keep size <= requested capacity when shrinking.
+  if (buffer->size_bytes > new_capacity_bytes) {
+    buffer->size_bytes = new_capacity_bytes;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Ensures the buffer has room for additional_size_bytes beyond its current
+// size. When growth is needed the new capacity is chosen by
+// _ArrowGrowByFactor().
+//
+// Returns NANOARROW_OK when no growth is needed, otherwise the result of
+// ArrowBufferResize().
+static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
+                                                int64_t additional_size_bytes) {
+  const int64_t required_bytes = buffer->size_bytes + additional_size_bytes;
+  if (required_bytes > buffer->capacity_bytes) {
+    const int64_t grown_capacity =
+        _ArrowGrowByFactor(buffer->capacity_bytes, required_bytes);
+    return ArrowBufferResize(buffer, grown_capacity, 0);
+  }
+
+  return NANOARROW_OK;
+}
+
+// Copies size_bytes bytes from data to the end of the buffer and advances
+// size_bytes. No capacity check is made; a non-positive size is a no-op.
+static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
+                                           int64_t size_bytes) {
+  if (size_bytes <= 0) {
+    return;
+  }
+
+  uint8_t* destination = buffer->data + buffer->size_bytes;
+  memcpy(destination, data, size_bytes);
+  buffer->size_bytes += size_bytes;
+}
+
+// Appends size_bytes bytes from data to the buffer, growing it if needed.
+//
+// Returns NANOARROW_OK on success or the error from ArrowBufferReserve(), in
+// which case nothing is appended.
+static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
+                                               const void* data, int64_t size_bytes) {
+  const int status = ArrowBufferReserve(buffer, size_bytes);
+  if (status == NANOARROW_OK) {
+    ArrowBufferAppendUnsafe(buffer, data, size_bytes);
+  }
+
+  return status;
+}
+
+// Typed convenience wrappers around ArrowBufferAppend(): each appends the
+// in-memory bytes of a single value and returns ArrowBufferAppend()'s result.
+
+// Appends one int8_t.
+static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
+                                                   int8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one uint8_t.
+static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
+                                                    uint8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one int16_t.
+static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
+                                                    int16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one uint16_t.
+static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
+                                                     uint16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one int32_t.
+static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
+                                                    int32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one uint32_t.
+static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
+                                                     uint32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one int64_t.
+static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
+                                                    int64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one uint64_t.
+static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
+                                                     uint64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one double.
+static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
+                                                     double value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Appends one float.
+static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
+                                                    float value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/vendor/nanoarrow/build-and-test.yaml
b/c/vendor/nanoarrow/build-and-test.yaml
new file mode 100644
index 0000000..a2ad105
--- /dev/null
+++ b/c/vendor/nanoarrow/build-and-test.yaml
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+on:  # Run for pushes and pull requests that target main or master
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+
+name: Build and Test
+
+jobs:
+ build-and-test:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get install -y cmake valgrind
+
+ - name: Cache Dependency Builds  # Caches the build-deps directory between runs
+ id: cache-deps-build
+ uses: actions/cache@v3
+ with:
+ path: build-deps
+ key: ${{ runner.os }}-3
+
+ - name: Init build dir
+ if: steps.cache-deps-build.outputs.cache-hit != 'true'  # Only when the cache was not restored
+ run: mkdir build-deps
+
+ # There seems to be an error passing -DGTest_DIR into Arrow's build
+ # so we just build the same version of it and install
+ - name: Fetch googletest
+ if: steps.cache-deps-build.outputs.cache-hit != 'true'
+ uses: actions/checkout@v3
+ with:
+ repository: google/googletest
+ ref: release-1.11.0
+ path: build-deps/googletest
+ fetch-depth: 0
+
+ - name: Build googletest
+ if: steps.cache-deps-build.outputs.cache-hit != 'true'
+ run: |
+ cd build-deps/googletest
+ cmake .
+ cmake --build .
+ cmake --install . --prefix ../../dist
+
+ - name: Fetch Arrow
+ if: steps.cache-deps-build.outputs.cache-hit != 'true'
+ uses: actions/checkout@v3
+ with:
+ repository: apache/arrow
+ ref: apache-arrow-8.0.0
+ path: build-deps/arrow
+ fetch-depth: 0
+
+ - name: Build Arrow
+ if: steps.cache-deps-build.outputs.cache-hit != 'true'
+ run: |
+ mkdir build-deps/arrow-build
+ cd build-deps/arrow-build
+ cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON
-DBoost_SOURCE=BUNDLED
+ cmake --build .
+ cmake --install . --prefix ../../dist
+
+ - name: Install Dependencies  # No cache-hit condition: runs on every job to install into dist
+ run: |
+ cd build-deps/arrow-build
+ cmake --install . --prefix ../../dist
+ cd ../googletest
+ cmake --install . --prefix ../../dist
+
+ - name: Build nanoarrow
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DCMAKE_BUILD_TYPE=Debug
-DGTest_DIR=`pwd`/../dist/lib/cmake/GTest
-DArrow_DIR=`pwd`/../dist/lib/cmake/arrow
-DArrowTesting_DIR=`pwd`/../dist/lib/cmake/arrow -DNANOARROW_CODE_COVERAGE=ON
-DNANOARROW_BUILD_TESTS=ON
+ cmake --build .
+
+ - name: Run tests
+ run: |
+ cd build
+ ctest -T test --output-on-failure .
+
+ - name: Run tests with valgrind  # Uses the valgrind package installed by the apt-get step
+ run: |
+ cd build
+ ctest -T memcheck .
+
+ - name: Calculate coverage
+ run: |
+ SOURCE_PREFIX=`pwd`
+ mkdir build/cov
+ cd build/cov
+ gcov -abcfu --source-prefix=$SOURCE_PREFIX `find
../CMakeFiles/nanoarrow.dir/ -name "*.gcno"`
+
+ - name: Upload coverage
+ uses: codecov/codecov-action@v2
+ with:
+ directory: build/cov
diff --git a/c/vendor/nanoarrow/error.c b/c/vendor/nanoarrow/error.c
new file mode 100644
index 0000000..74539d3
--- /dev/null
+++ b/c/vendor/nanoarrow/error.c
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
+ memset(error->message, 0, sizeof(error->message));
+
+ va_list args;
+ va_start(args, fmt);
+ int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt,
args);
+ va_end(args);
+
+ if (chars_needed < 0) {
+ return EINVAL;
+ } else if (chars_needed >= sizeof(error->message)) {
+ return ERANGE;
+ } else {
+ return NANOARROW_OK;
+ }
+}
+
+const char* ArrowErrorMessage(struct ArrowError* error) { return
error->message; }
diff --git a/c/vendor/nanoarrow/metadata.c b/c/vendor/nanoarrow/metadata.c
new file mode 100644
index 0000000..2f24cbc
--- /dev/null
+++ b/c/vendor/nanoarrow/metadata.c
@@ -0,0 +1,257 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+// Positions a reader at the first key/value pair of an encoded metadata
+// block. The leading int32 is read as the number of pairs; NULL metadata
+// yields a reader with no pairs. Always returns NANOARROW_OK.
+ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
+                                       const char* metadata) {
+  reader->metadata = metadata;
+
+  if (metadata != NULL) {
+    memcpy(&reader->remaining_keys, metadata, sizeof(int32_t));
+    reader->offset = sizeof(int32_t);
+    return NANOARROW_OK;
+  }
+
+  reader->remaining_keys = 0;
+  reader->offset = 0;
+  return NANOARROW_OK;
+}
+
+// Reads the next key/value pair and advances the reader past it. Each pair
+// is laid out as an int32 key size, the key bytes, an int32 value size and
+// the value bytes. key_out and value_out point into the metadata block; no
+// bytes are copied.
+//
+// Returns EINVAL when no pairs remain, otherwise NANOARROW_OK.
+ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
+                                       struct ArrowStringView* key_out,
+                                       struct ArrowStringView* value_out) {
+  if (reader->remaining_keys <= 0) {
+    return EINVAL;
+  }
+
+  const char* entry = reader->metadata + reader->offset;
+  int64_t consumed = 0;
+
+  int32_t key_size;
+  memcpy(&key_size, entry + consumed, sizeof(int32_t));
+  consumed += sizeof(int32_t);
+  key_out->data = entry + consumed;
+  key_out->n_bytes = key_size;
+  consumed += key_size;
+
+  int32_t value_size;
+  memcpy(&value_size, entry + consumed, sizeof(int32_t));
+  consumed += sizeof(int32_t);
+  value_out->data = entry + consumed;
+  value_out->n_bytes = value_size;
+  consumed += value_size;
+
+  reader->offset += consumed;
+  reader->remaining_keys--;
+  return NANOARROW_OK;
+}
+
+// Returns the total encoded size in bytes of a metadata block: the leading
+// int32 pair count plus, for every pair, two int32 length prefixes and the
+// key and value bytes. NULL metadata has size 0.
+int64_t ArrowMetadataSizeOf(const char* metadata) {
+  if (metadata == NULL) {
+    return 0;
+  }
+
+  struct ArrowMetadataReader reader;
+  struct ArrowStringView key;
+  struct ArrowStringView value;
+  ArrowMetadataReaderInit(&reader, metadata);
+
+  int64_t total_bytes = sizeof(int32_t);
+  for (;;) {
+    if (ArrowMetadataReaderRead(&reader, &key, &value) != NANOARROW_OK) {
+      break;
+    }
+
+    total_bytes += sizeof(int32_t) + key.n_bytes;
+    total_bytes += sizeof(int32_t) + value.n_bytes;
+  }
+
+  return total_bytes;
+}
+
+// Looks up key in an encoded metadata block. When a pair with an identical
+// key is found, value_out is pointed at that pair's value (the first match
+// wins); when no pair matches, value_out is left untouched. Always returns
+// NANOARROW_OK.
+static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata,
+                                                    struct ArrowStringView* key,
+                                                    struct ArrowStringView* value_out) {
+  struct ArrowMetadataReader reader;
+  struct ArrowStringView existing_key;
+  struct ArrowStringView existing_value;
+  ArrowMetadataReaderInit(&reader, metadata);
+
+  while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) ==
+         NANOARROW_OK) {
+    // Keys are length-prefixed byte strings, so compare every byte with
+    // memcmp(): strncmp() would stop at an embedded NUL and report keys that
+    // differ after it as equal. Empty keys are equal without touching the
+    // (possibly NULL) data pointers.
+    int key_equal =
+        key->n_bytes == existing_key.n_bytes &&
+        (key->n_bytes == 0 ||
+         memcmp(key->data, existing_key.data, (size_t)existing_key.n_bytes) == 0);
+    if (key_equal) {
+      value_out->data = existing_value.data;
+      value_out->n_bytes = existing_value.n_bytes;
+      break;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Public lookup of key in an encoded metadata block; see
+// ArrowMetadataGetValueInternal() for how value_out is populated.
+//
+// Returns EINVAL when value_out is NULL.
+ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key,
+                                     struct ArrowStringView* value_out) {
+  if (value_out != NULL) {
+    return ArrowMetadataGetValueInternal(metadata, &key, value_out);
+  }
+
+  return EINVAL;
+}
+
+// Returns 1 when a lookup of key yields a value with non-NULL data, and 0
+// otherwise.
+char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) {
+  struct ArrowStringView found = ArrowCharView(NULL);
+  ArrowMetadataGetValue(metadata, key, &found);
+  if (found.data == NULL) {
+    return 0;
+  }
+
+  return 1;
+}
+
+// Initializes buffer and fills it with a copy of the encoded metadata block
+// (nothing is copied for NULL metadata, whose size is 0).
+//
+// Returns the result of ArrowBufferAppend().
+ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer,
+                                        const char* metadata) {
+  ArrowBufferInit(buffer);
+  const int64_t metadata_size = ArrowMetadataSizeOf(metadata);
+  return ArrowBufferAppend(buffer, metadata, metadata_size);
+}
+
+// Appends one key/value pair to the encoded metadata held in buffer and
+// increments the pair count stored in the first four bytes. A NULL value is
+// a no-op. No check is made for an existing pair with the same key.
+//
+// Returns EINVAL when the buffer is non-empty but too small to hold the pair
+// count, an allocation error from the buffer helpers, or NANOARROW_OK.
+static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer,
+                                                         struct ArrowStringView* key,
+                                                         struct ArrowStringView* value) {
+  if (value == NULL) {
+    return NANOARROW_OK;
+  }
+
+  // A buffer with no capacity has no pair count yet: start it at zero.
+  if (buffer->capacity_bytes == 0) {
+    int32_t initial_count = 0;
+    int init_status = ArrowBufferAppend(buffer, &initial_count, sizeof(int32_t));
+    if (init_status != NANOARROW_OK) {
+      return init_status;
+    }
+  }
+
+  if (buffer->capacity_bytes < sizeof(int32_t)) {
+    return EINVAL;
+  }
+
+  int32_t pair_count;
+  memcpy(&pair_count, buffer->data, sizeof(int32_t));
+
+  int32_t key_size = key->n_bytes;
+  int32_t value_size = value->n_bytes;
+  int reserve_status = ArrowBufferReserve(
+      buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size);
+  if (reserve_status != NANOARROW_OK) {
+    return reserve_status;
+  }
+
+  // Layout of one pair: key size, key bytes, value size, value bytes.
+  ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t));
+  ArrowBufferAppendUnsafe(buffer, key->data, key_size);
+  ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t));
+  ArrowBufferAppendUnsafe(buffer, value->data, value_size);
+
+  // Write the new count through buffer->data, which may have moved during
+  // the reserve above.
+  const int32_t updated_count = pair_count + 1;
+  memcpy(buffer->data, &updated_count, sizeof(int32_t));
+
+  return NANOARROW_OK;
+}
+
+// Sets (value != NULL) or removes (value == NULL) key in the encoded
+// metadata held in buffer.
+//
+// When the key is absent, a removal is a no-op and a set simply appends the
+// pair. When the key is present, the metadata is rebuilt into a new buffer:
+// the first matching pair is replaced by the new value (or dropped on
+// removal), any later pairs with the same key are dropped, and all other
+// pairs are copied unchanged. The rebuilt buffer then replaces the contents
+// of buffer.
+//
+// Returns NANOARROW_OK on success or the first error from the reader/builder
+// helpers; on error buffer is left unchanged.
+static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer,
+                                                      struct ArrowStringView* key,
+                                                      struct ArrowStringView* value) {
+  // Inspect the current value to see if we can avoid copying the buffer
+  struct ArrowStringView current_value = ArrowCharView(NULL);
+  int result =
+      ArrowMetadataGetValueInternal((const char*)buffer->data, key, &current_value);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  // The key should be removed but no key exists
+  if (value == NULL && current_value.data == NULL) {
+    return NANOARROW_OK;
+  }
+
+  // The key/value can be appended because no key exists
+  if (value != NULL && current_value.data == NULL) {
+    return ArrowMetadataBuilderAppendInternal(buffer, key, value);
+  }
+
+  struct ArrowMetadataReader reader;
+  struct ArrowStringView existing_key;
+  struct ArrowStringView existing_value;
+  result = ArrowMetadataReaderInit(&reader, (const char*)buffer->data);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  struct ArrowBuffer new_buffer;
+  result = ArrowMetadataBuilderInit(&new_buffer, NULL);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  while (reader.remaining_keys > 0) {
+    result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value);
+    if (result != NANOARROW_OK) {
+      ArrowBufferReset(&new_buffer);
+      return result;
+    }
+
+    if (key->n_bytes == existing_key.n_bytes &&
+        strncmp((const char*)key->data, (const char*)existing_key.data,
+                existing_key.n_bytes) == 0) {
+      // Appending a NULL value is a no-op, so clearing value after the first
+      // match drops any later pairs that use the same key.
+      result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value);
+      value = NULL;
+    } else {
+      result =
+          ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value);
+    }
+
+    if (result != NANOARROW_OK) {
+      ArrowBufferReset(&new_buffer);
+      return result;
+    }
+  }
+
+  ArrowBufferReset(buffer);
+  ArrowBufferMove(&new_buffer, buffer);
+  return NANOARROW_OK;
+}
+
+// Appends a key/value pair to the metadata in buffer without checking for an
+// existing pair with the same key.
+ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer,
+                                          struct ArrowStringView key,
+                                          struct ArrowStringView value) {
+  ArrowErrorCode status = ArrowMetadataBuilderAppendInternal(buffer, &key, &value);
+  return status;
+}
+
+// Sets key to value in the metadata in buffer, replacing an existing pair
+// with the same key or appending a new one.
+ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
+                                       struct ArrowStringView key,
+                                       struct ArrowStringView value) {
+  ArrowErrorCode status = ArrowMetadataBuilderSetInternal(buffer, &key, &value);
+  return status;
+}
+
+// Removes key from the metadata in buffer; a missing key is not an error.
+ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
+                                          struct ArrowStringView key) {
+  ArrowErrorCode status = ArrowMetadataBuilderSetInternal(buffer, &key, NULL);
+  return status;
+}
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
new file mode 100644
index 0000000..d3e33c9
--- /dev/null
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "allocator.c"
+#include "array.c"
+#include "error.c"
+#include "metadata.c"
+#include "schema.c"
+#include "schema_view.c"
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
new file mode 100644
index 0000000..c432e60
--- /dev/null
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -0,0 +1,579 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_H_INCLUDED
+#define NANOARROW_H_INCLUDED
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "typedefs_inline.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// \file Arrow C Implementation
+///
+/// EXPERIMENTAL. Interface subject to change.
+
+/// \page object-model Object Model
+///
+/// Except where noted, objects are not thread-safe and clients should
+/// take care to serialize accesses to methods.
+///
+/// Because this library is intended to be vendored, it provides full type
+/// definitions and encourages clients to stack or statically allocate
+/// where convenient.
+
+/// \defgroup nanoarrow-malloc Memory management
+///
+/// Non-buffer members of a struct ArrowSchema and struct ArrowArray
+/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed
+/// using ArrowFree for schemas and arrays allocated here. Buffer members
+/// are allocated using an ArrowBufferAllocator.
+
+/// \brief Allocate like malloc()
+void* ArrowMalloc(int64_t size);
+
+/// \brief Reallocate like realloc()
+void* ArrowRealloc(void* ptr, int64_t size);
+
+/// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc().
+void ArrowFree(void* ptr);
+
+/// \brief Return the default allocator
+///
+/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and
+/// ArrowFree().
+struct ArrowBufferAllocator* ArrowBufferAllocatorDefault();
+
+/// }@
+
+/// \defgroup nanoarrow-errors Error handling primitives
+/// Functions generally return an errno-compatible error code; functions that
+/// need to communicate more verbose error information accept a pointer
+/// to an ArrowError. This can be stack or statically allocated. The
+/// content of the message is undefined unless an error code has been
+/// returned.
+
+/// \brief Error type containing a UTF-8 encoded message in a fixed 1024-byte buffer.
+struct ArrowError {
+ char message[1024];
+};
+
+/// \brief Set the contents of an error using printf syntax
+ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...);
+
+/// \brief Get the contents of an error
+const char* ArrowErrorMessage(struct ArrowError* error);
+
+/// }@
+
+/// \defgroup nanoarrow-utils Utility data structures
+
+/// \brief Create a string view from a null-terminated string
+static inline struct ArrowStringView ArrowCharView(const char* value);
+
+/// \brief Arrow time unit enumerator (second, milli, micro, nano; values 0 through 3)
+///
+/// These names and values map to the corresponding arrow::TimeUnit::type
+/// enumerator.
+enum ArrowTimeUnit {
+ NANOARROW_TIME_UNIT_SECOND = 0,
+ NANOARROW_TIME_UNIT_MILLI = 1,
+ NANOARROW_TIME_UNIT_MICRO = 2,
+ NANOARROW_TIME_UNIT_NANO = 3
+};
+
+/// }@
+
+/// \defgroup nanoarrow-schema Schema producer helpers
+/// These functions allocate, copy, and destroy ArrowSchema structures
+
+/// \brief Initialize the fields of a schema
+///
+/// Initializes the fields and release callback of schema. Caller
+/// is responsible for calling the schema->release callback if
+/// NANOARROW_OK is returned.
+ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType
type);
+
+/// \brief Initialize the fields of a fixed-size schema
+///
+/// Returns EINVAL for fixed_size <= 0 or for data_type that is not
+/// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST.
+ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
+ enum ArrowType data_type, int32_t
fixed_size);
+
+/// \brief Initialize the fields of a decimal schema
+///
+/// Returns EINVAL for decimal_precision <= 0 or for data_type that is not
+/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256.
+ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
+ enum ArrowType data_type, int32_t
decimal_precision,
+ int32_t decimal_scale);
+
+/// \brief Initialize the fields of a time, timestamp, or duration schema
+///
+/// Returns EINVAL for data_type that is not
+/// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64,
+/// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The
+/// timezone parameter must be NULL for a non-timestamp data_type.
+ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
+ enum ArrowType data_type,
+ enum ArrowTimeUnit time_unit,
+ const char* timezone);
+
+/// \brief Make a (recursive) copy of a schema
+///
+/// Allocates and copies fields of schema into schema_out.
+ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
+ struct ArrowSchema* schema_out);
+
+/// \brief Copy format into schema->format
+///
+/// schema must have been allocated using ArrowSchemaInit or
+/// ArrowSchemaDeepCopy.
+ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char*
format);
+
+/// \brief Copy name into schema->name
+///
+/// schema must have been allocated using ArrowSchemaInit or
+/// ArrowSchemaDeepCopy.
+ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char*
name);
+
+/// \brief Copy metadata into schema->metadata
+///
+/// schema must have been allocated using ArrowSchemaInit or
+/// ArrowSchemaDeepCopy.
+ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char*
metadata);
+
+/// \brief Allocate the schema->children array
+///
+/// Includes the memory for each child struct ArrowSchema.
+/// schema must have been allocated using ArrowSchemaInit or
+/// ArrowSchemaDeepCopy.
+ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
+ int64_t n_children);
+
+/// \brief Allocate the schema->dictionary member
+///
+/// schema must have been allocated using ArrowSchemaInit or
+/// ArrowSchemaDeepCopy.
+ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema);
+
+/// \brief Reader state for key/value pairs in schema metadata (see ArrowMetadataReaderInit() and ArrowMetadataReaderRead())
+struct ArrowMetadataReader {
+ const char* metadata;
+ int64_t offset;
+ int32_t remaining_keys;
+};
+
+/// \brief Initialize an ArrowMetadataReader
+ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
+ const char* metadata);
+
+/// \brief Read the next key/value pair from an ArrowMetadataReader
+ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
+ struct ArrowStringView* key_out,
+ struct ArrowStringView* value_out);
+
+/// \brief The number of bytes in a key/value metadata string
+int64_t ArrowMetadataSizeOf(const char* metadata);
+
+/// \brief Check for a key in schema metadata
+char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key);
+
+/// \brief Extract a value from schema metadata
+///
+/// If key does not exist in metadata, value_out is unmodified
+ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct
ArrowStringView key,
+ struct ArrowStringView* value_out);
+
+/// \brief Initialize a builder for schema metadata from key/value pairs
+///
+/// metadata can be an existing metadata string or NULL to initialize
+/// an empty metadata string.
+ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const
char* metadata);
+
+/// \brief Append a key/value pair to a buffer containing serialized metadata
+ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer,
+ struct ArrowStringView key,
+ struct ArrowStringView value);
+
+/// \brief Set a key/value pair to a buffer containing serialized metadata
+///
+/// Ensures that the only entry for key in the metadata is set to value.
+/// This function maintains the existing position of (the first instance of)
+/// key if present in the data.
+ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
+ struct ArrowStringView key,
+ struct ArrowStringView value);
+
+/// \brief Remove a key from a buffer containing serialized metadata
+ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
+ struct ArrowStringView key);
+
+/// }@
+
+/// \defgroup nanoarrow-schema-view Schema consumer helpers
+
+/// \brief A non-owning view of a parsed ArrowSchema
+///
+/// Contains more readily extractable values than a raw ArrowSchema.
+/// Clients can stack or statically allocate this structure but are
+/// encouraged to use the provided getters to ensure forward
+/// compatibility.
+struct ArrowSchemaView {
+ /// \brief A pointer to the schema represented by this view
+ struct ArrowSchema* schema;
+
+ /// \brief The data type represented by the schema
+ ///
+ /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a
+ /// non-null dictionary member; datetime types are valid values.
+ /// This value will never be NANOARROW_TYPE_EXTENSION (see
+ /// extension_name and/or extension_metadata to check for
+ /// an extension type).
+ enum ArrowType data_type;
+
+ /// \brief The storage data type represented by the schema
+ ///
+ /// This value will never be NANOARROW_TYPE_DICTIONARY,
NANOARROW_TYPE_EXTENSION
+ /// or any datetime type. This value represents only the type required to
+ /// interpret the buffers in the array.
+ enum ArrowType storage_data_type;
+
+ /// \brief The extension type name if it exists
+ ///
+ /// If the ARROW:extension:name key is present in schema.metadata,
+ /// extension_name.data will be non-NULL.
+ struct ArrowStringView extension_name;
+
+ /// \brief The extension type metadata if it exists
+ ///
+ /// If the ARROW:extension:metadata key is present in schema.metadata,
+ /// extension_metadata.data will be non-NULL.
+ struct ArrowStringView extension_metadata;
+
+ /// \brief The expected number of buffers in a paired ArrowArray
+ int32_t n_buffers;
+
+ /// \brief The index of the validity buffer or -1 if one does not exist
+ int32_t validity_buffer_id;
+
+ /// \brief The index of the offset buffer or -1 if one does not exist
+ int32_t offset_buffer_id;
+
+ /// \brief The index of the data buffer or -1 if one does not exist
+ int32_t data_buffer_id;
+
+ /// \brief The index of the type_ids buffer or -1 if one does not exist
+ int32_t type_id_buffer_id;
+
+ /// \brief Format fixed size parameter
+ ///
+ /// This value is set when parsing a fixed-size binary or fixed-size
+ /// list schema; this value is undefined for other types. For a
+ /// fixed-size binary schema this value is in bytes; for a fixed-size
+ /// list schema this value refers to the number of child elements for
+ /// each element of the parent.
+ int32_t fixed_size;
+
+ /// \brief Decimal bitwidth
+ ///
+ /// This value is set when parsing a decimal type schema;
+ /// this value is undefined for other types.
+ int32_t decimal_bitwidth;
+
+ /// \brief Decimal precision
+ ///
+ /// This value is set when parsing a decimal type schema;
+ /// this value is undefined for other types.
+ int32_t decimal_precision;
+
+ /// \brief Decimal scale
+ ///
+ /// This value is set when parsing a decimal type schema;
+ /// this value is undefined for other types.
+ int32_t decimal_scale;
+
+ /// \brief Format time unit parameter
+ ///
+ /// This value is set when parsing a date/time type. The value is
+ /// undefined for other types.
+ enum ArrowTimeUnit time_unit;
+
+ /// \brief Format timezone parameter
+ ///
+ /// This value is set when parsing a timestamp type and represents
+ /// the timezone format parameter. The ArrowStringView points to
+ /// data within the schema and the value is undefined for other types.
+ struct ArrowStringView timezone;
+
+ /// \brief Union type ids parameter
+ ///
+ /// This value is set when parsing a union type and represents
+ /// the type ids parameter. The ArrowStringView points to
+ /// data within the schema and the value is undefined for other types.
+ struct ArrowStringView union_type_ids;
+};
+
+/// \brief Initialize an ArrowSchemaView
+ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
+ struct ArrowSchema* schema, struct
ArrowError* error);
+
+/// }@
+
+/// \defgroup nanoarrow-buffer Owning, growable buffers
+
+/// \brief Initialize an ArrowBuffer
+///
+/// Initialize a buffer with a NULL, zero-size buffer using the default
+/// buffer allocator.
+static inline void ArrowBufferInit(struct ArrowBuffer* buffer);
+
+/// \brief Set a newly-initialized buffer's allocator
+///
+/// Returns EINVAL if the buffer has already been allocated.
+static inline ArrowErrorCode ArrowBufferSetAllocator(
+ struct ArrowBuffer* buffer, struct ArrowBufferAllocator* allocator);
+
+/// \brief Reset an ArrowBuffer
+///
+/// Releases the buffer using the allocator's free method if
+/// the buffer's data member is non-null, sets the data member
+/// to NULL, and sets the buffer's size and capacity to 0.
+static inline void ArrowBufferReset(struct ArrowBuffer* buffer);
+
+/// \brief Move an ArrowBuffer
+///
+/// Transfers the buffer data and lifecycle management to another
+/// address and resets buffer.
+static inline void ArrowBufferMove(struct ArrowBuffer* buffer,
+ struct ArrowBuffer* buffer_out);
+
+/// \brief Grow or shrink a buffer to a given capacity
+///
+/// When shrinking the capacity of the buffer, the buffer is only reallocated
+/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not
+/// adjust the buffer's size member except to ensure that the invariant
+/// capacity >= size remains true.
+static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
+ int64_t new_capacity_bytes,
+ char shrink_to_fit);
+
+/// \brief Ensure a buffer has at least a given additional capacity
+///
+/// Ensures that the buffer has space to append at least
+/// additional_size_bytes, overallocating when required.
+static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
+ int64_t additional_size_bytes);
+
+/// \brief Write data to buffer and increment the buffer size
+///
+/// This function does not check that buffer has the required capacity
+static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const
void* data,
+ int64_t size_bytes);
+
+/// \brief Write data to buffer and increment the buffer size
+///
+/// This function writes and ensures that the buffer has the required capacity,
+/// possibly by reallocating the buffer. Like ArrowBufferReserve, this will
+/// overallocate when reallocation is required.
+static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
+ const void* data, int64_t
size_bytes);
+
+/// \brief Write an 8-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
+ int8_t value);
+
+/// \brief Write an unsigned 8-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
+ uint8_t value);
+
+/// \brief Write a 16-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
+ int16_t value);
+
+/// \brief Write an unsigned 16-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer*
buffer,
+ uint16_t value);
+
+/// \brief Write a 32-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
+ int32_t value);
+
+/// \brief Write an unsigned 32-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer*
buffer,
+ uint32_t value);
+
+/// \brief Write a 64-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
+ int64_t value);
+
+/// \brief Write an unsigned 64-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer*
buffer,
+ uint64_t value);
+
+/// \brief Write a double to a buffer
+static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer*
buffer,
+ double value);
+
+/// \brief Write a float to a buffer
+static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
+ float value);
+
+/// }@
+
+/// \defgroup nanoarrow-bitmap Bitmap utilities
+
+/// \brief Extract a boolean value from a bitmap
+static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i);
+
+/// \brief Set a boolean value to a bitmap to true
+static inline void ArrowBitSet(uint8_t* bits, int64_t i);
+
+/// \brief Set a boolean value to a bitmap to false
+static inline void ArrowBitClear(uint8_t* bits, int64_t i);
+
+/// \brief Set a boolean value to a bitmap
+static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value);
+
+/// \brief Set a boolean value to a range in a bitmap
+static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t
length,
+ uint8_t bits_are_set);
+
+/// \brief Count true values in a bitmap
+static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from,
int64_t i_to);
+
+/// \brief Initialize an ArrowBitmap
+///
+/// Initialize the builder's buffer, empty its cache, and reset the size to zero
+static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap);
+
+/// \brief Ensure a bitmap builder has at least a given additional capacity
+///
+/// Ensures that the buffer has space to append at least
+/// additional_size_bits, overallocating when required.
+static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
+ int64_t additional_size_bits);
+
+/// \brief Grow or shrink a bitmap to a given capacity
+///
+/// When shrinking the capacity of the bitmap, the bitmap is only reallocated
+/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not
+/// adjust the buffer's size member except when shrinking new_capacity_bits
+/// to a value less than the current number of bits in the bitmap.
+static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
+ int64_t new_capacity_bits,
+ char shrink_to_fit);
+
+/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap
+static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
+ uint8_t bits_are_set, int64_t
length);
+
+/// \brief Append zero or more of the same boolean value to a bitmap
+static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
+ uint8_t bits_are_set, int64_t
length);
+
+/// \brief Append boolean values encoded as int8_t to a bitmap
+///
+/// The values must all be 0 or 1.
+static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
+ const int8_t* values, int64_t
n_values);
+
+/// \brief Append boolean values encoded as int32_t to a bitmap
+///
+/// The values must all be 0 or 1.
+static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
+ const int32_t* values, int64_t
n_values);
+
+/// \brief Reset a bitmap builder
+///
+/// Releases any memory held by buffer, empties the cache, and resets the size to zero
+static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap);
+
+/// }@
+
+/// \defgroup nanoarrow-array Array producer helpers
+/// These functions allocate, copy, and destroy ArrowArray structures
+
+/// \brief Initialize the fields of an array
+///
+/// Initializes the fields and release callback of array. Caller
+/// is responsible for calling the array->release callback if
+/// NANOARROW_OK is returned.
+ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType
storage_type);
+
+/// \brief Allocate the array->children array
+///
+/// Includes the memory for each child struct ArrowArray,
+/// whose members are marked as released and may be subsequently initialized
+/// with ArrowArrayInit or moved from an existing ArrowArray.
+/// array must have been allocated using ArrowArrayInit.
+ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t
n_children);
+
+/// \brief Allocate the array->dictionary member
+///
+/// Includes the memory for the struct ArrowArray, whose contents
+/// is marked as released and may be subsequently initialized
+/// with ArrowArrayInit or moved from an existing ArrowArray.
+/// array must have been allocated using ArrowArrayInit
+ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array);
+
+/// \brief Set the validity bitmap of an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap*
bitmap);
+
+/// \brief Set a buffer of an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
+ struct ArrowBuffer* buffer);
+
+/// \brief Get the validity bitmap of an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray*
array);
+
+/// \brief Get a buffer of an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array,
int64_t i);
+
+/// \brief Finish building an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+ char shrink_to_fit);
+
+/// }@
+
+// Inline function definitions
+#include "array_inline.h"
+#include "bitmap_inline.h"
+#include "buffer_inline.h"
+#include "utils_inline.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/vendor/nanoarrow/schema.c b/c/vendor/nanoarrow/schema.c
new file mode 100644
index 0000000..c4220d9
--- /dev/null
+++ b/c/vendor/nanoarrow/schema.c
@@ -0,0 +1,475 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+// Release callback installed by ArrowSchemaInit(): frees every member owned
+// by the schema and then marks the schema as released (release == NULL).
+void ArrowSchemaRelease(struct ArrowSchema* schema) {
+  // Strings owned by this schema
+  if (schema->format != NULL) {
+    ArrowFree((void*)schema->format);
+  }
+  if (schema->name != NULL) {
+    ArrowFree((void*)schema->name);
+  }
+  if (schema->metadata != NULL) {
+    ArrowFree((void*)schema->metadata);
+  }
+
+  // The storage for each child struct belongs to this schema, but the child
+  // contents may have been produced elsewhere, so a child that is still live
+  // is released through its own callback before its storage is freed.
+  if (schema->children != NULL) {
+    for (int64_t child_i = 0; child_i < schema->n_children; child_i++) {
+      struct ArrowSchema* child = schema->children[child_i];
+      if (child == NULL) {
+        continue;
+      }
+
+      if (child->release != NULL) {
+        child->release(child);
+      }
+      ArrowFree(child);
+    }
+
+    ArrowFree(schema->children);
+  }
+
+  // Same ownership rule for the dictionary: release contents, then storage.
+  struct ArrowSchema* dictionary = schema->dictionary;
+  if (dictionary != NULL) {
+    if (dictionary->release != NULL) {
+      dictionary->release(dictionary);
+    }
+    ArrowFree(dictionary);
+  }
+
+  // private data not currently used
+  if (schema->private_data != NULL) {
+    ArrowFree(schema->private_data);
+  }
+
+  schema->release = NULL;
+}
+
+// Map a parameter-free data type to its format string. Returns NULL for
+// NANOARROW_TYPE_UNINITIALIZED and for any type not listed in the table
+// (i.e., types whose format string needs parameters or is unknown here).
+const char* ArrowSchemaFormatTemplate(enum ArrowType data_type) {
+  static const struct {
+    enum ArrowType type;
+    const char* format;
+  } kFormatTemplates[] = {
+      {NANOARROW_TYPE_NA, "n"},
+      {NANOARROW_TYPE_BOOL, "b"},
+
+      {NANOARROW_TYPE_UINT8, "C"},
+      {NANOARROW_TYPE_INT8, "c"},
+      {NANOARROW_TYPE_UINT16, "S"},
+      {NANOARROW_TYPE_INT16, "s"},
+      {NANOARROW_TYPE_UINT32, "I"},
+      {NANOARROW_TYPE_INT32, "i"},
+      {NANOARROW_TYPE_UINT64, "L"},
+      {NANOARROW_TYPE_INT64, "l"},
+
+      {NANOARROW_TYPE_HALF_FLOAT, "e"},
+      {NANOARROW_TYPE_FLOAT, "f"},
+      {NANOARROW_TYPE_DOUBLE, "g"},
+
+      {NANOARROW_TYPE_STRING, "u"},
+      {NANOARROW_TYPE_LARGE_STRING, "U"},
+      {NANOARROW_TYPE_BINARY, "z"},
+      {NANOARROW_TYPE_LARGE_BINARY, "Z"},
+
+      {NANOARROW_TYPE_DATE32, "tdD"},
+      {NANOARROW_TYPE_DATE64, "tdm"},
+      {NANOARROW_TYPE_INTERVAL_MONTHS, "tiM"},
+      {NANOARROW_TYPE_INTERVAL_DAY_TIME, "tiD"},
+      {NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO, "tin"},
+
+      {NANOARROW_TYPE_LIST, "+l"},
+      {NANOARROW_TYPE_LARGE_LIST, "+L"},
+      {NANOARROW_TYPE_STRUCT, "+s"},
+      {NANOARROW_TYPE_MAP, "+m"},
+  };
+
+  const size_t n_templates = sizeof(kFormatTemplates) / sizeof(kFormatTemplates[0]);
+  for (size_t i = 0; i < n_templates; i++) {
+    if (kFormatTemplates[i].type == data_type) {
+      return kFormatTemplates[i].format;
+    }
+  }
+
+  return NULL;
+}
+
+// Initialize every member of schema and set its format from data_type.
+//
+// Returns EINVAL if data_type has no parameter-free format string and is not
+// NANOARROW_TYPE_UNINITIALIZED, or the error from ArrowSchemaSetFormat().
+// On any error the schema has already been released.
+ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType data_type) {
+  // Start from an empty, nullable schema that is safe to release. The
+  // dictionary stays NULL because it must be NULL for arrays that are not
+  // dictionary-encoded.
+  schema->release = &ArrowSchemaRelease;
+  schema->private_data = NULL;
+  schema->dictionary = NULL;
+  schema->children = NULL;
+  schema->n_children = 0;
+  schema->flags = ARROW_FLAG_NULLABLE;
+  schema->metadata = NULL;
+  schema->name = NULL;
+  schema->format = NULL;
+
+  // A NULL template is only acceptable when the caller explicitly asked for
+  // an uninitialized format.
+  const char* format = ArrowSchemaFormatTemplate(data_type);
+  int status;
+  if (format == NULL && data_type != NANOARROW_TYPE_UNINITIALIZED) {
+    status = EINVAL;
+  } else {
+    status = ArrowSchemaSetFormat(schema, format);
+  }
+
+  if (status != NANOARROW_OK) {
+    schema->release(schema);
+  }
+
+  return status;
+}
+
+// Initialize schema as fixed-size binary ("w:<n>") or fixed-size list
+// ("+w:<n>"). Returns EINVAL for fixed_size <= 0 or any other data_type;
+// on any error the schema has already been released.
+ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
+                                        enum ArrowType data_type, int32_t fixed_size) {
+  int status = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  // The list format is the binary format with a leading '+'. The prefix
+  // stays NULL when either argument is invalid.
+  const char* prefix = NULL;
+  if (fixed_size > 0) {
+    if (data_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) {
+      prefix = "";
+    } else if (data_type == NANOARROW_TYPE_FIXED_SIZE_LIST) {
+      prefix = "+";
+    }
+  }
+
+  if (prefix == NULL) {
+    schema->release(schema);
+    return EINVAL;
+  }
+
+  char format[64];
+  int length = snprintf(format, sizeof(format), "%sw:%d", prefix, (int)fixed_size);
+  format[length] = '\0';
+
+  status = ArrowSchemaSetFormat(schema, format);
+  if (status != NANOARROW_OK) {
+    schema->release(schema);
+  }
+
+  return status;
+}
+
+// Initialize schema as decimal128 ("d:<precision>,<scale>") or decimal256
+// ("d:<precision>,<scale>,256"). Returns EINVAL for decimal_precision <= 0
+// or any other data_type; on any error the schema has already been released.
+ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
+                                      enum ArrowType data_type,
+                                      int32_t decimal_precision,
+                                      int32_t decimal_scale) {
+  int status = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  // The two formats differ only in the trailing bit width. The suffix stays
+  // NULL when either the precision or the type is invalid.
+  const char* suffix = NULL;
+  if (decimal_precision > 0) {
+    if (data_type == NANOARROW_TYPE_DECIMAL128) {
+      suffix = "";
+    } else if (data_type == NANOARROW_TYPE_DECIMAL256) {
+      suffix = ",256";
+    }
+  }
+
+  if (suffix == NULL) {
+    schema->release(schema);
+    return EINVAL;
+  }
+
+  char format[64];
+  int length = snprintf(format, sizeof(format), "d:%d,%d%s", decimal_precision,
+                        decimal_scale, suffix);
+  format[length] = '\0';
+
+  status = ArrowSchemaSetFormat(schema, format);
+  if (status != NANOARROW_OK) {
+    schema->release(schema);
+  }
+
+  return status;
+}
+
+// Format-string suffix for a time unit ("s", "m", "u" or "n"), or NULL when
+// time_unit is not one of the four ArrowTimeUnit enumerators.
+static const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
+  if (time_unit == NANOARROW_TIME_UNIT_SECOND) {
+    return "s";
+  }
+  if (time_unit == NANOARROW_TIME_UNIT_MILLI) {
+    return "m";
+  }
+  if (time_unit == NANOARROW_TIME_UNIT_MICRO) {
+    return "u";
+  }
+  if (time_unit == NANOARROW_TIME_UNIT_NANO) {
+    return "n";
+  }
+
+  return NULL;
+}
+
+// Initialize schema as a time32, time64, timestamp or duration type.
+//
+// Returns EINVAL when:
+//   - time_unit is not a valid ArrowTimeUnit,
+//   - data_type is not TIME32, TIME64, TIMESTAMP or DURATION,
+//   - timezone is non-NULL for a non-timestamp type,
+//   - the unit does not match the time width: time32 and time64 share the
+//     "tt<unit>" format prefix and are distinguished only by the unit
+//     (s/m => time32, u/n => time64), so e.g. TIME64 + SECOND would
+//     otherwise silently yield a time32 schema.
+// Returns ERANGE when the format string (i.e., the timezone) does not fit in
+// the internal buffer. On any error the schema has already been released.
+ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
+                                       enum ArrowType data_type,
+                                       enum ArrowTimeUnit time_unit,
+                                       const char* timezone) {
+  int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  const char* time_unit_str = ArrowTimeUnitString(time_unit);
+  if (time_unit_str == NULL) {
+    schema->release(schema);
+    return EINVAL;
+  }
+
+  char buffer[128];
+  int n_chars;
+  switch (data_type) {
+    case NANOARROW_TYPE_TIME32:
+      // time32 only exists with second or millisecond resolution
+      if (timezone != NULL || (time_unit != NANOARROW_TIME_UNIT_SECOND &&
+                               time_unit != NANOARROW_TIME_UNIT_MILLI)) {
+        schema->release(schema);
+        return EINVAL;
+      }
+      n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str);
+      break;
+    case NANOARROW_TYPE_TIME64:
+      // time64 only exists with microsecond or nanosecond resolution
+      if (timezone != NULL || (time_unit != NANOARROW_TIME_UNIT_MICRO &&
+                               time_unit != NANOARROW_TIME_UNIT_NANO)) {
+        schema->release(schema);
+        return EINVAL;
+      }
+      n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str);
+      break;
+    case NANOARROW_TYPE_TIMESTAMP:
+      // A missing timezone is encoded as an empty string after the colon
+      if (timezone == NULL) {
+        timezone = "";
+      }
+      n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone);
+      break;
+    case NANOARROW_TYPE_DURATION:
+      if (timezone != NULL) {
+        schema->release(schema);
+        return EINVAL;
+      }
+      n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str);
+      break;
+    default:
+      schema->release(schema);
+      return EINVAL;
+  }
+
+  // snprintf() reports the untruncated length (or a negative value on
+  // error); either way the buffer does not hold a usable format string.
+  if (n_chars < 0 || (size_t)n_chars >= sizeof(buffer)) {
+    schema->release(schema);
+    return ERANGE;
+  }
+
+  result = ArrowSchemaSetFormat(schema, buffer);
+  if (result != NANOARROW_OK) {
+    schema->release(schema);
+    return result;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Replace schema->format with a copy of format (or with NULL).
+//
+// The copy is made before the previous string is freed, so format may alias
+// schema->format, and the previous value is left in place when allocation
+// fails (ENOMEM).
+ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) {
+  char* format_copy = NULL;
+
+  if (format != NULL) {
+    size_t format_size = strlen(format) + 1;
+    format_copy = (char*)ArrowMalloc(format_size);
+    if (format_copy == NULL) {
+      return ENOMEM;
+    }
+
+    memcpy(format_copy, format, format_size);
+  }
+
+  if (schema->format != NULL) {
+    ArrowFree((void*)schema->format);
+  }
+
+  schema->format = format_copy;
+  return NANOARROW_OK;
+}
+
+// Replace schema->name with a copy of name (or with NULL).
+//
+// The copy is made before the previous string is freed, so name may alias
+// schema->name, and the previous value is left in place when allocation
+// fails (ENOMEM).
+ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) {
+  char* name_copy = NULL;
+
+  if (name != NULL) {
+    size_t name_size = strlen(name) + 1;
+    name_copy = (char*)ArrowMalloc(name_size);
+    if (name_copy == NULL) {
+      return ENOMEM;
+    }
+
+    memcpy(name_copy, name, name_size);
+  }
+
+  if (schema->name != NULL) {
+    ArrowFree((void*)schema->name);
+  }
+
+  schema->name = name_copy;
+  return NANOARROW_OK;
+}
+
+// Replace schema->metadata with a copy of the serialized metadata (or NULL).
+//
+// The number of bytes to copy comes from ArrowMetadataSizeOf(). The copy is
+// made before the previous buffer is freed, so metadata may alias
+// schema->metadata, and the previous value is left in place when allocation
+// fails (ENOMEM).
+ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema,
+                                      const char* metadata) {
+  char* metadata_copy = NULL;
+
+  if (metadata != NULL) {
+    size_t metadata_size = ArrowMetadataSizeOf(metadata);
+    metadata_copy = (char*)ArrowMalloc(metadata_size);
+    if (metadata_copy == NULL) {
+      return ENOMEM;
+    }
+
+    memcpy(metadata_copy, metadata, metadata_size);
+  }
+
+  if (schema->metadata != NULL) {
+    ArrowFree((void*)schema->metadata);
+  }
+
+  schema->metadata = metadata_copy;
+  return NANOARROW_OK;
+}
+
+// Allocate schema->children along with one (released) struct ArrowSchema per
+// child. Returns EEXIST if children were already allocated and ENOMEM if an
+// allocation fails; nothing is allocated for n_children <= 0.
+ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
+                                           int64_t n_children) {
+  if (schema->children != NULL) {
+    return EEXIST;
+  }
+
+  // Nothing to allocate
+  if (n_children <= 0) {
+    return NANOARROW_OK;
+  }
+
+  struct ArrowSchema** slots =
+      (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*));
+  schema->children = slots;
+  if (slots == NULL) {
+    return ENOMEM;
+  }
+
+  schema->n_children = n_children;
+
+  // Zero every slot up front so that slots not yet filled are NULL if a
+  // later child allocation fails part-way through.
+  memset(slots, 0, n_children * sizeof(struct ArrowSchema*));
+
+  int64_t child_i = 0;
+  while (child_i < n_children) {
+    struct ArrowSchema* child =
+        (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
+    slots[child_i] = child;
+    if (child == NULL) {
+      return ENOMEM;
+    }
+
+    // Mark the child as released until it is initialized
+    child->release = NULL;
+    child_i++;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Allocate storage for schema->dictionary and mark it as released.
+// Returns EEXIST if a dictionary is already present and ENOMEM if the
+// allocation fails.
+ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) {
+  if (schema->dictionary != NULL) {
+    return EEXIST;
+  }
+
+  struct ArrowSchema* dictionary =
+      (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
+  schema->dictionary = dictionary;
+  if (dictionary == NULL) {
+    return ENOMEM;
+  }
+
+  dictionary->release = NULL;
+  return NANOARROW_OK;
+}
+
+// Recursively copy schema (format, name, metadata, flags, children and
+// dictionary) into schema_out.
+//
+// On success the caller owns schema_out and must eventually call
+// schema_out->release. On failure the error code of the failing step is
+// returned and schema_out has already been released.
+ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
+                                   struct ArrowSchema* schema_out) {
+  // Start from an empty schema: the format is overwritten immediately below,
+  // so there is no need to allocate a placeholder format string first.
+  ArrowErrorCode result = ArrowSchemaInit(schema_out, NANOARROW_TYPE_UNINITIALIZED);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  result = ArrowSchemaSetFormat(schema_out, schema->format);
+  if (result != NANOARROW_OK) {
+    schema_out->release(schema_out);
+    return result;
+  }
+
+  result = ArrowSchemaSetName(schema_out, schema->name);
+  if (result != NANOARROW_OK) {
+    schema_out->release(schema_out);
+    return result;
+  }
+
+  result = ArrowSchemaSetMetadata(schema_out, schema->metadata);
+  if (result != NANOARROW_OK) {
+    schema_out->release(schema_out);
+    return result;
+  }
+
+  // ArrowSchemaInit() resets flags to ARROW_FLAG_NULLABLE; carry over the
+  // flags of the source so the copy describes the same field.
+  schema_out->flags = schema->flags;
+
+  result = ArrowSchemaAllocateChildren(schema_out, schema->n_children);
+  if (result != NANOARROW_OK) {
+    schema_out->release(schema_out);
+    return result;
+  }
+
+  for (int64_t i = 0; i < schema->n_children; i++) {
+    result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]);
+    if (result != NANOARROW_OK) {
+      schema_out->release(schema_out);
+      return result;
+    }
+  }
+
+  if (schema->dictionary != NULL) {
+    result = ArrowSchemaAllocateDictionary(schema_out);
+    if (result != NANOARROW_OK) {
+      schema_out->release(schema_out);
+      return result;
+    }
+
+    result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
+    if (result != NANOARROW_OK) {
+      schema_out->release(schema_out);
+      return result;
+    }
+  }
+
+  return NANOARROW_OK;
+}
diff --git a/c/vendor/nanoarrow/schema_view.c b/c/vendor/nanoarrow/schema_view.c
new file mode 100644
index 0000000..7a3ca93
--- /dev/null
+++ b/c/vendor/nanoarrow/schema_view.c
@@ -0,0 +1,679 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+// Describe a type whose logical and storage types are both data_type and
+// that uses two buffers: validity at index 0 and data at index 1.
+static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view,
+                                        enum ArrowType data_type) {
+  struct ArrowSchemaView* view = schema_view;
+
+  // Buffer layout
+  view->n_buffers = 2;
+  view->validity_buffer_id = 0;
+  view->data_buffer_id = 1;
+
+  // Logical and storage type are identical for these types
+  view->storage_data_type = data_type;
+  view->data_type = data_type;
+}
+
+// Describe a type whose logical and storage types are both data_type and
+// that uses three buffers: validity (0), offsets (1) and data (2).
+static void ArrowSchemaViewSetBinaryLike(struct ArrowSchemaView* schema_view,
+                                         enum ArrowType data_type) {
+  schema_view->data_type = data_type;
+  schema_view->storage_data_type = data_type;
+  schema_view->n_buffers = 3;
+  schema_view->validity_buffer_id = 0;
+  schema_view->offset_buffer_id = 1;
+  schema_view->data_buffer_id = 2;
+}
+
+// Parse a decimal format string: 'd:precision,scale[,bitwidth]'.
+//
+// The bitwidth defaults to 128 when omitted; only 128 and 256 are accepted.
+static ArrowErrorCode ArrowSchemaViewParseDecimal(struct ArrowSchemaView* schema_view,
+                                                  const char* format,
+                                                  const char** format_end_out,
+                                                  struct ArrowError* error) {
+  const char* parse_start;
+  char* parse_end;
+
+  if (format[1] != ':' || format[2] == '\0') {
+    ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'");
+    return EINVAL;
+  }
+
+  parse_start = format + 2;
+  schema_view->decimal_precision = strtol(parse_start, &parse_end, 10);
+  if (parse_end == parse_start || parse_end[0] != ',') {
+    ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'");
+    return EINVAL;
+  }
+
+  parse_start = parse_end + 1;
+  schema_view->decimal_scale = strtol(parse_start, &parse_end, 10);
+  if (parse_end == parse_start) {
+    ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'");
+    return EINVAL;
+  }
+
+  if (parse_end[0] != ',') {
+    // No explicit bitwidth was given
+    schema_view->decimal_bitwidth = 128;
+  } else {
+    parse_start = parse_end + 1;
+    schema_view->decimal_bitwidth = strtol(parse_start, &parse_end, 10);
+    if (parse_start == parse_end) {
+      ArrowErrorSet(error, "Expected bitwidth following 'd:precision,scale,'");
+      return EINVAL;
+    }
+  }
+
+  *format_end_out = parse_end;
+
+  switch (schema_view->decimal_bitwidth) {
+    case 128:
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128);
+      return NANOARROW_OK;
+    case 256:
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256);
+      return NANOARROW_OK;
+    default:
+      ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %d",
+                    (int)schema_view->decimal_bitwidth);
+      return EINVAL;
+  }
+}
+
+// Parse a nested-type format string (one starting with '+').
+static ArrowErrorCode ArrowSchemaViewParseNested(struct ArrowSchemaView* schema_view,
+                                                 const char* format,
+                                                 const char** format_end_out,
+                                                 struct ArrowError* error) {
+  char* parse_end;
+
+  switch (format[1]) {
+    // list: validity + offsets
+    case 'l':
+      schema_view->storage_data_type = NANOARROW_TYPE_LIST;
+      schema_view->data_type = NANOARROW_TYPE_LIST;
+      schema_view->n_buffers = 2;
+      schema_view->validity_buffer_id = 0;
+      schema_view->offset_buffer_id = 1;
+      *format_end_out = format + 2;
+      return NANOARROW_OK;
+
+    // large list: validity + (large) offsets
+    case 'L':
+      schema_view->storage_data_type = NANOARROW_TYPE_LARGE_LIST;
+      schema_view->data_type = NANOARROW_TYPE_LARGE_LIST;
+      schema_view->n_buffers = 2;
+      schema_view->validity_buffer_id = 0;
+      schema_view->offset_buffer_id = 1;
+      *format_end_out = format + 2;
+      return NANOARROW_OK;
+
+    // fixed-size list ('+w:<width>'): just a validity buffer
+    case 'w':
+      if (format[2] != ':' || format[3] == '\0') {
+        ArrowErrorSet(error, "Expected ':<width>' following '+w'");
+        return EINVAL;
+      }
+
+      schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
+      schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
+      schema_view->n_buffers = 1;
+      schema_view->validity_buffer_id = 0;
+      schema_view->fixed_size = strtol(format + 3, &parse_end, 10);
+      *format_end_out = parse_end;
+      return NANOARROW_OK;
+
+    // struct: just a validity buffer
+    case 's':
+      schema_view->storage_data_type = NANOARROW_TYPE_STRUCT;
+      schema_view->data_type = NANOARROW_TYPE_STRUCT;
+      schema_view->n_buffers = 1;
+      schema_view->validity_buffer_id = 0;
+      *format_end_out = format + 2;
+      return NANOARROW_OK;
+
+    // map: just a validity buffer
+    case 'm':
+      schema_view->storage_data_type = NANOARROW_TYPE_MAP;
+      schema_view->data_type = NANOARROW_TYPE_MAP;
+      schema_view->n_buffers = 1;
+      schema_view->validity_buffer_id = 0;
+      *format_end_out = format + 2;
+      return NANOARROW_OK;
+
+    // unions ('+ud:<type_ids>' or '+us:<type_ids>')
+    case 'u':
+      switch (format[2]) {
+        case 'd':
+          schema_view->storage_data_type = NANOARROW_TYPE_DENSE_UNION;
+          schema_view->data_type = NANOARROW_TYPE_DENSE_UNION;
+          schema_view->n_buffers = 2;
+          schema_view->type_id_buffer_id = 0;
+          schema_view->offset_buffer_id = 1;
+          break;
+        case 's':
+          schema_view->storage_data_type = NANOARROW_TYPE_SPARSE_UNION;
+          schema_view->data_type = NANOARROW_TYPE_SPARSE_UNION;
+          schema_view->n_buffers = 1;
+          schema_view->type_id_buffer_id = 0;
+          break;
+        default:
+          ArrowErrorSet(error,
+                        "Expected union format string +us:<type_ids> or "
+                        "+ud:<type_ids> but found '%s'",
+                        format);
+          return EINVAL;
+      }
+
+      if (format[3] != ':') {
+        ArrowErrorSet(error,
+                      "Expected union format string +us:<type_ids> or +ud:<type_ids> "
+                      "but found '%s'",
+                      format);
+        return EINVAL;
+      }
+
+      // Everything after the ':' is the type id list; it is not validated here
+      schema_view->union_type_ids.data = format + 4;
+      schema_view->union_type_ids.n_bytes = strlen(format + 4);
+      *format_end_out = format + strlen(format);
+      return NANOARROW_OK;
+
+    // Without this branch an unknown nested type would fall out of this
+    // switch and be reported as a malformed date/time format.
+    default:
+      ArrowErrorSet(error, "Expected nested type format string but found '%s'", format);
+      return EINVAL;
+  }
+}
+
+// Parse a date/time format string (one starting with 't').
+static ArrowErrorCode ArrowSchemaViewParseTemporal(struct ArrowSchemaView* schema_view,
+                                                   const char* format,
+                                                   const char** format_end_out,
+                                                   struct ArrowError* error) {
+  switch (format[1]) {
+    // date
+    case 'd':
+      switch (format[2]) {
+        case 'D':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+          schema_view->data_type = NANOARROW_TYPE_DATE32;
+          break;
+        case 'm':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+          schema_view->data_type = NANOARROW_TYPE_DATE64;
+          break;
+        default:
+          ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'",
+                        format + 2);
+          return EINVAL;
+      }
+
+      *format_end_out = format + 3;
+      return NANOARROW_OK;
+
+    // time of day: 32-bit for second/milli, 64-bit for micro/nano
+    case 't':
+      switch (format[2]) {
+        case 's':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+          schema_view->data_type = NANOARROW_TYPE_TIME32;
+          schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+          break;
+        case 'm':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+          schema_view->data_type = NANOARROW_TYPE_TIME32;
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+          break;
+        case 'u':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+          schema_view->data_type = NANOARROW_TYPE_TIME64;
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+          break;
+        case 'n':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+          schema_view->data_type = NANOARROW_TYPE_TIME64;
+          schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+          break;
+        default:
+          ArrowErrorSet(
+              error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'",
+              format + 2);
+          return EINVAL;
+      }
+
+      *format_end_out = format + 3;
+      return NANOARROW_OK;
+
+    // timestamp ('ts<unit>:<timezone>')
+    case 's':
+      switch (format[2]) {
+        case 's':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+          break;
+        case 'm':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+          break;
+        case 'u':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+          break;
+        case 'n':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+          break;
+        default:
+          ArrowErrorSet(
+              error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'",
+              format + 2);
+          return EINVAL;
+      }
+
+      // Timestamps are stored as 64-bit integers whatever the unit
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+      schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
+
+      if (format[3] != ':') {
+        ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format,
+                      format + 3);
+        return EINVAL;
+      }
+
+      // The (possibly empty) timezone is the remainder of the string
+      schema_view->timezone.data = format + 4;
+      schema_view->timezone.n_bytes = strlen(format + 4);
+      *format_end_out = format + strlen(format);
+      return NANOARROW_OK;
+
+    // duration
+    case 'D':
+      switch (format[2]) {
+        case 's':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+          break;
+        case 'm':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+          break;
+        case 'u':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+          break;
+        case 'n':
+          schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+          break;
+        default:
+          ArrowErrorSet(
+              error, "Expected 's', 'm', 'u', or 'n' following 'tD' but found '%s'",
+              format + 2);
+          return EINVAL;
+      }
+
+      // Durations are stored as 64-bit integers whatever the unit
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+      schema_view->data_type = NANOARROW_TYPE_DURATION;
+      *format_end_out = format + 3;
+      return NANOARROW_OK;
+
+    // interval
+    case 'i':
+      switch (format[2]) {
+        case 'M':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS);
+          break;
+        case 'D':
+          ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME);
+          break;
+        case 'n':
+          ArrowSchemaViewSetPrimitive(schema_view,
+                                      NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO);
+          break;
+        default:
+          ArrowErrorSet(error,
+                        "Expected 'M', 'D', or 'n' following 'ti' but found '%s'",
+                        format + 2);
+          return EINVAL;
+      }
+
+      *format_end_out = format + 3;
+      return NANOARROW_OK;
+
+    default:
+      ArrowErrorSet(
+          error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'",
+          format + 1);
+      return EINVAL;
+  }
+}
+
+// Parse an Arrow C data interface format string into schema_view.
+//
+// On success the data type, storage type, buffer layout and any type
+// parameters (fixed size, decimal precision/scale/bitwidth, time unit,
+// timezone, union type ids) are filled in and *format_end_out points just
+// past the last character consumed. The caller is expected to check that
+// this is the end of the string.
+//
+// On failure EINVAL is returned and a message is recorded through
+// ArrowErrorSet(); the contents of schema_view are then unspecified.
+static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
+                                           const char* format,
+                                           const char** format_end_out,
+                                           struct ArrowError* error) {
+  char* parse_end;
+
+  // -1 marks a buffer that the parsed type does not have
+  schema_view->validity_buffer_id = -1;
+  schema_view->offset_buffer_id = -1;
+  schema_view->data_buffer_id = -1;
+  schema_view->type_id_buffer_id = -1;
+  *format_end_out = format;
+
+  switch (format[0]) {
+    // Single-character formats break out of the switch and share the
+    // epilogue below; everything else returns from within its own case.
+    case 'n':
+      schema_view->data_type = NANOARROW_TYPE_NA;
+      schema_view->storage_data_type = NANOARROW_TYPE_NA;
+      schema_view->n_buffers = 0;
+      break;
+    case 'b':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL);
+      break;
+    case 'c':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8);
+      break;
+    case 'C':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8);
+      break;
+    case 's':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16);
+      break;
+    case 'S':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16);
+      break;
+    case 'i':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+      break;
+    case 'I':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32);
+      break;
+    case 'l':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+      break;
+    case 'L':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64);
+      break;
+    case 'e':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT);
+      break;
+    case 'f':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT);
+      break;
+    case 'g':
+      ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE);
+      break;
+
+    // validity + offset + data
+    case 'z':
+      ArrowSchemaViewSetBinaryLike(schema_view, NANOARROW_TYPE_BINARY);
+      break;
+    case 'u':
+      ArrowSchemaViewSetBinaryLike(schema_view, NANOARROW_TYPE_STRING);
+      break;
+
+    // validity + large_offset + data
+    case 'Z':
+      ArrowSchemaViewSetBinaryLike(schema_view, NANOARROW_TYPE_LARGE_BINARY);
+      break;
+    case 'U':
+      ArrowSchemaViewSetBinaryLike(schema_view, NANOARROW_TYPE_LARGE_STRING);
+      break;
+
+    // decimal
+    case 'd':
+      return ArrowSchemaViewParseDecimal(schema_view, format, format_end_out, error);
+
+    // fixed-size binary ('w:<width>'): validity + data
+    case 'w':
+      schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
+      schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
+      if (format[1] != ':' || format[2] == '\0') {
+        ArrowErrorSet(error, "Expected ':<width>' following 'w'");
+        return EINVAL;
+      }
+
+      schema_view->n_buffers = 2;
+      schema_view->validity_buffer_id = 0;
+      schema_view->data_buffer_id = 1;
+      schema_view->fixed_size = strtol(format + 2, &parse_end, 10);
+      *format_end_out = parse_end;
+      return NANOARROW_OK;
+
+    // nested types
+    case '+':
+      return ArrowSchemaViewParseNested(schema_view, format, format_end_out, error);
+
+    // date/time types
+    case 't':
+      return ArrowSchemaViewParseTemporal(schema_view, format, format_end_out, error);
+
+    default:
+      ArrowErrorSet(error, "Unknown format: '%s'", format);
+      return EINVAL;
+  }
+
+  // Shared epilogue for the single-character formats
+  *format_end_out = format + 1;
+  return NANOARROW_OK;
+}
+
+// Check the number of children of schema_view->schema.
+//
+// n_children is the exact number of children required, or -1 to accept any
+// number. The children are not fully validated; they are only checked to be
+// non-NULL and not yet released so that inspecting them cannot crash.
+// Returns NANOARROW_OK, or EINVAL with a message recorded in error.
+static ArrowErrorCode ArrowSchemaViewValidateNChildren(
+    struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) {
+  const struct ArrowSchema* schema = schema_view->schema;
+
+  if (n_children != -1 && schema->n_children != n_children) {
+    ArrowErrorSet(error, "Expected schema with %d children but found %d children",
+                  (int)n_children, (int)schema->n_children);
+    return EINVAL;
+  }
+
+  // The children array itself must exist before any element is read
+  if (schema->n_children > 0 && schema->children == NULL) {
+    ArrowErrorSet(error,
+                  "Expected schema with %d children but schema->children is NULL",
+                  (int)schema->n_children);
+    return EINVAL;
+  }
+
+  // Don't do a full validation of children but do check that they won't
+  // segfault if inspected
+  for (int64_t i = 0; i < schema->n_children; i++) {
+    const struct ArrowSchema* child = schema->children[i];
+
+    // The index is cast because the messages use %d, which expects an int
+    if (child == NULL) {
+      ArrowErrorSet(error,
+                    "Expected valid schema at schema->children[%d] but found NULL",
+                    (int)i);
+      return EINVAL;
+    }
+
+    if (child->release == NULL) {
+      ArrowErrorSet(
+          error,
+          "Expected valid schema at schema->children[%d] but found a released schema",
+          (int)i);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Validate a union schema: any number of children is accepted, and only the
+// generic child checks of ArrowSchemaViewValidateNChildren() are performed.
+static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view,
+                                                   struct ArrowError* error) {
+  const int64_t any_n_children = -1;
+  ArrowErrorCode status =
+      ArrowSchemaViewValidateNChildren(schema_view, any_n_children, error);
+  return status;
+}
+
+// Validate a map schema: it must have exactly one child, and that child must
+// be a struct (format '+s') with exactly two children.
+// Returns NANOARROW_OK, or EINVAL with a message recorded in error.
+static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view,
+                                                 struct ArrowError* error) {
+  int result = ArrowSchemaViewValidateNChildren(schema_view, 1, error);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  // Non-NULL and not released: guaranteed by the check above
+  const struct ArrowSchema* entries = schema_view->schema->children[0];
+
+  if (entries->n_children != 2) {
+    ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d",
+                  (int)entries->n_children);
+    return EINVAL;
+  }
+
+  // The child's format has not been inspected yet and may be NULL, which
+  // must reach neither strcmp() nor the '%s' conversion.
+  if (entries->format == NULL || strcmp(entries->format, "+s") != 0) {
+    ArrowErrorSet(error,
+                  "Expected format of child of map type to be '+s' but found '%s'",
+                  entries->format == NULL ? "(NULL)" : entries->format);
+    return EINVAL;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Validate a dictionary-encoded schema: the storage (index) type must be one
+// of the 8/16/32/64-bit signed or unsigned integer types, and the attached
+// dictionary schema must itself pass ArrowSchemaViewInit().
+static ArrowErrorCode ArrowSchemaViewValidateDictionary(
+    struct ArrowSchemaView* schema_view, struct ArrowError* error) {
+  const enum ArrowType index_type = schema_view->storage_data_type;
+  const int index_is_integer =
+      index_type == NANOARROW_TYPE_UINT8 || index_type == NANOARROW_TYPE_INT8 ||
+      index_type == NANOARROW_TYPE_UINT16 || index_type == NANOARROW_TYPE_INT16 ||
+      index_type == NANOARROW_TYPE_UINT32 || index_type == NANOARROW_TYPE_INT32 ||
+      index_type == NANOARROW_TYPE_UINT64 || index_type == NANOARROW_TYPE_INT64;
+
+  if (!index_is_integer) {
+    ArrowErrorSet(
+        error,
+        "Expected dictionary schema index type to be an integral type but found '%s'",
+        schema_view->schema->format);
+    return EINVAL;
+  }
+
+  // The resulting view is discarded; only the validation result matters
+  struct ArrowSchemaView values_view;
+  return ArrowSchemaViewInit(&values_view, schema_view->schema->dictionary, error);
+}
+
+// Check that schema_view->schema is structurally consistent with data_type.
+//
+// This is called with the storage type and, when it differs, again with the
+// logical type. The checks cover the number of children and the
+// type-specific rules for fixed-size binary, map and dictionary types.
+// Returns NANOARROW_OK, or EINVAL with a message recorded in error.
+static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view,
+                                              enum ArrowType data_type,
+                                              struct ArrowError* error) {
+  switch (data_type) {
+    // Types that must not have children
+    case NANOARROW_TYPE_NA:
+    case NANOARROW_TYPE_BOOL:
+    case NANOARROW_TYPE_UINT8:
+    case NANOARROW_TYPE_INT8:
+    case NANOARROW_TYPE_UINT16:
+    case NANOARROW_TYPE_INT16:
+    case NANOARROW_TYPE_UINT32:
+    case NANOARROW_TYPE_INT32:
+    case NANOARROW_TYPE_UINT64:
+    case NANOARROW_TYPE_INT64:
+    case NANOARROW_TYPE_HALF_FLOAT:
+    case NANOARROW_TYPE_FLOAT:
+    case NANOARROW_TYPE_DOUBLE:
+    case NANOARROW_TYPE_DECIMAL128:
+    case NANOARROW_TYPE_DECIMAL256:
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_DATE32:
+    case NANOARROW_TYPE_DATE64:
+    case NANOARROW_TYPE_INTERVAL_MONTHS:
+    case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+    case NANOARROW_TYPE_TIMESTAMP:
+    case NANOARROW_TYPE_TIME32:
+    case NANOARROW_TYPE_TIME64:
+    case NANOARROW_TYPE_DURATION:
+      return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
+
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      if (schema_view->fixed_size <= 0) {
+        // Cast so the argument matches %d whatever the field's integer type
+        ArrowErrorSet(error,
+                      "Expected size > 0 for fixed size binary but found size %d",
+                      (int)schema_view->fixed_size);
+        return EINVAL;
+      }
+      return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
+
+    // Exactly one child
+    case NANOARROW_TYPE_LIST:
+    case NANOARROW_TYPE_LARGE_LIST:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
+
+    // Any number of children
+    case NANOARROW_TYPE_STRUCT:
+      return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
+
+    case NANOARROW_TYPE_SPARSE_UNION:
+    case NANOARROW_TYPE_DENSE_UNION:
+      return ArrowSchemaViewValidateUnion(schema_view, error);
+
+    case NANOARROW_TYPE_MAP:
+      return ArrowSchemaViewValidateMap(schema_view, error);
+
+    case NANOARROW_TYPE_DICTIONARY:
+      return ArrowSchemaViewValidateDictionary(schema_view, error);
+
+    default:
+      // Report the value that was actually validated, which is not
+      // necessarily schema_view->data_type
+      ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d",
+                    (int)data_type);
+      return EINVAL;
+  }
+}
+
+// Populate schema_view by parsing and validating schema.
+//
+// schema must be non-NULL, not released, and carry a non-empty format string
+// that is consumed in full by the parser. A schema with a dictionary is
+// reported with data type NANOARROW_TYPE_DICTIONARY while the storage type
+// stays that of the format string. The extension name and metadata are
+// looked up in schema->metadata and left as empty views if absent.
+//
+// Returns NANOARROW_OK on success; otherwise an error code (EINVAL for every
+// failure detected in this function) with a message recorded in error.
+ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
+                                   struct ArrowSchema* schema, struct ArrowError* error) {
+  if (schema == NULL) {
+    ArrowErrorSet(error, "Expected non-NULL schema");
+    return EINVAL;
+  }
+
+  if (schema->release == NULL) {
+    ArrowErrorSet(error, "Expected non-released schema");
+    return EINVAL;
+  }
+
+  schema_view->schema = schema;
+
+  const char* format = schema->format;
+  if (format == NULL) {
+    ArrowErrorSet(
+        error,
+        "Error parsing schema->format: Expected a null-terminated string but found NULL");
+    return EINVAL;
+  }
+
+  const size_t format_len = strlen(format);
+  if (format_len == 0) {
+    ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0");
+    return EINVAL;
+  }
+
+  const char* format_end_out;
+  ArrowErrorCode result =
+      ArrowSchemaViewParse(schema_view, format, &format_end_out, error);
+
+  if (result != NANOARROW_OK) {
+    // Prefix the parser's message. This is skipped when no error object was
+    // supplied, since ArrowErrorMessage() is not known to accept NULL.
+    if (error != NULL) {
+      char child_error[1024];
+      memcpy(child_error, ArrowErrorMessage(error), 1024);
+      ArrowErrorSet(error, "Error parsing schema->format: %s", child_error);
+    }
+    return result;
+  }
+
+  // Trailing characters after a valid prefix are an error
+  if ((format + format_len) != format_end_out) {
+    ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d characters",
+                  format, (int)(format_end_out - format), (int)(format_len));
+    return EINVAL;
+  }
+
+  if (schema->dictionary != NULL) {
+    schema_view->data_type = NANOARROW_TYPE_DICTIONARY;
+  }
+
+  result = ArrowSchemaViewValidate(schema_view, schema_view->storage_data_type, error);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
+  // The logical type only needs its own pass when it differs from storage
+  if (schema_view->storage_data_type != schema_view->data_type) {
+    result = ArrowSchemaViewValidate(schema_view, schema_view->data_type, error);
+    if (result != NANOARROW_OK) {
+      return result;
+    }
+  }
+
+  schema_view->extension_name = ArrowCharView(NULL);
+  schema_view->extension_metadata = ArrowCharView(NULL);
+  ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"),
+                        &schema_view->extension_name);
+  ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"),
+                        &schema_view->extension_metadata);
+
+  return NANOARROW_OK;
+}
diff --git a/c/vendor/nanoarrow/typedefs_inline.h
b/c/vendor/nanoarrow/typedefs_inline.h
new file mode 100644
index 0000000..5aca1ec
--- /dev/null
+++ b/c/vendor/nanoarrow/typedefs_inline.h
@@ -0,0 +1,253 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_TYPEDEFS_INLINE_H_INCLUDED
+#define NANOARROW_TYPEDEFS_INLINE_H_INCLUDED
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// \defgroup nanoarrow-inline-typedef Type definitions used in inlined implementations
+/// @{
+
+// Extra guard for versions of Arrow without the canonical guard
+#ifndef ARROW_FLAG_DICTIONARY_ORDERED
+
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+// Type description of an array. This definition is only compiled when the
+// surrounding guards find that ARROW_C_DATA_INTERFACE has not been defined
+// already, so the member order and types must not be changed here.
+struct ArrowSchema {
+ // Array type description
+ const char* format;
+ const char* name;
+ const char* metadata;
+ // presumably a bitwise OR of the ARROW_FLAG_* values defined above -- TODO confirm
+ int64_t flags;
+ // Number of entries in children
+ int64_t n_children;
+ struct ArrowSchema** children;
+ // NULL when there is no dictionary
+ struct ArrowSchema* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowSchema*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+// Data of an array. This definition is only compiled when the surrounding
+// guards find that ARROW_C_DATA_INTERFACE has not been defined already, so
+// the member order and types must not be changed here.
+struct ArrowArray {
+ // Array data description
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+ // Number of entries in buffers
+ int64_t n_buffers;
+ // Number of entries in children
+ int64_t n_children;
+ const void** buffers;
+ struct ArrowArray** children;
+ struct ArrowArray* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowArray*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#endif // ARROW_C_DATA_INTERFACE
+
+#ifndef ARROW_C_STREAM_INTERFACE
+#define ARROW_C_STREAM_INTERFACE
+
+// A stream of arrays that all share one schema, exposed as a set of
+// callbacks. This definition is only compiled when ARROW_C_STREAM_INTERFACE
+// has not been defined already.
+struct ArrowArrayStream {
+ // Callback to get the stream type
+ // (will be the same for all arrays in the stream).
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowSchema must be released independently from the
+ // stream.
+ int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
+
+ // Callback to get the next array
+ // (if no error and the array is released, the stream has ended)
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowArray must be released independently from the
+ // stream.
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
+
+ // Callback to get optional detailed error information.
+ // This must only be called if the last stream operation failed
+ // with a non-0 return code.
+ //
+ // Return value: pointer to a null-terminated character array describing
+ // the last error, or NULL if no description is available.
+ //
+ // The returned pointer is only valid until the next operation on this stream
+ // (including release).
+ const char* (*get_last_error)(struct ArrowArrayStream*);
+
+ // Release callback: release the stream's own resources.
+ // Note that arrays returned by `get_next` must be individually released.
+ void (*release)(struct ArrowArrayStream*);
+
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#endif // ARROW_C_STREAM_INTERFACE
+#endif // ARROW_FLAG_DICTIONARY_ORDERED
+
+/// \brief Return code for success.
+#define NANOARROW_OK 0
+
+/// \brief Represents an errno-compatible error code
+typedef int ArrowErrorCode;
+
+/// \brief Arrow type enumerator
+///
+/// These names are intended to map to the corresponding arrow::Type::type
+/// enumerator; however, the numeric values are specifically not equal
+/// (i.e., do not rely on numeric comparison).
+enum ArrowType {
+ // Placeholder for a type that is unknown or has not been set
+ NANOARROW_TYPE_UNINITIALIZED = 0,
+ // The remaining values are assigned sequentially starting from 1
+ NANOARROW_TYPE_NA = 1,
+ NANOARROW_TYPE_BOOL,
+ NANOARROW_TYPE_UINT8,
+ NANOARROW_TYPE_INT8,
+ NANOARROW_TYPE_UINT16,
+ NANOARROW_TYPE_INT16,
+ NANOARROW_TYPE_UINT32,
+ NANOARROW_TYPE_INT32,
+ NANOARROW_TYPE_UINT64,
+ NANOARROW_TYPE_INT64,
+ NANOARROW_TYPE_HALF_FLOAT,
+ NANOARROW_TYPE_FLOAT,
+ NANOARROW_TYPE_DOUBLE,
+ NANOARROW_TYPE_STRING,
+ NANOARROW_TYPE_BINARY,
+ NANOARROW_TYPE_FIXED_SIZE_BINARY,
+ NANOARROW_TYPE_DATE32,
+ NANOARROW_TYPE_DATE64,
+ NANOARROW_TYPE_TIMESTAMP,
+ NANOARROW_TYPE_TIME32,
+ NANOARROW_TYPE_TIME64,
+ NANOARROW_TYPE_INTERVAL_MONTHS,
+ NANOARROW_TYPE_INTERVAL_DAY_TIME,
+ NANOARROW_TYPE_DECIMAL128,
+ NANOARROW_TYPE_DECIMAL256,
+ NANOARROW_TYPE_LIST,
+ NANOARROW_TYPE_STRUCT,
+ NANOARROW_TYPE_SPARSE_UNION,
+ NANOARROW_TYPE_DENSE_UNION,
+ NANOARROW_TYPE_DICTIONARY,
+ NANOARROW_TYPE_MAP,
+ NANOARROW_TYPE_EXTENSION,
+ NANOARROW_TYPE_FIXED_SIZE_LIST,
+ NANOARROW_TYPE_DURATION,
+ NANOARROW_TYPE_LARGE_STRING,
+ NANOARROW_TYPE_LARGE_BINARY,
+ NANOARROW_TYPE_LARGE_LIST,
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+};
+
+/// \brief A non-owning view of a string
+struct ArrowStringView {
+ /// \brief A pointer to the start of the string
+ ///
+ /// If n_bytes is 0, this value may be NULL.
+ const char* data;
+
+ /// \brief The size of the string in bytes
+ ///
+ /// (Not including the null terminator.)
+ int64_t n_bytes;
+};
+
+/// \brief Array buffer allocation and deallocation
+///
+/// Container for allocate, reallocate, and free methods that can be used
+/// to customize allocation and deallocation of buffers when constructing
+/// an ArrowArray. Every callback receives the allocator itself as its first
+/// argument.
+struct ArrowBufferAllocator {
+ /// \brief Allocate a buffer or return NULL if it cannot be allocated
+ uint8_t* (*allocate)(struct ArrowBufferAllocator* allocator, int64_t size);
+
+ /// \brief Reallocate a buffer or return NULL if it cannot be reallocated
+ uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+ int64_t old_size, int64_t new_size);
+
+ /// \brief Deallocate a buffer allocated by this allocator
+ ///
+ /// NOTE(review): size is presumably the size in bytes of the allocation
+ /// behind ptr -- confirm against the default allocator.
+ void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t
+size);
+
+ /// \brief Opaque data specific to the allocator
+ void* private_data;
+};
+
+/// \brief An owning mutable view of a buffer
+struct ArrowBuffer {
+ /// \brief A pointer to the start of the buffer
+ ///
+ /// If capacity_bytes is 0, this value may be NULL.
+ uint8_t* data;
+
+ /// \brief The size of the buffer in bytes
+ int64_t size_bytes;
+
+ /// \brief The capacity of the buffer in bytes
+ int64_t capacity_bytes;
+
+ /// \brief The allocator that will be used to reallocate and/or free the
+ /// buffer
+ struct ArrowBufferAllocator* allocator;
+};
+
+/// \brief An owning mutable view of a bitmap
+struct ArrowBitmap {
+ /// \brief An ArrowBuffer to hold the allocated memory
+ struct ArrowBuffer buffer;
+
+ /// \brief The number of bits that have been appended to the bitmap
+ ///
+ /// This is a count of bits, whereas the sizes tracked by buffer are in
+ /// bytes.
+ int64_t size_bits;
+};
+
+// Used as the private data member for ArrowArrays allocated here and accessed
+// internally within inline ArrowArray* helpers.
+struct ArrowArrayPrivateData {
+ // Holder for the validity buffer (or first buffer for union types, which are
+ // the only type whose first buffer is not a validity buffer)
+ struct ArrowBitmap bitmap;
+
+ // Holder for additional buffers as required
+ struct ArrowBuffer buffers[2];
+
+ // The array of pointers to buffers. This must be updated after a sequence
+ // of appends to synchronize its values with the actual buffer addresses
+ // (which may have been reallocated during that time)
+ const void* buffer_data[3];
+
+ // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
+ enum ArrowType storage_type;
+};
+
+/// @}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/c/validation/adbc_validation.h b/c/vendor/nanoarrow/utils_inline.h
similarity index 51%
copy from c/validation/adbc_validation.h
copy to c/vendor/nanoarrow/utils_inline.h
index 2d702ae..4c61555 100644
--- a/c/validation/adbc_validation.h
+++ b/c/vendor/nanoarrow/utils_inline.h
@@ -15,32 +15,32 @@
// specific language governing permissions and limitations
// under the License.
-#ifndef ADBC_VALIDATION_H
-#define ADBC_VALIDATION_H
+#ifndef NANOARROW_UTILS_INLINE_H_INCLUDED
+#define NANOARROW_UTILS_INLINE_H_INCLUDED
+
+#include <string.h>
+
+#include "typedefs_inline.h"
#ifdef __cplusplus
extern "C" {
#endif
-#include <adbc.h>
+/// \brief Create a string view from a null-terminated string
+///
+/// The view points at value itself; no copy is made. A NULL value produces
+/// a view whose data is NULL and whose n_bytes is 0.
+static inline struct ArrowStringView ArrowCharView(const char* value) {
+ struct ArrowStringView out;
-struct AdbcValidateTestContext {
- int total;
- int passed;
- int failed;
- AdbcStatusCode (*setup_database)(struct AdbcDatabase* database,
- struct AdbcError* error);
-};
+ out.data = value;
+ if (value) {
+ out.n_bytes = (int64_t)strlen(value);
+ } else {
+ out.n_bytes = 0;
+ }
-void AdbcValidateDatabaseNewRelease(struct AdbcValidateTestContext*
adbc_context);
-void AdbcValidateConnectionNewRelease(struct AdbcValidateTestContext*
adbc_context);
-void AdbcValidateConnectionAutocommit(struct AdbcValidateTestContext*
adbc_context);
-void AdbcValidateStatementNewRelease(struct AdbcValidateTestContext*
adbc_context);
-void AdbcValidateStatementSqlExecute(struct AdbcValidateTestContext*
adbc_context);
-void AdbcValidateStatementSqlPrepare(struct AdbcValidateTestContext*
adbc_context);
+ return out;
+}
#ifdef __cplusplus
}
#endif
-#endif // ADBC_VALIDATION_H
+#endif
diff --git a/.gitattributes b/c/vendor/vendor_nanoarrow.sh
old mode 100644
new mode 100755
similarity index 59%
copy from .gitattributes
copy to c/vendor/vendor_nanoarrow.sh
index 7aad2f3..8aeee16
--- a/.gitattributes
+++ b/c/vendor/vendor_nanoarrow.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -15,5 +16,21 @@
# specific language governing permissions and limitations
# under the License.
-python/adbc_driver_manager/poetry.lock linguist-generated=true
-python/adbc_driver_manager/requirements-dev.txt linguist-generated=true
+# Download and extract the latest Nanoarrow.
+
+main() {
+ local -r repo_url="https://github.com/apache/arrow-nanoarrow"
+ local -r commit_sha=$(git ls-remote "$repo_url" HEAD | awk '{print $2}')
+ TARBALL="$(pwd)/nanoarrow.tar.gz"
+
+ echo "Fetching $commit_sha from $repo_url"
+ wget -O "$TARBALL" "$repo_url/archive/$commit_sha.tar.gz"
+ trap 'rm "$TARBALL"' EXIT
+
+ mkdir -p nanoarrow
+ # Keep only the sources
+ tar --strip-components 3 -C nanoarrow -xf "$TARBALL"
+ rm nanoarrow/*_test.cc
+}
+
+main "$@"