This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 036663205 feat(c): Implement ingestion and testing for float16, 
string_view, and binary_view (#2234)
036663205 is described below

commit 036663205ec811adf5f7bdf76edc03a4b160351b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Oct 17 02:22:22 2024 +0000

    feat(c): Implement ingestion and testing for float16, string_view, and 
binary_view (#2234)
    
    This PR tests ingestion of half float, string view, and binary view, for
    which support was added between nanoarrow 0.5.0 and nanoarrow 0.6.0. It
    also removes some skips for various combinations of numeric types for
    which support was already available that I missed when adding support
    for those types.
---
 c/driver/postgresql/copy/writer.h            | 10 ++++++--
 c/driver/postgresql/postgres_type.h          |  4 +++
 c/driver/postgresql/postgresql_test.cc       | 18 +++++++++----
 c/driver/sqlite/sqlite_test.cc               |  8 +++++-
 c/driver/sqlite/statement_reader.c           | 11 ++++++--
 c/driver_manager/adbc_driver_manager_test.cc | 12 ++++++---
 c/validation/adbc_validation.h               | 10 ++++++++
 c/validation/adbc_validation_statement.cc    | 38 ++++++++++++++++++++++++++++
 c/validation/adbc_validation_util.cc         | 12 ++++++++-
 c/validation/adbc_validation_util.h          |  8 ++++++
 10 files changed, 117 insertions(+), 14 deletions(-)

diff --git a/c/driver/postgresql/copy/writer.h 
b/c/driver/postgresql/copy/writer.h
index b97628f34..e88ed691c 100644
--- a/c/driver/postgresql/copy/writer.h
+++ b/c/driver/postgresql/copy/writer.h
@@ -590,8 +590,9 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
       *out = T::Create<T>(array_view);
       return NANOARROW_OK;
     }
+    case NANOARROW_TYPE_UINT32:
     case NANOARROW_TYPE_INT64:
-    case NANOARROW_TYPE_UINT32: {
+    case NANOARROW_TYPE_UINT64: {
       using T = PostgresCopyNetworkEndianFieldWriter<int64_t>;
       *out = T::Create<T>(array_view);
       return NANOARROW_OK;
@@ -612,6 +613,7 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
           return ADBC_STATUS_NOT_IMPLEMENTED;
       }
     }
+    case NANOARROW_TYPE_HALF_FLOAT:
     case NANOARROW_TYPE_FLOAT: {
       using T = PostgresCopyFloatFieldWriter;
       *out = T::Create<T>(array_view);
@@ -637,8 +639,12 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
       return NANOARROW_OK;
     }
     case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+    case NANOARROW_TYPE_BINARY_VIEW:
     case NANOARROW_TYPE_STRING:
-    case NANOARROW_TYPE_LARGE_STRING: {
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_STRING_VIEW: {
       using T = PostgresCopyBinaryFieldWriter;
       *out = T::Create<T>(array_view);
       return NANOARROW_OK;
diff --git a/c/driver/postgresql/postgres_type.h 
b/c/driver/postgresql/postgres_type.h
index 28891a7a9..b3cfc209f 100644
--- a/c/driver/postgresql/postgres_type.h
+++ b/c/driver/postgresql/postgres_type.h
@@ -558,17 +558,21 @@ inline ArrowErrorCode PostgresType::FromSchema(const 
PostgresTypeResolver& resol
       return resolver.Find(resolver.GetOID(PostgresTypeId::kInt4), out, error);
     case NANOARROW_TYPE_UINT32:
     case NANOARROW_TYPE_INT64:
+    case NANOARROW_TYPE_UINT64:
       return resolver.Find(resolver.GetOID(PostgresTypeId::kInt8), out, error);
+    case NANOARROW_TYPE_HALF_FLOAT:
     case NANOARROW_TYPE_FLOAT:
       return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat4), out, 
error);
     case NANOARROW_TYPE_DOUBLE:
       return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat8), out, 
error);
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_STRING_VIEW:
       return resolver.Find(resolver.GetOID(PostgresTypeId::kText), out, error);
     case NANOARROW_TYPE_BINARY:
     case NANOARROW_TYPE_LARGE_BINARY:
     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+    case NANOARROW_TYPE_BINARY_VIEW:
       return resolver.Find(resolver.GetOID(PostgresTypeId::kBytea), out, 
error);
     case NANOARROW_TYPE_DATE32:
     case NANOARROW_TYPE_DATE64:
diff --git a/c/driver/postgresql/postgresql_test.cc 
b/c/driver/postgresql/postgresql_test.cc
index 08742d517..be32bd893 100644
--- a/c/driver/postgresql/postgresql_test.cc
+++ b/c/driver/postgresql/postgresql_test.cc
@@ -116,11 +116,24 @@ class PostgresQuirks : public 
adbc_validation::DriverQuirks {
   ArrowType IngestSelectRoundTripType(ArrowType ingest_type) const override {
     switch (ingest_type) {
       case NANOARROW_TYPE_INT8:
+      case NANOARROW_TYPE_UINT8:
         return NANOARROW_TYPE_INT16;
+      case NANOARROW_TYPE_UINT16:
+        return NANOARROW_TYPE_INT32;
+      case NANOARROW_TYPE_UINT32:
+      case NANOARROW_TYPE_UINT64:
+        return NANOARROW_TYPE_INT64;
+      case NANOARROW_TYPE_HALF_FLOAT:
+        return NANOARROW_TYPE_FLOAT;
       case NANOARROW_TYPE_DURATION:
         return NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO;
       case NANOARROW_TYPE_LARGE_STRING:
+      case NANOARROW_TYPE_STRING_VIEW:
         return NANOARROW_TYPE_STRING;
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      case NANOARROW_TYPE_BINARY_VIEW:
+        return NANOARROW_TYPE_BINARY;
       case NANOARROW_TYPE_DECIMAL128:
       case NANOARROW_TYPE_DECIMAL256:
         return NANOARROW_TYPE_STRING;
@@ -886,11 +899,6 @@ class PostgresStatementTest : public ::testing::Test,
   void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); }
   void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); }
 
-  void TestSqlIngestUInt8() { GTEST_SKIP() << "Not implemented"; }
-  void TestSqlIngestUInt16() { GTEST_SKIP() << "Not implemented"; }
-  void TestSqlIngestUInt32() { GTEST_SKIP() << "Not implemented"; }
-  void TestSqlIngestUInt64() { GTEST_SKIP() << "Not implemented"; }
-
   void TestSqlPrepareErrorParamCountMismatch() { GTEST_SKIP() << "Not yet 
implemented"; }
   void TestSqlPrepareGetParameterSchema() { GTEST_SKIP() << "Not yet 
implemented"; }
   void TestSqlPrepareSelectParams() { GTEST_SKIP() << "Not yet implemented"; }
diff --git a/c/driver/sqlite/sqlite_test.cc b/c/driver/sqlite/sqlite_test.cc
index e053e8725..8ceb747ac 100644
--- a/c/driver/sqlite/sqlite_test.cc
+++ b/c/driver/sqlite/sqlite_test.cc
@@ -79,10 +79,16 @@ class SqliteQuirks : public adbc_validation::DriverQuirks {
       case NANOARROW_TYPE_UINT32:
       case NANOARROW_TYPE_UINT64:
         return NANOARROW_TYPE_INT64;
+      case NANOARROW_TYPE_HALF_FLOAT:
       case NANOARROW_TYPE_FLOAT:
-      case NANOARROW_TYPE_DOUBLE:
         return NANOARROW_TYPE_DOUBLE;
       case NANOARROW_TYPE_LARGE_STRING:
+      case NANOARROW_TYPE_STRING_VIEW:
+        return NANOARROW_TYPE_STRING;
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      case NANOARROW_TYPE_BINARY_VIEW:
+        return NANOARROW_TYPE_BINARY;
       case NANOARROW_TYPE_DATE32:
       case NANOARROW_TYPE_TIMESTAMP:
         return NANOARROW_TYPE_STRING;
diff --git a/c/driver/sqlite/statement_reader.c 
b/c/driver/sqlite/statement_reader.c
index 97a134a99..f73151673 100644
--- a/c/driver/sqlite/statement_reader.c
+++ b/c/driver/sqlite/statement_reader.c
@@ -89,8 +89,11 @@ AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* 
binder,
       switch (value_view.type) {
         case NANOARROW_TYPE_STRING:
         case NANOARROW_TYPE_LARGE_STRING:
+        case NANOARROW_TYPE_STRING_VIEW:
         case NANOARROW_TYPE_BINARY:
         case NANOARROW_TYPE_LARGE_BINARY:
+        case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+        case NANOARROW_TYPE_BINARY_VIEW:
           break;
         default:
           SetError(error, "Column %d dictionary has unsupported type %s", i,
@@ -326,7 +329,9 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct 
AdbcSqliteBinder* binder, sqlite3
     } else {
       switch (binder->types[col]) {
         case NANOARROW_TYPE_BINARY:
-        case NANOARROW_TYPE_LARGE_BINARY: {
+        case NANOARROW_TYPE_LARGE_BINARY:
+        case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+        case NANOARROW_TYPE_BINARY_VIEW: {
           struct ArrowBufferView value =
               ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], 
binder->next_row);
           status = sqlite3_bind_blob(stmt, col + 1, value.data.as_char, 
value.size_bytes,
@@ -359,6 +364,7 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct 
AdbcSqliteBinder* binder, sqlite3
           status = sqlite3_bind_int64(stmt, col + 1, value);
           break;
         }
+        case NANOARROW_TYPE_HALF_FLOAT:
         case NANOARROW_TYPE_FLOAT:
         case NANOARROW_TYPE_DOUBLE: {
           double value = 
ArrowArrayViewGetDoubleUnsafe(binder->batch.children[col],
@@ -367,7 +373,8 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct 
AdbcSqliteBinder* binder, sqlite3
           break;
         }
         case NANOARROW_TYPE_STRING:
-        case NANOARROW_TYPE_LARGE_STRING: {
+        case NANOARROW_TYPE_LARGE_STRING:
+        case NANOARROW_TYPE_STRING_VIEW: {
           struct ArrowBufferView value =
               ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], 
binder->next_row);
           status = sqlite3_bind_text(stmt, col + 1, value.data.as_char, 
value.size_bytes,
diff --git a/c/driver_manager/adbc_driver_manager_test.cc 
b/c/driver_manager/adbc_driver_manager_test.cc
index f58b2ec32..c2342ebae 100644
--- a/c/driver_manager/adbc_driver_manager_test.cc
+++ b/c/driver_manager/adbc_driver_manager_test.cc
@@ -187,10 +187,18 @@ class SqliteQuirks : public adbc_validation::DriverQuirks 
{
       case NANOARROW_TYPE_UINT32:
       case NANOARROW_TYPE_UINT64:
         return NANOARROW_TYPE_INT64;
+      case NANOARROW_TYPE_HALF_FLOAT:
       case NANOARROW_TYPE_FLOAT:
-      case NANOARROW_TYPE_DOUBLE:
         return NANOARROW_TYPE_DOUBLE;
       case NANOARROW_TYPE_LARGE_STRING:
+      case NANOARROW_TYPE_STRING_VIEW:
+        return NANOARROW_TYPE_STRING;
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      case NANOARROW_TYPE_BINARY_VIEW:
+        return NANOARROW_TYPE_BINARY;
+      case NANOARROW_TYPE_DATE32:
+      case NANOARROW_TYPE_TIMESTAMP:
         return NANOARROW_TYPE_STRING;
       default:
         return ingest_type;
@@ -267,8 +275,6 @@ class SqliteStatementTest : public ::testing::Test,
   void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); }
 
   void TestSqlIngestUInt64() { GTEST_SKIP() << "Cannot ingest UINT64 (out of 
range)"; }
-  void TestSqlIngestBinary() { GTEST_SKIP() << "Cannot ingest BINARY (not 
implemented)"; }
-  void TestSqlIngestDate32() { GTEST_SKIP() << "Cannot ingest DATE (not 
implemented)"; }
   void TestSqlIngestTimestamp() {
     GTEST_SKIP() << "Cannot ingest TIMESTAMP (not implemented)";
   }
diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h
index 9e5ec8628..fa3c1cdcc 100644
--- a/c/validation/adbc_validation.h
+++ b/c/validation/adbc_validation.h
@@ -373,13 +373,18 @@ class StatementTest {
   void TestSqlIngestUInt64();
 
   // Floats
+  void TestSqlIngestFloat16();
   void TestSqlIngestFloat32();
   void TestSqlIngestFloat64();
 
   // Strings
   void TestSqlIngestString();
   void TestSqlIngestLargeString();
+  void TestSqlIngestStringView();
   void TestSqlIngestBinary();
+  void TestSqlIngestLargeBinary();
+  void TestSqlIngestFixedSizeBinary();
+  void TestSqlIngestBinaryView();
 
   // Temporal
   void TestSqlIngestDuration();
@@ -494,11 +499,16 @@ class StatementTest {
   TEST_F(FIXTURE, SqlIngestUInt16) { TestSqlIngestUInt16(); }                  
         \
   TEST_F(FIXTURE, SqlIngestUInt32) { TestSqlIngestUInt32(); }                  
         \
   TEST_F(FIXTURE, SqlIngestUInt64) { TestSqlIngestUInt64(); }                  
         \
+  TEST_F(FIXTURE, SqlIngestFloat16) { TestSqlIngestFloat16(); }                
         \
   TEST_F(FIXTURE, SqlIngestFloat32) { TestSqlIngestFloat32(); }                
         \
   TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); }                
         \
   TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); }                  
         \
   TEST_F(FIXTURE, SqlIngestLargeString) { TestSqlIngestLargeString(); }        
         \
+  TEST_F(FIXTURE, SqlIngestStringView) { TestSqlIngestStringView(); }          
         \
   TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); }                  
         \
+  TEST_F(FIXTURE, SqlIngestLargeBinary) { TestSqlIngestLargeBinary(); }        
         \
+  TEST_F(FIXTURE, SqlIngestFixedSizeBinary) { TestSqlIngestFixedSizeBinary(); 
}         \
+  TEST_F(FIXTURE, SqlIngestBinaryView) { TestSqlIngestBinaryView(); }          
         \
   TEST_F(FIXTURE, SqlIngestDuration) { TestSqlIngestDuration(); }              
         \
   TEST_F(FIXTURE, SqlIngestDate32) { TestSqlIngestDate32(); }                  
         \
   TEST_F(FIXTURE, SqlIngestTimestamp) { TestSqlIngestTimestamp(); }            
         \
diff --git a/c/validation/adbc_validation_statement.cc 
b/c/validation/adbc_validation_statement.cc
index 4549faf20..07ab0b22a 100644
--- a/c/validation/adbc_validation_statement.cc
+++ b/c/validation/adbc_validation_statement.cc
@@ -245,6 +245,10 @@ void StatementTest::TestSqlIngestInt64() {
   
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<int64_t>(NANOARROW_TYPE_INT64));
 }
 
+void StatementTest::TestSqlIngestFloat16() {  // Ingest test for NANOARROW_TYPE_HALF_FLOAT; reuses TestSqlIngestNumericType with float as the C type.
+  
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<float>(NANOARROW_TYPE_HALF_FLOAT));
+}
+
 void StatementTest::TestSqlIngestFloat32() {
   
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<float>(NANOARROW_TYPE_FLOAT));
 }
@@ -263,6 +267,12 @@ void StatementTest::TestSqlIngestLargeString() {
       NANOARROW_TYPE_LARGE_STRING, {std::nullopt, "", "", "1234", "例"}, 
false));
 }
 
+void StatementTest::TestSqlIngestStringView() {  // Ingest test for NANOARROW_TYPE_STRING_VIEW. NOTE(review): the long value presumably exceeds the inline view size -- confirm.
+  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::string>(
+      NANOARROW_TYPE_STRING_VIEW, {std::nullopt, "", "", "longer than 12 
bytes", "例"},
+      false));
+}
+
 void StatementTest::TestSqlIngestBinary() {
   ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::vector<std::byte>>(
       NANOARROW_TYPE_BINARY,
@@ -274,6 +284,34 @@ void StatementTest::TestSqlIngestBinary() {
       false));
 }
 
+void StatementTest::TestSqlIngestLargeBinary() {  // Ingest test for NANOARROW_TYPE_LARGE_BINARY: a std::nullopt entry, an empty value, and three short byte sequences.
+  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::vector<std::byte>>(
+      NANOARROW_TYPE_LARGE_BINARY,
+      {std::nullopt, std::vector<std::byte>{},
+       std::vector<std::byte>{std::byte{0x00}, std::byte{0x01}},
+       std::vector<std::byte>{std::byte{0x01}, std::byte{0x02}, 
std::byte{0x03},
+                              std::byte{0x04}},
+       std::vector<std::byte>{std::byte{0xfe}, std::byte{0xff}}},
+      false));
+}
+
+void StatementTest::TestSqlIngestFixedSizeBinary() {  // Ingest test for a NANOARROW_TYPE_FIXED_SIZE_BINARY field "col" of fixed size 4; every non-null value is a 4-character string.
+  SchemaField field = SchemaField::FixedSize("col", 
NANOARROW_TYPE_FIXED_SIZE_BINARY, 4);
+  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::string>(
+      field, {std::nullopt, "abcd", "efgh", "ijkl", "mnop"}, false));
+}
+
+void StatementTest::TestSqlIngestBinaryView() {  // Ingest test for NANOARROW_TYPE_BINARY_VIEW: a std::nullopt entry, an empty value, and three short byte sequences.
+  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::vector<std::byte>>(
+      NANOARROW_TYPE_BINARY_VIEW,  // was LARGE_BINARY (copy-paste), which only repeated TestSqlIngestLargeBinary
+      {std::nullopt, std::vector<std::byte>{},
+       std::vector<std::byte>{std::byte{0x00}, std::byte{0x01}},
+       std::vector<std::byte>{std::byte{0x01}, std::byte{0x02}, 
std::byte{0x03},
+                              std::byte{0x04}},
+       std::vector<std::byte>{std::byte{0xfe}, std::byte{0xff}}},
+      false));
+}
+
 void StatementTest::TestSqlIngestDate32() {
   
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<int32_t>(NANOARROW_TYPE_DATE32));
 }
diff --git a/c/validation/adbc_validation_util.cc 
b/c/validation/adbc_validation_util.cc
index 91876dae3..7d97ad762 100644
--- a/c/validation/adbc_validation_util.cc
+++ b/c/validation/adbc_validation_util.cc
@@ -166,7 +166,16 @@ void IsAdbcStatusCode::DescribeNegationTo(std::ostream* 
os) const {
   } while (false);
 
 static int MakeSchemaColumnImpl(struct ArrowSchema* column, const SchemaField& 
field) {
-  CHECK_ERRNO(ArrowSchemaSetType(column, field.type));
+  switch (field.type) {
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      CHECK_ERRNO(ArrowSchemaSetTypeFixedSize(column, field.type, 
field.fixed_size));
+      break;
+    default:
+      CHECK_ERRNO(ArrowSchemaSetType(column, field.type));
+      break;
+  }
+
   CHECK_ERRNO(ArrowSchemaSetName(column, field.name.c_str()));
 
   if (!field.nullable) {
@@ -181,6 +190,7 @@ static int MakeSchemaColumnImpl(struct ArrowSchema* column, 
const SchemaField& f
     // SetType for a list will allocate and initialize children
     case NANOARROW_TYPE_LIST:
     case NANOARROW_TYPE_LARGE_LIST:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
     case NANOARROW_TYPE_MAP: {
       size_t i = 0;
       for (const SchemaField& child : field.children) {
diff --git a/c/validation/adbc_validation_util.h 
b/c/validation/adbc_validation_util.h
index e7a4d76b2..b4f5d6f81 100644
--- a/c/validation/adbc_validation_util.h
+++ b/c/validation/adbc_validation_util.h
@@ -256,6 +256,7 @@ struct GetObjectsReader {
 struct SchemaField {
   std::string name;
   ArrowType type = NANOARROW_TYPE_UNINITIALIZED;
+  int32_t fixed_size = 0;
   bool nullable = true;
   std::vector<SchemaField> children;
 
@@ -271,6 +272,13 @@ struct SchemaField {
     out.children = std::move(children);
     return out;
   }
+
+  static SchemaField FixedSize(std::string name, ArrowType type, int32_t 
fixed_size,
+                               std::vector<SchemaField> children = {}) {  // Builds a field via Nested() and records fixed_size on it.
+    SchemaField out = Nested(std::move(name), type, std::move(children));  // move the by-value name instead of copying it
+    out.fixed_size = fixed_size;
+    return out;
+  }
 };
 
 /// \brief Make a schema from a vector of (name, type, nullable) tuples.

Reply via email to