This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch spec-1.1.0
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/spec-1.1.0 by this push:
new a0e734f3 feat(c/driver/postgresql): implement statistics API (#960)
a0e734f3 is described below
commit a0e734f308a8aa3cb020850add318189224c253f
Author: David Li <[email protected]>
AuthorDate: Thu Aug 3 16:06:54 2023 -0400
feat(c/driver/postgresql): implement statistics API (#960)
---
adbc.h | 4 +-
c/driver/postgresql/connection.cc | 364 +++++++++++++++++++++
c/driver/postgresql/connection.h | 4 +
c/driver/postgresql/postgresql.cc | 38 +++
c/driver/postgresql/postgresql_test.cc | 165 ++++++++++
c/driver/postgresql/statement.cc | 2 +-
c/driver/sqlite/sqlite.c | 14 +
c/driver_manager/adbc_driver_manager.cc | 36 ++
c/validation/adbc_validation.cc | 25 ++
c/validation/adbc_validation.h | 8 +-
go/adbc/adbc.go | 4 +-
go/adbc/drivermgr/adbc.h | 4 +-
go/adbc/drivermgr/adbc_driver_manager.cc | 36 ++
go/adbc/standard_schemas.go | 4 +-
.../org/apache/arrow/adbc/core/AdbcConnection.java | 12 +-
.../apache/arrow/adbc/core/StandardSchemas.java | 4 +-
.../driver/flightsql/FlightSqlStatementTest.java | 12 +
.../adbc/driver/flightsql/FlightSqlStatement.java | 12 +
.../adbc/driver/jdbc/derby/DerbyStatementTest.java | 5 +
.../adbc_driver_manager/dbapi.py | 2 +-
r/adbcpostgresql/bootstrap.R | 2 +
r/adbcpostgresql/src/.gitignore | 2 +
r/adbcpostgresql/src/Makevars.in | 1 +
r/adbcpostgresql/src/Makevars.ucrt | 1 +
r/adbcpostgresql/src/Makevars.win | 1 +
25 files changed, 743 insertions(+), 19 deletions(-)
diff --git a/adbc.h b/adbc.h
index 32eaf8b2..f1b9d675 100644
--- a/adbc.h
+++ b/adbc.h
@@ -1742,14 +1742,14 @@ AdbcStatusCode AdbcConnectionGetOptionDouble(struct
AdbcConnection* connection,
/// | Field Name | Field Type |
/// |--------------------------|----------------------------------|
/// | catalog_name | utf8 |
-/// | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> |
+/// | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> not null |
///
/// DB_SCHEMA_SCHEMA is a Struct with fields:
///
/// | Field Name | Field Type |
/// |--------------------------|----------------------------------|
/// | db_schema_name | utf8 |
-/// | db_schema_statistics | list<STATISTICS_SCHEMA> |
+/// | db_schema_statistics | list<STATISTICS_SCHEMA> not null |
///
/// STATISTICS_SCHEMA is a Struct with fields:
///
diff --git a/c/driver/postgresql/connection.cc
b/c/driver/postgresql/connection.cc
index df612ea8..3be238fd 100644
--- a/c/driver/postgresql/connection.cc
+++ b/c/driver/postgresql/connection.cc
@@ -19,6 +19,7 @@
#include <cassert>
#include <cinttypes>
+#include <cmath>
#include <cstring>
#include <memory>
#include <sstream>
@@ -47,10 +48,21 @@ static const std::unordered_map<std::string, std::string>
kPgTableTypes = {
{"table", "r"}, {"view", "v"}, {"materialized_view", "m"},
{"toast_table", "t"}, {"foreign_table", "f"}, {"partitioned_table", "p"}};
+/// \brief A single column in a single row of a result set.
struct PqRecord {
const char* data;
const int len;
const bool is_null;
+
+  /// \brief Parse the text of this field as a double.
+  ///
+  /// \return (true, value) on success; (false, 0.0) if the field is NULL, no
+  ///   characters could be converted, or strtod reported an error.
+  // XXX: can't use optional due to R
+  std::pair<bool, double> ParseDouble() const {
+    if (is_null || data == nullptr) {
+      return std::make_pair(false, 0.0);
+    }
+    char* end = nullptr;
+    // strtod sets errno on failure but never clears it, so a stale value left
+    // by an earlier call would otherwise be misreported as a parse failure.
+    errno = 0;
+    const double result = std::strtod(data, &end);
+    if (errno != 0 || end == data) {
+      return std::make_pair(false, 0.0);
+    }
+    return std::make_pair(true, result);
+  }
};
// Used by PqResultHelper to provide index-based access to the records within
each
@@ -905,6 +917,358 @@ AdbcStatusCode PostgresConnection::GetOptionDouble(const
char* option, double* v
return ADBC_STATUS_NOT_FOUND;
}
+/// \brief Build the single-batch result for PostgresConnection::GetStatistics.
+///
+/// Reads pg_stats (joined against pg_class for reltuples) and emits, for each
+/// matching table, one row-count statistic followed by the null count, average
+/// byte width, and distinct count of each of its columns. Every value is
+/// written to the float64 union child and flagged as approximate.
+///
+/// \param[in] conn Connection to run the query on; PQdb(conn) is reported as
+///   the catalog name.
+/// \param[in] db_schema Schema name, bound to $1 and compared for equality.
+/// \param[in] table_name Table name pattern, bound to $2 and matched with
+///   LIKE; "%" is substituted when this is null.
+/// \param[out] schema Receives the result schema on success.
+/// \param[out] array The result batch. It may be left partially built when an
+///   error is returned.
+/// \param[out] error Populated on failure.
+AdbcStatusCode PostgresConnectionGetStatisticsImpl(PGconn* conn, const char* db_schema,
+                                                   const char* table_name,
+                                                   struct ArrowSchema* schema,
+                                                   struct ArrowArray* array,
+                                                   struct AdbcError* error) {
+  // Set up schema
+  auto uschema = nanoarrow::UniqueSchema();
+  {
+    ArrowSchemaInit(uschema.get());
+    CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema.get(), /*num_columns=*/2), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetType(uschema->children[0], NANOARROW_TYPE_STRING),
+             error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(uschema->children[0], "catalog_name"), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetType(uschema->children[1], NANOARROW_TYPE_LIST),
+             error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(uschema->children[1], "catalog_db_schemas"),
+             error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema->children[1]->children[0], 2),
+             error);
+    uschema->children[1]->flags &= ~ARROW_FLAG_NULLABLE;
+
+    struct ArrowSchema* db_schema_schema = uschema->children[1]->children[0];
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(db_schema_schema->children[0], NANOARROW_TYPE_STRING),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetName(db_schema_schema->children[0], "db_schema_name"), error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(db_schema_schema->children[1], NANOARROW_TYPE_LIST),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetName(db_schema_schema->children[1], "db_schema_statistics"),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetTypeStruct(db_schema_schema->children[1]->children[0], 5),
+             error);
+    db_schema_schema->children[1]->flags &= ~ARROW_FLAG_NULLABLE;
+
+    struct ArrowSchema* statistics_schema = db_schema_schema->children[1]->children[0];
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(statistics_schema->children[0], NANOARROW_TYPE_STRING),
+             error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(statistics_schema->children[0], "table_name"),
+             error);
+    statistics_schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(statistics_schema->children[1], NANOARROW_TYPE_STRING),
+             error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(statistics_schema->children[1], "column_name"),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(statistics_schema->children[2], NANOARROW_TYPE_INT16),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetName(statistics_schema->children[2], "statistic_key"), error);
+    statistics_schema->children[2]->flags &= ~ARROW_FLAG_NULLABLE;
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetTypeUnion(statistics_schema->children[3],
+                                     NANOARROW_TYPE_DENSE_UNION, 4),
+             error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetName(statistics_schema->children[3], "statistic_value"),
+             error);
+    statistics_schema->children[3]->flags &= ~ARROW_FLAG_NULLABLE;
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(statistics_schema->children[4], NANOARROW_TYPE_BOOL),
+             error);
+    CHECK_NA(
+        INTERNAL,
+        ArrowSchemaSetName(statistics_schema->children[4], "statistic_is_approximate"),
+        error);
+    statistics_schema->children[4]->flags &= ~ARROW_FLAG_NULLABLE;
+
+    struct ArrowSchema* value_schema = statistics_schema->children[3];
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(value_schema->children[0], NANOARROW_TYPE_INT64), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[0], "int64"), error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(value_schema->children[1], NANOARROW_TYPE_UINT64), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[1], "uint64"), error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(value_schema->children[2], NANOARROW_TYPE_DOUBLE), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[2], "float64"), error);
+    CHECK_NA(INTERNAL,
+             ArrowSchemaSetType(value_schema->children[3], NANOARROW_TYPE_BINARY), error);
+    CHECK_NA(INTERNAL, ArrowSchemaSetName(value_schema->children[3], "binary"), error);
+  }
+
+  // Set up builders
+  struct ArrowError na_error = {0};
+  CHECK_NA_DETAIL(INTERNAL, ArrowArrayInitFromSchema(array, uschema.get(), &na_error),
+                  &na_error, error);
+  CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error);
+
+  struct ArrowArray* catalog_name_col = array->children[0];
+  struct ArrowArray* catalog_db_schemas_col = array->children[1];
+  struct ArrowArray* catalog_db_schemas_items = catalog_db_schemas_col->children[0];
+  struct ArrowArray* db_schema_name_col = catalog_db_schemas_items->children[0];
+  struct ArrowArray* db_schema_statistics_col = catalog_db_schemas_items->children[1];
+  struct ArrowArray* db_schema_statistics_items = db_schema_statistics_col->children[0];
+  struct ArrowArray* statistics_table_name_col = db_schema_statistics_items->children[0];
+  struct ArrowArray* statistics_column_name_col = db_schema_statistics_items->children[1];
+  struct ArrowArray* statistics_key_col = db_schema_statistics_items->children[2];
+  struct ArrowArray* statistics_value_col = db_schema_statistics_items->children[3];
+  struct ArrowArray* statistics_is_approximate_col =
+      db_schema_statistics_items->children[4];
+  // Only the float64 union child is populated; the int64 (children[0]),
+  // uint64 (children[1]) and binary (children[3]) children stay empty.
+  struct ArrowArray* value_float64_col = statistics_value_col->children[2];
+
+  // Query (could probably be massively improved)
+  std::string query = R"(
+    WITH
+      class AS (
+        SELECT nspname, relname, reltuples
+        FROM pg_namespace
+        INNER JOIN pg_class ON pg_class.relnamespace = pg_namespace.oid
+      )
+    SELECT tablename, attname, null_frac, avg_width, n_distinct, reltuples
+    FROM pg_stats
+    INNER JOIN class ON pg_stats.schemaname = class.nspname AND pg_stats.tablename = class.relname
+    WHERE pg_stats.schemaname = $1 AND tablename LIKE $2
+    ORDER BY tablename
+)";
+
+  CHECK_NA(INTERNAL, ArrowArrayAppendString(catalog_name_col, ArrowCharView(PQdb(conn))),
+           error);
+  CHECK_NA(INTERNAL, ArrowArrayAppendString(db_schema_name_col, ArrowCharView(db_schema)),
+           error);
+
+  constexpr int8_t kStatsVariantFloat64 = 2;
+
+  // Appends one element of db_schema_statistics. Every statistic reported
+  // here is a float64 and approximate. A null `column` writes a null
+  // column_name, which is how the table-level row count is recorded.
+  auto append_statistic = [&](const PqRecord& table, const PqRecord* column, int64_t key,
+                              double value) -> AdbcStatusCode {
+    CHECK_NA(INTERNAL,
+             ArrowArrayAppendString(statistics_table_name_col,
+                                    ArrowStringView{table.data, table.len}),
+             error);
+    if (column != nullptr) {
+      CHECK_NA(INTERNAL,
+               ArrowArrayAppendString(statistics_column_name_col,
+                                      ArrowStringView{column->data, column->len}),
+               error);
+    } else {
+      CHECK_NA(INTERNAL, ArrowArrayAppendNull(statistics_column_name_col, 1), error);
+    }
+    CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_key_col, key), error);
+    CHECK_NA(INTERNAL, ArrowArrayAppendDouble(value_float64_col, value), error);
+    CHECK_NA(INTERNAL,
+             ArrowArrayFinishUnionElement(statistics_value_col, kStatsVariantFloat64),
+             error);
+    CHECK_NA(INTERNAL, ArrowArrayAppendInt(statistics_is_approximate_col, 1), error);
+    CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_items), error);
+    return ADBC_STATUS_OK;
+  };
+
+  std::string prev_table;
+
+  {
+    PqResultHelper result_helper{
+        conn, query, {db_schema, table_name ? table_name : "%"}, error};
+    RAISE_ADBC(result_helper.Prepare());
+    RAISE_ADBC(result_helper.Execute());
+
+    for (PqResultRow row : result_helper) {
+      const PqRecord table = row[0];
+      const PqRecord column = row[1];
+
+      auto reltuples = row[5].ParseDouble();
+      if (!reltuples.first) {
+        SetError(error, "[libpq] Invalid double value in reltuples: '%s'", row[5].data);
+        return ADBC_STATUS_INTERNAL;
+      }
+
+      // Rows are ordered by table name, so a change of name marks the first
+      // column of a new table: emit the table-level row count exactly once.
+      if (std::strcmp(prev_table.c_str(), table.data) != 0) {
+        RAISE_ADBC(append_statistic(table, /*column=*/nullptr,
+                                    ADBC_STATISTIC_ROW_COUNT_KEY, reltuples.second));
+        prev_table.assign(table.data, table.len);
+      }
+
+      auto null_frac = row[2].ParseDouble();
+      if (!null_frac.first) {
+        SetError(error, "[libpq] Invalid double value in null_frac: '%s'", row[2].data);
+        return ADBC_STATUS_INTERNAL;
+      }
+      // null_frac is a fraction of the rows; scale it to a count
+      RAISE_ADBC(append_statistic(table, &column, ADBC_STATISTIC_NULL_COUNT_KEY,
+                                  null_frac.second * reltuples.second));
+
+      auto average_byte_width = row[3].ParseDouble();
+      if (!average_byte_width.first) {
+        SetError(error, "[libpq] Invalid double value in avg_width: '%s'", row[3].data);
+        return ADBC_STATUS_INTERNAL;
+      }
+      RAISE_ADBC(append_statistic(table, &column, ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY,
+                                  average_byte_width.second));
+
+      auto n_distinct = row[4].ParseDouble();
+      if (!n_distinct.first) {
+        SetError(error, "[libpq] Invalid double value in n_distinct: '%s'", row[4].data);
+        return ADBC_STATUS_INTERNAL;
+      }
+      // > If greater than zero, the estimated number of distinct values in
+      // > the column. If less than zero, the negative of the number of
+      // > distinct values divided by the number of rows.
+      // https://www.postgresql.org/docs/current/view-pg-stats.html
+      const double distinct_count =
+          n_distinct.second > 0 ? n_distinct.second
+                                : (std::fabs(n_distinct.second) * reltuples.second);
+      RAISE_ADBC(append_statistic(table, &column, ADBC_STATISTIC_DISTINCT_COUNT_KEY,
+                                  distinct_count));
+    }
+  }
+
+  CHECK_NA(INTERNAL, ArrowArrayFinishElement(db_schema_statistics_col), error);
+  CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_items), error);
+  CHECK_NA(INTERNAL, ArrowArrayFinishElement(catalog_db_schemas_col), error);
+  CHECK_NA(INTERNAL, ArrowArrayFinishElement(array), error);
+
+  CHECK_NA_DETAIL(INTERNAL, ArrowArrayFinishBuildingDefault(array, &na_error), &na_error,
+                  error);
+  uschema.move(schema);
+  return ADBC_STATUS_OK;
+}
+
+/// \brief Export approximate table/column statistics as an Arrow stream.
+///
+/// Only approximate statistics, for one explicitly named schema, in the
+/// catalog this connection is attached to, are supported; any other request
+/// is rejected with ADBC_STATUS_NOT_IMPLEMENTED.
+AdbcStatusCode PostgresConnection::GetStatistics(const char* catalog,
+                                                 const char* db_schema,
+                                                 const char* table_name, bool approximate,
+                                                 struct ArrowArrayStream* out,
+                                                 struct AdbcError* error) {
+  // Simplify our jobs here: reject everything outside the supported subset
+  if (!approximate) {
+    SetError(error, "[libpq] Exact statistics are not implemented");
+    return ADBC_STATUS_NOT_IMPLEMENTED;
+  }
+  if (db_schema == nullptr) {
+    SetError(error, "[libpq] Must request statistics for a single schema");
+    return ADBC_STATUS_NOT_IMPLEMENTED;
+  }
+  if (catalog != nullptr && std::strcmp(catalog, PQdb(conn_)) != 0) {
+    SetError(error, "[libpq] Can only request statistics for current catalog");
+    return ADBC_STATUS_NOT_IMPLEMENTED;
+  }
+
+  struct ArrowSchema result_schema;
+  struct ArrowArray result_batch;
+  std::memset(&result_schema, 0, sizeof(result_schema));
+  std::memset(&result_batch, 0, sizeof(result_batch));
+
+  const AdbcStatusCode impl_status = PostgresConnectionGetStatisticsImpl(
+      conn_, db_schema, table_name, &result_schema, &result_batch, error);
+  if (impl_status == ADBC_STATUS_OK) {
+    return BatchToArrayStream(&result_batch, &result_schema, out, error);
+  }
+
+  // Both structs were zeroed above, so a non-null release callback means the
+  // helper got far enough to populate that struct.
+  if (result_schema.release) result_schema.release(&result_schema);
+  if (result_batch.release) result_batch.release(&result_batch);
+  return impl_status;
+}
+
+/// \brief Build the schema and a zero-row batch for GetStatisticNames.
+///
+/// The result is a struct of two non-nullable fields, statistic_name (string)
+/// and statistic_key (int16); no rows are appended.
+AdbcStatusCode PostgresConnectionGetStatisticNamesImpl(struct ArrowSchema* schema,
+                                                       struct ArrowArray* array,
+                                                       struct AdbcError* error) {
+  auto uschema = nanoarrow::UniqueSchema();
+  struct ArrowSchema* root = uschema.get();
+  ArrowSchemaInit(root);
+
+  CHECK_NA(INTERNAL, ArrowSchemaSetType(root, NANOARROW_TYPE_STRUCT), error);
+  CHECK_NA(INTERNAL, ArrowSchemaAllocateChildren(root, /*num_columns=*/2), error);
+
+  struct ArrowSchema* name_field = root->children[0];
+  ArrowSchemaInit(name_field);
+  CHECK_NA(INTERNAL, ArrowSchemaSetType(name_field, NANOARROW_TYPE_STRING), error);
+  CHECK_NA(INTERNAL, ArrowSchemaSetName(name_field, "statistic_name"), error);
+  name_field->flags &= ~ARROW_FLAG_NULLABLE;
+
+  struct ArrowSchema* key_field = root->children[1];
+  ArrowSchemaInit(key_field);
+  CHECK_NA(INTERNAL, ArrowSchemaSetType(key_field, NANOARROW_TYPE_INT16), error);
+  CHECK_NA(INTERNAL, ArrowSchemaSetName(key_field, "statistic_key"), error);
+  key_field->flags &= ~ARROW_FLAG_NULLABLE;
+
+  // Start and immediately finish building: the batch has no rows
+  CHECK_NA(INTERNAL, ArrowArrayInitFromSchema(array, root, NULL), error);
+  CHECK_NA(INTERNAL, ArrowArrayStartAppending(array), error);
+  CHECK_NA(INTERNAL, ArrowArrayFinishBuildingDefault(array, NULL), error);
+
+  uschema.move(schema);
+  return ADBC_STATUS_OK;
+}
+
+/// \brief Export the driver-specific statistic names as an Arrow stream.
+///
+/// \param[out] out Receives the stream (schema plus a zero-row batch).
+/// \param[out] error Populated on failure.
+/// \return The status of building the batch, or of wrapping it in a stream.
+AdbcStatusCode PostgresConnection::GetStatisticNames(struct ArrowArrayStream* out,
+                                                     struct AdbcError* error) {
+  // We don't support any extended statistics, just return an empty stream
+  struct ArrowSchema schema;
+  std::memset(&schema, 0, sizeof(schema));
+  struct ArrowArray array;
+  std::memset(&array, 0, sizeof(array));
+
+  AdbcStatusCode status = PostgresConnectionGetStatisticNamesImpl(&schema, &array, error);
+  if (status != ADBC_STATUS_OK) {
+    // Both structs were zeroed above, so only release what was populated
+    if (schema.release) schema.release(&schema);
+    if (array.release) array.release(&array);
+    return status;
+  }
+  return BatchToArrayStream(&array, &schema, out, error);
+}
+
AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
const char* db_schema,
const char* table_name,
diff --git a/c/driver/postgresql/connection.h b/c/driver/postgresql/connection.h
index a9900564..cb7647ed 100644
--- a/c/driver/postgresql/connection.h
+++ b/c/driver/postgresql/connection.h
@@ -50,6 +50,10 @@ class PostgresConnection {
struct AdbcError* error);
AdbcStatusCode GetOptionInt(const char* option, int64_t* value,
struct AdbcError* error);
+ // Export table/column statistics as an Arrow stream. The definition in
+ // connection.cc rejects requests for exact statistics, for no schema, or
+ // for a catalog other than the current one with NOT_IMPLEMENTED.
+ AdbcStatusCode GetStatistics(const char* catalog, const char* db_schema,
+ const char* table_name, bool approximate,
+ struct ArrowArrayStream* out, struct AdbcError*
error);
+ // Export the names of driver-specific statistics. The definition in
+ // connection.cc returns a stream with no rows.
+ AdbcStatusCode GetStatisticNames(struct ArrowArrayStream* out, struct
AdbcError* error);
AdbcStatusCode GetTableSchema(const char* catalog, const char* db_schema,
const char* table_name, struct ArrowSchema*
schema,
struct AdbcError* error);
diff --git a/c/driver/postgresql/postgresql.cc
b/c/driver/postgresql/postgresql.cc
index d88a0b28..0c51f8b8 100644
--- a/c/driver/postgresql/postgresql.cc
+++ b/c/driver/postgresql/postgresql.cc
@@ -291,6 +291,27 @@ AdbcStatusCode PostgresConnectionGetOptionInt(struct
AdbcConnection* connection,
return (*ptr)->GetOptionInt(key, value, error);
}
+/// \brief Driver entry point for AdbcConnectionGetStatistics.
+///
+/// Unwraps the PostgresConnection stored in connection->private_data and
+/// forwards the request to it.
+///
+/// \param[in] approximate C-style boolean: zero requests exact statistics,
+///   any nonzero value allows approximate ones.
+/// \return ADBC_STATUS_INVALID_STATE if the connection has no private data,
+///   otherwise the result of PostgresConnection::GetStatistics.
+AdbcStatusCode PostgresConnectionGetStatistics(struct AdbcConnection* connection,
+                                               const char* catalog, const char* db_schema,
+                                               const char* table_name, char approximate,
+                                               struct ArrowArrayStream* out,
+                                               struct AdbcError* error) {
+  if (!connection->private_data) return ADBC_STATUS_INVALID_STATE;
+  auto ptr =
+      reinterpret_cast<std::shared_ptr<PostgresConnection>*>(connection->private_data);
+  // Compare against zero rather than one so that every nonzero flag value is
+  // treated as "approximate".
+  return (*ptr)->GetStatistics(catalog, db_schema, table_name, approximate != 0, out,
+                               error);
+}
+
+/// \brief Driver entry point for AdbcConnectionGetStatisticNames.
+///
+/// Fails with ADBC_STATUS_INVALID_STATE when the connection carries no
+/// private data; otherwise delegates to the wrapped PostgresConnection.
+AdbcStatusCode PostgresConnectionGetStatisticNames(struct AdbcConnection* connection,
+                                                   struct ArrowArrayStream* out,
+                                                   struct AdbcError* error) {
+  if (connection->private_data == nullptr) {
+    return ADBC_STATUS_INVALID_STATE;
+  }
+  using ConnectionHandle = std::shared_ptr<PostgresConnection>;
+  auto* handle = reinterpret_cast<ConnectionHandle*>(connection->private_data);
+  return (*handle)->GetStatisticNames(out, error);
+}
+
AdbcStatusCode PostgresConnectionGetTableSchema(
struct AdbcConnection* connection, const char* catalog, const char*
db_schema,
const char* table_name, struct ArrowSchema* schema, struct AdbcError*
error) {
@@ -443,6 +464,21 @@ AdbcStatusCode AdbcConnectionGetOptionDouble(struct
AdbcConnection* connection,
return PostgresConnectionGetOptionDouble(connection, key, value, error);
}
+// Exported ADBC entry point; forwards every argument unchanged to
+// PostgresConnectionGetStatistics.
+AdbcStatusCode AdbcConnectionGetStatistics(struct AdbcConnection* connection,
+ const char* catalog, const char*
db_schema,
+ const char* table_name, char
approximate,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ return PostgresConnectionGetStatistics(connection, catalog, db_schema,
table_name,
+ approximate, out, error);
+}
+
+// Exported ADBC entry point; forwards every argument unchanged to
+// PostgresConnectionGetStatisticNames.
+AdbcStatusCode AdbcConnectionGetStatisticNames(struct AdbcConnection*
connection,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ return PostgresConnectionGetStatisticNames(connection, out, error);
+}
+
AdbcStatusCode AdbcConnectionGetTableSchema(struct AdbcConnection* connection,
const char* catalog, const char*
db_schema,
const char* table_name,
@@ -826,6 +862,8 @@ AdbcStatusCode PostgresqlDriverInit(int version, void*
raw_driver,
driver->ConnectionGetOptionBytes = PostgresConnectionGetOptionBytes;
driver->ConnectionGetOptionDouble = PostgresConnectionGetOptionDouble;
driver->ConnectionGetOptionInt = PostgresConnectionGetOptionInt;
+ driver->ConnectionGetStatistics = PostgresConnectionGetStatistics;
+ driver->ConnectionGetStatisticNames = PostgresConnectionGetStatisticNames;
driver->ConnectionSetOptionBytes = PostgresConnectionSetOptionBytes;
driver->ConnectionSetOptionDouble = PostgresConnectionSetOptionDouble;
driver->ConnectionSetOptionInt = PostgresConnectionSetOptionInt;
diff --git a/c/driver/postgresql/postgresql_test.cc
b/c/driver/postgresql/postgresql_test.cc
index 07fdfb2e..96fc2e44 100644
--- a/c/driver/postgresql/postgresql_test.cc
+++ b/c/driver/postgresql/postgresql_test.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <cmath>
#include <cstdlib>
#include <cstring>
#include <limits>
@@ -127,6 +128,7 @@ class PostgresQuirks : public adbc_validation::DriverQuirks
{
}
bool supports_metadata_current_catalog() const override { return true; }
bool supports_metadata_current_db_schema() const override { return true; }
+ // Opt in to the statistics tests, which skip themselves when this is false.
+ bool supports_statistics() const override { return true; }
};
class PostgresDatabaseTest : public ::testing::Test,
@@ -637,6 +639,169 @@ TEST_F(PostgresConnectionTest,
MetadataSetCurrentDbSchema) {
ASSERT_THAT(AdbcStatementRelease(&statement.value, &error),
IsOkStatus(&error));
}
+// End-to-end check of AdbcConnectionGetStatistics: creates and ANALYZEs a
+// three-row table, validates each level of the nested result schema, then
+// flattens the result into (column name, statistic key, value) tuples and
+// compares them against the expected statistics.
+TEST_F(PostgresConnectionTest, MetadataGetStatistics) {
+ if (!quirks()->supports_statistics()) {
+ GTEST_SKIP();
+ }
+
+ ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error));
+ ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error),
IsOkStatus(&error));
+
+ // Create sample table
+ {
+ adbc_validation::Handle<struct AdbcStatement> statement;
+ ASSERT_THAT(AdbcStatementNew(&connection, &statement.value, &error),
+ IsOkStatus(&error));
+
+ ASSERT_THAT(AdbcStatementSetSqlQuery(&statement.value,
+ "DROP TABLE IF EXISTS statstable",
&error),
+ IsOkStatus(&error));
+ ASSERT_THAT(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr,
&error),
+ IsOkStatus(&error));
+
+ ASSERT_THAT(
+ AdbcStatementSetSqlQuery(&statement.value,
+ "CREATE TABLE statstable (ints INT, strs
TEXT)", &error),
+ IsOkStatus(&error));
+ ASSERT_THAT(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr,
&error),
+ IsOkStatus(&error));
+
+ // Three rows; each column holds exactly one NULL and two distinct values
+ ASSERT_THAT(
+ AdbcStatementSetSqlQuery(
+ &statement.value,
+ "INSERT INTO statstable VALUES (1, 'a'), (NULL, 'bcd'), (-5,
NULL)", &error),
+ IsOkStatus(&error));
+ ASSERT_THAT(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr,
&error),
+ IsOkStatus(&error));
+
+ // Run ANALYZE on the table before statistics are requested below
+ ASSERT_THAT(AdbcStatementSetSqlQuery(&statement.value, "ANALYZE
statstable", &error),
+ IsOkStatus(&error));
+ ASSERT_THAT(AdbcStatementExecuteQuery(&statement.value, nullptr, nullptr,
&error),
+ IsOkStatus(&error));
+
+ ASSERT_THAT(AdbcStatementRelease(&statement.value, &error),
IsOkStatus(&error));
+ }
+
+ // Request approximate statistics (approximate = 1) for just this table
+ adbc_validation::StreamReader reader;
+ ASSERT_THAT(
+ AdbcConnectionGetStatistics(&connection, nullptr,
quirks()->db_schema().c_str(),
+ "statstable", 1, &reader.stream.value,
&error),
+ IsOkStatus(&error));
+ ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
+
+ // Top level: one row per catalog
+ ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(
+ &reader.schema.value, {
+ {"catalog_name", NANOARROW_TYPE_STRING, true},
+ {"catalog_db_schemas", NANOARROW_TYPE_LIST,
false},
+ }));
+
+ // Items of catalog_db_schemas
+ ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(
+ reader.schema->children[1]->children[0],
+ {
+ {"db_schema_name", NANOARROW_TYPE_STRING, true},
+ {"db_schema_statistics", NANOARROW_TYPE_LIST, false},
+ }));
+
+ // Items of db_schema_statistics
+ ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(
+ reader.schema->children[1]->children[0]->children[1]->children[0],
+ {
+ {"table_name", NANOARROW_TYPE_STRING, false},
+ {"column_name", NANOARROW_TYPE_STRING, true},
+ {"statistic_key", NANOARROW_TYPE_INT16, false},
+ {"statistic_value", NANOARROW_TYPE_DENSE_UNION, false},
+ {"statistic_is_approximate", NANOARROW_TYPE_BOOL, false},
+ }));
+
+ // Children of the statistic_value union
+ ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(
+
reader.schema->children[1]->children[0]->children[1]->children[0]->children[3],
+ {
+ {"int64", NANOARROW_TYPE_INT64, true},
+ {"uint64", NANOARROW_TYPE_UINT64, true},
+ {"float64", NANOARROW_TYPE_DOUBLE, true},
+ {"binary", NANOARROW_TYPE_BINARY, true},
+ }));
+
+ // (column name or nullopt, statistic key, statistic value * 100 rounded)
+ std::vector<std::tuple<std::optional<std::string>, int16_t, int64_t>> seen;
+ while (true) {
+ ASSERT_NO_FATAL_FAILURE(reader.Next());
+ if (!reader.array->release) break;
+
+ for (int64_t catalog_index = 0; catalog_index < reader.array->length;
+ catalog_index++) {
+ struct ArrowStringView catalog_name =
+ ArrowArrayViewGetStringUnsafe(reader.array_view->children[0],
catalog_index);
+ ASSERT_EQ(quirks()->catalog(),
+ std::string_view(catalog_name.data,
+
static_cast<int64_t>(catalog_name.size_bytes)));
+
+ struct ArrowArrayView* catalog_db_schemas =
reader.array_view->children[1];
+ struct ArrowArrayView* schema_stats =
catalog_db_schemas->children[0]->children[1];
+ struct ArrowArrayView* stats =
+ catalog_db_schemas->children[0]->children[1]->children[0];
+ // Walk the list offsets to visit the schemas belonging to this catalog
+ for (int64_t schema_index =
+ ArrowArrayViewListChildOffset(catalog_db_schemas,
catalog_index);
+ schema_index <
+ ArrowArrayViewListChildOffset(catalog_db_schemas, catalog_index +
1);
+ schema_index++) {
+ struct ArrowStringView schema_name = ArrowArrayViewGetStringUnsafe(
+ catalog_db_schemas->children[0]->children[0], schema_index);
+ ASSERT_EQ(quirks()->db_schema(),
+ std::string_view(schema_name.data,
+
static_cast<int64_t>(schema_name.size_bytes)));
+
+ // Likewise for the statistics belonging to this schema
+ for (int64_t stat_index =
+ ArrowArrayViewListChildOffset(schema_stats, schema_index);
+ stat_index < ArrowArrayViewListChildOffset(schema_stats,
schema_index + 1);
+ stat_index++) {
+ struct ArrowStringView table_name =
+ ArrowArrayViewGetStringUnsafe(stats->children[0], stat_index);
+ ASSERT_EQ("statstable",
+ std::string_view(table_name.data,
+
static_cast<int64_t>(table_name.size_bytes)));
+ // column_name stays nullopt when the column_name field is null
+ std::optional<std::string> column_name;
+ if (!ArrowArrayViewIsNull(stats->children[1], stat_index)) {
+ struct ArrowStringView value =
+ ArrowArrayViewGetStringUnsafe(stats->children[1], stat_index);
+ column_name = std::string(value.data, value.size_bytes);
+ }
+ // statistic_is_approximate must be set on every row
+ ASSERT_TRUE(ArrowArrayViewGetIntUnsafe(stats->children[4],
stat_index));
+
+ const int16_t stat_key = static_cast<int16_t>(
+ ArrowArrayViewGetIntUnsafe(stats->children[2], stat_index));
+ // Read from the union's second buffer the index of this row's value
+ // within the union child
+ const int32_t offset =
+ stats->children[3]->buffer_views[1].data.as_int32[stat_index];
+ int64_t stat_value;
+ switch (stat_key) {
+ case ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY:
+ case ADBC_STATISTIC_DISTINCT_COUNT_KEY:
+ case ADBC_STATISTIC_NULL_COUNT_KEY:
+ case ADBC_STATISTIC_ROW_COUNT_KEY:
+ // Values are read from the float64 child; scale by 100 and round
+ // so they can be compared as integers
+ stat_value = static_cast<int64_t>(
+ std::round(100 * ArrowArrayViewGetDoubleUnsafe(
+ stats->children[3]->children[2],
offset)));
+ break;
+ default:
+ // Statistics with any other key are not checked
+ continue;
+ }
+ seen.emplace_back(std::move(column_name), stat_key, stat_value);
+ }
+ }
+ }
+ }
+
+ // Expected values are the statistics multiplied by 100 (see rounding above)
+ ASSERT_THAT(seen,
+ ::testing::UnorderedElementsAreArray(
+ std::vector<std::tuple<std::optional<std::string>, int16_t,
int64_t>>{
+ {"ints", ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY, 400},
+ {"strs", ADBC_STATISTIC_AVERAGE_BYTE_WIDTH_KEY, 300},
+ {"ints", ADBC_STATISTIC_NULL_COUNT_KEY, 100},
+ {"strs", ADBC_STATISTIC_NULL_COUNT_KEY, 100},
+ {"ints", ADBC_STATISTIC_DISTINCT_COUNT_KEY, 200},
+ {"strs", ADBC_STATISTIC_DISTINCT_COUNT_KEY, 200},
+ {std::nullopt, ADBC_STATISTIC_ROW_COUNT_KEY, 300},
+ }));
+}
+
ADBCV_TEST_CONNECTION(PostgresConnectionTest)
class PostgresStatementTest : public ::testing::Test,
diff --git a/c/driver/postgresql/statement.cc b/c/driver/postgresql/statement.cc
index f95ee20d..3e6f92e2 100644
--- a/c/driver/postgresql/statement.cc
+++ b/c/driver/postgresql/statement.cc
@@ -28,11 +28,11 @@
#include <adbc.h>
#include <libpq-fe.h>
-#include <postgresql/error.h>
#include <nanoarrow/nanoarrow.hpp>
#include "common/utils.h"
#include "connection.h"
+#include "error.h"
#include "postgres_copy_reader.h"
#include "postgres_type.h"
#include "postgres_util.h"
diff --git a/c/driver/sqlite/sqlite.c b/c/driver/sqlite/sqlite.c
index efbcd03c..12538b87 100644
--- a/c/driver/sqlite/sqlite.c
+++ b/c/driver/sqlite/sqlite.c
@@ -1711,6 +1711,20 @@ AdbcStatusCode AdbcConnectionGetObjects(struct
AdbcConnection* connection, int d
table_type, column_name, out, error);
}
+// Stub: this driver does not provide statistics; every argument is ignored
+// and ADBC_STATUS_NOT_IMPLEMENTED is returned.
+AdbcStatusCode AdbcConnectionGetStatistics(struct AdbcConnection* connection,
+ const char* catalog, const char*
db_schema,
+ const char* table_name, char
approximate,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
+// Stub: this driver does not provide statistics; every argument is ignored
+// and ADBC_STATUS_NOT_IMPLEMENTED is returned.
+AdbcStatusCode AdbcConnectionGetStatisticNames(struct AdbcConnection*
connection,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
AdbcStatusCode AdbcConnectionGetTableSchema(struct AdbcConnection* connection,
const char* catalog, const char*
db_schema,
const char* table_name,
diff --git a/c/driver_manager/adbc_driver_manager.cc
b/c/driver_manager/adbc_driver_manager.cc
index 370b3eea..d5579303 100644
--- a/c/driver_manager/adbc_driver_manager.cc
+++ b/c/driver_manager/adbc_driver_manager.cc
@@ -221,6 +221,17 @@ AdbcStatusCode ConnectionGetOptionDouble(struct
AdbcConnection* connection,
return ADBC_STATUS_NOT_FOUND;
}
+// Default implementation that ignores its arguments and reports
+// NOT_IMPLEMENTED; it is registered through FILL_DEFAULT(driver,
+// ConnectionGetStatistics) in AdbcLoadDriverFromInitFunc.
+AdbcStatusCode ConnectionGetStatistics(struct AdbcConnection*, const char*,
const char*,
+ const char*, char, struct
ArrowArrayStream*,
+ struct AdbcError*) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
+// Default implementation that ignores its arguments and reports
+// NOT_IMPLEMENTED; it is registered through FILL_DEFAULT(driver,
+// ConnectionGetStatisticNames) in AdbcLoadDriverFromInitFunc.
+AdbcStatusCode ConnectionGetStatisticNames(struct AdbcConnection*,
+ struct ArrowArrayStream*, struct
AdbcError*) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
AdbcStatusCode ConnectionGetTableSchema(struct AdbcConnection*, const char*,
const char*,
const char*, struct ArrowSchema*,
struct AdbcError* error) {
@@ -802,6 +813,29 @@ AdbcStatusCode AdbcConnectionGetOptionDouble(struct
AdbcConnection* connection,
error);
}
+/// \brief Driver manager trampoline for AdbcConnectionGetStatistics.
+///
+/// \param[in] approximate C-style boolean, handed to the driver as given.
+/// \return ADBC_STATUS_INVALID_STATE if no driver is attached to the
+///   connection, otherwise whatever the driver's ConnectionGetStatistics
+///   returns.
+AdbcStatusCode AdbcConnectionGetStatistics(struct AdbcConnection* connection,
+                                           const char* catalog, const char* db_schema,
+                                           const char* table_name, char approximate,
+                                           struct ArrowArrayStream* out,
+                                           struct AdbcError* error) {
+  if (!connection->private_driver) {
+    return ADBC_STATUS_INVALID_STATE;
+  }
+  INIT_ERROR(error, connection);
+  // Forward the flag unchanged. Any nonzero value means "approximate", so
+  // collapsing it with `== 1` would turn e.g. 2 into a request for exact
+  // statistics before the driver ever sees it.
+  return connection->private_driver->ConnectionGetStatistics(
+      connection, catalog, db_schema, table_name, approximate, out, error);
+}
+
+/// \brief Driver manager trampoline for AdbcConnectionGetStatisticNames.
+///
+/// Returns ADBC_STATUS_INVALID_STATE when no driver is attached to the
+/// connection; otherwise dispatches to the driver's implementation.
+AdbcStatusCode AdbcConnectionGetStatisticNames(struct AdbcConnection* connection,
+                                               struct ArrowArrayStream* out,
+                                               struct AdbcError* error) {
+  if (connection->private_driver == nullptr) return ADBC_STATUS_INVALID_STATE;
+
+  INIT_ERROR(error, connection);
+  auto* driver = connection->private_driver;
+  return driver->ConnectionGetStatisticNames(connection, out, error);
+}
+
AdbcStatusCode AdbcConnectionGetTableSchema(struct AdbcConnection* connection,
const char* catalog, const char*
db_schema,
const char* table_name,
@@ -1464,6 +1498,8 @@ AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int vers
FILL_DEFAULT(driver, ConnectionGetOptionBytes);
FILL_DEFAULT(driver, ConnectionGetOptionDouble);
FILL_DEFAULT(driver, ConnectionGetOptionInt);
+ FILL_DEFAULT(driver, ConnectionGetStatistics);
+ FILL_DEFAULT(driver, ConnectionGetStatisticNames);
FILL_DEFAULT(driver, ConnectionSetOptionBytes);
FILL_DEFAULT(driver, ConnectionSetOptionDouble);
FILL_DEFAULT(driver, ConnectionSetOptionInt);
diff --git a/c/validation/adbc_validation.cc b/c/validation/adbc_validation.cc
index 05014b92..b7fd00f2 100644
--- a/c/validation/adbc_validation.cc
+++ b/c/validation/adbc_validation.cc
@@ -1017,6 +1017,31 @@ void ConnectionTest::TestMetadataGetObjectsCancel() {
}
}
+void ConnectionTest::TestMetadataGetStatisticNames() {
+ if (!quirks()->supports_statistics()) {
+ GTEST_SKIP();
+ }
+
+ ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error));
+ ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error));
+
+ StreamReader reader;
+ ASSERT_THAT(AdbcConnectionGetStatisticNames(&connection, &reader.stream.value, &error),
+ IsOkStatus(&error));
+ ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
+
+ ASSERT_NO_FATAL_FAILURE(CompareSchema(
+ &reader.schema.value, {
+ {"statistic_name", NANOARROW_TYPE_STRING, NOT_NULL},
+ {"statistic_key", NANOARROW_TYPE_INT16, NOT_NULL},
+ }));
+
+ while (true) {
+ ASSERT_NO_FATAL_FAILURE(reader.Next());
+ if (!reader.array->release) break;
+ }
+}
+
//------------------------------------------------------------
// Tests of AdbcStatement
diff --git a/c/validation/adbc_validation.h b/c/validation/adbc_validation.h
index acca5be4..c11f4763 100644
--- a/c/validation/adbc_validation.h
+++ b/c/validation/adbc_validation.h
@@ -136,6 +136,9 @@ class DriverQuirks {
/// \brief Whether ExecuteQuery sets rows_affected appropriately
virtual bool supports_rows_affected() const { return true; }
+ /// \brief Whether we can get statistics
+ virtual bool supports_statistics() const { return false; }
+
/// \brief Default catalog to use for tests
virtual std::string catalog() const { return ""; }
@@ -196,6 +199,8 @@ class ConnectionTest {
void TestMetadataGetObjectsPrimaryKey();
void TestMetadataGetObjectsCancel();
+ void TestMetadataGetStatisticNames();
+
protected:
struct AdbcError error;
struct AdbcDatabase database;
@@ -226,7 +231,8 @@ class ConnectionTest {
TestMetadataGetObjectsConstraints(); \
} \
TEST_F(FIXTURE, MetadataGetObjectsPrimaryKey) { TestMetadataGetObjectsPrimaryKey(); } \
- TEST_F(FIXTURE, MetadataGetObjectsCancel) { TestMetadataGetObjectsCancel(); }
+ TEST_F(FIXTURE, MetadataGetObjectsCancel) { TestMetadataGetObjectsCancel(); } \
+ TEST_F(FIXTURE, MetadataGetStatisticNames) { TestMetadataGetStatisticNames(); }
class StatementTest {
public:
diff --git a/go/adbc/adbc.go b/go/adbc/adbc.go
index 9936f786..8f29c91b 100644
--- a/go/adbc/adbc.go
+++ b/go/adbc/adbc.go
@@ -696,14 +696,14 @@ type ConnectionGetStatistics interface {
// Field Name | Field Type
// -------------------------|----------------------------------
// catalog_name | utf8
- // catalog_db_schemas | list<DB_SCHEMA_SCHEMA>
+ // catalog_db_schemas | list<DB_SCHEMA_SCHEMA> not null
//
// DB_SCHEMA_SCHEMA is a Struct with fields:
//
// Field Name | Field Type
// -------------------------|----------------------------------
// db_schema_name | utf8
- // db_schema_statistics | list<STATISTICS_SCHEMA>
+ // db_schema_statistics | list<STATISTICS_SCHEMA> not null
//
// STATISTICS_SCHEMA is a Struct with fields:
//
diff --git a/go/adbc/drivermgr/adbc.h b/go/adbc/drivermgr/adbc.h
index 32eaf8b2..f1b9d675 100644
--- a/go/adbc/drivermgr/adbc.h
+++ b/go/adbc/drivermgr/adbc.h
@@ -1742,14 +1742,14 @@ AdbcStatusCode AdbcConnectionGetOptionDouble(struct AdbcConnection* connection,
/// | Field Name | Field Type |
/// |--------------------------|----------------------------------|
/// | catalog_name | utf8 |
-/// | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> |
+/// | catalog_db_schemas | list<DB_SCHEMA_SCHEMA> not null |
///
/// DB_SCHEMA_SCHEMA is a Struct with fields:
///
/// | Field Name | Field Type |
/// |--------------------------|----------------------------------|
/// | db_schema_name | utf8 |
-/// | db_schema_statistics | list<STATISTICS_SCHEMA> |
+/// | db_schema_statistics | list<STATISTICS_SCHEMA> not null |
///
/// STATISTICS_SCHEMA is a Struct with fields:
///
diff --git a/go/adbc/drivermgr/adbc_driver_manager.cc b/go/adbc/drivermgr/adbc_driver_manager.cc
index 370b3eea..d5579303 100644
--- a/go/adbc/drivermgr/adbc_driver_manager.cc
+++ b/go/adbc/drivermgr/adbc_driver_manager.cc
@@ -221,6 +221,17 @@ AdbcStatusCode ConnectionGetOptionDouble(struct AdbcConnection* connection,
return ADBC_STATUS_NOT_FOUND;
}
+AdbcStatusCode ConnectionGetStatistics(struct AdbcConnection*, const char*, const char*,
+ const char*, char, struct ArrowArrayStream*,
+ struct AdbcError*) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
+AdbcStatusCode ConnectionGetStatisticNames(struct AdbcConnection*,
+ struct ArrowArrayStream*, struct AdbcError*) {
+ return ADBC_STATUS_NOT_IMPLEMENTED;
+}
+
AdbcStatusCode ConnectionGetTableSchema(struct AdbcConnection*, const char*, const char*,
const char*, struct ArrowSchema*,
struct AdbcError* error) {
@@ -802,6 +813,29 @@ AdbcStatusCode AdbcConnectionGetOptionDouble(struct AdbcConnection* connection,
error);
}
+AdbcStatusCode AdbcConnectionGetStatistics(struct AdbcConnection* connection,
+ const char* catalog, const char* db_schema,
+ const char* table_name, char approximate,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ if (!connection->private_driver) {
+ return ADBC_STATUS_INVALID_STATE;
+ }
+ INIT_ERROR(error, connection);
+ return connection->private_driver->ConnectionGetStatistics(
+ connection, catalog, db_schema, table_name, approximate == 1, out, error);
+}
+
+AdbcStatusCode AdbcConnectionGetStatisticNames(struct AdbcConnection* connection,
+ struct ArrowArrayStream* out,
+ struct AdbcError* error) {
+ if (!connection->private_driver) {
+ return ADBC_STATUS_INVALID_STATE;
+ }
+ INIT_ERROR(error, connection);
+ return connection->private_driver->ConnectionGetStatisticNames(connection, out, error);
+}
+
AdbcStatusCode AdbcConnectionGetTableSchema(struct AdbcConnection* connection,
const char* catalog, const char* db_schema,
const char* table_name,
@@ -1464,6 +1498,8 @@ AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int vers
FILL_DEFAULT(driver, ConnectionGetOptionBytes);
FILL_DEFAULT(driver, ConnectionGetOptionDouble);
FILL_DEFAULT(driver, ConnectionGetOptionInt);
+ FILL_DEFAULT(driver, ConnectionGetStatistics);
+ FILL_DEFAULT(driver, ConnectionGetStatisticNames);
FILL_DEFAULT(driver, ConnectionSetOptionBytes);
FILL_DEFAULT(driver, ConnectionSetOptionDouble);
FILL_DEFAULT(driver, ConnectionSetOptionInt);
diff --git a/go/adbc/standard_schemas.go b/go/adbc/standard_schemas.go
index 5ec888b4..5ad1ae8b 100644
--- a/go/adbc/standard_schemas.go
+++ b/go/adbc/standard_schemas.go
@@ -107,12 +107,12 @@ var (
StatisticsDBSchemaSchema = arrow.StructOf(
arrow.Field{Name: "db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true},
- arrow.Field{Name: "db_schema_statistics", Type: arrow.ListOf(StatisticsSchema), Nullable: true},
+ arrow.Field{Name: "db_schema_statistics", Type: arrow.ListOf(StatisticsSchema), Nullable: false},
)
GetStatisticsSchema = arrow.NewSchema([]arrow.Field{
{Name: "catalog_name", Type: arrow.BinaryTypes.String, Nullable: true},
- {Name: "catalog_db_schemas", Type: arrow.ListOf(StatisticsDBSchemaSchema), Nullable: true},
+ {Name: "catalog_db_schemas", Type: arrow.ListOf(StatisticsDBSchemaSchema), Nullable: false},
}, nil)
GetStatisticNamesSchema = arrow.NewSchema([]arrow.Field{
diff --git a/java/core/src/main/java/org/apache/arrow/adbc/core/AdbcConnection.java b/java/core/src/main/java/org/apache/arrow/adbc/core/AdbcConnection.java
index 9008bcc6..5fcb10a3 100644
--- a/java/core/src/main/java/org/apache/arrow/adbc/core/AdbcConnection.java
+++ b/java/core/src/main/java/org/apache/arrow/adbc/core/AdbcConnection.java
@@ -247,18 +247,18 @@ public interface AdbcConnection extends AutoCloseable, AdbcOptions {
* <p>The result is an Arrow dataset with the following schema:
*
* <table border="1">
- * <tr><th>Field Name</th> <th>Field Type</th> </tr>
- * <tr><td>catalog_name</td> <td>utf8</td> </tr>
- * <tr><td>catalog_db_schemas</td> <td>list[DB_SCHEMA_SCHEMA]</td> </tr>
+ * <tr><th>Field Name</th> <th>Field Type</th> </tr>
+ * <tr><td>catalog_name</td> <td>utf8</td> </tr>
+ * <tr><td>catalog_db_schemas</td> <td>list[DB_SCHEMA_SCHEMA] not null</td> </tr>
* <caption>The definition of the GetStatistics result schema.</caption>
* </table>
*
* <p>DB_SCHEMA_SCHEMA is a Struct with fields:
*
* <table border="1">
- * <tr><th>Field Name</th> <th>Field Type</th> </tr>
- * <tr><td>db_schema_name</td> <td>utf8</td> </tr>
- * <tr><td>db_schema_statistics</td> <td>list[STATISTICS_SCHEMA]</td></tr>
+ * <tr><th>Field Name</th> <th>Field Type</th> </tr>
+ * <tr><td>db_schema_name</td> <td>utf8</td> </tr>
+ * <tr><td>db_schema_statistics</td> <td>list[STATISTICS_SCHEMA] not null</td></tr>
* <caption>The definition of DB_SCHEMA_SCHEMA.</caption>
* </table>
*
diff --git a/java/core/src/main/java/org/apache/arrow/adbc/core/StandardSchemas.java b/java/core/src/main/java/org/apache/arrow/adbc/core/StandardSchemas.java
index c1e5594b..c059bb1b 100644
--- a/java/core/src/main/java/org/apache/arrow/adbc/core/StandardSchemas.java
+++ b/java/core/src/main/java/org/apache/arrow/adbc/core/StandardSchemas.java
@@ -183,7 +183,7 @@ public final class StandardSchemas {
new Field("db_schema_name", FieldType.notNullable(ArrowType.Utf8.INSTANCE), null),
new Field(
"db_schema_statistics",
- FieldType.nullable(ArrowType.List.INSTANCE),
+ FieldType.notNullable(ArrowType.List.INSTANCE),
Collections.singletonList(
new Field(
"item", FieldType.nullable(ArrowType.Struct.INSTANCE), STATISTICS_SCHEMA))));
@@ -198,7 +198,7 @@ public final class StandardSchemas {
new Field("catalog_name", FieldType.notNullable(ArrowType.Utf8.INSTANCE), null),
new Field(
"catalog_db_schemas",
- FieldType.nullable(ArrowType.List.INSTANCE),
+ FieldType.notNullable(ArrowType.List.INSTANCE),
Collections.singletonList(
new Field(
"item",
diff --git a/java/driver/flight-sql-validation/src/test/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatementTest.java b/java/driver/flight-sql-validation/src/test/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatementTest.java
index 306f69e4..8a407149 100644
--- a/java/driver/flight-sql-validation/src/test/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatementTest.java
+++ b/java/driver/flight-sql-validation/src/test/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatementTest.java
@@ -30,4 +30,16 @@ class FlightSqlStatementTest extends AbstractStatementTest {
@Override
@Disabled("Requires spec clarification")
public void prepareQueryWithParameters() {}
+
+ @Override
+ @Disabled("Not supported")
+ public void executeSchema() {}
+
+ @Override
+ @Disabled("Not supported")
+ public void executeSchemaPrepared() {}
+
+ @Override
+ @Disabled("Not supported")
+ public void executeSchemaParams() {}
}
diff --git a/java/driver/flight-sql/src/main/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.java b/java/driver/flight-sql/src/main/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.java
index 1fd8b910..e64508b4 100644
--- a/java/driver/flight-sql/src/main/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.java
+++ b/java/driver/flight-sql/src/main/java/org/apache/arrow/adbc/driver/flightsql/FlightSqlStatement.java
@@ -247,6 +247,18 @@ public class FlightSqlStatement implements AdbcStatement {
new FlightInfoReader(allocator, client, clientCache, info.getEndpoints()));
}
+ @Override
+ public Schema executeSchema() throws AdbcException {
+ if (bulkOperation != null) {
+ throw AdbcException.invalidState("[Flight SQL] Must executeUpdate() for bulk ingestion");
+ } else if (sqlQuery == null) {
+ throw AdbcException.invalidState("[Flight SQL] Must setSqlQuery() before execute");
+ }
+ return execute(
+ FlightSqlClient.PreparedStatement::getResultSetSchema,
+ (client) -> client.getExecuteSchema(sqlQuery).getSchema());
+ }
+
@Override
public UpdateResult executeUpdate() throws AdbcException {
if (bulkOperation != null) {
diff --git a/java/driver/jdbc-validation-derby/src/test/java/org/apache/arrow/adbc/driver/jdbc/derby/DerbyStatementTest.java b/java/driver/jdbc-validation-derby/src/test/java/org/apache/arrow/adbc/driver/jdbc/derby/DerbyStatementTest.java
index 0f713870..9d80935c 100644
--- a/java/driver/jdbc-validation-derby/src/test/java/org/apache/arrow/adbc/driver/jdbc/derby/DerbyStatementTest.java
+++ b/java/driver/jdbc-validation-derby/src/test/java/org/apache/arrow/adbc/driver/jdbc/derby/DerbyStatementTest.java
@@ -20,6 +20,7 @@ package org.apache.arrow.adbc.driver.jdbc.derby;
import java.nio.file.Path;
import org.apache.arrow.adbc.driver.testsuite.AbstractStatementTest;
import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.io.TempDir;
class DerbyStatementTest extends AbstractStatementTest {
@@ -29,4 +30,8 @@ class DerbyStatementTest extends AbstractStatementTest {
static void beforeAll() {
quirks = new DerbyQuirks(tempDir);
}
+
+ @Override
+ @Disabled("Not supported")
+ public void executeSchemaParams() {}
}
diff --git a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
index d9b1f554..6c32429e 100644
--- a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
+++ b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
@@ -790,7 +790,7 @@ class Cursor(_Closeable):
self,
table_name: str,
data: Union[pyarrow.RecordBatch, pyarrow.Table, pyarrow.RecordBatchReader],
- mode: Literal["append", "create", "replace", "append_create"] = "create",
+ mode: Literal["append", "create", "replace", "create_append"] = "create",
) -> int:
"""
Ingest Arrow data into a database table.
diff --git a/r/adbcpostgresql/bootstrap.R b/r/adbcpostgresql/bootstrap.R
index a68d888f..2670f760 100644
--- a/r/adbcpostgresql/bootstrap.R
+++ b/r/adbcpostgresql/bootstrap.R
@@ -26,6 +26,8 @@ files_to_vendor <- c(
"../../c/driver/postgresql/statement.cc",
"../../c/driver/postgresql/connection.h",
"../../c/driver/postgresql/connection.cc",
+ "../../c/driver/postgresql/error.h",
+ "../../c/driver/postgresql/error.cc",
"../../c/driver/postgresql/database.h",
"../../c/driver/postgresql/database.cc",
"../../c/driver/postgresql/postgresql.cc",
diff --git a/r/adbcpostgresql/src/.gitignore b/r/adbcpostgresql/src/.gitignore
index 34dd9749..cd8318e2 100644
--- a/r/adbcpostgresql/src/.gitignore
+++ b/r/adbcpostgresql/src/.gitignore
@@ -23,6 +23,8 @@ connection.cc
connection.h
database.h
database.cc
+error.h
+error.cc
postgresql.cc
statement.h
statement.cc
diff --git a/r/adbcpostgresql/src/Makevars.in b/r/adbcpostgresql/src/Makevars.in
index f90db0cb..8244768b 100644
--- a/r/adbcpostgresql/src/Makevars.in
+++ b/r/adbcpostgresql/src/Makevars.in
@@ -19,6 +19,7 @@ PKG_CPPFLAGS=-I../src @cppflags@ -DADBC_EXPORT=""
PKG_LIBS=@libs@
OBJECTS = init.o \
+ error.o \
connection.o \
database.o \
statement.o \
diff --git a/r/adbcpostgresql/src/Makevars.ucrt b/r/adbcpostgresql/src/Makevars.ucrt
index ebdc16fb..a72e9845 100644
--- a/r/adbcpostgresql/src/Makevars.ucrt
+++ b/r/adbcpostgresql/src/Makevars.ucrt
@@ -19,6 +19,7 @@ CRT=-ucrt
include Makevars.win
OBJECTS = init.o \
+ error.o \
connection.o \
database.o \
statement.o \
diff --git a/r/adbcpostgresql/src/Makevars.win b/r/adbcpostgresql/src/Makevars.win
index 0f039305..331ef274 100644
--- a/r/adbcpostgresql/src/Makevars.win
+++ b/r/adbcpostgresql/src/Makevars.win
@@ -22,6 +22,7 @@ PKG_LIBS = -L$(RWINLIB)/lib${R_ARCH}${CRT} \
-lpq -lpgport -lpgcommon -lssl -lcrypto -lwsock32 -lsecur32 -lws2_32 -lgdi32 -lcrypt32 -lwldap32
OBJECTS = init.o \
+ error.o \
connection.o \
database.o \
statement.o \