This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 5ed0f5b0 docs: update driver implementation status (#1383)
5ed0f5b0 is described below
commit 5ed0f5b07d193b443107a6f42feb3b8a467a981e
Author: David Li <[email protected]>
AuthorDate: Tue Dec 19 15:25:42 2023 -0500
docs: update driver implementation status (#1383)
Fixes #1382.
---
docs/source/driver/postgresql.rst | 131 ++++++++++++++++++++-
docs/source/driver/status.rst | 12 +-
docs/source/python/recipe/postgresql.rst | 12 ++
.../recipe/postgresql_create_append_table.py | 27 ++++-
...nd_table.py => postgresql_create_temp_table.py} | 65 +++++-----
.../python/recipe/postgresql_get_query_schema.py | 55 +++++++++
.../adbc_driver_manager/dbapi.py | 2 +-
7 files changed, 263 insertions(+), 41 deletions(-)
diff --git a/docs/source/driver/postgresql.rst
b/docs/source/driver/postgresql.rst
index 7ec5b7f5..36349644 100644
--- a/docs/source/driver/postgresql.rst
+++ b/docs/source/driver/postgresql.rst
@@ -31,7 +31,7 @@ overall approach.
.. _libpq: https://www.postgresql.org/docs/current/libpq.html
.. _pgeon: https://github.com/0x0L/pgeon
-.. note:: The PostgreSQL driver is experimental.
+.. note:: The PostgreSQL driver is in beta.
Performance/optimization and support for complex types and
different ADBC features is still ongoing.
@@ -188,6 +188,129 @@ PostgreSQL allows defining new types at runtime, so the
driver must
build a mapping of available types. This is currently done once at
startup.
-Type support is currently limited. Parameter binding and bulk
-ingestion support int16, int32, int64, and string. Reading result
-sets is limited to int32, int64, float, double, and string.
+Type support is currently limited depending on the type and whether it is
+being read or written.
+
+.. list-table:: Arrow type to PostgreSQL type mapping
+ :header-rows: 1
+
+ * - Arrow Type
+ - As Bind Parameter
+ - In Bulk Ingestion
+
+ * - binary
+ - Y
+ - BYTEA
+
+ * - bool
+ - Y
+ - BOOLEAN
+
+ * - date32
+ - Y
+ - DATE
+
+ * - date64
+ - N
+ - N
+
+ * - dictionary
+ - (as unpacked type)
+ - (as unpacked type, only for binary/string)
+
+ * - duration
+ - Y
+ - INTERVAL
+
+ * - float32
+ - Y
+ - REAL
+
+ * - float64
+ - Y
+ - DOUBLE PRECISION
+
+ * - int8
+ - Y
+ - SMALLINT
+
+ * - int16
+ - Y
+ - SMALLINT
+
+ * - int32
+ - Y
+ - INTEGER
+
+ * - int64
+ - Y
+ - BIGINT
+
+ * - large_binary
+ - N
+ - N
+
+ * - large_string
+ - Y
+ - TEXT
+
+ * - month_day_nano_interval
+ - Y
+ - INTERVAL
+
+ * - NA
+ - N
+ - N
+
+ * - string
+ - Y
+ - TEXT
+
+ * - timestamp
+ - Y
+ - TIMESTAMP/TIMESTAMP WITH TIMEZONE
+
+.. list-table:: PostgreSQL type to Arrow type mapping
+ :header-rows: 1
+
+ * - PostgreSQL Type
+ - In Result Set
+
+ * - ARRAY
+ - list
+ * - BIGINT
+ - int64
+ * - BINARY
+ - binary
+ * - BOOLEAN
+ - bool
+ * - CHAR
+ - utf8
+ * - DATE
+ - date32
+ * - DOUBLE PRECISION
+ - float64
+ * - INTEGER
+ - int32
+ * - INTERVAL
+ - month_day_nano_interval
+ * - NUMERIC
+ - utf8 [#numeric-utf8]_
+ * - REAL
+ - float32
+ * - SMALLINT
+ - int16
+ * - TEXT
+ - utf8
+ * - TIME
+ - time64
+ * - TIMESTAMP WITH TIME ZONE
+ - timestamp[unit, UTC]
+ * - TIMESTAMP WITHOUT TIME ZONE
+ - timestamp[unit]
+ * - VARCHAR
+ - utf8
+
+.. [#numeric-utf8] NUMERIC types are read as the string representation of the
+ value, because the PostgreSQL NUMERIC type cannot be
+ losslessly converted to the Arrow decimal types.
diff --git a/docs/source/driver/status.rst b/docs/source/driver/status.rst
index 64c50532..d295bc3f 100644
--- a/docs/source/driver/status.rst
+++ b/docs/source/driver/status.rst
@@ -34,6 +34,11 @@ Implementation Status
- Implementation Language
- Status
+ * - BigQuery
+ - C#
+ - C#
+ - Experimental
+
* - Flight SQL (Go)
- C, Go
- Go
@@ -64,11 +69,6 @@ Implementation Status
- Go
- Experimental
- * - BigQuery
- - C#
- - C#
- - Experimental
-
.. [#supported-languages] C drivers are usable from Go, Python, and Ruby as
well.
Feature Support
@@ -177,10 +177,10 @@ Update Queries
- Y
* - PostgreSQL
+ - N/A
- Y
- Y
- Y
- - N/A
- Y
* - SQLite
diff --git a/docs/source/python/recipe/postgresql.rst
b/docs/source/python/recipe/postgresql.rst
index dbf28adb..7e93a479 100644
--- a/docs/source/python/recipe/postgresql.rst
+++ b/docs/source/python/recipe/postgresql.rst
@@ -24,11 +24,18 @@ Authenticate with a username and password
.. recipe:: postgresql_authenticate.py
+.. _recipe-postgresql-create-append:
+
Create/append to a table from an Arrow table
============================================
.. recipe:: postgresql_create_append_table.py
+Create/append to a temporary table
+==================================
+
+.. recipe:: postgresql_create_temp_table.py
+
Execute a statement with bind parameters
========================================
@@ -39,6 +46,11 @@ Get the Arrow schema of a table
.. recipe:: postgresql_get_table_schema.py
+Get the Arrow schema of a query
+===============================
+
+.. recipe:: postgresql_get_query_schema.py
+
List catalogs, schemas, and tables
==================================
diff --git a/docs/source/python/recipe/postgresql_create_append_table.py
b/docs/source/python/recipe/postgresql_create_append_table.py
index 54331ba0..36e29b93 100644
--- a/docs/source/python/recipe/postgresql_create_append_table.py
+++ b/docs/source/python/recipe/postgresql_create_append_table.py
@@ -28,10 +28,11 @@ import adbc_driver_postgresql.dbapi
uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
conn = adbc_driver_postgresql.dbapi.connect(uri)
-#: For the purposes of testing, we'll first make sure the table
-#: doesn't exist.
+#: For the purposes of testing, we'll first make sure the tables we're about
+#: to use don't exist.
with conn.cursor() as cur:
cur.execute("DROP TABLE IF EXISTS example")
+ cur.execute("DROP TABLE IF EXISTS example2")
#: Now we can create the table.
with conn.cursor() as cur:
@@ -77,4 +78,26 @@ with conn.cursor() as cur:
cur.execute("SELECT COUNT(*) FROM example")
assert cur.fetchone() == (8,)
+#: We can also choose to create the table if it doesn't exist, and otherwise
+#: append.
+
+with conn.cursor() as cur:
+ cur.adbc_ingest("example2", data, mode="create_append")
+
+ cur.execute("SELECT COUNT(*) FROM example2")
+ assert cur.fetchone() == (4,)
+
+ cur.adbc_ingest("example2", data, mode="create_append")
+
+ cur.execute("SELECT COUNT(*) FROM example2")
+ assert cur.fetchone() == (8,)
+
+#: Finally, we can replace the table.
+
+with conn.cursor() as cur:
+ cur.adbc_ingest("example", data.slice(0, 2), mode="replace")
+
+ cur.execute("SELECT COUNT(*) FROM example")
+ assert cur.fetchone() == (2,)
+
conn.close()
diff --git a/docs/source/python/recipe/postgresql_create_append_table.py
b/docs/source/python/recipe/postgresql_create_temp_table.py
similarity index 52%
copy from docs/source/python/recipe/postgresql_create_append_table.py
copy to docs/source/python/recipe/postgresql_create_temp_table.py
index 54331ba0..2d762b9a 100644
--- a/docs/source/python/recipe/postgresql_create_append_table.py
+++ b/docs/source/python/recipe/postgresql_create_temp_table.py
@@ -16,8 +16,7 @@
# under the License.
# RECIPE STARTS HERE
-#: ADBC allows creating and appending to database tables using Arrow
-#: tables.
+#: ADBC allows creating and appending to temporary tables as well.
import os
@@ -28,24 +27,25 @@ import adbc_driver_postgresql.dbapi
uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
conn = adbc_driver_postgresql.dbapi.connect(uri)
-#: For the purposes of testing, we'll first make sure the table
-#: doesn't exist.
+#: For the purposes of testing, we'll first make sure the tables we're about
+#: to use don't exist.
with conn.cursor() as cur:
cur.execute("DROP TABLE IF EXISTS example")
-#: Now we can create the table.
-with conn.cursor() as cur:
- data = pyarrow.table(
+#: To create a temporary table, just specify the option "temporary".
+data = pyarrow.table(
+ [
+ [1, 2, None, 4],
+ ],
+ schema=pyarrow.schema(
[
- [1, 2, None, 4],
- ],
- schema=pyarrow.schema(
- [
- ("ints", "int32"),
- ]
- ),
- )
- cur.adbc_ingest("example", data, mode="create")
+ ("ints", "int32"),
+ ]
+ ),
+)
+
+with conn.cursor() as cur:
+ cur.adbc_ingest("example", data, mode="create", temporary=True)
conn.commit()
@@ -58,23 +58,32 @@ with conn.cursor() as cur:
cur.execute("SELECT COUNT(*) FROM example")
assert cur.fetchone() == (4,)
-#: If we try to ingest again, it'll fail, because the table already
-#: exists.
+#: Temporary tables are separate from regular tables, even if they have the
+#: same name.
+
with conn.cursor() as cur:
- try:
- cur.adbc_ingest("example", data, mode="create")
- except conn.ProgrammingError:
- pass
- else:
- raise RuntimeError("Should have failed!")
+ cur.adbc_ingest("example", data.slice(0, 2), mode="create",
temporary=False)
-conn.rollback()
+conn.commit()
-#: Instead, we can append to the table.
with conn.cursor() as cur:
- cur.adbc_ingest("example", data, mode="append")
+ #: Because we have two tables with the same name, we have to explicitly
+ #: reference the normal temporary table here.
+ cur.execute("SELECT COUNT(*) FROM public.example")
+ assert cur.fetchone() == (2,)
cur.execute("SELECT COUNT(*) FROM example")
- assert cur.fetchone() == (8,)
+ assert cur.fetchone() == (4,)
conn.close()
+
+#: After closing the connection, the temporary table is implicitly dropped.
+#: If we reconnect, the table won't exist; we'll see only the 'normal' table.
+
+with adbc_driver_postgresql.dbapi.connect(uri) as conn:
+ with conn.cursor() as cur:
+ cur.execute("SELECT COUNT(*) FROM example")
+ assert cur.fetchone() == (2,)
+
+#: All the regular ingestion options apply to temporary tables, too. See
+#: :ref:`recipe-postgresql-create-append` for more examples.
diff --git a/docs/source/python/recipe/postgresql_get_query_schema.py
b/docs/source/python/recipe/postgresql_get_query_schema.py
new file mode 100644
index 00000000..25684539
--- /dev/null
+++ b/docs/source/python/recipe/postgresql_get_query_schema.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# RECIPE STARTS HERE
+
+#: ADBC lets you get the schema of a result set, without executing the query.
+
+import os
+
+import pyarrow
+
+import adbc_driver_postgresql.dbapi
+
+uri = os.environ["ADBC_POSTGRESQL_TEST_URI"]
+conn = adbc_driver_postgresql.dbapi.connect(uri)
+
+#: We'll create an example table to test.
+with conn.cursor() as cur:
+ cur.execute("DROP TABLE IF EXISTS example")
+ cur.execute("CREATE TABLE example (ints INT, bigints BIGINT)")
+
+conn.commit()
+
+expected = pyarrow.schema(
+ [
+ ("ints", "int32"),
+ ("bigints", "int64"),
+ ]
+)
+
+with conn.cursor() as cur:
+ assert cur.adbc_execute_schema("SELECT * FROM example") == expected
+
+ #: PostgreSQL doesn't know the type here, so it just returns a guess.
+ assert cur.adbc_execute_schema("SELECT $1 AS res") == pyarrow.schema(
+ [
+ ("res", "string"),
+ ]
+ )
+
+conn.close()
diff --git a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
index 4c36ad5c..8f06ed03 100644
--- a/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
+++ b/python/adbc_driver_manager/adbc_driver_manager/dbapi.py
@@ -807,7 +807,7 @@ class Cursor(_Closeable):
The Arrow data to insert. This can be a pyarrow RecordBatch, Table
or RecordBatchReader, or any Arrow-compatible data that implements
the Arrow PyCapsule Protocol (i.e. has an ``__arrow_c_array__``
- or ``__arrow_c_stream__ ``method).
+ or ``__arrow_c_stream__`` method).
mode
How to deal with existing data: