This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 9053ce48b docs: add SQLite cookbook example for batch size/inference (#2523)
9053ce48b is described below
commit 9053ce48bd95ddae1ec2c15767fa34fa45eb9b44
Author: David Li <[email protected]>
AuthorDate: Sun Feb 16 20:20:38 2025 -0500
docs: add SQLite cookbook example for batch size/inference (#2523)
Related to #581.
---
docs/source/driver/sqlite.rst | 2 +
docs/source/python/recipe/index.rst | 1 +
.../source/python/recipe/{index.rst => sqlite.rst} | 17 ++---
docs/source/python/recipe/sqlite_batch_rows.py | 82 ++++++++++++++++++++++
.../python/recipe/sqlite_batch_rows.py.stdout.txt | 2 +
5 files changed, 93 insertions(+), 11 deletions(-)
diff --git a/docs/source/driver/sqlite.rst b/docs/source/driver/sqlite.rst
index 64a845305..fc6b41384 100644
--- a/docs/source/driver/sqlite.rst
+++ b/docs/source/driver/sqlite.rst
@@ -104,6 +104,8 @@ shared across all connections.
with adbc_driver_sqlite.dbapi.connect() as conn:
pass
+ For more examples, see :doc:`../python/recipe/sqlite`.
+
.. tab-item:: R
:sync: r
diff --git a/docs/source/python/recipe/index.rst b/docs/source/python/recipe/index.rst
index 3645e0aa6..deff425ba 100644
--- a/docs/source/python/recipe/index.rst
+++ b/docs/source/python/recipe/index.rst
@@ -28,3 +28,4 @@ Python.
driver_manager
flight_sql
postgresql
+ sqlite
diff --git a/docs/source/python/recipe/index.rst b/docs/source/python/recipe/sqlite.rst
similarity index 80%
copy from docs/source/python/recipe/index.rst
copy to docs/source/python/recipe/sqlite.rst
index 3645e0aa6..5b9f1f795 100644
--- a/docs/source/python/recipe/index.rst
+++ b/docs/source/python/recipe/sqlite.rst
@@ -15,16 +15,11 @@
.. specific language governing permissions and limitations
.. under the License.
-===============
-Python Cookbook
-===============
+==============
+SQLite Recipes
+==============
-The cookbook provides task-oriented example code for using ADBC in
-Python.
+Change the batch size of the result set
+=======================================
-.. toctree::
- :maxdepth: 2
-
- driver_manager
- flight_sql
- postgresql
+.. recipe:: sqlite_batch_rows.py
diff --git a/docs/source/python/recipe/sqlite_batch_rows.py b/docs/source/python/recipe/sqlite_batch_rows.py
new file mode 100644
index 000000000..c880f3a9d
--- /dev/null
+++ b/docs/source/python/recipe/sqlite_batch_rows.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# RECIPE CATEGORY: SQLite
+# RECIPE KEYWORDS: batch size, type inference
+# RECIPE STARTS HERE
+
+#: The ADBC SQLite driver allows control over the size of batches in result
+#: sets. Because the driver performs type inference, this also controls how
+#: many rows the driver will look at to figure out the type. If you know your
+#: result set has many NULL rows up front, you may consider increasing the
+#: batch size so that the driver can infer the correct types.
+
+import adbc_driver_sqlite.dbapi
+
+conn = adbc_driver_sqlite.dbapi.connect()
+
+#: First we'll set up a demo table with 1024 NULL values.
+
+with conn.cursor() as cur:
+ cur.execute("CREATE TABLE demo (val TEXT)")
+
+ cur.execute(
+ """
+ WITH RECURSIVE series(n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1
+ FROM series
+ WHERE n + 1 <= 1024
+ )
+ INSERT INTO demo (val)
+ SELECT NULL
+ FROM series
+ """
+ )
+
+ cur.execute("INSERT INTO demo VALUES ('foo'), ('bar'), ('baz')")
+
+#: If we query the table naively, we'll get an error, because the driver first
+#: looks at the first 1024 values to determine the column type. But since
+#: every value is NULL, it falls back to the default type of int64, which poses
+#: a problem when it then encounters a string in the next batch.
+
+with conn.cursor() as cur:
+ try:
+ cur.execute("SELECT * FROM demo")
+ print(cur.fetchallarrow().schema)
+ except OSError as e:
+ print(e)
+ # Output:
+ # [SQLite] Type mismatch in column 0: expected INT64 but got STRING/BINARY
+ else:
+ raise RuntimeError("Expected an error")
+
+#: We can tell the driver to increase the batch size (and hence look at more
+#: rows).
+
+with conn.cursor() as cur:
+ cur.adbc_statement.set_options(
+ **{
+ adbc_driver_sqlite.StatementOptions.BATCH_ROWS.value: 2048,
+ }
+ )
+ cur.execute("SELECT * FROM demo")
+ print(cur.fetchallarrow().schema)
+ # Output:
+ # val: string
diff --git a/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt b/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt
new file mode 100644
index 000000000..33fc458a3
--- /dev/null
+++ b/docs/source/python/recipe/sqlite_batch_rows.py.stdout.txt
@@ -0,0 +1,2 @@
+[SQLite] Type mismatch in column 0: expected INT64 but got STRING/BINARY
+val: string