This is an automated email from the ASF dual-hosted git repository.
houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 9f9a57d Use UUID to create unique table names in python binding (#1111)
9f9a57d is described below
commit 9f9a57de71d9505e72d87a6c6723497bf8cfe1eb
Author: Aidar Talibzhanov <[email protected]>
AuthorDate: Thu Oct 14 16:34:31 2021 +0100
Use UUID to create unique table names in python binding (#1111)
---
python/Cargo.toml | 1 +
python/src/context.rs | 11 +++++------
python/tests/test_df_sql.py | 20 ++++++++++++++++++++
3 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/python/Cargo.toml b/python/Cargo.toml
index e20302b..731edcb 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -32,6 +32,7 @@ tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"
rand = "0.7"
pyo3 = { version = "0.14.1", features = ["extension-module", "abi3", "abi3-py36"] }
datafusion = { path = "../datafusion", version = "5.1.0" }
+uuid = { version = "0.8", features = ["v4"] }
[lib]
name = "datafusion"
diff --git a/python/src/context.rs b/python/src/context.rs
index 24a2cb8..b813f27 100644
--- a/python/src/context.rs
+++ b/python/src/context.rs
@@ -18,8 +18,7 @@
use std::path::PathBuf;
use std::{collections::HashSet, sync::Arc};
-use rand::distributions::Alphanumeric;
-use rand::Rng;
+use uuid::Uuid;
use tokio::runtime::Runtime;
@@ -91,10 +90,10 @@ impl ExecutionContext {
// generate a random (unique) name for this table
// table name cannot start with numeric digit
- let name = std::iter::once('c')
- .chain(rand::thread_rng().sample_iter(&Alphanumeric))
- .take(10)
- .collect::<String>();
+ let name = "c".to_owned()
+ + &Uuid::new_v4()
+ .to_simple()
+ .encode_lower(&mut Uuid::encode_buffer());
errors::wrap(self.ctx.register_table(&*name, Arc::new(table)))?;
Ok(dataframe::DataFrame::new(
diff --git a/python/tests/test_df_sql.py b/python/tests/test_df_sql.py
index 17a7645..ebc38b1 100644
--- a/python/tests/test_df_sql.py
+++ b/python/tests/test_df_sql.py
@@ -41,3 +41,23 @@ def test_register_record_batches(ctx):
assert result[0].column(0) == pa.array([5, 7, 9])
assert result[0].column(1) == pa.array([-3, -3, -3])
+
+
+def test_create_dataframe_registers_unique_table_name(ctx):
+ # create a RecordBatch and register it as memtable
+ batch = pa.RecordBatch.from_arrays(
+ [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
+ names=["a", "b"],
+ )
+
+ df = ctx.create_dataframe([[batch]])
+ tables = list(ctx.tables())
+
+ assert df
+ assert len(tables) == 1
+ assert len(tables[0]) == 33
+ assert tables[0].startswith("c")
+ # ensure that the rest of the table name contains
+ # only hexadecimal numbers
+ for c in tables[0][1:]:
+ assert c in "0123456789abcdef"