This is an automated email from the ASF dual-hosted git repository.

houqp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 9f9a57d  Use UUID to create unique table names in python binding (#1111)
9f9a57d is described below

commit 9f9a57de71d9505e72d87a6c6723497bf8cfe1eb
Author: Aidar Talibzhanov <[email protected]>
AuthorDate: Thu Oct 14 16:34:31 2021 +0100

    Use UUID to create unique table names in python binding (#1111)
---
 python/Cargo.toml           |  1 +
 python/src/context.rs       | 11 +++++------
 python/tests/test_df_sql.py | 20 ++++++++++++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/python/Cargo.toml b/python/Cargo.toml
index e20302b..731edcb 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -32,6 +32,7 @@ tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"
 rand = "0.7"
 pyo3 = { version = "0.14.1", features = ["extension-module", "abi3", "abi3-py36"] }
 datafusion = { path = "../datafusion", version = "5.1.0" }
+uuid = { version = "0.8", features = ["v4"] }
 
 [lib]
 name = "datafusion"
diff --git a/python/src/context.rs b/python/src/context.rs
index 24a2cb8..b813f27 100644
--- a/python/src/context.rs
+++ b/python/src/context.rs
@@ -18,8 +18,7 @@
 use std::path::PathBuf;
 use std::{collections::HashSet, sync::Arc};
 
-use rand::distributions::Alphanumeric;
-use rand::Rng;
+use uuid::Uuid;
 
 use tokio::runtime::Runtime;
 
@@ -91,10 +90,10 @@ impl ExecutionContext {
 
         // generate a random (unique) name for this table
         // table name cannot start with numeric digit
-        let name = std::iter::once('c')
-            .chain(rand::thread_rng().sample_iter(&Alphanumeric))
-            .take(10)
-            .collect::<String>();
+        let name = "c".to_owned()
+            + &Uuid::new_v4()
+                .to_simple()
+                .encode_lower(&mut Uuid::encode_buffer());
 
         errors::wrap(self.ctx.register_table(&*name, Arc::new(table)))?;
         Ok(dataframe::DataFrame::new(
diff --git a/python/tests/test_df_sql.py b/python/tests/test_df_sql.py
index 17a7645..ebc38b1 100644
--- a/python/tests/test_df_sql.py
+++ b/python/tests/test_df_sql.py
@@ -41,3 +41,23 @@ def test_register_record_batches(ctx):
 
     assert result[0].column(0) == pa.array([5, 7, 9])
     assert result[0].column(1) == pa.array([-3, -3, -3])
+
+
+def test_create_dataframe_registers_unique_table_name(ctx):
+    # create a RecordBatch and register it as memtable
+    batch = pa.RecordBatch.from_arrays(
+        [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
+        names=["a", "b"],
+    )
+
+    df = ctx.create_dataframe([[batch]])
+    tables = list(ctx.tables())
+
+    assert df
+    assert len(tables) == 1
+    assert len(tables[0]) == 33
+    assert tables[0].startswith("c")
+    # ensure that the rest of the table name contains
+    # only hexadecimal numbers
+    for c in tables[0][1:]:
+        assert c in "0123456789abcdef"

Reply via email to