This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new a8781c5 fix(hive): Update CSV to Hive upload prefix (#14255)
a8781c5 is described below
commit a8781c53133c8ae762ae91660bcd1bc78fa1f924
Author: John Bodley <[email protected]>
AuthorDate: Sat Apr 24 13:20:31 2021 +1200
fix(hive): Update CSV to Hive upload prefix (#14255)
* fix(hive): Update CSV to Hive upload prefix
* Trigger notification
Co-authored-by: John Bodley <[email protected]>
---
UPDATING.md | 2 ++
superset/config.py | 13 ++++++++++---
tests/db_engine_specs/hive_tests.py | 24 ++++++++++++++++++++----
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index 77eb720..a86bf13 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -27,6 +27,8 @@ assists people when migrating to a new version.
- [13980](https://github.com/apache/superset/pull/13980): Data health checks
no longer use the metadata database as an interim cache. Though non-breaking,
deployments which implement complex logic should likely memoize the callback
function. Refer to documentation in the config.py file for more detail.
+- [14255](https://github.com/apache/superset/pull/14255): The default
`CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC` callable logic has been updated to leverage
the specified database and schema to ensure the upload S3 key prefix is unique.
Previously tables generated via upload from CSV with the same name but differing
schema and/or cluster would use the same S3 key prefix. Note this change does
not impact previously imported tables.
+
### Breaking Changes
### Potential Downtime
### Deprecations
diff --git a/superset/config.py b/superset/config.py
index 7143a0a..3ddb7cd 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -791,9 +791,16 @@ CSV_TO_HIVE_UPLOAD_S3_BUCKET = None
CSV_TO_HIVE_UPLOAD_DIRECTORY = "EXTERNAL_HIVE_TABLES/"
# Function that creates upload directory dynamically based on the
# database used, user and schema provided.
-CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC: Callable[
- ["Database", "models.User", str], Optional[str]
-] = lambda database, user, schema: CSV_TO_HIVE_UPLOAD_DIRECTORY
+def CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC(
+ database: "Database",
+ user: "models.User", # pylint: disable=unused-argument
+ schema: Optional[str],
+) -> str:
+ # Note the final empty path enforces a trailing slash.
+ return os.path.join(
+ CSV_TO_HIVE_UPLOAD_DIRECTORY, str(database.id), schema or "", ""
+ )
+
# The namespace within hive where the tables created from
# uploading CSVs will be stored.
diff --git a/tests/db_engine_specs/hive_tests.py
b/tests/db_engine_specs/hive_tests.py
index 71595a0..51306dc 100644
--- a/tests/db_engine_specs/hive_tests.py
+++ b/tests/db_engine_specs/hive_tests.py
@@ -173,7 +173,7 @@ def test_create_table_from_csv_append() -> None:
@mock.patch(
"superset.db_engine_specs.hive.config",
- {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: True},
+ {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: ""},
)
@mock.patch("superset.db_engine_specs.hive.g", spec={})
@mock.patch("tableschema.Table")
@@ -190,7 +190,7 @@ def test_create_table_from_csv_if_exists_fail(mock_table,
mock_g):
@mock.patch(
"superset.db_engine_specs.hive.config",
- {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: True},
+ {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: ""},
)
@mock.patch("superset.db_engine_specs.hive.g", spec={})
@mock.patch("tableschema.Table")
@@ -211,7 +211,7 @@ def
test_create_table_from_csv_if_exists_fail_with_schema(mock_table, mock_g):
@mock.patch(
"superset.db_engine_specs.hive.config",
- {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: True},
+ {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: ""},
)
@mock.patch("superset.db_engine_specs.hive.g", spec={})
@mock.patch("tableschema.Table")
@@ -239,7 +239,7 @@ def
test_create_table_from_csv_if_exists_replace(mock_upload_to_s3, mock_table,
@mock.patch(
"superset.db_engine_specs.hive.config",
- {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: True},
+ {**app.config, "CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC": lambda *args: ""},
)
@mock.patch("superset.db_engine_specs.hive.g", spec={})
@mock.patch("tableschema.Table")
@@ -347,6 +347,22 @@ def test_is_readonly():
assert is_readonly("WITH (SELECT 1) bla SELECT * from bla")
[email protected](
+ "schema,upload_prefix",
+ [("foo", "EXTERNAL_HIVE_TABLES/1/foo/"), (None,
"EXTERNAL_HIVE_TABLES/1/")],
+)
+def test_s3_upload_prefix(schema: str, upload_prefix: str) -> None:
+ mock_database = mock.MagicMock()
+ mock_database.id = 1
+
+ assert (
+ app.config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
+ database=mock_database, user=mock.MagicMock(), schema=schema
+ )
+ == upload_prefix
+ )
+
+
def test_upload_to_s3_no_bucket_path():
with pytest.raises(
Exception,