This is an automated email from the ASF dual-hosted git repository.

johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e8e441  feat: implement csv upload configuration func for the schema 
enforcement (#9734)
3e8e441 is described below

commit 3e8e441bfcb5b188840d3d08265073db1f67d1a6
Author: Bogdan <[email protected]>
AuthorDate: Thu May 21 13:49:53 2020 -0700

    feat: implement csv upload configuration func for the schema enforcement 
(#9734)
    
    * Implement csv upload func for schema enforcement
    
    Implement function controlled csv upload schema
    
    Refactor + fix tests
    
    Fixing hive as well
    
    * Add explore_db to the extras
    
    Co-authored-by: bogdan kyryliuk <[email protected]>
---
 superset/config.py               |  16 ++++
 superset/db_engine_specs/base.py |  62 ++++--------
 superset/db_engine_specs/hive.py |  55 ++++-------
 superset/models/core.py          |   8 +-
 superset/views/database/views.py | 113 ++++++++++++++++------
 tests/core_tests.py              | 198 ++++++++++++++++++++++++++-------------
 6 files changed, 272 insertions(+), 180 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index 1668f57..e3d3ffb 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -586,11 +586,27 @@ CSV_TO_HIVE_UPLOAD_S3_BUCKET = None
 # The directory within the bucket specified above that will
 # contain all the external tables
 CSV_TO_HIVE_UPLOAD_DIRECTORY = "EXTERNAL_HIVE_TABLES/"
+# Function that creates upload directory dynamically based on the
+# database used, user and schema provided.
+CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC: Callable[
+    ["Database", "models.User", str], Optional[str]
+] = lambda database, user, schema: CSV_TO_HIVE_UPLOAD_DIRECTORY
 
 # The namespace within hive where the tables created from
 # uploading CSVs will be stored.
 UPLOADED_CSV_HIVE_NAMESPACE = None
 
+# Function that computes the allowed schemas for the CSV uploads.
+# Allowed schemas will be the union of the schemas_allowed_for_csv_upload
+# db configuration and the result of this function.
+
+# mypy doesn't catch that the "if" case ensures the list content is always str
+ALLOWED_USER_CSV_SCHEMA_FUNC: Callable[
+    ["Database", "models.User"], List[str]
+] = lambda database, user: [
+    UPLOADED_CSV_HIVE_NAMESPACE  # type: ignore
+] if UPLOADED_CSV_HIVE_NAMESPACE else []
+
 # A dictionary of items that gets merged into the Jinja context for
 # SQL Lab. The existing context gets updated with this dictionary,
 # meaning values for existing keys get overwritten by the content of this
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 845b22c..a593f59 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -18,7 +18,6 @@
 import hashlib
 import json
 import logging
-import os
 import re
 from contextlib import closing
 from datetime import datetime
@@ -49,11 +48,11 @@ from sqlalchemy.orm import Session
 from sqlalchemy.sql import quoted_name, text
 from sqlalchemy.sql.expression import ColumnClause, ColumnElement, Select, 
TextAsFrom
 from sqlalchemy.types import TypeEngine
-from wtforms.form import Form
 
 from superset import app, sql_parse
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 from superset.models.sql_lab import Query
+from superset.sql_parse import Table
 from superset.utils import core as utils
 
 if TYPE_CHECKING:
@@ -454,55 +453,26 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
         df.to_sql(**kwargs)
 
     @classmethod
-    def create_table_from_csv(cls, form: Form, database: "Database") -> None:
+    def create_table_from_csv(  # pylint: disable=too-many-arguments
+        cls,
+        filename: str,
+        table: Table,
+        database: "Database",
+        csv_to_df_kwargs: Dict[str, Any],
+        df_to_sql_kwargs: Dict[str, Any],
+    ) -> None:
         """
         Create table from contents of a csv. Note: this method does not create
         metadata for the table.
-
-        :param form: Parameters defining how to process data
-        :param database: Database model object for the target database
         """
-
-        def _allowed_file(filename: str) -> bool:
-            # Only allow specific file extensions as specified in the config
-            extension = os.path.splitext(filename)[1].lower()
-            return (
-                extension is not None and extension[1:] in 
config["ALLOWED_EXTENSIONS"]
-            )
-
-        filename = form.csv_file.data.filename
-
-        if not _allowed_file(filename):
-            raise Exception("Invalid file type selected")
-        csv_to_df_kwargs = {
-            "filepath_or_buffer": filename,
-            "sep": form.sep.data,
-            "header": form.header.data if form.header.data else 0,
-            "index_col": form.index_col.data,
-            "mangle_dupe_cols": form.mangle_dupe_cols.data,
-            "skipinitialspace": form.skipinitialspace.data,
-            "skiprows": form.skiprows.data,
-            "nrows": form.nrows.data,
-            "skip_blank_lines": form.skip_blank_lines.data,
-            "parse_dates": form.parse_dates.data,
-            "infer_datetime_format": form.infer_datetime_format.data,
-            "chunksize": 10000,
-        }
-        df = cls.csv_to_df(**csv_to_df_kwargs)
-
+        df = cls.csv_to_df(filepath_or_buffer=filename, **csv_to_df_kwargs,)
         engine = cls.get_engine(database)
-
-        df_to_sql_kwargs = {
-            "df": df,
-            "name": form.name.data,
-            "con": engine,
-            "schema": form.schema.data,
-            "if_exists": form.if_exists.data,
-            "index": form.index.data,
-            "index_label": form.index_label.data,
-            "chunksize": 10000,
-        }
-        cls.df_to_sql(**df_to_sql_kwargs)
+        if table.schema:
+            # only add schema when it is present and non-empty
+            df_to_sql_kwargs["schema"] = table.schema
+        if engine.dialect.supports_multivalues_insert:
+            df_to_sql_kwargs["method"] = "multi"
+        cls.df_to_sql(df=df, con=engine, **df_to_sql_kwargs)
 
     @classmethod
     def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index cb810f1..3fb09ef 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -23,18 +23,19 @@ from typing import Any, Dict, List, Optional, Tuple, 
TYPE_CHECKING
 from urllib import parse
 
 import pandas as pd
+from flask import g
 from sqlalchemy import Column
 from sqlalchemy.engine.base import Engine
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.engine.url import make_url, URL
 from sqlalchemy.orm import Session
 from sqlalchemy.sql.expression import ColumnClause, Select
-from wtforms.form import Form
 
 from superset import app, cache, conf
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.presto import PrestoEngineSpec
 from superset.models.sql_lab import Query
+from superset.sql_parse import Table
 from superset.utils import core as utils
 
 if TYPE_CHECKING:
@@ -105,8 +106,13 @@ class HiveEngineSpec(PrestoEngineSpec):
             return []
 
     @classmethod
-    def create_table_from_csv(  # pylint: disable=too-many-locals
-        cls, form: Form, database: "Database"
+    def create_table_from_csv(  # pylint: disable=too-many-arguments, 
too-many-locals
+        cls,
+        filename: str,
+        table: Table,
+        database: "Database",
+        csv_to_df_kwargs: Dict[str, Any],
+        df_to_sql_kwargs: Dict[str, Any],
     ) -> None:
         """Uploads a csv file and creates a superset datasource in Hive."""
 
@@ -128,38 +134,16 @@ class HiveEngineSpec(PrestoEngineSpec):
                 "No upload bucket specified. You can specify one in the config 
file."
             )
 
-        table_name = form.name.data
-        schema_name = form.schema.data
-
-        if config["UPLOADED_CSV_HIVE_NAMESPACE"]:
-            if "." in table_name or schema_name:
-                raise Exception(
-                    "You can't specify a namespace. "
-                    "All tables will be uploaded to the `{}` namespace".format(
-                        config["HIVE_NAMESPACE"]
-                    )
-                )
-            full_table_name = "{}.{}".format(
-                config["UPLOADED_CSV_HIVE_NAMESPACE"], table_name
-            )
-        else:
-            if "." in table_name and schema_name:
-                raise Exception(
-                    "You can't specify a namespace both in the name of the 
table "
-                    "and in the schema field. Please remove one"
-                )
-
-            full_table_name = (
-                "{}.{}".format(schema_name, table_name) if schema_name else 
table_name
-            )
-
-        filename = form.csv_file.data.filename
-        upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY"]
+        upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
+            database, g.user, table.schema
+        )
 
         # Optional dependency
-        from tableschema import Table  # pylint: disable=import-error
+        from tableschema import (  # pylint: disable=import-error
+            Table as TableSchemaTable,
+        )
 
-        hive_table_schema = Table(filename).infer()
+        hive_table_schema = TableSchemaTable(filename).infer()
         column_name_and_type = []
         for column_info in hive_table_schema["fields"]:
             column_name_and_type.append(
@@ -173,13 +157,14 @@ class HiveEngineSpec(PrestoEngineSpec):
         import boto3  # pylint: disable=import-error
 
         s3 = boto3.client("s3")
-        location = os.path.join("s3a://", bucket_path, upload_prefix, 
table_name)
+        location = os.path.join("s3a://", bucket_path, upload_prefix, 
table.table)
         s3.upload_file(
             filename,
             bucket_path,
-            os.path.join(upload_prefix, table_name, 
os.path.basename(filename)),
+            os.path.join(upload_prefix, table.table, 
os.path.basename(filename)),
         )
-        sql = f"""CREATE TABLE {full_table_name} ( {schema_definition} )
+        # TODO(bkyryliuk): support other delimiters
+        sql = f"""CREATE TABLE {str(table)} ( {schema_definition} )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
             TEXTFILE LOCATION '{location}'
             tblproperties ('skip.header.line.count'='1')"""
diff --git a/superset/models/core.py b/superset/models/core.py
index 94383c8..abcb210 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -609,7 +609,13 @@ class Database(
     def get_schema_access_for_csv_upload(  # pylint: disable=invalid-name
         self,
     ) -> List[str]:
-        return self.get_extra().get("schemas_allowed_for_csv_upload", [])
+        allowed_databases = 
self.get_extra().get("schemas_allowed_for_csv_upload", [])
+        if hasattr(g, "user"):
+            extra_allowed_databases = config["ALLOWED_USER_CSV_SCHEMA_FUNC"](
+                self, g.user
+            )
+            allowed_databases += extra_allowed_databases
+        return sorted(set(allowed_databases))
 
     @property
     def sqlalchemy_uri_decrypted(self) -> str:
diff --git a/superset/views/database/views.py b/superset/views/database/views.py
index 47bf1c3..208d351 100644
--- a/superset/views/database/views.py
+++ b/superset/views/database/views.py
@@ -30,6 +30,7 @@ from superset import app, db
 from superset.connectors.sqla.models import SqlaTable
 from superset.constants import RouteMethod
 from superset.exceptions import CertificateException
+from superset.sql_parse import Table
 from superset.utils import core as utils
 from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
 
@@ -109,66 +110,116 @@ class CsvToDatabaseView(SimpleFormView):
 
     def form_post(self, form):
         database = form.con.data
-        schema_name = form.schema.data or ""
+        csv_table = Table(table=form.name.data, schema=form.schema.data)
 
-        if not schema_allows_csv_upload(database, schema_name):
+        if not schema_allows_csv_upload(database, csv_table.schema):
             message = _(
                 'Database "%(database_name)s" schema "%(schema_name)s" '
                 "is not allowed for csv uploads. Please contact your Superset 
Admin.",
                 database_name=database.database_name,
-                schema_name=schema_name,
+                schema_name=csv_table.schema,
             )
             flash(message, "danger")
             return redirect("/csvtodatabaseview/form")
 
-        csv_filename = form.csv_file.data.filename
-        extension = os.path.splitext(csv_filename)[1].lower()
-        path = tempfile.NamedTemporaryFile(
-            dir=app.config["UPLOAD_FOLDER"], suffix=extension, delete=False
+        if "." in csv_table.table and csv_table.schema:
+            message = _(
+                "You cannot specify a namespace both in the name of the table: 
"
+                '"%(table)s" and in the schema field: '
+                '"%(schema)s". Please remove one',
+                table=csv_table.table,
+                schema=csv_table.schema,
+            )
+            flash(message, "danger")
+            return redirect("/csvtodatabaseview/form")
+
+        uploaded_tmp_file_path = tempfile.NamedTemporaryFile(
+            dir=app.config["UPLOAD_FOLDER"],
+            suffix=os.path.splitext(form.csv_file.data.filename)[1].lower(),
+            delete=False,
         ).name
-        form.csv_file.data.filename = path
 
         try:
             utils.ensure_path_exists(config["UPLOAD_FOLDER"])
-            upload_stream_write(form.csv_file.data, path)
-            table_name = form.name.data
+            upload_stream_write(form.csv_file.data, uploaded_tmp_file_path)
 
             con = form.data.get("con")
             database = (
                 
db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
             )
-            database.db_engine_spec.create_table_from_csv(form, database)
-            table = (
+            csv_to_df_kwargs = {
+                "sep": form.sep.data,
+                "header": form.header.data if form.header.data else 0,
+                "index_col": form.index_col.data,
+                "mangle_dupe_cols": form.mangle_dupe_cols.data,
+                "skipinitialspace": form.skipinitialspace.data,
+                "skiprows": form.skiprows.data,
+                "nrows": form.nrows.data,
+                "skip_blank_lines": form.skip_blank_lines.data,
+                "parse_dates": form.parse_dates.data,
+                "infer_datetime_format": form.infer_datetime_format.data,
+                "chunksize": 1000,
+            }
+            df_to_sql_kwargs = {
+                "name": csv_table.table,
+                "if_exists": form.if_exists.data,
+                "index": form.index.data,
+                "index_label": form.index_label.data,
+                "chunksize": 1000,
+            }
+            database.db_engine_spec.create_table_from_csv(
+                uploaded_tmp_file_path,
+                csv_table,
+                database,
+                csv_to_df_kwargs,
+                df_to_sql_kwargs,
+            )
+
+            # Connect table to the database that should be used for 
exploration.
+            # E.g. if hive was used to upload a csv, presto will be a better 
option
+            # to explore the table.
+            expore_database = database
+            explore_database_id = 
database.get_extra().get("explore_database_id", None)
+            if explore_database_id:
+                expore_database = (
+                    db.session.query(models.Database)
+                    .filter_by(id=explore_database_id)
+                    .one_or_none()
+                    or database
+                )
+
+            sqla_table = (
                 db.session.query(SqlaTable)
                 .filter_by(
-                    table_name=table_name,
-                    schema=form.schema.data,
-                    database_id=database.id,
+                    table_name=csv_table.table,
+                    schema=csv_table.schema,
+                    database_id=expore_database.id,
                 )
                 .one_or_none()
             )
-            if table:
-                table.fetch_metadata()
-            if not table:
-                table = SqlaTable(table_name=table_name)
-                table.database = database
-                table.database_id = database.id
-                table.user_id = g.user.id
-                table.schema = form.schema.data
-                table.fetch_metadata()
-                db.session.add(table)
+
+            if sqla_table:
+                sqla_table.fetch_metadata()
+            if not sqla_table:
+                sqla_table = SqlaTable(table_name=csv_table.table)
+                sqla_table.database = expore_database
+                sqla_table.database_id = expore_database.id
+                sqla_table.user_id = g.user.id
+                sqla_table.schema = csv_table.schema
+                sqla_table.fetch_metadata()
+                db.session.add(sqla_table)
             db.session.commit()
         except Exception as ex:  # pylint: disable=broad-except
             db.session.rollback()
             try:
-                os.remove(path)
+                os.remove(uploaded_tmp_file_path)
             except OSError:
                 pass
             message = _(
                 'Unable to upload CSV file "%(filename)s" to table '
                 '"%(table_name)s" in database "%(db_name)s". '
                 "Error message: %(error_msg)s",
-                filename=csv_filename,
+                filename=form.csv_file.data.filename,
                 table_name=form.name.data,
                 db_name=database.database_name,
                 error_msg=str(ex),
@@ -178,14 +229,14 @@ class CsvToDatabaseView(SimpleFormView):
             stats_logger.incr("failed_csv_upload")
             return redirect("/csvtodatabaseview/form")
 
-        os.remove(path)
+        os.remove(uploaded_tmp_file_path)
         # Go back to welcome page / splash screen
         message = _(
             'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in 
'
             'database "%(db_name)s"',
-            csv_filename=csv_filename,
-            table_name=form.name.data,
-            db_name=table.database.database_name,
+            csv_filename=form.csv_file.data.filename,
+            table_name=str(csv_table),
+            db_name=sqla_table.database.database_name,
         )
         flash(message, "info")
         stats_logger.incr("successful_csv_upload")
diff --git a/tests/core_tests.py b/tests/core_tests.py
index 5c4f2b7..12293c1 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -24,6 +24,8 @@ import io
 import json
 import logging
 import os
+from typing import Dict, List, Optional
+
 import pytz
 import random
 import re
@@ -44,6 +46,7 @@ from superset import (
     is_feature_enabled,
 )
 from superset.connectors.sqla.models import SqlaTable
+from superset.datasets.dao import DatasetDAO
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.mssql import MssqlEngineSpec
 from superset.models import core as models
@@ -769,102 +772,163 @@ class CoreTests(SupersetTestCase):
         self.get_json_resp(slc_url, {"form_data": json.dumps(slc.form_data)})
         self.assertEqual(1, qry.count())
 
-    def test_import_csv(self):
-        self.login(username="admin")
-        table_name = "".join(random.choice(string.ascii_uppercase) for _ in 
range(5))
+    def create_sample_csvfile(self, filename: str, content: List[str]) -> None:
+        with open(filename, "w+") as test_file:
+            for l in content:
+                test_file.write(f"{l}\n")
 
-        filename_1 = "testCSV.csv"
-        test_file_1 = open(filename_1, "w+")
-        test_file_1.write("a,b\n")
-        test_file_1.write("john,1\n")
-        test_file_1.write("paul,2\n")
-        test_file_1.close()
+    def enable_csv_upload(self, database: models.Database) -> None:
+        """Enables csv upload in the given database."""
+        database.allow_csv_upload = True
+        db.session.commit()
+        add_datasource_page = self.get_resp("/databaseview/list/")
+        self.assertIn("Upload a CSV", add_datasource_page)
 
-        filename_2 = "testCSV2.csv"
-        test_file_2 = open(filename_2, "w+")
-        test_file_2.write("b,c,d\n")
-        test_file_2.write("john,1,x\n")
-        test_file_2.write("paul,2,y\n")
-        test_file_2.close()
+        form_get = self.get_resp("/csvtodatabaseview/form")
+        self.assertIn("CSV to Database configuration", form_get)
 
-        example_db = utils.get_example_database()
-        example_db.allow_csv_upload = True
-        db_id = example_db.id
-        db.session.commit()
+    def upload_csv(
+        self, filename: str, table_name: str, extra: Optional[Dict[str, str]] 
= None
+    ):
         form_data = {
-            "csv_file": open(filename_1, "rb"),
+            "csv_file": open(filename, "rb"),
             "sep": ",",
             "name": table_name,
-            "con": db_id,
+            "con": utils.get_example_database().id,
             "if_exists": "fail",
             "index_label": "test_label",
             "mangle_dupe_cols": False,
         }
-        url = "/databaseview/list/"
-        add_datasource_page = self.get_resp(url)
-        self.assertIn("Upload a CSV", add_datasource_page)
-
-        url = "/csvtodatabaseview/form"
-        form_get = self.get_resp(url)
-        self.assertIn("CSV to Database configuration", form_get)
+        if extra:
+            form_data.update(extra)
+        return self.get_resp("/csvtodatabaseview/form", data=form_data)
 
+    @mock.patch(
+        "superset.models.core.config",
+        {**app.config, "ALLOWED_USER_CSV_SCHEMA_FUNC": lambda d, u: 
["admin_database"]},
+    )
+    def test_import_csv_enforced_schema(self):
+        if utils.get_example_database().backend == "sqlite":
+            # sqlite doesn't support schema / database creation
+            return
+        self.login(username="admin")
+        table_name = "".join(random.choice(string.ascii_lowercase) for _ in 
range(5))
+        full_table_name = f"admin_database.{table_name}"
+        filename = "testCSV.csv"
+        self.create_sample_csvfile(filename, ["a,b", "john,1", "paul,2"])
         try:
-            # initial upload with fail mode
-            resp = self.get_resp(url, data=form_data)
+            self.enable_csv_upload(utils.get_example_database())
+
+            # no schema specified, fail upload
+            resp = self.upload_csv(filename, table_name)
             self.assertIn(
-                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
+                'Database "examples" schema "None" is not allowed for csv 
uploads', resp
             )
 
-            # upload again with fail mode; should fail
-            form_data["csv_file"] = open(filename_1, "rb")
-            resp = self.get_resp(url, data=form_data)
+            # user specified schema matches the expected schema, append
+            success_msg = f'CSV file "{filename}" uploaded to table "{full_table_name}"'
+            resp = self.upload_csv(
+                filename,
+                table_name,
+                extra={"schema": "admin_database", "if_exists": "append"},
+            )
+            self.assertIn(success_msg, resp)
+
+            resp = self.upload_csv(
+                filename,
+                table_name,
+                extra={"schema": "admin_database", "if_exists": "replace"},
+            )
+            self.assertIn(success_msg, resp)
+
+            # user specified schema doesn't match, fail
+            resp = self.upload_csv(filename, table_name, extra={"schema": 
"gold"})
             self.assertIn(
-                f'Unable to upload CSV file "{filename_1}" to table 
"{table_name}"',
+                'Database "examples" schema "gold" is not allowed for csv 
uploads',
                 resp,
             )
+        finally:
+            os.remove(filename)
+
+    def test_import_csv_explore_database(self):
+        if utils.get_example_database().backend == "sqlite":
+            # sqlite doesn't support schema / database creation
+            return
+        explore_db_id = utils.get_example_database().id
+
+        upload_db = utils.get_or_create_db(
+            "csv_explore_db", app.config["SQLALCHEMY_DATABASE_URI"]
+        )
+        upload_db_id = upload_db.id
+        extra = upload_db.get_extra()
+        extra["explore_database_id"] = explore_db_id
+        upload_db.extra = json.dumps(extra)
+        db.session.commit()
+
+        self.login(username="admin")
+        self.enable_csv_upload(DatasetDAO.get_database_by_id(upload_db_id))
+        table_name = "".join(random.choice(string.ascii_uppercase) for _ in 
range(5))
+
+        f = "testCSV.csv"
+        self.create_sample_csvfile(f, ["a,b", "john,1", "paul,2"])
+        # initial upload with fail mode
+        resp = self.upload_csv(f, table_name)
+        self.assertIn(f'CSV file "{f}" uploaded to table "{table_name}"', resp)
+        table = self.get_table_by_name(table_name)
+        self.assertEqual(table.database_id, explore_db_id)
+
+        # cleanup
+        db.session.delete(table)
+        db.session.delete(DatasetDAO.get_database_by_id(upload_db_id))
+        db.session.commit()
+        os.remove(f)
+
+    def test_import_csv(self):
+        self.login(username="admin")
+        table_name = "".join(random.choice(string.ascii_uppercase) for _ in 
range(5))
+
+        f1 = "testCSV.csv"
+        self.create_sample_csvfile(f1, ["a,b", "john,1", "paul,2"])
+        f2 = "testCSV2.csv"
+        self.create_sample_csvfile(f2, ["b,c,d", "john,1,x", "paul,2,y"])
+        self.enable_csv_upload(utils.get_example_database())
+
+        try:
+            success_msg_f1 = f'CSV file "{f1}" uploaded to table 
"{table_name}"'
+
+            # initial upload with fail mode
+            resp = self.upload_csv(f1, table_name)
+            self.assertIn(success_msg_f1, resp)
+
+            # upload again with fail mode; should fail
+            fail_msg = f'Unable to upload CSV file "{f1}" to table 
"{table_name}"'
+            resp = self.upload_csv(f1, table_name)
+            self.assertIn(fail_msg, resp)
 
             # upload again with append mode
-            form_data["csv_file"] = open(filename_1, "rb")
-            form_data["if_exists"] = "append"
-            resp = self.get_resp(url, data=form_data)
-            self.assertIn(
-                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
-            )
+            resp = self.upload_csv(f1, table_name, extra={"if_exists": 
"append"})
+            self.assertIn(success_msg_f1, resp)
 
             # upload again with replace mode
-            form_data["csv_file"] = open(filename_1, "rb")
-            form_data["if_exists"] = "replace"
-            resp = self.get_resp(url, data=form_data)
-            self.assertIn(
-                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
-            )
+            resp = self.upload_csv(f1, table_name, extra={"if_exists": 
"replace"})
+            self.assertIn(success_msg_f1, resp)
 
             # try to append to table from file with different schema
-            form_data["csv_file"] = open(filename_2, "rb")
-            form_data["if_exists"] = "append"
-            resp = self.get_resp(url, data=form_data)
-            self.assertIn(
-                f'Unable to upload CSV file "{filename_2}" to table 
"{table_name}"',
-                resp,
-            )
+            resp = self.upload_csv(f2, table_name, extra={"if_exists": 
"append"})
+            fail_msg_f2 = f'Unable to upload CSV file "{f2}" to table 
"{table_name}"'
+            self.assertIn(fail_msg_f2, resp)
 
             # replace table from file with different schema
-            form_data["csv_file"] = open(filename_2, "rb")
-            form_data["if_exists"] = "replace"
-            resp = self.get_resp(url, data=form_data)
-            self.assertIn(
-                f'CSV file "{filename_2}" uploaded to table "{table_name}"', 
resp
-            )
-            table = (
-                db.session.query(SqlaTable)
-                .filter_by(table_name=table_name, database_id=db_id)
-                .first()
-            )
+            resp = self.upload_csv(f2, table_name, extra={"if_exists": 
"replace"})
+            success_msg_f2 = f'CSV file "{f2}" uploaded to table 
"{table_name}"'
+            self.assertIn(success_msg_f2, resp)
+
+            table = self.get_table_by_name(table_name)
             # make sure the new column name is reflected in the table metadata
             self.assertIn("d", table.column_names)
         finally:
-            os.remove(filename_1)
-            os.remove(filename_2)
+            os.remove(f1)
+            os.remove(f2)
 
     def test_dataframe_timezone(self):
         tz = pytz.FixedOffset(60)

Reply via email to