This is an automated email from the ASF dual-hosted git repository.

dpgaspar pushed a commit to branch 0.35
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit 16459902e000b190c1b0660fa76a7616c30e87a1
Author: Ville Brofeldt <[email protected]>
AuthorDate: Thu Nov 7 20:03:42 2019 +0200

    [fix] Improve csv upload functionality (#8457)
    
    * [fix] csv upload when table metadata present
    
    * Remove table from hive spec
    
    * Move upload before table metadata creation
    
    * Refine upload logic, dd unit tests and fix translations
    
    * Use ALLOWED_EXTENSIONS from config
    
    * Address review comments
    
    * Fix error message grammar
    
    * Add return type to hive csv upload and replace first with one_or_none
---
 superset/config.py                                 |  2 +-
 superset/db_engine_specs/base.py                   | 18 ++---
 superset/db_engine_specs/hive.py                   |  2 +-
 superset/translations/en/LC_MESSAGES/messages.po   |  3 +-
 superset/translations/fr/LC_MESSAGES/messages.json |  2 +-
 superset/translations/fr/LC_MESSAGES/messages.po   |  4 +-
 superset/translations/it/LC_MESSAGES/messages.json |  4 +-
 superset/translations/it/LC_MESSAGES/messages.po   |  3 +-
 superset/translations/ko/LC_MESSAGES/messages.po   |  2 +-
 superset/translations/messages.pot                 |  3 +-
 superset/translations/ru/LC_MESSAGES/messages.json |  6 +-
 superset/translations/ru/LC_MESSAGES/messages.po   |  4 +-
 superset/translations/zh/LC_MESSAGES/messages.json |  4 +-
 superset/translations/zh/LC_MESSAGES/messages.po   |  4 +-
 superset/views/core.py                             |  2 +-
 superset/views/database/forms.py                   | 12 ++-
 superset/views/database/views.py                   | 64 +++++++++++-----
 tests/core_tests.py                                | 86 ++++++++++++++++++----
 18 files changed, 156 insertions(+), 69 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index b263285..a58b318 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -279,7 +279,7 @@ SUPERSET_WEBSERVER_DOMAINS = None
 
 # Allowed format types for upload on Database view
 # TODO: Add processing of other spreadsheet formats (xls, xlsx etc)
-ALLOWED_EXTENSIONS = set(["csv"])
+ALLOWED_EXTENSIONS = {"csv", "tsv"}
 
 # CSV Options: key/value pairs that will be passed as argument to 
DataFrame.to_csv
 # method.
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 97e214f..7060887 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -37,7 +37,7 @@ from sqlalchemy.sql.expression import ColumnClause, 
ColumnElement, Select, TextA
 from sqlalchemy.types import TypeEngine
 from werkzeug.utils import secure_filename
 
-from superset import app, db, sql_parse
+from superset import app, sql_parse
 from superset.utils import core as utils
 
 if TYPE_CHECKING:
@@ -388,15 +388,17 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
         df.to_sql(**kwargs)
 
     @classmethod
-    def create_table_from_csv(cls, form, table):
-        """ Create table (including metadata in backend) from contents of a 
csv.
+    def create_table_from_csv(cls, form) -> None:
+        """
+        Create table from contents of a csv. Note: this method does not create
+        metadata for the table.
+
         :param form: Parameters defining how to process data
-        :param table: Metadata of new table to be created
         """
 
         def _allowed_file(filename: str) -> bool:
             # Only allow specific file extensions as specified in the config
-            extension = os.path.splitext(filename)[1]
+            extension = os.path.splitext(filename)[1].lower()
             return (
                 extension is not None and extension[1:] in 
config["ALLOWED_EXTENSIONS"]
             )
@@ -432,12 +434,6 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
         }
         cls.df_to_sql(**df_to_sql_kwargs)
 
-        table.user_id = g.user.id
-        table.schema = form.schema.data
-        table.fetch_metadata()
-        db.session.add(table)
-        db.session.commit()
-
     @classmethod
     def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         """
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index dbcb337..1c5319e 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -98,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec):
             return []
 
     @classmethod
-    def create_table_from_csv(cls, form, table):  # pylint: 
disable=too-many-locals
+    def create_table_from_csv(cls, form) -> None:  # pylint: 
disable=too-many-locals
         """Uploads a csv file and creates a superset datasource in Hive."""
 
         def convert_to_hive_type(col_type):
diff --git a/superset/translations/en/LC_MESSAGES/messages.po 
b/superset/translations/en/LC_MESSAGES/messages.po
index 34c5787..6bdd3a1 100644
--- a/superset/translations/en/LC_MESSAGES/messages.po
+++ b/superset/translations/en/LC_MESSAGES/messages.po
@@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
 msgstr ""
 
 #: superset/views/core.py:375
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
 msgstr ""
 
 #: superset/views/core.py:401 superset/views/core.py:667
@@ -7765,4 +7765,3 @@ msgstr ""
 
 #~ msgid "Saved Queries"
 #~ msgstr ""
-
diff --git a/superset/translations/fr/LC_MESSAGES/messages.json 
b/superset/translations/fr/LC_MESSAGES/messages.json
index b49c735..4114301 100644
--- a/superset/translations/fr/LC_MESSAGES/messages.json
+++ b/superset/translations/fr/LC_MESSAGES/messages.json
@@ -3124,7 +3124,7 @@
          "CSV to Database configuration": [
             ""
          ],
-         "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
+         "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" 
in database \"%(db_name)s\"": [
             ""
          ],
          "User": [
diff --git a/superset/translations/fr/LC_MESSAGES/messages.po 
b/superset/translations/fr/LC_MESSAGES/messages.po
index 180e0e7..ed66cdc 100644
--- a/superset/translations/fr/LC_MESSAGES/messages.po
+++ b/superset/translations/fr/LC_MESSAGES/messages.po
@@ -4880,8 +4880,8 @@ msgid "CSV to Database configuration"
 msgstr "CSV vers la configuration de la base de données"
 
 #: superset/views/core.py:375
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
-msgstr "Fichier CSV \"{0}\" chargé dans la table \"{1}\" de la base de données 
\"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
+msgstr "Fichier CSV \"%(csv_filename)s\" chargé dans la table 
\"%(table_name)s\" de la base de données \"%(db_name)s\""
 
 #: superset/views/core.py:401 superset/views/core.py:667
 #: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
diff --git a/superset/translations/it/LC_MESSAGES/messages.json 
b/superset/translations/it/LC_MESSAGES/messages.json
index 71378d4..5583613 100644
--- a/superset/translations/it/LC_MESSAGES/messages.json
+++ b/superset/translations/it/LC_MESSAGES/messages.json
@@ -2923,7 +2923,7 @@
          "CSV to Database configuration": [
             ""
          ],
-         "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
+         "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" 
in database \"%(db_name)s\"": [
             ""
          ],
          "User": [
@@ -3132,4 +3132,4 @@
          ]
       }
    }
-}
\ No newline at end of file
+}
diff --git a/superset/translations/it/LC_MESSAGES/messages.po 
b/superset/translations/it/LC_MESSAGES/messages.po
index e3ca6e2..ba49fcb 100644
--- a/superset/translations/it/LC_MESSAGES/messages.po
+++ b/superset/translations/it/LC_MESSAGES/messages.po
@@ -4432,7 +4432,7 @@ msgid "CSV to Database configuration"
 msgstr ""
 
 #: superset/views/core.py:363
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
 msgstr ""
 
 #: superset/views/core.py:389 superset/views/core.py:655
@@ -6461,4 +6461,3 @@ msgstr "Query salvate"
 
 #~ msgid "Slice"
 #~ msgstr "Slice"
-
diff --git a/superset/translations/ko/LC_MESSAGES/messages.po 
b/superset/translations/ko/LC_MESSAGES/messages.po
index 2b52cb7..e998c29 100644
--- a/superset/translations/ko/LC_MESSAGES/messages.po
+++ b/superset/translations/ko/LC_MESSAGES/messages.po
@@ -3788,7 +3788,7 @@ msgstr "대시보드 가져오기"
 msgid "CSV to Database configuration"
 msgstr ""
 
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
 msgstr ""
 
 msgid "User"
diff --git a/superset/translations/messages.pot 
b/superset/translations/messages.pot
index cd4a3f7..6c70ee1 100644
--- a/superset/translations/messages.pot
+++ b/superset/translations/messages.pot
@@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration"
 msgstr ""
 
 #: superset/views/core.py:375
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
 msgstr ""
 
 #: superset/views/core.py:401 superset/views/core.py:667
@@ -4967,4 +4967,3 @@ msgstr ""
 #: superset/views/sql_lab.py:86
 msgid "Saved Queries"
 msgstr ""
-
diff --git a/superset/translations/ru/LC_MESSAGES/messages.json 
b/superset/translations/ru/LC_MESSAGES/messages.json
index f375364..f8bb352 100644
--- a/superset/translations/ru/LC_MESSAGES/messages.json
+++ b/superset/translations/ru/LC_MESSAGES/messages.json
@@ -2923,8 +2923,8 @@
          "CSV to Database configuration": [
             "Настройка CSV для БД"
          ],
-         "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
-            "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
+         "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" 
in database \"%(db_name)s\"": [
+            "CSV-файл \"%(csv_filename)s\" загружен в таблицу 
\"%(table_name)s\" базы данных \"%(db_name)s\""
          ],
          "User": [
             "Пользователь"
@@ -3132,4 +3132,4 @@
          ]
       }
    }
-}
\ No newline at end of file
+}
diff --git a/superset/translations/ru/LC_MESSAGES/messages.po 
b/superset/translations/ru/LC_MESSAGES/messages.po
index ea89a33..29c44f0 100644
--- a/superset/translations/ru/LC_MESSAGES/messages.po
+++ b/superset/translations/ru/LC_MESSAGES/messages.po
@@ -4544,8 +4544,8 @@ msgid "CSV to Database configuration"
 msgstr "Настройка CSV для БД"
 
 #: superset/views/core.py:363
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
-msgstr "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
+msgstr "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" 
базы данных \"%(db_name)s\""
 
 #: superset/views/core.py:389 superset/views/core.py:655
 #: superset/views/sql_lab.py:16 superset/views/sql_lab.py:53
diff --git a/superset/translations/zh/LC_MESSAGES/messages.json 
b/superset/translations/zh/LC_MESSAGES/messages.json
index 76640df..05c1de1 100644
--- a/superset/translations/zh/LC_MESSAGES/messages.json
+++ b/superset/translations/zh/LC_MESSAGES/messages.json
@@ -3124,8 +3124,8 @@
       "CSV to Database configuration": [
         "csv 到数据库配置"
       ],
-      "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [
-        "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
+      "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\"": [
+        "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 
\"%(table_name)s\""
       ],
       "User": [
         "用户"
diff --git a/superset/translations/zh/LC_MESSAGES/messages.po 
b/superset/translations/zh/LC_MESSAGES/messages.po
index 5077f76..6b43381 100644
--- a/superset/translations/zh/LC_MESSAGES/messages.po
+++ b/superset/translations/zh/LC_MESSAGES/messages.po
@@ -4746,8 +4746,8 @@ msgid "CSV to Database configuration"
 msgstr "csv 到数据库配置"
 
 #: superset/views/core.py:375
-msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\""
-msgstr "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\""
+msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in 
database \"%(db_name)s\""
+msgstr "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 
\"%(table_name)s\""
 
 #: superset/views/core.py:401 superset/views/core.py:667
 #: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60
diff --git a/superset/views/core.py b/superset/views/core.py
index 0c6b6ec..7483e55 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -2999,7 +2999,7 @@ class Superset(BaseSupersetView):
         except Exception:
             return json_error_response(
                 "Failed to fetch schemas allowed for csv upload in this 
database! "
-                "Please contact Superset Admin!",
+                "Please contact your Superset Admin!",
                 stacktrace=utils.get_stacktrace(),
             )
 
diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py
index e705b57..57144bc 100644
--- a/superset/views/database/forms.py
+++ b/superset/views/database/forms.py
@@ -90,7 +90,17 @@ class CsvToDatabaseForm(DynamicForm):
     csv_file = FileField(
         _("CSV File"),
         description=_("Select a CSV file to be uploaded to a database."),
-        validators=[FileRequired(), FileAllowed(["csv"], _("CSV Files 
Only!"))],
+        validators=[
+            FileRequired(),
+            FileAllowed(
+                config["ALLOWED_EXTENSIONS"],
+                _(
+                    "Only the following file extensions are allowed: "
+                    "%(allowed_extensions)s",
+                    allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
+                ),
+            ),
+        ],
     )
     con = QuerySelectField(
         _("Database"),
diff --git a/superset/views/database/views.py b/superset/views/database/views.py
index 3298b9f..bc32c19 100644
--- a/superset/views/database/views.py
+++ b/superset/views/database/views.py
@@ -17,7 +17,7 @@
 # pylint: disable=C,R,W
 import os
 
-from flask import flash, redirect
+from flask import flash, g, redirect
 from flask_appbuilder import SimpleFormView
 from flask_appbuilder.forms import DynamicForm
 from flask_appbuilder.models.sqla.interface import SQLAInterface
@@ -28,7 +28,7 @@ from wtforms.fields import StringField
 from wtforms.validators import ValidationError
 
 import superset.models.core as models
-from superset import app, appbuilder, security_manager
+from superset import app, appbuilder, db, security_manager
 from superset.connectors.sqla.models import SqlaTable
 from superset.utils import core as utils
 from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin
@@ -104,10 +104,10 @@ class CsvToDatabaseView(SimpleFormView):
 
         if not self.is_schema_allowed(database, schema_name):
             message = _(
-                'Database "{0}" Schema "{1}" is not allowed for csv uploads. '
-                "Please contact Superset Admin".format(
-                    database.database_name, schema_name
-                )
+                'Database "%(database_name)s" schema "%(schema_name)s" '
+                "is not allowed for csv uploads. Please contact your Superset 
Admin.",
+                database_name=database.database_name,
+                schema_name=schema_name,
             )
             flash(message, "danger")
             return redirect("/csvtodatabaseview/form")
@@ -119,32 +119,58 @@ class CsvToDatabaseView(SimpleFormView):
         try:
             utils.ensure_path_exists(config["UPLOAD_FOLDER"])
             csv_file.save(path)
-            table = SqlaTable(table_name=form.name.data)
-            table.database = form.data.get("con")
-            table.database_id = table.database.id
-            table.database.db_engine_spec.create_table_from_csv(form, table)
+            table_name = form.name.data
+            database = form.data.get("con")
+            database.db_engine_spec.create_table_from_csv(form)
+
+            table = (
+                db.session.query(SqlaTable)
+                .filter_by(
+                    table_name=table_name,
+                    schema=form.schema.data,
+                    database_id=database.id,
+                )
+                .one_or_none()
+            )
+            if table:
+                table.fetch_metadata()
+            if not table:
+                table = SqlaTable(table_name=table_name)
+                table.database = database
+                table.database_id = database.id
+                table.user_id = g.user.id
+                table.schema = form.schema.data
+                table.fetch_metadata()
+                db.session.add(table)
+            db.session.commit()
         except Exception as e:
+            db.session.rollback()
             try:
                 os.remove(path)
             except OSError:
                 pass
-            message = (
-                "Table name {} already exists. Please pick another".format(
-                    form.name.data
-                )
-                if isinstance(e, IntegrityError)
-                else str(e)
+            message = _(
+                'Unable to upload CSV file "%(filename)s" to table '
+                '"%(table_name)s" in database "%(db_name)s". '
+                "Error message: %(error_msg)s",
+                filename=csv_filename,
+                table_name=form.name.data,
+                db_name=database.database_name,
+                error_msg=str(e),
             )
+
             flash(message, "danger")
             stats_logger.incr("failed_csv_upload")
             return redirect("/csvtodatabaseview/form")
 
         os.remove(path)
         # Go back to welcome page / splash screen
-        db_name = table.database.database_name
         message = _(
-            'CSV file "{0}" uploaded to table "{1}" in '
-            'database "{2}"'.format(csv_filename, form.name.data, db_name)
+            'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in 
'
+            'database "%(db_name)s"',
+            csv_filename=csv_filename,
+            table_name=form.name.data,
+            db_name=table.database.database_name,
         )
         flash(message, "info")
         stats_logger.incr("successful_csv_upload")
diff --git a/tests/core_tests.py b/tests/core_tests.py
index 2110cdb..70d30a5 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -598,42 +598,100 @@ class CoreTests(SupersetTestCase):
 
     def test_import_csv(self):
         self.login(username="admin")
-        filename = "testCSV.csv"
         table_name = "".join(random.choice(string.ascii_uppercase) for _ in 
range(5))
 
-        test_file = open(filename, "w+")
-        test_file.write("a,b\n")
-        test_file.write("john,1\n")
-        test_file.write("paul,2\n")
-        test_file.close()
+        filename_1 = "testCSV.csv"
+        test_file_1 = open(filename_1, "w+")
+        test_file_1.write("a,b\n")
+        test_file_1.write("john,1\n")
+        test_file_1.write("paul,2\n")
+        test_file_1.close()
+
+        filename_2 = "testCSV2.csv"
+        test_file_2 = open(filename_2, "w+")
+        test_file_2.write("b,c,d\n")
+        test_file_2.write("john,1,x\n")
+        test_file_2.write("paul,2,y\n")
+        test_file_2.close()
+
         example_db = utils.get_example_database()
         example_db.allow_csv_upload = True
         db_id = example_db.id
         db.session.commit()
-        test_file = open(filename, "rb")
         form_data = {
-            "csv_file": test_file,
+            "csv_file": open(filename_1, "rb"),
             "sep": ",",
             "name": table_name,
             "con": db_id,
-            "if_exists": "append",
+            "if_exists": "fail",
             "index_label": "test_label",
             "mangle_dupe_cols": False,
         }
         url = "/databaseview/list/"
         add_datasource_page = self.get_resp(url)
-        assert "Upload a CSV" in add_datasource_page
+        self.assertIn("Upload a CSV", add_datasource_page)
 
         url = "/csvtodatabaseview/form"
         form_get = self.get_resp(url)
-        assert "CSV to Database configuration" in form_get
+        self.assertIn("CSV to Database configuration", form_get)
 
         try:
-            # ensure uploaded successfully
+            # initial upload with fail mode
+            resp = self.get_resp(url, data=form_data)
+            self.assertIn(
+                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
+            )
+
+            # upload again with fail mode; should fail
+            form_data["csv_file"] = open(filename_1, "rb")
             resp = self.get_resp(url, data=form_data)
-            assert 'CSV file "testCSV.csv" uploaded to table' in resp
+            self.assertIn(
+                f'Unable to upload CSV file "{filename_1}" to table 
"{table_name}"',
+                resp,
+            )
+
+            # upload again with append mode
+            form_data["csv_file"] = open(filename_1, "rb")
+            form_data["if_exists"] = "append"
+            resp = self.get_resp(url, data=form_data)
+            self.assertIn(
+                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
+            )
+
+            # upload again with replace mode
+            form_data["csv_file"] = open(filename_1, "rb")
+            form_data["if_exists"] = "replace"
+            resp = self.get_resp(url, data=form_data)
+            self.assertIn(
+                f'CSV file "{filename_1}" uploaded to table "{table_name}"', 
resp
+            )
+
+            # try to append to table from file with different schema
+            form_data["csv_file"] = open(filename_2, "rb")
+            form_data["if_exists"] = "append"
+            resp = self.get_resp(url, data=form_data)
+            self.assertIn(
+                f'Unable to upload CSV file "{filename_2}" to table 
"{table_name}"',
+                resp,
+            )
+
+            # replace table from file with different schema
+            form_data["csv_file"] = open(filename_2, "rb")
+            form_data["if_exists"] = "replace"
+            resp = self.get_resp(url, data=form_data)
+            self.assertIn(
+                f'CSV file "{filename_2}" uploaded to table "{table_name}"', 
resp
+            )
+            table = (
+                db.session.query(SqlaTable)
+                .filter_by(table_name=table_name, database_id=db_id)
+                .first()
+            )
+            # make sure the new column name is reflected in the table metadata
+            self.assertIn("d", table.column_names)
         finally:
-            os.remove(filename)
+            os.remove(filename_1)
+            os.remove(filename_2)
 
     def test_dataframe_timezone(self):
         tz = psycopg2.tz.FixedOffsetTimezone(offset=60, name=None)

Reply via email to