This is an automated email from the ASF dual-hosted git repository. dpgaspar pushed a commit to branch 0.35 in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
commit 16459902e000b190c1b0660fa76a7616c30e87a1 Author: Ville Brofeldt <[email protected]> AuthorDate: Thu Nov 7 20:03:42 2019 +0200 [fix] Improve csv upload functionality (#8457) * [fix] csv upload when table metadata present * Remove table from hive spec * Move upload before table metadata creation * Refine upload logic, dd unit tests and fix translations * Use ALLOWED_EXTENSIONS from config * Address review comments * Fix error message grammar * Add return type to hive csv upload and replace first with one_or_none --- superset/config.py | 2 +- superset/db_engine_specs/base.py | 18 ++--- superset/db_engine_specs/hive.py | 2 +- superset/translations/en/LC_MESSAGES/messages.po | 3 +- superset/translations/fr/LC_MESSAGES/messages.json | 2 +- superset/translations/fr/LC_MESSAGES/messages.po | 4 +- superset/translations/it/LC_MESSAGES/messages.json | 4 +- superset/translations/it/LC_MESSAGES/messages.po | 3 +- superset/translations/ko/LC_MESSAGES/messages.po | 2 +- superset/translations/messages.pot | 3 +- superset/translations/ru/LC_MESSAGES/messages.json | 6 +- superset/translations/ru/LC_MESSAGES/messages.po | 4 +- superset/translations/zh/LC_MESSAGES/messages.json | 4 +- superset/translations/zh/LC_MESSAGES/messages.po | 4 +- superset/views/core.py | 2 +- superset/views/database/forms.py | 12 ++- superset/views/database/views.py | 64 +++++++++++----- tests/core_tests.py | 86 ++++++++++++++++++---- 18 files changed, 156 insertions(+), 69 deletions(-) diff --git a/superset/config.py b/superset/config.py index b263285..a58b318 100644 --- a/superset/config.py +++ b/superset/config.py @@ -279,7 +279,7 @@ SUPERSET_WEBSERVER_DOMAINS = None # Allowed format types for upload on Database view # TODO: Add processing of other spreadsheet formats (xls, xlsx etc) -ALLOWED_EXTENSIONS = set(["csv"]) +ALLOWED_EXTENSIONS = {"csv", "tsv"} # CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv # method. diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 97e214f..7060887 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -37,7 +37,7 @@ from sqlalchemy.sql.expression import ColumnClause, ColumnElement, Select, TextA from sqlalchemy.types import TypeEngine from werkzeug.utils import secure_filename -from superset import app, db, sql_parse +from superset import app, sql_parse from superset.utils import core as utils if TYPE_CHECKING: @@ -388,15 +388,17 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods df.to_sql(**kwargs) @classmethod - def create_table_from_csv(cls, form, table): - """ Create table (including metadata in backend) from contents of a csv. + def create_table_from_csv(cls, form) -> None: + """ + Create table from contents of a csv. Note: this method does not create + metadata for the table. + :param form: Parameters defining how to process data - :param table: Metadata of new table to be created """ def _allowed_file(filename: str) -> bool: # Only allow specific file extensions as specified in the config - extension = os.path.splitext(filename)[1] + extension = os.path.splitext(filename)[1].lower() return ( extension is not None and extension[1:] in config["ALLOWED_EXTENSIONS"] ) @@ -432,12 +434,6 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods } cls.df_to_sql(**df_to_sql_kwargs) - table.user_id = g.user.id - table.schema = form.schema.data - table.fetch_metadata() - db.session.add(table) - db.session.commit() - @classmethod def convert_dttm(cls, target_type: str, dttm: datetime) -> str: """ diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py index dbcb337..1c5319e 100644 --- a/superset/db_engine_specs/hive.py +++ b/superset/db_engine_specs/hive.py @@ -98,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec): return [] @classmethod - def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals + def create_table_from_csv(cls, form) -> None: # pylint: disable=too-many-locals """Uploads a csv file and creates a superset datasource in Hive.""" def convert_to_hive_type(col_type): diff --git a/superset/translations/en/LC_MESSAGES/messages.po b/superset/translations/en/LC_MESSAGES/messages.po index 34c5787..6bdd3a1 100644 --- a/superset/translations/en/LC_MESSAGES/messages.po +++ b/superset/translations/en/LC_MESSAGES/messages.po @@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration" msgstr "" #: superset/views/core.py:375 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" msgstr "" #: superset/views/core.py:401 superset/views/core.py:667 @@ -7765,4 +7765,3 @@ msgstr "" #~ msgid "Saved Queries" #~ msgstr "" - diff --git a/superset/translations/fr/LC_MESSAGES/messages.json b/superset/translations/fr/LC_MESSAGES/messages.json index b49c735..4114301 100644 --- a/superset/translations/fr/LC_MESSAGES/messages.json +++ b/superset/translations/fr/LC_MESSAGES/messages.json @@ -3124,7 +3124,7 @@ "CSV to Database configuration": [ "" ], - "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [ + "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [ "" ], "User": [ diff --git a/superset/translations/fr/LC_MESSAGES/messages.po b/superset/translations/fr/LC_MESSAGES/messages.po index 180e0e7..ed66cdc 100644 --- a/superset/translations/fr/LC_MESSAGES/messages.po +++ b/superset/translations/fr/LC_MESSAGES/messages.po @@ -4880,8 +4880,8 @@ msgid "CSV to Database configuration" msgstr "CSV vers la configuration de la base de données" #: superset/views/core.py:375 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" -msgstr "Fichier CSV \"{0}\" chargé dans la table \"{1}\" de la base de données \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" +msgstr "Fichier CSV \"%(csv_filename)s\" chargé dans la table \"%(table_name)s\" de la base de données \"%(db_name)s\"" #: superset/views/core.py:401 superset/views/core.py:667 #: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60 diff --git a/superset/translations/it/LC_MESSAGES/messages.json b/superset/translations/it/LC_MESSAGES/messages.json index 71378d4..5583613 100644 --- a/superset/translations/it/LC_MESSAGES/messages.json +++ b/superset/translations/it/LC_MESSAGES/messages.json @@ -2923,7 +2923,7 @@ "CSV to Database configuration": [ "" ], - "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [ + "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [ "" ], "User": [ @@ -3132,4 +3132,4 @@ ] } } -} \ No newline at end of file +} diff --git a/superset/translations/it/LC_MESSAGES/messages.po b/superset/translations/it/LC_MESSAGES/messages.po index e3ca6e2..ba49fcb 100644 --- a/superset/translations/it/LC_MESSAGES/messages.po +++ b/superset/translations/it/LC_MESSAGES/messages.po @@ -4432,7 +4432,7 @@ msgid "CSV to Database configuration" msgstr "" #: superset/views/core.py:363 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" msgstr "" #: superset/views/core.py:389 superset/views/core.py:655 @@ -6461,4 +6461,3 @@ msgstr "Query salvate" #~ msgid "Slice" #~ msgstr "Slice" - diff --git a/superset/translations/ko/LC_MESSAGES/messages.po b/superset/translations/ko/LC_MESSAGES/messages.po index 2b52cb7..e998c29 100644 --- a/superset/translations/ko/LC_MESSAGES/messages.po +++ b/superset/translations/ko/LC_MESSAGES/messages.po @@ -3788,7 +3788,7 @@ msgstr "대시보드 가져오기" msgid "CSV to Database configuration" msgstr "" -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" msgstr "" msgid "User" diff --git a/superset/translations/messages.pot b/superset/translations/messages.pot index cd4a3f7..6c70ee1 100644 --- a/superset/translations/messages.pot +++ b/superset/translations/messages.pot @@ -4669,7 +4669,7 @@ msgid "CSV to Database configuration" msgstr "" #: superset/views/core.py:375 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" msgstr "" #: superset/views/core.py:401 superset/views/core.py:667 @@ -4967,4 +4967,3 @@ msgstr "" #: superset/views/sql_lab.py:86 msgid "Saved Queries" msgstr "" - diff --git a/superset/translations/ru/LC_MESSAGES/messages.json b/superset/translations/ru/LC_MESSAGES/messages.json index f375364..f8bb352 100644 --- a/superset/translations/ru/LC_MESSAGES/messages.json +++ b/superset/translations/ru/LC_MESSAGES/messages.json @@ -2923,8 +2923,8 @@ "CSV to Database configuration": [ "Настройка CSV для БД" ], - "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [ - "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\"" + "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [ + "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\"" ], "User": [ "Пользователь" @@ -3132,4 +3132,4 @@ ] } } -} \ No newline at end of file +} diff --git a/superset/translations/ru/LC_MESSAGES/messages.po b/superset/translations/ru/LC_MESSAGES/messages.po index ea89a33..29c44f0 100644 --- a/superset/translations/ru/LC_MESSAGES/messages.po +++ b/superset/translations/ru/LC_MESSAGES/messages.po @@ -4544,8 +4544,8 @@ msgid "CSV to Database configuration" msgstr "Настройка CSV для БД" #: superset/views/core.py:363 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" -msgstr "CSV-файл \"{0}\" загружен в таблицу \"{1}\" базы данных \"{2}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" +msgstr "CSV-файл \"%(csv_filename)s\" загружен в таблицу \"%(table_name)s\" базы данных \"%(db_name)s\"" #: superset/views/core.py:389 superset/views/core.py:655 #: superset/views/sql_lab.py:16 superset/views/sql_lab.py:53 diff --git a/superset/translations/zh/LC_MESSAGES/messages.json b/superset/translations/zh/LC_MESSAGES/messages.json index 76640df..05c1de1 100644 --- a/superset/translations/zh/LC_MESSAGES/messages.json +++ b/superset/translations/zh/LC_MESSAGES/messages.json @@ -3124,8 +3124,8 @@ "CSV to Database configuration": [ "csv 到数据库配置" ], - "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"": [ - "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\"" + "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"": [ + "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\"" ], "User": [ "用户" diff --git a/superset/translations/zh/LC_MESSAGES/messages.po b/superset/translations/zh/LC_MESSAGES/messages.po index 5077f76..6b43381 100644 --- a/superset/translations/zh/LC_MESSAGES/messages.po +++ b/superset/translations/zh/LC_MESSAGES/messages.po @@ -4746,8 +4746,8 @@ msgid "CSV to Database configuration" msgstr "csv 到数据库配置" #: superset/views/core.py:375 -msgid "CSV file \"{0}\" uploaded to table \"{1}\" in database \"{2}\"" -msgstr "csv 文件 \"{0}\" 上传到数据库 \"{2}\" 中的表 \"{1}\"" +msgid "CSV file \"%(csv_filename)s\" uploaded to table \"%(table_name)s\" in database \"%(db_name)s\"" +msgstr "csv 文件 \"%(csv_filename)s\" 上传到数据库 \"%(db_name)s\" 中的表 \"%(table_name)s\"" #: superset/views/core.py:401 superset/views/core.py:667 #: superset/views/sql_lab.py:23 superset/views/sql_lab.py:60 diff --git a/superset/views/core.py b/superset/views/core.py index 0c6b6ec..7483e55 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2999,7 +2999,7 @@ class Superset(BaseSupersetView): except Exception: return json_error_response( "Failed to fetch schemas allowed for csv upload in this database! " - "Please contact Superset Admin!", + "Please contact your Superset Admin!", stacktrace=utils.get_stacktrace(), ) diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index e705b57..57144bc 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -90,7 +90,17 @@ class CsvToDatabaseForm(DynamicForm): csv_file = FileField( _("CSV File"), description=_("Select a CSV file to be uploaded to a database."), - validators=[FileRequired(), FileAllowed(["csv"], _("CSV Files Only!"))], + validators=[ + FileRequired(), + FileAllowed( + config["ALLOWED_EXTENSIONS"], + _( + "Only the following file extensions are allowed: " + "%(allowed_extensions)s", + allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]), + ), + ), + ], ) con = QuerySelectField( _("Database"), diff --git a/superset/views/database/views.py b/superset/views/database/views.py index 3298b9f..bc32c19 100644 --- a/superset/views/database/views.py +++ b/superset/views/database/views.py @@ -17,7 +17,7 @@ # pylint: disable=C,R,W import os -from flask import flash, redirect +from flask import flash, g, redirect from flask_appbuilder import SimpleFormView from flask_appbuilder.forms import DynamicForm from flask_appbuilder.models.sqla.interface import SQLAInterface @@ -28,7 +28,7 @@ from wtforms.fields import StringField from wtforms.validators import ValidationError import superset.models.core as models -from superset import app, appbuilder, security_manager +from superset import app, appbuilder, db, security_manager from superset.connectors.sqla.models import SqlaTable from superset.utils import core as utils from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin @@ -104,10 +104,10 @@ class CsvToDatabaseView(SimpleFormView): if not self.is_schema_allowed(database, schema_name): message = _( - 'Database "{0}" Schema "{1}" is not allowed for csv uploads. ' - "Please contact Superset Admin".format( - database.database_name, schema_name - ) + 'Database "%(database_name)s" schema "%(schema_name)s" ' + "is not allowed for csv uploads. Please contact your Superset Admin.", + database_name=database.database_name, + schema_name=schema_name, ) flash(message, "danger") return redirect("/csvtodatabaseview/form") @@ -119,32 +119,58 @@ class CsvToDatabaseView(SimpleFormView): try: utils.ensure_path_exists(config["UPLOAD_FOLDER"]) csv_file.save(path) - table = SqlaTable(table_name=form.name.data) - table.database = form.data.get("con") - table.database_id = table.database.id - table.database.db_engine_spec.create_table_from_csv(form, table) + table_name = form.name.data + database = form.data.get("con") + database.db_engine_spec.create_table_from_csv(form) + + table = ( + db.session.query(SqlaTable) + .filter_by( + table_name=table_name, + schema=form.schema.data, + database_id=database.id, + ) + .one_or_none() + ) + if table: + table.fetch_metadata() + if not table: + table = SqlaTable(table_name=table_name) + table.database = database + table.database_id = database.id + table.user_id = g.user.id + table.schema = form.schema.data + table.fetch_metadata() + db.session.add(table) + db.session.commit() except Exception as e: + db.session.rollback() try: os.remove(path) except OSError: pass - message = ( - "Table name {} already exists. Please pick another".format( - form.name.data - ) - if isinstance(e, IntegrityError) - else str(e) + message = _( + 'Unable to upload CSV file "%(filename)s" to table ' + '"%(table_name)s" in database "%(db_name)s". ' + "Error message: %(error_msg)s", + filename=csv_filename, + table_name=form.name.data, + db_name=database.database_name, + error_msg=str(e), ) + flash(message, "danger") stats_logger.incr("failed_csv_upload") return redirect("/csvtodatabaseview/form") os.remove(path) # Go back to welcome page / splash screen - db_name = table.database.database_name message = _( - 'CSV file "{0}" uploaded to table "{1}" in ' - 'database "{2}"'.format(csv_filename, form.name.data, db_name) + 'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in ' + 'database "%(db_name)s"', + csv_filename=csv_filename, + table_name=form.name.data, + db_name=table.database.database_name, ) flash(message, "info") stats_logger.incr("successful_csv_upload") diff --git a/tests/core_tests.py b/tests/core_tests.py index 2110cdb..70d30a5 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -598,42 +598,100 @@ class CoreTests(SupersetTestCase): def test_import_csv(self): self.login(username="admin") - filename = "testCSV.csv" table_name = "".join(random.choice(string.ascii_uppercase) for _ in range(5)) - test_file = open(filename, "w+") - test_file.write("a,b\n") - test_file.write("john,1\n") - test_file.write("paul,2\n") - test_file.close() + filename_1 = "testCSV.csv" + test_file_1 = open(filename_1, "w+") + test_file_1.write("a,b\n") + test_file_1.write("john,1\n") + test_file_1.write("paul,2\n") + test_file_1.close() + + filename_2 = "testCSV2.csv" + test_file_2 = open(filename_2, "w+") + test_file_2.write("b,c,d\n") + test_file_2.write("john,1,x\n") + test_file_2.write("paul,2,y\n") + test_file_2.close() + example_db = utils.get_example_database() example_db.allow_csv_upload = True db_id = example_db.id db.session.commit() - test_file = open(filename, "rb") form_data = { - "csv_file": test_file, + "csv_file": open(filename_1, "rb"), "sep": ",", "name": table_name, "con": db_id, - "if_exists": "append", + "if_exists": "fail", "index_label": "test_label", "mangle_dupe_cols": False, } url = "/databaseview/list/" add_datasource_page = self.get_resp(url) - assert "Upload a CSV" in add_datasource_page + self.assertIn("Upload a CSV", add_datasource_page) url = "/csvtodatabaseview/form" form_get = self.get_resp(url) - assert "CSV to Database configuration" in form_get + self.assertIn("CSV to Database configuration", form_get) try: - # ensure uploaded successfully + # initial upload with fail mode + resp = self.get_resp(url, data=form_data) + self.assertIn( + f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp + ) + + # upload again with fail mode; should fail + form_data["csv_file"] = open(filename_1, "rb") resp = self.get_resp(url, data=form_data) - assert 'CSV file "testCSV.csv" uploaded to table' in resp + self.assertIn( + f'Unable to upload CSV file "{filename_1}" to table "{table_name}"', + resp, + ) + + # upload again with append mode + form_data["csv_file"] = open(filename_1, "rb") + form_data["if_exists"] = "append" + resp = self.get_resp(url, data=form_data) + self.assertIn( + f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp + ) + + # upload again with replace mode + form_data["csv_file"] = open(filename_1, "rb") + form_data["if_exists"] = "replace" + resp = self.get_resp(url, data=form_data) + self.assertIn( + f'CSV file "{filename_1}" uploaded to table "{table_name}"', resp + ) + + # try to append to table from file with different schema + form_data["csv_file"] = open(filename_2, "rb") + form_data["if_exists"] = "append" + resp = self.get_resp(url, data=form_data) + self.assertIn( + f'Unable to upload CSV file "{filename_2}" to table "{table_name}"', + resp, + ) + + # replace table from file with different schema + form_data["csv_file"] = open(filename_2, "rb") + form_data["if_exists"] = "replace" + resp = self.get_resp(url, data=form_data) + self.assertIn( + f'CSV file "{filename_2}" uploaded to table "{table_name}"', resp + ) + table = ( + db.session.query(SqlaTable) + .filter_by(table_name=table_name, database_id=db_id) + .first() + ) + # make sure the new column name is reflected in the table metadata + self.assertIn("d", table.column_names) finally: - os.remove(filename) + os.remove(filename_1) + os.remove(filename_2) def test_dataframe_timezone(self): tz = psycopg2.tz.FixedOffsetTimezone(offset=60, name=None)
