This is an automated email from the ASF dual-hosted git repository. villebro pushed a commit to branch 0.37 in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/0.37 by this push: new 6d07273 fix: excel sheet upload is not working (#10450) 6d07273 is described below commit 6d07273360fab106ca6e4241a6112513409a124b Author: pphszx <pph...@gmail.com> AuthorDate: Wed Jul 29 15:20:44 2020 +0800 fix: excel sheet upload is not working (#10450) * remove conflicts with csv upload * revert StringField * change description * remove redundant space * apply string approach --- superset/db_engine_specs/base.py | 7 ++----- superset/views/database/forms.py | 40 +++++++++++++++++++++------------------- superset/views/database/views.py | 18 ++++++++++++------ 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index a32c21a..fb8a4e0 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -438,10 +438,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods """ kwargs["encoding"] = "utf-8" kwargs["iterator"] = True - chunks = pd.io.excel.read_excel( - io=kwargs["filepath_or_buffer"], sheet_name=kwargs["sheet_name"] - ) - df = pd.concat(chunk for chunk in chunks.values()) + df = pd.read_excel(**kwargs) return df @staticmethod @@ -513,7 +510,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods Create table from contents of a excel. Note: this method does not create metadata for the table. """ - df = cls.excel_to_df(filepath_or_buffer=filename, **excel_to_df_kwargs,) + df = cls.excel_to_df(io=filename, **excel_to_df_kwargs,) engine = cls.get_engine(database) if table.schema: # only add schema when it is preset and non empty diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py index 20003ad..1dd1c0b 100644 --- a/superset/views/database/forms.py +++ b/superset/views/database/forms.py @@ -180,20 +180,18 @@ class CsvToDatabaseForm(DynamicForm): ) skip_blank_lines = BooleanField( _("Skip Blank Lines"), - description=_( - "Skip blank lines rather than interpreting them " "as NaN values." - ), + description=_("Skip blank lines rather than interpreting them as NaN values."), ) parse_dates = CommaSeparatedListField( _("Parse Dates"), description=_( - "A comma separated list of columns that should be " "parsed as dates." + "A comma separated list of columns that should be parsed as dates." ), filters=[filter_not_empty_values], ) infer_datetime_format = BooleanField( _("Infer Datetime Format"), - description=_("Use Pandas to interpret the datetime format " "automatically."), + description=_("Use Pandas to interpret the datetime format automatically."), ) decimal = StringField( _("Decimal Character"), @@ -228,16 +226,16 @@ class CsvToDatabaseForm(DynamicForm): class ExcelToDatabaseForm(DynamicForm): # pylint: disable=E0211 - def excel_allowed_dbs(): # type: ignore - excel_allowed_dbs = [] + def excel_allowed_dbs() -> List[Database]: # type: ignore # TODO: change allow_csv_upload to allow_file_upload excel_enabled_dbs = ( db.session.query(Database).filter_by(allow_csv_upload=True).all() ) - for excel_enabled_db in excel_enabled_dbs: - if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db): - excel_allowed_dbs.append(excel_enabled_db) - return excel_allowed_dbs + return [ + excel_enabled_db + for excel_enabled_db in excel_enabled_dbs + if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db) + ] @staticmethod def at_least_one_schema_is_allowed(database: Database) -> bool: @@ -265,10 +263,7 @@ class ExcelToDatabaseForm(DynamicForm): b) if database supports schema user is able to upload to schema in schemas_allowed_for_csv_upload """ - if ( - security_manager.database_access(database) - or security_manager.all_datasource_access() - ): + if security_manager.can_access_database(database): return True schemas = database.get_schema_access_for_csv_upload() if schemas and security_manager.schemas_accessible_by_user( @@ -304,7 +299,10 @@ class ExcelToDatabaseForm(DynamicForm): ) sheet_name = StringField( - _("Sheet Name"), description="Sheet Name", validators=[Optional()] + _("Sheet Name"), + description=_("Strings used for sheet names (default is the first sheet)."), + validators=[Optional()], + widget=BS3TextFieldWidget(), ) con = QuerySelectField( @@ -356,9 +354,6 @@ class ExcelToDatabaseForm(DynamicForm): _("Mangle Duplicate Columns"), description=_('Specify duplicate columns as "X.0, X.1".'), ) - skipinitialspace = BooleanField( - _("Skip Initial Space"), description=_("Skip spaces after delimiter.") - ) skiprows = IntegerField( _("Skip Rows"), description=_("Number of rows to skip at start of file."), @@ -371,6 +366,13 @@ class ExcelToDatabaseForm(DynamicForm): validators=[Optional(), NumberRange(min=0)], widget=BS3TextFieldWidget(), ) + parse_dates = CommaSeparatedListField( + _("Parse Dates"), + description=_( + "A comma separated list of columns that should be parsed as dates." + ), + filters=[filter_not_empty_values], + ) decimal = StringField( _("Decimal Character"), default=".", diff --git a/superset/views/database/views.py b/superset/views/database/views.py index 70b5907..0aff888 100644 --- a/superset/views/database/views.py +++ b/superset/views/database/views.py @@ -263,10 +263,9 @@ class ExcelToDatabaseView(SimpleFormView): def form_get(self, form: ExcelToDatabaseForm) -> None: form.header.data = 0 form.mangle_dupe_cols.data = True - form.skipinitialspace.data = False form.decimal.data = "." form.if_exists.data = "fail" - form.sheet_name = None + form.sheet_name.data = "" def form_post(self, form: ExcelToDatabaseForm) -> Response: database = form.con.data @@ -307,16 +306,23 @@ class ExcelToDatabaseView(SimpleFormView): database = ( db.session.query(models.Database).filter_by(id=con.data.get("id")).one() ) + + # some params are not supported by pandas.read_excel (e.g. chunksize). + # More can be found here: + # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html excel_to_df_kwargs = { "header": form.header.data if form.header.data else 0, "index_col": form.index_col.data, "mangle_dupe_cols": form.mangle_dupe_cols.data, - "skipinitialspace": form.skipinitialspace.data, "skiprows": form.skiprows.data, "nrows": form.nrows.data, - "sheet_name": form.sheet_name.data, - "chunksize": 1000, + "sheet_name": form.sheet_name.data if form.sheet_name.data else 0, + "parse_dates": form.parse_dates.data, } + if form.null_values.data: + excel_to_df_kwargs["na_values"] = form.null_values.data + excel_to_df_kwargs["keep_default_na"] = False + df_to_sql_kwargs = { "name": excel_table.table, "if_exists": form.if_exists.data, @@ -336,7 +342,7 @@ class ExcelToDatabaseView(SimpleFormView): # E.g. if hive was used to upload a excel, presto will be a better option # to explore the table. expore_database = database - explore_database_id = database.get_extra().get("explore_database_id", None) + explore_database_id = database.explore_database_id if explore_database_id: expore_database = ( db.session.query(models.Database)