This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 473fe10  fix: excel sheet upload is not working (#10450)
473fe10 is described below

commit 473fe1003aadcc0ba5a0884b7666bea190837180
Author: pphszx <pph...@gmail.com>
AuthorDate: Wed Jul 29 15:20:44 2020 +0800

    fix: excel sheet upload is not working (#10450)
    
    * remove conflicts with csv upload
    
    * revert StringField
    
    * change description
    
    * remove redundant space
    
    * apply string approach
---
 superset/db_engine_specs/base.py |  7 ++-----
 superset/views/database/forms.py | 40 +++++++++++++++++++++-------------------
 superset/views/database/views.py | 18 ++++++++++++------
 3 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 8815990..120ea4b 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -438,10 +438,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
         """
         kwargs["encoding"] = "utf-8"
         kwargs["iterator"] = True
-        chunks = pd.io.excel.read_excel(
-            io=kwargs["filepath_or_buffer"], sheet_name=kwargs["sheet_name"]
-        )
-        df = pd.concat(chunk for chunk in chunks.values())
+        df = pd.read_excel(**kwargs)
         return df
 
     @staticmethod
@@ -513,7 +510,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
         Create table from contents of a excel. Note: this method does not create
         metadata for the table.
         """
-        df = cls.excel_to_df(filepath_or_buffer=filename, **excel_to_df_kwargs,)
+        df = cls.excel_to_df(io=filename, **excel_to_df_kwargs,)
         engine = cls.get_engine(database)
         if table.schema:
             # only add schema when it is preset and non empty
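
The base.py hunk above drops the chunked pd.io.excel.read_excel call (which treated the result as a dict of sheets and concatenated it) in favour of a single pd.read_excel(**kwargs), with the caller passing the uploaded file under pandas' io keyword rather than filepath_or_buffer. A minimal sketch of that call shape, outside Superset and with a hypothetical file name:

    import pandas as pd

    # Hypothetical upload path; sheet_name=0 selects the first sheet and
    # read_excel returns a single DataFrame, so no chunk concatenation is needed.
    kwargs = {"io": "upload.xlsx", "sheet_name": 0, "header": 0}
    df = pd.read_excel(**kwargs)
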
diff --git a/superset/views/database/forms.py b/superset/views/database/forms.py
index 20003ad..1dd1c0b 100644
--- a/superset/views/database/forms.py
+++ b/superset/views/database/forms.py
@@ -180,20 +180,18 @@ class CsvToDatabaseForm(DynamicForm):
     )
     skip_blank_lines = BooleanField(
         _("Skip Blank Lines"),
-        description=_(
-            "Skip blank lines rather than interpreting them " "as NaN values."
-        ),
+        description=_("Skip blank lines rather than interpreting them as NaN 
values."),
     )
     parse_dates = CommaSeparatedListField(
         _("Parse Dates"),
         description=_(
-            "A comma separated list of columns that should be " "parsed as 
dates."
+            "A comma separated list of columns that should be parsed as dates."
         ),
         filters=[filter_not_empty_values],
     )
     infer_datetime_format = BooleanField(
         _("Infer Datetime Format"),
-        description=_("Use Pandas to interpret the datetime format " 
"automatically."),
+        description=_("Use Pandas to interpret the datetime format 
automatically."),
     )
     decimal = StringField(
         _("Decimal Character"),
@@ -228,16 +226,16 @@ class CsvToDatabaseForm(DynamicForm):
 
 class ExcelToDatabaseForm(DynamicForm):
     # pylint: disable=E0211
-    def excel_allowed_dbs():  # type: ignore
-        excel_allowed_dbs = []
+    def excel_allowed_dbs() -> List[Database]:  # type: ignore
         # TODO: change allow_csv_upload to allow_file_upload
         excel_enabled_dbs = (
             db.session.query(Database).filter_by(allow_csv_upload=True).all()
         )
-        for excel_enabled_db in excel_enabled_dbs:
-            if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
-                excel_allowed_dbs.append(excel_enabled_db)
-        return excel_allowed_dbs
+        return [
+            excel_enabled_db
+            for excel_enabled_db in excel_enabled_dbs
+            if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db)
+        ]
 
     @staticmethod
     def at_least_one_schema_is_allowed(database: Database) -> bool:
@@ -265,10 +263,7 @@ class ExcelToDatabaseForm(DynamicForm):
                 b) if database supports schema
                    user is able to upload to schema in schemas_allowed_for_csv_upload
         """
-        if (
-            security_manager.database_access(database)
-            or security_manager.all_datasource_access()
-        ):
+        if security_manager.can_access_database(database):
             return True
         schemas = database.get_schema_access_for_csv_upload()
         if schemas and security_manager.schemas_accessible_by_user(
@@ -304,7 +299,10 @@ class ExcelToDatabaseForm(DynamicForm):
     )
 
     sheet_name = StringField(
-        _("Sheet Name"), description="Sheet Name", validators=[Optional()]
+        _("Sheet Name"),
+        description=_("Strings used for sheet names (default is the first 
sheet)."),
+        validators=[Optional()],
+        widget=BS3TextFieldWidget(),
     )
 
     con = QuerySelectField(
@@ -356,9 +354,6 @@ class ExcelToDatabaseForm(DynamicForm):
         _("Mangle Duplicate Columns"),
         description=_('Specify duplicate columns as "X.0, X.1".'),
     )
-    skipinitialspace = BooleanField(
-        _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
-    )
     skiprows = IntegerField(
         _("Skip Rows"),
         description=_("Number of rows to skip at start of file."),
@@ -371,6 +366,13 @@ class ExcelToDatabaseForm(DynamicForm):
         validators=[Optional(), NumberRange(min=0)],
         widget=BS3TextFieldWidget(),
     )
+    parse_dates = CommaSeparatedListField(
+        _("Parse Dates"),
+        description=_(
+            "A comma separated list of columns that should be parsed as dates."
+        ),
+        filters=[filter_not_empty_values],
+    )
     decimal = StringField(
         _("Decimal Character"),
         default=".",
diff --git a/superset/views/database/views.py b/superset/views/database/views.py
index 70b5907..0aff888 100644
--- a/superset/views/database/views.py
+++ b/superset/views/database/views.py
@@ -263,10 +263,9 @@ class ExcelToDatabaseView(SimpleFormView):
     def form_get(self, form: ExcelToDatabaseForm) -> None:
         form.header.data = 0
         form.mangle_dupe_cols.data = True
-        form.skipinitialspace.data = False
         form.decimal.data = "."
         form.if_exists.data = "fail"
-        form.sheet_name = None
+        form.sheet_name.data = ""
 
     def form_post(self, form: ExcelToDatabaseForm) -> Response:
         database = form.con.data
@@ -307,16 +306,23 @@ class ExcelToDatabaseView(SimpleFormView):
            database = (
                db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
            )
+
+            # some params are not supported by pandas.read_excel (e.g. chunksize).
+            # More can be found here:
+            # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
             excel_to_df_kwargs = {
                 "header": form.header.data if form.header.data else 0,
                 "index_col": form.index_col.data,
                 "mangle_dupe_cols": form.mangle_dupe_cols.data,
-                "skipinitialspace": form.skipinitialspace.data,
                 "skiprows": form.skiprows.data,
                 "nrows": form.nrows.data,
-                "sheet_name": form.sheet_name.data,
-                "chunksize": 1000,
+                "sheet_name": form.sheet_name.data if form.sheet_name.data 
else 0,
+                "parse_dates": form.parse_dates.data,
             }
+            if form.null_values.data:
+                excel_to_df_kwargs["na_values"] = form.null_values.data
+                excel_to_df_kwargs["keep_default_na"] = False
+
             df_to_sql_kwargs = {
                 "name": excel_table.table,
                 "if_exists": form.if_exists.data,
@@ -336,7 +342,7 @@ class ExcelToDatabaseView(SimpleFormView):
             # E.g. if hive was used to upload a excel, presto will be a better option
             # to explore the table.
             expore_database = database
-            explore_database_id = database.get_extra().get("explore_database_id", None)
+            explore_database_id = database.explore_database_id
             if explore_database_id:
                 expore_database = (
                     db.session.query(models.Database)
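
The views.py hunk builds excel_to_df_kwargs directly from the form: a blank sheet name falls back to 0 (the first sheet), parse_dates is passed through, the unsupported chunksize option is dropped, and pandas' default NA handling is only overridden when the user supplied null values. A standalone sketch of that mapping onto pandas.read_excel, with hypothetical form values and file name:

    import pandas as pd

    sheet_name = ""          # hypothetical form input; blank means first sheet
    null_values = ["N/A"]    # hypothetical form input

    kwargs = {
        "header": 0,
        "sheet_name": sheet_name if sheet_name else 0,
    }
    if null_values:
        # Only the user-supplied tokens count as NA, as in the view.
        kwargs["na_values"] = null_values
        kwargs["keep_default_na"] = False

    df = pd.read_excel("upload.xlsx", **kwargs)  # hypothetical upload path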
