villebro commented on a change in pull request #9825:
URL:
https://github.com/apache/incubator-superset/pull/9825#discussion_r430154224
##########
File path: superset/app.py
##########
@@ -342,6 +342,15 @@ def init_views(self) -> None:
category_label=__("Sources"),
category_icon="fa-wrench",
)
+ appbuilder.add_link(
+ "Upload a Excel",
+ label=__("Upload a Excel"),
Review comment:
I would call this either "Upload Excel" or "Upload Excel sheet"
##########
File path: superset/db_engine_specs/base.py
##########
@@ -429,6 +429,20 @@ def set_or_update_query_limit(cls, sql: str, limit: int)
-> str:
parsed_query = sql_parse.ParsedQuery(sql)
return parsed_query.set_or_update_query_limit(limit)
+ @staticmethod
+ def excel_to_df(**kwargs: Any) -> pd.DataFrame:
+ """ Read excel into Pandas DataFrame
+ :param kwargs: params to be passed to DataFrame.read_excel
+ :return: Pandas DataFrame containing data from csv
+ """
+ kwargs["encoding"] = "utf-8"
+ kwargs["iterator"] = True
+ chunks = pd.io.excel.read_excel(
+ io=kwargs["filepath_or_buffer"], sheet_name=None
Review comment:
Perhaps `sheet_name` could be a parameter in the form.
##########
File path: superset/views/database/forms.py
##########
@@ -207,3 +207,177 @@ def at_least_one_schema_is_allowed(database):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
+
+
+class ExcelToDatabaseForm(DynamicForm):
+ # pylint: disable=E0211
+ def excel_allowed_dbs(): # type: ignore
+ excel_allowed_dbs = []
+ # TODO: change allow_csv_upload to allow_file_upload
+ excel_enabled_dbs = (
+
db.session.query(models.Database).filter_by(allow_csv_upload=True).all()
+ )
+ for excel_enabled_db in excel_enabled_dbs:
+ if
CsvToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
+ excel_allowed_dbs.append(excel_enabled_db)
+ return excel_allowed_dbs
+
+ @staticmethod
+ def at_least_one_schema_is_allowed(database):
+ """
+ If the user has access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is able to upload csv without specifying schema name
+ b) if database supports schema
+ user is able to upload csv to any schema
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and upload will fail
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ elif the user does not access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is unable to upload csv
+ b) if database supports schema
+ user is unable to upload csv
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and user is unable to upload
csv
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ """
+ if (
+ security_manager.database_access(database)
+ or security_manager.all_datasource_access()
+ ):
+ return True
+ schemas = database.get_schema_access_for_csv_upload()
+ if schemas and security_manager.schemas_accessible_by_user(
+ database, schemas, False
+ ):
+ return True
+ return False
+
+ name = StringField(
+ _("Table Name"),
+ description=_("Name of table to be created from excel data."),
+ validators=[DataRequired()],
+ widget=BS3TextFieldWidget(),
+ )
+ excel_file = FileField(
+ _("Excel File"),
+ description=_("Select a Excel file to be uploaded to a database."),
+ validators=[
+ FileRequired(),
+ FileAllowed(
+ config["ALLOWED_EXTENSIONS"],
+ _(
+ "Only the following file extensions are allowed: "
+ "%(allowed_extensions)s",
+ allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
+ ),
+ ),
+ ],
+ )
+ con = QuerySelectField(
+ _("Database"),
+ query_factory=excel_allowed_dbs,
+ get_pk=lambda a: a.id,
+ get_label=lambda a: a.database_name,
+ )
+ schema = StringField(
+ _("Schema"),
+ description=_("Specify a schema (if database flavor supports this)."),
+ validators=[Optional()],
+ widget=BS3TextFieldWidget(),
+ )
+ if_exists = SelectField(
+ _("Table Exists"),
+ description=_(
+ "If table exists do one of the following: "
+ "Fail (do nothing), Replace (drop and recreate table) "
+ "or Append (insert data)."
+ ),
+ choices=[
+ ("fail", _("Fail")),
+ ("replace", _("Replace")),
+ ("append", _("Append")),
+ ],
+ validators=[DataRequired()],
+ )
+ header = IntegerField(
+ _("Header Row"),
+ description=_(
+ "Row containing the headers to use as "
+ "column names (0 is first line of data). "
+ "Leave empty if there is no header row."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ index_col = IntegerField(
+ _("Index Column"),
+ description=_(
+ "Column to use as the row labels of the "
+ "dataframe. Leave empty if no index column."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ mangle_dupe_cols = BooleanField(
+ _("Mangle Duplicate Columns"),
+ description=_('Specify duplicate columns as "X.0, X.1".'),
+ )
+ skipinitialspace = BooleanField(
+ _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
+ )
+ skiprows = IntegerField(
+ _("Skip Rows"),
+ description=_("Number of rows to skip at start of file."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ nrows = IntegerField(
+ _("Rows to Read"),
+ description=_("Number of rows of file to read."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ skip_blank_lines = BooleanField(
+ _("Skip Blank Lines"),
+ description=_(
+ "Skip blank lines rather than interpreting them " "as NaN values."
Review comment:
The string break can be removed
##########
File path: superset/views/database/views.py
##########
@@ -241,3 +241,152 @@ def form_post(self, form):
flash(message, "info")
stats_logger.incr("successful_csv_upload")
return redirect("/tablemodelview/list/")
+
+
+class ExcelToDatabaseView(SimpleFormView):
+ form = ExcelToDatabaseForm
+ form_template = "superset/form_view/excel_to_database_view/edit.html"
+ form_title = _("Excel to Database configuration")
+ add_columns = ["database", "schema", "table_name"]
+
+ def form_get(self, form):
+ form.header.data = 0
+ form.mangle_dupe_cols.data = True
+ form.skipinitialspace.data = False
+ form.skip_blank_lines.data = True
+ form.infer_datetime_format.data = True
+ form.decimal.data = "."
+ form.if_exists.data = "fail"
+
+ def form_post(self, form):
+ database = form.con.data
+ excel_table = Table(table=form.name.data, schema=form.schema.data)
+
+ if not schema_allows_csv_upload(database, excel_table.schema):
+ message = _(
+ 'Database "%(database_name)s" schema "%(schema_name)s" '
+ "is not allowed for csv uploads. Please contact your Superset
Admin.",
Review comment:
We should not refer to CSV in the Excel upload. Perhaps this could be
rephrased simply as "..not allowed for uploads".
##########
File path: superset/views/database/forms.py
##########
@@ -207,3 +207,177 @@ def at_least_one_schema_is_allowed(database):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
+
+
+class ExcelToDatabaseForm(DynamicForm):
+ # pylint: disable=E0211
+ def excel_allowed_dbs(): # type: ignore
+ excel_allowed_dbs = []
+ # TODO: change allow_csv_upload to allow_file_upload
+ excel_enabled_dbs = (
+
db.session.query(models.Database).filter_by(allow_csv_upload=True).all()
+ )
+ for excel_enabled_db in excel_enabled_dbs:
+ if
CsvToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
+ excel_allowed_dbs.append(excel_enabled_db)
+ return excel_allowed_dbs
+
+ @staticmethod
+ def at_least_one_schema_is_allowed(database):
+ """
+ If the user has access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is able to upload csv without specifying schema name
+ b) if database supports schema
+ user is able to upload csv to any schema
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and upload will fail
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ elif the user does not access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is unable to upload csv
+ b) if database supports schema
+ user is unable to upload csv
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and user is unable to upload
csv
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ """
+ if (
+ security_manager.database_access(database)
+ or security_manager.all_datasource_access()
+ ):
+ return True
+ schemas = database.get_schema_access_for_csv_upload()
+ if schemas and security_manager.schemas_accessible_by_user(
+ database, schemas, False
+ ):
+ return True
+ return False
+
+ name = StringField(
+ _("Table Name"),
+ description=_("Name of table to be created from excel data."),
+ validators=[DataRequired()],
+ widget=BS3TextFieldWidget(),
+ )
+ excel_file = FileField(
+ _("Excel File"),
+ description=_("Select a Excel file to be uploaded to a database."),
+ validators=[
+ FileRequired(),
+ FileAllowed(
+ config["ALLOWED_EXTENSIONS"],
+ _(
+ "Only the following file extensions are allowed: "
+ "%(allowed_extensions)s",
+ allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
Review comment:
Here we should probably only check for allowed Excel extensions, i.e.
the intersection of `ALLOWED_EXTENSIONS` and `EXCEL_EXTENSION`
##########
File path: superset/app.py
##########
@@ -342,6 +342,15 @@ def init_views(self) -> None:
category_label=__("Sources"),
category_icon="fa-wrench",
)
+ appbuilder.add_link(
Review comment:
I would check if Excel extensions from `EXCEL_EXTENSION` are enabled in
the `ALLOWED_EXTENSIONS` whitelist. While at it, the same check could be done
for `CSV` extensions.
##########
File path: superset/views/database/views.py
##########
@@ -241,3 +241,152 @@ def form_post(self, form):
flash(message, "info")
stats_logger.incr("successful_csv_upload")
return redirect("/tablemodelview/list/")
+
+
+class ExcelToDatabaseView(SimpleFormView):
+ form = ExcelToDatabaseForm
+ form_template = "superset/form_view/excel_to_database_view/edit.html"
+ form_title = _("Excel to Database configuration")
+ add_columns = ["database", "schema", "table_name"]
+
+ def form_get(self, form):
+ form.header.data = 0
+ form.mangle_dupe_cols.data = True
+ form.skipinitialspace.data = False
+ form.skip_blank_lines.data = True
+ form.infer_datetime_format.data = True
+ form.decimal.data = "."
+ form.if_exists.data = "fail"
+
+ def form_post(self, form):
+ database = form.con.data
+ excel_table = Table(table=form.name.data, schema=form.schema.data)
+
+ if not schema_allows_csv_upload(database, excel_table.schema):
+ message = _(
+ 'Database "%(database_name)s" schema "%(schema_name)s" '
+ "is not allowed for csv uploads. Please contact your Superset
Admin.",
+ database_name=database.database_name,
+ schema_name=excel_table.schema,
+ )
+ flash(message, "danger")
+ return redirect("/exceltodatabaseview/form")
+
+ if "." in excel_table.table and excel_table.schema:
+ message = _(
+ "You cannot specify a namespace both in the name of the table:
"
+ '"%(csv_table.table)s" and in the schema field: '
+ '"%(csv_table.schema)s". Please remove one',
+ table=excel_table.table,
+ schema=excel_table.schema,
+ )
Review comment:
These should probably be `%(table)s` and `%(schema)s` respectively, as
that's what the `flask_babel.lazy_gettext` keywords are referring to.
##########
File path: superset/views/database/forms.py
##########
@@ -207,3 +207,177 @@ def at_least_one_schema_is_allowed(database):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
+
+
+class ExcelToDatabaseForm(DynamicForm):
+ # pylint: disable=E0211
+ def excel_allowed_dbs(): # type: ignore
+ excel_allowed_dbs = []
+ # TODO: change allow_csv_upload to allow_file_upload
+ excel_enabled_dbs = (
+
db.session.query(models.Database).filter_by(allow_csv_upload=True).all()
+ )
+ for excel_enabled_db in excel_enabled_dbs:
+ if
CsvToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
+ excel_allowed_dbs.append(excel_enabled_db)
+ return excel_allowed_dbs
+
+ @staticmethod
+ def at_least_one_schema_is_allowed(database):
+ """
+ If the user has access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is able to upload csv without specifying schema name
+ b) if database supports schema
+ user is able to upload csv to any schema
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and upload will fail
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ elif the user does not access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is unable to upload csv
+ b) if database supports schema
+ user is unable to upload csv
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and user is unable to upload
csv
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ """
+ if (
+ security_manager.database_access(database)
+ or security_manager.all_datasource_access()
+ ):
+ return True
+ schemas = database.get_schema_access_for_csv_upload()
+ if schemas and security_manager.schemas_accessible_by_user(
+ database, schemas, False
+ ):
+ return True
+ return False
+
+ name = StringField(
+ _("Table Name"),
+ description=_("Name of table to be created from excel data."),
+ validators=[DataRequired()],
+ widget=BS3TextFieldWidget(),
+ )
+ excel_file = FileField(
+ _("Excel File"),
+ description=_("Select a Excel file to be uploaded to a database."),
+ validators=[
+ FileRequired(),
+ FileAllowed(
+ config["ALLOWED_EXTENSIONS"],
+ _(
+ "Only the following file extensions are allowed: "
+ "%(allowed_extensions)s",
+ allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
+ ),
+ ),
+ ],
+ )
+ con = QuerySelectField(
+ _("Database"),
+ query_factory=excel_allowed_dbs,
+ get_pk=lambda a: a.id,
+ get_label=lambda a: a.database_name,
+ )
+ schema = StringField(
+ _("Schema"),
+ description=_("Specify a schema (if database flavor supports this)."),
+ validators=[Optional()],
+ widget=BS3TextFieldWidget(),
+ )
+ if_exists = SelectField(
+ _("Table Exists"),
+ description=_(
+ "If table exists do one of the following: "
+ "Fail (do nothing), Replace (drop and recreate table) "
+ "or Append (insert data)."
+ ),
+ choices=[
+ ("fail", _("Fail")),
+ ("replace", _("Replace")),
+ ("append", _("Append")),
+ ],
+ validators=[DataRequired()],
+ )
+ header = IntegerField(
+ _("Header Row"),
+ description=_(
+ "Row containing the headers to use as "
+ "column names (0 is first line of data). "
+ "Leave empty if there is no header row."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ index_col = IntegerField(
+ _("Index Column"),
+ description=_(
+ "Column to use as the row labels of the "
+ "dataframe. Leave empty if no index column."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ mangle_dupe_cols = BooleanField(
+ _("Mangle Duplicate Columns"),
+ description=_('Specify duplicate columns as "X.0, X.1".'),
+ )
+ skipinitialspace = BooleanField(
+ _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
+ )
+ skiprows = IntegerField(
+ _("Skip Rows"),
+ description=_("Number of rows to skip at start of file."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ nrows = IntegerField(
+ _("Rows to Read"),
+ description=_("Number of rows of file to read."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ skip_blank_lines = BooleanField(
+ _("Skip Blank Lines"),
+ description=_(
+ "Skip blank lines rather than interpreting them " "as NaN values."
+ ),
+ )
+ parse_dates = CommaSeparatedListField(
+ _("Parse Dates"),
+ description=_(
+ "A comma separated list of columns that should be " "parsed as
dates."
+ ),
+ filters=[filter_not_empty_values],
+ )
+ infer_datetime_format = BooleanField(
+ _("Infer Datetime Format"),
+ description=_("Use Pandas to interpret the datetime format "
"automatically."),
Review comment:
Same here
##########
File path: superset/views/database/forms.py
##########
@@ -207,3 +207,177 @@ def at_least_one_schema_is_allowed(database):
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
+
+
+class ExcelToDatabaseForm(DynamicForm):
+ # pylint: disable=E0211
+ def excel_allowed_dbs(): # type: ignore
+ excel_allowed_dbs = []
+ # TODO: change allow_csv_upload to allow_file_upload
+ excel_enabled_dbs = (
+
db.session.query(models.Database).filter_by(allow_csv_upload=True).all()
+ )
+ for excel_enabled_db in excel_enabled_dbs:
+ if
CsvToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
+ excel_allowed_dbs.append(excel_enabled_db)
+ return excel_allowed_dbs
+
+ @staticmethod
+ def at_least_one_schema_is_allowed(database):
+ """
+ If the user has access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is able to upload csv without specifying schema name
+ b) if database supports schema
+ user is able to upload csv to any schema
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and upload will fail
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ elif the user does not access to the database or all datasource
+ 1. if schemas_allowed_for_csv_upload is empty
+ a) if database does not support schema
+ user is unable to upload csv
+ b) if database supports schema
+ user is unable to upload csv
+ 2. if schemas_allowed_for_csv_upload is not empty
+ a) if database does not support schema
+ This situation is impossible and user is unable to upload
csv
+ b) if database supports schema
+ user is able to upload to schema in
schemas_allowed_for_csv_upload
+ """
+ if (
+ security_manager.database_access(database)
+ or security_manager.all_datasource_access()
+ ):
+ return True
+ schemas = database.get_schema_access_for_csv_upload()
+ if schemas and security_manager.schemas_accessible_by_user(
+ database, schemas, False
+ ):
+ return True
+ return False
+
+ name = StringField(
+ _("Table Name"),
+ description=_("Name of table to be created from excel data."),
+ validators=[DataRequired()],
+ widget=BS3TextFieldWidget(),
+ )
+ excel_file = FileField(
+ _("Excel File"),
+ description=_("Select a Excel file to be uploaded to a database."),
+ validators=[
+ FileRequired(),
+ FileAllowed(
+ config["ALLOWED_EXTENSIONS"],
+ _(
+ "Only the following file extensions are allowed: "
+ "%(allowed_extensions)s",
+ allowed_extensions=", ".join(config["ALLOWED_EXTENSIONS"]),
+ ),
+ ),
+ ],
+ )
+ con = QuerySelectField(
+ _("Database"),
+ query_factory=excel_allowed_dbs,
+ get_pk=lambda a: a.id,
+ get_label=lambda a: a.database_name,
+ )
+ schema = StringField(
+ _("Schema"),
+ description=_("Specify a schema (if database flavor supports this)."),
+ validators=[Optional()],
+ widget=BS3TextFieldWidget(),
+ )
+ if_exists = SelectField(
+ _("Table Exists"),
+ description=_(
+ "If table exists do one of the following: "
+ "Fail (do nothing), Replace (drop and recreate table) "
+ "or Append (insert data)."
+ ),
+ choices=[
+ ("fail", _("Fail")),
+ ("replace", _("Replace")),
+ ("append", _("Append")),
+ ],
+ validators=[DataRequired()],
+ )
+ header = IntegerField(
+ _("Header Row"),
+ description=_(
+ "Row containing the headers to use as "
+ "column names (0 is first line of data). "
+ "Leave empty if there is no header row."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ index_col = IntegerField(
+ _("Index Column"),
+ description=_(
+ "Column to use as the row labels of the "
+ "dataframe. Leave empty if no index column."
+ ),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ mangle_dupe_cols = BooleanField(
+ _("Mangle Duplicate Columns"),
+ description=_('Specify duplicate columns as "X.0, X.1".'),
+ )
+ skipinitialspace = BooleanField(
+ _("Skip Initial Space"), description=_("Skip spaces after delimiter.")
+ )
+ skiprows = IntegerField(
+ _("Skip Rows"),
+ description=_("Number of rows to skip at start of file."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ nrows = IntegerField(
+ _("Rows to Read"),
+ description=_("Number of rows of file to read."),
+ validators=[Optional(), NumberRange(min=0)],
+ widget=BS3TextFieldWidget(),
+ )
+ skip_blank_lines = BooleanField(
+ _("Skip Blank Lines"),
+ description=_(
+ "Skip blank lines rather than interpreting them " "as NaN values."
+ ),
+ )
+ parse_dates = CommaSeparatedListField(
+ _("Parse Dates"),
+ description=_(
+ "A comma separated list of columns that should be " "parsed as
dates."
Review comment:
Same here
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]