exemplary-citizen commented on a change in pull request #14449:
URL: https://github.com/apache/superset/pull/14449#discussion_r636614194
##########
File path: superset/views/database/views.py
##########
@@ -128,6 +128,32 @@ def form_get(self, form: CsvToDatabaseForm) -> None:
def form_post(self, form: CsvToDatabaseForm) -> Response:
database = form.con.data
csv_table = Table(table=form.name.data, schema=form.schema.data)
+ file_type = form.csv_file.data.filename.split(".")[-1]
+ if file_type == "parquet":
+ read = pd.read_parquet
+ kwargs = {
+ "columns": form.usecols.data,
+ }
+ else:
+ read = pd.read_csv
+ kwargs = {
+ "chunksize": 1000,
+ "encoding": "utf-8",
+ "header": form.header.data if form.header.data else 0,
+ "index_col": form.index_col.data,
+ "infer_datetime_format": form.infer_datetime_format.data,
+ "iterator": True,
+ "keep_default_na": not form.null_values.data,
+ "mangle_dupe_cols": form.mangle_dupe_cols.data,
+ "usecols": form.usecols.data,
Review comment:
Added a scenario to `test_import_csv` that tests uploading a CSV with
specific columns.
##########
File path: superset/views/database/forms.py
##########
@@ -163,6 +165,15 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
_("Mangle Duplicate Columns"),
description=_('Specify duplicate columns as "X.0, X.1".'),
)
+ usecols = JsonListField(
+ _("Use Columns"),
+ default=None,
+ description=_(
+ "Json list of the column names that should be read. "
+ "If not None, only these columns will be read from the file."
+ ),
+ validators=[Optional()],
+ )
Review comment:
Added a screenshot to the summary above
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]