dpgaspar commented on code in PR #28192: URL: https://github.com/apache/superset/pull/28192#discussion_r1590762609
########## superset/commands/database/uploaders/csv_reader.py: ########## @@ -100,3 +85,59 @@ def file_to_dataframe(self, file: Any) -> pd.DataFrame: ) from ex except Exception as ex: raise DatabaseUploadFailed(_("Error reading CSV file")) from ex + + def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame: + """ + Read CSV file into a DataFrame + + :return: pandas DataFrame + :throws DatabaseUploadFailed: if there is an error reading the file + """ + kwargs = { + "chunksize": READ_CSV_CHUNK_SIZE, + "encoding": "utf-8", + "header": self._options.get("header_row", 0), + "decimal": self._options.get("decimal_character", "."), + "index_col": self._options.get("index_column"), + "dayfirst": self._options.get("day_first", False), + "iterator": True, + "keep_default_na": not self._options.get("null_values"), + "usecols": self._options.get("columns_read") + if self._options.get("columns_read") # None if an empty list + else None, + "na_values": self._options.get("null_values") + if self._options.get("null_values") # None if an empty list + else None, + "nrows": self._options.get("rows_to_read"), + "parse_dates": self._options.get("column_dates"), + "sep": self._options.get("delimiter", ","), + "skip_blank_lines": self._options.get("skip_blank_lines", False), + "skipinitialspace": self._options.get("skip_initial_space", False), + "skiprows": self._options.get("skip_rows", 0), + "dtype": self._options.get("column_data_types") + if self._options.get("column_data_types") + else None, + } + return self._read_csv(file, kwargs) + + def file_metadata(self, file: FileStorage) -> FileMetadata: + """ + Get metadata from a CSV file + + :return: FileMetadata + :throws DatabaseUploadFailed: if there is an error reading the file + """ + kwargs = { + "nrows": ROWS_TO_READ_METADATA, + "header": self._options.get("header_row", 0), + "sep": self._options.get("delimiter", ","), + } + df = self._read_csv(file, kwargs) + return { + "items": [ + { + "column_names": df.columns.tolist(), + "sheet_name": None, Review Comment: Don't think so, since `FileStorage` from wekzeug already contains an attribute with the filename -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org