Re: [PR] feat: new Columnar upload form and API [superset]

via GitHub Mon, 06 May 2024 02:42:36 -0700


dpgaspar commented on code in PR #28192:
URL: https://github.com/apache/superset/pull/28192#discussion_r1590762609



##########
superset/commands/database/uploaders/csv_reader.py:
##########
@@ -100,3 +85,59 @@ def file_to_dataframe(self, file: Any) -> pd.DataFrame:
             ) from ex
         except Exception as ex:
             raise DatabaseUploadFailed(_("Error reading CSV file")) from ex
+
+    def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame:
+        """
+        Read CSV file into a DataFrame
+
+        :return: pandas DataFrame
+        :throws DatabaseUploadFailed: if there is an error reading the file
+        """
+        kwargs = {
+            "chunksize": READ_CSV_CHUNK_SIZE,
+            "encoding": "utf-8",
+            "header": self._options.get("header_row", 0),
+            "decimal": self._options.get("decimal_character", "."),
+            "index_col": self._options.get("index_column"),
+            "dayfirst": self._options.get("day_first", False),
+            "iterator": True,
+            "keep_default_na": not self._options.get("null_values"),
+            "usecols": self._options.get("columns_read")
+            if self._options.get("columns_read")  # None if an empty list
+            else None,
+            "na_values": self._options.get("null_values")
+            if self._options.get("null_values")  # None if an empty list
+            else None,
+            "nrows": self._options.get("rows_to_read"),
+            "parse_dates": self._options.get("column_dates"),
+            "sep": self._options.get("delimiter", ","),
+            "skip_blank_lines": self._options.get("skip_blank_lines", False),
+            "skipinitialspace": self._options.get("skip_initial_space", False),
+            "skiprows": self._options.get("skip_rows", 0),
+            "dtype": self._options.get("column_data_types")
+            if self._options.get("column_data_types")
+            else None,
+        }
+        return self._read_csv(file, kwargs)
+
+    def file_metadata(self, file: FileStorage) -> FileMetadata:
+        """
+        Get metadata from a CSV file
+
+        :return: FileMetadata
+        :throws DatabaseUploadFailed: if there is an error reading the file
+        """
+        kwargs = {
+            "nrows": ROWS_TO_READ_METADATA,
+            "header": self._options.get("header_row", 0),
+            "sep": self._options.get("delimiter", ","),
+        }
+        df = self._read_csv(file, kwargs)
+        return {
+            "items": [
+                {
+                    "column_names": df.columns.tolist(),
+                    "sheet_name": None,

Review Comment:
   I don't think so, since `FileStorage` from werkzeug already contains an 
attribute with the filename.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org
For additional commands, e-mail: notifications-h...@superset.apache.org

Re: [PR] feat: new Columnar upload form and API [superset]

Reply via email to