Copilot commented on code in PR #37791:
URL: https://github.com/apache/superset/pull/37791#discussion_r2790585496


##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';
+
+const PREVIEW_ROW_LIMIT = 100;
+
+interface DataPreviewModalProps {
+  show: boolean;
+  onHide: () => void;
+  file: File | null;
+  type: UploadType;
+  delimiter?: string;
+  sheetName?: string;
+}
+
+async function parseCSV(
+  file: File,
+  delimiter: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const text = await file.text();
+  const lines = text.split(/\r?\n/).filter(line => line.trim().length > 0);
+  if (lines.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerLine = lines[0];
+  const columns = headerLine
+    .split(delimiter)
+    .map((c, i) => c.trim() || `col_${i}`);
+  const dataRows = lines.slice(1, PREVIEW_ROW_LIMIT + 1);
+  const data = dataRows.map(row => {
+    const values = row.split(delimiter);
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = values[i]?.trim() ?? '';
+    });
+    return obj;
+  });
+  return { columns, data };
+}
+
+async function parseExcel(
+  file: File,
+  sheetName?: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const XLSX = (await import(/* webpackChunkName: "xlsx" */ 'xlsx')).default;
+  const arrayBuffer = await file.arrayBuffer();
+  const workbook = XLSX.read(arrayBuffer, { type: 'array' });
+  const sheet = sheetName
+    ? workbook.Sheets[sheetName]
+    : workbook.Sheets[workbook.SheetNames[0]];
+  if (!sheet) {
+    return { columns: [], data: [] };
+  }
+  const jsonData = XLSX.utils.sheet_to_json<unknown[]>(sheet, {
+    header: 1,
+    defval: '',
+  }) as unknown[][];
+  if (jsonData.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerRow = jsonData[0] as (string | number)[];
+  const columns = headerRow.map((c, i) => String(c ?? '').trim() || 
`col_${i}`);
+  const dataRows = jsonData.slice(1, PREVIEW_ROW_LIMIT + 1) as (
+    | string
+    | number
+  )[][];
+  const data = dataRows.map(row => {
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = String(row[i] ?? '');
+    });
+    return obj;
+  });
+  return { columns, data };
+}
+
+export const DataPreviewModal: FunctionComponent<DataPreviewModalProps> = ({
+  show,
+  onHide,
+  file,
+  type,
+  delimiter = ',',
+  sheetName,
+}) => {
+  const [loading, setLoading] = useState(false);
+  const [columns, setColumns] = useState<string[]>([]);
+  const [data, setData] = useState<Record<string, string>[]>([]);
+  const [error, setError] = useState<string | null>(null);
+
+  useEffect(() => {
+    if (!show || !file) {
+      setColumns([]);
+      setData([]);
+      setError(null);
+      return;
+    }
+    if (type === 'columnar') {
+      setError(t('Data preview is not available for Parquet files.'));
+      setColumns([]);
+      setData([]);
+      return;
+    }
+    setLoading(true);
+    setError(null);
+    const parse =
+      type === 'csv' ? parseCSV(file, delimiter) : parseExcel(file, sheetName);
+    parse
+      .then(({ columns: cols, data: parsedData }) => {
+        setColumns(cols);
+        setData(parsedData);
+      })
+      .catch(err => {
+        setError(err?.message || t('Failed to parse file'));
+        setColumns([]);
+        setData([]);
+      })
+      .finally(() => setLoading(false));

Review Comment:
   The `useEffect` parsing flow doesn’t guard against in-flight parse promises when `show`, `file`, `delimiter`, or `sheetName` change. If the modal is closed or a different file is selected while parsing, a late promise resolution can overwrite state with stale columns/data. Add an effect cleanup that sets a cancellation flag, and check that flag before each call to `setColumns`/`setData`/`setError`/`setLoading`.
   ```suggestion
       let cancelled = false;
   
       const resetState = () => {
         setColumns([]);
         setData([]);
         setError(null);
         setLoading(false);
       };
   
       if (!show || !file) {
         resetState();
         return () => {
           cancelled = true;
         };
       }
   
       if (type === 'columnar') {
         setError(t('Data preview is not available for Parquet files.'));
         setColumns([]);
         setData([]);
         setLoading(false);
         return () => {
           cancelled = true;
         };
       }
   
       setLoading(true);
       setError(null);
       const parse =
         type === 'csv' ? parseCSV(file, delimiter) : parseExcel(file, 
sheetName);
       parse
         .then(({ columns: cols, data: parsedData }) => {
           if (cancelled) {
             return;
           }
           setColumns(cols);
           setData(parsedData);
         })
         .catch(err => {
           if (cancelled) {
             return;
           }
           setError(err?.message || t('Failed to parse file'));
           setColumns([]);
           setData([]);
         })
         .finally(() => {
           if (cancelled) {
             return;
           }
           setLoading(false);
         });
   
       return () => {
         cancelled = true;
       };
   ```



##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';
+
+const PREVIEW_ROW_LIMIT = 100;
+
+interface DataPreviewModalProps {
+  show: boolean;
+  onHide: () => void;
+  file: File | null;
+  type: UploadType;
+  delimiter?: string;
+  sheetName?: string;
+}
+
+async function parseCSV(
+  file: File,
+  delimiter: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const text = await file.text();
+  const lines = text.split(/\r?\n/).filter(line => line.trim().length > 0);
+  if (lines.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerLine = lines[0];
+  const columns = headerLine
+    .split(delimiter)
+    .map((c, i) => c.trim() || `col_${i}`);
+  const dataRows = lines.slice(1, PREVIEW_ROW_LIMIT + 1);
+  const data = dataRows.map(row => {
+    const values = row.split(delimiter);
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = values[i]?.trim() ?? '';
+    });
+    return obj;
+  });

Review Comment:
   `parseCSV` uses a simple `split(delimiter)` approach, which will mis-parse 
common CSV cases (quoted fields containing delimiters/newlines, escaped 
quotes). This can produce incorrect column counts and misleading previews; 
consider using a standards-compliant CSV parser (or an existing utility in the 
codebase) for the preview parsing.
   ```suggestion
     const Papa = (await import(
       /* webpackChunkName: "papaparse" */ 'papaparse'
     )).default;
   
     const text = await file.text();
   
     const result = Papa.parse<Record<string, string>>(text, {
       delimiter,
       header: true,
       skipEmptyLines: 'greedy',
       preview: PREVIEW_ROW_LIMIT + 1,
       quoteChar: '"',
       escapeChar: '"',
     });
   
     if (!result.data || result.data.length === 0) {
       return { columns: [], data: [] };
     }
   
     const rawFields = result.meta.fields && result.meta.fields.length > 0
       ? result.meta.fields
       : Object.keys(result.data[0] ?? {});
   
     const columns = rawFields.map((c, i) => (c ?? '').trim() || `col_${i}`);
   
     const data = result.data.slice(0, PREVIEW_ROW_LIMIT).map(row => {
       const obj: Record<string, string> = {};
       columns.forEach(col => {
         const value = row[col] ?? '';
         obj[col] = String(value);
       });
       return obj;
     });
   ```



##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';

Review Comment:
   This file imports `ColumnsType` from `antd/es/table`, which goes against the 
repo’s frontend convention to avoid direct Ant Design imports (and 
`@superset-ui/core/components` already re-exports `ColumnsType` via its Table 
barrel). Prefer importing the type from `@superset-ui/core/components` to keep 
imports consistent and reduce coupling to antd internals.



##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';
+
+const PREVIEW_ROW_LIMIT = 100;
+
+interface DataPreviewModalProps {
+  show: boolean;
+  onHide: () => void;
+  file: File | null;
+  type: UploadType;
+  delimiter?: string;
+  sheetName?: string;
+}
+
+async function parseCSV(
+  file: File,
+  delimiter: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const text = await file.text();
+  const lines = text.split(/\r?\n/).filter(line => line.trim().length > 0);
+  if (lines.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerLine = lines[0];
+  const columns = headerLine
+    .split(delimiter)
+    .map((c, i) => c.trim() || `col_${i}`);
+  const dataRows = lines.slice(1, PREVIEW_ROW_LIMIT + 1);
+  const data = dataRows.map(row => {
+    const values = row.split(delimiter);
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = values[i]?.trim() ?? '';
+    });
+    return obj;
+  });
+  return { columns, data };
+}
+
+async function parseExcel(
+  file: File,
+  sheetName?: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const XLSX = (await import(/* webpackChunkName: "xlsx" */ 'xlsx')).default;
+  const arrayBuffer = await file.arrayBuffer();
+  const workbook = XLSX.read(arrayBuffer, { type: 'array' });
+  const sheet = sheetName
+    ? workbook.Sheets[sheetName]
+    : workbook.Sheets[workbook.SheetNames[0]];
+  if (!sheet) {
+    return { columns: [], data: [] };
+  }
+  const jsonData = XLSX.utils.sheet_to_json<unknown[]>(sheet, {
+    header: 1,
+    defval: '',
+  }) as unknown[][];
+  if (jsonData.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerRow = jsonData[0] as (string | number)[];
+  const columns = headerRow.map((c, i) => String(c ?? '').trim() || 
`col_${i}`);
+  const dataRows = jsonData.slice(1, PREVIEW_ROW_LIMIT + 1) as (
+    | string
+    | number
+  )[][];
+  const data = dataRows.map(row => {
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = String(row[i] ?? '');
+    });
+    return obj;
+  });
+  return { columns, data };
+}

Review Comment:
   `parseExcel` (and the dynamic `xlsx` import path) isn’t exercised by tests here, even though Excel preview is a key part of the feature. Add a unit test that mocks the `xlsx` module and verifies sheet selection and row/column extraction, so regressions are caught.



##########
superset-frontend/src/features/databases/UploadDataModel/index.tsx:
##########
@@ -515,30 +535,44 @@ const UploadDataModal: 
FunctionComponent<UploadDataModalProps> = ({
     }));
 
   const onChangeFile = async (info: UploadChangeParam<any>) => {
-    setFileList([
+    const newFileList = [
       {
         ...info.file,
-        status: 'done',
+        status: 'done' as const,
       },
-    ]);
+    ];
+    setFileList(newFileList);
+    fileListRef.current = newFileList;
+    if (info.file.originFileObj) {
+      form.validateFields(['file']).catch(() => {});
+    }
     if (!previewUploadedFile) {
       return;
     }
+    if (isFileTooLargeForPreview(info.file.originFileObj)) {
+      setColumns([]);
+      return;
+    }
     await loadFileMetadata(info.file.originFileObj);
   };

Review Comment:
   `onChangeFile` awaits `loadFileMetadata`, but there’s no protection against out-of-order responses if the user selects or removes another file quickly. A slow metadata request for a previous file can still call `setColumns`/`setSheetNames` later, potentially showing columns for the wrong file (and this is more noticeable now that large files skip metadata calls). Consider tracking the active file UID/request id (or aborting prior requests) and ignoring stale responses.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to