codeant-ai-for-open-source[bot] commented on code in PR #40912: URL: https://github.com/apache/superset/pull/40912#discussion_r3382610259
########## superset-frontend/src/features/lineage/LineageView.tsx: ########## @@ -0,0 +1,690 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { FC, useMemo, useState, useCallback } from 'react'; +import { t } from '@apache-superset/core/translation'; +import { styled, useTheme } from '@apache-superset/core/theme'; +import { Empty, Loading } from '@superset-ui/core/components'; +import { Button } from '@superset-ui/core/components'; +import { ResourceStatus } from 'src/hooks/apiResources/apiResources'; +import type { Resource } from 'src/hooks/apiResources/apiResources'; +import type { + DatasetLineage, + ChartLineage, + DashboardLineage, + ChartEntity, + DashboardEntity, + DatasetEntity, + DatabaseEntity, +} from 'src/hooks/apiResources/lineage'; +import Echart from '../../../plugins/plugin-chart-echarts/src/components/Echart'; +import type { EChartsCoreOption } from 'echarts/core'; + +const LineageContainer = styled.div` + display: flex; + flex-direction: column; + width: 100%; + height: 100%; +`; + +const Legend = styled.div` + ${({ theme }) => ` + display: flex; + justify-content: center; + align-items: center; + gap: ${theme.sizeUnit * 4}px; + padding: ${theme.sizeUnit * 3}px; + 
background-color: ${theme.colorBgLayout}; + border-bottom: 1px solid ${theme.colorBorder}; + `} +`; + +const LegendItem = styled.div<{ color: string }>` + ${({ theme, color }) => ` + display: flex; + align-items: center; + gap: ${theme.sizeUnit * 2}px; + font-size: ${theme.fontSizeSM}px; + color: ${theme.colorText}; + + &::before { + content: ''; + width: 12px; + height: 12px; + border-radius: 2px; + background-color: ${color}; + } + `} +`; + +const DetailsPanel = styled.div` + ${({ theme }) => ` + padding: ${theme.sizeUnit * 4}px; + background-color: ${theme.colorBgLayout}; + border-top: 1px solid ${theme.colorBorder}; + min-height: 120px; + `} +`; + +const DetailsPanelHeader = styled.div` + ${({ theme }) => ` + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: ${theme.sizeUnit * 3}px; + `} +`; + +const DetailsPanelActions = styled.div` + ${({ theme }) => ` + display: flex; + gap: ${theme.sizeUnit * 2}px; + `} +`; + +const DetailsPanelTitle = styled.h4` + ${({ theme }) => ` + margin: 0; + font-size: ${theme.fontSizeLG}px; + font-weight: ${theme.fontWeightStrong}; + color: ${theme.colorText}; + `} +`; + +const DetailsPanelContent = styled.div` + ${({ theme }) => ` + display: flex; + flex-direction: column; + gap: ${theme.sizeUnit * 2}px; + `} +`; + +const DetailRow = styled.div` + ${({ theme }) => ` + display: flex; + gap: ${theme.sizeUnit * 2}px; + font-size: ${theme.fontSizeSM}px; + color: ${theme.colorText}; + `} +`; + +const DetailLabel = styled.span` + ${({ theme }) => ` + font-weight: ${theme.fontWeightStrong}; + min-width: 100px; + `} +`; + +const DetailValue = styled.span` + ${({ theme }) => ` + color: ${theme.colorTextSecondary}; + `} +`; + +type NodeDetails = { + name: string; + type: 'database' | 'dataset' | 'chart' | 'dashboard'; + id?: number; + additionalInfo?: Record<string, any>; +}; + +type LineageViewProps = { + lineageResource: + | Resource<DatasetLineage> + | Resource<ChartLineage> + | 
Resource<DashboardLineage>; + entityType: 'dataset' | 'chart' | 'dashboard'; +}; + +const LineageView: FC<LineageViewProps> = ({ lineageResource, entityType }) => { + const theme = useTheme(); + const [selectedNode, setSelectedNode] = useState<NodeDetails | null>(null); + + // Create a mapping of node names to their details + const nodeDetailsMap = useMemo(() => { + if ( + lineageResource.status !== ResourceStatus.Complete || + !lineageResource.result + ) { + return new Map<string, NodeDetails>(); + } + + const data = lineageResource.result; + const map = new Map<string, NodeDetails>(); + + if (entityType === 'dataset' && 'dataset' in data) { + const { dataset, upstream, downstream } = data as DatasetLineage; + + // Add current dataset + map.set(dataset.name, { + name: dataset.name, + type: 'dataset', + id: dataset.id, + additionalInfo: { + schema: dataset.schema, + table_name: dataset.table_name, + database_name: dataset.database_name, + }, + }); + + // Add upstream database + if (upstream?.database) { + map.set(upstream.database.database_name, { + name: upstream.database.database_name, + type: 'database', + id: upstream.database.id, + }); + } + + // Add downstream charts + if (downstream?.charts?.result) { + downstream.charts.result.forEach((chart: ChartEntity) => { + map.set(chart.slice_name, { + name: chart.slice_name, + type: 'chart', + id: chart.id, + additionalInfo: { + viz_type: chart.viz_type, + }, + }); + }); + } + + // Add downstream dashboards + if (downstream?.dashboards?.result) { + downstream.dashboards.result.forEach((dashboard: DashboardEntity) => { + map.set(dashboard.title, { + name: dashboard.title, + type: 'dashboard', + id: dashboard.id, + additionalInfo: { + slug: dashboard.slug, + }, + }); + }); + } + } else if (entityType === 'chart' && 'chart' in data) { + const { chart, upstream, downstream } = data as ChartLineage; + + // Add current chart + map.set(chart.slice_name, { + name: chart.slice_name, + type: 'chart', + id: chart.id, + 
additionalInfo: { + viz_type: chart.viz_type, + }, + }); + + // Add upstream dataset + if (upstream?.dataset) { + map.set(upstream.dataset.name, { + name: upstream.dataset.name, + type: 'dataset', + id: upstream.dataset.id, + additionalInfo: { + schema: upstream.dataset.schema, + table_name: upstream.dataset.table_name, + }, + }); + } + + // Add upstream database + if (upstream?.database) { + map.set(upstream.database.database_name, { + name: upstream.database.database_name, + type: 'database', + id: upstream.database.id, + }); + } + + // Add downstream dashboards + if (downstream?.dashboards?.result) { + downstream.dashboards.result.forEach((dashboard: DashboardEntity) => { + map.set(dashboard.title, { + name: dashboard.title, + type: 'dashboard', + id: dashboard.id, + additionalInfo: { + slug: dashboard.slug, + }, + }); + }); + } + } else if (entityType === 'dashboard' && 'dashboard' in data) { + const { dashboard, upstream } = data as DashboardLineage; + + // Add current dashboard + map.set(dashboard.title, { + name: dashboard.title, + type: 'dashboard', + id: dashboard.id, + additionalInfo: { + slug: dashboard.slug, + }, + }); + + // First pass: detect duplicate chart names + const chartNameCounts = new Map<string, number>(); + if (upstream?.charts?.result) { + upstream.charts.result.forEach((chart: ChartEntity) => { + const count = chartNameCounts.get(chart.slice_name) || 0; + chartNameCounts.set(chart.slice_name, count + 1); + }); + } + + // Add upstream charts + if (upstream?.charts?.result) { + upstream.charts.result.forEach((chart: ChartEntity) => { + // Only append ID if there are duplicate names + const hasDuplicate = (chartNameCounts.get(chart.slice_name) || 0) > 1; + const chartNodeName = hasDuplicate + ? 
`${chart.slice_name} (#${chart.id})` + : chart.slice_name; + map.set(chartNodeName, { + name: chart.slice_name, + type: 'chart', + id: chart.id, + additionalInfo: { + viz_type: chart.viz_type, + }, + }); + }); + } + + // Add upstream datasets + if (upstream?.datasets?.result) { + upstream.datasets.result.forEach((dataset: DatasetEntity) => { + map.set(dataset.name, { + name: dataset.name, + type: 'dataset', + id: dataset.id, + additionalInfo: { + schema: dataset.schema, + table_name: dataset.table_name, + }, + }); + }); + } + + // Add upstream databases + if (upstream?.databases?.result) { + upstream.databases.result.forEach((database: DatabaseEntity) => { + map.set(database.database_name, { + name: database.database_name, + type: 'database', + id: database.id, + }); + }); + } + } + + return map; + }, [lineageResource, entityType]); + + // Handle node click + const handleNodeClick = useCallback( + (params: any) => { + if (params.dataType === 'node') { + const nodeName = params.name; + const nodeDetails = nodeDetailsMap.get(nodeName); + if (nodeDetails) { + setSelectedNode(nodeDetails); + } + } + // Always stop event propagation to prevent tooltip issues + if (params.event) { + params.event.stop(); + } + }, + [nodeDetailsMap], + ); + + const echartOptions: EChartsCoreOption | null = useMemo(() => { + if ( + lineageResource.status !== ResourceStatus.Complete || + !lineageResource.result + ) { + return null; + } + + const data = lineageResource.result; + const nodes: { + name: string; + itemStyle?: { color: string }; + label?: { position?: string }; + }[] = []; + const links: { source: string; target: string; value: number }[] = []; + const nodeSet = new Set<string>(); + + // Helper to add a node with label position + const addNode = ( + name: string, + color: string, + labelPosition: 'left' | 'right' | 'inside', + ) => { + if (!nodeSet.has(name)) { + nodeSet.add(name); + nodes.push({ + name, + itemStyle: { color }, + label: { + position: labelPosition, + }, + }); + } 
+ }; + + // Helper to add a link + const addLink = (source: string, target: string) => { + links.push({ source, target, value: 1 }); + }; + + // Build nodes and links based on entity type + if (entityType === 'dataset' && 'dataset' in data) { + const { dataset, upstream, downstream } = data as DatasetLineage; + + // Add current dataset node (center) - label inside + addNode(dataset.name, theme.colorPrimary, 'inside'); + + // Add upstream database - label on left + if (upstream?.database) { + addNode(upstream.database.database_name, theme.colorInfo, 'left'); + addLink(upstream.database.database_name, dataset.name); + } + + // Add downstream charts - label on right + const chartMap = new Map<number, ChartEntity>(); + if (downstream?.charts?.result) { + downstream.charts.result.forEach((chart: ChartEntity) => { + chartMap.set(chart.id, chart); + addNode(chart.slice_name, theme.colorSuccess, 'right'); + addLink(dataset.name, chart.slice_name); + }); Review Comment: **🟠Architect Review — HIGH** Node identity in the Sankey graph is keyed solely by the display name (e.g., `slice_name`, `title`, `database_name`), so entities with duplicate names are merged into a single node, producing incorrect links and incorrect details when clicked (only the last entity written to the map is represented). The dashboard lineage path already special-cases duplicate chart titles, but dataset and chart lineage flows do not. **Suggestion:** Use a stable unique node key (for example `type:id` or an internally generated unique name) for graph identity and for the `nodeDetailsMap`, and keep the human-readable title as a separate label field; apply the duplicate-handling approach used in the dashboard lineage branch consistently across dataset/chart/dashboard flows so that nodes never collide on name alone. 
[Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=6bb128864b2d4eefb93b7f259b85f77f&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=6bb128864b2d4eefb93b7f259b85f77f&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is an **Architect / Logical Review** comment left during a code review. These reviews are first-class, important findings — not optional suggestions. Do NOT dismiss this as a 'big architectural change' just because the title says architect review; most of these can be resolved with a small, localized fix once the intent is understood. **Path:** superset-frontend/src/features/lineage/LineageView.tsx **Line:** 381:418 **Comment:** *HIGH: Node identity in the Sankey graph is keyed solely by the display name (e.g., `slice_name`, `title`, `database_name`), so entities with duplicate names are merged into a single node, producing incorrect links and incorrect details when clicked (only the last entity written to the map is represented). The dashboard lineage path already special-cases duplicate chart titles, but dataset and chart lineage flows do not. Validate the correctness of the flagged issue. If correct, how can I resolve this? If you propose a fix, implement it and please make it concise. If a suggested approach is provided above, use it as the authoritative instruction. If no explicit code suggestion is given, you MUST still draft and apply your own minimal, localized fix — do not punt back with 'no suggestion provided, review manually'. Keep the change as small as possible: add a guard clause, gate on a loading state, reorder an await, wrap in a conditional, etc. Do not refactor surrounding code or expand scope beyond the finding.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix ``` </details> ########## superset/datasets/api.py: ########## @@ -846,6 +849,119 @@ def related_objects(self, id_or_uuid: str) -> Response: dashboards={"count": len(dashboards), "result": dashboards}, ) + @expose("/<id_or_uuid>/lineage", methods=("GET",)) + @protect() + @safe + @statsd_metrics + @event_logger.log_this_with_context( + action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.lineage", + log_to_statsd=False, + ) + def lineage(self, id_or_uuid: str) -> Response: + """Get lineage information for a dataset. + --- + get: + summary: Get lineage information for a dataset + description: >- + Returns upstream (database) and downstream (charts, dashboards) lineage + information for a dataset + parameters: + - in: path + name: id_or_uuid + schema: + type: string + description: Either the id of the dataset, or its uuid + responses: + 200: + description: Lineage information + content: + application/json: + schema: + $ref: "#/components/schemas/DatasetLineageResponseSchema" + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 500: + $ref: '#/components/responses/500' + """ + dataset = DatasetDAO.find_by_id_or_uuid(id_or_uuid) + if not dataset: + return self.response_404() + + dataset_info = { + "id": dataset.id, + "name": dataset.name, + "database_id": dataset.database_id, + "database_name": ( + dataset.database.database_name if dataset.database else None + ), + "schema": dataset.schema, + "table_name": dataset.table_name, + } + + # Get upstream (database) information + upstream: dict[str, Any] = {} + if dataset.database: + upstream["database"] = { + "id": dataset.database.id, + "database_name": dataset.database.database_name, + "backend": dataset.database.backend, + } + else: + 
upstream["database"] = None + + # Get downstream (charts and dashboards) information + related_data = DatasetDAO.get_related_objects(dataset.id) + + # Build chart information with dashboard IDs + charts = [] + for chart in related_data["charts"]: + dashboard_ids = [d.id for d in chart.dashboards] + charts.append( + { + "id": chart.id, + "slice_name": chart.slice_name, + "viz_type": chart.viz_type, + "dashboard_ids": dashboard_ids, + } + ) + + # Build dashboard information with chart IDs + dashboards = [] + for dashboard in related_data["dashboards"]: + chart_ids = [ + chart.id + for chart in dashboard.slices + if chart.datasource_id == dataset.id + ] + dashboards.append( + { + "id": dashboard.id, + "title": dashboard.dashboard_title, + "slug": dashboard.slug, + "chart_ids": chart_ids, + } Review Comment: **🟠Architect Review — HIGH** The dataset `/lineage` endpoint builds downstream chart and dashboard lists from `DatasetDAO.get_related_objects` without any per-object permission filtering, unlike the existing `/related_objects` endpoint which filters with `security_manager.can_access_chart` / `can_access_dashboard`. This allows users who can read a dataset to see IDs, titles, and slugs of related charts and dashboards they may not have access to. **Suggestion:** Mirror the access control used in `related_objects`: filter `related_data["charts"]` and `related_data["dashboards"]` through `security_manager.can_access_chart` / `can_access_dashboard` before constructing the response, and compute the `count` fields from these filtered collections so lineage never exposes unauthorized downstream assets. 
[Fix in Cursor](https://app.codeant.ai/fix-in-ide?tool=cursor&prompt_id=7cacf392966a43a69d4f5b9a96680e8d&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) | [Fix in VSCode Claude](https://app.codeant.ai/fix-in-ide?tool=vscode-claude&prompt_id=7cacf392966a43a69d4f5b9a96680e8d&service=github&base_url=https%3A%2F%2Fgithub.com&org=apache&repo=apache%2Fsuperset) *(Use Cmd/Ctrl + Click for best experience)* <details> <summary><b>Prompt for AI Agent 🤖 </b></summary> ```mdx This is an **Architect / Logical Review** comment left during a code review. These reviews are first-class, important findings — not optional suggestions. Do NOT dismiss this as a 'big architectural change' just because the title says architect review; most of these can be resolved with a small, localized fix once the intent is understood. **Path:** superset/datasets/api.py **Line:** 919:944 **Comment:** *HIGH: The dataset `/lineage` endpoint builds downstream chart and dashboard lists from `DatasetDAO.get_related_objects` without any per-object permission filtering, unlike the existing `/related_objects` endpoint which filters with `security_manager.can_access_chart` / `can_access_dashboard`. This allows users who can read a dataset to see IDs, titles, and slugs of related charts and dashboards they may not have access to. Validate the correctness of the flagged issue. If correct, how can I resolve this? If you propose a fix, implement it and please make it concise. If a suggested approach is provided above, use it as the authoritative instruction. If no explicit code suggestion is given, you MUST still draft and apply your own minimal, localized fix — do not punt back with 'no suggestion provided, review manually'. Keep the change as small as possible: add a guard clause, gate on a loading state, reorder an await, wrap in a conditional, etc. Do not refactor surrounding code or expand scope beyond the finding.
Once the fix is implemented, also check the other comments on the same PR and ask the user whether they want the rest of the comments fixed as well. If they say yes, fetch all the comments, validate their correctness, and implement a minimal fix for each. ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
