gemini-code-assist[bot] commented on code in PR #35947: URL: https://github.com/apache/beam/pull/35947#discussion_r2301315544
########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py: ########## @@ -0,0 +1,199 @@ +# Licensed under the Apache License, Version 2.0 (the 'License'); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import yaml +import json +from typing import Dict, List, Optional, TypedDict, Any +import dataclasses +from dataclasses import dataclass + +from apache_beam.yaml.main import build_pipeline_components_from_yaml +import apache_beam as beam + +# ======================== Type Definitions ======================== + +@dataclass +class NodeData: + id: str + label: str + type: str = "" + + def __post_init__(self): + # Ensure ID is not empty + if not self.id: + raise ValueError("Node ID cannot be empty") + +@dataclass +class EdgeData: + source: str + target: str + label: str = "" + + def __post_init__(self): + if not self.source or not self.target: + raise ValueError("Edge source and target cannot be empty") + +class FlowGraph(TypedDict): + nodes: List[Dict[str, Any]] + edges: List[Dict[str, Any]] + +class ApiResponse(TypedDict): + data: Optional[FlowGraph] + error: Optional[str] + +# ======================== Main Function ======================== + +def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> ApiResponse: + """ + Parse Beam YAML and convert to flow graph data structure + + Args: + yaml_str: Input YAML string + + Returns: + Standardized response format: + - Success: {'status': 'success', 'data': {...}, 'error': None} + - Failure: {'status': 'error', 'data': None, 'error': 'message'} + """ + # Phase 1: YAML Parsing + try: + parsed_yaml = yaml.safe_load(yaml_str) + if not parsed_yaml or 'pipeline' not in parsed_yaml: + return build_error_response("Invalid YAML structure: missing 'pipeline' section") + except yaml.YAMLError as e: + return build_error_response(f"YAML parsing error: {str(e)}") + + # Phase 2: Pipeline Validation + try: + options, constructor = build_pipeline_components_from_yaml( + yaml_str, + [], + validate_schema='per_transform' + ) + if isDryRunMode: + with beam.Pipeline(options=options) as p: + constructor(p) + except Exception as e: + return build_error_response(f"Pipeline validation failed: {str(e)}") + + # Phase 3: Graph Construction + try: + pipeline = parsed_yaml['pipeline'] + pipeline_type = pipeline.get('type', '') + transforms = pipeline.get('transforms', []) + + nodes: List[NodeData] = [] + edges: List[EdgeData] = [] + + + nodes.append(NodeData( + id='0', + label=f'Input', + type='input' + )) + nodes.append(NodeData( + id='1', + label=f'Output', + type='output' + )) + + # Process transform nodes + for idx, transform in enumerate(transforms): + if not isinstance(transform, dict): + continue + + payload = {k: v for k, v in transform.items() if k not in {"type"}} + + node_id = f"t{idx}" + nodes.append({ + 'id': node_id, + 'label': transform.get('type', 'unnamed'), + 'type': 'default', + **payload + }) + + # Create connections between nodes + if idx > 0: + edges.append(EdgeData( + source=f"t{idx-1}", + target=node_id, + label='chain' + )) Review Comment:  The current graph construction logic assumes a simple linear chain of transforms, connecting each transform to the previous one (`t{idx-1}` to `t{idx}`). This does not correctly represent the pipeline topology for many common cases, as it ignores the `input` field within each transform which specifies the actual data flow. For any pipeline that is not a simple linear chain (e.g., those with branches or joins), the visualization will be incorrect. The logic should be updated to parse the `input` field to build the correct graph edges. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py: ########## @@ -0,0 +1,199 @@ +# Licensed under the Apache License, Version 2.0 (the 'License'); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import yaml +import json +from typing import Dict, List, Optional, TypedDict, Any +import dataclasses +from dataclasses import dataclass + +from apache_beam.yaml.main import build_pipeline_components_from_yaml +import apache_beam as beam + +# ======================== Type Definitions ======================== + +@dataclass +class NodeData: + id: str + label: str + type: str = "" + + def __post_init__(self): + # Ensure ID is not empty + if not self.id: + raise ValueError("Node ID cannot be empty") + +@dataclass +class EdgeData: + source: str + target: str + label: str = "" + + def __post_init__(self): + if not self.source or not self.target: + raise ValueError("Edge source and target cannot be empty") + +class FlowGraph(TypedDict): + nodes: List[Dict[str, Any]] + edges: List[Dict[str, Any]] + +class ApiResponse(TypedDict): + data: Optional[FlowGraph] + error: Optional[str] + +# ======================== Main Function ======================== + +def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> ApiResponse: + """ + Parse Beam YAML and convert to flow graph data structure + + Args: + yaml_str: Input YAML string + + Returns: + Standardized response format: + - Success: {'status': 'success', 'data': {...}, 'error': None} + - Failure: {'status': 'error', 'data': None, 'error': 'message'} + """ + # Phase 1: YAML Parsing + try: + parsed_yaml = yaml.safe_load(yaml_str) + if not parsed_yaml or 'pipeline' not in parsed_yaml: + return build_error_response("Invalid YAML structure: missing 'pipeline' section") + except yaml.YAMLError as e: + return build_error_response(f"YAML parsing error: {str(e)}") + + # Phase 2: Pipeline Validation + try: + options, constructor = build_pipeline_components_from_yaml( + yaml_str, + [], + validate_schema='per_transform' + ) + if isDryRunMode: + with beam.Pipeline(options=options) as p: + constructor(p) + except Exception as e: + return build_error_response(f"Pipeline validation failed: {str(e)}") + + # Phase 3: Graph Construction + try: + pipeline = parsed_yaml['pipeline'] + pipeline_type = pipeline.get('type', '') + transforms = pipeline.get('transforms', []) + + nodes: List[NodeData] = [] + edges: List[EdgeData] = [] + + + nodes.append(NodeData( + id='0', + label=f'Input', + type='input' + )) + nodes.append(NodeData( + id='1', + label=f'Output', + type='output' + )) + + # Process transform nodes + for idx, transform in enumerate(transforms): + if not isinstance(transform, dict): + continue + + payload = {k: v for k, v in transform.items() if k not in {"type"}} + + node_id = f"t{idx}" + nodes.append({ + 'id': node_id, + 'label': transform.get('type', 'unnamed'), + 'type': 'default', + **payload + }) + + # Create connections between nodes + if idx > 0: + edges.append(EdgeData( + source=f"t{idx-1}", + target=node_id, + label='chain' + )) + + edges.append(EdgeData( + source='0', + target='t0', + label='start' + )) + edges.append(EdgeData( + source=node_id, + target='1', + label='stop' + )) Review Comment:  If the `transforms` list from the parsed YAML is empty, the loop over it on lines 110-130 will not execute. Consequently, `node_id` will not be defined, causing an `UnboundLocalError` on line 138. This will crash the parser for valid empty pipelines. You should handle this case, for example by checking if `transforms` is empty before attempting to create edges that depend on transform nodes. ```suggestion if transforms: edges.append(EdgeData( source='0', target='t0', label='start' )) edges.append(EdgeData( source=node_id, target='1', label='stop' )) ``` ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/Yaml.tsx: ########## @@ -0,0 +1,279 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { ISessionContext } from '@jupyterlab/apputils'; +import { Card } from '@rmwc/card'; +import '@rmwc/textfield/styles'; +import '@rmwc/grid/styles'; +import '@rmwc/card/styles'; +import Split from 'react-split'; +import { debounce } from 'lodash'; + +import { Node, Edge } from '@xyflow/react'; +import '@xyflow/react/dist/style.css'; +import '../../style/yaml/Yaml.css'; + + +import { YamlEditor } from './YamlEditor'; +import { EditableKeyValuePanel } from './EditablePanel'; +import { FlowEditor } from './YamlFlow'; +import { ApiResponse } from './DataType'; +import { nodeWidth, nodeHeight } from './DataType'; + +interface YamlProps { + sessionContext: ISessionContext; +} + +interface YamlState { + yamlContent: string; + elements: any; + selectedNode: Node | null; + errors: string[], + isDryRunMode: boolean; + Nodes: Node[]; + Edges: Edge[]; +} + +const initialNodes: Node[] = [ + { id: '0', width: nodeWidth, position: { x: 0, y: 0 }, type: 'input', data: { label: 'Input' } }, + { id: '1', width: nodeWidth, position: { x: 0, y: 100 }, type: 'default', data: { label: '1' } }, + { id: '2', width: nodeWidth, position: { x: 0, y: 200 }, type: 'default', data: { label: '2' } }, + { id: '3', width: nodeWidth, position: { x: 0, y: 300 }, type: 'output', data: { label: 'Output' } }, +]; + +const initialEdges: Edge[] = [{ id: 'e0-1', source: '0', target: '1' }, +{ id: 'e1-2', source: '1', target: '2' }, { id: 'e2-3', source: '2', target: '3' } +]; + +/** + * A YAML pipeline editor component with integrated flow visualization. + * + * Features: + * - Three-panel layout with YAML editor, flow diagram, and node properties + * - Real-time YAML validation and error display + * - Automatic flow diagram generation from YAML content + * - Interactive node selection and editing + * - Dry run mode support for pipeline testing + * - Kernel-based YAML parsing using Apache Beam utilities + * - Debounced updates to optimize performance + * - Split-pane resizable interface + * + * State Management: + * - yamlContent: Current YAML text content + * - elements: Combined nodes and edges for the flow diagram + * - selectedNode: Currently selected node in the flow diagram + * - errors: Array of validation errors + * - Nodes: Array of flow nodes + * - Edges: Array of flow edges + * - isDryRunMode: Flag for dry run mode state + * + * Props: + * @param {YamlProps} props - Component props including sessionContext for kernel communication + * + * Methods: + * - handleNodeClick: Handles node selection in the flow diagram + * - handleYamlChange: Debounced handler for YAML content changes + * - validateAndRenderYaml: Validates YAML and updates the flow diagram + */ +export class Yaml extends React.Component<YamlProps, YamlState> { + constructor(props: YamlProps) { + super(props); + this.state = { yamlContent: '', elements: [], selectedNode: null, errors: [], Nodes: initialNodes, Edges: initialEdges, isDryRunMode: false }; + } + + componentDidMount(): void { + this.props.sessionContext.ready.then(() => { + const kernel = this.props.sessionContext.session?.kernel; + if (!kernel) { + console.error('Kernel is not available even after ready'); + return; + } + + console.log('Kernel is ready:', kernel.name); + }); + } + + componentDidUpdate(prevProps: YamlProps, prevState: YamlState) { + if (prevState.selectedNode !== this.state.selectedNode) { + console.log('selectedNodeData changed:', this.state.selectedNode); + } + } + + handleNodeClick = (node: Node) => { + this.setState({ + selectedNode: node + }); + }; + + //debounce methods to prevent excessive rendering + private handleYamlChange = debounce((value?: string) => { + const yamlText = value || ''; + this.setState({ yamlContent: yamlText }); + this.validateAndRenderYaml(yamlText); + }, 2000); + + validateAndRenderYaml(yamlText: string) { + const escapedYaml = yamlText.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); Review Comment:  Embedding the YAML content directly into a Python multiline string is unsafe. If the YAML content contains a `\"\"\"` sequence, it will break the Python string, leading to a syntax error and failing the execution. A more robust approach would be to pass the YAML content as a base64-encoded string and decode it in Python, or to use a mechanism that doesn't rely on string embedding. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py: ########## @@ -0,0 +1,199 @@ +# Licensed under the Apache License, Version 2.0 (the 'License'); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import yaml +import json +from typing import Dict, List, Optional, TypedDict, Any +import dataclasses +from dataclasses import dataclass + +from apache_beam.yaml.main import build_pipeline_components_from_yaml +import apache_beam as beam + +# ======================== Type Definitions ======================== + +@dataclass +class NodeData: + id: str + label: str + type: str = "" + + def __post_init__(self): + # Ensure ID is not empty + if not self.id: + raise ValueError("Node ID cannot be empty") + +@dataclass +class EdgeData: + source: str + target: str + label: str = "" + + def __post_init__(self): + if not self.source or not self.target: + raise ValueError("Edge source and target cannot be empty") + +class FlowGraph(TypedDict): + nodes: List[Dict[str, Any]] + edges: List[Dict[str, Any]] + +class ApiResponse(TypedDict): + data: Optional[FlowGraph] + error: Optional[str] + +# ======================== Main Function ======================== + +def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> ApiResponse: Review Comment:  The function is type-hinted to return `ApiResponse`, which is a `TypedDict`, but it actually returns a JSON-encoded string via `build_success_response` or `build_error_response`. This is a type mismatch. The function should either return the dictionary as hinted, or the type hint should be changed to `str`. Since the calling TypeScript code seems to expect a JSON string, changing the hint to `str` is likely the correct fix. This also applies to `build_success_response` (line 163) and `build_error_response` (line 173). ```suggestion def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> str: ``` ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py: ########## @@ -0,0 +1,199 @@ +# Licensed under the Apache License, Version 2.0 (the 'License'); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import yaml +import json +from typing import Dict, List, Optional, TypedDict, Any +import dataclasses +from dataclasses import dataclass + +from apache_beam.yaml.main import build_pipeline_components_from_yaml +import apache_beam as beam + +# ======================== Type Definitions ======================== + +@dataclass +class NodeData: + id: str + label: str + type: str = "" + + def __post_init__(self): + # Ensure ID is not empty + if not self.id: + raise ValueError("Node ID cannot be empty") + +@dataclass +class EdgeData: + source: str + target: str + label: str = "" + + def __post_init__(self): + if not self.source or not self.target: + raise ValueError("Edge source and target cannot be empty") + +class FlowGraph(TypedDict): + nodes: List[Dict[str, Any]] + edges: List[Dict[str, Any]] + +class ApiResponse(TypedDict): + data: Optional[FlowGraph] + error: Optional[str] + +# ======================== Main Function ======================== + +def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> ApiResponse: + """ + Parse Beam YAML and convert to flow graph data structure + + Args: + yaml_str: Input YAML string + + Returns: + Standardized response format: + - Success: {'status': 'success', 'data': {...}, 'error': None} + - Failure: {'status': 'error', 'data': None, 'error': 'message'} + """ + # Phase 1: YAML Parsing + try: + parsed_yaml = yaml.safe_load(yaml_str) + if not parsed_yaml or 'pipeline' not in parsed_yaml: + return build_error_response("Invalid YAML structure: missing 'pipeline' section") + except yaml.YAMLError as e: + return build_error_response(f"YAML parsing error: {str(e)}") + + # Phase 2: Pipeline Validation + try: + options, constructor = build_pipeline_components_from_yaml( + yaml_str, + [], + validate_schema='per_transform' + ) + if isDryRunMode: + with beam.Pipeline(options=options) as p: + constructor(p) + except Exception as e: + return build_error_response(f"Pipeline validation failed: {str(e)}") + + # Phase 3: Graph Construction + try: + pipeline = parsed_yaml['pipeline'] + pipeline_type = pipeline.get('type', '') + transforms = pipeline.get('transforms', []) + + nodes: List[NodeData] = [] Review Comment:  The `nodes` list is annotated as `List[NodeData]`, but on line 117, a dictionary is appended to it. This will cause a static type checker to report an error. To fix this, you could change the type hint to be more permissive, for example `List[Any]`. ```suggestion nodes: List[Any] = [] ``` ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/EditablePanel.tsx: ########## @@ -0,0 +1,380 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { Node } from '@xyflow/react'; +import '../../style/yaml/YamlEditor.css'; +import { transformEmojiMap } from "./EmojiMap"; + +type EditableKeyValuePanelProps = { + node: Node; + onChange: (newData: Record<string, any>) => void; + depth?: number; +}; + +type EditableKeyValuePanelState = { + localData: Record<string, any>; + collapsedKeys: Set<string>; +}; + +/** + * An editable key-value panel component for displaying and modifying node properties. + * + * Features: + * - Nested object support with collapsible sections + * - Real-time key-value editing with validation + * - Dynamic field addition and deletion + * - Support for multi-line text values + * - Object conversion for nested structures + * - Reference documentation integration + * - Visual hierarchy with depth-based indentation + * - Interactive UI with hover effects and transitions + * + * State Management: + * - localData: Local copy of the node data being edited + * - collapsedKeys: Set of keys that are currently collapsed + * + * Props: + * @param {Node} node - The node containing data to be edited + * @param {(data: Record<string, any>) => void} onChange - Callback for data changes + * @param {number} [depth=0] - Current nesting depth for recursive rendering + * + * Methods: + * - toggleCollapse: Toggles collapse state of nested objects + * - handleKeyChange: Updates keys with validation + * - handleValueChange: Updates values in the local data + * - handleDelete: Removes key-value pairs + * - handleAddPair: Adds new key-value pairs + * - convertToObject: Converts primitive values to objects + * - renderValueEditor: Renders appropriate input based on value type + * + * UI Features: + * - Collapsible nested object sections + * - Multi-line text support for complex values + * - Add/Delete buttons for field management + * - Reference documentation links + * - Visual feedback for user interactions + * - Responsive design with proper spacing + */ +export class EditableKeyValuePanel extends React.Component<EditableKeyValuePanelProps, EditableKeyValuePanelState> { + static defaultProps = { + depth: 0, + }; + + constructor(props: EditableKeyValuePanelProps) { + super(props); + this.state = { + localData: { ...(props.node ? props.node.data : {}) }, + collapsedKeys: new Set(), + }; + } + + componentDidUpdate(prevProps: EditableKeyValuePanelProps) { + if (prevProps.node !== this.props.node && this.props.node) { + this.setState({ localData: { ...(this.props.node.data ?? {}) } }); + console.log('EditableKeyValuePanel updated with new node data:', this.props.node.data); Review Comment:  This `console.log` statement seems to be for debugging purposes and should be removed before merging. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/EditablePanel.tsx: ########## @@ -0,0 +1,380 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { Node } from '@xyflow/react'; +import '../../style/yaml/YamlEditor.css'; +import { transformEmojiMap } from "./EmojiMap"; + +type EditableKeyValuePanelProps = { + node: Node; + onChange: (newData: Record<string, any>) => void; + depth?: number; +}; + +type EditableKeyValuePanelState = { + localData: Record<string, any>; + collapsedKeys: Set<string>; +}; + +/** + * An editable key-value panel component for displaying and modifying node properties. + * + * Features: + * - Nested object support with collapsible sections + * - Real-time key-value editing with validation + * - Dynamic field addition and deletion + * - Support for multi-line text values + * - Object conversion for nested structures + * - Reference documentation integration + * - Visual hierarchy with depth-based indentation + * - Interactive UI with hover effects and transitions + * + * State Management: + * - localData: Local copy of the node data being edited + * - collapsedKeys: Set of keys that are currently collapsed + * + * Props: + * @param {Node} node - The node containing data to be edited + * @param {(data: Record<string, any>) => void} onChange - Callback for data changes + * @param {number} [depth=0] - Current nesting depth for recursive rendering + * + * Methods: + * - toggleCollapse: Toggles collapse state of nested objects + * - handleKeyChange: Updates keys with validation + * - handleValueChange: Updates values in the local data + * - handleDelete: Removes key-value pairs + * - handleAddPair: Adds new key-value pairs + * - convertToObject: Converts primitive values to objects + * - renderValueEditor: Renders appropriate input based on value type + * + * UI Features: + * - Collapsible nested object sections + * - Multi-line text support for complex values + * - Add/Delete buttons for field management + * - Reference documentation links + * - Visual feedback for user interactions + * - Responsive design with proper spacing + */ +export class EditableKeyValuePanel extends React.Component<EditableKeyValuePanelProps, EditableKeyValuePanelState> { + static defaultProps = { + depth: 0, + }; + + constructor(props: EditableKeyValuePanelProps) { + super(props); + this.state = { + localData: { ...(props.node ? props.node.data : {}) }, + collapsedKeys: new Set(), + }; + } + + componentDidUpdate(prevProps: EditableKeyValuePanelProps) { + if (prevProps.node !== this.props.node && this.props.node) { + this.setState({ localData: { ...(this.props.node.data ?? {}) } }); + console.log('EditableKeyValuePanel updated with new node data:', this.props.node.data); + } + } + + toggleCollapse = (key: string) => { + this.setState(({ collapsedKeys }) => { + const newSet = new Set(collapsedKeys); + newSet.has(key) ? newSet.delete(key) : newSet.add(key); + return { collapsedKeys: newSet }; + }); + }; + + handleKeyChange = (oldKey: string, newKey: string) => { + newKey = newKey.trim(); + if (newKey === oldKey || newKey === '') return alert('Invalid Key!'); + if (this.state.localData.hasOwnProperty(newKey)) return alert('Duplicated Key!'); Review Comment:  Using `alert()` for validation feedback is disruptive to the user experience as it's a blocking dialog. Consider using a less intrusive way to display errors, such as an inline error message next to the input field or a toast notification. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/Yaml.tsx: ########## @@ -0,0 +1,279 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { ISessionContext } from '@jupyterlab/apputils'; +import { Card } from '@rmwc/card'; +import '@rmwc/textfield/styles'; +import '@rmwc/grid/styles'; +import '@rmwc/card/styles'; +import Split from 'react-split'; +import { debounce } from 'lodash'; + +import { Node, Edge } from '@xyflow/react'; +import '@xyflow/react/dist/style.css'; +import '../../style/yaml/Yaml.css'; + + +import { YamlEditor } from './YamlEditor'; +import { EditableKeyValuePanel } from './EditablePanel'; +import { FlowEditor } from './YamlFlow'; +import { ApiResponse } from './DataType'; +import { nodeWidth, nodeHeight } from './DataType'; + +interface YamlProps { + sessionContext: ISessionContext; +} + +interface YamlState { + yamlContent: string; + elements: any; + selectedNode: Node | null; + errors: string[], + isDryRunMode: boolean; + Nodes: Node[]; + Edges: Edge[]; +} + +const initialNodes: Node[] = [ + { id: '0', width: nodeWidth, position: { x: 0, y: 0 }, type: 'input', data: { label: 'Input' } }, + { id: '1', width: nodeWidth, position: { x: 0, y: 100 }, type: 'default', data: { label: '1' } }, + { id: '2', width: nodeWidth, position: { x: 0, y: 200 }, type: 'default', data: { label: '2' } }, + { id: '3', width: nodeWidth, position: { x: 0, y: 300 }, type: 'output', data: { label: 'Output' } }, +]; + +const initialEdges: Edge[] = [{ id: 'e0-1', source: '0', target: '1' }, +{ id: 'e1-2', source: '1', target: '2' }, { id: 'e2-3', source: '2', target: '3' } +]; + +/** + * A YAML pipeline editor component with integrated flow visualization. + * + * Features: + * - Three-panel layout with YAML editor, flow diagram, and node properties + * - Real-time YAML validation and error display + * - Automatic flow diagram generation from YAML content + * - Interactive node selection and editing + * - Dry run mode support for pipeline testing + * - Kernel-based YAML parsing using Apache Beam utilities + * - Debounced updates to optimize performance + * - Split-pane resizable interface + * + * State Management: + * - yamlContent: Current YAML text content + * - elements: Combined nodes and edges for the flow diagram + * - selectedNode: Currently selected node in the flow diagram + * - errors: Array of validation errors + * - Nodes: Array of flow nodes + * - Edges: Array of flow edges + * - isDryRunMode: Flag for dry run mode state + * + * Props: + * @param {YamlProps} props - Component props including sessionContext for kernel communication + * + * Methods: + * - handleNodeClick: Handles node selection in the flow diagram + * - handleYamlChange: Debounced handler for YAML content changes + * - validateAndRenderYaml: Validates YAML and updates the flow diagram + */ +export class Yaml extends React.Component<YamlProps, YamlState> { + constructor(props: YamlProps) { + super(props); + this.state = { yamlContent: '', elements: [], selectedNode: null, errors: [], Nodes: initialNodes, Edges: initialEdges, isDryRunMode: false }; + } + + componentDidMount(): void { + this.props.sessionContext.ready.then(() => { + const kernel = this.props.sessionContext.session?.kernel; + if (!kernel) { + console.error('Kernel is not available even after ready'); + return; + } + + console.log('Kernel is ready:', kernel.name); + }); + } + + componentDidUpdate(prevProps: YamlProps, prevState: YamlState) { + if (prevState.selectedNode !== this.state.selectedNode) { + console.log('selectedNodeData changed:', this.state.selectedNode); + } + } + + handleNodeClick = (node: Node) => { + this.setState({ + selectedNode: node + }); + }; + + //debounce methods to prevent excessive rendering + private handleYamlChange = debounce((value?: string) => { + const yamlText = value || ''; + this.setState({ yamlContent: yamlText }); + this.validateAndRenderYaml(yamlText); + }, 2000); + + validateAndRenderYaml(yamlText: string) { + const escapedYaml = yamlText.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + const code = ` +from apache_beam_jupyterlab_sidepanel.yaml_parse_utils import parse_beam_yaml +print(parse_beam_yaml("""${escapedYaml}""", isDryRunMode=${this.state.isDryRunMode ? "True" : "False"})) +`.trim(); + const session = this.props.sessionContext.session; + if (!session?.kernel) { + console.error('No kernel available'); + return; + } + + // Clear previous state immediately + this.setState({ + Nodes: [], + Edges: [], + selectedNode: null, + errors: [] + }); + + const future = session.kernel.requestExecute({ code }); + + // Handle kernel execution results + future.onIOPub = (msg) => { + if (msg.header.msg_type === 'stream') { + const content = msg.content as { name: string; text: string }; + const output = content.text.trim(); + + console.log('Received output:', output); + + try { + const result: ApiResponse = JSON.parse(output); + console.log('Received result:', result); + + if (result.error) { + this.setState({ + elements: [], + selectedNode: null, + errors: [result.error] + }); + } else { + + const flowNodes: Node[] = result.data.nodes.map(node => ({ + id: node.id, + type: node.type, + width: nodeWidth, + height: nodeHeight, + position: { x: 0, y: 0 }, // Will be auto-layouted + data: { + label: node.label, + ...node // include all original properties + } + })); + + // Transform edges for React Flow + const flowEdges: Edge[] = result.data.edges.map(edge => ({ + id: `${edge.source}-${edge.target}`, + source: edge.source, + target: edge.target, + animated: edge.label === 'pipeline_entry', + label: edge.label, + type: 'default' // or your custom edge type + })); + + this.setState({ + Nodes: flowNodes, + Edges: flowEdges, + errors: [] + }); + } + } catch (err) { + this.setState({ + elements: [], + selectedNode: null, + errors: [output] + }); + } + } + }; + } + + render(): React.ReactNode { + return ( + <div style={{ + height: '100vh', + width: '100vw', + }}> + <Split + direction="horizontal" + sizes={[25, 35, 20]} // L/C/R + minSize={100} + gutterSize={6} + className='split-pane' + > + {/* Left */} + <div style={{ + height: '100%', + overflow: 'auto', + minWidth: '100px' + }}> + <YamlEditor + value={this.state.yamlContent} + onChange={(value) => { + // Clear old errors & Handle new errors + this.setState({ errors: [] }); + this.handleYamlChange(value || ''); + }} + errors={this.state.errors} + showConsole={true} + onDryRunModeChange={(newValue) => this.setState({ isDryRunMode: newValue })} + /> + </div> + + <div style={{ + padding: '1rem', + height: '100%', + }}> + <Card style={{ + height: '100%', + display: 'flex', + flexDirection: 'column' + }}> + <div className="w-full h-full bg-gray-50 dark:bg-zinc-900 text-sm font-sans" style={{ flex: 1, minHeight: 0 }}> + <FlowEditor + Nodes={this.state.Nodes} + Edges={this.state.Edges} + onNodesUpdate={(nodes: Node[]) => this.setState({ Nodes: nodes })} + onEdgesUpdate={(edges: Edge[]) => this.setState({ Edges: edges })} + onNodeClick={this.handleNodeClick} + /> + </div> + </Card> + </div> + + {/* Right */} + <div style={{ + height: '100%', + overflow: 'auto', + padding: '12px', + boxSizing: 'border-box' + }}> + <EditableKeyValuePanel + node={this.state.selectedNode} + // TODO: implement onChange to update node data from Panel + onChange={() => { }} Review Comment:  The `onChange` handler for the `EditableKeyValuePanel` is a no-op with a `TODO` comment. This means that any edits made in the properties panel are not saved or reflected in the YAML editor or the flow diagram, making the panel effectively read-only. This is a key piece of functionality that appears to be missing. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/YamlFlow.tsx: ########## @@ -0,0 +1,204 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React, { useEffect, useCallback, useMemo } from 'react'; +import { + ReactFlow, + useNodesState, + useEdgesState, + addEdge, + MiniMap, + Controls, + Background, + Panel, + Node, + Edge, applyNodeChanges, + NodeChange, Connection +} from '@xyflow/react'; +import { debounce } from 'lodash'; +import dagre from 'dagre'; +import { DefaultNode, InputNode, OutputNode, AnimatedSVGEdge } from './CustomStyle'; +import { nodeWidth, nodeHeight } from './DataType'; + +import '@xyflow/react/dist/style.css' + +interface FlowEditorProps { + Nodes: Node[]; + Edges: Edge[]; + onNodesUpdate?: (nodes: Node[]) => void; + onEdgesUpdate?: (edges: Edge[]) => void; + onNodeClick?: (node: Node) => void; + debounceWait?: number; +} + +/** + * A flow diagram editor component built with React Flow. + * + * Features: + * - Interactive node-based flow diagram editor + * - Auto-layout functionality using Dagre graph layout + * - Support for different node types (default, input, output) + * - Animated edge connections + * - Mini-map and controls for navigation + * - Debounced updates to optimize performance + * - Real-time node and edge manipulation + * + * Props: + * @param {Node[]} Nodes - Initial array of nodes + * @param {Edge[]} Edges - Initial array of edges + * @param {(nodes: Node[]) => void} onNodesUpdate - Callback when nodes are updated + * @param {(edges: Edge[]) => void} onEdgesUpdate - Callback when edges are updated + * @param {(event: React.MouseEvent, node: Node) => void} onNodeClick - Callback when a node is clicked + * @param {number} [debounceWait=500] - Debounce wait time in milliseconds for updates + */ +export const FlowEditor: React.FC<FlowEditorProps> = ({ + Nodes, + Edges, + onNodesUpdate, + onEdgesUpdate, + onNodeClick, + debounceWait = 500 //Default debounce wait time +}: FlowEditorProps) => { + + const [nodes, setNodes, _] = useNodesState(Nodes); + const [edges, setEdges, onEdgesChange] = useEdgesState(Edges); + + // Debounce callback + const debouncedNodesUpdate = useMemo( + () => debounce((nodes: Node[]) => { + onNodesUpdate?.(nodes); + }, debounceWait), + [onNodesUpdate, debounceWait] + ); + + const debouncedEdgesUpdate = useMemo( + () => debounce((edges: Edge[]) => { + onEdgesUpdate?.(edges); + }, debounceWait), + [onEdgesUpdate, debounceWait] + ); + + const onConnect = useCallback( + (params: Connection) => setEdges((eds) => addEdge(params, eds)), + [setEdges] + ); + + // Listen initialNodes/initialEdges changes and update state accordingly + useEffect(() => { + setNodes(Nodes); + }, [Nodes]); + + useEffect(() => { + setEdges(Edges); + }, [Edges]); + + const handleNodeClick = useCallback((event: React.MouseEvent, node: Node) => { + onNodeClick?.(node); + }, [onNodeClick]); + + const applyAutoLayout = useCallback((nodes: Node[], edges: Edge[]): Node[] => { + const g = new dagre.graphlib.Graph(); + g.setDefaultEdgeLabel(() => ({})); + g.setGraph({ + rankdir: 'TB', + ranksep: 100, + }); // TB = Top to Bottom. Use LR for Left to Right + + nodes.forEach((node) => { + g.setNode(node.id, { width: nodeWidth, height: nodeHeight }); + }); + + edges.forEach((edge) => { + g.setEdge(edge.source, edge.target); + }); + + dagre.layout(g); + + return nodes.map((node) => { + const pos = g.node(node.id); + return { + ...node, + position: { + x: pos.x - nodeWidth / 2, + y: pos.y - nodeHeight / 2 + } + }; + }); + }, []); + + const handleNodesChange = useCallback( + (changes: NodeChange[]) => { + const updatedNodes = applyNodeChanges(changes, nodes); + + // Judge whether the node data is changed or not(except position / dragging / selected) + const structuralChanges = changes.filter( + (c) => !['position', 'dragging', 'selected', 'select'].includes(c.type) + ); + + if (structuralChanges.length > 0) { + // Only when there are structural changes, we apply auto-layout + const autoLayouted = applyAutoLayout(updatedNodes, edges); + setNodes(autoLayouted); + onNodesUpdate?.(autoLayouted); + console.log('Auto-layout applied:', autoLayouted); + console.log('Changes:', structuralChanges); Review Comment:  These `console.log` statements seem to be for debugging and should be removed. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/Yaml.tsx: ########## @@ -0,0 +1,279 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { ISessionContext } from '@jupyterlab/apputils'; +import { Card } from '@rmwc/card'; +import '@rmwc/textfield/styles'; +import '@rmwc/grid/styles'; +import '@rmwc/card/styles'; +import Split from 'react-split'; +import { debounce } from 'lodash'; + +import { Node, Edge } from '@xyflow/react'; +import '@xyflow/react/dist/style.css'; +import '../../style/yaml/Yaml.css'; + + +import { YamlEditor } from './YamlEditor'; +import { EditableKeyValuePanel } from './EditablePanel'; +import { FlowEditor } from './YamlFlow'; +import { ApiResponse } from './DataType'; +import { nodeWidth, nodeHeight } from './DataType'; + +interface YamlProps { + sessionContext: ISessionContext; +} + +interface YamlState { + yamlContent: string; + elements: any; Review Comment:  The `elements` property in the `YamlState` interface and the component's state is initialized and written to, but its value is never read. It seems to be unused and should be removed to simplify the code. ########## sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/yaml/Yaml.tsx: ########## @@ -0,0 +1,279 @@ +// Licensed under the Apache License, Version 2.0 (the 'License'); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +import React from 'react'; +import { ISessionContext } from '@jupyterlab/apputils'; +import { Card } from '@rmwc/card'; +import '@rmwc/textfield/styles'; +import '@rmwc/grid/styles'; +import '@rmwc/card/styles'; +import Split from 'react-split'; +import { debounce } from 'lodash'; + +import { Node, Edge } from '@xyflow/react'; +import '@xyflow/react/dist/style.css'; +import '../../style/yaml/Yaml.css'; + + +import { YamlEditor } from './YamlEditor'; +import { EditableKeyValuePanel } from './EditablePanel'; +import { FlowEditor } from './YamlFlow'; +import { ApiResponse } from './DataType'; +import { nodeWidth, nodeHeight } from './DataType'; + +interface YamlProps { + sessionContext: ISessionContext; +} + +interface YamlState { + yamlContent: string; + elements: any; + selectedNode: Node | null; + errors: string[], + isDryRunMode: boolean; + Nodes: Node[]; + Edges: Edge[]; +} + +const initialNodes: Node[] = [ + { id: '0', width: nodeWidth, position: { x: 0, y: 0 }, type: 'input', data: { label: 'Input' } }, + { id: '1', width: nodeWidth, position: { x: 0, y: 100 }, type: 'default', data: { label: '1' } }, + { id: '2', width: nodeWidth, position: { x: 0, y: 200 }, type: 'default', data: { label: '2' } }, + { id: '3', width: nodeWidth, position: { x: 0, y: 300 }, type: 'output', data: { label: 'Output' } }, +]; + +const initialEdges: Edge[] = [{ id: 'e0-1', source: '0', target: '1' }, +{ id: 'e1-2', source: '1', target: '2' }, { id: 'e2-3', source: '2', target: '3' } +]; + +/** + * A YAML pipeline editor component with integrated flow visualization. + * + * Features: + * - Three-panel layout with YAML editor, flow diagram, and node properties + * - Real-time YAML validation and error display + * - Automatic flow diagram generation from YAML content + * - Interactive node selection and editing + * - Dry run mode support for pipeline testing + * - Kernel-based YAML parsing using Apache Beam utilities + * - Debounced updates to optimize performance + * - Split-pane resizable interface + * + * State Management: + * - yamlContent: Current YAML text content + * - elements: Combined nodes and edges for the flow diagram + * - selectedNode: Currently selected node in the flow diagram + * - errors: Array of validation errors + * - Nodes: Array of flow nodes + * - Edges: Array of flow edges + * - isDryRunMode: Flag for dry run mode state + * + * Props: + * @param {YamlProps} props - Component props including sessionContext for kernel communication + * + * Methods: + * - handleNodeClick: Handles node selection in the flow diagram + * - handleYamlChange: Debounced handler for YAML content changes + * - validateAndRenderYaml: Validates YAML and updates the flow diagram + */ +export class Yaml extends React.Component<YamlProps, YamlState> { + constructor(props: YamlProps) { + super(props); + this.state = { yamlContent: '', elements: [], selectedNode: null, errors: [], Nodes: initialNodes, Edges: initialEdges, isDryRunMode: false }; + } + + componentDidMount(): void { + this.props.sessionContext.ready.then(() => { + const kernel = this.props.sessionContext.session?.kernel; + if (!kernel) { + console.error('Kernel is not available even after ready'); + return; + } + + console.log('Kernel is ready:', kernel.name); + }); + } + + componentDidUpdate(prevProps: YamlProps, prevState: YamlState) { + if (prevState.selectedNode !== this.state.selectedNode) { + console.log('selectedNodeData changed:', this.state.selectedNode); Review Comment:  This `console.log` statement and others in this file appear to be for debugging. They should be removed before merging. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org