chaokunyang commented on code in PR #3394: URL: https://github.com/apache/fory/pull/3394#discussion_r2890583254
########## compiler/fory_compiler/generators/javascript.py: ########## @@ -0,0 +1,773 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""JavaScript/TypeScript code generator.""" + +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple, Union as TypingUnion + +from fory_compiler.frontend.utils import parse_idl_file +from fory_compiler.generators.base import BaseGenerator, GeneratedFile +from fory_compiler.ir.ast import ( + Enum, + Field, + FieldType, + ListType, + MapType, + Message, + NamedType, + PrimitiveType, + Schema, + Union, +) +from fory_compiler.ir.types import PrimitiveKind + + +class JavaScriptGenerator(BaseGenerator): + """Generates JavaScript/TypeScript type definitions and Fory registration helpers from IDL.""" + + language_name = "javascript" + file_extension = ".ts" + + # TypeScript/JavaScript reserved keywords that cannot be used as identifiers + TS_KEYWORDS = { + "abstract", + "any", + "as", + "asserts", + "async", + "await", + "bigint", + "boolean", + "break", + "case", + "catch", + "class", + "const", + "continue", + "debugger", + "declare", + "default", + "delete", + "do", + "else", + "enum", + "export", + "extends", + "false", + "finally", + "for", + "from", + "function", + "get", + "if", + "implements", + "import", + "in", + "infer", + "instanceof", + "interface", + "is", + "keyof", + "let", + "module", + "namespace", + "never", + "new", + "null", + "number", + "object", + "of", + "package", + "private", + "protected", + "public", + "readonly", + "require", + "return", + "set", + "static", + "string", + "super", + "switch", + "symbol", + "this", + "throw", + "true", + "try", + "type", + "typeof", + "undefined", + "unique", + "unknown", + "var", + "void", + "while", + "with", + "yield", + } + + # Mapping from FDL primitive types to TypeScript types + PRIMITIVE_MAP = { + PrimitiveKind.BOOL: "boolean", + PrimitiveKind.INT8: "number", + PrimitiveKind.INT16: "number", + PrimitiveKind.INT32: "number", + PrimitiveKind.VARINT32: "number", + PrimitiveKind.INT64: "bigint | number", + PrimitiveKind.VARINT64: "bigint | number", + PrimitiveKind.TAGGED_INT64: "bigint | number", + PrimitiveKind.UINT8: "number", + PrimitiveKind.UINT16: "number", + PrimitiveKind.UINT32: "number", + PrimitiveKind.VAR_UINT32: "number", + PrimitiveKind.UINT64: "bigint | number", + PrimitiveKind.VAR_UINT64: "bigint | number", + PrimitiveKind.TAGGED_UINT64: "bigint | number", + PrimitiveKind.FLOAT16: "number", + PrimitiveKind.BFLOAT16: "number", + PrimitiveKind.FLOAT32: "number", + PrimitiveKind.FLOAT64: "number", + PrimitiveKind.STRING: "string", + PrimitiveKind.BYTES: "Uint8Array", + PrimitiveKind.DATE: "Date", + PrimitiveKind.TIMESTAMP: "Date", + PrimitiveKind.DURATION: "number", + PrimitiveKind.DECIMAL: "number", + PrimitiveKind.ANY: "any", + } + + def __init__(self, schema: Schema, options): + super().__init__(schema, options) + self.indent_str = " " # TypeScript uses 2 spaces + self._qualified_type_names: Dict[int, str] = {} + self._build_qualified_type_name_index() + + def _build_qualified_type_name_index(self) -> None: + """Build an index mapping type object ids to their qualified names.""" + for enum in self.schema.enums: + self._qualified_type_names[id(enum)] = enum.name + for union in self.schema.unions: + self._qualified_type_names[id(union)] = union.name + + def visit_message(message: Message, parents: List[str]) -> None: + path = ".".join(parents + [message.name]) + self._qualified_type_names[id(message)] = path + for nested_enum in message.nested_enums: + self._qualified_type_names[id(nested_enum)] = ( + f"{path}.{nested_enum.name}" + ) + for nested_union in message.nested_unions: + self._qualified_type_names[id(nested_union)] = ( + f"{path}.{nested_union.name}" + ) + for nested_msg in message.nested_messages: + visit_message(nested_msg, parents + [message.name]) + + for message in self.schema.messages: + visit_message(message, []) + + def safe_identifier(self, name: str) -> str: + """Escape identifiers that collide with TypeScript reserved words.""" + if name in self.TS_KEYWORDS: + return f"{name}_" + return name + + def safe_type_identifier(self, name: str) -> str: + """Escape type names that collide with TypeScript reserved words.""" + return self.safe_identifier(name) + + def safe_member_name(self, name: str) -> str: + """Generate a safe camelCase member name.""" + return self.safe_identifier(self.to_camel_case(name)) + + def _nested_type_names_for_message(self, message: Message) -> Set[str]: + """Collect safe type names of nested types to detect collisions.""" + names: Set[str] = set() + for nested in ( + list(message.nested_enums) + + list(message.nested_unions) + + list(message.nested_messages) + ): + names.add(self.safe_type_identifier(nested.name)) + return names + + def _field_member_name( + self, + field: Field, + message: Message, + used_names: Set[str], + ) -> str: + """Produce a unique safe member name for a field, avoiding collisions.""" + base = self.safe_member_name(field.name) + nested_type_names = self._nested_type_names_for_message(message) + if base in nested_type_names: + base = f"{base}Value" + + candidate = base + suffix = 1 + while candidate in used_names: + candidate = f"{base}{suffix}" + suffix += 1 + used_names.add(candidate) + return candidate + + def is_imported_type(self, type_def: object) -> bool: + """Return True if a type definition comes from an imported IDL file.""" + if not self.schema.source_file: + return False + location = getattr(type_def, "location", None) + if location is None or not location.file: + return False + try: + return ( + Path(location.file).resolve() != Path(self.schema.source_file).resolve() + ) + except Exception: + return location.file != self.schema.source_file + + def split_imported_types( + self, items: List[object] + ) -> Tuple[List[object], List[object]]: + imported: List[object] = [] + local: List[object] = [] + for item in items: + if self.is_imported_type(item): + imported.append(item) + else: + local.append(item) + return imported, local # Return (imported, local) tuple + + def get_module_name(self) -> str: + """Get the TypeScript module name from package.""" + if self.package: + parts = self.package.split(".") + return self.to_camel_case(parts[-1]) Review Comment: [P1] Module naming uses only the last package segment (`foo.v1` -> `v1`, `bar.v1` -> `v1`), which can overwrite generated files and produce invalid imports. I reproduced this with two imported schemas (`foo.v1` and `bar.v1`): both generated `v1.ts`, and the consumer import failed (`Module "./v1" has no exported member Alpha`). Please use a collision-safe module/file naming strategy (e.g., full package path or source-file stem with disambiguation). ########## compiler/fory_compiler/generators/javascript.py: ########## @@ -0,0 +1,773 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""JavaScript/TypeScript code generator.""" + +from pathlib import Path +from typing import Dict, List, Optional, Set, Tuple, Union as TypingUnion + +from fory_compiler.frontend.utils import parse_idl_file +from fory_compiler.generators.base import BaseGenerator, GeneratedFile +from fory_compiler.ir.ast import ( + Enum, + Field, + FieldType, + ListType, + MapType, + Message, + NamedType, + PrimitiveType, + Schema, + Union, +) +from fory_compiler.ir.types import PrimitiveKind + + +class JavaScriptGenerator(BaseGenerator): + """Generates JavaScript/TypeScript type definitions and Fory registration helpers from IDL.""" + + language_name = "javascript" + file_extension = ".ts" + + # TypeScript/JavaScript reserved keywords that cannot be used as identifiers + TS_KEYWORDS = { + "abstract", + "any", + "as", + "asserts", + "async", + "await", + "bigint", + "boolean", + "break", + "case", + "catch", + "class", + "const", + "continue", + "debugger", + "declare", + "default", + "delete", + "do", + "else", + "enum", + "export", + "extends", + "false", + "finally", + "for", + "from", + "function", + "get", + "if", + "implements", + "import", + "in", + "infer", + "instanceof", + "interface", + "is", + "keyof", + "let", + "module", + "namespace", + "never", + "new", + "null", + "number", + "object", + "of", + "package", + "private", + "protected", + "public", + "readonly", + "require", + "return", + "set", + "static", + "string", + "super", + "switch", + "symbol", + "this", + "throw", + "true", + "try", + "type", + "typeof", + "undefined", + "unique", + "unknown", + "var", + "void", + "while", + "with", + "yield", + } + + # Mapping from FDL primitive types to TypeScript types + PRIMITIVE_MAP = { + PrimitiveKind.BOOL: "boolean", + PrimitiveKind.INT8: "number", + PrimitiveKind.INT16: "number", + PrimitiveKind.INT32: "number", + PrimitiveKind.VARINT32: "number", + PrimitiveKind.INT64: "bigint | number", + PrimitiveKind.VARINT64: "bigint | number", + PrimitiveKind.TAGGED_INT64: "bigint | number", + PrimitiveKind.UINT8: "number", + PrimitiveKind.UINT16: "number", + PrimitiveKind.UINT32: "number", + PrimitiveKind.VAR_UINT32: "number", + PrimitiveKind.UINT64: "bigint | number", + PrimitiveKind.VAR_UINT64: "bigint | number", + PrimitiveKind.TAGGED_UINT64: "bigint | number", + PrimitiveKind.FLOAT16: "number", + PrimitiveKind.BFLOAT16: "number", + PrimitiveKind.FLOAT32: "number", + PrimitiveKind.FLOAT64: "number", + PrimitiveKind.STRING: "string", + PrimitiveKind.BYTES: "Uint8Array", + PrimitiveKind.DATE: "Date", + PrimitiveKind.TIMESTAMP: "Date", + PrimitiveKind.DURATION: "number", + PrimitiveKind.DECIMAL: "number", + PrimitiveKind.ANY: "any", + } + + def __init__(self, schema: Schema, options): + super().__init__(schema, options) + self.indent_str = " " # TypeScript uses 2 spaces + self._qualified_type_names: Dict[int, str] = {} + self._build_qualified_type_name_index() + + def _build_qualified_type_name_index(self) -> None: + """Build an index mapping type object ids to their qualified names.""" + for enum in self.schema.enums: + self._qualified_type_names[id(enum)] = enum.name + for union in self.schema.unions: + self._qualified_type_names[id(union)] = union.name + + def visit_message(message: Message, parents: List[str]) -> None: + path = ".".join(parents + [message.name]) + self._qualified_type_names[id(message)] = path + for nested_enum in message.nested_enums: + self._qualified_type_names[id(nested_enum)] = ( + f"{path}.{nested_enum.name}" + ) + for nested_union in message.nested_unions: + self._qualified_type_names[id(nested_union)] = ( + f"{path}.{nested_union.name}" + ) + for nested_msg in message.nested_messages: + visit_message(nested_msg, parents + [message.name]) + + for message in self.schema.messages: + visit_message(message, []) + + def safe_identifier(self, name: str) -> str: + """Escape identifiers that collide with TypeScript reserved words.""" + if name in self.TS_KEYWORDS: + return f"{name}_" + return name + + def safe_type_identifier(self, name: str) -> str: + """Escape type names that collide with TypeScript reserved words.""" + return self.safe_identifier(name) + + def safe_member_name(self, name: str) -> str: + """Generate a safe camelCase member name.""" + return self.safe_identifier(self.to_camel_case(name)) + + def _nested_type_names_for_message(self, message: Message) -> Set[str]: + """Collect safe type names of nested types to detect collisions.""" + names: Set[str] = set() + for nested in ( + list(message.nested_enums) + + list(message.nested_unions) + + list(message.nested_messages) + ): + names.add(self.safe_type_identifier(nested.name)) + return names + + def _field_member_name( + self, + field: Field, + message: Message, + used_names: Set[str], + ) -> str: + """Produce a unique safe member name for a field, avoiding collisions.""" + base = self.safe_member_name(field.name) + nested_type_names = self._nested_type_names_for_message(message) + if base in nested_type_names: + base = f"{base}Value" + + candidate = base + suffix = 1 + while candidate in used_names: + candidate = f"{base}{suffix}" + suffix += 1 + used_names.add(candidate) + return candidate + + def is_imported_type(self, type_def: object) -> bool: + """Return True if a type definition comes from an imported IDL file.""" + if not self.schema.source_file: + return False + location = getattr(type_def, "location", None) + if location is None or not location.file: + return False + try: + return ( + Path(location.file).resolve() != Path(self.schema.source_file).resolve() + ) + except Exception: + return location.file != self.schema.source_file + + def split_imported_types( + self, items: List[object] + ) -> Tuple[List[object], List[object]]: + imported: List[object] = [] + local: List[object] = [] + for item in items: + if self.is_imported_type(item): + imported.append(item) + else: + local.append(item) + return imported, local # Return (imported, local) tuple + + def get_module_name(self) -> str: + """Get the TypeScript module name from package.""" + if self.package: + parts = self.package.split(".") + return self.to_camel_case(parts[-1]) + return "generated" + + def _module_file_name(self) -> str: + """Determine the output file name.""" + if self.schema.source_file and not self.schema.source_file.startswith("<"): + return f"{Path(self.schema.source_file).stem}.ts" + if self.schema.package: + return f"{self.schema.package.replace('.', '_')}.ts" + return "generated.ts" + + def get_registration_function_name(self) -> str: + """Get the name of the registration function.""" + return f"register{self.to_pascal_case(self.get_module_name())}Types" + + def _normalize_import_path(self, path_str: str) -> str: + if not path_str: + return path_str + try: + return str(Path(path_str).resolve()) + except Exception: + return path_str + + def _load_schema(self, file_path: str) -> Optional[Schema]: + if not file_path: + return None + if not hasattr(self, "_schema_cache"): + self._schema_cache: Dict[Path, Schema] = {} + path = Path(file_path).resolve() + if path in self._schema_cache: + return self._schema_cache[path] + try: + schema = parse_idl_file(path) + except Exception: + return None + self._schema_cache[path] = schema + return schema + + def _module_name_for_schema(self, schema: Schema) -> str: + """Derive a module name from another schema.""" + if schema.package: + parts = schema.package.split(".") + return self.to_camel_case(parts[-1]) + return "generated" + + def _registration_fn_for_schema(self, schema: Schema) -> str: + """Derive the registration function name for an imported schema.""" + mod = self._module_name_for_schema(schema) + return f"register{self.to_pascal_case(mod)}Types" + + def _collect_imported_registrations(self) -> List[Tuple[str, str]]: + """Collect (module_path, registration_fn) pairs for imported schemas.""" + file_info: Dict[str, Tuple[str, str]] = {} + for type_def in self.schema.enums + self.schema.unions + self.schema.messages: + if not self.is_imported_type(type_def): + continue + location = getattr(type_def, "location", None) + file_path = getattr(location, "file", None) if location else None + if not file_path: + continue + normalized = self._normalize_import_path(file_path) + if normalized in file_info: + continue + imported_schema = self._load_schema(file_path) + if imported_schema is None: + continue + reg_fn = self._registration_fn_for_schema(imported_schema) + mod_name = self._module_name_for_schema(imported_schema) + file_info[normalized] = (f"./{mod_name}", reg_fn) + + ordered: List[Tuple[str, str]] = [] + used: Set[str] = set() + + if self.schema.source_file: + base_dir = Path(self.schema.source_file).resolve().parent + for imp in self.schema.imports: + candidate = self._normalize_import_path( + str((base_dir / imp.path).resolve()) + ) + if candidate in file_info and candidate not in used: + ordered.append(file_info[candidate]) + used.add(candidate) + + for key in sorted(file_info.keys()): + if key in used: + continue + ordered.append(file_info[key]) + + deduped: List[Tuple[str, str]] = [] + seen: Set[Tuple[str, str]] = set() + for item in ordered: + if item in seen: + continue + seen.add(item) + deduped.append(item) + return deduped + + def _resolve_named_type( + self, name: str, parent_stack: Optional[List[Message]] = None + ) -> Optional[TypingUnion[Message, Enum, Union]]: + """Resolve a named type reference to its definition.""" + parent_stack = parent_stack or [] + if "." in name: + return self.schema.get_type(name) + for msg in reversed(parent_stack): + nested = msg.get_nested_type(name) + if nested is not None: + return nested + return self.schema.get_type(name) + + def generate_type( + self, + field_type: FieldType, + nullable: bool = False, + parent_stack: Optional[List[Message]] = None, + ) -> str: + """Generate TypeScript type string for a field type.""" + parent_stack = parent_stack or [] + type_str = "" + + if isinstance(field_type, PrimitiveType): + if field_type.kind not in self.PRIMITIVE_MAP: + raise ValueError( + f"Unsupported primitive type for TypeScript: {field_type.kind}" + ) + type_str = self.PRIMITIVE_MAP[field_type.kind] + elif isinstance(field_type, NamedType): + # Check if this NamedType matches a primitive type name + primitive_name = field_type.name.lower() + # Map common shorthand names to primitive kinds + shorthand_map = { + "float": PrimitiveKind.FLOAT32, + "double": PrimitiveKind.FLOAT64, + } + if primitive_name in shorthand_map: + type_str = self.PRIMITIVE_MAP.get(shorthand_map[primitive_name], "any") + else: + # Check if it matches any primitive kind directly + for primitive_kind, ts_type in self.PRIMITIVE_MAP.items(): + if primitive_kind.value == primitive_name: + type_str = ts_type + break + if not type_str: + # If not a primitive, treat as a message/enum type + type_str = self.safe_type_identifier( + self.to_pascal_case(field_type.name) Review Comment: [P1] Qualified named types can generate invalid TS for nested messages. Here `field_type.name` is converted directly to PascalCase, so `Outer.Inner` stays `Outer.Inner`, while nested declarations are flattened to `export interface Inner { ... }`. I reproduced with a field `Outer.Inner inner = 1;` and TypeScript failed with `Outer only refers to a type, but is being used as a namespace here`. Please resolve named types against the schema and emit the actual generated symbol name. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
