korbit-ai[bot] commented on code in PR #35018: URL: https://github.com/apache/superset/pull/35018#discussion_r2330607807
########## superset/daos/base.py: ########## @@ -251,3 +455,205 @@ def filter_by(cls, **filter_by: Any) -> list[T]: cls.id_column_name, data_model ).apply(query, None) return query.filter_by(**filter_by).all() + + @classmethod + def apply_column_operators( + cls, query: Any, column_operators: Optional[List[ColumnOperator]] = None + ) -> Any: + """ + Apply column operators (list of ColumnOperator) to the query using + ColumnOperatorEnum logic. Raises ValueError if a filter references a + non-existent column. + """ + if not column_operators: + return query + for c in column_operators: + if not isinstance(c, ColumnOperator): + continue + col = c.col + opr = c.opr + value = c.value + if not col or not hasattr(cls.model_cls, col): + model_name = cls.model_cls.__name__ if cls.model_cls else "Unknown" + logging.error( + f"Invalid filter: column '{col}' does not exist on {model_name}" + ) + raise ValueError( + f"Invalid filter: column '{col}' does not exist on {model_name}" + ) + column = getattr(cls.model_cls, col) + try: + # Always use ColumnOperatorEnum's apply method + operator_enum = ColumnOperatorEnum(opr) + query = query.filter(operator_enum.apply(column, value)) + except Exception as e: + logging.error(f"Error applying filter on column '{col}': {e}") + raise + return query + + @classmethod + def get_filterable_columns_and_operators(cls) -> Dict[str, List[str]]: + """ + Returns a dict mapping filterable columns (including hybrid/computed fields if + present) to their supported operators. Used by MCP tools to dynamically expose + filter options. Custom fields supported by the DAO but not present on the model + should be documented here. 
+ """ + + mapper = inspect(cls.model_cls) + columns = {c.key: c for c in mapper.columns} + # Add hybrid properties + hybrids = { + name: attr + for name, attr in vars(cls.model_cls).items() + if isinstance(attr, hybrid_property) + } + # You may add custom fields here, e.g.: + # custom_fields = {"tags": ["eq", "in_", "like"], ...} + custom_fields: Dict[str, List[str]] = {} + + filterable = {} + for name, col in columns.items(): + if isinstance(col.type, (sa.String, sa.Text)): + filterable[name] = TYPE_OPERATOR_MAP["string"] + elif isinstance(col.type, (sa.Boolean,)): + filterable[name] = TYPE_OPERATOR_MAP["boolean"] + elif isinstance(col.type, (sa.Integer, sa.Float, sa.Numeric)): + filterable[name] = TYPE_OPERATOR_MAP["number"] + elif isinstance(col.type, (sa.DateTime, sa.Date, sa.Time)): + filterable[name] = TYPE_OPERATOR_MAP["datetime"] + else: + # Fallback to eq/ne/null + filterable[name] = ["eq", "ne", "is_null", "is_not_null"] + # Add hybrid properties as string fields by default + for name in hybrids: + filterable[name] = TYPE_OPERATOR_MAP["string"] + # Add custom fields + filterable.update(custom_fields) + return filterable + + @classmethod + def _build_query( + cls, + column_operators: Optional[List[ColumnOperator]] = None, + search: Optional[str] = None, + search_columns: Optional[List[str]] = None, + custom_filters: Optional[Dict[str, BaseFilter]] = None, + skip_base_filter: bool = False, + data_model: Optional[SQLAInterface] = None, + ) -> Any: + """ + Build a SQLAlchemy query with base filter, column operators, search, and + custom filters. 
+ """ + if data_model is None: + data_model = SQLAInterface(cls.model_cls, db.session) + query = data_model.session.query(cls.model_cls) + query = cls._apply_base_filter( + query, skip_base_filter=skip_base_filter, data_model=data_model + ) + if search and search_columns: + search_filters = [] + for column_name in search_columns: + if hasattr(cls.model_cls, column_name): + column = getattr(cls.model_cls, column_name) + search_filters.append(cast(column, Text).ilike(f"%{search}%")) + if search_filters: + query = query.filter(or_(*search_filters)) + if custom_filters: + for filter_class in custom_filters.values(): + query = filter_class.apply(query, None) + if column_operators: + query = cls.apply_column_operators(query, column_operators) + return query + + @classmethod + def list( # noqa: C901 + cls, + column_operators: Optional[List[ColumnOperator]] = None, + order_column: str = "changed_on", Review Comment: ### Invalid default order column assumption <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? 
The list method assumes that a 'changed_on' column exists on every model, because it uses that name as the default order_column. ###### Why this matters If a model doesn't have a 'changed_on' column, the query will raise an AttributeError when trying to order results. ###### Suggested change ∙ *Feature Preview* Change the default order column to use the model's primary key or make it optional: ```python @classmethod def list( # noqa: C901 cls, column_operators: Optional[List[ColumnOperator]] = None, order_column: Optional[str] = None, ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/609016ae-df79-49eb-affc-b50551844803/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/609016ae-df79-49eb-affc-b50551844803?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/609016ae-df79-49eb-affc-b50551844803?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/609016ae-df79-49eb-affc-b50551844803?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/609016ae-df79-49eb-affc-b50551844803) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:13f6e983-0064-412b-a140-7f642e23e36a --> [](13f6e983-0064-412b-a140-7f642e23e36a) ########## superset/daos/base.py: ########## @@ -251,3 +455,205 @@ def filter_by(cls, **filter_by: Any) -> list[T]: cls.id_column_name, data_model ).apply(query, None) return query.filter_by(**filter_by).all() + + @classmethod + def apply_column_operators( + cls, query: Any, column_operators: Optional[List[ColumnOperator]] = None + ) -> Any: + """ + Apply column operators (list of ColumnOperator) to the query using + ColumnOperatorEnum logic. Raises ValueError if a filter references a + non-existent column. 
+ """ + if not column_operators: + return query + for c in column_operators: + if not isinstance(c, ColumnOperator): + continue + col = c.col + opr = c.opr + value = c.value + if not col or not hasattr(cls.model_cls, col): + model_name = cls.model_cls.__name__ if cls.model_cls else "Unknown" + logging.error( + f"Invalid filter: column '{col}' does not exist on {model_name}" + ) + raise ValueError( + f"Invalid filter: column '{col}' does not exist on {model_name}" + ) + column = getattr(cls.model_cls, col) + try: + # Always use ColumnOperatorEnum's apply method + operator_enum = ColumnOperatorEnum(opr) + query = query.filter(operator_enum.apply(column, value)) + except Exception as e: + logging.error(f"Error applying filter on column '{col}': {e}") + raise + return query + + @classmethod + def get_filterable_columns_and_operators(cls) -> Dict[str, List[str]]: + """ + Returns a dict mapping filterable columns (including hybrid/computed fields if + present) to their supported operators. Used by MCP tools to dynamically expose + filter options. Custom fields supported by the DAO but not present on the model + should be documented here. 
+ """ + + mapper = inspect(cls.model_cls) + columns = {c.key: c for c in mapper.columns} + # Add hybrid properties + hybrids = { + name: attr + for name, attr in vars(cls.model_cls).items() + if isinstance(attr, hybrid_property) + } + # You may add custom fields here, e.g.: + # custom_fields = {"tags": ["eq", "in_", "like"], ...} + custom_fields: Dict[str, List[str]] = {} + + filterable = {} + for name, col in columns.items(): + if isinstance(col.type, (sa.String, sa.Text)): + filterable[name] = TYPE_OPERATOR_MAP["string"] + elif isinstance(col.type, (sa.Boolean,)): + filterable[name] = TYPE_OPERATOR_MAP["boolean"] + elif isinstance(col.type, (sa.Integer, sa.Float, sa.Numeric)): + filterable[name] = TYPE_OPERATOR_MAP["number"] + elif isinstance(col.type, (sa.DateTime, sa.Date, sa.Time)): + filterable[name] = TYPE_OPERATOR_MAP["datetime"] + else: + # Fallback to eq/ne/null + filterable[name] = ["eq", "ne", "is_null", "is_not_null"] + # Add hybrid properties as string fields by default + for name in hybrids: + filterable[name] = TYPE_OPERATOR_MAP["string"] + # Add custom fields + filterable.update(custom_fields) + return filterable + + @classmethod + def _build_query( + cls, + column_operators: Optional[List[ColumnOperator]] = None, + search: Optional[str] = None, + search_columns: Optional[List[str]] = None, + custom_filters: Optional[Dict[str, BaseFilter]] = None, + skip_base_filter: bool = False, + data_model: Optional[SQLAInterface] = None, + ) -> Any: + """ + Build a SQLAlchemy query with base filter, column operators, search, and + custom filters. 
+ """ + if data_model is None: + data_model = SQLAInterface(cls.model_cls, db.session) + query = data_model.session.query(cls.model_cls) + query = cls._apply_base_filter( + query, skip_base_filter=skip_base_filter, data_model=data_model + ) + if search and search_columns: + search_filters = [] + for column_name in search_columns: + if hasattr(cls.model_cls, column_name): + column = getattr(cls.model_cls, column_name) + search_filters.append(cast(column, Text).ilike(f"%{search}%")) Review Comment: ### Unsanitized Search Input in LIKE Queries <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? User-provided search input is directly interpolated into SQL LIKE queries without proper sanitization, making it potentially vulnerable to SQL injection attacks. ###### Why this matters Malicious users could inject SQL code through the search parameter that could alter the query's behavior or access unauthorized data. ###### Suggested change ∙ *Feature Preview* Use SQLAlchemy's bind parameters to safely handle the search input: ```python search_filters.append(cast(column, Text).ilike('%' + search + '%')) ``` *Note:* the concatenation above builds exactly the same string as the original f-string, and SQLAlchemy's `ilike()` already sends its argument to the database as a bound parameter, so the search text cannot inject SQL either way. The remaining concern is that `%` and `_` inside `search` are treated as LIKE wildcards; to match them literally, escape them in the search term and pass the `escape` argument to `ilike()`. ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d8bce617-60bb-43e9-a999-a65eb7b707ee/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d8bce617-60bb-43e9-a999-a65eb7b707ee?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d8bce617-60bb-43e9-a999-a65eb7b707ee?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d8bce617-60bb-43e9-a999-a65eb7b707ee?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d8bce617-60bb-43e9-a999-a65eb7b707ee) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. 
</sub> <!--- korbi internal id:968f87f8-8059-4293-98d2-fd1bb05195ad --> [](968f87f8-8059-4293-98d2-fd1bb05195ad) ########## superset/daos/base.py: ########## @@ -32,6 +51,100 @@ T = TypeVar("T", bound=Model) +class ColumnOperatorEnum(str, Enum): + eq = "eq" + ne = "ne" + sw = "sw" + ew = "ew" + in_ = "in" + nin = "nin" + gt = "gt" + gte = "gte" + lt = "lt" + lte = "lte" + like = "like" + ilike = "ilike" + is_null = "is_null" + is_not_null = "is_not_null" + + @classmethod + def operator_map(cls) -> Dict[ColumnOperatorEnum, Any]: + return { + cls.eq: lambda col, val: col == val, + cls.ne: lambda col, val: col != val, + cls.sw: lambda col, val: col.like(f"{val}%"), + cls.ew: lambda col, val: col.like(f"%{val}"), + cls.in_: lambda col, val: col.in_( + val if isinstance(val, (list, tuple)) else [val] + ), + cls.nin: lambda col, val: ~col.in_( + val if isinstance(val, (list, tuple)) else [val] + ), + cls.gt: lambda col, val: col > val, + cls.gte: lambda col, val: col >= val, + cls.lt: lambda col, val: col < val, + cls.lte: lambda col, val: col <= val, + cls.like: lambda col, val: col.like(f"%{val}%"), + cls.ilike: lambda col, val: col.ilike(f"%{val}%"), + cls.is_null: lambda col, _: col.is_(None), + cls.is_not_null: lambda col, _: col.isnot(None), + } + + def apply(self, column: Any, value: Any) -> Any: + op_func = self.operator_map().get(self) + if not op_func: + raise ValueError(f"Unsupported operator: {self}") + return op_func(column, value) Review Comment: ### Missing value type validation <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? 
The apply method in ColumnOperatorEnum doesn't validate the value type before applying the operator. ###### Why this matters Invalid value types could lead to runtime errors or incorrect SQL queries, especially for operators like 'in_' that expect specific value types. ###### Suggested change ∙ *Feature Preview* Add value type validation before applying the operator: ```python def apply(self, column: Any, value: Any) -> Any: op_func = self.operator_map().get(self) if not op_func: raise ValueError(f"Unsupported operator: {self}") if self in (self.in_, self.nin) and not isinstance(value, (list, tuple)): raise ValueError(f"Operator {self} requires a list or tuple value") if value is None and self not in (self.is_null, self.is_not_null): raise ValueError(f"None value not allowed for operator {self}") return op_func(column, value) ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/056ffcbd-a2f7-4005-98ff-a64b4f08abb7/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/056ffcbd-a2f7-4005-98ff-a64b4f08abb7?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/056ffcbd-a2f7-4005-98ff-a64b4f08abb7?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/056ffcbd-a2f7-4005-98ff-a64b4f08abb7?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/056ffcbd-a2f7-4005-98ff-a64b4f08abb7) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:d2cf643a-8e34-48c5-826c-152e044b14a0 --> [](d2cf643a-8e34-48c5-826c-152e044b14a0) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org