GitHub user eram edited a comment on the discussion: SQL injection with Jinja 2 templates: how to escape values?
merge this into your supsert_config.py ``` import re from jinja2 import Environment, FileSystemLoader # --- Regex Compilation (Optimization) --- # Compile patterns once when the module is loaded. _ALLOWED_PATTERN = re.compile(r'[^a-zA-Z0-9 / \\\\ { } \* & % $ # @ ! = : \' \(\) \[\] -]') _COMBINED_BLACKLIST_PATTERN = re.compile(r';|--|/\*|\*/') # --- Custom Jinja Filter Definitions --- def _custom_alphanum_symbol_only(value): """ Processes a string for direct inclusion as a string literal within PostgreSQL. Combines whitelisting, blacklist detection, and SQL single-quote escaping. Steps: 1. Whitelist characters: Removes any characters not in the allowed set. 2. Detect whitelist changes: If characters were removed, indicates unsafe input (returns None). 3. Detect blacklist patterns: Checks for known SQL command/comment initiators. If found, indicates unsafe input (returns None). 4. Escape single quotes: Doubles single quotes (' becomes ''). Returns the safe, escaped string, or None if the input is deemed unsafe. """ if not isinstance(value, str): value = str(value) original_val_str = value # Keep original string for comparison # Step 1: Whitelist allowed characters cleaned_val_after_whitelist = _ALLOWED_PATTERN.sub('', original_val_str) # Step 2: Detect if whitelisting removed any characters. If so, input was not "clean". if cleaned_val_after_whitelist != original_val_str: return None # Indicate unsafe input: characters outside whitelist were present # Step 3: Check for presence of combined blacklist patterns on the already whitelisted string. # If any are found, return None. if _COMBINED_BLACKLIST_PATTERN.search(cleaned_val_after_whitelist): return None # Indicate unsafe input: forbidden sequence was present # Step 4: If all safety checks pass, apply SQL string literal escaping for single quotes. # This must be the *last* transformation step. final_escaped_val = cleaned_val_after_whitelist.replace("'", "''") return final_escaped_val def escape_filters(value): # Renamed from escape_sql_literal """ Public filter to safely escape a single string for PostgreSQL literal inclusion. Proxies directly to _custom_alphanum_symbol_only for all processing. WARNING: While this layered approach is more robust than a pure blacklist, it is still NOT as secure as using parameterized queries, which is the industry standard for preventing SQL injection. Use with extreme caution. """ return _custom_alphanum_symbol_only(value) def escape_filters_in(value_list): # Renamed from format_sql_array_literal """ Takes a Python list of values, applies escape_filters to each. If any individual value is deemed unsafe by escape_filters, it will be skipped. Formats the safe elements into a PostgreSQL ARRAY['val1','val2']::text[] literal. Returns None if the input list is empty or no safe elements remain, allowing for a direct truthiness check in Jinja. """ if not isinstance(value_list, (list, tuple)): return None escaped_and_safe_elements = [] for item in value_list: # Attempt to escape each item. If unsafe, escape_filters returns None. escaped_item = escape_filters(str(item)) if escaped_item is not None: # If safe, wrap in single quotes and add to list escaped_and_safe_elements.append(f"'{escaped_item}'") if not escaped_and_safe_elements: # If the list was empty or all elements were unsafe, return None. return None # Join all safe, escaped, and quoted elements into the ARRAY constructor return f"ARRAY[{', '.join(escaped_and_safe_elements)}]::text[]" # --- Jinja2 Environment Setup --- # This part would typically be in your Superset config.py # Example of how to set up your Jinja2 environment and add these custom filters. # Replace '/path/to/templates' with the actual path where your Jinja templates are stored. env = Environment( loader=FileSystemLoader('/path/to/templates'), # Other Jinja settings can go here (e.g., trim_blocks=True, lstrip_blocks=True) ) # Register the custom filters with your Jinja2 environment # These lines explicitly register your Python functions as Jinja filters env.filters['escape_filters'] = escape_filters env.filters['escape_filters_in'] = escape_filters_in # --- Jinja Template Example Usage --- # This demonstrates how you would use the filters directly within any Jinja template. """ {# Example of a Python list of strings that would be passed into the Jinja template context. In Superset, this might come from a filter control or other data source. Here, we hardcode it for demonstration. #} {% set user_input_list = ["Value One", "Value Two with 'quote'", "Value; Drop Table", "Valid-Data/Path"] %} {# Use the custom escape_filters_in filter to safely format the Python list into a PostgreSQL array literal string. If no valid elements are present after sanitization, safe_array_literal will be None. #} {% set safe_array_literal = user_input_list | escape_filters_in %} SELECT some_table.id, some_table.name, some_table.array_column FROM some_table WHERE 1=1 {# Only include the AND clause if a non-None array literal was generated #} {% if safe_array_literal %} AND ( {# Check for overlap between some_table.array_column and the filtered values #} ({{ safe_array_literal }} && coalesce(some_table.array_column, '{}')) OR {# Special handling: 'N/A' in filter_values and array_column is empty #} ('N/A'=ANY({{ safe_array_literal }}) AND ('{}'::text[] = coalesce(some_table.array_column, '{}'))) ) {% endif %} -- Add other WHERE clauses here as needed, e.g.: -- AND some_table.status = 'ACTIVE' """ ``` GitHub link: https://github.com/apache/superset/discussions/18358#discussioncomment-13473730 ---- This is an automatically sent email for notifications@superset.apache.org. To unsubscribe, please send an email to: notifications-unsubscr...@superset.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org