GitHub user eram added a comment to the discussion: SQL injection with Jinja 2 
templates: how to escape values?

merge this into your supsert_config.py


```
import re
from jinja2 import Environment, FileSystemLoader

# --- Regex Compilation (Optimization) ---
# Compile patterns once when the module is loaded.
_ALLOWED_PATTERN = re.compile(r'[^a-zA-Z0-9 / \\\\ { } \* & % $ # @ ! = : \' 
\(\) \[\] -]')
_COMBINED_BLACKLIST_PATTERN = re.compile(r';|--|/\*|\*/')

# --- Custom Jinja Filter Definitions ---

def _custom_alphanum_symbol_only(value):
  """
  Processes a string for direct inclusion as a string literal within PostgreSQL.
  Combines whitelisting, blacklist detection, and SQL single-quote escaping.
  
  Steps:
  1. Whitelist characters: Removes any characters not in the allowed set.
  2. Detect whitelist changes: If characters were removed, indicates unsafe 
input (returns None).
  3. Detect blacklist patterns: Checks for known SQL command/comment 
initiators. If found, indicates unsafe input (returns None).
  4. Escape single quotes: Doubles single quotes (' becomes '').
  
  Returns the safe, escaped string, or None if the input is deemed unsafe.
  """
  if not isinstance(value, str):
    value = str(value)
  
  original_val_str = value # Keep original string for comparison

  # Step 1: Whitelist allowed characters
  cleaned_val_after_whitelist = _ALLOWED_PATTERN.sub('', original_val_str)
  
  # Step 2: Detect if whitelisting removed any characters. If so, input was not 
"clean".
  if cleaned_val_after_whitelist != original_val_str:
      return None # Indicate unsafe input: characters outside whitelist were 
present

  # Step 3: Check for presence of combined blacklist patterns on the already 
whitelisted string.
  # If any are found, return None.
  if _COMBINED_BLACKLIST_PATTERN.search(cleaned_val_after_whitelist):
      return None # Indicate unsafe input: forbidden sequence was present

  # Step 4: If all safety checks pass, apply SQL string literal escaping for 
single quotes.
  # This must be the *last* transformation step.
  final_escaped_val = cleaned_val_after_whitelist.replace("'", "''")
  
  return final_escaped_val

def escape_filters(value): # Renamed from escape_sql_literal
  """
  Public filter to safely escape a single string for PostgreSQL literal 
inclusion.
  Proxies directly to _custom_alphanum_symbol_only for all processing.
  
  WARNING: While this layered approach is more robust than a pure blacklist,
  it is still NOT as secure as using parameterized queries, which is the 
industry standard
  for preventing SQL injection. Use with extreme caution.
  """
  return _custom_alphanum_symbol_only(value)

def escape_filters_in(value_list): # Renamed from format_sql_array_literal
  """
  Takes a Python list of values, applies escape_filters to each.
  If any individual value is deemed unsafe by escape_filters, it will be 
skipped.
  Formats the safe elements into a PostgreSQL ARRAY['val1','val2']::text[] 
literal.
  
  Returns None if the input list is empty or no safe elements remain,
  allowing for a direct truthiness check in Jinja.
  """
  if not isinstance(value_list, (list, tuple)):
    return None

  escaped_and_safe_elements = []
  for item in value_list:
    # Attempt to escape each item. If unsafe, escape_filters returns None.
    escaped_item = escape_filters(str(item))
    if escaped_item is not None:
      # If safe, wrap in single quotes and add to list
      escaped_and_safe_elements.append(f"'{escaped_item}'")
  
  if not escaped_and_safe_elements:
    # If the list was empty or all elements were unsafe, return None.
    return None
    
  # Join all safe, escaped, and quoted elements into the ARRAY constructor
  return f"ARRAY[{', '.join(escaped_and_safe_elements)}]::text[]"

# --- Jinja2 Environment Setup ---

# This part would typically be in your Superset config.py
# Example of how to set up your Jinja2 environment and add these custom filters.
# Replace '/path/to/templates' with the actual path where your Jinja templates 
are stored.
env = Environment(
    loader=FileSystemLoader('/path/to/templates'),
    # Other Jinja settings can go here (e.g., trim_blocks=True, 
lstrip_blocks=True)
)

# Register the custom filters with your Jinja2 environment
env.filters['escape_filters'] = escape_filters
env.filters['escape_filters_in'] = escape_filters_in


# --- Jinja Template Example Usage (for SQL Lab in Superset) ---
"""
SELECT
    ci.id,
    ci.component_name,
    ci.component_dependency_type
FROM
    component_info ci
WHERE 1=1
    {#
      Using the custom escape_filters_in filter to safely format
      the list of relation_value parameters into a PostgreSQL array literal.
      filter_values('relation_value') is expected to return a Python list of 
strings.
      If no valid elements are present, safe_relation_array_literal will be 
None.
    #}
    {% set safe_relation_array_literal = filter_values('relation_value') | 
escape_filters_in %}

    {# Only include the AND clause if a non-None array literal was generated #}
    {% if safe_relation_array_literal %}
        AND (
            {# Check for overlap between component_dependency_type and the 
filtered relation_values #}
            ({{ safe_relation_array_literal }} && 
coalesce(ci.component_dependency_type, '{}'))
            OR
            {# Special handling for 'N/A' if it's in the filter and 
component_dependency_type is empty #}
            ('N/A'=ANY({{ safe_relation_array_literal }}) AND ('{}'::text[] = 
coalesce(ci.component_dependency_type, '{}')))
        )
    {% endif %}
    -- Add other WHERE clauses here as needed
"""
```

GitHub link: 
https://github.com/apache/superset/discussions/18358#discussioncomment-13473730

----
This is an automatically sent email for notifications@superset.apache.org.
To unsubscribe, please send an email to: 
notifications-unsubscr...@superset.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org
For additional commands, e-mail: notifications-h...@superset.apache.org

Reply via email to