Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-nh3 for openSUSE:Factory checked in at 2026-06-29 17:29:29 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-nh3 (Old) and /work/SRC/openSUSE:Factory/.python-nh3.new.11887 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-nh3" Mon Jun 29 17:29:29 2026 rev:9 rq:1362136 version:0.3.6 Changes: -------- --- /work/SRC/openSUSE:Factory/python-nh3/python-nh3.changes 2026-05-12 19:26:22.842372016 +0200 +++ /work/SRC/openSUSE:Factory/.python-nh3.new.11887/python-nh3.changes 2026-06-29 17:29:47.792871770 +0200 @@ -1,0 +2,10 @@ +Sun Jun 28 09:58:11 UTC 2026 - Dirk Müller <[email protected]> + +- update to 0.3.6: + * Validate clean_content_tags conflict with tags + * Document tag_attribute_values as alternate to attributes + * Add nh3.escape alias for clean_text + * Bump pyo3 from 0.28.3 to 0.29.0 + * Expose ammonia's url_relative policy via url_relative kwarg + +------------------------------------------------------------------- Old: ---- nh3-0.3.5.tar.gz New: ---- nh3-0.3.6.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-nh3.spec ++++++ --- /var/tmp/diff_new_pack.HBnkja/_old 2026-06-29 17:29:49.336924249 +0200 +++ /var/tmp/diff_new_pack.HBnkja/_new 2026-06-29 17:29:49.340924385 +0200 @@ -18,7 +18,7 @@ %{?sle15_python_module_pythons} Name: python-nh3 -Version: 0.3.5 +Version: 0.3.6 Release: 0 Summary: Ammonia HTML sanitizer Python binding License: MIT ++++++ _service ++++++ --- /var/tmp/diff_new_pack.HBnkja/_old 2026-06-29 17:29:49.372925473 +0200 +++ /var/tmp/diff_new_pack.HBnkja/_new 2026-06-29 17:29:49.376925608 +0200 @@ -1,7 +1,7 @@ <services> <service name="download_files" mode="manual"/> <service name="cargo_vendor" mode="manual"> - <param name="src">nh3-0.3.5.tar.gz</param> + <param name="src">nh3-0.3.6.tar.gz</param> <param name="compression">zst</param> </service> </services> ++++++ nh3-0.3.5.tar.gz -> nh3-0.3.6.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/.github/workflows/CI.yml new/nh3-0.3.6/.github/workflows/CI.yml --- old/nh3-0.3.5/.github/workflows/CI.yml 2006-07-24 03:21:28.000000000 +0200 +++ 
new/nh3-0.3.6/.github/workflows/CI.yml 2006-07-24 03:21:28.000000000 +0200 @@ -184,7 +184,7 @@ manylinux: auto args: --release --out dist -i python3.13 sccache: true - - uses: uraimo/[email protected] + - uses: uraimo/[email protected] if: matrix.platform.arch != 'ppc64' name: Install built wheel with: @@ -276,7 +276,7 @@ manylinux: musllinux_1_2 args: --release --out dist -i python3.14t sccache: true - - uses: uraimo/[email protected] + - uses: uraimo/[email protected] name: Install built wheel with: arch: ${{ matrix.platform.arch }} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/Cargo.lock new/nh3-0.3.6/Cargo.lock --- old/nh3-0.3.5/Cargo.lock 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/Cargo.lock 2006-07-24 03:21:28.000000000 +0200 @@ -23,19 +23,9 @@ [[package]] name = "bitflags" -version = "2.11.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" - -[[package]] -name = "cc" -version = "1.2.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" -dependencies = [ - "find-msvc-tools", - "shlex", -] +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" [[package]] name = "cfg-if" @@ -68,9 +58,9 @@ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -93,12 +83,6 @@ ] [[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] name = 
"form_urlencoded" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -235,9 +219,9 @@ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -272,9 +256,9 @@ [[package]] name = "log" -version = "0.4.29" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad" [[package]] name = "mac" @@ -318,7 +302,7 @@ [[package]] name = "nh3" -version = "0.3.5" +version = "0.3.6" dependencies = [ "ammonia", "ouroboros", @@ -481,9 +465,9 @@ [[package]] name = "pyo3" -version = "0.28.3" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" +checksum = "cd274650b21d4bfc26a0a47587962c1edb425f69287324355cd040c3ea66071c" dependencies = [ "libc", "once_cell", @@ -495,19 +479,18 @@ [[package]] name = "pyo3-build-config" -version = "0.28.3" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" +checksum = "c5e2a7d2f0d013342f295c048ad19237add5154a55b1c5a254c0ec93d4109078" dependencies = [ - "python3-dll-a", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.28.3" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" +checksum = "ca85c467da1bbc8d866eea5deff9cf29ea5f7785054a17da36e65bda9c05845b" dependencies = [ "libc", "pyo3-build-config", @@ 
-515,9 +498,9 @@ [[package]] name = "pyo3-macros" -version = "0.28.3" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" +checksum = "9ac53762fd065daa3194dd09337a38bd793a188100fd1a9304c4ab312d901771" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -527,27 +510,17 @@ [[package]] name = "pyo3-macros-backend" -version = "0.28.3" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" +checksum = "4ca3a1557399783172dc5bf39cfca835157732532cba56b71d2292161e53b362" dependencies = [ "heck 0.5.0", "proc-macro2", - "pyo3-build-config", "quote", "syn", ] [[package]] -name = "python3-dll-a" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d80ba7540edb18890d444c5aa8e1f1f99b1bdf26fb26ae383135325f4a36042b" -dependencies = [ - "cc", -] - -[[package]] name = "quote" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -616,22 +589,16 @@ ] [[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] name = "siphasher" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" [[package]] name = "smallvec" -version = "1.15.1" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" [[package]] name = "stable_deref_trait" @@ -672,9 +639,9 @@ 
[[package]] name = "syn" -version = "2.0.117" +version = "2.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" dependencies = [ "proc-macro2", "quote", @@ -787,9 +754,9 @@ [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -810,9 +777,9 @@ [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/Cargo.toml new/nh3-0.3.6/Cargo.toml --- old/nh3-0.3.5/Cargo.toml 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/Cargo.toml 2006-07-24 03:21:28.000000000 +0200 @@ -1,6 +1,6 @@ [package] name = "nh3" -version = "0.3.5" +version = "0.3.6" authors = ["messense <[email protected]>"] edition = "2024" description = "Python bindings to the ammonia HTML sanitization library." 
@@ -15,5 +15,5 @@ [dependencies] ammonia = "4.1.2" -pyo3 = { version = "0.28.3", features = ["abi3-py38", "generate-import-lib"] } +pyo3 = { version = "0.29.0", features = ["abi3-py38", "generate-import-lib"] } ouroboros = "0.18" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/PKG-INFO new/nh3-0.3.6/PKG-INFO --- old/nh3-0.3.5/PKG-INFO 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/PKG-INFO 2006-07-24 03:21:28.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: nh3 -Version: 0.3.5 +Version: 0.3.6 Classifier: Programming Language :: Rust Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/nh3.pyi new/nh3-0.3.6/nh3.pyi --- old/nh3-0.3.5/nh3.pyi 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/nh3.pyi 2006-07-24 03:21:28.000000000 +0200 @@ -1,10 +1,27 @@ -from typing import AbstractSet, Callable, Dict, Mapping, Optional, Set +from typing import ( + AbstractSet, + Callable, + Dict, + Literal, + Mapping, + Optional, + Set, + Tuple, + Union, +) ALLOWED_TAGS: Set[str] ALLOWED_ATTRIBUTES: Dict[str, Set[str]] ALLOWED_URL_SCHEMES: Set[str] CLEAN_CONTENT_TAGS: Set[str] +UrlRelative = Union[ + Literal["pass_through", "deny"], + Tuple[Literal["rewrite_with_base"], str], + Tuple[Literal["rewrite_with_root"], str, str], + Callable[[str], Optional[str]], +] + class Cleaner: def __init__( self, @@ -20,6 +37,7 @@ url_schemes: Optional[AbstractSet[str]] = None, allowed_classes: Optional[Mapping[str, AbstractSet[str]]] = None, filter_style_properties: Optional[AbstractSet[str]] = None, + url_relative: Optional[UrlRelative] = None, ) -> None: ... def clean(self, html: str) -> str: ... 
@@ -37,6 +55,8 @@ url_schemes: Optional[AbstractSet[str]] = None, allowed_classes: Optional[Mapping[str, AbstractSet[str]]] = None, filter_style_properties: Optional[AbstractSet[str]] = None, + url_relative: Optional[UrlRelative] = None, ) -> str: ... def clean_text(html: str, tags: Optional[AbstractSet[str]] = None) -> str: ... +def escape(html: str, tags: Optional[AbstractSet[str]] = None) -> str: ... def is_html(html: str) -> bool: ... diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/src/lib.rs new/nh3-0.3.6/src/lib.rs --- old/nh3-0.3.5/src/lib.rs 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/src/lib.rs 2006-07-24 03:21:28.000000000 +0200 @@ -6,6 +6,19 @@ use pyo3::prelude::*; use pyo3::types::{PyString, PyTuple}; +/// Internal representation of the parsed `url_relative` keyword argument. +/// +/// Parsing and validation happen eagerly when the `Cleaner` is constructed; this +/// enum is the validated result that gets converted to `ammonia::UrlRelative` in +/// `build_ammonia_from_config`. +enum UrlRelativeConfig { + PassThrough, + Deny, + RewriteWithBase(ammonia::Url), + RewriteWithRoot { root: ammonia::Url, path: String }, + Custom(Py<PyAny>), +} + struct Config { tags: Option<HashSet<String>>, clean_content_tags: Option<HashSet<String>>, @@ -19,6 +32,7 @@ url_schemes: Option<HashSet<String>>, allowed_classes: Option<HashMap<String, HashSet<String>>>, filter_style_properties: Option<HashSet<String>>, + url_relative: Option<UrlRelativeConfig>, } impl Default for Config { @@ -36,10 +50,85 @@ url_schemes: None, allowed_classes: None, filter_style_properties: None, + url_relative: None, } } } +/// Parse the Python `url_relative` argument into a validated [`UrlRelativeConfig`]. +/// +/// Accepts the strings ``"pass_through"`` / ``"deny"``, the tuples +/// ``("rewrite_with_base", base_url)`` / ``("rewrite_with_root", root_url, path)``, +/// or a callable. 
Any other value raises ``ValueError`` (bad mode / unparseable +/// URL / malformed tuple) or ``TypeError`` (unsupported type). +fn parse_url_relative(obj: &Bound<'_, PyAny>) -> PyResult<UrlRelativeConfig> { + if obj.cast::<PyString>().is_ok() { + let s: String = obj.extract()?; + return match s.as_str() { + "pass_through" => Ok(UrlRelativeConfig::PassThrough), + "deny" => Ok(UrlRelativeConfig::Deny), + other => Err(PyValueError::new_err(format!( + "invalid url_relative string {other:?}; expected \"pass_through\" or \"deny\"" + ))), + }; + } + if let Ok(tuple) = obj.cast::<PyTuple>() { + let mode: String = tuple + .get_item(0) + .map_err(|_| PyValueError::new_err("url_relative tuple must not be empty"))? + .extract() + .map_err(|_| PyValueError::new_err("url_relative tuple mode must be a string"))?; + return match mode.as_str() { + "rewrite_with_base" => { + if tuple.len() != 2 { + return Err(PyValueError::new_err( + "url_relative (\"rewrite_with_base\", base_url) expects exactly 2 elements", + )); + } + let base: String = tuple.get_item(1)?.extract().map_err(|_| { + PyValueError::new_err( + "url_relative rewrite_with_base base_url must be a string", + ) + })?; + let url = ammonia::Url::parse(&base).map_err(|e| { + PyValueError::new_err(format!("invalid url_relative base URL {base:?}: {e}")) + })?; + Ok(UrlRelativeConfig::RewriteWithBase(url)) + } + "rewrite_with_root" => { + if tuple.len() != 3 { + return Err(PyValueError::new_err( + "url_relative (\"rewrite_with_root\", root_url, path) expects exactly 3 elements", + )); + } + let root_url: String = tuple.get_item(1)?.extract().map_err(|_| { + PyValueError::new_err( + "url_relative rewrite_with_root root_url must be a string", + ) + })?; + let path: String = tuple.get_item(2)?.extract().map_err(|_| { + PyValueError::new_err("url_relative rewrite_with_root path must be a string") + })?; + let root = ammonia::Url::parse(&root_url).map_err(|e| { + PyValueError::new_err(format!( + "invalid url_relative root URL 
{root_url:?}: {e}" + )) + })?; + Ok(UrlRelativeConfig::RewriteWithRoot { root, path }) + } + other => Err(PyValueError::new_err(format!( + "invalid url_relative mode {other:?}; expected \"rewrite_with_base\" or \"rewrite_with_root\"" + ))), + }; + } + if obj.is_callable() { + return Ok(UrlRelativeConfig::Custom(obj.clone().unbind())); + } + Err(PyTypeError::new_err( + "url_relative must be a string, a tuple, or a callable", + )) +} + #[self_referencing] struct Inner { config: Config, @@ -53,6 +142,8 @@ /// :param tags: Sets the tags that are allowed. /// :type tags: ``set[str]``, optional /// :param clean_content_tags: Sets the tags whose contents will be completely removed from the output. +/// Must be disjoint from ``tags`` (or the default allowed set when ``tags`` +/// is omitted); a tag cannot be both kept and have its content stripped. /// :type clean_content_tags: ``set[str]``, optional /// :param attributes: Sets the HTML attributes that are allowed on specific tags, /// ``*`` key means the attributes are allowed on any tag. @@ -76,6 +167,12 @@ /// :param tag_attribute_values: Sets the values of HTML attributes that are allowed on specific tags. /// The value is structured as a map from tag names to a map from attribute names to a set of attribute values. /// If a tag is not itself whitelisted, adding entries to this map will do nothing. +/// +/// This map is an *alternate* to the entries of ``attributes`` (and ``attributes["*"]``): +/// if the same attribute is also whitelisted there for the same tag, every value is +/// accepted and this per-value whitelist is ignored for that attribute. To actually +/// restrict the allowed values, whitelist the tag but do **not** also list the +/// attribute in ``attributes``. /// :type tag_attribute_values: ``dict[str, dict[str, set[str]]]``, optional /// :param set_tag_attribute_values: Sets the values of HTML attributes that are to be set on specific tags. 
/// The value is structured as a map from tag names to a map from attribute names to an attribute value. @@ -93,6 +190,19 @@ /// invalid declarations and @rules will be removed, with only syntactically valid /// declarations kept. /// :type filter_style_properties: ``set[str]``, optional +/// :param url_relative: Configures how relative URLs (in ``href`` / ``src`` / +/// ``<object data=...>``) are handled. Defaults to ``None`` (pass relative +/// URLs through unchanged). Accepted values: +/// +/// - ``"pass_through"``: keep relative URLs unchanged (explicit default). +/// - ``"deny"``: strip relative URLs entirely. +/// - ``("rewrite_with_base", base_url)``: resolve relative URLs against ``base_url``. +/// - ``("rewrite_with_root", root_url, path)``: force paths into a directory. +/// - a callable ``(url) -> str | None``: rewrite relative URLs; return a +/// string to replace, or ``None`` to strip. A callback that raises (or +/// returns a non-string, non-``None`` value) strips the URL, and the error +/// is reported via ``sys.unraisablehook``. 
+/// :type url_relative: ``str | tuple | Callable[[str], str | None]``, optional /// /// Example usage: /// @@ -254,6 +364,60 @@ .collect(), ); } + if let Some(url_relative) = config.url_relative.as_ref() { + let value = match url_relative { + UrlRelativeConfig::PassThrough => ammonia::UrlRelative::PassThrough, + UrlRelativeConfig::Deny => ammonia::UrlRelative::Deny, + UrlRelativeConfig::RewriteWithBase(url) => { + ammonia::UrlRelative::RewriteWithBase(url.clone()) + } + UrlRelativeConfig::RewriteWithRoot { root, path } => { + ammonia::UrlRelative::RewriteWithRoot { + root: root.clone(), + path: path.clone(), + } + } + UrlRelativeConfig::Custom(callback) => { + let callback = Python::attach(|py| callback.clone_ref(py)); + // Help the compiler infer the higher-ranked `Fn` bound that + // `UrlRelative::Custom` requires: the closure only ever returns + // owned/None values, so without this it cannot tie the output + // lifetime to the input `&str`. + fn constrain<F>(f: F) -> F + where + F: for<'a> Fn(&'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static, + { + f + } + let evaluate = constrain(move |url: &str| { + Python::attach(|py| { + let res = callback.call1(py, (url,)); + let err = match res { + Ok(val) => { + if val.is_none(py) { + return None; + } + match val.extract::<String>(py) { + Ok(s) => return Some(Cow::Owned(s)), + Err(_) => PyTypeError::new_err( + "expected url_relative callback to return str or None", + ), + } + } + Err(err) => err, + }; + // A failing or mistyped callback strips the URL, keeping + // clean() infallible (unlike attribute_filter, which + // preserves the original value on error). 
+ err.write_unraisable(py, None); + None + }) + }); + ammonia::UrlRelative::Custom(Box::new(evaluate)) + } + }; + builder.url_relative(value); + } builder } @@ -279,7 +443,8 @@ set_tag_attribute_values = None, url_schemes = None, allowed_classes = None, - filter_style_properties = None + filter_style_properties = None, + url_relative = None ))] fn py_new( py: Python, @@ -295,12 +460,17 @@ url_schemes: Option<HashSet<String>>, allowed_classes: Option<HashMap<String, HashSet<String>>>, filter_style_properties: Option<HashSet<String>>, + url_relative: Option<Py<PyAny>>, ) -> PyResult<Self> { if let Some(callback) = attribute_filter.as_ref() { if !callback.bind(py).is_callable() { return Err(PyTypeError::new_err("attribute_filter must be callable")); } } + let url_relative = match url_relative { + Some(obj) => Some(parse_url_relative(obj.bind(py))?), + None => None, + }; if link_rel.is_some() { if let Some(ref attrs) = attributes { for (tag, attr_set) in attrs.iter() { @@ -314,6 +484,30 @@ } } } + if let Some(ref clean_tags) = clean_content_tags { + // A tag listed in both the allowed `tags` set and `clean_content_tags` + // makes ammonia panic. Raise an explicit ValueError instead. When the + // caller omits `tags`, ammonia falls back to its default allowed set, + // so check against that default in order to catch e.g. + // `clean_content_tags={"p"}`. 
+ let conflict = match tags.as_ref() { + Some(allowed) => clean_tags.iter().find(|t| allowed.contains(t.as_str())), + None => { + let default_tags = ammonia::Builder::default().clone_tags(); + clean_tags + .iter() + .find(|t| default_tags.contains(t.as_str())) + } + }; + if let Some(tag) = conflict { + return Err(PyValueError::new_err(format!( + "tag \"{}\" cannot appear in both `tags` and `clean_content_tags`; \ + either remove it from `clean_content_tags` or pass an explicit \ + `tags` set that excludes it", + tag + ))); + } + } let config = Config { tags, clean_content_tags, @@ -327,6 +521,7 @@ url_schemes, allowed_classes, filter_style_properties, + url_relative, }; Ok(Self::new(config)) } @@ -385,8 +580,12 @@ /// ... ) /// '<a href="/" id="link" rel="noopener noreferrer">click</a>' /// -/// ``tag_attribute_values`` restricts an attribute to a set of allowed values, -/// while ``set_tag_attribute_values`` unconditionally adds attributes: +/// ``tag_attribute_values`` restricts an attribute to a set of allowed values +/// (values outside the set cause the attribute to be stripped), while +/// ``set_tag_attribute_values`` unconditionally adds attributes. Note that +/// ``tag_attribute_values`` is an *alternate* to ``attributes`` — if the same +/// attribute is also whitelisted in ``attributes`` for that tag, every value +/// is allowed and the per-value whitelist is ignored: /// /// .. code-block:: pycon /// @@ -396,6 +595,11 @@ /// ... ) /// '<div role="alert">warning</div>' /// >>> nh3.clean( +/// ... "<div role='banner'>warning</div>", +/// ... tag_attribute_values={"div": {"role": {"alert", "status"}}}, +/// ... ) +/// '<div>warning</div>' +/// >>> nh3.clean( /// ... "<div>content</div>", /// ... set_tag_attribute_values={"div": {"class": "safe"}}, /// ... ) @@ -451,6 +655,29 @@ /// >>> nh3.clean("<a href='https://tag.example' rel='tag'>#tag</a>", /// ... 
link_rel=None, attributes=attributes) /// '<a href="https://tag.example" rel="tag">#tag</a>' +/// +/// ``url_relative`` controls how relative URLs are handled. ``"deny"`` strips +/// them, while ``("rewrite_with_base", base)`` resolves them against a base URL: +/// +/// .. code-block:: pycon +/// +/// >>> nh3.clean('<a href="/foo">x</a>', url_relative="deny") +/// '<a rel="noopener noreferrer">x</a>' +/// >>> nh3.clean( +/// ... '<a href="/foo">x</a>', +/// ... url_relative=("rewrite_with_base", "https://example.com"), +/// ... ) +/// '<a href="https://example.com/foo" rel="noopener noreferrer">x</a>' +/// +/// A callable rewrites relative URLs (return ``None`` to strip): +/// +/// .. code-block:: pycon +/// +/// >>> nh3.clean( +/// ... '<img src="/a.png">', +/// ... url_relative=lambda url: f"https://cdn.example.com{url}", +/// ... ) +/// '<img src="https://cdn.example.com/a.png">' #[pyfunction(signature = ( html, @@ -465,7 +692,8 @@ set_tag_attribute_values = None, url_schemes = None, allowed_classes = None, - filter_style_properties = None + filter_style_properties = None, + url_relative = None ))] #[allow(clippy::too_many_arguments)] fn clean( @@ -483,6 +711,7 @@ url_schemes: Option<HashSet<String>>, allowed_classes: Option<HashMap<String, HashSet<String>>>, filter_style_properties: Option<HashSet<String>>, + url_relative: Option<Py<PyAny>>, ) -> PyResult<String> { let cleaner = Cleaner::py_new( py, @@ -498,12 +727,16 @@ url_schemes, allowed_classes, filter_style_properties, + url_relative, )?; Ok(py.detach(|| cleaner.clean(html))) } /// Turn an arbitrary string into unformatted HTML. /// +/// Also exposed as :func:`escape`, which is the preferred name — the function escapes +/// input rather than sanitizing HTML. +/// /// Roughly equivalent to Python's html.escape() or PHP's htmlspecialchars and /// htmlentities. Escaping is as strict as possible, encoding every character /// that has special meaning to the HTML parser. 
@@ -545,6 +778,40 @@ } } +/// HTML-escape an arbitrary string. +/// +/// Alias for :func:`clean_text` — same signature, same behaviour. The ``escape`` name +/// is preferred because the function escapes input rather than sanitizing HTML. +/// +/// Note: this is stricter than Python's stdlib :func:`html.escape`. ``html.escape`` +/// only encodes ``&``, ``<``, ``>``, and optionally ``"`` and ``'``; ``nh3.escape`` +/// encodes every character that has special meaning to the HTML parser. +/// +/// If ``tags`` is given, those tags are passed through with no attributes; everything +/// else is stripped (content kept). Behaves like :func:`clean` with ``attributes={}`` +/// restricted to the given tag set. +/// +/// :param html: Input HTML fragment +/// :type html: ``str`` +/// :param tags: Tags to preserve; when omitted the string is fully escaped. +/// :type tags: ``set[str]``, optional +/// :return: Escaped text +/// :rtype: ``str`` +/// +/// For example: +/// +/// .. code-block:: pycon +/// +/// >>> import nh3 +/// >>> nh3.escape('Robert"); abuse();//') +/// 'Robert&quot;);&#32;abuse();&#47;&#47;' +/// >>> nh3.escape('<span>hello <mention>moto</mention>!</span>', tags={'mention'}) +/// 'hello <mention>moto</mention>!' +#[pyfunction(signature = (html, tags = None))] +fn escape(py: Python, html: &str, tags: Option<HashSet<String>>) -> String { + clean_text(py, html, tags) +} + /// Determine if a given string contains HTML. /// /// This function parses the full string and checks for any HTML syntax. 
@@ -575,6 +842,7 @@ m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add_function(wrap_pyfunction!(clean, m)?)?; m.add_function(wrap_pyfunction!(clean_text, m)?)?; + m.add_function(wrap_pyfunction!(escape, m)?)?; m.add_function(wrap_pyfunction!(is_html, m)?)?; m.add_class::<Cleaner>()?; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/tests/test_doctests.py new/nh3-0.3.6/tests/test_doctests.py --- old/nh3-0.3.5/tests/test_doctests.py 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/tests/test_doctests.py 2006-07-24 03:21:28.000000000 +0200 @@ -7,7 +7,7 @@ finder = doctest.DocTestFinder() runner = doctest.DocTestRunner(verbose=False) globs = {"nh3": nh3} - for name in ["clean", "clean_text", "is_html"]: + for name in ["clean", "clean_text", "escape", "is_html"]: obj = getattr(nh3, name) for test in finder.find(obj, f"nh3.{name}"): if test.examples: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/nh3-0.3.5/tests/test_nh3.py new/nh3-0.3.6/tests/test_nh3.py --- old/nh3-0.3.5/tests/test_nh3.py 2006-07-24 03:21:28.000000000 +0200 +++ new/nh3-0.3.6/tests/test_nh3.py 2006-07-24 03:21:28.000000000 +0200 @@ -130,6 +130,50 @@ assert result == '<a href="http://example.com" rel="nofollow">test</a>' +def test_clean_content_tags_overlap_with_default_tags(): + # Without explicit ``tags``, ammonia's default allowed tags are used; placing + # any of those tags in ``clean_content_tags`` would otherwise panic the + # interpreter. Validate up-front with a clear ValueError instead. 
+ with pytest.raises(ValueError, match="clean_content_tags"): + nh3.clean("<p>hi</p>", clean_content_tags={"p"}) + + with pytest.raises(ValueError, match="clean_content_tags"): + nh3.clean("<div><b>hi</b></div>", clean_content_tags={"b", "script"}) + + +def test_clean_content_tags_overlap_with_explicit_tags(): + # Explicit ``tags`` set that intersects ``clean_content_tags`` is also a + # contradiction and must raise rather than panic. + with pytest.raises(ValueError, match="clean_content_tags"): + nh3.clean( + "<div><b>hi</b></div>", + tags={"div", "b"}, + clean_content_tags={"b"}, + ) + + +def test_clean_content_tags_no_overlap_ok(): + # ``clean_content_tags`` works with tags absent from the allowed set + # (default or explicit). + assert nh3.clean("<script>x</script>safe", clean_content_tags={"script"}) == "safe" + assert ( + nh3.clean( + "<div><b>hi</b></div>", + tags={"div"}, + clean_content_tags={"b"}, + ) + == "<div></div>" + ) + + +def test_cleaner_clean_content_tags_overlap(): + with pytest.raises(ValueError, match="clean_content_tags"): + nh3.Cleaner(clean_content_tags={"p"}) + + with pytest.raises(ValueError, match="clean_content_tags"): + nh3.Cleaner(tags={"a"}, clean_content_tags={"a"}) + + def test_clean_text(): res = nh3.clean_text('Robert"); abuse();//') assert res == "Robert&quot;);&#32;abuse();&#47;&#47;" @@ -173,6 +217,125 @@ assert cleaner.clean("<b><img src='x'>hi</b>") == '<b><img src="x">hi</b>' +def test_clean_url_relative_pass_through_is_default(): + html = '<a href="/foo">x</a>' + # Omitting url_relative keeps relative URLs (ammonia default), and the + # explicit "pass_through" string must behave identically. + assert nh3.clean(html) == '<a href="/foo" rel="noopener noreferrer">x</a>' + assert nh3.clean(html, url_relative="pass_through") == nh3.clean(html) + + +def test_clean_url_relative_deny(): + # Relative URLs are stripped, absolute URLs are kept. 
+ assert ( + nh3.clean('<a href="/foo">x</a>', url_relative="deny") + == '<a rel="noopener noreferrer">x</a>' + ) + assert ( + nh3.clean('<a href="https://example.com/foo">x</a>', url_relative="deny") + == '<a href="https://example.com/foo" rel="noopener noreferrer">x</a>' + ) + + +def test_clean_url_relative_rewrite_with_base(): + assert ( + nh3.clean( + '<a href="/foo">x</a>', + url_relative=("rewrite_with_base", "https://example.com"), + ) + == '<a href="https://example.com/foo" rel="noopener noreferrer">x</a>' + ) + + +def test_clean_url_relative_rewrite_with_root(): + out = nh3.clean( + '<a href="/CONTRIBUTING.md">x</a>', + url_relative=( + "rewrite_with_root", + "https://github.com/rust-ammonia/ammonia/blob/master/", + "README.md", + ), + ) + assert ( + 'href="https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md"' + in out + ) + + +def test_clean_url_relative_custom_replace(): + def rewrite(url): + return f"https://cdn.example.com{url}" if url.startswith("/") else None + + assert ( + nh3.clean('<img src="/a.png">', url_relative=rewrite) + == '<img src="https://cdn.example.com/a.png">' + ) + + +def test_clean_url_relative_custom_strip_on_none(): + assert ( + nh3.clean('<a href="/x">y</a>', url_relative=lambda _url: None) + == '<a rel="noopener noreferrer">y</a>' + ) + + +def test_clean_url_relative_custom_exception_strips(): + def boom(_url): + raise RuntimeError("nope") + + # A failing callback strips the URL; clean() itself stays infallible. The + # callback error is reported via sys.unraisablehook (surfaced by pytest as a + # PytestUnraisableExceptionWarning), mirroring attribute_filter's behaviour. 
+ assert ( + nh3.clean('<a href="/x">y</a>', url_relative=boom) + == '<a rel="noopener noreferrer">y</a>' + ) + + +def test_clean_url_relative_invalid(): + with pytest.raises(ValueError): + nh3.clean("x", url_relative="bogus") + with pytest.raises(ValueError): + nh3.clean("x", url_relative=("bogus_mode", "https://example.com")) + with pytest.raises(ValueError): + nh3.clean("x", url_relative=("rewrite_with_base", "not a url")) + with pytest.raises(ValueError): + nh3.clean("x", url_relative=("rewrite_with_base",)) + with pytest.raises(TypeError): + nh3.clean("x", url_relative=123) + + +def test_cleaner_url_relative_reusable(): + cleaner = nh3.Cleaner(url_relative="deny") + assert cleaner.clean('<a href="/foo">x</a>') == '<a rel="noopener noreferrer">x</a>' + assert ( + cleaner.clean('<a href="https://example.com">y</a>') + == '<a href="https://example.com" rel="noopener noreferrer">y</a>' + ) + + def test_is_html(): assert not nh3.is_html("plain text") assert nh3.is_html("<p>html!</p>") + + +def test_escape(): + # No-arg: full escape, identical to clean_text + assert nh3.escape('Robert"); abuse();//') == "Robert&quot;);&#32;abuse();&#47;&#47;" + + # With tags=: listed tags preserved (no attributes), the rest escaped/stripped + assert ( + nh3.escape( + '<span>hello <mention>moto</mention>, welcome!</span>', + tags={'mention'}, + ) + == 'hello <mention>moto</mention>, welcome!' + ) + + # Parity with clean_text for a few representative inputs + for sample, kwargs in [ + ('Robert"); abuse();//', {}), + ('<b>bold</b> and <i>italic</i>', {"tags": {"b"}}), + ("<a href='http://example.com' rel='nofollow'>test</a>", {"tags": {"a"}}), + ]: + assert nh3.escape(sample, **kwargs) == nh3.clean_text(sample, **kwargs) ++++++ vendor.tar.zst ++++++ ++++ 501705 lines of diff (skipped)
