Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package rumdl for openSUSE:Factory checked in at 2026-05-21 18:32:17 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/rumdl (Old) and /work/SRC/openSUSE:Factory/.rumdl.new.2084 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "rumdl" Thu May 21 18:32:17 2026 rev:69 rq:1354437 version:0.1.96 Changes: -------- --- /work/SRC/openSUSE:Factory/rumdl/rumdl.changes 2026-05-20 15:26:52.043939781 +0200 +++ /work/SRC/openSUSE:Factory/.rumdl.new.2084/rumdl.changes 2026-05-21 18:33:14.345114713 +0200 @@ -1,0 +2,14 @@ +Thu May 21 11:02:22 UTC 2026 - Johannes Kastl <[email protected]> + +- Update to version 0.1.96: + * Fixed + - md073: protect code-span contents from link/image stripping + (530e41d) + - md073: preserve inline code spans and emphasis in generated + TOC entries (#634) (897c76a) + - md057: handle trailing-slash directory links that include a + fragment or query (539a6d0) + - md057: eliminate duplicate warnings for broken relative links + and accept existing directory targets (#631, #632) (6a37ada) + +------------------------------------------------------------------- Old: ---- rumdl-0.1.95.obscpio New: ---- rumdl-0.1.96.obscpio ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ rumdl.spec ++++++ --- /var/tmp/diff_new_pack.VG9wT0/_old 2026-05-21 18:33:24.753542518 +0200 +++ /var/tmp/diff_new_pack.VG9wT0/_new 2026-05-21 18:33:24.761542847 +0200 @@ -17,7 +17,7 @@ Name: rumdl -Version: 0.1.95 +Version: 0.1.96 Release: 0 Summary: Markdown Linter written in Rust License: MIT ++++++ _service ++++++ --- /var/tmp/diff_new_pack.VG9wT0/_old 2026-05-21 18:33:24.813544984 +0200 +++ /var/tmp/diff_new_pack.VG9wT0/_new 2026-05-21 18:33:24.817545148 +0200 @@ -3,7 +3,7 @@ <param name="url">https://github.com/rvben/rumdl.git</param> <param name="scm">git</param> <param name="submodules">enable</param> - <param name="revision">v0.1.95</param> + <param name="revision">v0.1.96</param> <param name="match-tag">v*.*.*</param> <param name="versionformat">@PARENT_TAG@</param> <param name="versionrewrite-pattern">v(.*)</param> ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.VG9wT0/_old 2026-05-21 18:33:24.849546464 +0200 +++ 
/var/tmp/diff_new_pack.VG9wT0/_new 2026-05-21 18:33:24.853546628 +0200 @@ -1,6 +1,6 @@ <servicedata> <service name="tar_scm"> <param name="url">https://github.com/rvben/rumdl.git</param> - <param name="changesrevision">b2164bb1b33cbc3b416f686180033b8be7374f37</param></service></servicedata> + <param name="changesrevision">73f3a8c02f7b513546ca67df3a1e1c9702062b5d</param></service></servicedata> (No newline at EOF) ++++++ rumdl-0.1.95.obscpio -> rumdl-0.1.96.obscpio ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/CHANGELOG.md new/rumdl-0.1.96/CHANGELOG.md --- old/rumdl-0.1.95/CHANGELOG.md 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/CHANGELOG.md 2026-05-21 10:13:56.000000000 +0200 @@ -38,6 +38,16 @@ + +## [0.1.96](https://github.com/rvben/rumdl/compare/v0.1.95...v0.1.96) - 2026-05-21 + +### Fixed + +- **md073**: protect code-span contents from link/image stripping ([530e41d](https://github.com/rvben/rumdl/commit/530e41d61c5d34dcb517fa677816c69e5c2ea884)) +- **md073**: preserve inline code spans and emphasis in generated TOC entries (#634) ([897c76a](https://github.com/rvben/rumdl/commit/897c76a8fb48395dabde8030fffc02cc22b0d0e7)) +- **md057**: handle trailing-slash directory links that include a fragment or query ([539a6d0](https://github.com/rvben/rumdl/commit/539a6d087b332e097abc85cce56ea478840184cc)) +- **md057**: eliminate duplicate warnings for broken relative links and accept existing directory targets (#631, #632) ([6a37ada](https://github.com/rvben/rumdl/commit/6a37ada04337079f08e656087c6d681a78181d46)) + ## [0.1.95](https://github.com/rvben/rumdl/compare/v0.1.94...v0.1.95) - 2026-05-19 ### Added diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/Cargo.lock new/rumdl-0.1.96/Cargo.lock --- old/rumdl-0.1.95/Cargo.lock 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/Cargo.lock 2026-05-21 10:13:56.000000000 +0200 @@ -2274,7 
+2274,7 @@ [[package]] name = "rumdl" -version = "0.1.95" +version = "0.1.96" dependencies = [ "anyhow", "assert_cmd", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/Cargo.toml new/rumdl-0.1.96/Cargo.toml --- old/rumdl-0.1.95/Cargo.toml 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/Cargo.toml 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ [package] name = "rumdl" -version = "0.1.95" +version = "0.1.96" edition = "2024" rust-version = "1.94.0" description = "A fast Markdown linter written in Rust (Ru(st) MarkDown Linter)" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/README.md new/rumdl-0.1.96/README.md --- old/rumdl-0.1.95/README.md 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/README.md 2026-05-21 10:13:56.000000000 +0200 @@ -206,7 +206,7 @@ mise install rumdl # Use a specific version for the project -mise use rumdl@0.1.95 +mise use rumdl@0.1.96 ``` ### Using Nix (macOS/Linux) @@ -405,7 +405,7 @@ ```yaml repos: - repo: https://github.com/rvben/rumdl-pre-commit - rev: v0.1.95 + rev: v0.1.96 hooks: - id: rumdl # Lint only (fails on issues) - id: rumdl-fmt # Auto-format and fail if issues remain @@ -427,7 +427,7 @@ ```yaml repos: - repo: https://github.com/rvben/rumdl-pre-commit - rev: v0.1.95 + rev: v0.1.96 hooks: - id: rumdl args: [--no-exclude] # Disable all exclude patterns diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/docs/global-settings.md new/rumdl-0.1.96/docs/global-settings.md --- old/rumdl-0.1.95/docs/global-settings.md 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/docs/global-settings.md 2026-05-21 10:13:56.000000000 +0200 @@ -1342,7 +1342,7 @@ ```yaml - repo: https://github.com/rvben/rumdl-pre-commit - rev: v0.1.95 + rev: v0.1.96 hooks: - id: rumdl args: [--config=.rumdl.toml] diff -urN '--exclude=CVS' '--exclude=.cvsignore' 
'--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-darwin-arm64/package.json new/rumdl-0.1.96/npm/cli-darwin-arm64/package.json --- old/rumdl-0.1.95/npm/cli-darwin-arm64/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-darwin-arm64/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-darwin-arm64", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for macOS ARM64 (Apple Silicon)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-darwin-x64/package.json new/rumdl-0.1.96/npm/cli-darwin-x64/package.json --- old/rumdl-0.1.95/npm/cli-darwin-x64/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-darwin-x64/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-darwin-x64", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for macOS x64 (Intel)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-linux-arm64/package.json new/rumdl-0.1.96/npm/cli-linux-arm64/package.json --- old/rumdl-0.1.95/npm/cli-linux-arm64/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-linux-arm64/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-linux-arm64", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for Linux ARM64 (glibc)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-linux-arm64-musl/package.json new/rumdl-0.1.96/npm/cli-linux-arm64-musl/package.json --- old/rumdl-0.1.95/npm/cli-linux-arm64-musl/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-linux-arm64-musl/package.json 2026-05-21 
10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-linux-arm64-musl", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for Linux ARM64 (musl/Alpine)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-linux-x64/package.json new/rumdl-0.1.96/npm/cli-linux-x64/package.json --- old/rumdl-0.1.95/npm/cli-linux-x64/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-linux-x64/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-linux-x64", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for Linux x64 (glibc)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-linux-x64-musl/package.json new/rumdl-0.1.96/npm/cli-linux-x64-musl/package.json --- old/rumdl-0.1.95/npm/cli-linux-x64-musl/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-linux-x64-musl/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-linux-x64-musl", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for Linux x64 (musl/Alpine)", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/npm/cli-win32-x64/package.json new/rumdl-0.1.96/npm/cli-win32-x64/package.json --- old/rumdl-0.1.95/npm/cli-win32-x64/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/cli-win32-x64/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "@rumdl/cli-win32-x64", - "version": "0.1.95", + "version": "0.1.96", "description": "rumdl binary for Windows x64", "license": "MIT", "repository": { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/rumdl-0.1.95/npm/rumdl/package.json new/rumdl-0.1.96/npm/rumdl/package.json --- old/rumdl-0.1.95/npm/rumdl/package.json 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/npm/rumdl/package.json 2026-05-21 10:13:56.000000000 +0200 @@ -1,6 +1,6 @@ { "name": "rumdl", - "version": "0.1.95", + "version": "0.1.96", "description": "A fast Markdown linter written in Rust", "license": "MIT", "repository": { @@ -33,12 +33,12 @@ "node": ">=18.0.0" }, "optionalDependencies": { - "@rumdl/cli-darwin-x64": "0.1.95", - "@rumdl/cli-darwin-arm64": "0.1.95", - "@rumdl/cli-linux-x64": "0.1.95", - "@rumdl/cli-linux-arm64": "0.1.95", - "@rumdl/cli-linux-x64-musl": "0.1.95", - "@rumdl/cli-linux-arm64-musl": "0.1.95", - "@rumdl/cli-win32-x64": "0.1.95" + "@rumdl/cli-darwin-x64": "0.1.96", + "@rumdl/cli-darwin-arm64": "0.1.96", + "@rumdl/cli-linux-x64": "0.1.96", + "@rumdl/cli-linux-arm64": "0.1.96", + "@rumdl/cli-linux-x64-musl": "0.1.96", + "@rumdl/cli-linux-arm64-musl": "0.1.96", + "@rumdl/cli-win32-x64": "0.1.96" } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/src/rules/md057_existing_relative_links.rs new/rumdl-0.1.96/src/rules/md057_existing_relative_links.rs --- old/rumdl-0.1.95/src/rules/md057_existing_relative_links.rs 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/src/rules/md057_existing_relative_links.rs 2026-05-21 10:13:56.000000000 +0200 @@ -167,19 +167,6 @@ } } - /// Project root used for absolute-link resolution and `search-paths` anchoring. - /// - /// Returns the explicit base path when set via `with_path()` (used by tests - /// to isolate filesystem state to a temp dir); otherwise returns the - /// process-wide discovered project root. 
- fn project_root(&self) -> PathBuf { - self.base_path - .lock() - .ok() - .and_then(|g| g.clone()) - .unwrap_or_else(|| PROJECT_ROOT.clone()) - } - /// Resolve a config-supplied path string (from `roots` or `search-paths`) /// against the project root: absolute strings are taken verbatim, relative /// strings are joined onto `project_root`. @@ -407,7 +394,9 @@ let (decoded, is_directory_link) = Self::prepare_absolute_url(url); - match Self::resolve_under_root(&docs_dir, &decoded, is_directory_link) { + // MkDocs mode: an extensionless directory link must have index.md. + // `require_index_for_dirs = true` enforces this for all directory hits. + match Self::resolve_under_root_with_opts(&docs_dir, &decoded, is_directory_link, true) { Resolution::Found => None, Resolution::DirectoryWithoutIndex { resolved } => Some(format!( "Absolute link '{url}' resolves to directory '{}' which has no index.md", @@ -433,8 +422,10 @@ for root in roots { let root_path = Self::resolve_against_project_root(root, project_root); + // Filesystem mode: an existing directory without trailing slash is valid. + // `require_index_for_dirs = false` aligns with relative-link behavior. (#632) if matches!( - Self::resolve_under_root(&root_path, &decoded, is_directory_link), + Self::resolve_under_root_with_opts(&root_path, &decoded, is_directory_link, false), Resolution::Found ) { return None; @@ -442,7 +433,8 @@ } if matches!( - Self::resolve_under_root(project_root, &decoded, is_directory_link), + // Filesystem mode: see above. + Self::resolve_under_root_with_opts(project_root, &decoded, is_directory_link, false), Resolution::Found ) { return None; @@ -469,18 +461,40 @@ /// Try to resolve a decoded absolute-link path under a single root directory. /// - /// Applies four resolution strategies in order: - /// 1. Directory-style links: look for `<resolved>/index.md`. - /// 2. Direct existence (with markdown-extension fallback for extensionless links). - /// 3. 
`.html`/`.htm` links: look for a markdown source with the same stem in the same directory. - fn resolve_under_root(root_path: &Path, decoded: &str, is_directory_link: bool) -> Resolution { + /// `require_index_for_dirs` controls how extensionless links that resolve to a + /// directory are treated: + /// + /// - `true` (MkDocs / docs-dir mode): a directory must contain `index.md` to be + /// considered valid, even when the link has no trailing slash. This matches + /// MkDocs' URL routing convention where `/section` serves `section/index.md`. + /// + /// - `false` (roots / filesystem mode): an existing directory is accepted as a + /// valid target for an extensionless link, matching the behavior of relative + /// links (which use `path.exists()`). Only an explicit trailing-slash link + /// (`is_directory_link == true`) still requires `index.md`. + /// + /// Applies resolution strategies in order: + /// 1. Directory-style links (explicit `/` suffix or `require_index_for_dirs`): + /// look for `<resolved>/index.md`; report `DirectoryWithoutIndex` on failure. + /// 2. Filesystem-mode directory hit (`require_index_for_dirs == false` and + /// `is_directory_link == false`): accept the existing directory as `Found`. + /// 3. Direct existence (with markdown-extension fallback for extensionless links). + /// 4. `.html`/`.htm` links: look for a markdown source with the same stem. + fn resolve_under_root_with_opts( + root_path: &Path, + decoded: &str, + is_directory_link: bool, + require_index_for_dirs: bool, + ) -> Resolution { let resolved = root_path.join(decoded); - // Directory-style links resolve via `index.md` inside the directory. - // Must be checked before `file_exists_or_markdown_extension` because - // `path.exists()` returns true for directories. let is_dir = resolved.is_dir(); - if is_directory_link || is_dir { + + // When the link explicitly ends with `/` or the caller requires index.md + // for all directory hits (MkDocs mode), apply the stricter check first. 
+ // Must be checked before `file_exists_or_markdown_extension` because + // `path.exists()` returns `true` for directories. + if is_directory_link || (require_index_for_dirs && is_dir) { let index_path = resolved.join("index.md"); if file_exists_with_cache(&index_path) { return Resolution::Found; @@ -490,6 +504,16 @@ } } + // Filesystem mode (roots): an existing directory without a trailing slash + // is valid — mirrors how relative links accept directories via `path.exists()`. + // Exclude decoded paths that end with `/`: a URL like `/guide/#intro` strips + // the fragment to `guide/`, so `decoded` carries the trailing slash even though + // `is_directory_link` is false (the raw URL ends with `#intro`, not `/`). + let decoded_has_trailing_slash = decoded.ends_with('/'); + if !require_index_for_dirs && !is_directory_link && !decoded_has_trailing_slash && is_dir { + return Resolution::Found; + } + if file_exists_or_markdown_extension(&resolved) { return Resolution::Found; } @@ -1206,81 +1230,20 @@ fn cross_file_check( &self, - file_path: &Path, - file_index: &FileIndex, - workspace_index: &crate::workspace_index::WorkspaceIndex, + _file_path: &Path, + _file_index: &FileIndex, + _workspace_index: &crate::workspace_index::WorkspaceIndex, ) -> LintResult { - // Reset the file existence cache for a fresh run - reset_file_existence_cache(); - - let mut warnings = Vec::new(); - - // Get the directory containing this file for resolving relative links - let file_dir = file_path.parent(); - - // Compute additional search paths for fallback link resolution - let base_path = file_dir.map_or_else(|| CURRENT_DIR.clone(), std::path::Path::to_path_buf); - let project_root = self.project_root(); - let extra_search_paths = self.compute_search_paths(self.flavor, Some(file_path), &base_path, &project_root); - - for cross_link in &file_index.cross_file_links { - // URL-decode the path for filesystem operations - // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤) - 
let decoded_target = Self::url_decode(&cross_link.target_path); - - // Skip absolute paths — they are already handled by check() - // which validates them according to the absolute_links config. - // Handling them here too would produce duplicate warnings. - if decoded_target.starts_with('/') { - continue; - } - - // Resolve relative path - let target_path = if let Some(dir) = file_dir { - dir.join(&decoded_target) - } else { - Path::new(&decoded_target).to_path_buf() - }; - - // Normalize the path (handle .., ., etc.) - let target_path = normalize_path(&target_path); - - // Check if the target file exists, also trying markdown extensions for extensionless links - let file_exists = - workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path); - - if !file_exists { - // For .html/.htm links, check if a corresponding markdown source exists - // This handles doc sites (mdBook, etc.) where .md is compiled to .html - let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str()) - && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm")) - && let (Some(stem), Some(parent)) = - (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent()) - { - MARKDOWN_EXTENSIONS.iter().any(|md_ext| { - let source_path = parent.join(format!("{stem}{md_ext}")); - workspace_index.contains_file(&source_path) || source_path.exists() - }) - } else { - false - }; - - if !has_md_source && !Self::exists_in_search_paths(&decoded_target, &extra_search_paths) { - warnings.push(LintWarning { - rule_name: Some(self.name().to_string()), - line: cross_link.line, - column: cross_link.column, - end_line: cross_link.line, - end_column: cross_link.column + cross_link.target_path.len(), - message: format!("Relative link '{}' does not exist", cross_link.target_path), - severity: Severity::Error, - fix: None, - }); - } - } - } - - Ok(warnings) + // All link targets are already validated by check() on each per-file pass. 
+ // check() resolves relative links against the file's own directory, handles + // configured search paths, and applies the absolute_links config. + // Validating them here too would produce identical duplicate warnings for + // every broken link. (#631) + // + // The cross_file_scope / contribute_to_index / workspace-index infrastructure + // remains in place to support future cross-file analyses (e.g. heading-anchor + // validation across files). + Ok(Vec::new()) } } @@ -2105,14 +2068,13 @@ #[test] fn test_cross_file_check_missing_link() { + // cross_file_check delegates all validation to check() to avoid duplicates. + // It always returns empty — the per-file check() path is authoritative. use crate::workspace_index::WorkspaceIndex; let rule = MD057ExistingRelativeLinks::new(); - - // Create an empty workspace index let workspace_index = WorkspaceIndex::new(); - // Create file index with a link to a missing file let mut file_index = FileIndex::new(); file_index.add_cross_file_link(CrossFileLinkIndex { target_path: "missing.md".to_string(), @@ -2121,15 +2083,15 @@ column: 1, }); - // Run cross-file check let warnings = rule .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index) .unwrap(); - // Should have one warning for the missing file - assert_eq!(warnings.len(), 1); - assert!(warnings[0].message.contains("missing.md")); - assert!(warnings[0].message.contains("does not exist")); + // cross_file_check defers to check(); it produces no warnings of its own. + assert!( + warnings.is_empty(), + "cross_file_check must not duplicate check()'s per-file warnings. Got: {warnings:?}" + ); } #[test] @@ -2195,15 +2157,14 @@ #[test] fn test_cross_file_check_html_link_without_source() { - // Test that .html links without corresponding .md source ARE flagged + // cross_file_check delegates all validation to check() to avoid duplicates. 
+ // Verifying that .html links without a matching .md source are caught is + // already covered by test_html_link_with_md_source (check() path). use crate::workspace_index::WorkspaceIndex; let rule = MD057ExistingRelativeLinks::new(); - - // Create an empty workspace index let workspace_index = WorkspaceIndex::new(); - // Create file index with an .html link to a non-existent file let mut file_index = FileIndex::new(); file_index.add_cross_file_link(CrossFileLinkIndex { target_path: "missing.html".to_string(), @@ -2212,14 +2173,15 @@ column: 5, }); - // Run cross-file check from docs/index.md let warnings = rule .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index) .unwrap(); - // Should have one warning - no .md source exists - assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source"); - assert!(warnings[0].message.contains("missing.html")); + // cross_file_check defers to check(); it produces no warnings of its own. + assert!( + warnings.is_empty(), + "cross_file_check must not duplicate check()'s per-file warnings. Got: {warnings:?}" + ); } #[test] @@ -3547,81 +3509,259 @@ } #[test] - fn test_cross_file_check_clears_stale_cache() { - // Verify that cross_file_check() resets the file existence cache so stale - // entries from a previous lint cycle do not affect results. - use crate::workspace_index::WorkspaceIndex; + fn test_check_clears_stale_cache() { + // Verify that check() resets the file existence cache so stale entries from + // a previous lint cycle do not suppress valid warnings. + let temp_dir = tempdir().unwrap(); + let base_path = temp_dir.path(); - let rule = MD057ExistingRelativeLinks::new(); + let rule = MD057ExistingRelativeLinks::new().with_path(base_path); - // Seed the cache with a stale entry: pretend "docs/phantom.md" exists on disk. - // In reality, neither the filesystem nor the workspace index has this file. + // Seed the cache with a stale "exists" entry for a file that is NOT on disk. 
+ let phantom_path = base_path.join("phantom.md"); { let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap(); - cache.insert(PathBuf::from("docs/phantom.md"), true); + cache.insert(phantom_path.clone(), true); } - let workspace_index = WorkspaceIndex::new(); - - let mut file_index = FileIndex::new(); - file_index.add_cross_file_link(CrossFileLinkIndex { - target_path: "phantom.md".to_string(), - fragment: "".to_string(), - line: 1, - column: 1, - }); - - let warnings = rule - .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index) - .unwrap(); + let content = "[phantom](phantom.md)\n"; + let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None); + let warnings = rule.check(&ctx).unwrap(); - // With cache reset, cross_file_check must detect that phantom.md does not exist + // check() must reset the cache; stale "exists=true" must not suppress the warning. assert_eq!( warnings.len(), 1, - "cross_file_check should report missing file after clearing stale cache. Got: {warnings:?}" + "check() should report missing file after clearing stale cache. Got: {warnings:?}" ); assert!(warnings[0].message.contains("phantom.md")); } #[test] - fn test_cross_file_check_does_not_carry_over_cache_between_runs() { - // Two consecutive cross_file_check() calls should each start with a fresh cache. - use crate::workspace_index::WorkspaceIndex; + fn test_check_does_not_carry_over_cache_between_runs() { + // Two consecutive check() calls should each start with a fresh cache. 
+ let temp_dir = tempdir().unwrap(); + let base_path = temp_dir.path(); - let rule = MD057ExistingRelativeLinks::new(); - let workspace_index = WorkspaceIndex::new(); + let rule = MD057ExistingRelativeLinks::new().with_path(base_path); - // First run: link to a file that doesn't exist - let mut file_index_1 = FileIndex::new(); - file_index_1.add_cross_file_link(CrossFileLinkIndex { - target_path: "nonexistent.md".to_string(), - fragment: "".to_string(), - line: 1, - column: 1, - }); + let content = "[missing](nonexistent.md)\n"; + let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None); - let warnings_1 = rule - .cross_file_check(Path::new("docs/a.md"), &file_index_1, &workspace_index) - .unwrap(); + // First run: file doesn't exist — warning expected. + let warnings_1 = rule.check(&ctx).unwrap(); assert_eq!(warnings_1.len(), 1, "First run should detect missing file"); - // Between runs, inject a stale "exists = true" entry for the same resolved path + // Inject a stale "exists = true" entry for the resolved path. + let nonexistent_path = base_path.join("nonexistent.md"); { let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap(); - cache.insert(PathBuf::from("docs/nonexistent.md"), true); + cache.insert(nonexistent_path.clone(), true); } - // Second run: same link, but now cache says file exists (stale data) - let warnings_2 = rule - .cross_file_check(Path::new("docs/a.md"), &file_index_1, &workspace_index) + // Second run: cache says file exists, but check() should reset it first. + let warnings_2 = rule.check(&ctx).unwrap(); + assert_eq!( + warnings_2.len(), + 1, + "Second check() run should still detect missing file after cache reset. Got: {warnings_2:?}" + ); + } + + // --- Bug #631: duplicate warnings for broken relative links --- + + /// Regression test: a single broken relative link must produce exactly one + /// warning across both check() and cross_file_check(). 
Previously, each + /// code path emitted an identical warning independently, causing duplicates. + #[test] + fn test_no_duplicate_warnings_for_broken_relative_link() { + use crate::workspace_index::WorkspaceIndex; + + let temp_dir = tempdir().unwrap(); + let base_path = temp_dir.path(); + + // The broken link target does NOT exist on disk. + let source_file = base_path.join("index.md"); + std::fs::write(&source_file, "[broken](does/not/exist.md)\n").unwrap(); + + let content = "[broken](does/not/exist.md)\n"; + + let rule = MD057ExistingRelativeLinks::new().with_path(base_path); + + // Collect warnings from check() (per-file path) + let ctx = crate::lint_context::LintContext::new( + content, + crate::config::MarkdownFlavor::Standard, + Some(source_file.clone()), + ); + let check_warnings = rule.check(&ctx).unwrap(); + + // Collect warnings from cross_file_check() (workspace-index path) + let mut file_index = FileIndex::new(); + rule.contribute_to_index(&ctx, &mut file_index); + let workspace_index = WorkspaceIndex::new(); + let cross_warnings = rule + .cross_file_check(&source_file, &file_index, &workspace_index) .unwrap(); - // The second run must also detect the missing file because the cache should be reset + let total = check_warnings.len() + cross_warnings.len(); assert_eq!( - warnings_2.len(), + total, 1, + "Expected exactly 1 warning total across check() and cross_file_check(), got {total}: \ + check={check_warnings:?}, cross={cross_warnings:?}" + ); + } + + // --- Bug #632: absolute directory links incorrectly flagged --- + + /// With absolute-links = "relative_to_roots", links to existing targets must + /// be accepted for all four cases: {relative, absolute} x {file, directory}. 
+ #[test] + fn test_absolute_dir_link_accepted_relative_to_roots() { + let temp_dir = tempdir().unwrap(); + let root = temp_dir.path(); + + // Create directory `d` with a file inside (but no index.md) + let dir_d = root.join("d"); + std::fs::create_dir_all(&dir_d).unwrap(); + std::fs::write(dir_d.join("foo.md"), "# Foo\n").unwrap(); + + // Content exercises all four matrix cells: + // relative file, relative dir, absolute file, absolute dir + let content = "\ +[absolute dir](/d)\n\ +[relative dir](d)\n\ +[absolute file](/d/foo.md)\n\ +[relative file](d/foo.md)\n"; + + let config = MD057Config { + absolute_links: AbsoluteLinksOption::RelativeToRoots, + roots: vec![], + ..Default::default() + }; + let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(root); + + let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None); + let result = rule.check(&ctx).unwrap(); + + assert!( + result.is_empty(), + "All four {{relative,absolute}} x {{file,dir}} links to existing targets must pass. Got: {result:?}" + ); + } + + /// A directory link with a trailing slash and no index.md should be reported + /// as invalid under relative_to_roots (docs-convention: trailing slash implies index.md). + #[test] + fn test_absolute_trailing_slash_dir_link_requires_index() { + let temp_dir = tempdir().unwrap(); + let root = temp_dir.path(); + + // Create directory `d` WITHOUT index.md + let dir_d = root.join("d"); + std::fs::create_dir_all(&dir_d).unwrap(); + std::fs::write(dir_d.join("foo.md"), "# Foo\n").unwrap(); + + // Trailing slash signals "this is a directory index" — index.md must exist. 
+ let content = "[dir with slash](/d/)\n"; + + let config = MD057Config { + absolute_links: AbsoluteLinksOption::RelativeToRoots, + roots: vec![], + ..Default::default() + }; + let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(root); + + let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None); + let result = rule.check(&ctx).unwrap(); + + assert_eq!( + result.len(), + 1, + "Trailing-slash directory link without index.md must be flagged. Got: {result:?}" + ); + } + + /// The docs_dir (MkDocs) variant must still flag a directory link when index.md + /// is absent. This is tested via the full check() path with RelativeToDocs config + /// and a real mkdocs.yml pointing at a docs dir that contains the directory target. + #[test] + fn test_docs_dir_variant_still_enforces_index_md() { + let temp_dir = tempdir().unwrap(); + let root = temp_dir.path(); + + // Create a minimal mkdocs.yml pointing at a "docs" directory + std::fs::write(root.join("mkdocs.yml"), "site_name: Test\ndocs_dir: docs\n").unwrap(); + + // Create docs/section/ WITHOUT index.md + let docs_dir = root.join("docs"); + std::fs::create_dir_all(&docs_dir).unwrap(); + let section_dir = docs_dir.join("section"); + std::fs::create_dir_all(&section_dir).unwrap(); + std::fs::write(section_dir.join("page.md"), "# Page\n").unwrap(); + + // Create the source markdown file inside docs/ + let source_file = docs_dir.join("index.md"); + std::fs::write(&source_file, "[sec](/section)\n").unwrap(); + + let config = MD057Config { + absolute_links: AbsoluteLinksOption::RelativeToDocs, + ..Default::default() + }; + let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(&docs_dir); + + let content = "[sec](/section)\n"; + let ctx = crate::lint_context::LintContext::new( + content, + crate::config::MarkdownFlavor::Standard, + Some(source_file.clone()), + ); + let result = rule.check(&ctx).unwrap(); + + // MkDocs enforces index.md for 
directory links, so this should be flagged. + assert_eq!( + result.len(), + 1, + "MkDocs docs_dir variant must flag directory link without index.md. Got: {result:?}" + ); + assert!( + result[0].message.contains("index.md") || result[0].message.contains("section"), + "Message should mention the directory or missing index.md: {}", + result[0].message + ); + } + + /// Regression test for the edge case where a trailing-slash directory URL has a + /// fragment suffix (e.g. `/guide/#intro`). After stripping the fragment, the + /// decoded path is `guide/` (ends with `/`), but `is_directory_link` was computed + /// from `url.ends_with('/')` which is false when the URL ends with `#intro`. + /// The fix must still treat such links as directory links and require index.md. + #[test] + fn test_trailing_slash_with_fragment_treated_as_directory_link() { + let temp_dir = tempdir().unwrap(); + let root = temp_dir.path(); + + // Create directory `guide` WITHOUT index.md + let guide_dir = root.join("guide"); + std::fs::create_dir_all(&guide_dir).unwrap(); + std::fs::write(guide_dir.join("page.md"), "# Page\n").unwrap(); + + // /guide/#intro has a trailing slash before the fragment — must require index.md + let content = "[guide with fragment](/guide/#intro)\n"; + + let config = MD057Config { + absolute_links: AbsoluteLinksOption::RelativeToRoots, + roots: vec![], + ..Default::default() + }; + let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(root); + let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None); + let result = rule.check(&ctx).unwrap(); + + assert_eq!( + result.len(), 1, - "Second run should still detect missing file after cache reset. Got: {warnings_2:?}" + "Trailing-slash link with fragment and no index.md must be flagged. 
Got: {result:?}" ); } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/rumdl-0.1.95/src/rules/md073_toc_validation.rs new/rumdl-0.1.96/src/rules/md073_toc_validation.rs --- old/rumdl-0.1.95/src/rules/md073_toc_validation.rs 2026-05-19 22:15:04.000000000 +0200 +++ new/rumdl-0.1.96/src/rules/md073_toc_validation.rs 2026-05-21 10:13:56.000000000 +0200 @@ -80,111 +80,119 @@ }, } -/// Regex patterns for stripping markdown formatting from heading text +/// Regex patterns used by `strip_links_and_images`. static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap()); static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap()); static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap()); -/// Strip code spans from text, handling multi-backtick spans per CommonMark spec. -/// E.g., `` `code` ``, ``` ``code with ` backtick`` ```, etc. -fn strip_code_spans(text: &str) -> String { + +/// Extract code-span byte ranges from `text` using the CommonMark rule: +/// a run of N backticks opens a span closed by exactly N backticks. +/// Returns a sorted list of `(start, end)` byte offsets that are inside code spans +/// (including the backtick delimiters themselves). 
+fn code_span_ranges(text: &str) -> Vec<(usize, usize)> { let chars: Vec<char> = text.chars().collect(); let len = chars.len(); - let mut result = String::with_capacity(text.len()); + let mut ranges = Vec::new(); let mut i = 0; while i < len { if chars[i] == '`' { - // Count opening backticks - let open_start = i; + let span_start = i; while i < len && chars[i] == '`' { i += 1; } - let backtick_count = i - open_start; + let n = i - span_start; - // Find matching closing backticks (same count) - let content_start = i; - let mut found_close = false; - while i < len { - if chars[i] == '`' { - let close_start = i; - while i < len && chars[i] == '`' { - i += 1; + // Search for the matching closing sequence of exactly n backticks + let mut j = i; + let mut found = false; + while j < len { + if chars[j] == '`' { + let close_start = j; + while j < len && chars[j] == '`' { + j += 1; } - if i - close_start == backtick_count { - // Found matching close - extract content - let content: String = chars[content_start..close_start].iter().collect(); - // CommonMark: strip one leading and one trailing space if both exist - let stripped = if content.starts_with(' ') && content.ends_with(' ') && content.len() > 1 { - &content[1..content.len() - 1] - } else { - &content - }; - result.push_str(stripped); - found_close = true; + if j - close_start == n { + // Convert char indices to byte offsets + let byte_start: usize = text.char_indices().nth(span_start).map_or(0, |(b, _)| b); + let byte_end: usize = text.char_indices().nth(j).map_or(text.len(), |(b, _)| b); + ranges.push((byte_start, byte_end)); + i = j; + found = true; break; } } else { - i += 1; + j += 1; } } - if !found_close { - // No matching close found - emit backticks literally - for _ in 0..backtick_count { - result.push('`'); - } - let remaining: String = chars[content_start..].iter().collect(); - result.push_str(&remaining); - break; + if !found { + // No matching close; skip past the opening backticks + i = span_start + 
n; } } else { - result.push(chars[i]); i += 1; } } - result + ranges } -static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap()); -static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap()); -static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap()); -// Match underscore italic at word boundaries (space or start/end) -// Handles: "_text_", " _text_ ", "start _text_", "_text_ end" -static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> = - LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap()); -/// Strip markdown formatting from text, preserving plain text content. -/// Used for TOC entry display text. +/// Strip only links and images from `text`, preserving all other inline +/// formatting (code spans, bold, italic, etc.). +/// +/// Links and images cannot appear inside a Markdown link label `[...]`, so they +/// must be removed when building TOC display text. Code spans and emphasis are +/// valid inside link labels and should be kept so the TOC entry faithfully +/// reflects the heading's visual appearance. +/// +/// Code-span contents are protected: link-like syntax such as `[foo](bar)` that +/// appears inside backticks is left untouched. 
/// /// Examples: -/// - `[terminal](url)` → `terminal` -/// - `**bold**` → `bold` -/// - `` `code` `` → `code` +/// - `` `my header` `` → `` `my header` `` (code ticks preserved) +/// - `[terminal](url)` → `terminal` (link stripped) +/// - `![alt](url)` → `alt` (image stripped) +/// - `**bold**` → `**bold**` (emphasis preserved) +/// - `` `[foo](bar)` `` → `` `[foo](bar)` `` (link inside code span preserved) /// - `Tool: [terminal](url)` → `Tool: terminal` -fn strip_markdown_formatting(text: &str) -> String { - let mut result = text.to_string(); - - // Strip images first (before links, since images use similar syntax) - result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string(); - - // Strip links: [text](url) → text - result = MARKDOWN_LINK.replace_all(&result, "$1").to_string(); - - // Strip reference links: [text][ref] → text - result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string(); - - // Strip code spans (handles multi-backtick spans like ``code with ` backtick``) - result = strip_code_spans(&result); - - // Strip bold (do double before single to handle nested) - result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string(); - result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string(); - - // Strip italic - result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string(); - // Underscore italic: preserve boundary chars, extract content - result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string(); +fn strip_links_and_images(text: &str) -> String { + // Collect code-span byte ranges so we can protect their contents from + // the link/image regex substitutions. + let protected = code_span_ranges(text); + + // If there are no code spans the fast path avoids all the extra work. 
+ if protected.is_empty() { + let mut result = text.to_string(); + result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string(); + result = MARKDOWN_LINK.replace_all(&result, "$1").to_string(); + result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string(); + return result; + } + + // Replace each code span with a unique placeholder that cannot be matched + // by the link/image regexes, apply the regexes, then restore the originals. + let mut placeholders: Vec<(&str, String)> = Vec::with_capacity(protected.len()); + let mut masked = text.to_string(); + // Process spans in reverse order so byte offsets remain valid after each replacement. + for (i, &(start, end)) in protected.iter().enumerate().rev() { + // Placeholder: a string containing no `[`, `]`, `(`, `)`, `!` characters. + let placeholder = format!("\x00CODESPAN{i}\x00"); + let original = &text[start..end]; + placeholders.push((original, placeholder.clone())); + masked.replace_range(start..end, &placeholder); + } + + // Apply link/image stripping to the masked string + masked = MARKDOWN_IMAGE.replace_all(&masked, "$1").to_string(); + masked = MARKDOWN_LINK.replace_all(&masked, "$1").to_string(); + masked = MARKDOWN_REF_LINK.replace_all(&masked, "$1").to_string(); + + // Restore the original code-span text + for (original, placeholder) in &placeholders { + masked = masked.replace(placeholder.as_str(), original); + } - result + masked } /// MD073: Table of Contents Validation @@ -439,13 +447,15 @@ } } - // Check for text mismatches (compare stripped versions) + // Check for text mismatches. Compare with the same normalization used in + // generate_toc: strip only links and images, preserve code spans and emphasis. + // This ensures a correct user-written TOC entry like `` [`my header`](#anchor) `` + // is not flagged against a heading `` `my header` ``. 
for entry in actual { if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) { - // Compare stripped text (removes markdown formatting like links, emphasis) - let actual_stripped = strip_markdown_formatting(entry.text.trim()); - let expected_stripped = strip_markdown_formatting(exp.text.trim()); - if actual_stripped != expected_stripped { + let actual_normalized = strip_links_and_images(entry.text.trim()); + let expected_normalized = strip_links_and_images(exp.text.trim()); + if actual_normalized != expected_normalized { mismatches.push(TocMismatch::TextMismatch { entry: entry.clone(), expected: (*exp).clone(), @@ -538,8 +548,10 @@ let level_diff = entry.level.saturating_sub(base_level) as usize; let indent = indent_str.repeat(level_diff); - // Strip markdown formatting from heading text for clean TOC entries - let display_text = strip_markdown_formatting(&entry.text); + // Build display text: strip only links and images (which would create invalid + // nested-link syntax inside `[...]`), but preserve code spans and emphasis so + // the TOC entry reflects the heading's visual appearance. + let display_text = strip_links_and_images(&entry.text); result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor)); } @@ -747,6 +759,86 @@ mod tests { use super::*; use crate::config::MarkdownFlavor; + use regex::Regex; + use std::sync::LazyLock; + + // ---- Test-only helpers for stripping all inline formatting ---- + // These are not used in production code; they exist only to test + // the individual stripping primitives in isolation. + + /// Strip code spans from text, handling multi-backtick spans per CommonMark spec. 
+ fn strip_code_spans(text: &str) -> String { + let chars: Vec<char> = text.chars().collect(); + let len = chars.len(); + let mut result = String::with_capacity(text.len()); + let mut i = 0; + + while i < len { + if chars[i] == '`' { + let open_start = i; + while i < len && chars[i] == '`' { + i += 1; + } + let backtick_count = i - open_start; + + let content_start = i; + let mut found_close = false; + while i < len { + if chars[i] == '`' { + let close_start = i; + while i < len && chars[i] == '`' { + i += 1; + } + if i - close_start == backtick_count { + let content: String = chars[content_start..close_start].iter().collect(); + let stripped = if content.starts_with(' ') && content.ends_with(' ') && content.len() > 1 { + content[1..content.len() - 1].to_string() + } else { + content + }; + result.push_str(&stripped); + found_close = true; + break; + } + } else { + i += 1; + } + } + if !found_close { + for _ in 0..backtick_count { + result.push('`'); + } + let remaining: String = chars[content_start..].iter().collect(); + result.push_str(&remaining); + break; + } + } else { + result.push(chars[i]); + i += 1; + } + } + + result + } + + static TEST_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap()); + static TEST_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap()); + static TEST_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap()); + static TEST_ITALIC_UNDERSCORE: LazyLock<Regex> = + LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap()); + + /// Strip all inline markdown formatting from text, reducing it to plain text. + /// Builds on `strip_links_and_images` and additionally removes code spans, + /// bold, and italic markers. Used in tests only. 
+ fn strip_markdown_formatting(text: &str) -> String { + let mut result = strip_links_and_images(text); + result = strip_code_spans(&result); + result = TEST_BOLD_ASTERISK.replace_all(&result, "$1").to_string(); + result = TEST_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string(); + result = TEST_ITALIC_ASTERISK.replace_all(&result, "$1").to_string(); + result = TEST_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string(); + result + } fn create_ctx(content: &str) -> LintContext<'_> { LintContext::new(content, MarkdownFlavor::Standard, None) @@ -1753,8 +1845,8 @@ let ctx = create_ctx(content); let fixed = rule.fix(&ctx).unwrap(); - // Generated TOC should have stripped text (bold markers removed) - assert!(fixed.contains("- [Important Section](#important-section)")); + // Generated TOC preserves bold markers in display text; anchor strips them. + assert!(fixed.contains("- [**Important** Section](#important-section)")); } #[test] @@ -1773,8 +1865,8 @@ let ctx = create_ctx(content); let fixed = rule.fix(&ctx).unwrap(); - // Generated TOC should have stripped text (backticks removed) - assert!(fixed.contains("- [Using async Functions](#using-async-functions)")); + // Generated TOC preserves code ticks in display text; anchor strips them. + assert!(fixed.contains("- [Using `async` Functions](#using-async-functions)")); } // ========== Custom Anchor Tests ========== @@ -2008,12 +2100,12 @@ #[test] fn test_toc_with_double_backtick_heading() { let rule = create_enabled_rule(); + // Use fix() to generate the correct TOC (including anchor), then check() + // should produce no warnings on the fixed output. 
let content = r#"# Title <!-- toc --> -- [Using code with backtick](#using-code-with-backtick) - <!-- tocstop --> ## Using ``code with ` backtick`` @@ -2022,14 +2114,25 @@ "#; let ctx = create_ctx(content); // The heading uses double-backtick code span: ``code with ` backtick`` - // After stripping, heading text = "Using code with ` backtick" - // The fix should produce a TOC entry with the stripped text + // TOC display text preserves the code span; anchor is derived from raw text. let fixed = rule.fix(&ctx).unwrap(); - // The generated TOC should have the stripped heading text + + // Verify that the generated TOC entry preserves the double-backtick code span + // in the display text. + let toc_start = fixed.find("<!-- toc -->").unwrap(); + let toc_end = fixed.find("<!-- tocstop -->").unwrap(); + let toc_content = &fixed[toc_start..toc_end]; assert!( - fixed.contains("code with ` backtick") || fixed.contains("code with backtick"), - "Fix should strip double-backtick code span from heading. Got TOC: {}", - &fixed[fixed.find("<!-- toc -->").unwrap()..fixed.find("<!-- tocstop -->").unwrap()] + toc_content.contains("``code with ` backtick``"), + "Fix should preserve double-backtick code span in TOC display text. Got: {toc_content}" + ); + + // After fix, check() must produce no warnings (idempotency check) + let ctx2 = create_ctx(&fixed); + let result = rule.check(&ctx2).unwrap(); + assert!( + result.is_empty(), + "check() should not warn on fixed output. Warnings: {result:?}" ); } @@ -2120,4 +2223,205 @@ let fixed = rule.fix(&ctx).unwrap(); assert_eq!(fixed, content, "TOC in a disabled region should be preserved exactly"); } + + // ========== Inline Formatting Preservation Tests (#634) ========== + + /// Backticks in a heading must be preserved in the TOC display text. + /// The anchor is generated from the raw heading text (which includes backticks) + /// and must still use the stripped form. 
+ #[test] + fn test_fix_code_ticks_preserved_in_toc_display_text() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +### `my header` + +Content. +"#; + let ctx = create_ctx(content); + let fixed = rule.fix(&ctx).unwrap(); + + assert!( + fixed.contains("- [`my header`](#my-header)"), + "Code ticks must be preserved in TOC display text. Got: {fixed}" + ); + } + + /// A correct user-written TOC entry with code ticks must not be re-flagged. + #[test] + fn test_validate_toc_with_code_ticks_is_valid() { + let rule = create_enabled_rule(); + let content = r#"# Title + +<!-- toc --> + +- [`my header`](#my-header) + +<!-- tocstop --> + +## `my header` + +Content. +"#; + let ctx = create_ctx(content); + let result = rule.check(&ctx).unwrap(); + assert!( + result.is_empty(), + "A TOC entry with preserved code ticks should be accepted as valid: {result:?}" + ); + } + + /// A heading with bold/italic preserves emphasis markers in the TOC display text; + /// the anchor is generated from the raw (formatted) heading text and still uses + /// the stripped form. + #[test] + fn test_fix_emphasis_preserved_in_toc_display_text() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +## **bold** and *italic* + +Content. +"#; + let ctx = create_ctx(content); + let fixed = rule.fix(&ctx).unwrap(); + + assert!( + fixed.contains("- [**bold** and *italic*](#bold-and-italic)"), + "Emphasis markers must be preserved in TOC display text. Got: {fixed}" + ); + } + + /// A heading containing a link must have the link stripped from the TOC display + /// text (nested links are invalid in Markdown). + #[test] + fn test_fix_link_in_heading_is_stripped() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +## See [docs](http://example.com) for details + +Content. 
+"#; + let ctx = create_ctx(content); + let fixed = rule.fix(&ctx).unwrap(); + + assert!( + fixed.contains("- [See docs for details](#see-docs-for-details)"), + "Link must be stripped from TOC display text. Got: {fixed}" + ); + // Ensure no URL leaks into TOC entry + let toc_start = fixed.find("<!-- toc -->").unwrap(); + let toc_end = fixed.find("<!-- tocstop -->").unwrap(); + let toc_content = &fixed[toc_start..toc_end]; + assert!( + !toc_content.contains("http://example.com"), + "TOC should not contain link URL: {toc_content}" + ); + } + + /// An image in a heading must still be stripped from the TOC display text. + #[test] + fn test_fix_image_in_heading_is_stripped() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +## Section  Title + +Content. +"#; + let ctx = create_ctx(content); + let fixed = rule.fix(&ctx).unwrap(); + + assert!( + fixed.contains("- [Section icon Title](#section-icon-title)"), + "Image must be stripped from TOC display text. Got: {fixed}" + ); + } + + /// Running fix() twice on a document with inline-formatted headings must + /// produce stable output (idempotency). 
+ #[test] + fn test_fix_idempotent_with_inline_formatting() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +## `code` heading + +### **bold** heading + +## See [link](http://x.com) + +"#; + let ctx = create_ctx(content); + let fixed1 = rule.fix(&ctx).unwrap(); + let ctx2 = create_ctx(&fixed1); + let fixed2 = rule.fix(&ctx2).unwrap(); + + assert_eq!(fixed1, fixed2, "fix() must be idempotent for inline-formatted headings"); + + // After fix, check() must produce no warnings + let warnings = rule.check(&ctx2).unwrap(); + assert!( + warnings.is_empty(), + "check() must not warn after fix() for inline-formatted headings: {warnings:?}" + ); + } + + /// Link-like syntax inside a code span must not be stripped, because it is + /// literal content of the code span and not a real Markdown link. + #[test] + fn test_link_inside_code_span_preserved_in_toc() { + let rule = MD073TocValidation::new(); + let content = r#"# Title + +<!-- toc --> + +<!-- tocstop --> + +## Use `[foo](bar)` syntax + +Content. +"#; + let ctx = create_ctx(content); + let fixed = rule.fix(&ctx).unwrap(); + + // The code span `[foo](bar)` must survive intact in the TOC display text. + // The anchor is generated from the raw heading text by the GitHub algorithm, + // which strips backtick, bracket, and paren characters. Verify only the + // display-text preservation, not the exact anchor (which depends on the anchor + // generation algorithm's treatment of non-alphanumeric chars in code spans). + let toc_start = fixed.find("<!-- toc -->").unwrap(); + let toc_end = fixed.find("<!-- tocstop -->").unwrap(); + let toc_content = &fixed[toc_start..toc_end]; + assert!( + toc_content.contains("Use `[foo](bar)` syntax"), + "Link-like text inside code span must be preserved in TOC display text. 
Got: {toc_content}" + ); + // Also ensure the real link stripping (outside code spans) still works + assert!( + !toc_content.contains("http://"), + "Real links (outside code spans) should be stripped: {toc_content}" + ); + } } ++++++ rumdl.obsinfo ++++++ --- /var/tmp/diff_new_pack.VG9wT0/_old 2026-05-21 18:33:26.209602364 +0200 +++ /var/tmp/diff_new_pack.VG9wT0/_new 2026-05-21 18:33:26.221602857 +0200 @@ -1,5 +1,5 @@ name: rumdl -version: 0.1.95 -mtime: 1779221704 -commit: b2164bb1b33cbc3b416f686180033b8be7374f37 +version: 0.1.96 +mtime: 1779351236 +commit: 73f3a8c02f7b513546ca67df3a1e1c9702062b5d ++++++ vendor.tar.zst ++++++ /work/SRC/openSUSE:Factory/rumdl/vendor.tar.zst /work/SRC/openSUSE:Factory/.rumdl.new.2084/vendor.tar.zst differ: char 7, line 1
