I made a small mistake in the test suite, which is fixed in this version
of the patch. Sorry about that.

From 5699d163f8b0f4f963adba7a61127a4bf545958b Mon Sep 17 00:00:00 2001
From: Lukas Epple <em...@lukasepple.de>
Date: Mon, 21 Jul 2025 11:46:44 +0200
Subject: [PATCH 1/6] lisp/ox-html.el: Convert numeric to named character
references
* lisp/ox-html.el (org-html-special-string-regexps, org-html--tags,
org-html-format-headline-default-function, org-html-timestamp,
org-html-table-cell, org-html-verse-block): Replace numeric character
references with their named equivalent. This should make the code
clearer since the reader only needs to remember character names instead
of hexadecimal Unicode code points. The equivalency table can be
found in section 13.5 of the HTML standard:
https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references.
All used named character references were introduced in HTML 4.0 (or
earlier) which means that they should be supported in XHTML as well.
(org-html-checkbox-types): Change &#xa0; to &nbsp; as
elsewhere. Unfortunately, &#x2610; and &#x2611; don't have a
corresponding named character reference in the HTML standard, but it
should be clear enough what they are supposed to represent from the
context.
* testing/lisp/test-ox-html.el (ox-html/checkbox-ascii): Adjust to
use of &nbsp; in `org-html-checkbox-types'. Since &nbsp; isn't part
of (vanilla) XML, we need to use `libxml-parse-html-region', which
changes the output a little bit: The surrounding html and body elements
are retained (?) as well as the newlines between the li elements.
---
lisp/ox-html.el | 20 ++++++++++----------
testing/lisp/test-ox-html.el | 24 ++++++++++++++++--------
2 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/lisp/ox-html.el b/lisp/ox-html.el
index 1fee9f05d..f60bda02b 100644
--- a/lisp/ox-html.el
+++ b/lisp/ox-html.el
@@ -233,10 +233,10 @@ For blocks that should contain headlines, use the HTML_CONTAINER
property on the headline itself.")
(defconst org-html-special-string-regexps
- '(("\\\\-" . "&#x00ad;") ; shy
- ("---\\([^-]\\)" . "&#x2014;\\1") ; mdash
- ("--\\([^-]\\)" . "&#x2013;\\1") ; ndash
- ("\\.\\.\\." . "&#x2026;")) ; hellip
+ '(("\\\\-" . "&shy;")
+ ("---\\([^-]\\)" . "&mdash;\\1")
+ ("--\\([^-]\\)" . "&ndash;\\1")
+ ("\\.\\.\\." . "&hellip;"))
"Regular expressions for special string conversion.")
(defvar org-html--id-attr-prefix "ID-"
@@ -1130,7 +1130,7 @@ org-info.js for your website."
((on . "&#x2611;") (off . "&#x2610;") (trans . "&#x2610;")))
(ascii .
((on . "<code>[X]</code>")
- (off . "<code>[&#xa0;]</code>")
+ (off . "<code>[&nbsp;]</code>")
(trans . "<code>[-]</code>")))
(html .
((on . "<input type='checkbox' checked='checked' />")
@@ -2346,7 +2346,7 @@ INFO is a plist containing export options."
(concat (plist-get info :html-tag-class-prefix)
(org-html-fix-class-name tag))
tag))
- tags "&#xa0;"))))
+ tags "&nbsp;"))))
;;;; Src Code
@@ -2872,7 +2872,7 @@ description of TODO, PRIORITY, TEXT, TAGS, and INFO arguments."
(concat todo (and todo " ")
priority (and priority " ")
text
- (and tags "&#xa0;&#xa0;&#xa0;") tags)))
+ (and tags "&nbsp;&nbsp;&nbsp;") tags)))
(defun org-html--container (headline info)
"Return HTML container name for HEADLINE as a string.
@@ -3763,7 +3763,7 @@ channel."
" align=\"%s\"" " class=\"org-%s\"")
(org-export-table-cell-alignment table-cell info)))))
(when (or (not contents) (string= "" (org-trim contents)))
- (setq contents "&#xa0;"))
+ (setq contents "&nbsp;"))
(cond
((and (org-export-table-has-header-p table info)
(= 1 (org-export-table-row-group table-row info)))
@@ -3942,7 +3942,7 @@ information."
:post-blank 0))
(value (org-html-plain-text (org-timestamp-translate timestamp-no-blank) info)))
(format "<span class=\"timestamp-wrapper\"><span class=\"timestamp\">%s</span></span>"
- (replace-regexp-in-string "--" "&#x2013;" value))))
+ (replace-regexp-in-string "--" "&ndash;" value))))
;;;; Underline
@@ -3972,7 +3972,7 @@ contextual information."
(format "<p class=\"verse\">\n%s</p>"
;; Replace leading white spaces with non-breaking spaces.
(replace-regexp-in-string
- "^[ \t]+" (lambda (m) (org-html--make-string (length m) "&#xa0;"))
+ "^[ \t]+" (lambda (m) (org-html--make-string (length m) "&nbsp;"))
;; Replace each newline character with line break. Also
;; remove any trailing "br" close-tag so as to avoid
;; duplicates.
diff --git a/testing/lisp/test-ox-html.el b/testing/lisp/test-ox-html.el
index c02d47fea..ec8a7b18f 100644
--- a/testing/lisp/test-ox-html.el
+++ b/testing/lisp/test-ox-html.el
@@ -822,13 +822,21 @@ $x$"
(skip-unless (libxml-available-p))
(should
(equal
- `(ul ((class . "org-ul"))
- (li ((class . "off"))
- (code nil ,(format "[%c]" (char-from-name "NO-BREAK SPACE"))) " not yet")
- (li ((class . "on"))
- (code nil "[X]") " I am done")
- (li ((class . "trans"))
- (code nil "[-]") " unclear"))
+ `(html nil
+ (body nil
+ (ul ((class . "org-ul"))
+ (li ((class . "off"))
+ (code nil ,(format "[%c]" (char-from-name "NO-BREAK SPACE"))) " not yet")
+ "
+"
+ (li ((class . "on"))
+ (code nil "[X]") " I am done")
+ "
+"
+ (li ((class . "trans"))
+ (code nil "[-]") " unclear")
+ "
+")))
(org-test-with-temp-text "
- [ ] not yet
- [X] I am done
@@ -839,7 +847,7 @@ $x$"
(org-export-to-buffer 'html export-buffer
nil nil nil t nil)
(with-current-buffer export-buffer
- (libxml-parse-xml-region (point-min) (point-max))))))))
+ (libxml-parse-html-region (point-min) (point-max))))))))
(ert-deftest ox-html/checkbox-html ()
"Test HTML checkbox rendering"
--
2.50.0