commit 554eb63d3967fb88a38452114265d8a1bd5fee04
Author: Thibaut Cuvelier <[email protected]>
Date: Fri Feb 7 00:52:18 2025 +0100
InsetERT: recognise when the contents look sufficiently like XML and
attempt to output them as raw XML.
This behaviour is closer to how LyX 2.3 and earlier versions handled ERTs for
DocBook: they were always output as SGML. This no longer makes sense for the
new DocBook support (you can export basically any document to DocBook and have
reasonable output, while previously you had to use a DocBook template), but
this heuristic should capture most previous use cases. (If the ERT is just XML:
output it. Otherwise, it's probably not meant to be output as-is -- LaTeX,
text, etc.)
---
src/insets/InsetERT.cpp | 41 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/src/insets/InsetERT.cpp b/src/insets/InsetERT.cpp
index baf3b023ae..bfececdbab 100644
--- a/src/insets/InsetERT.cpp
+++ b/src/insets/InsetERT.cpp
@@ -30,6 +30,7 @@
#include "support/TempFile.h"
#include "Encoding.h"
+#include <algorithm>
#include <sstream>
#include <regex>
#include <iostream>
@@ -129,6 +130,7 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const &
runparams) const
// Try to recognise some commands to have a nicer DocBook output.
bool output_as_comment = true;
+ docstring os_trimmed = trim(os.str());
// First step: some commands have a direct mapping to DocBook, mostly
because the mapping is simply text or
// an XML entity.
@@ -137,8 +139,6 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const &
runparams) const
// recognised should simply be put in comments: have a list of
elements that are either already recognised or are
// not yet recognised? Global transformations like \string should then
come first.)
{
- docstring os_trimmed = trim(os.str());
-
// Rewrite \"u to \"{u}.
static regex const regNoBraces(R"(^\\\W\w)");
if (regex_search(to_utf8(os_trimmed), regNoBraces)) {
@@ -190,6 +190,43 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const
& runparams) const
}
}
+ // Third step: maybe this is XML, after all.
+ // Reminder: < is &lt;, > is &gt;.
+ if (prefixIs(os_trimmed, from_ascii("<")) &&
+ suffixIs(os_trimmed, from_ascii(">"))) {
+ // To avoid false positives, ensure that the contents are only
full XML tags, like:
+ // `<revhistory>`. This means that, in some cases, the user
might expect this case
+ // to be triggered, but we decline to output it as raw XML to
avoid errors. For
+ // instance: ERT[<revhistory] ERT[>]. It's quite unlikely for
LaTeX code to have
+ // exactly the same number of < and >, but well-formed XML
always does. This check
+ // does not enforce that the full ERT contains a valid XML
excerpt: there might be
+ // a tag opening without a closing, like: ERT[<revhistory>]
ERT[</revhistory>].
+ auto count_substrings = [&os_trimmed](const docstring &
substring) -> int {
+ // Hypothesis: no overlapping sequence. This is
perfectly fine for this use case.
+ int occurrences = 0;
+ std::string::size_type pos = 0;
+ while ((pos = os_trimmed.find(substring, pos)) !=
std::string::npos) {
+ ++occurrences;
+ pos += substring.length();
+ }
+ return occurrences;
+ };
+ const int num_open_tags = count_substrings(from_ascii("<"));
+ const int num_close_tags = count_substrings(from_ascii(">"));
+
+ if (num_close_tags == num_open_tags) {
+ // Decide this ERT is close enough to well-formed XML:
unescape
+ // XML elements and output the string as-is (to avoid
that
+ // XMLStream escapes the characters again).
+ xs << XMLStream::ESCAPE_NONE
+ << subst(subst(subst(os.str(),
+ from_ascii("<"), from_ascii("<")),
+ from_ascii(">"), from_ascii(">")),
+ from_ascii("&"), from_ascii("&"));
+ output_as_comment = false;
+ }
+ }
+
// Otherwise, output the ERT as a comment with the appropriate escaping
if the command is not recognised.
if (output_as_comment) {
xs << XMLStream::ESCAPE_NONE << "<!-- ";
--
lyx-cvs mailing list
[email protected]
https://lists.lyx.org/mailman/listinfo/lyx-cvs