commit f70409b3b00cca400c01ea2a28b174e360b9024f
Author: Thibaut Cuvelier <[email protected]>
Date: Mon Nov 4 02:07:41 2024 +0100
MathStream: for MathML Core, perform the font conversion per character when
a font is active.
"Per-character" means per character as a user sees it: an entity counts as a
single character, so you need to find entities before doing the mapping!
---
src/mathed/InsetMathBoldSymbol.cpp | 14 ++-
src/mathed/InsetMathBox.cpp | 6 +-
src/mathed/InsetMathBrace.cpp | 4 +-
src/mathed/InsetMathChar.cpp | 4 +-
src/mathed/InsetMathSize.cpp | 4 +-
src/mathed/InsetMathSymbol.cpp | 9 +-
src/mathed/MathStream.cpp | 215 ++++++++++++++++++++++++++++++++++++-
src/mathed/MathStream.h | 42 ++++++++
8 files changed, 284 insertions(+), 14 deletions(-)
diff --git a/src/mathed/InsetMathBoldSymbol.cpp
b/src/mathed/InsetMathBoldSymbol.cpp
index cd78ff7a68..ea9be7edf8 100644
--- a/src/mathed/InsetMathBoldSymbol.cpp
+++ b/src/mathed/InsetMathBoldSymbol.cpp
@@ -110,9 +110,17 @@ void InsetMathBoldSymbol::write(TeXMathStream & os) const
void InsetMathBoldSymbol::mathmlize(MathMLStream & ms) const
{
- ms << MTagInline("mstyle", "mathvariant='bold'")
- << cell(0)
- << ETagInline("mstyle");
+ if (ms.version() == MathMLVersion::mathmlCore) {
+ // All three kinds (boldsymbol, bm, hm) have the same meaning and are
+ // recognised as bold in MathFontInfo::fromMacro.
+ MathFontInfo old_font =
ms.fontInfo().mergeWith(MathFontInfo::fromMacro(from_ascii("boldsymbol")));
+ ms << cell(0);
+ ms.fontInfo() = old_font; // put old_font back once the cell has been written
+ } else {
+ ms << MTagInline("mstyle", "mathvariant='bold'")
+ << cell(0)
+ << ETagInline("mstyle");
+ }
}
diff --git a/src/mathed/InsetMathBox.cpp b/src/mathed/InsetMathBox.cpp
index 578370ffd1..17415adcbf 100644
--- a/src/mathed/InsetMathBox.cpp
+++ b/src/mathed/InsetMathBox.cpp
@@ -406,9 +406,9 @@ void InsetMathBoxed::infoize(odocstream & os) const
void InsetMathBoxed::mathmlize(MathMLStream & ms) const
{
- ms << MTag("mrow", "class='boxed'");
- ms << cell(0);
- ms << ETag("mrow");
+ ms << MTag("mrow", "class='boxed'")
+ << cell(0)
+ << ETag("mrow");
}
diff --git a/src/mathed/InsetMathBrace.cpp b/src/mathed/InsetMathBrace.cpp
index 4455c42a17..544bbf63ec 100644
--- a/src/mathed/InsetMathBrace.cpp
+++ b/src/mathed/InsetMathBrace.cpp
@@ -102,7 +102,9 @@ void InsetMathBrace::octave(OctaveStream & os) const
void InsetMathBrace::mathmlize(MathMLStream & ms) const
{
- ms << MTag("mrow") << cell(0) << ETag("mrow");
+ ms << MTag("mrow")
+ << cell(0)
+ << ETag("mrow");
}
diff --git a/src/mathed/InsetMathChar.cpp b/src/mathed/InsetMathChar.cpp
index 801cab60aa..4ff2e2a462 100644
--- a/src/mathed/InsetMathChar.cpp
+++ b/src/mathed/InsetMathChar.cpp
@@ -262,7 +262,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const
if (ms.inText()) {
if (entity.empty())
- ms << char_;
+ ms << StartRespectFont() << char_ << StopRespectFont();
else
ms << from_ascii(entity);
return;
@@ -279,7 +279,7 @@ void InsetMathChar::mathmlize(MathMLStream & ms) const
(isAlphaASCII(char_) || Encodings::isMathAlpha(char_))
? "mi" : "mo";
ms << MTagInline(type, std::string(type) == "mo" ? "stretchy='false'" :
"")
- << char_type(char_)
+ << StartRespectFont() << char_type(char_) << StopRespectFont()
<< ETagInline(type);
}
diff --git a/src/mathed/InsetMathSize.cpp b/src/mathed/InsetMathSize.cpp
index 5055f2d73e..97308c752f 100644
--- a/src/mathed/InsetMathSize.cpp
+++ b/src/mathed/InsetMathSize.cpp
@@ -85,7 +85,9 @@ void InsetMathSize::mathmlize(MathMLStream & ms) const
stringstream attrs;
attrs << "displaystyle='" << (dispstyle ? "true" : "false")
<< "' scriptlevel='" << scriptlevel << "'";
- ms << MTag("mstyle", attrs.str()) << cell(0) << ETag("mstyle");
+ ms << MTag("mstyle", attrs.str())
+ << cell(0)
+ << ETag("mstyle");
}
diff --git a/src/mathed/InsetMathSymbol.cpp b/src/mathed/InsetMathSymbol.cpp
index 14e84fdfd2..69d6b03d98 100644
--- a/src/mathed/InsetMathSymbol.cpp
+++ b/src/mathed/InsetMathSymbol.cpp
@@ -161,11 +161,16 @@ void InsetMathSymbol::mathmlize(MathMLStream & ms) const
// FIXME We may need to do more interesting things
// with MathMLtype.
ms << MTagInline(sym_->MathMLtype());
- if (sym_->xmlname == "x")
+ if (sym_->xmlname == "x") {
// unknown so far
ms << name();
- else
+ } else if (strcmp(sym_->MathMLtype(), "mi") == 0) {
+ // If it's a character or a Greek letter (i.e. "mi"), map to a
font.
+ ms << StartRespectFont() << sym_->xmlname << StopRespectFont();
+ } else {
+ // Operators do not have font variants.
ms << sym_->xmlname;
+ }
ms << ETagInline(sym_->MathMLtype());
}
diff --git a/src/mathed/MathStream.cpp b/src/mathed/MathStream.cpp
index c6f78f72c1..ea1459e5d8 100644
--- a/src/mathed/MathStream.cpp
+++ b/src/mathed/MathStream.cpp
@@ -27,6 +27,8 @@
#include <cstring>
#include <FontInfo.h>
+#include "support/lstrings.h"
+
using namespace std;
namespace lyx {
@@ -69,7 +71,8 @@ MathFontInfo MathFontInfo::fromMacro(const docstring& tag)
font.shape_ = MATH_UP_SHAPE;
else if (tag == "frak" || tag == "mathfrak")
font.family_ = MATH_FRAKTUR_FAMILY;
- else if (tag == "mathbf" || tag == "textbf")
+ else if (tag == "mathbf" || tag == "textbf"
+ || tag == "boldsymbol" || tag == "bm" || tag == "hm")
font.series_ = MATH_BOLD_SERIES;
else if (tag == "mathbb" || tag == "mathbbm"
|| tag == "mathds")
@@ -193,6 +196,139 @@ std::string MathFontInfo::toHTMLSpanClass() const
}
+docstring MathFontInfo::convertCharacterToUnicodeEntityWithFont(const docstring & c, bool in_text) const
+{
+	docstring const mapped = convertCharacterToUnicodeWithFont(c, in_text); // map first: testing c itself would skip the font mapping for every single character
+	if (mapped.size() <= 1)
+		return mapped; // a plain character (unknown or mapped to itself) needs no entity
+	// Otherwise, it's an entity, like 0x1d44e (as a hexadecimal number). NOTE(review): confirm that the stored form gives a valid reference once wrapped.
+	return from_ascii("&#") + mapped + from_ascii(";");
+}
+
+
+docstring MathFontInfo::convertCharacterToUnicodeWithFont(const docstring & c,
bool in_text) const
+{
+ MathVariantList const & mvl = mathedVariantList();
+
+ // If this character is unknown to the variant list, exit early and return it unchanged.
+ const auto it = mvl.find(support::ascii_lowercase(c)); // the lookup key is c in ASCII lower case
+ if (it == mvl.end()) {
+ return c;
+ }
+
+ // Check for the best variant. Heuristically:
+ // - First check the font type: normal, script, fraktur, etc. This is
the
+ // most constraining factor.
+ // - Second, check for shape and series.
+ // If the variant for one factor does not exist, ignore it and continue
+ // the search. Hence, we store the copies of family, shape, and series.
+ UnicodeVariants const & variants = it->second;
+
+ MathFontFamily family = family_;
+ MathFontSeries series = series_;
+ MathFontShape shape = shape_;
+
+ if (family == MATH_INHERIT_FAMILY) { // an inherited family is treated as the normal one
+ family = MATH_NORMAL_FAMILY;
+ }
+ if (series == MATH_INHERIT_SERIES) { // an inherited series is treated as medium
+ series = MATH_MEDIUM_SERIES;
+ }
+ if (shape == MATH_INHERIT_SHAPE) {
+ shape = in_text ? MATH_UP_SHAPE : MATH_ITALIC_SHAPE; // inherited shape: upright in text, italic otherwise
+ }
+
+ if (family == MATH_MONOSPACE_FAMILY) {
+ if (!variants.monospace.empty()) return variants.monospace;
+ family = MATH_NORMAL_FAMILY; // no monospace variant: fall back to the normal family
+ }
+
+ if (family == MATH_DOUBLE_STRUCK_FAMILY) {
+ if (!variants.double_struck.empty()) return
variants.double_struck;
+ family = MATH_NORMAL_FAMILY; // no double-struck variant: fall back to the normal family
+ }
+
+ if (family == MATH_FRAKTUR_FAMILY) {
+ if (series == MATH_BOLD_SERIES) {
+ if (!variants.bold_fraktur.empty()) return
variants.bold_fraktur;
+ series = MATH_MEDIUM_SERIES; // NOTE(review): this downgrade persists, so the later normal-family fallback no longer tries bold -- confirm intended
+ }
+
+ if (series == MATH_MEDIUM_SERIES) {
+ if (!variants.fraktur.empty()) return variants.fraktur;
+ }
+
+ family = MATH_NORMAL_FAMILY; // no fraktur variant at all: fall back to the normal family
+ }
+
+ if (family == MATH_SCRIPT_FAMILY) {
+ if (series == MATH_BOLD_SERIES) {
+ if (!variants.bold_script.empty()) return
variants.bold_script;
+ series = MATH_MEDIUM_SERIES; // same persistent downgrade as for fraktur
+ }
+
+ if (series == MATH_MEDIUM_SERIES) {
+ if (!variants.script.empty()) return variants.script;
+ }
+
+ family = MATH_NORMAL_FAMILY; // no script variant at all: fall back to the normal family
+ }
+
+ if (family == MATH_SANS_FAMILY) {
+ if (series == MATH_BOLD_SERIES) {
+ if (shape == MATH_UP_SHAPE) {
+ if (!variants.bold_sans.empty()) return
variants.bold_sans;
+ } else {
+ if (!variants.bold_italic_sans.empty()) return
variants.bold_italic_sans;
+ }
+ series = MATH_MEDIUM_SERIES; // same persistent downgrade as for fraktur
+ }
+
+ if (series == MATH_MEDIUM_SERIES) {
+ if (shape == MATH_UP_SHAPE) {
+ if (!variants.sans.empty()) return
variants.sans;
+ } else {
+ if (!variants.italic_sans.empty()) return
variants.italic_sans;
+ }
+ }
+
+ family = MATH_NORMAL_FAMILY; // no sans variant at all: fall back to the normal family
+ }
+
+ if (family != MATH_NORMAL_FAMILY) {
+ LYXERR(Debug::MATHED,
+ "Unexpected case in
MathFontInfo::convertCharacterToUnicodeWithFont"
+ <<"(c = " << to_ascii(c) << ", in_text = " <<
in_text << "), unrecognised family: "
+ << "family_ = " << family_ << ", series = " <<
series_ << ", shape = " << shape_);
+ // Continue processing to return a value that matches the other
constraints.
+ }
+
+ if (series == MATH_BOLD_SERIES) {
+ if (shape == MATH_UP_SHAPE) {
+ if (!variants.bold.empty()) return variants.bold;
+ } else {
+ if (!variants.bold_italic.empty()) return
variants.bold_italic;
+ }
+ series = MATH_MEDIUM_SERIES;
+ }
+
+ if (series == MATH_MEDIUM_SERIES) {
+ if (shape == MATH_UP_SHAPE) {
+ if (!variants.character.empty()) return
variants.character;
+ } else {
+ if (!variants.italic.empty()) return variants.italic;
+ }
+ }
+
+ // The previous cases should have matched, unless this code is not up
to date.
+ LYXERR(Debug::MATHED,
+ "Unexpected case in
MathFontInfo::convertCharacterToUnicodeWithFont"
+ <<"(c = " << c << ", in_text = " << in_text << "),
unrecognised series/shape: "
+ << "family_ = " << family_ << ", series = " << series_
<< ", shape = " << shape_);
+ return variants.character; // last resort: the character member of the variants, even if it is empty
+}
+
+
NormalStream & operator<<(NormalStream & ns, MathAtom const & at)
{
at->normalize(ns);
@@ -515,7 +651,68 @@ MathMLStream & operator<<(MathMLStream & ms, MathData
const & ar)
MathMLStream & operator<<(MathMLStream & ms, docstring const & s)
{
	ms.beforeText();
-	ms.os_ << s;
+	if (!ms.respect_font_) {
+		// Ignore fonts for now. This is especially useful for tags.
+		ms.os_ << s;
+	} else {
+		// Only care about fonts if they are currently enabled.
+		if (ms.version() == MathMLVersion::mathmlCore) {
+			// New case: MathML uses Unicode characters to indicate fonts.
+			// If possible, avoid doing the mapping: it involves looking up a hash
+			// table and doing a lot of conditions *per character*
+			bool needs_no_mapping =
+				(ms.current_font_.family() == MathFontInfo::MathFontFamily::MATH_INHERIT_FAMILY ||
+					ms.current_font_.family() == MathFontInfo::MathFontFamily::MATH_NORMAL_FAMILY) &&
+				(ms.current_font_.series() == MathFontInfo::MathFontSeries::MATH_INHERIT_SERIES ||
+					ms.current_font_.series() == MathFontInfo::MathFontSeries::MATH_MEDIUM_SERIES) &&
+				(ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_INHERIT_SHAPE ||
+					(ms.in_mtext_ && ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_UP_SHAPE) ||
+					(!ms.in_mtext_ && ms.current_font_.shape() == MathFontInfo::MathFontShape::MATH_ITALIC_SHAPE));
+			if (needs_no_mapping) {
+				ms.os_ << s;
+			} else {
+				// Perform the conversion character per character (which might
+				// mean consume a complete Greek entity!).
+				docstring buf;
+				bool within_entity = false;
+				for (const char_type c : s) {
+					if (!within_entity && c == '&') { // New entity.
+						within_entity = true;
+					} else if (within_entity && c == '#') { // Still new entity.
+						// Nothing to do: unicode_alphanum_variants only has
+						// the code point, not the full XML/HTML entity.
+					} else if (within_entity && c == ';') { // End of entity.
+						if (buf.starts_with('x')) {
+							// An HTML entity is typically &#x3B1;, but
+							// unicode_alpha_num_variants has 0x3B1.
+							buf.insert(0, from_ascii("0"));
+						}
+						ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+						buf.clear();
+						within_entity = false;
+					} else if (within_entity) { // Within new entity.
+						buf += c;
+					} else {
+						buf = docstring(1, c); // (count, character): docstring(c, 1) would build c copies of U+0001
+						ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+						buf.clear();
+					}
+
+					if (!within_entity && !buf.empty()) {
+						lyxerr << "Assertion failed in MathMLStream::operator<<(docstring): not reading an entity "
+						       << "while the buffer is not empty (" << buf << ")";
+					}
+				}
+				if (!buf.empty()) { // The string ended in the middle of an entity: report it, then flush what was read.
+					lyxerr << "Assertion failed in MathMLStream::operator<<(docstring): the buffer is not empty (" << buf << ")";
+					ms.os_ << ms.current_font_.convertCharacterToUnicodeEntityWithFont(buf, ms.inText());
+				}
+			}
+		} else {
+			// Old case (MathML3): MathML uses mathvariant to indicate fonts.
+			ms.os_ << s;
+		}
+	}
	return ms;
}
@@ -606,6 +803,20 @@ MathMLStream & operator<<(MathMLStream & ms, CTag const &
t)
}
+MathMLStream & operator<<(MathMLStream & ms, StartRespectFont)
+{
+ ms.respect_font_ = true; // only raises the flag, nothing is written to the stream
+ return ms;
+}
+
+
+MathMLStream & operator<<(MathMLStream & ms, StopRespectFont)
+{
+ ms.respect_font_ = false; // unconditionally off: the previous state is not restored, so Start/Stop pairs do not nest
+ return ms;
+}
+
+
//////////////////////////////////////////////////////////////////////
diff --git a/src/mathed/MathStream.h b/src/mathed/MathStream.h
index d611ec8c7c..4ae9b7e2fc 100644
--- a/src/mathed/MathStream.h
+++ b/src/mathed/MathStream.h
@@ -88,6 +88,32 @@ public:
/// Transforms this font into a class attribute for the HTML span tag.
std::string toHTMLSpanClass() const;
+ /// Converts the character into the closest Unicode character that
encodes
+ /// this font. If there is only a partial mapping, parts of the mapping
are
+ /// applied. For instance, take the character C and a bold-italic font.
+ /// - If there is a bold-italic mapping for this character, it is returned.
+ /// - If there is only a bold mapping for this character, a bold character
+ /// is returned. This font encoding is the closest one to the font.
+ /// - If there are two mappings (one bold, one italic), one of them is
+ /// returned (arbitrary choice between the two).
+ /// - If there are no mappings, the original character is returned.
+ /// The mappings are defined in the global variable theMathVariantList.
+ ///
+ /// The character is supposed to be a single Latin letter (a-z, A-Z) or
+ /// digit (0-9) or the entity encoding a Greek character (0x3b1-0x3c9
+ /// for lower case, 0x391-0x3a9 for upper case), exactly like the
+ /// `unicode_alphanum_variants` file.
+ ///
+ /// If in_text, the default shape is up. If not in_text, the default
shape
+ /// is italic. This behaviour matches that of MathMLStream::in_text_.
+ [[nodiscard]]
+ docstring convertCharacterToUnicodeWithFont(const docstring & c, bool
in_text) const;
+ /// Converts the character into the closest Unicode character that
encodes
+ /// this font as an entity if the character is not ASCII.
+ /// Also see convertCharacterToUnicodeWithFont.
+ [[nodiscard]]
+ docstring convertCharacterToUnicodeEntityWithFont(const docstring & c,
bool in_text) const;
+
private:
MathFontFamily family_;
MathFontSeries series_;
@@ -438,6 +464,14 @@ public:
};
+/// Signalling elements for font handling. They do not output anything per se,
+/// they alter the state of the stream to either start or stop respecting
+/// fonts (i.e. output Unicode entities encoding the font, such as
+/// "Mathematical Italic Small A" &#x1d44e;).
+struct StartRespectFont{};
+struct StopRespectFont{};
+
+
/// Throw MathExportException to signal that the attempt to export
/// some math in the current format did not succeed. E.g., we can't
/// export xymatrix as MathML, so that will throw, and we'll fall back
@@ -503,6 +537,8 @@ private:
MathStyle font_math_style_;
/// Current font (which might be nested).
MathFontInfo current_font_;
+ /// whether the output shall respect the current font
+ bool respect_font_ = false;
///
friend class SetMode;
friend MathMLStream & operator<<(MathMLStream &, MathAtom const &);
@@ -513,6 +549,8 @@ private:
friend MathMLStream & operator<<(MathMLStream &, ETag const &);
friend MathMLStream & operator<<(MathMLStream &, ETagInline const &);
friend MathMLStream & operator<<(MathMLStream &, CTag const &);
+ friend MathMLStream & operator<<(MathMLStream &, StartRespectFont);
+ friend MathMLStream & operator<<(MathMLStream &, StopRespectFont);
};
///
@@ -537,6 +575,10 @@ MathMLStream & operator<<(MathMLStream &, ETag const &);
MathMLStream & operator<<(MathMLStream &, ETagInline const &);
///
MathMLStream & operator<<(MathMLStream &, CTag const &);
+/// Starts respecting fonts until meeting StopRespectFont.
+MathMLStream & operator<<(MathMLStream &, StartRespectFont);
+/// Stops respecting fonts.
+MathMLStream & operator<<(MathMLStream &, StopRespectFont);
/// A simpler version of ModeSpecifier, for MathML
--
lyx-cvs mailing list
[email protected]
https://lists.lyx.org/mailman/listinfo/lyx-cvs