commit 0b2fae66e32bb626611e7ce055c2cd8f41d6e151
Author: Juergen Spitzmueller <[email protected]>
Date: Sat Apr 28 13:31:29 2018 +0200
unicodesymbols: add general way to require a feature only for specific
encodings
A feature can now be required only for specific input or font encodings:
- <feature>=enc1;enc2... Require the feature <feature> only if the
character is used in one of the specified font
or input encodings.
- <feature>!=enc1;enc2... Require the feature <feature> only if the
character is used in a font or input encoding
that is not among the specified.
---
lib/unicodesymbols | 74 +++++++++++++++++++++++++---------------------
src/BufferEncodings.cpp | 5 ++-
src/Encoding.cpp | 7 +---
src/LaTeXFeatures.cpp | 8 ++--
src/Paragraph.cpp | 39 ++++++++++++++++++++++--
5 files changed, 85 insertions(+), 48 deletions(-)
diff --git a/lib/unicodesymbols b/lib/unicodesymbols
index 29fdb11..c4f2303 100644
--- a/lib/unicodesymbols
+++ b/lib/unicodesymbols
@@ -25,30 +25,36 @@
# syntax:
# ucs4 textcommand textpreamble flags mathcommand mathpreamble
-# textcommand and textpreamble are used if the symbol occurs in textmode.
-# mathcommand and mathpreamble are used if the symbol occurs in mathmode.
-# Both mathcommand and mathpreamble are optional.
-# textpreamble and mathpreamble can either be a feature known by the
LaTeXFeatures
-# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
-# Features may be combined using '|', in this case one of the alternatives is
-# chosen. The algorithm tries to satisfy as many requirements as possible.
-# Therefore it may depend on the whole document contents which feature is
chosen.
-# Known flags:
-# - combining This is a combining char that will get combined with a
base char
-# - force Always output replacement command
-# - force=enc1;enc2... Always output replacement command in the specified
encodings.
-# - force!=en1;en2... Always output replacement command in all but the
specified encodings.
-# Symbols are never forced in encodings with iconv name
-# UTF-8 and package none (currently only utf8-plain).
-# - mathalpha This character is considered as a math variable in
mathmode
-# - notermination=text Do not terminate this textcommand (by {} or space).
-# This is set by default if textcommand ends with }.
-# - notermination=math Do not terminate this mathcommand (by {} or space).
-# This is set by default if mathcommand ends with }.
-# - notermination=both Do not terminate this textcommand and mathcommand (by
{} or space).
-# - notermination=none Always terminate this textcommand and mathcommand (by
{} or space).
-# - tipashortcut=<shortcut> Shortcut notation for TIPA
-# - deprecated Do not use this symbol for backwards conversion in LyX
and tex2lyx.
+#
+# * textcommand and textpreamble are used if the symbol occurs in textmode.
+# * mathcommand and mathpreamble are used if the symbol occurs in mathmode.
+# * Both mathcommand and mathpreamble are optional.
+# * textpreamble and mathpreamble can either be a feature known by the
LaTeXFeatures
+# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
+# * Features may be combined using '|', in this case one of the alternatives is
+# chosen. The algorithm tries to satisfy as many requirements as possible.
+# Therefore it may depend on the whole document contents which feature is
chosen.
+# * A feature can be required only for specific input encodings or font
encodings:
+# - <feature>=enc1;enc2... Require the feature <feature> only if the
character is used in
+# one of the specified font or input encodings.
+# - <feature>!=enc1;enc2... Require the feature <feature> only if the
character is used in
+# a font or input encoding that is not among the
specified.
+# * Known flags:
+# - combining This is a combining char that will get combined
with a base char
+# - force Always output replacement command
+# - force=enc1;enc2... Always output replacement command in the
specified encodings.
+# - force!=enc1;enc2... Always output replacement command in all but the
specified encodings.
+# Symbols are never forced in encodings with iconv
name
+# UTF-8 and package none (currently only
utf8-plain).
+# - mathalpha This character is considered as a math variable
in mathmode
+# - notermination=text Do not terminate this textcommand (by {} or
space).
+# This is set by default if textcommand ends with
}.
+# - notermination=math Do not terminate this mathcommand (by {} or
space).
+# This is set by default if mathcommand ends with
}.
+# - notermination=both Do not terminate this textcommand and
mathcommand (by {} or space).
+# - notermination=none Always terminate this textcommand and
mathcommand (by {} or space).
+# - tipashortcut=<shortcut> Shortcut notation for TIPA
+# - deprecated Do not use this symbol for backwards conversion
in LyX and tex2lyx.
#
# 2 Latin-1 Supplement
@@ -186,8 +192,8 @@
0x011f "\\u{g}" "" "mathalpha" "\\breve{g}" # LATIN SMALL
LETTER G WITH BREVE
0x0120 "\\.{G}" "" "mathalpha" "\\dot{G}" # LATIN CAPITAL
LETTER G WITH DOT ABOVE
0x0121 "\\.{g}" "" "mathalpha" "\\dot{g}" # LATIN SMALL
LETTER G WITH DOT ABOVE
-0x0122 "\\c{G}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA
(actually a comma accent, Latvian)
-0x0123 "\\c{g}" "textbaltic"
"mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN
SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
+0x0122 "\\c{G}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA
(actually a comma accent, Latvian)
+0x0123 "\\c{g}" "textbaltic!=L7x"
"mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN
SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
0x0124 "\\^{H}" "" "mathalpha" "\\hat{H}" # LATIN CAPITAL
LETTER H WITH CIRCUMFLEX
0x0125 "\\^{h}" "" "mathalpha" "\\hat{h}" # LATIN SMALL
LETTER H WITH CIRCUMFLEX
#0x0126 "" "" "" "" "" # LATIN CAPITAL LETTER H WITH
STROKE
@@ -206,13 +212,13 @@
0x0133 "ij" ""
"mathalpha,force=utf8x,notermination=both" "ij" "" # LATIN SMALL LIGATURE IJ
0x0134 "\\^{J}" "" "mathalpha" "\\hat{J}" # LATIN CAPITAL
LETTER J WITH CIRCUMFLEX
0x0135 "\\^{\\j}" "" "mathalpha,force=utf8" "\\hat{\\jmath}"
"" # LATIN SMALL LETTER J WITH CIRCUMFLEX
-0x0136 "\\c{K}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA
(actually a comma accent, Latvian)
-0x0137 "\\c{k}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually
a comma accent, Latvian)
+0x0136 "\\c{K}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA
(actually a comma accent, Latvian)
+0x0137 "\\c{k}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually
a comma accent, Latvian)
#0x0138 "" "" "" "" "" # LATIN SMALL LETTER KRA
0x0139 "\\'{L}" "" "mathalpha" "\\acute{L}" # LATIN CAPITAL
LETTER L WITH ACUTE
0x013a "\\'{l}" "" "mathalpha" "\\acute{l}" # LATIN SMALL
LETTER L WITH ACUTE
-0x013b "\\c{L}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA
(actually a comma accent, Latvian)
-0x013c "\\c{l}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually
a comma accent, Latvian)
+0x013b "\\c{L}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA
(actually a comma accent, Latvian)
+0x013c "\\c{l}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually
a comma accent, Latvian)
0x013d "\\v{L}" "" "mathalpha" "L\\mkern-7mu\\mathchar19" #
LATIN CAPITAL LETTER L WITH CARON
0x013e "\\v{l}" "" "mathalpha" "l\\mkern-5mu\\mathchar19" #
LATIN SMALL LETTER L WITH CARON
#0x013f "L\\textperiodcentered" "" "" "" "" # LATIN CAPITAL LETTER L WITH
MIDDLE DOT
@@ -221,8 +227,8 @@
0x0142 "\\l" "" "mathalpha,notermination=math"
"\\mathchar'40\\mkern-5mu l" # LATIN SMALL LETTER L WITH STROKE
0x0143 "\\'{N}" "" "mathalpha" "\\acute{N}" # LATIN CAPITAL
LETTER N WITH ACUTE
0x0144 "\\'{n}" "" "mathalpha" "\\acute{n}" # LATIN SMALL
LETTER N WITH ACUTE
-0x0145 "\\c{N}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA
(actually a comma accent, Latvian)
-0x0146 "\\c{n}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually
a comma accent, Latvian)
+0x0145 "\\c{N}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA
(actually a comma accent, Latvian)
+0x0146 "\\c{n}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually
a comma accent, Latvian)
0x0147 "\\v{N}" "" "mathalpha" "\\check{N}" # LATIN CAPITAL
LETTER N WITH CARON
0x0148 "\\v{n}" "" "mathalpha" "\\check{n}" # LATIN SMALL
LETTER N WITH CARON
0x0149 "'n" "" "force=utf8;utf8x,deprecated" "" "" #
LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
@@ -238,8 +244,8 @@
0x0153 "\\oe" "" "" "" "" # LATIN SMALL LIGATURE OE
0x0154 "\\'{R}" "" "mathalpha" "\\acute{R}" # LATIN CAPITAL
LETTER R WITH ACUTE
0x0155 "\\'{r}" "" "mathalpha" "\\acute{r}" # LATIN SMALL
LETTER R WITH ACUTE
-0x0156 "\\c{R}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA
(actually a comma accent, Latvian)
-0x0157 "\\c{r}" "textbaltic" "mathalpha,force=utf8"
"\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually
a comma accent, Latvian)
+0x0156 "\\c{R}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA
(actually a comma accent, Latvian)
+0x0157 "\\c{r}" "textbaltic!=L7x" "mathalpha,force=utf8"
"\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually
a comma accent, Latvian)
0x0158 "\\v{R}" "" "mathalpha" "\\check{R}" # LATIN CAPITAL
LETTER R WITH CARON
0x0159 "\\v{r}" "" "mathalpha" "\\check{r}" # LATIN SMALL
LETTER R WITH CARON
0x015a "\\'{S}" "" "mathalpha" "\\acute{S}" # LATIN CAPITAL
LETTER S WITH ACUTE
diff --git a/src/BufferEncodings.cpp b/src/BufferEncodings.cpp
index 2cd07f2..82fd3dc 100644
--- a/src/BufferEncodings.cpp
+++ b/src/BufferEncodings.cpp
@@ -93,7 +93,10 @@ void BufferEncodings::validate(char_type c, LaTeXFeatures &
features, bool for_m
while (!feats.empty()) {
string feat;
feats = split(feats, feat, ',');
- features.require(feat);
+ // context-dependent features
are handled
+ // in
Paragraph::Private::validate()
+ if (!contains(feat, '='))
+ features.require(feat);
}
} else
features.addPreambleSnippet(from_utf8(textpreamble));
diff --git a/src/Encoding.cpp b/src/Encoding.cpp
index f6197af..026ad3c 100644
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -589,11 +589,10 @@ bool Encodings::isKnownScriptChar(char_type const c,
string & preamble)
return false;
if (it->second.textpreamble() != "textgreek"
- && it->second.textpreamble() != "textcyrillic"
- && it->second.textpreamble() != "textbaltic")
+ && it->second.textpreamble() != "textcyrillic")
return false;
- if (preamble.empty() && it->second.textpreamble() != "textbaltic") {
+ if (preamble.empty()) {
preamble = it->second.textpreamble();
return true;
}
@@ -609,8 +608,6 @@ bool Encodings::needsScriptWrapper(string const & script,
string const & fontenc
return (fontenc != "T2A" && fontenc != "T2B"
&& fontenc != "T2C" && fontenc != "X2");
}
- if (script == "textbaltic")
- return (fontenc != "L7x");
return false;
}
diff --git a/src/LaTeXFeatures.cpp b/src/LaTeXFeatures.cpp
index 49b93f6..17d7ae4 100644
--- a/src/LaTeXFeatures.cpp
+++ b/src/LaTeXFeatures.cpp
@@ -1388,19 +1388,19 @@ TexString LaTeXFeatures::getMacros() const
// non-standard text accents:
if (mustProvide("textcommaabove") || mustProvide("textcommaaboveright")
||
- mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
+ mustProvide("textcommabelow") || mustProvide("textbaltic"))
macros << lyxaccent_def;
- if (mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
+ if (mustProvide("textcommabelow") || mustProvide("textbaltic"))
macros << textcommabelow_def << '\n';
- if (mustProvide("textcommaabove") || mustProvide("textbalticdefs"))
+ if (mustProvide("textcommaabove") || mustProvide("textbaltic"))
macros << textcommaabove_def << '\n';
if (mustProvide("textcommaaboveright"))
macros << textcommaaboveright_def << '\n';
- if (mustProvide("textbalticdefs"))
+ if (mustProvide("textbaltic"))
macros << textbaltic_def << '\n';
// split-level fractions
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index dc41fb9..5fdfb15 100644
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -1553,21 +1553,52 @@ void Paragraph::Private::validate(LaTeXFeatures &
features) const
// then the contents
BufferParams const bp = features.runparams().is_child
? features.buffer().masterParams() : features.buffer().params();
- string bscript = "textbaltic";
for (pos_type i = 0; i < int(text_.size()) ; ++i) {
char_type c = text_[i];
+ CharInfo const & ci = Encodings::unicodeCharInfo(c);
if (c == 0x0022) {
if (features.runparams().isFullUnicode() &&
bp.useNonTeXFonts)
features.require("textquotedblp");
else if (bp.main_font_encoding() != "T1"
|| ((&owner_->getFontSettings(bp,
i))->language()->internalFontEncoding()))
features.require("textquotedbl");
- } else if (Encodings::isKnownScriptChar(c, bscript)){
+ } else if (ci.textfeature() && contains(ci.textpreamble(),
'=')) {
+ // features that depend on the font or input encoding
+ string feats = ci.textpreamble();
string fontenc = (&owner_->getFontSettings(bp,
i))->language()->fontenc(bp);
if (fontenc.empty())
fontenc = features.runparams().main_fontenc;
- if (Encodings::needsScriptWrapper("textbaltic",
fontenc))
- features.require("textbalticdefs");
+ while (!feats.empty()) {
+ string feat;
+ feats = split(feats, feat, ',');
+ if (contains(feat, "!=")) {
+ // a feature that is required except
for the specified
+ // font or input encodings
+ string realfeature;
+ string const contexts =
ltrim(split(feat, realfeature, '!'), "=");
+ // multiple encodings are separated by
semicolon
+ vector<string> context =
getVectorFromString(contexts, ";");
+ // require feature if the context
matches neither current font
+ // nor input encoding
+ if (std::find(context.begin(),
context.end(), fontenc) == context.end()
+ && std::find(context.begin(),
context.end(),
+
features.runparams().encoding->name()) == context.end())
+ features.require(realfeature);
+ } else if (contains(feat, '=')) {
+ // a feature that is required only for
the specified
+ // font or input encodings
+ string realfeature;
+ string const contexts = split(feat,
realfeature, '=');
+ // multiple encodings are separated by
semicolon
+ vector<string> context =
getVectorFromString(contexts, ";");
+ // require feature if the context
matches either current font
+ // or input encoding
+ if (std::find(context.begin(),
context.end(), fontenc) != context.end()
+ || std::find(context.begin(),
context.end(),
+
features.runparams().encoding->name()) != context.end())
+ features.require(realfeature);
+ }
+ }
} else if (!bp.use_dash_ligatures
&& (c == 0x2013 || c == 0x2014)
&& bp.useNonTeXFonts