commit f35ed0c16836db4e8241770ad25925af9b94009e
Author: Georg Baum <[email protected]>
Date: Fri Jan 8 21:06:24 2016 +0100
Improve \AA and \aa tex2lyx import
The lib/unicodesymbols part is based on work by Günter Milde:
Both \r{A} and \AA (resp. \r{a} and \aa) are equivalent standard LICR macros
for Aring/aring as well as for the deprecated "angstrom sign" character (U+212B).
However, with \AA for U+212B and \r{A} for U+00C5, tex2lyx converts \AA to the
deprecated "angstrom sign", which is missing in many fonts, including the
Unicode version of Latin Modern.
I added the deprecated flag, and the normalize_c() calls so that tex2lyx
prefers the precomposed forms (these are easier to edit in LyX).
diff --git a/lib/unicodesymbols b/lib/unicodesymbols
index ff53354..a830b06 100644
--- a/lib/unicodesymbols
+++ b/lib/unicodesymbols
@@ -48,6 +48,7 @@
# - notermination=both Do not terminate this textcommand and mathcommand (by
{} or space).
# - notermination=none Always terminate this textcommand and mathcommand (by
{} or space).
# - tipashortcut=<shortcut> Shortcut notation for TIPA
+# - deprecated Do not use this symbol for backwards conversion in LyX
and tex2lyx.
0x00a0 "~" "" "notermination=both" "~" "" # NO-BREAK
SPACE
0x00a1 "\\textexclamdown" "" "" # INVERTED EXCLAMATION MARK
@@ -86,7 +87,7 @@
0x00c2 "\\^{A}" "" "mathalpha" "\\hat{A}" # LATIN CAPITAL
LETTER A WITH CIRCUMFLEX
0x00c3 "\\~{A}" "" "mathalpha" "\\tilde{A}" # LATIN CAPITAL
LETTER A WITH TILDE
0x00c4 "\\\"{A}" "" "mathalpha" "\\ddot{A}" # LATIN CAPITAL
LETTER A WITH DIAERESIS
-0x00c5 "\\r{A}" "" "mathalpha" "\\mathring{A}" # LATIN
CAPITAL LETTER A WITH RING ABOVE
+0x00c5 "\\AA" "" "mathalpha" "\\mathring{A}" "" # LATIN
CAPITAL LETTER A WITH RING ABOVE
0x00c6 "\\AE" "" "" # LATIN CAPITAL LETTER AE
0x00c7 "\\c{C}" "" "mathalpha" "\\cedilla{C}"
"accents,cedilla" # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8 "\\`{E}" "" "mathalpha" "\\grave{E}" # LATIN CAPITAL
LETTER E WITH GRAVE
@@ -118,7 +119,7 @@
0x00e2 "\\^{a}" "" "mathalpha" "\\hat{a}" # LATIN SMALL
LETTER A WITH CIRCUMFLEX
0x00e3 "\\~{a}" "" "mathalpha" "\\tilde{a}" # LATIN SMALL
LETTER A WITH TILDE
0x00e4 "\\\"{a}" "" "mathalpha" "\\ddot{a}" # LATIN SMALL
LETTER A WITH DIAERESIS
-0x00e5 "\\r{a}" "" "mathalpha" "\\mathring{a}" # LATIN SMALL
LETTER A WITH RING ABOVE
+0x00e5 "\\aa" "" "mathalpha" "\\mathring{a}" "" # LATIN
SMALL LETTER A WITH RING ABOVE
0x00e6 "\\ae" "" "" # LATIN SMALL LETTER AE
0x00e7 "\\c{c}" "" "mathalpha" "\\cedilla{c}"
"accents,cedilla" # LATIN SMALL LETTER C WITH CEDILLA
0x00e8 "\\`{e}" "" "mathalpha" "\\grave{e}" # LATIN SMALL
LETTER E WITH GRAVE
@@ -1882,7 +1883,7 @@
0x2128 "" "" "" "\\mathfrak{Z}" "amssymb" #
BLACK-LETTER CAPITAL Z
#0x2129 "" "" "" "" "" # TURNED GREEK SMALL LETTER IOTA
0x212a "K" "" "notermination=text" "" "" # KELVIN SIGN
-0x212b "\\AA" "" "force=utf8" "" "" # ANGSTROM SIGN
+0x212b "\\AA" "" "force=utf8,deprecated" "" "" # ANGSTROM
SIGN
0x212c "" "" "" "\\mathscr{B}" "mathrsfs" # SCRIPT
CAPITAL B
0x212d "" "" "" "\\mathfrak{C}" "amssymb" #
BLACK-LETTER CAPITAL C
0x212e "\\textestimated" "textcomp" "" # ESTIMATED SYMBOL
diff --git a/src/Encoding.cpp b/src/Encoding.cpp
index 0d1d116..73edc84 100644
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -317,6 +317,8 @@ char_type Encodings::fromLaTeXCommand(docstring const &
cmd, int cmdtype,
CharInfoMap::const_iterator const end = unicodesymbols.end();
CharInfoMap::const_iterator it = unicodesymbols.begin();
for (combining = false; it != end; ++it) {
+ if (it->second.deprecated())
+ continue;
docstring const math = it->second.mathcommand();
docstring const text = it->second.textcommand();
if ((cmdtype & MATH_CMD) && math == cmd) {
@@ -402,6 +404,8 @@ docstring Encodings::fromLaTeXCommand(docstring const &
cmd, int cmdtype,
size_t unicmd_size = 0;
char_type c = 0;
for (; it != uniend; ++it) {
+ if (it->second.deprecated())
+ continue;
docstring const math = mathmode ?
it->second.mathcommand()
: docstring();
docstring const text = textmode ?
it->second.textcommand()
@@ -722,6 +726,8 @@ void Encodings::read(FileName const & encfile, FileName
const & symbolsfile)
flags &= ~CharInfoMathNoTermination;
} else if (contains(flag, "tipashortcut=")) {
tipashortcut = split(flag, '=');
+ } else if (flag == "deprecated") {
+ flags |= CharInfoDeprecated;
} else {
lyxerr << "Ignoring unknown flag `" << flag
<< "' for symbol `0x"
diff --git a/src/Encoding.h b/src/Encoding.h
index f1513c8..aac632e 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -52,6 +52,8 @@ enum CharInfoFlags {
CharInfoMathNoTermination = 32,
///
CharInfoForceSelected = 64,
+ ///
+ CharInfoDeprecated = 128
};
@@ -86,6 +88,8 @@ public:
bool force() const { return flags_ & CharInfoForce ? true : false; }
/// Force the LaTeX command for some encodings?
bool forceselected() const { return flags_ & CharInfoForceSelected ?
true : false; }
+ /// Disable LaTeX command => char_type conversion for this deprecated
symbol?
+ bool deprecated() const { return flags_ & CharInfoDeprecated ? true :
false; }
/// TIPA shortcut
std::string const tipashortcut() const { return tipashortcut_; }
/// \c textcommand needs no termination (such as {} or space).
diff --git a/src/tex2lyx/test/test-insets.lyx.lyx
b/src/tex2lyx/test/test-insets.lyx.lyx
index ff3129b..e8a67b1 100644
--- a/src/tex2lyx/test/test-insets.lyx.lyx
+++ b/src/tex2lyx/test/test-insets.lyx.lyx
@@ -6907,29 +6907,7 @@ Other symbols
\end_layout
\begin_layout Standard
-All three should be converted to U+00C5: Å Å Å (not U+212B). All three should
be converted to U+00E5:
-\begin_inset ERT
-status collapsed
-
-\begin_layout Plain Layout
-
-\backslash
-aa
-\end_layout
-
-\end_inset
-
-
-\begin_inset ERT
-status collapsed
-
-\begin_layout Plain Layout
-{}
-\end_layout
-
-\end_inset
-
- å å.
+All three should be converted to U+00C5: Å Å Å (not U+212B). All three should
be converted to U+00E5: å å å.
\end_layout
\begin_layout Subsection
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp
index 73a8d69..ce330a2 100644
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -521,8 +521,8 @@ docstring convert_unicodesymbols(docstring s)
bool termination;
docstring rem;
set<string> req;
- docstring parsed = encodings.fromLaTeXCommand(s,
- Encodings::TEXT_CMD, termination, rem, &req);
+ docstring parsed = normalize_c(encodings.fromLaTeXCommand(s,
+ Encodings::TEXT_CMD, termination, rem, &req));
set<string>::const_iterator it = req.begin();
set<string>::const_iterator en = req.end();
for (; it != en; ++it)
@@ -4824,8 +4824,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags,
bool outer,
bool termination;
docstring rem;
set<string> req;
- docstring s =
encodings.fromLaTeXCommand(from_utf8(name),
- Encodings::TEXT_CMD, termination, rem,
&req);
+ docstring s =
normalize_c(encodings.fromLaTeXCommand(from_utf8(name),
+ Encodings::TEXT_CMD, termination, rem,
&req));
if (!s.empty()) {
context.check_layout(os);
os << to_utf8(s);