commit d263ca052948061bd3e38377197052805075cabb
Author: Juergen Spitzmueller <sp...@lyx.org>
Date:   Sun Mar 11 18:04:23 2018 +0100

    tex2lyx: update quote handling
    
    * Consider new quote styles
    * Consider changed quote styles
    * Try to be a bit smarter with ambiguous quotation marks
    
    (cherry picked from commit 8184f08f4af6efea6d1499e3f8c8d3c20ebb1b97)
---
 src/tex2lyx/Preamble.cpp                   |  108 +++++++++++++++---------
 src/tex2lyx/Preamble.h                     |    2 +
 src/tex2lyx/TODO.txt                       |   17 ----
 src/tex2lyx/test/CJK.lyx.lyx               |    2 +-
 src/tex2lyx/test/CJKutf8.lyx.lyx           |    2 +-
 src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx |    2 +-
 src/tex2lyx/text.cpp                       |  124 ++++++++++++++++++++++------
 status.23x                                 |    6 ++
 8 files changed, 178 insertions(+), 85 deletions(-)

diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp
index 1b9d509..26824dd 100644
--- a/src/tex2lyx/Preamble.cpp
+++ b/src/tex2lyx/Preamble.cpp
@@ -88,35 +88,51 @@ const char * const known_coded_languages[] = {"french", 
"afrikaans", "albanian",
 "vietnamese", "welsh",
 0};
 
+/// languages with british quotes (.lyx names)
+const char * const known_british_quotes_languages[] = {"british", "welsh", 0};
+
+/// languages with cjk quotes (.lyx names)
+const char * const known_cjk_quotes_languages[] = {"chinese-traditional",
+"japanese", "japanese-cjk", 0};
+
+/// languages with cjk-angle quotes (.lyx names)
+const char * const known_cjkangle_quotes_languages[] = {"korean", 0};
+
 /// languages with danish quotes (.lyx names)
 const char * const known_danish_quotes_languages[] = {"danish", 0};
 
 /// languages with english quotes (.lyx names)
 const char * const known_english_quotes_languages[] = {"american", 
"australian",
 "bahasa", "bahasam", "brazilian", "canadian", "chinese-simplified", "english",
-"esperanto", "hebrew", "irish", "korean", "newzealand", "portuguese", 
"scottish",
-"thai", 0};
+"esperanto", "farsi", "interlingua", "irish", "newzealand", "scottish",
+"thai", "turkish", "vietnamese", 0};
 
 /// languages with french quotes (.lyx names)
-const char * const known_french_quotes_languages[] = {"albanian",
-"arabic_arabi", "arabic_arabtex", "asturian", "basque", "canadien", "catalan",
-"french", "friulan", "galician", "greek", "italian", "norsk", "nynorsk",
-"piedmontese", "polutonikogreek", "russian", "spanish", "spanish-mexico",
-"turkish", "turkmen", "ukrainian", "vietnamese", 0};
+const char * const known_french_quotes_languages[] = {"ancientgreek",
+"arabic_arabi", "arabic_arabtex", "asturian", "belarusian", "breton",
+"canadien", "catalan", "french", "friulan", "galician", "italian", "occitan",
+"piedmontese", "portuguese", "spanish", "spanish-mexico", 0};
 
 /// languages with german quotes (.lyx names)
 const char * const known_german_quotes_languages[] = {"austrian", "bulgarian",
-"czech", "german", "georgian", "icelandic", "lithuanian", "lowersorbian", 
"macedonian",
-"naustrian", "ngerman", "romansh", "serbian", "serbian-latin", "slovak", 
"slovene",
+"czech", "estonian", "georgian", "german", "icelandic", "latvian", 
"lithuanian",
+"lowersorbian", "macedonian", "naustrian", "ngerman", "romansh", "slovak", 
"slovene",
 "uppersorbian", 0};
 
 /// languages with polish quotes (.lyx names)
 const char * const known_polish_quotes_languages[] = {"afrikaans", "bosnian", 
"croatian",
-"dutch", "estonian", "magyar", "polish", "romanian", 0};
+"dutch", "magyar", "polish", "romanian", "serbian", "serbian-latin", 0};
+
+/// languages with russian quotes (.lyx names)
+const char * const known_russian_quotes_languages[] = {"russian", "ukrainian", 
0};
 
 /// languages with swedish quotes (.lyx names)
-const char * const known_swedish_quotes_languages[] = {"finnish",
-"swedish", 0};
+const char * const known_swedish_quotes_languages[] = {"finnish", "swedish", 
0};
+
+/// languages with swiss quotes (.lyx names). NOTE(review): "ukrainian" is also in known_russian_quotes_languages and "vietnamese" in known_english_quotes_languages; Preamble::parse() tests russian before swiss and swiss before english, so one entry of each pair is never reached -- confirm the intended style.
+const char * const known_swiss_quotes_languages[] = {"albanian",
+"armenian", "basque", "german-ch", "german-ch-old",
+"norsk", "nynorsk", "turkmen", "ukrainian", "vietnamese", 0};
 
 /// known language packages from the times before babel
 const char * const known_old_language_packages[] = {"french", "frenchle",
@@ -1188,33 +1204,6 @@ void Preamble::handle_if(Parser & p, bool 
in_lyx_preamble)
 
 bool Preamble::writeLyXHeader(ostream & os, bool subdoc, string const & 
outfiledir)
 {
-       // set the quote language
-       // LyX only knows the following quotes languages:
-       // english, swedish, german, polish, french and danish
-       // (quotes for "japanese" and "chinese-traditional" are missing because
-       //  they wouldn't be useful: https://www.lyx.org/trac/ticket/6383)
-       // conversion list taken from
-       // https://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage
-       // (quotes for kazakh and interlingua are unknown)
-       // danish
-       if (is_known(h_language, known_danish_quotes_languages))
-               h_quotes_style = "danish";
-       // french
-       else if (is_known(h_language, known_french_quotes_languages))
-               h_quotes_style = "french";
-       // german
-       else if (is_known(h_language, known_german_quotes_languages))
-               h_quotes_style = "german";
-       // polish
-       else if (is_known(h_language, known_polish_quotes_languages))
-               h_quotes_style = "polish";
-       // swedish
-       else if (is_known(h_language, known_swedish_quotes_languages))
-               h_quotes_style = "swedish";
-       //english
-       else if (is_known(h_language, known_english_quotes_languages))
-               h_quotes_style = "english";
-
        if (contains(h_float_placement, "H"))
                registerAutomaticallyLoadedPackage("float");
        if (h_spacing != "single" && h_spacing != "default")
@@ -2239,6 +2228,47 @@ void Preamble::parse(Parser & p, string const & 
forceclass,
                                h_options += ',' + lyx2babel(default_language);
                }
        }
+
+       // Finally, set the quote style.
+       // LyX knows the following quotes styles:
+       // british, cjk, cjkangle, danish, english, french, german,
+       // polish, russian, swedish and swiss
+       // conversion list taken from
+       // https://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage
+       // (quotes for kazakh are unknown)
+       // british
+       if (is_known(h_language, known_british_quotes_languages))
+               h_quotes_style = "british";
+       // cjk
+       else if (is_known(h_language, known_cjk_quotes_languages))
+               h_quotes_style = "cjk";
+       // cjkangle
+       else if (is_known(h_language, known_cjkangle_quotes_languages))
+               h_quotes_style = "cjkangle";
+       // danish
+       else if (is_known(h_language, known_danish_quotes_languages))
+               h_quotes_style = "danish";
+       // french
+       else if (is_known(h_language, known_french_quotes_languages))
+               h_quotes_style = "french";
+       // german
+       else if (is_known(h_language, known_german_quotes_languages))
+               h_quotes_style = "german";
+       // polish
+       else if (is_known(h_language, known_polish_quotes_languages))
+               h_quotes_style = "polish";
+       // russian
+       else if (is_known(h_language, known_russian_quotes_languages))
+               h_quotes_style = "russian";
+       // swedish
+       else if (is_known(h_language, known_swedish_quotes_languages))
+               h_quotes_style = "swedish";
+       // swiss
+       else if (is_known(h_language, known_swiss_quotes_languages))
+               h_quotes_style = "swiss";
+       // english
+       else if (is_known(h_language, known_english_quotes_languages))
+               h_quotes_style = "english";
 }
 
 
diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h
index 9c70dca..0d3ff01 100644
--- a/src/tex2lyx/Preamble.h
+++ b/src/tex2lyx/Preamble.h
@@ -54,6 +54,8 @@ public:
        std::string docLanguage() const { return h_language; }
        /// The language of text which is not explicitly marked
        std::string defaultLanguage() const  { return default_language; }
+       /// The quotation marks style name stored in h_quotes_style (e.g. "english", "german")
+       std::string quotesStyle() const { return h_quotes_style; }
        ///
        bool usePolyglossia() const;
        ///
diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt
index b2ba6bb..9588691 100644
--- a/src/tex2lyx/TODO.txt
+++ b/src/tex2lyx/TODO.txt
@@ -75,23 +75,6 @@ Format LaTeX feature                        LyX feature
        \twocolumn[]{}{}                      Layout Twocolumn, InsetArgument
        \item[]<>                             InsetArgument
        \begin{enumerate|itemize|...}[]       InsetArgument
-520    Plain InsetQuote Style:
-       \textquotesingle                      \begin_inset Quotes qls, 
\begin_inset Quotes qrs
-       \textquotedbl                         \begin_inset Quotes qld, 
\begin_inset Quotes qrd
-521    New Quote Styles                      InsetQuote
-       - british                             \begin_inset Quotes b..
-       - swiss                               \begin_inset Quotes c..
-       - swedishg                            \begin_inset Quotes w..
-       - frenchin                            \begin_inset Quotes i..
-       - russian                             \begin_inset Quotes r..
-       Change default behavior               \begin_inset Quotes f..
-       of French quote style:
-       - Inner quotes are now ``...''.
-       - Former french style is now
-         called "swiss"
-523    CJK Quote Styles                      InsetQuote
-       - cjk (corner brackets)               \begin_inset Quotes j..
-       - cjkangle (angle brackets)           \begin_inset Quotes k..
 526   Plural and capitalized refstyles      InsetRef
 533   Multibib support
       \begin{btUnit}...\end{btUnit}        \multibib 
{none|part|chapter|section|subsection}
diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx
index c31cba4..2ec5f9c 100644
--- a/src/tex2lyx/test/CJK.lyx.lyx
+++ b/src/tex2lyx/test/CJK.lyx.lyx
@@ -74,7 +74,7 @@
 \paragraph_indentation default
 \is_math_indent 0
 \math_numbering_side default
-\quotes_style english
+\quotes_style cjk
 \dynamic_quotes 0
 \papercolumns 1
 \papersides 1
diff --git a/src/tex2lyx/test/CJKutf8.lyx.lyx b/src/tex2lyx/test/CJKutf8.lyx.lyx
index 7eeec41..a3a6856 100644
--- a/src/tex2lyx/test/CJKutf8.lyx.lyx
+++ b/src/tex2lyx/test/CJKutf8.lyx.lyx
@@ -74,7 +74,7 @@
 \paragraph_indentation default
 \is_math_indent 0
 \math_numbering_side default
-\quotes_style english
+\quotes_style cjk
 \dynamic_quotes 0
 \papercolumns 1
 \papersides 1
diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx 
b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx
index fac9a95..6938846 100644
--- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx
+++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx
@@ -73,7 +73,7 @@
 \paragraph_indentation default
 \is_math_indent 0
 \math_numbering_side default
-\quotes_style english
+\quotes_style british
 \dynamic_quotes 0
 \papercolumns 1
 \papersides 1
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp
index b67f4a7..b81b866 100644
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -201,13 +201,14 @@ bool need_commentbib = false;
 char const * const known_quotes[] = { "dq", "guillemotleft", "flqq", "og",
 "guillemotright", "frqq", "fg", "glq", "glqq", "textquoteleft", "grq", "grqq",
 "quotedblbase", "textquotedblleft", "quotesinglbase", "textquoteright", "flq",
-"guilsinglleft", "frq", "guilsinglright", 0};
+"guilsinglleft", "frq", "guilsinglright", "textquotedblright", 
"textquotesingle",
+"textquotedbl", 0};
 
 /// the same as known_quotes with .lyx names
-char const * const known_coded_quotes[] = { "prd", "ard", "ard", "ard",
-"ald", "ald", "ald", "gls", "gld", "els", "els", "grd",
-"gld", "grd", "gls", "ers", "fls",
-"fls", "frs", "frs", 0};
+char const * const known_coded_quotes[] = { "qrd", "ard", "ard", "ard",
+"ald", "ald", "ald", "gls", "gld", "els", "els", "eld",
+"gld", "eld", "gls", "ers", "ars",
+"ars", "als", "als", "erd", "qrs", "qrd", 0};
 
 /// LaTeX names for font sizes
 char const * const known_sizes[] = { "tiny", "scriptsize", "footnotesize",
@@ -446,6 +447,78 @@ bool translate_len(string const & length, string & 
valstring, string & unit)
        return true;
 }
 
+
+/// Map an ambiguous quote inset code \p in (e.g. "eld") to the code that fits the
+/// main quote style, preamble.quotesStyle(); \p in is returned unchanged if no rule applies.
+string guessQuoteStyle(string in, bool const opening)
+{
+       string res = in;
+       if (prefixIs(in, "qr")) {// straight quote ("qrd"/"qrs"); becomes "ql?" when !opening
+               if (!opening)
+                       res = subst(res, "r", "l");
+       } else if (in == "eld") {// `` (e.g. \textquotedblleft)
+               if (preamble.quotesStyle() == "german")
+                       res = "grd";
+               else if (preamble.quotesStyle() == "british")
+                       res = "bls";
+               else if (preamble.quotesStyle() == "french")
+                       res = "fls";
+               else if (preamble.quotesStyle() == "russian")
+                       res = "rrs";
+       } else if (in == "erd") {// '' (e.g. \textquotedblright)
+               if (preamble.quotesStyle() == "polish")
+                       res = "prd";
+               else if (preamble.quotesStyle() == "british")
+                       res = "brs";
+               else if (preamble.quotesStyle() == "french")
+                       res = "frs";
+               else if (preamble.quotesStyle() == "swedish")
+                       res = opening ? "sld" : "srd";
+       } else if (in == "els") {// ` (e.g. \textquoteleft)
+               if (preamble.quotesStyle() == "german")
+                       res = "grs";
+               else if (preamble.quotesStyle() == "british")
+                       res = "bld";
+       } else if (in == "ers") {// ' (e.g. \textquoteright)
+               if (preamble.quotesStyle() == "polish")
+                       res = "prs";
+               else if (preamble.quotesStyle() == "british")
+                       res = "brd";
+               else if (preamble.quotesStyle() == "swedish")
+                       res = opening ? "sls" : "srs";
+       } else if (in == "ard") {// << (e.g. \guillemotleft, \flqq)
+               if (preamble.quotesStyle() == "swiss")
+                       res = "cld";
+               else if (preamble.quotesStyle() == "french")
+                       res = "fld";
+               else if (preamble.quotesStyle() == "russian")
+                       res = "rld";
+       } else if (in == "ald") {// >> (e.g. \guillemotright, \frqq)
+               if (preamble.quotesStyle() == "swiss")
+                       res = "crd";
+               else if (preamble.quotesStyle() == "french")
+                       res = "frd";
+               else if (preamble.quotesStyle() == "russian")
+                       res = "rrd";
+       } else if (in == "ars") {// < (e.g. \guilsinglleft, \flq)
+               if (preamble.quotesStyle() == "swiss")
+                       res = "cls";
+       } else if (in == "als") {// > (e.g. \guilsinglright, \frq)
+               if (preamble.quotesStyle() == "swiss")
+                       res = "crs";
+       } else if (in == "gld") {// ,, (e.g. \quotedblbase, \glqq)
+               if (preamble.quotesStyle() == "polish")
+                       res = "pld";
+               else if (preamble.quotesStyle() == "russian")
+                       res = "rls";
+       } else if (in == "gls") {// , (e.g. \quotesinglbase, \glq)
+               if (preamble.quotesStyle() == "polish")
+                       res = "pls";
+       }
+       return res;
+}
+
+
 } // namespace
 
 
@@ -2628,14 +2701,17 @@ void parse_text(Parser & p, ostream & os, unsigned 
flags, bool outer,
                        continue;
                }
 
-               // Basic support for english quotes. This should be
-               // extended to other quotes, but is not so easy (a
-               // left english quote is the same as a right german
-               // quote...)
+               // Basic support for quotes. We try to disambiguate
+               // quotes from the context (e.g., a left english quote is
+               // the same as a right german quote...).
+               // Try to make a smart guess about the side
+               Token const prev = p.prev_token();
+               bool const opening = (prev.cat() != catSpace && 
prev.character() != 0
+                               && prev.character() != '\n' && prev.character() 
!= '~');
                if (t.asInput() == "`" && p.next_token().asInput() == "`") {
                        context.check_layout(os);
                        begin_inset(os, "Quotes ");
-                       os << "eld";
+                       os << guessQuoteStyle("eld", opening);
                        end_inset(os);
                        p.get_token();
                        skip_braces(p);
@@ -2644,7 +2720,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, 
bool outer,
                if (t.asInput() == "'" && p.next_token().asInput() == "'") {
                        context.check_layout(os);
                        begin_inset(os, "Quotes ");
-                       os << "erd";
+                       os << guessQuoteStyle("erd", opening);
                        end_inset(os);
                        p.get_token();
                        skip_braces(p);
@@ -2654,7 +2730,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, 
bool outer,
                if (t.asInput() == ">" && p.next_token().asInput() == ">") {
                        context.check_layout(os);
                        begin_inset(os, "Quotes ");
-                       os << "ald";
+                       os << guessQuoteStyle("ald", opening);
                        end_inset(os);
                        p.get_token();
                        skip_braces(p);
@@ -2675,9 +2751,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, 
bool outer,
                        if (!has_chunk) {
                                context.check_layout(os);
                                begin_inset(os, "Quotes ");
-                               //FIXME: this is a right danish quote;
-                               // why not a left french quote?
-                               os << "ard";
+                               os << guessQuoteStyle("ard", opening);
                                end_inset(os);
                                p.get_token();
                                skip_braces(p);
@@ -2803,8 +2877,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, 
bool outer,
                                   is_known(next.cs(), known_quotes) &&
                                   end.cat() == catEnd) {
                                // Something like {\textquoteright} (e.g.
-                               // from writer2latex). LyX writes
-                               // \textquoteright{}, so we may skip the
+                               // from writer2latex). We may skip the
                                // braces here for better readability.
                                parse_text_snippet(p, os, FLAG_BRACE_LAST,
                                                   outer, context);
@@ -4356,7 +4429,13 @@ void parse_text(Parser & p, ostream & os, unsigned 
flags, bool outer,
                if ((where = is_known(t.cs(), known_quotes))) {
                        context.check_layout(os);
                        begin_inset(os, "Quotes ");
-                       os << known_coded_quotes[where - known_quotes];
+                       string quotetype = known_coded_quotes[where - 
known_quotes];
+                       // try to make a smart guess about the side
+                       Token const prev = p.prev_token();
+                       bool const opening = (prev.cat() != catSpace && 
prev.character() != 0
+                                       && prev.character() != '\n' && 
prev.character() != '~');
+                       quotetype = guessQuoteStyle(quotetype, opening);
+                       os << quotetype;
                        end_inset(os);
                        // LyX adds {} after the quote, so we have to eat
                        // spaces here if there are any before a possible
@@ -4367,7 +4446,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, 
bool outer,
                }
 
                if ((where = is_known(t.cs(), known_sizes)) &&
-                        context.new_layout_allowed) {
+                       context.new_layout_allowed) {
                        context.check_layout(os);
                        TeXFont const oldFont = context.font;
                        context.font.size = known_coded_sizes[where - 
known_sizes];
@@ -4532,13 +4611,6 @@ void parse_text(Parser & p, ostream & os, unsigned 
flags, bool outer,
                        continue;
                }
 
-               if (t.cs() == "textquotedbl") {
-                       context.check_layout(os);
-                       os << "\"";
-                       skip_braces(p);
-                       continue;
-               }
-
                if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#"
                            || t.cs() == "$" || t.cs() == "{" || t.cs() == "}"
                            || t.cs() == "%" || t.cs() == "-") {
diff --git a/status.23x b/status.23x
index 4f90930..14989d5 100644
--- a/status.23x
+++ b/status.23x
@@ -27,6 +27,12 @@ What's new
 
 - Add support for \includeonly.
 
+- Update tex2lyx quotation marks detection:
+  * Consider new quote styles of LyX 2.3.
+  * Consider changed quote styles in LyX 2.3.
+  * Try to be a bit smarter with ambiguous quotation marks,
+    depending on the main quote style and the local context.
+
 - Add support for URW Classico, MinionPro and the new Libertine fonts.
 
 - Add support for the \t*{} (bottomtiebar) macro of TIPA.

Reply via email to