This bug may occur when exporting documents that include UTF-8 characters, especially Chinese documents, to LaTeX format.
A document that contains only the single Chinese character "你", which is encoded as "#4F60" in TeXmacs, triggers this bug. The TeXmacs source is: <TeXmacs|1.0.7.20> <style|generic> <\body> \<#4F60\> </body> <\initial> <\collection> <associate|font|sys-chinese> <associate|language|chinese> </collection> </initial> When I export it to LaTeX format, I get this: \documentclass{article} \usepackage{CJK} \usepackage[chinese]{babel} \begin{document} {\CYRGDSC}0 \end{document} This happens because "#4F60" clashes with '("#4E6" "{\\\"\CYRO}")' in TeXmacs/langs/encoding/utf8tolatex.scm. (This explains why almost a quarter of Chinese characters trigger this bug.) So, instead of using converter_rep::match, which does a partial match, I added converter_rep::match_all to do a full match, because the argument passed to it is a single UTF-8 code returned from decode_from_utf8. Here is the patch (I'm new to C++, so there is probably a better solution): diff --git a/src/src/Data/String/converter.cpp b/src/src/Data/String/converter.cpp index 9fc3213..8f3c2d0 100644 --- a/src/src/Data/String/converter.cpp +++ b/src/src/Data/String/converter.cpp @@ -37,6 +37,13 @@ apply (converter c, string str) { } string +utf8_latex_apply(converter c, string str) { + c->output = string(); + c->match_all(str); + return flush(c); +} + +string flush (converter c) { string result = c->output; c->output = string(); @@ -98,6 +105,30 @@ converter_rep::match (string& str, int& index) { //cout << "]"; } +inline void +converter_rep::match_all (string& str) { + int forward = 0; + int last_match = -1; + string value(""); + hashtree<char,string> node = ht; + while (forward <= N(str)) { + if (node->contains (str[forward])) { + node = node(str[forward]); + //printf("->%x",str[forward]); + if (forward==N(str) && has_value(node)) { + last_match = forward; + value = node->label; + } + } + forward++; + } + if (last_match==-1) { + if (copy_unmatched) + output << str; + } + else output << value; +} + void converter_rep::load () { // to handle each case individually seems 
unelegant, but there is simply more @@ -261,7 +292,7 @@ convert_utf8_to_LaTeX (string input) { else { start = i; string hex_code = '#' * as_hexadecimal (decode_from_utf8 (input, i)); - r = apply (conv, hex_code); + r = utf8_latex_apply (conv, hex_code); if (r != hex_code) output << r; else { output << input(start, i); Modified src/src/Data/String/converter.hpp diff --git a/src/src/Data/String/converter.hpp b/src/src/Data/String/converter.hpp index d61cb2b..f5567b5 100644 --- a/src/src/Data/String/converter.hpp +++ b/src/src/Data/String/converter.hpp @@ -31,6 +31,7 @@ struct converter_rep: rep<converter> { string output, nil_string, from, to; bool copy_unmatched; void match (string& str, int& index); + void match_all (string& str); void load (); public: _______________________________________________ Texmacs-dev mailing list Texmacs-dev@gnu.org https://lists.gnu.org/mailman/listinfo/texmacs-dev