On Wed, Apr 12, 2017 at 03:36:10PM +0200, Jürgen Spitzmüller wrote:
>
> I understand that there is no perfect method. However, if we can catch
> a significant set of cases without too much effort, we should probably
> do that.

If we agree on this, with the attached patch no re-encoding is necessary.
Please try it. The Python chardet module is required.

-- 
Enrico

diff --git a/lib/configure.py b/lib/configure.py
index bdd825f..b16c695 100644
--- a/lib/configure.py
+++ b/lib/configure.py
@@ -9,7 +9,7 @@
 # Full author contact details are available in file CREDITS.
 
 from __future__ import print_function
-import glob, logging, os, re, shutil, subprocess, sys, stat, io
+import glob, logging, os, re, shutil, subprocess, sys, stat, io, chardet
 
 # set up logging
 logging.basicConfig(level = logging.DEBUG,
@@ -1352,7 +1352,7 @@ def checkLatexConfig(check_config, bool_docbook):
         if os.name == 'nt':
             enco = sys.getfilesystemencoding()
         else:
-            enco="utf8"
+            enco = chardet.detect(open(file, 'rb').read())['encoding']
         for line in io.open(file, encoding=enco).readlines():
             if not empty.match(line) and line[0] != '#':
                 if decline == "":
@@ -1374,6 +1374,8 @@ def checkLatexConfig(check_config, bool_docbook):
         if nodeclaration:
             continue
     testclasses.sort()
+    if os.name != 'nt':
+        enco = "utf-8"
     cl = io.open('chklayouts.tex', 'w', encoding=enco)
     for line in testclasses:
         cl.write(line + '\n')