On Wed, Apr 12, 2017 at 03:36:10PM +0200, Jürgen Spitzmüller wrote:
> 
> I understand that there is no perfect method. However, if we can catch
> a significant set of cases without too much effort, we should probably
> do that.

If we agree on this, with the attached patch no re-encoding is necessary.
Please try it. The Python chardet module is required.

-- 
Enrico
diff --git a/lib/configure.py b/lib/configure.py
index bdd825f..b16c695 100644
--- a/lib/configure.py
+++ b/lib/configure.py
@@ -9,7 +9,7 @@
 # Full author contact details are available in file CREDITS.
 
 from __future__ import print_function
-import glob, logging, os, re, shutil, subprocess, sys, stat, io
+import glob, logging, os, re, shutil, subprocess, sys, stat, io, chardet
 
 # set up logging
 logging.basicConfig(level = logging.DEBUG,
@@ -1352,7 +1352,7 @@ def checkLatexConfig(check_config, bool_docbook):
         if os.name == 'nt':
             enco = sys.getfilesystemencoding()
         else:
-            enco="utf8"
+            enco = chardet.detect(open(file, 'rb').read())['encoding']
         for line in io.open(file, encoding=enco).readlines():
             if not empty.match(line) and line[0] != '#':
                 if decline == "":
@@ -1374,6 +1374,8 @@ def checkLatexConfig(check_config, bool_docbook):
         if nodeclaration:
             continue
     testclasses.sort()
+    if os.name != 'nt':
+        enco = "utf-8"
     cl = io.open('chklayouts.tex', 'w', encoding=enco)
     for line in testclasses:
         cl.write(line + '\n')

Reply via email to