(cqlsh) Support utf-8/cp65001 encoding on Windows

patch by Paulo Motta; reviewed by Stefania Alborghetti for CASSANDRA-11030


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d295c7c6
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d295c7c6
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d295c7c6

Branch: refs/heads/cassandra-3.0
Commit: d295c7c69886f63739792d60d876b012a408cc07
Parents: 6982aaa
Author: Paulo Motta <pauloricard...@gmail.com>
Authored: Tue Jan 19 13:10:59 2016 -0200
Committer: Aleksey Yeschenko <alek...@apache.org>
Committed: Mon Feb 8 12:16:23 2016 +0000

----------------------------------------------------------------------
 CHANGES.txt  |  1 +
 bin/cqlsh.py | 41 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/d295c7c6/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 12198e4..b0a5062 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.2.6
+ * (cqlsh) Support utf-8/cp65001 encoding on Windows (CASSANDRA-11030)
  * Gossiper#isEnabled is not thread safe (CASSANDRA-11116)
  * Fix paging on DISTINCT queries repeats result when first row in partition changes (CASSANDRA-10010)
 Merged from 2.1:

http://git-wip-us.apache.org/repos/asf/cassandra/blob/d295c7c6/bin/cqlsh.py
----------------------------------------------------------------------
diff --git a/bin/cqlsh.py b/bin/cqlsh.py
index 17c42a6..630c92b 100644
--- a/bin/cqlsh.py
+++ b/bin/cqlsh.py
@@ -52,6 +52,9 @@ from uuid import UUID
 if sys.version_info[0] != 2 or sys.version_info[1] != 7:
     sys.exit("\nCQL Shell supports only Python 2.7\n")
 
+UTF8 = 'utf-8'
+CP65001 = 'cp65001'  # Win utf-8 variant
+
 description = "CQL Shell for Apache Cassandra"
 version = "5.0.1"
 
@@ -101,6 +104,12 @@ elif webbrowser._tryorder[0] == 'xdg-open' and os.environ.get('XDG_DATA_DIRS', '
 # is a ../lib dir, use bundled libs there preferentially.
 ZIPLIB_DIRS = [os.path.join(CASSANDRA_PATH, 'lib')]
 myplatform = platform.system()
+is_win = myplatform == 'Windows'
+
+# Workaround for supporting CP65001 encoding on python < 3.3 (https://bugs.python.org/issue13216)
+if is_win and sys.version_info < (3, 3):
+    codecs.register(lambda name: codecs.lookup(UTF8) if name == CP65001 else None)
+
 if myplatform == 'Linux':
     ZIPLIB_DIRS.append('/usr/share/cassandra/lib')
 
@@ -723,11 +732,15 @@ class Shell(cmd.Cmd):
 
         self.max_trace_wait = max_trace_wait
         self.session.max_trace_wait = max_trace_wait
+
+        self.tty = tty
         if encoding is None:
             encoding = locale.getpreferredencoding()
             if encoding is None:
-                encoding = 'utf-8'
+                encoding = UTF8
         self.encoding = encoding
+        self.check_windows_encoding()
+
         self.output_codec = codecs.lookup(encoding)
 
         self.statement = StringIO()
@@ -737,7 +750,7 @@ class Shell(cmd.Cmd):
         self.prompt = ''
         if stdin is None:
             stdin = sys.stdin
-        self.tty = tty
+
         if tty:
             self.reset_prompt()
             self.report_connection()
@@ -753,6 +766,19 @@ class Shell(cmd.Cmd):
         self.statement_error = False
         self.single_statement = single_statement
 
+    @property
+    def is_using_utf8(self):
+        # utf8 encodings from https://docs.python.org/{2,3}/library/codecs.html
+        return self.encoding.replace('-', '_').lower() in ['utf', 'utf_8', 'u8', 'utf8', CP65001]
+
+    def check_windows_encoding(self):
+        if is_win and os.name == 'nt' and self.tty and \
+           self.is_using_utf8 and sys.stdout.encoding != CP65001:
+            self.printerr("\nWARNING: console codepage must be set to cp65001 "
+                          "to support {} encoding on Windows platforms.\n"
+                          "If you experience encoding problems, change your console"
+                          " codepage with 'chcp 65001' before starting cqlsh.\n".format(self.encoding))
+
     def refresh_schema_metadata_best_effort(self):
         try:
             self.conn.refresh_schema_metadata(5)  # will throw exception if there is a schema mismatch
@@ -1004,7 +1030,7 @@ class Shell(cmd.Cmd):
             try:
                 import readline
             except ImportError:
-                if myplatform == 'Windows':
+                if is_win:
                     print "WARNING: pyreadline dependency missing.  Install to enable tab completion."
                 pass
             else:
@@ -1024,7 +1050,12 @@ class Shell(cmd.Cmd):
 
     def get_input_line(self, prompt=''):
         if self.tty:
-            self.lastcmd = raw_input(prompt).decode(self.encoding)
+            try:
+                self.lastcmd = raw_input(prompt).decode(self.encoding)
+            except UnicodeDecodeError:
+                self.lastcmd = ''
+                traceback.print_exc()
+                self.check_windows_encoding()
             line = self.lastcmd + '\n'
         else:
             self.lastcmd = self.stdin.readline()
@@ -2082,7 +2113,7 @@ class Shell(cmd.Cmd):
         Clears the console.
         """
         import subprocess
-        subprocess.call(['clear', 'cls'][myplatform == 'Windows'], shell=True)
+        subprocess.call(['clear', 'cls'][is_win], shell=True)
     do_cls = do_clear
 
     def do_debug(self, parsed):

Reply via email to