(cqlsh) Support utf-8/cp65001 encoding on Windows. Patch by Paulo Motta; reviewed by Stefania Alborghetti for CASSANDRA-11030.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/d295c7c6 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/d295c7c6 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/d295c7c6 Branch: refs/heads/cassandra-3.0 Commit: d295c7c69886f63739792d60d876b012a408cc07 Parents: 6982aaa Author: Paulo Motta <pauloricard...@gmail.com> Authored: Tue Jan 19 13:10:59 2016 -0200 Committer: Aleksey Yeschenko <alek...@apache.org> Committed: Mon Feb 8 12:16:23 2016 +0000 ---------------------------------------------------------------------- CHANGES.txt | 1 + bin/cqlsh.py | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/d295c7c6/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 12198e4..b0a5062 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.2.6 + * (cqlsh) Support utf-8/cp65001 encoding on Windows (CASSANDRA-11030) * Gossiper#isEnabled is not thread safe (CASSANDRA-11116) * Fix paging on DISTINCT queries repeats result when first row in partition changes (CASSANDRA-10010) Merged from 2.1: http://git-wip-us.apache.org/repos/asf/cassandra/blob/d295c7c6/bin/cqlsh.py ---------------------------------------------------------------------- diff --git a/bin/cqlsh.py b/bin/cqlsh.py index 17c42a6..630c92b 100644 --- a/bin/cqlsh.py +++ b/bin/cqlsh.py @@ -52,6 +52,9 @@ from uuid import UUID if sys.version_info[0] != 2 or sys.version_info[1] != 7: sys.exit("\nCQL Shell supports only Python 2.7\n") +UTF8 = 'utf-8' +CP65001 = 'cp65001' # Win utf-8 variant + description = "CQL Shell for Apache Cassandra" version = "5.0.1" @@ -101,6 +104,12 @@ elif webbrowser._tryorder[0] == 'xdg-open' and os.environ.get('XDG_DATA_DIRS', ' # is a ../lib dir, use 
bundled libs there preferentially. ZIPLIB_DIRS = [os.path.join(CASSANDRA_PATH, 'lib')] myplatform = platform.system() +is_win = myplatform == 'Windows' + +# Workaround for supporting CP65001 encoding on python < 3.3 (https://bugs.python.org/issue13216) +if is_win and sys.version_info < (3, 3): + codecs.register(lambda name: codecs.lookup(UTF8) if name == CP65001 else None) + if myplatform == 'Linux': ZIPLIB_DIRS.append('/usr/share/cassandra/lib') @@ -723,11 +732,15 @@ class Shell(cmd.Cmd): self.max_trace_wait = max_trace_wait self.session.max_trace_wait = max_trace_wait + + self.tty = tty if encoding is None: encoding = locale.getpreferredencoding() if encoding is None: - encoding = 'utf-8' + encoding = UTF8 self.encoding = encoding + self.check_windows_encoding() + self.output_codec = codecs.lookup(encoding) self.statement = StringIO() @@ -737,7 +750,7 @@ class Shell(cmd.Cmd): self.prompt = '' if stdin is None: stdin = sys.stdin - self.tty = tty + if tty: self.reset_prompt() self.report_connection() @@ -753,6 +766,19 @@ class Shell(cmd.Cmd): self.statement_error = False self.single_statement = single_statement + @property + def is_using_utf8(self): + # utf8 encodings from https://docs.python.org/{2,3}/library/codecs.html + return self.encoding.replace('-', '_').lower() in ['utf', 'utf_8', 'u8', 'utf8', CP65001] + + def check_windows_encoding(self): + if is_win and os.name == 'nt' and self.tty and \ + self.is_using_utf8 and sys.stdout.encoding != CP65001: + self.printerr("\nWARNING: console codepage must be set to cp65001 " + "to support {} encoding on Windows platforms.\n" + "If you experience encoding problems, change your console" + " codepage with 'chcp 65001' before starting cqlsh.\n".format(self.encoding)) + def refresh_schema_metadata_best_effort(self): try: self.conn.refresh_schema_metadata(5) # will throw exception if there is a schema mismatch @@ -1004,7 +1030,7 @@ class Shell(cmd.Cmd): try: import readline except ImportError: - if myplatform == 
'Windows': + if is_win: print "WARNING: pyreadline dependency missing. Install to enable tab completion." pass else: @@ -1024,7 +1050,12 @@ class Shell(cmd.Cmd): def get_input_line(self, prompt=''): if self.tty: - self.lastcmd = raw_input(prompt).decode(self.encoding) + try: + self.lastcmd = raw_input(prompt).decode(self.encoding) + except UnicodeDecodeError: + self.lastcmd = '' + traceback.print_exc() + self.check_windows_encoding() line = self.lastcmd + '\n' else: self.lastcmd = self.stdin.readline() @@ -2082,7 +2113,7 @@ class Shell(cmd.Cmd): Clears the console. """ import subprocess - subprocess.call(['clear', 'cls'][myplatform == 'Windows'], shell=True) + subprocess.call(['clear', 'cls'][is_win], shell=True) do_cls = do_clear def do_debug(self, parsed):