This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new f39841b533 Add syntax highlight to datafusion-cli (#8918)
f39841b533 is described below

commit f39841b533b18d688926c483c38a13c78e84a94d
Author: Trung Dinh <[email protected]>
AuthorDate: Mon Jan 22 11:47:35 2024 -0800

    Add syntax highlight to datafusion-cli (#8918)
    
    * use tokenizer
    
    * clippy
    
    * license + lighter color
    
    * unit tests
---
 datafusion-cli/src/helper.rs      |  16 ++++-
 datafusion-cli/src/highlighter.rs | 126 ++++++++++++++++++++++++++++++++++++++
 datafusion-cli/src/lib.rs         |   2 +
 3 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs
index 69d412db5a..0e146d5757 100644
--- a/datafusion-cli/src/helper.rs
+++ b/datafusion-cli/src/helper.rs
@@ -18,6 +18,8 @@
 //! Helper that helps with interactive editing, including multi-line parsing and validation,
 //! and auto-completion for file name during creating external table.
 
+use std::borrow::Cow;
+
 use datafusion::common::sql_err;
 use datafusion::error::DataFusionError;
 use datafusion::sql::parser::{DFParser, Statement};
@@ -36,9 +38,12 @@ use rustyline::Context;
 use rustyline::Helper;
 use rustyline::Result;
 
+use crate::highlighter::SyntaxHighlighter;
+
 pub struct CliHelper {
     completer: FilenameCompleter,
     dialect: String,
+    highlighter: SyntaxHighlighter,
 }
 
 impl CliHelper {
@@ -46,6 +51,7 @@ impl CliHelper {
         Self {
             completer: FilenameCompleter::new(),
             dialect: dialect.into(),
+            highlighter: SyntaxHighlighter::new(dialect),
         }
     }
 
@@ -100,7 +106,15 @@ impl Default for CliHelper {
     }
 }
 
-impl Highlighter for CliHelper {}
+impl Highlighter for CliHelper {
+    fn highlight<'l>(&self, line: &'l str, pos: usize) -> Cow<'l, str> {
+        self.highlighter.highlight(line, pos)
+    }
+
+    fn highlight_char(&self, line: &str, pos: usize) -> bool {
+        self.highlighter.highlight_char(line, pos)
+    }
+}
 
 impl Hinter for CliHelper {
     type Hint = String;
diff --git a/datafusion-cli/src/highlighter.rs b/datafusion-cli/src/highlighter.rs
new file mode 100644
index 0000000000..28732d5b97
--- /dev/null
+++ b/datafusion-cli/src/highlighter.rs
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The syntax highlighter.
+
+use std::{
+    borrow::Cow::{self, Borrowed},
+    fmt::Display,
+};
+
+use datafusion::sql::sqlparser::{
+    dialect::{dialect_from_str, Dialect, GenericDialect},
+    keywords::Keyword,
+    tokenizer::{Token, Tokenizer},
+};
+use rustyline::highlight::Highlighter;
+
+/// The syntax highlighter.
+pub struct SyntaxHighlighter {
+    dialect: Box<dyn Dialect>,
+}
+
+impl SyntaxHighlighter {
+    pub fn new(dialect: &str) -> Self {
+        let dialect = match dialect_from_str(dialect) {
+            Some(dialect) => dialect,
+            None => Box::new(GenericDialect {}),
+        };
+        Self { dialect }
+    }
+}
+
+impl Highlighter for SyntaxHighlighter {
+    fn highlight<'l>(&self, line: &'l str, _: usize) -> Cow<'l, str> {
+        let mut out_line = String::new();
+
+        // `with_unescape(false)` since we want to rebuild the original string.
+        let mut tokenizer =
+            Tokenizer::new(self.dialect.as_ref(), line).with_unescape(false);
+        let tokens = tokenizer.tokenize();
+        match tokens {
+            Ok(tokens) => {
+                for token in tokens.iter() {
+                    match token {
+                        Token::Word(w) if w.keyword != Keyword::NoKeyword => {
+                            out_line.push_str(&Color::red(token));
+                        }
+                        Token::SingleQuotedString(_) => {
+                            out_line.push_str(&Color::green(token));
+                        }
+                        other => out_line.push_str(&format!("{other}")),
+                    }
+                }
+                out_line.into()
+            }
+            Err(_) => Borrowed(line),
+        }
+    }
+
+    fn highlight_char(&self, line: &str, _: usize) -> bool {
+        !line.is_empty()
+    }
+}
+
+/// Convenient utility to return strings with [ANSI color](https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124).
+struct Color {}
+
+impl Color {
+    fn green(s: impl Display) -> String {
+        format!("\x1b[92m{s}\x1b[0m")
+    }
+
+    fn red(s: impl Display) -> String {
+        format!("\x1b[91m{s}\x1b[0m")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SyntaxHighlighter;
+    use rustyline::highlight::Highlighter;
+
+    #[test]
+    fn highlighter_valid() {
+        let s = "SElect col_a from tab_1;";
+        let highlighter = SyntaxHighlighter::new("generic");
+        let out = highlighter.highlight(s, s.len());
+        assert_eq!(
+            "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;",
+            out
+        );
+    }
+
+    #[test]
+    fn highlighter_valid_with_new_line() {
+        let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';";
+        let highlighter = SyntaxHighlighter::new("generic");
+        let out = highlighter.highlight(s, s.len());
+        assert_eq!(
+            "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;",
+            out
+        );
+    }
+
+    #[test]
+    fn highlighter_invalid() {
+        let s = "SElect col_a from tab_1 WHERE col_b = ';";
+        let highlighter = SyntaxHighlighter::new("generic");
+        let out = highlighter.highlight(s, s.len());
+        assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out);
+    }
+}
diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs
index 7eb3cb51c1..61f9eae7dd 100644
--- a/datafusion-cli/src/lib.rs
+++ b/datafusion-cli/src/lib.rs
@@ -26,3 +26,5 @@ pub mod helper;
 pub mod object_storage;
 pub mod print_format;
 pub mod print_options;
+
+mod highlighter;

Reply via email to