alamb commented on code in PR #1590:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1590#discussion_r1882396934


##########
build.rs:
##########
@@ -0,0 +1,101 @@
+use std::env;
+use std::fs::File;
+use std::io::{BufWriter, Write};
+use std::path::Path;
+
+fn read_keywords() -> Vec<(String, Option<String>)> {
+    let path = Path::new("src").join("keywords.txt");

Review Comment:
   this is quite clever



##########
src/keywords.rs:
##########
@@ -33,845 +33,13 @@ use serde::{Deserialize, Serialize};
 #[cfg(feature = "visitor")]
 use sqlparser_derive::{Visit, VisitMut};
 
-/// Defines a string constant for a single keyword: `kw_def!(SELECT);`
-/// expands to `pub const SELECT = "SELECT";`
-macro_rules! kw_def {
-    ($ident:ident = $string_keyword:expr) => {
-        pub const $ident: &'static str = $string_keyword;
-    };
-    ($ident:ident) => {
-        kw_def!($ident = stringify!($ident));
-    };
-}
-
-/// Expands to a list of `kw_def!()` invocations for each keyword
-/// and defines an ALL_KEYWORDS array of the defined constants.
-macro_rules! define_keywords {
-    ($(
-        $ident:ident $(= $string_keyword:expr)?
-    ),*) => {
-        #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
-        #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-        #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-        #[allow(non_camel_case_types)]
-        pub enum Keyword {
-            NoKeyword,
-            $($ident),*
-        }
+include!(concat!(env!("OUT_DIR"), "/keyword_gen.rs"));
 
-        pub const ALL_KEYWORDS_INDEX: &[Keyword] = &[
-            $(Keyword::$ident),*
-        ];
-
-        $(kw_def!($ident $(= $string_keyword)?);)*
-        pub const ALL_KEYWORDS: &[&str] = &[
-            $($ident),*
-        ];
-    };
+pub fn lookup(keyword: &str) -> Keyword {
+    let keyword = keyword.to_ascii_uppercase();

Review Comment:
   If we are going to optimize lookup, I think we should also avoid this call to 
`to_ascii_uppercase`, as it allocates / copies the string, which I believe is 
significantly more costly than some comparisons.
   
   
https://doc.rust-lang.org/std/ascii/trait.AsciiExt.html#tymethod.to_ascii_uppercase
   https://doc.rust-lang.org/std/primitive.str.html#method.to_uppercase
   
   Since all keywords are ASCII, what if we did the binary search character by 
character?
   
   Here is some brain 🤮 :
   
   ```
   let first_char = string.first().to_upper() 
   // find the range of keywords whose first character matches first_char
   let range = binary_search(keywords, first_char, 0);
   // if not unique location, proceed to second character, etc
   ...
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to