(datafusion-sqlparser-rs) branch main updated: Improve parsing speed by avoiding some clones in parse_identifier (#1624)

iffyio Sun, 29 Dec 2024 05:18:05 -0800

This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 3db1b443 Improve parsing speed by avoiding some clones in 
parse_identifier (#1624)
3db1b443 is described below

commit 3db1b4430f26ccb5077d4e28d3b6d8caa345cc49
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Dec 29 08:17:52 2024 -0500

    Improve parsing speed by avoiding some clones in parse_identifier (#1624)
---
 src/parser/mod.rs | 60 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index ab8379ad..ec91bf00 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -970,7 +970,7 @@ impl<'a> Parser<'a> {
             t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
                 if self.peek_token().token == Token::Period {
                     let mut id_parts: Vec<Ident> = vec![match t {
-                        Token::Word(w) => w.to_ident(next_token.span),
+                        Token::Word(w) => w.into_ident(next_token.span),
                         Token::SingleQuotedString(s) => 
Ident::with_quote('\'', s),
                         _ => unreachable!(), // We matched above
                     }];
@@ -978,7 +978,7 @@ impl<'a> Parser<'a> {
                     while self.consume_token(&Token::Period) {
                         let next_token = self.next_token();
                         match next_token.token {
-                            Token::Word(w) => 
id_parts.push(w.to_ident(next_token.span)),
+                            Token::Word(w) => 
id_parts.push(w.into_ident(next_token.span)),
                             Token::SingleQuotedString(s) => {
                                 // SQLite has single-quoted identifiers
                                 id_parts.push(Ident::with_quote('\'', s))
@@ -1108,7 +1108,7 @@ impl<'a> Parser<'a> {
             if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
                 {
                     Ok(Some(Expr::Function(Function {
-                        name: ObjectName(vec![w.to_ident(w_span)]),
+                        name: ObjectName(vec![w.clone().into_ident(w_span)]),
                         uses_odbc_syntax: false,
                         parameters: FunctionArguments::None,
                         args: FunctionArguments::None,
@@ -1123,7 +1123,7 @@ impl<'a> Parser<'a> {
             | Keyword::CURRENT_DATE
             | Keyword::LOCALTIME
             | Keyword::LOCALTIMESTAMP => {
-                
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
+                
Ok(Some(self.parse_time_functions(ObjectName(vec![w.clone().into_ident(w_span)]))?))
             }
             Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
             Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@@ -1148,7 +1148,7 @@ impl<'a> Parser<'a> {
             Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
             Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
             Keyword::POSITION if self.peek_token_ref().token == Token::LParen 
=> {
-                Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
+                
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
             }
             Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
             Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
                     let query = self.parse_query()?;
                     self.expect_token(&Token::RParen)?;
                     Ok(Some(Expr::Function(Function {
-                        name: ObjectName(vec![w.to_ident(w_span)]),
+                        name: ObjectName(vec![w.clone().into_ident(w_span)]),
                         uses_odbc_syntax: false,
                         parameters: FunctionArguments::None,
                         args: FunctionArguments::Subquery(query),
@@ -1203,11 +1203,12 @@ impl<'a> Parser<'a> {
         w_span: Span,
     ) -> Result<Expr, ParserError> {
         match self.peek_token().token {
-            Token::Period => {
-                
self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![])
-            }
+            Token::Period => self.parse_compound_field_access(
+                Expr::Identifier(w.clone().into_ident(w_span)),
+                vec![],
+            ),
             Token::LParen => {
-                let id_parts = vec![w.to_ident(w_span)];
+                let id_parts = vec![w.clone().into_ident(w_span)];
                 if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
                     Ok(expr)
                 } else {
@@ -1220,7 +1221,7 @@ impl<'a> Parser<'a> {
             }
             Token::LBracket if dialect_of!(self is PostgreSqlDialect | 
DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
             {
-                let ident = Expr::Identifier(w.to_ident(w_span));
+                let ident = Expr::Identifier(w.clone().into_ident(w_span));
                 let mut fields = vec![];
                 self.parse_multi_dim_subscript(&mut fields)?;
                 self.parse_compound_field_access(ident, fields)
@@ -1250,11 +1251,11 @@ impl<'a> Parser<'a> {
             Token::Arrow if self.dialect.supports_lambda_functions() => {
                 self.expect_token(&Token::Arrow)?;
                 Ok(Expr::Lambda(LambdaFunction {
-                    params: OneOrManyWithParens::One(w.to_ident(w_span)),
+                    params: 
OneOrManyWithParens::One(w.clone().into_ident(w_span)),
                     body: Box::new(self.parse_expr()?),
                 }))
             }
-            _ => Ok(Expr::Identifier(w.to_ident(w_span))),
+            _ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
         }
     }
 
@@ -1438,7 +1439,7 @@ impl<'a> Parser<'a> {
                 } else {
                     let tok = self.next_token();
                     let key = match tok.token {
-                        Token::Word(word) => word.to_ident(tok.span),
+                        Token::Word(word) => word.into_ident(tok.span),
                         _ => {
                             return parser_err!(
                                 format!("Expected identifier, found: {tok}"),
@@ -1490,7 +1491,7 @@ impl<'a> Parser<'a> {
             let next_token = self.next_token();
             match next_token.token {
                 Token::Word(w) => {
-                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    let expr = Expr::Identifier(w.into_ident(next_token.span));
                     chain.push(AccessExpr::Dot(expr));
                     if self.peek_token().token == Token::LBracket {
                         if self.dialect.supports_partiql() {
@@ -1670,7 +1671,7 @@ impl<'a> Parser<'a> {
             while p.consume_token(&Token::Period) {
                 let tok = p.next_token();
                 let name = match tok.token {
-                    Token::Word(word) => word.to_ident(tok.span),
+                    Token::Word(word) => word.into_ident(tok.span),
                     _ => return p.expected("identifier", tok),
                 };
                 let func = match p.parse_function(ObjectName(vec![name]))? {
@@ -8242,7 +8243,7 @@ impl<'a> Parser<'a> {
                 // This because snowflake allows numbers as placeholders
                 let next_token = self.next_token();
                 let ident = match next_token.token {
-                    Token::Word(w) => Ok(w.to_ident(next_token.span)),
+                    Token::Word(w) => Ok(w.into_ident(next_token.span)),
                     Token::Number(w, false) => Ok(Ident::new(w)),
                     _ => self.expected("placeholder", next_token),
                 }?;
@@ -8753,7 +8754,7 @@ impl<'a> Parser<'a> {
             // (For example, in `FROM t1 JOIN` the `JOIN` will always be 
parsed as a keyword,
             // not an alias.)
             Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) 
=> {
-                Ok(Some(w.to_ident(next_token.span)))
+                Ok(Some(w.into_ident(next_token.span)))
             }
             // MSSQL supports single-quoted strings as aliases for columns
             // We accept them as table aliases too, although MSSQL does not.
@@ -8920,7 +8921,7 @@ impl<'a> Parser<'a> {
         loop {
             match &self.peek_token_ref().token {
                 Token::Word(w) => {
-                    idents.push(w.to_ident(self.peek_token_ref().span));
+                    
idents.push(w.clone().into_ident(self.peek_token_ref().span));
                 }
                 Token::EOF | Token::Eq => break,
                 _ => {}
@@ -8975,7 +8976,7 @@ impl<'a> Parser<'a> {
         // expecting at least one word for identifier
         let next_token = self.next_token();
         match next_token.token {
-            Token::Word(w) => idents.push(w.to_ident(next_token.span)),
+            Token::Word(w) => idents.push(w.into_ident(next_token.span)),
             Token::EOF => {
                 return Err(ParserError::ParserError(
                     "Empty input when parsing identifier".to_string(),
@@ -8995,7 +8996,7 @@ impl<'a> Parser<'a> {
                 Token::Period => {
                     let next_token = self.next_token();
                     match next_token.token {
-                        Token::Word(w) => 
idents.push(w.to_ident(next_token.span)),
+                        Token::Word(w) => 
idents.push(w.into_ident(next_token.span)),
                         Token::EOF => {
                             return Err(ParserError::ParserError(
                                 "Trailing period in identifier".to_string(),
@@ -9024,7 +9025,7 @@ impl<'a> Parser<'a> {
     pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
         let next_token = self.next_token();
         match next_token.token {
-            Token::Word(w) => Ok(w.to_ident(next_token.span)),
+            Token::Word(w) => Ok(w.into_ident(next_token.span)),
             Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
             Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
             _ => self.expected("identifier", next_token),
@@ -9044,9 +9045,10 @@ impl<'a> Parser<'a> {
     fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, 
bool), ParserError> {
         match self.peek_token().token {
             Token::Word(w) => {
+                let quote_style_is_none = w.quote_style.is_none();
                 let mut requires_whitespace = false;
-                let mut ident = w.to_ident(self.next_token().span);
-                if w.quote_style.is_none() {
+                let mut ident = w.into_ident(self.next_token().span);
+                if quote_style_is_none {
                     while matches!(self.peek_token_no_skip().token, 
Token::Minus) {
                         self.next_token();
                         ident.value.push('-');
@@ -13475,6 +13477,7 @@ impl<'a> Parser<'a> {
 }
 
 impl Word {
+    #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
     pub fn to_ident(&self, span: Span) -> Ident {
         Ident {
             value: self.value.clone(),
@@ -13482,6 +13485,15 @@ impl Word {
             span,
         }
     }
+
+    /// Convert this word into an [`Ident`] identifier
+    pub fn into_ident(self, span: Span) -> Ident {
+        Ident {
+            value: self.value,
+            quote_style: self.quote_style,
+            span,
+        }
+    }
 }
 
 #[cfg(test)]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-sqlparser-rs) branch main updated: Improve parsing speed by avoiding some clones in parse_identifier (#1624)

Reply via email to