(datafusion-sqlparser-rs) branch main updated: Refactor advancing token to avoid duplication, avoid borrow checker issues (#1618)

iffyio Fri, 27 Dec 2024 06:18:21 -0800

This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 6daa4b05 Refactor advancing token to avoid duplication, avoid borrow 
checker issues (#1618)
6daa4b05 is described below

commit 6daa4b059cde8b77b67a3699b174ef0f8edff350
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Dec 27 09:17:52 2024 -0500

    Refactor advancing token to avoid duplication, avoid borrow checker issues 
(#1618)
    
    Co-authored-by: Ifeanyi Ubah <[email protected]>
---
 src/parser/mod.rs | 102 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 71 insertions(+), 31 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 2756ed6c..65991d32 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1315,7 +1315,9 @@ impl<'a> Parser<'a> {
 
         let dialect = self.dialect;
 
-        let (next_token, next_token_index) = self.next_token_ref_with_index();
+        self.advance_token();
+        let next_token_index = self.get_current_index();
+        let next_token = self.get_current_token();
         let span = next_token.span;
         let expr = match &next_token.token {
             Token::Word(w) => {
@@ -2953,7 +2955,9 @@ impl<'a> Parser<'a> {
 
         let dialect = self.dialect;
 
-        let (tok, tok_index) = self.next_token_ref_with_index();
+        self.advance_token();
+        let tok = self.get_current_token();
+        let tok_index = self.get_current_index();
         let span = tok.span;
         let regular_binary_operator = match &tok.token {
             Token::Spaceship => Some(BinaryOperator::Spaceship),
@@ -3033,7 +3037,8 @@ impl<'a> Parser<'a> {
                     // See 
https://www.postgresql.org/docs/current/sql-createoperator.html
                     let mut idents = vec![];
                     loop {
-                        idents.push(self.next_token_ref().to_string());
+                        self.advance_token();
+                        idents.push(self.get_current_token().to_string());
                         if !self.consume_token(&Token::Period) {
                             break;
                         }
@@ -3480,6 +3485,8 @@ impl<'a> Parser<'a> {
 
     /// Return the first non-whitespace token that has not yet been processed
     /// or Token::EOF
+    ///
+    /// See [`Self::peek_token_ref`] to avoid the copy.
     pub fn peek_token(&self) -> TokenWithSpan {
         self.peek_nth_token(0)
     }
@@ -3594,21 +3601,31 @@ impl<'a> Parser<'a> {
 
     /// Advances to the next non-whitespace token and returns a copy.
     ///
-    /// See [`Self::next_token_ref`] to avoid the copy.
+    /// Please use [`Self::advance_token`] and [`Self::get_current_token`] to
+    /// avoid the copy.
     pub fn next_token(&mut self) -> TokenWithSpan {
-        self.next_token_ref().clone()
+        self.advance_token();
+        self.get_current_token().clone()
     }
 
-    pub fn next_token_ref(&mut self) -> &TokenWithSpan {
-        self.next_token_ref_with_index().0
+    /// Returns the index of the current token
+    ///
+    /// This can be used with APIs that expect an index, such as
+    /// [`Self::token_at`]
+    pub fn get_current_index(&self) -> usize {
+        self.index.saturating_sub(1)
     }
 
-    /// Return the first non-whitespace token that has not yet been processed
-    /// and that tokens index and advances the tokens
+    /// Return the next unprocessed token, possibly whitespace.
+    pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
+        self.index += 1;
+        self.tokens.get(self.index - 1)
+    }
+
+    /// Advances the current token to the next non-whitespace token
     ///
-    /// # Notes:
-    /// OK to call repeatedly after reaching EOF.
-    pub fn next_token_ref_with_index(&mut self) -> (&TokenWithSpan, usize) {
+    /// See [`Self::get_current_token`] to get the current token after 
advancing
+    pub fn advance_token(&mut self) {
         loop {
             self.index += 1;
             match self.tokens.get(self.index - 1) {
@@ -3616,25 +3633,38 @@ impl<'a> Parser<'a> {
                     token: Token::Whitespace(_),
                     span: _,
                 }) => continue,
-                token => return (token.unwrap_or(&EOF_TOKEN), self.index - 1),
+                _ => break,
             }
         }
     }
 
     /// Returns a reference to the current token
-    pub fn current_token(&self) -> &TokenWithSpan {
-        self.tokens.get(self.index - 1).unwrap_or(&EOF_TOKEN)
+    ///
+    /// Does not advance the current token.
+    pub fn get_current_token(&self) -> &TokenWithSpan {
+        self.token_at(self.index.saturating_sub(1))
     }
 
-    /// Return the first unprocessed token, possibly whitespace.
-    pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
-        self.index += 1;
-        self.tokens.get(self.index - 1)
+    /// Returns a reference to the previous token
+    ///
+    /// Does not advance the current token.
+    pub fn get_previous_token(&self) -> &TokenWithSpan {
+        self.token_at(self.index.saturating_sub(2))
     }
 
-    /// Push back the last one non-whitespace token. Must be called after
-    /// `next_token()`, otherwise might panic. OK to call after
-    /// `next_token()` indicates an EOF.
+    /// Returns a reference to the next token
+    ///
+    /// Does not advance the current token.
+    pub fn get_next_token(&self) -> &TokenWithSpan {
+        self.token_at(self.index)
+    }
+
+    /// Seek back the last one non-whitespace token.
+    ///
+    /// Must be called after `next_token()`, otherwise might panic. OK to call
+    /// after `next_token()` indicates an EOF.
+    ///
+    // TODO rename to backup_token and deprecate prev_token?
     pub fn prev_token(&mut self) {
         loop {
             assert!(self.index > 0);
@@ -3680,22 +3710,30 @@ impl<'a> Parser<'a> {
     #[must_use]
     pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
         if self.peek_keyword(expected) {
-            self.next_token_ref();
+            self.advance_token();
             true
         } else {
             false
         }
     }
 
+    /// If the current token is the `expected` keyword, consume it and returns
+    ///
+    /// See [`Self::parse_keyword_token_ref`] to avoid the copy.
     #[must_use]
     pub fn parse_keyword_token(&mut self, expected: Keyword) -> 
Option<TokenWithSpan> {
         self.parse_keyword_token_ref(expected).cloned()
     }
 
+    /// If the current token is the `expected` keyword, consume it and returns 
a reference to the next token.
+    ///
     #[must_use]
     pub fn parse_keyword_token_ref(&mut self, expected: Keyword) -> 
Option<&TokenWithSpan> {
         match &self.peek_token_ref().token {
-            Token::Word(w) if expected == w.keyword => 
Some(self.next_token_ref()),
+            Token::Word(w) if expected == w.keyword => {
+                self.advance_token();
+                Some(self.get_current_token())
+            }
             _ => None,
         }
     }
@@ -3722,7 +3760,7 @@ impl<'a> Parser<'a> {
                 }
                 // consume all tokens
                 for _ in 0..(tokens.len() + 1) {
-                    self.next_token_ref();
+                    self.advance_token();
                 }
                 true
             }
@@ -3758,7 +3796,7 @@ impl<'a> Parser<'a> {
                     .iter()
                     .find(|keyword| **keyword == w.keyword)
                     .map(|keyword| {
-                        self.next_token_ref();
+                        self.advance_token();
                         *keyword
                     })
             }
@@ -3813,10 +3851,12 @@ impl<'a> Parser<'a> {
     }
 
     /// Consume the next token if it matches the expected token, otherwise 
return false
+    ///
+    /// See [Self::advance_token] to consume the token unconditionally
     #[must_use]
     pub fn consume_token(&mut self, expected: &Token) -> bool {
         if self.peek_token_ref() == expected {
-            self.next_token_ref();
+            self.advance_token();
             true
         } else {
             false
@@ -8338,9 +8378,9 @@ impl<'a> Parser<'a> {
         &mut self,
     ) -> Result<(DataType, MatchedTrailingBracket), ParserError> {
         let dialect = self.dialect;
-        let (next_token, next_token_index) = self.next_token_ref_with_index();
-        let _ = next_token; // release ref
-        let next_token = self.current_token();
+        self.advance_token();
+        let next_token = self.get_current_token();
+        let next_token_index = self.get_current_index();
 
         let mut trailing_bracket: MatchedTrailingBracket = false.into();
         let mut data = match &next_token.token {
@@ -8866,7 +8906,7 @@ impl<'a> Parser<'a> {
                 Token::EOF | Token::Eq => break,
                 _ => {}
             }
-            self.next_token_ref();
+            self.advance_token();
         }
         Ok(idents)
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-sqlparser-rs) branch main updated: Refactor advancing token to avoid duplication, avoid borrow checker issues (#1618)

Reply via email to