This is an automated email from the ASF dual-hosted git repository.
iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 6daa4b05 Refactor advancing token to avoid duplication, avoid borrow checker issues (#1618)
6daa4b05 is described below
commit 6daa4b059cde8b77b67a3699b174ef0f8edff350
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Dec 27 09:17:52 2024 -0500
Refactor advancing token to avoid duplication, avoid borrow checker issues (#1618)
Co-authored-by: Ifeanyi Ubah <[email protected]>
---
src/parser/mod.rs | 102 +++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 71 insertions(+), 31 deletions(-)
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 2756ed6c..65991d32 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1315,7 +1315,9 @@ impl<'a> Parser<'a> {
let dialect = self.dialect;
- let (next_token, next_token_index) = self.next_token_ref_with_index();
+ self.advance_token();
+ let next_token_index = self.get_current_index();
+ let next_token = self.get_current_token();
let span = next_token.span;
let expr = match &next_token.token {
Token::Word(w) => {
@@ -2953,7 +2955,9 @@ impl<'a> Parser<'a> {
let dialect = self.dialect;
- let (tok, tok_index) = self.next_token_ref_with_index();
+ self.advance_token();
+ let tok = self.get_current_token();
+ let tok_index = self.get_current_index();
let span = tok.span;
let regular_binary_operator = match &tok.token {
Token::Spaceship => Some(BinaryOperator::Spaceship),
@@ -3033,7 +3037,8 @@ impl<'a> Parser<'a> {
// See https://www.postgresql.org/docs/current/sql-createoperator.html
let mut idents = vec![];
loop {
- idents.push(self.next_token_ref().to_string());
+ self.advance_token();
+ idents.push(self.get_current_token().to_string());
if !self.consume_token(&Token::Period) {
break;
}
@@ -3480,6 +3485,8 @@ impl<'a> Parser<'a> {
/// Return the first non-whitespace token that has not yet been processed
/// or Token::EOF
+ ///
+ /// See [`Self::peek_token_ref`] to avoid the copy.
pub fn peek_token(&self) -> TokenWithSpan {
self.peek_nth_token(0)
}
@@ -3594,21 +3601,31 @@ impl<'a> Parser<'a> {
/// Advances to the next non-whitespace token and returns a copy.
///
- /// See [`Self::next_token_ref`] to avoid the copy.
+ /// Please use [`Self::advance_token`] and [`Self::get_current_token`] to
+ /// avoid the copy.
pub fn next_token(&mut self) -> TokenWithSpan {
- self.next_token_ref().clone()
+ self.advance_token();
+ self.get_current_token().clone()
}
- pub fn next_token_ref(&mut self) -> &TokenWithSpan {
- self.next_token_ref_with_index().0
+ /// Returns the index of the current token
+ ///
+ /// This can be used with APIs that expect an index, such as
+ /// [`Self::token_at`]
+ pub fn get_current_index(&self) -> usize {
+ self.index.saturating_sub(1)
}
- /// Return the first non-whitespace token that has not yet been processed
- /// and that tokens index and advances the tokens
+ /// Return the next unprocessed token, possibly whitespace.
+ pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
+ self.index += 1;
+ self.tokens.get(self.index - 1)
+ }
+
+ /// Advances the current token to the next non-whitespace token
///
- /// # Notes:
- /// OK to call repeatedly after reaching EOF.
- pub fn next_token_ref_with_index(&mut self) -> (&TokenWithSpan, usize) {
+ /// See [`Self::get_current_token`] to get the current token after advancing
+ pub fn advance_token(&mut self) {
loop {
self.index += 1;
match self.tokens.get(self.index - 1) {
@@ -3616,25 +3633,38 @@ impl<'a> Parser<'a> {
token: Token::Whitespace(_),
span: _,
}) => continue,
- token => return (token.unwrap_or(&EOF_TOKEN), self.index - 1),
+ _ => break,
}
}
}
/// Returns a reference to the current token
- pub fn current_token(&self) -> &TokenWithSpan {
- self.tokens.get(self.index - 1).unwrap_or(&EOF_TOKEN)
+ ///
+ /// Does not advance the current token.
+ pub fn get_current_token(&self) -> &TokenWithSpan {
+ self.token_at(self.index.saturating_sub(1))
}
- /// Return the first unprocessed token, possibly whitespace.
- pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
- self.index += 1;
- self.tokens.get(self.index - 1)
+ /// Returns a reference to the previous token
+ ///
+ /// Does not advance the current token.
+ pub fn get_previous_token(&self) -> &TokenWithSpan {
+ self.token_at(self.index.saturating_sub(2))
}
- /// Push back the last one non-whitespace token. Must be called after
- /// `next_token()`, otherwise might panic. OK to call after
- /// `next_token()` indicates an EOF.
+ /// Returns a reference to the next token
+ ///
+ /// Does not advance the current token.
+ pub fn get_next_token(&self) -> &TokenWithSpan {
+ self.token_at(self.index)
+ }
+
+ /// Seek back the last one non-whitespace token.
+ ///
+ /// Must be called after `next_token()`, otherwise might panic. OK to call
+ /// after `next_token()` indicates an EOF.
+ ///
+ // TODO rename to backup_token and deprecate prev_token?
pub fn prev_token(&mut self) {
loop {
assert!(self.index > 0);
@@ -3680,22 +3710,30 @@ impl<'a> Parser<'a> {
#[must_use]
pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
if self.peek_keyword(expected) {
- self.next_token_ref();
+ self.advance_token();
true
} else {
false
}
}
+ /// If the current token is the `expected` keyword, consume it and returns
+ ///
+ /// See [`Self::parse_keyword_token_ref`] to avoid the copy.
#[must_use]
pub fn parse_keyword_token(&mut self, expected: Keyword) ->
Option<TokenWithSpan> {
self.parse_keyword_token_ref(expected).cloned()
}
+ /// If the current token is the `expected` keyword, consume it and returns a reference to the next token.
+ ///
#[must_use]
pub fn parse_keyword_token_ref(&mut self, expected: Keyword) ->
Option<&TokenWithSpan> {
match &self.peek_token_ref().token {
- Token::Word(w) if expected == w.keyword =>
Some(self.next_token_ref()),
+ Token::Word(w) if expected == w.keyword => {
+ self.advance_token();
+ Some(self.get_current_token())
+ }
_ => None,
}
}
@@ -3722,7 +3760,7 @@ impl<'a> Parser<'a> {
}
// consume all tokens
for _ in 0..(tokens.len() + 1) {
- self.next_token_ref();
+ self.advance_token();
}
true
}
@@ -3758,7 +3796,7 @@ impl<'a> Parser<'a> {
.iter()
.find(|keyword| **keyword == w.keyword)
.map(|keyword| {
- self.next_token_ref();
+ self.advance_token();
*keyword
})
}
@@ -3813,10 +3851,12 @@ impl<'a> Parser<'a> {
}
/// Consume the next token if it matches the expected token, otherwise return false
+ ///
+ /// See [Self::advance_token] to consume the token unconditionally
#[must_use]
pub fn consume_token(&mut self, expected: &Token) -> bool {
if self.peek_token_ref() == expected {
- self.next_token_ref();
+ self.advance_token();
true
} else {
false
@@ -8338,9 +8378,9 @@ impl<'a> Parser<'a> {
&mut self,
) -> Result<(DataType, MatchedTrailingBracket), ParserError> {
let dialect = self.dialect;
- let (next_token, next_token_index) = self.next_token_ref_with_index();
- let _ = next_token; // release ref
- let next_token = self.current_token();
+ self.advance_token();
+ let next_token = self.get_current_token();
+ let next_token_index = self.get_current_index();
let mut trailing_bracket: MatchedTrailingBracket = false.into();
let mut data = match &next_token.token {
@@ -8866,7 +8906,7 @@ impl<'a> Parser<'a> {
Token::EOF | Token::Eq => break,
_ => {}
}
- self.next_token_ref();
+ self.advance_token();
}
Ok(idents)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]