This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch gh-readonly-queue/main/pr-2184-0924f3a9b28cf79e7ef49819dabc719523f9aa8b
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git

commit 60abfecf66341f970894c03e696e862ef967e5a7
Author: Albert Skalt <[email protected]>
AuthorDate: Fri Feb 6 21:18:12 2026 +0300

    Add Tokenizer custom token mapper support (#2184)
---
 src/tokenizer.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cc5a2aa1..852b7316 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -934,6 +934,16 @@ impl<'a> Tokenizer<'a> {
     pub fn tokenize_with_location_into_buf(
         &mut self,
         buf: &mut Vec<TokenWithSpan>,
+    ) -> Result<(), TokenizerError> {
+        self.tokenize_with_location_into_buf_with_mapper(buf, |token| token)
+    }
+
+    /// Tokenize the statement and produce a vector of tokens, mapping each token
+    /// with provided `mapper`
+    pub fn tokenize_with_location_into_buf_with_mapper(
+        &mut self,
+        buf: &mut Vec<TokenWithSpan>,
+        mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
     ) -> Result<(), TokenizerError> {
         let mut state = State {
             peekable: self.query.chars().peekable(),
@@ -952,10 +962,10 @@
                         && comment.starts_with('!') =>
                 {
                     // Re-tokenize the hints and add them to the buffer
-                    self.tokenize_comment_hints(comment, span, buf)?;
+                    self.tokenize_comment_hints(comment, span, buf, &mut mapper)?;
                 }
                 _ => {
-                    buf.push(TokenWithSpan { token, span });
+                    buf.push(mapper(TokenWithSpan { token, span }));
                 }
             }
 
@@ -971,6 +981,7 @@
         comment: &str,
         span: Span,
         buf: &mut Vec<TokenWithSpan>,
+        mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
     ) -> Result<(), TokenizerError> {
         // Strip the leading '!' and any version digits (e.g., "50110")
         let hint_content = comment
@@ -997,10 +1008,10 @@
         let mut location = state.location();
         while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
             let token_span = location.span_to(state.location());
-            buf.push(TokenWithSpan {
+            buf.push(mapper(TokenWithSpan {
                 token,
                 span: token_span,
-            });
+            }));
             location = state.location();
         }
 
@@ -2644,6 +2655,38 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_with_mapper() {
+        let sql = String::from("SELECT ?");
+        let dialect = GenericDialect {};
+        let mut param_num = 1;
+
+        let mut tokens = vec![];
+        Tokenizer::new(&dialect, &sql)
+            .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| {
+                token_span.token = match token_span.token {
+                    Token::Placeholder(n) => Token::Placeholder(if n == "?" {
+                        let ret = format!("${}", param_num);
+                        param_num += 1;
+                        ret
+                    } else {
+                        n
+                    }),
+                    token => token,
+                };
+                token_span
+            })
+            .unwrap();
+        let actual = tokens.into_iter().map(|t| t.token).collect();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Placeholder("$1".to_string()),
+        ];
+
+        compare(expected, actual);
+    }
+
     #[test]
     fn tokenize_clickhouse_double_equal() {
         let sql = String::from("SELECT foo=='1'");
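
For reviewers who want the caller's view of the new entry point, here is a
minimal sketch. It relies only on what the patch shows (Tokenizer::new,
tokenize_with_location_into_buf_with_mapper, Token::Placeholder, TokenWithSpan)
plus the crate's public module paths; the main wrapper, the sample SQL, and the
?-to-$N rewrite are illustrative, not part of this commit:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, TokenizerError};

    fn main() -> Result<(), TokenizerError> {
        let dialect = GenericDialect {};
        let sql = "SELECT ? + ?";

        // Rewrite anonymous `?` placeholders to numbered `$1`, `$2`, ... in a
        // single tokenizing pass, instead of walking the buffer a second time.
        let mut param_num = 1;
        let mut tokens: Vec<TokenWithSpan> = Vec::new();
        Tokenizer::new(&dialect, sql).tokenize_with_location_into_buf_with_mapper(
            &mut tokens,
            |mut tws| {
                tws.token = match tws.token {
                    Token::Placeholder(p) if p == "?" => {
                        let mapped = Token::Placeholder(format!("${}", param_num));
                        param_num += 1;
                        mapped
                    }
                    other => other,
                };
                tws
            },
        )?;

        // Each mapped token keeps the span the tokenizer computed for it.
        for t in &tokens {
            println!("{:?} at {:?}", t.token, t.span);
        }
        Ok(())
    }

Because the patch also threads the mapper through tokenize_comment_hints,
tokens produced while re-tokenizing /*! ... */ hint comments pass through the
same closure, so a single pass covers the whole statement.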
