This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch 
gh-readonly-queue/main/pr-2184-0924f3a9b28cf79e7ef49819dabc719523f9aa8b
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git

commit 60abfecf66341f970894c03e696e862ef967e5a7
Author: Albert Skalt <[email protected]>
AuthorDate: Fri Feb 6 21:18:12 2026 +0300

    Add Tokenizer custom token mapper support (#2184)
---
 src/tokenizer.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index cc5a2aa1..852b7316 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -934,6 +934,16 @@ impl<'a> Tokenizer<'a> {
     pub fn tokenize_with_location_into_buf(
         &mut self,
         buf: &mut Vec<TokenWithSpan>,
+    ) -> Result<(), TokenizerError> {
+        self.tokenize_with_location_into_buf_with_mapper(buf, |token| token)
+    }
+
+    /// Tokenize the statement and produce a vector of tokens, mapping each token
+    /// with the provided `mapper`
+    pub fn tokenize_with_location_into_buf_with_mapper(
+        &mut self,
+        buf: &mut Vec<TokenWithSpan>,
+        mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
     ) -> Result<(), TokenizerError> {
         let mut state = State {
             peekable: self.query.chars().peekable(),
@@ -952,10 +962,10 @@ impl<'a> Tokenizer<'a> {
                         && comment.starts_with('!') =>
                 {
                     // Re-tokenize the hints and add them to the buffer
-                    self.tokenize_comment_hints(comment, span, buf)?;
+                    self.tokenize_comment_hints(comment, span, buf, &mut mapper)?;
                 }
                 _ => {
-                    buf.push(TokenWithSpan { token, span });
+                    buf.push(mapper(TokenWithSpan { token, span }));
                 }
             }
 
@@ -971,6 +981,7 @@ impl<'a> Tokenizer<'a> {
         comment: &str,
         span: Span,
         buf: &mut Vec<TokenWithSpan>,
+        mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
     ) -> Result<(), TokenizerError> {
         // Strip the leading '!' and any version digits (e.g., "50110")
         let hint_content = comment
@@ -997,10 +1008,10 @@ impl<'a> Tokenizer<'a> {
         let mut location = state.location();
         while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
             let token_span = location.span_to(state.location());
-            buf.push(TokenWithSpan {
+            buf.push(mapper(TokenWithSpan {
                 token,
                 span: token_span,
-            });
+            }));
             location = state.location();
         }
 
@@ -2644,6 +2655,38 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_with_mapper() {
+        let sql = String::from("SELECT ?");
+        let dialect = GenericDialect {};
+        let mut param_num = 1;
+
+        let mut tokens = vec![];
+        Tokenizer::new(&dialect, &sql)
+            .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| {
+                token_span.token = match token_span.token {
+                    Token::Placeholder(n) => Token::Placeholder(if n == "?" {
+                        let ret = format!("${}", param_num);
+                        param_num += 1;
+                        ret
+                    } else {
+                        n
+                    }),
+                    token => token,
+                };
+                token_span
+            })
+            .unwrap();
+        let actual = tokens.into_iter().map(|t| t.token).collect();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Placeholder("$1".to_string()),
+        ];
+
+        compare(expected, actual);
+    }
+
     #[test]
     fn tokenize_clickhouse_double_equal() {
         let sql = String::from("SELECT foo=='1'");

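For reviewers, a minimal usage sketch of the new hook (not part of this
commit). It assumes the existing `Token::Word` variant with its `value` and
`quote_style` fields; the mapper here uppercases unquoted words as they are
tokenized:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer};

    fn main() {
        let dialect = GenericDialect {};
        let sql = "select a from t";
        let mut tokens: Vec<TokenWithSpan> = Vec::new();

        // Rewrite each token as it is produced; spans are left untouched.
        Tokenizer::new(&dialect, sql)
            .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut tws| {
                if let Token::Word(w) = &mut tws.token {
                    if w.quote_style.is_none() {
                        w.value = w.value.to_uppercase();
                    }
                }
                tws
            })
            .unwrap();

        // Prints: SELECT A FROM T
        for t in &tokens {
            print!("{}", t.token);
        }
        println!();
    }

Because the mapper is `FnMut`, it can also carry state across tokens, as the
`tokenize_with_mapper` test above does when numbering `?` placeholders.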
