MohamedAbdeen21 commented on code in PR #1835: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1835#discussion_r2072419795
########## src/tokenizer.rs: ########## @@ -1281,20 +1262,91 @@ impl<'a> Tokenizer<'a> { return Ok(Some(Token::make_word(s.as_str(), None))); } } else if prev_token == Some(&Token::Period) { - // If the previous token was a period, thus not belonging to a number, - // the value we have is part of an identifier. + // Handle as word if it follows a period return Ok(Some(Token::make_word(s.as_str(), None))); } } + // Handle "L" suffix for long numbers let long = if chars.peek() == Some(&'L') { chars.next(); true } else { false }; + + // Return the final token for the number Ok(Some(Token::Number(s, long))) } + + // Period (`.`) handling + '.' => { + chars.next(); // consume the dot + + match chars.peek() { + // Handle "._" case as a period followed by identifier + // if the last token was a word + Some('_') if matches!(prev_token, Some(Token::Word(_))) => { + Ok(Some(Token::Period)) + } + Some('_') => { + self.tokenizer_error( + chars.location(), + "Unexpected underscore here".to_string(), + ) + } + Some(ch) + // Hive and mysql dialects allow numeric prefixes for identifers + if ch.is_ascii_digit() + && self.dialect.supports_numeric_prefix() + && matches!(prev_token, Some(Token::Word(_))) => + { + Ok(Some(Token::Period)) + } + Some(ch) if ch.is_ascii_digit() => { + // Handle numbers starting with a dot (e.g., ".123") + let mut s = String::from("."); + let is_number_separator = |ch: char, next_char: Option<char>| { + self.dialect.supports_numeric_literal_underscores() + && ch == '_' + && next_char.is_some_and(|c| c.is_ascii_digit()) + }; + + s += &peeking_next_take_while(chars, |ch, next_ch| { + ch.is_ascii_digit() || is_number_separator(ch, next_ch) + }); + + // Handle exponent part + if matches!(chars.peek(), Some('e' | 'E')) { + let mut exp = String::new(); + exp.push(chars.next().unwrap()); + + if matches!(chars.peek(), Some('+' | '-')) { + exp.push(chars.next().unwrap()); + } + + if matches!(chars.peek(), Some(c) if c.is_ascii_digit()) { + exp += 
&peeking_take_while(chars, |c| c.is_ascii_digit()); + s += &exp; + } + } + + // Handle "L" suffix for long numbers + let long = if chars.peek() == Some(&'L') { + chars.next(); + true Review Comment: The problem is that the match happens on a peek, and you need to consume the dot in order to peek at the underscore. What if the second peek isn't an underscore? Then you need to un-consume the dot so that it can be parsed as part of the number. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org