afs commented on code in PR #2726: URL: https://github.com/apache/jena/pull/2726#discussion_r1774070016
########## jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java: ########## @@ -439,37 +458,42 @@ private Token parseToken() { */ - // TODO extract readNumberNoSign - - int signCh = 0; - if ( ch == CH_PLUS || ch == CH_MINUS ) { reader.readChar(); int ch2 = reader.peekChar(); - - if ( !range(ch2, '0', '9') ) { - // ch was end of symbol. - // reader.readChar(); + if ( !range(ch2, '0', '9') && ch2 != CH_DOT ) { + // Not a number. if ( ch == CH_PLUS ) token.setType(TokenType.PLUS); else token.setType(TokenType.MINUS); return token; } - - // Already got a + or - ... - // readNumberNoSign - // Because next, old code processes signs. - reader.pushbackChar(ch); - signCh = ch; - // Drop to next "if" + // ch2 not consumed. + boolean charactersConsumed = readNumber(ch, false); + if ( ! charactersConsumed ) { + if ( ch == CH_PLUS ) + token.setType(TokenType.PLUS); + else + token.setType(TokenType.MINUS); + } + return token; } - if ( ch == CH_PLUS || ch == CH_MINUS || range(ch, '0', '9') ) { - // readNumberNoSign - readNumber(); - if ( Checking ) - checkNumber(token.getImage(), token.getImage2()); + if ( range(ch, '0', '9') ) { Review Comment: Leading DOT is handled in the `case DOT` rule at line 330. It works by pushing the DOT back into the read stream. This means there are three routes to `readNumber` spread out across the token loop, which makes it harder to follow. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@jena.apache.org For additional commands, e-mail: pr-h...@jena.apache.org