https://gcc.gnu.org/g:2c88d63160886ecd754c064b3047411b62572cd9
commit r16-4814-g2c88d63160886ecd754c064b3047411b62572cd9 Author: Owen Avery <[email protected]> Date: Sun Aug 24 19:37:45 2025 -0400 gccrs: Avoid std::unique_ptr<std::string> in Token gcc/rust/ChangeLog: * ast/rust-ast-collector.cc (TokenCollector::visit): Use should_have_str instead of has_str. * ast/rust-ast.cc (Token::as_string): Likewise. * expand/rust-macro-builtins-offset-of.cc (MacroBuiltin::offset_of_handler): Likewise. * lex/rust-lex.cc (Lexer::dump_and_skip): Likewise. * ast/rust-ast.h (class Token): Remove dead code. (Token::has_str): Remove function. (Token::should_have_str): New function. * lex/rust-token.cc (Token::get_str): Remove function definition. * lex/rust-token.h: Include "rust-diagnostics.h". (Token::str): Change type from std::unique_ptr<std::string> to std::string. (Token::Token): Adjust initialization of str member variable and the type of the parameter used to initialize it. (Token::make_identifier): Accept std::string instead of rvalue reference to std::string. (Token::make_int): Likewise. (Token::make_float): Likewise. (Token::make_string): Likewise. (Token::make_byte_string): Likewise. (Token::make_raw_string): Likewise. (Token::make_inner_doc_comment): Likewise. (Token::make_outer_doc_comment): Likewise. (Token::make_lifetime): Likewise. (Token::get_str): Add definition to function declaration. (Token::has_str): Remove member function. (Token::should_have_str): Handle INNER_DOC_COMMENT and OUTER_DOC_COMMENT. 
Signed-off-by: Owen Avery <[email protected]> Diff: --- gcc/rust/ast/rust-ast-collector.cc | 3 +- gcc/rust/ast/rust-ast.cc | 2 +- gcc/rust/ast/rust-ast.h | 59 +------------------ gcc/rust/expand/rust-macro-builtins-offset-of.cc | 2 +- gcc/rust/lex/rust-lex.cc | 9 +-- gcc/rust/lex/rust-token.cc | 23 -------- gcc/rust/lex/rust-token.h | 72 +++++++++++------------- 7 files changed, 44 insertions(+), 126 deletions(-) diff --git a/gcc/rust/ast/rust-ast-collector.cc b/gcc/rust/ast/rust-ast-collector.cc index bd924d0746c1..2854525d121c 100644 --- a/gcc/rust/ast/rust-ast-collector.cc +++ b/gcc/rust/ast/rust-ast-collector.cc @@ -355,7 +355,8 @@ TokenCollector::visit (MaybeNamedParam &param) void TokenCollector::visit (Token &tok) { - std::string data = tok.get_tok_ptr ()->has_str () ? tok.get_str () : ""; + std::string data + = tok.get_tok_ptr ()->should_have_str () ? tok.get_str () : ""; switch (tok.get_id ()) { case IDENTIFIER: diff --git a/gcc/rust/ast/rust-ast.cc b/gcc/rust/ast/rust-ast.cc index 8072ce96b4f0..d60b6711f94c 100644 --- a/gcc/rust/ast/rust-ast.cc +++ b/gcc/rust/ast/rust-ast.cc @@ -405,7 +405,7 @@ DelimTokenTree::as_string () const std::string Token::as_string () const { - if (tok_ref->has_str ()) + if (tok_ref->should_have_str ()) { std::string str = tok_ref->get_str (); diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index a7e62965201f..f14136f445bb 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -184,17 +184,6 @@ class Token : public TokenTree, public MacroMatch { // A token is a kind of token tree (except delimiter tokens) // A token is a kind of MacroMatch (except $ and delimiter tokens) -#if 0 - // TODO: improve member variables - current ones are the same as lexer token - // Token kind. - TokenId token_id; - // Token location. - location_t locus; - // Associated text (if any) of token. - std::string str; - // Token type hint (if any). 
- PrimitiveCoreType type_hint; -#endif const_TokenPtr tok_ref; @@ -209,53 +198,7 @@ public: return std::unique_ptr<Token> (clone_token_impl ()); } -#if 0 - /* constructor from general text - avoid using if lexer const_TokenPtr is - * available */ - Token (TokenId token_id, location_t locus, std::string str, - PrimitiveCoreType type_hint) - : token_id (token_id), locus (locus), str (std::move (str)), - type_hint (type_hint) - {} -#endif - // not doable with new implementation - will have to make a const_TokenPtr - // Constructor from lexer const_TokenPtr -#if 0 - /* TODO: find workaround for std::string being nullptr - probably have to - * introduce new method in lexer Token, or maybe make conversion method - * there */ - Token (const_TokenPtr lexer_token_ptr) - : token_id (lexer_token_ptr->get_id ()), - locus (lexer_token_ptr->get_locus ()), str (""), - type_hint (lexer_token_ptr->get_type_hint ()) - { - // FIXME: change to "should have str" later? - if (lexer_token_ptr->has_str ()) - { - str = lexer_token_ptr->get_str (); - - // DEBUG - rust_debug ("ast token created with str '%s'", str.c_str ()); - } - else - { - // FIXME: is this returning correct thing? 
- str = lexer_token_ptr->get_token_description (); - - // DEBUG - rust_debug ("ast token created with string '%s'", str.c_str ()); - } - - // DEBUG - if (lexer_token_ptr->should_have_str () && !lexer_token_ptr->has_str ()) - { - rust_debug ( - "BAD: for token '%s', should have string but does not!", - lexer_token_ptr->get_token_description ()); - } - } -#endif Token (const_TokenPtr lexer_tok_ptr) : tok_ref (std::move (lexer_tok_ptr)) {} bool is_string_lit () const @@ -283,7 +226,7 @@ public: std::vector<std::unique_ptr<Token>> to_token_stream () const override; TokenId get_id () const { return tok_ref->get_id (); } - bool has_str () const { return tok_ref->has_str (); } + bool should_have_str () const { return tok_ref->should_have_str (); } const std::string &get_str () const { return tok_ref->get_str (); } location_t get_locus () const { return tok_ref->get_locus (); } diff --git a/gcc/rust/expand/rust-macro-builtins-offset-of.cc b/gcc/rust/expand/rust-macro-builtins-offset-of.cc index 53efe74b228b..02c637bebd0f 100644 --- a/gcc/rust/expand/rust-macro-builtins-offset-of.cc +++ b/gcc/rust/expand/rust-macro-builtins-offset-of.cc @@ -56,7 +56,7 @@ MacroBuiltin::offset_of_handler (location_t invoc_locus, parser.skip_token (COMMA); auto field_tok = parser.parse_identifier_or_keyword_token (); - auto invalid_field = !field_tok || !field_tok->has_str (); + auto invalid_field = !field_tok || !field_tok->should_have_str (); if (invalid_field) rust_error_at (invoc_locus, "could not parse field argument for %qs", diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 76ff15c21bc1..214161fcca61 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -236,10 +236,11 @@ Lexer::dump_and_skip (int n) out << "<id="; out << tok->token_id_to_str (); - out << (tok->has_str () ? (std::string (", text=") + tok->get_str () - + std::string (", typehint=") - + std::string (tok->get_type_hint_str ())) - : "") + out << (tok->should_have_str () + ? 
(std::string (", text=") + tok->get_str () + + std::string (", typehint=") + + std::string (tok->get_type_hint_str ())) + : "") << " "; out << Linemap::location_to_string (loc) << '\n'; } diff --git a/gcc/rust/lex/rust-token.cc b/gcc/rust/lex/rust-token.cc index c396e100dd8d..43682af4fe2e 100644 --- a/gcc/rust/lex/rust-token.cc +++ b/gcc/rust/lex/rust-token.cc @@ -178,29 +178,6 @@ nfc_normalize_token_string (location_t loc, TokenId id, const std::string &str) return str; } -const std::string & -Token::get_str () const -{ - if (token_id_is_keyword (token_id)) - return token_id_keyword_string (token_id); - - // FIXME: attempt to return null again - // gcc_assert(str != NULL); - - // HACK: allow referencing an empty string - static const std::string empty = ""; - - if (str == NULL) - { - rust_error_at (get_locus (), - "attempted to get string for %qs, which has no string. " - "returning empty string instead", - get_token_description ()); - return empty; - } - return *str; -} - namespace { enum class Context { diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h index 2021aec4e4ca..71a75037dd31 100644 --- a/gcc/rust/lex/rust-token.h +++ b/gcc/rust/lex/rust-token.h @@ -22,6 +22,7 @@ #include "rust-system.h" #include "rust-linemap.h" #include "rust-unicode.h" +#include "rust-diagnostics.h" namespace Rust { @@ -249,7 +250,7 @@ private: // Token location. location_t locus; // Associated text (if any) of token. - std::unique_ptr<std::string> str; + std::string str; // TODO: maybe remove issues and just store std::string as value? /* Type hint for token based on lexer data (e.g. type suffix). Does not exist * for most tokens. */ @@ -257,23 +258,21 @@ private: // Token constructor from token id and location. Has a null string. 
Token (TokenId token_id, location_t location) - : token_id (token_id), locus (location), str (nullptr), - type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) {} // Token constructor from token id, location, and a string. - Token (TokenId token_id, location_t location, std::string &&paramStr) + Token (TokenId token_id, location_t location, std::string paramStr) : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens - str = std::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, paramStr)); + str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); } // Token constructor from token id, location, and a char. Token (TokenId token_id, location_t location, char paramChar) - : token_id (token_id), locus (location), - str (new std::string (1, paramChar)), type_hint (CORETYPE_UNKNOWN) + : token_id (token_id), locus (location), str (1, paramChar), + type_hint (CORETYPE_UNKNOWN) { // Do not need to normalize 1byte char } @@ -283,19 +282,17 @@ private: : token_id (token_id), locus (location), type_hint (CORETYPE_UNKNOWN) { // Normalize identifier tokens - str = std::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, - paramCodepoint.as_string ())); + str = nfc_normalize_token_string (location, token_id, + paramCodepoint.as_string ()); } // Token constructor from token id, location, a string, and type hint. 
- Token (TokenId token_id, location_t location, std::string &&paramStr, + Token (TokenId token_id, location_t location, std::string paramStr, PrimitiveCoreType parType) : token_id (token_id), locus (location), type_hint (parType) { // Normalize identifier tokens - str = std::make_unique<std::string> ( - nfc_normalize_token_string (location, token_id, paramStr)); + str = nfc_normalize_token_string (location, token_id, std::move (paramStr)); } public: @@ -322,7 +319,7 @@ public: } // Makes and returns a new TokenPtr of type IDENTIFIER. - static TokenPtr make_identifier (location_t locus, std::string &&str) + static TokenPtr make_identifier (location_t locus, std::string str) { // return std::make_shared<Token> (IDENTIFIER, locus, str); return TokenPtr (new Token (IDENTIFIER, locus, std::move (str))); @@ -331,7 +328,7 @@ public: static TokenPtr make_identifier (const Identifier &ident); // Makes and returns a new TokenPtr of type INT_LITERAL. - static TokenPtr make_int (location_t locus, std::string &&str, + static TokenPtr make_int (location_t locus, std::string str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared<Token> (INT_LITERAL, locus, str, type_hint); @@ -340,7 +337,7 @@ public: } // Makes and returns a new TokenPtr of type FLOAT_LITERAL. - static TokenPtr make_float (location_t locus, std::string &&str, + static TokenPtr make_float (location_t locus, std::string str, PrimitiveCoreType type_hint = CORETYPE_UNKNOWN) { // return std::make_shared<Token> (FLOAT_LITERAL, locus, str, type_hint); @@ -349,7 +346,7 @@ public: } // Makes and returns a new TokenPtr of type STRING_LITERAL. - static TokenPtr make_string (location_t locus, std::string &&str) + static TokenPtr make_string (location_t locus, std::string str) { // return std::make_shared<Token> (STRING_LITERAL, locus, str, // CORETYPE_STR); @@ -372,32 +369,32 @@ public: } // Makes and returns a new TokenPtr of type BYTE_STRING_LITERAL (fix). 
- static TokenPtr make_byte_string (location_t locus, std::string &&str) + static TokenPtr make_byte_string (location_t locus, std::string str) { // return std::make_shared<Token> (BYTE_STRING_LITERAL, locus, str); return TokenPtr (new Token (BYTE_STRING_LITERAL, locus, std::move (str))); } // Makes and returns a new TokenPtr of type RAW_STRING_LITERAL. - static TokenPtr make_raw_string (location_t locus, std::string &&str) + static TokenPtr make_raw_string (location_t locus, std::string str) { return TokenPtr (new Token (RAW_STRING_LITERAL, locus, std::move (str))); } // Makes and returns a new TokenPtr of type INNER_DOC_COMMENT. - static TokenPtr make_inner_doc_comment (location_t locus, std::string &&str) + static TokenPtr make_inner_doc_comment (location_t locus, std::string str) { return TokenPtr (new Token (INNER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type OUTER_DOC_COMMENT. - static TokenPtr make_outer_doc_comment (location_t locus, std::string &&str) + static TokenPtr make_outer_doc_comment (location_t locus, std::string str) { return TokenPtr (new Token (OUTER_DOC_COMMENT, locus, std::move (str))); } // Makes and returns a new TokenPtr of type LIFETIME. - static TokenPtr make_lifetime (location_t locus, std::string &&str) + static TokenPtr make_lifetime (location_t locus, std::string str) { // return std::make_shared<Token> (LIFETIME, locus, str); return TokenPtr (new Token (LIFETIME, locus, std::move (str))); @@ -413,16 +410,18 @@ public: void set_locus (location_t locus) { this->locus = locus; } // Gets string description of the token. - const std::string & - get_str () const; /*{ -// FIXME: put in header again when fix null problem -//gcc_assert(str != nullptr); -if (str == nullptr) { -error_at(get_locus(), "attempted to get string for '%s', which has no string. 
-returning empty string instead.", get_token_description()); return ""; -} -return *str; -}*/ + const std::string &get_str () const + { + if (token_id_is_keyword (token_id)) + return token_id_keyword_string (token_id); + + if (!should_have_str ()) + rust_internal_error_at ( + locus, "attempting to get string for %qs, which should have no string", + get_token_description ()); + + return str; + } // Gets token's type hint info. PrimitiveCoreType get_type_hint () const @@ -464,14 +463,11 @@ return *str; } } - /* Returns whether the token actually has a string (regardless of whether it - * should or not). */ - bool has_str () const { return str != nullptr; } - // Returns whether the token should have a string. bool should_have_str () const { - return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME; + return is_literal () || token_id == IDENTIFIER || token_id == LIFETIME + || token_id == INNER_DOC_COMMENT || token_id == OUTER_DOC_COMMENT; } // Returns whether the token is a pure decimal int literal
