================
@@ -0,0 +1,176 @@
+//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements NumericLiteralCaseFixer that standardizes character
+/// case within numeric literals.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralCaseFixer.h"
+#include "NumericLiteralInfo.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+/// Returns true when the fixer has work to do for \p Style: the language is one
+/// of the cases listed in the switch below and at least one NumericLiteralCase
+/// option differs from NLCS_Leave.
+static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
+  // Check if language is supported.
+  switch (Style.Language) {
+  case FormatStyle::LK_C:
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+  case FormatStyle::LK_CSharp:
+  case FormatStyle::LK_Java:
+  case FormatStyle::LK_JavaScript:
+    break;
+  default:
+    return false;
+  }
+
+  // Check if style options are set.
+  const auto &Option = Style.NumericLiteralCase;
+  const auto Leave = FormatStyle::NLCS_Leave;
+  return Option.Prefix != Leave || Option.HexDigit != Leave ||
+         Option.ExponentLetter != Leave || Option.Suffix != Leave;
+}
+
+/// Returns \p Component upper-cased for NLCS_Upper, lower-cased for NLCS_Lower,
+/// and unchanged for NLCS_Leave or any other value of \p ConfigValue.
+static std::string
+transformComponent(StringRef Component,
+                   FormatStyle::NumericLiteralComponentStyle ConfigValue) {
+  switch (ConfigValue) {
+  case FormatStyle::NLCS_Upper:
+    return Component.upper();
+  case FormatStyle::NLCS_Lower:
+    return Component.lower();
+  case FormatStyle::NLCS_Leave:
+  default:
+    return Component.str();
+  }
+}
+
+/// Test if Suffix matches a C++ literal reserved by the library.
+/// Matches against all suffixes reserved in the C++23 standard.
+static bool matchesReservedSuffix(StringRef Suffix) {
+  static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
+      "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y"};
+
+  // This can be static_assert when we have access to constexpr is_sorted in
+  // C++ 20.
+  assert(llvm::is_sorted(SortedReservedSuffixes) &&
+         "Must be sorted as precondition for lower_bound().");
+
+  // lower_bound yields the first entry not ordered before Suffix (or the end
+  // iterator), so an exact match still has to be confirmed.
+  auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix);
+  if (entry == SortedReservedSuffixes.cend())
+    return false;
+  return *entry == Suffix;
+}
+
+/// Rebuilds \p NumericLiteral piece by piece: the base prefix (when a base
+/// letter is present), the digits, the exponent part (when present) and the
+/// suffix (when present), applying the matching NumericLiteralCase option from
+/// \p Style to each piece. The digit separator handed to NumericLiteralInfo is
+/// '\'' when Style.isCpp() is true and '_' otherwise.
+static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
+  const char Separator = Style.isCpp() ? '\'' : '_';
+  const NumericLiteralInfo Info(NumericLiteral, Separator);
+  const bool HasBaseLetter = Info.BaseLetterPos != llvm::StringRef::npos;
+  const bool HasExponent = Info.ExponentLetterPos != llvm::StringRef::npos;
+  const bool HasSuffix = Info.SuffixPos != llvm::StringRef::npos;
+
+  std::string Formatted;
+
+  if (HasBaseLetter) {
+    Formatted +=
+        transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos),
+                           Style.NumericLiteralCase.Prefix);
+  }
+  // Reformat this slice as HexDigit whether or not the digit has hexadecimal
+  // characters because binary/decimal/octal digits are unchanged.
+  Formatted += transformComponent(
+      NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0,
+                           HasExponent ? Info.ExponentLetterPos
+                           : HasSuffix ? Info.SuffixPos
+                                       : NumericLiteral.size()),
+      Style.NumericLiteralCase.HexDigit);
+
+  if (HasExponent) {
+    Formatted += transformComponent(
+        NumericLiteral.slice(Info.ExponentLetterPos,
+                             HasSuffix ? Info.SuffixPos
+                                       : NumericLiteral.size()),
+        Style.NumericLiteralCase.ExponentLetter);
+  }
+
+  if (HasSuffix) {
+    StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
+    if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') {
+      // In C++, it is idiomatic, but NOT standardized to define user-defined
+      // literals with a leading '_'. Omit user defined literals and standard
+      // reserved suffixes from transformation.
+      Formatted += Suffix.str();
+    } else {
+      Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix);
+    }
+  }
+
+  return Formatted;
+}
+
+std::pair<tooling::Replacements, unsigned>
+NumericLiteralCaseFixer::process(const Environment &Env,
+                                 const FormatStyle &Style) {
+  if (!isNumericLiteralCaseFixerNeeded(Style))
+    return {};
+
+  const auto &SourceMgr = Env.getSourceManager();
+  AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
+
+  const auto ID = Env.getFileID();
+  const auto LangOpts = getFormattingLangOpts(Style);
+  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+  Lex.SetCommentRetentionState(true);
+
+  Token Tok;
+  tooling::Replacements Result;
+
+  for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
+    // Skip tokens that are too small to contain a formattable literal.
+    // Size=2 is the smallest possible literal that could contain formattable
+    // components, for example "1u".
+    auto Length = Tok.getLength();
+    if (Length < 2)
+      continue;
+
+    // Service clang-format off/on comments.
+    auto Location = Tok.getLocation();
+    auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+    if (Tok.is(tok::comment)) {
+      if (isClangFormatOff(Text))
+        Skip = true;
+      else if (isClangFormatOn(Text))
+        Skip = false;
+      continue;
+    }
+
+    if (Skip || Tok.isNot(tok::numeric_constant) ||
+        !AffectedRangeMgr.affectsCharSourceRange(
+            CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
+      continue;
+    }
+
+    const auto Formatted = format(Text, Style);
----------------
owenca wrote:

```suggestion
    const auto &Formatted = format(Text, Style);
```

https://github.com/llvm/llvm-project/pull/151590
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits