https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/151590
>From ad46ad82b5564d5aa7236d9a0bf1f66fdf476a72 Mon Sep 17 00:00:00 2001 From: Andy MacGregor <amacgregor.2018.comcast....@gmail.com> Date: Wed, 10 Sep 2025 09:50:18 -0400 Subject: [PATCH 1/2] [clang-format] Add an option to format numeric literal case --- clang/docs/ClangFormatStyleOptions.rst | 107 ++++++ clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Format/Format.h | 68 ++++ clang/lib/Format/CMakeLists.txt | 1 + clang/lib/Format/Format.cpp | 29 ++ clang/lib/Format/NumericLiteralCaseFixer.cpp | 176 +++++++++ clang/lib/Format/NumericLiteralCaseFixer.h | 32 ++ clang/unittests/Format/CMakeLists.txt | 1 + .../Format/NumericLiteralCaseTest.cpp | 346 ++++++++++++++++++ 9 files changed, 762 insertions(+) create mode 100644 clang/lib/Format/NumericLiteralCaseFixer.cpp create mode 100644 clang/lib/Format/NumericLiteralCaseFixer.h create mode 100644 clang/unittests/Format/NumericLiteralCaseTest.cpp diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 3ac9e3795cae7..6be4d512bda6a 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -5079,6 +5079,113 @@ the configuration (without a prefix: ``Auto``). For example: TESTSUITE +.. _NumericLiteralCase: + +**NumericLiteralCase** (``NumericLiteralCaseStyle``) :versionbadge:`clang-format 22` :ref:`¶ <NumericLiteralCase>` + Capitalization style for numeric literals. + + Nested configuration flags: + + Separate control for each numeric literal component. + + For example, the config below will leave exponent letters alone, reformat + hexadecimal digits in lowercase, reformat numeric literal prefixes in + uppercase, and reformat suffixes in lowercase. + + .. code-block:: c++ + + NumericLiteralCase: + ExponentLetter: Leave + HexDigit: Lower + Prefix: Upper + Suffix: Lower + + * ``NumericLiteralComponentStyle ExponentLetter`` + Format floating point exponent separator letter case. + + .. code-block:: c++ + + float a = 6.02e23 + 1.0E10; // Leave + float a = 6.02E23 + 1.0E10; // Upper + float a = 6.02e23 + 1.0e10; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle HexDigit`` + Format hexadecimal digit case. + + .. code-block:: c++ + + a = 0xaBcDeF; // Leave + a = 0xABCDEF; // Upper + a = 0xabcdef; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle Prefix`` + Format integer prefix case. + + .. code-block:: c++ + + a = 0XF0 | 0b1; // Leave + a = 0XF0 | 0B1; // Upper + a = 0xF0 | 0b1; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + * ``NumericLiteralComponentStyle Suffix`` + Format suffix case. This option excludes case-sensitive reserved + suffixes, such as ``min`` in C++. + + .. code-block:: c++ + + a = 1uLL; // Leave + a = 1ULL; // Upper + a = 1ull; // Lower + + Possible values: + + * ``NLCS_Leave`` (in configuration: ``Leave``) + Leave this component of the literal as is. + + * ``NLCS_Upper`` (in configuration: ``Upper``) + Format this component with uppercase characters. + + * ``NLCS_Lower`` (in configuration: ``Lower``) + Format this component with lowercase characters. + + + .. _ObjCBinPackProtocolList: **ObjCBinPackProtocolList** (``BinPackStyle``) :versionbadge:`clang-format 7` :ref:`¶ <ObjCBinPackProtocolList>` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a20b1ab298f9c..2089e531a0876 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -458,6 +458,8 @@ AST Matchers clang-format ------------ - Add ``SpaceInEmptyBraces`` option and set it to ``Always`` for WebKit style. +- Add ``NumericLiteralCase`` option for enforcing character case in numeric + literals. libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 5dfdb23594610..03cff5f8cfb66 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3558,6 +3558,73 @@ struct FormatStyle { /// \version 9 std::vector<std::string> NamespaceMacros; + /// Control over each component in a numeric literal. + enum NumericLiteralComponentStyle : int8_t { + /// Leave this component of the literal as is. + NLCS_Leave, + /// Format this component with uppercase characters. + NLCS_Upper, + /// Format this component with lowercase characters. + NLCS_Lower, + }; + + /// Separate control for each numeric literal component. + /// + /// For example, the config below will leave exponent letters alone, reformat + /// hexadecimal digits in lowercase, reformat numeric literal prefixes in + /// uppercase, and reformat suffixes in lowercase. + /// \code + /// NumericLiteralCase: + /// ExponentLetter: Leave + /// HexDigit: Lower + /// Prefix: Upper + /// Suffix: Lower + /// \endcode + struct NumericLiteralCaseStyle { + /// Format floating point exponent separator letter case. + /// \code + /// float a = 6.02e23 + 1.0E10; // Leave + /// float a = 6.02E23 + 1.0E10; // Upper + /// float a = 6.02e23 + 1.0e10; // Lower + /// \endcode + NumericLiteralComponentStyle ExponentLetter; + /// Format hexadecimal digit case. + /// \code + /// a = 0xaBcDeF; // Leave + /// a = 0xABCDEF; // Upper + /// a = 0xabcdef; // Lower + /// \endcode + NumericLiteralComponentStyle HexDigit; + /// Format integer prefix case. + /// \code + /// a = 0XF0 | 0b1; // Leave + /// a = 0XF0 | 0B1; // Upper + /// a = 0xF0 | 0b1; // Lower + /// \endcode + NumericLiteralComponentStyle Prefix; + /// Format suffix case. This option excludes case-sensitive reserved + /// suffixes, such as ``min`` in C++. + /// \code + /// a = 1uLL; // Leave + /// a = 1ULL; // Upper + /// a = 1ull; // Lower + /// \endcode + NumericLiteralComponentStyle Suffix; + + bool operator==(const NumericLiteralCaseStyle &R) const { + return ExponentLetter == R.ExponentLetter && HexDigit == R.HexDigit && + Prefix == R.Prefix && Suffix == R.Suffix; + } + + bool operator!=(const NumericLiteralCaseStyle &R) const { + return !(*this == R); + } + }; + + /// Capitalization style for numeric literals. + /// \version 22 + NumericLiteralCaseStyle NumericLiteralCase; + /// Controls bin-packing Objective-C protocol conformance list /// items into as few lines as possible when they go over ``ColumnLimit``. /// @@ -5469,6 +5536,7 @@ struct FormatStyle { MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep && NamespaceIndentation == R.NamespaceIndentation && NamespaceMacros == R.NamespaceMacros && + NumericLiteralCase == R.NumericLiteralCase && ObjCBinPackProtocolList == R.ObjCBinPackProtocolList && ObjCBlockIndentWidth == R.ObjCBlockIndentWidth && ObjCBreakBeforeNestedBlockParam == diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt index 24f435d2caee1..50c0683dc9b7f 100644 --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -13,6 +13,7 @@ add_clang_library(clangFormat MacroExpander.cpp MatchFilePath.cpp NamespaceEndCommentsFixer.cpp + NumericLiteralCaseFixer.cpp NumericLiteralInfo.cpp ObjCPropertyAttributeOrderFixer.cpp QualifierAlignmentFixer.cpp diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index e3b22cdabaccd..f095d2c18cfcf 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "DefinitionBlockSeparator.h" #include "IntegerLiteralSeparatorFixer.h" #include "NamespaceEndCommentsFixer.h" +#include "NumericLiteralCaseFixer.h" #include "ObjCPropertyAttributeOrderFixer.h" #include "QualifierAlignmentFixer.h" #include "SortJavaScriptImports.h" @@ -472,6 +473,25 @@ struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { } }; +template <> +struct ScalarEnumerationTraits<FormatStyle::NumericLiteralComponentStyle> { + static void enumeration(IO &IO, + FormatStyle::NumericLiteralComponentStyle &Value) { + IO.enumCase(Value, "Leave", FormatStyle::NLCS_Leave); + IO.enumCase(Value, "Upper", FormatStyle::NLCS_Upper); + IO.enumCase(Value, "Lower", FormatStyle::NLCS_Lower); + } +}; + +template <> struct MappingTraits<FormatStyle::NumericLiteralCaseStyle> { + static void mapping(IO &IO, FormatStyle::NumericLiteralCaseStyle &Value) { + IO.mapOptional("ExponentLetter", Value.ExponentLetter); + IO.mapOptional("HexDigit", Value.HexDigit); + IO.mapOptional("Prefix", Value.Prefix); + IO.mapOptional("Suffix", Value.Suffix); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::OperandAlignmentStyle> { static void enumeration(IO &IO, FormatStyle::OperandAlignmentStyle &Value) { IO.enumCase(Value, "DontAlign", FormatStyle::OAS_DontAlign); @@ -1121,6 +1141,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); IO.mapOptional("NamespaceMacros", Style.NamespaceMacros); + IO.mapOptional("NumericLiteralCase", Style.NumericLiteralCase); IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList); IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); IO.mapOptional("ObjCBreakBeforeNestedBlockParam", @@ -1653,6 +1674,10 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.LineEnding = FormatStyle::LE_DeriveLF; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; + LLVMStyle.NumericLiteralCase = {/*ExponentLetter=*/FormatStyle::NLCS_Leave, + /*HexDigit=*/FormatStyle::NLCS_Leave, + /*Prefix=*/FormatStyle::NLCS_Leave, + /*Suffix=*/FormatStyle::NLCS_Leave}; LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto; LLVMStyle.ObjCBlockIndentWidth = 2; LLVMStyle.ObjCBreakBeforeNestedBlockParam = true; @@ -3890,6 +3915,10 @@ reformat(const FormatStyle &Style, StringRef Code, return IntegerLiteralSeparatorFixer().process(Env, Expanded); }); + Passes.emplace_back([&](const Environment &Env) { + return NumericLiteralCaseFixer().process(Env, Expanded); + }); + if (Style.isCpp()) { if (Style.QualifierAlignment != FormatStyle::QAS_Leave) addQualifierAlignmentFixerPasses(Expanded, Passes); diff --git a/clang/lib/Format/NumericLiteralCaseFixer.cpp b/clang/lib/Format/NumericLiteralCaseFixer.cpp new file mode 100644 index 0000000000000..2bed6185a9d26 --- /dev/null +++ b/clang/lib/Format/NumericLiteralCaseFixer.cpp @@ -0,0 +1,176 @@ +//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements NumericLiteralCaseFixer that standardizes character +/// case within numeric literals. +/// +//===----------------------------------------------------------------------===// + +#include "NumericLiteralCaseFixer.h" +#include "NumericLiteralInfo.h" + +#include "llvm/ADT/StringExtras.h" + +#include <algorithm> + +namespace clang { +namespace format { + +static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) { + // Check if language is supported. + switch (Style.Language) { + case FormatStyle::LK_C: + case FormatStyle::LK_Cpp: + case FormatStyle::LK_ObjC: + case FormatStyle::LK_CSharp: + case FormatStyle::LK_Java: + case FormatStyle::LK_JavaScript: + break; + default: + return false; + } + + // Check if style options are set. + const auto &Option = Style.NumericLiteralCase; + const auto Leave = FormatStyle::NLCS_Leave; + return Option.Prefix != Leave || Option.HexDigit != Leave || + Option.ExponentLetter != Leave || Option.Suffix != Leave; +} + +static std::string +transformComponent(StringRef Component, + FormatStyle::NumericLiteralComponentStyle ConfigValue) { + switch (ConfigValue) { + case FormatStyle::NLCS_Upper: + return Component.upper(); + case FormatStyle::NLCS_Lower: + return Component.lower(); + default: + // Covers FormatStyle::NLCS_Leave. + return Component.str(); + } +} + +/// Test if Suffix matches a C++ literal reserved by the library. +/// Matches against all suffixes reserved in the C++23 standard. +static bool matchesReservedSuffix(StringRef Suffix) { + static constexpr std::array<StringRef, 11> SortedReservedSuffixes = { + "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y"}; + + // This can be static_assert when we have access to constexpr is_sorted in + // C++ 20. + assert(llvm::is_sorted(SortedReservedSuffixes) && + "Must be sorted as precondition for lower_bound()."); + + auto entry = llvm::lower_bound(SortedReservedSuffixes, Suffix); + if (entry == SortedReservedSuffixes.cend()) + return false; + return *entry == Suffix; +} + +static std::string format(StringRef NumericLiteral, const FormatStyle &Style) { + const char Separator = Style.isCpp() ? '\'' : '_'; + const NumericLiteralInfo Info(NumericLiteral, Separator); + const bool HasBaseLetter = Info.BaseLetterPos != StringRef::npos; + const bool HasExponent = Info.ExponentLetterPos != StringRef::npos; + const bool HasSuffix = Info.SuffixPos != StringRef::npos; + + std::string Formatted; + + if (HasBaseLetter) { + Formatted += + transformComponent(NumericLiteral.take_front(1 + Info.BaseLetterPos), + Style.NumericLiteralCase.Prefix); + } + // Reformat this slice as HexDigit whether or not the digit has hexadecimal + // characters because binary/decimal/octal digits are unchanged. + Formatted += transformComponent( + NumericLiteral.slice(HasBaseLetter ? 1 + Info.BaseLetterPos : 0, + HasExponent ? Info.ExponentLetterPos + : HasSuffix ? Info.SuffixPos + : NumericLiteral.size()), + Style.NumericLiteralCase.HexDigit); + + if (HasExponent) { + Formatted += transformComponent( + NumericLiteral.slice(Info.ExponentLetterPos, + HasSuffix ? Info.SuffixPos + : NumericLiteral.size()), + Style.NumericLiteralCase.ExponentLetter); + } + + if (HasSuffix) { + StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos); + if (matchesReservedSuffix(Suffix) || Suffix.front() == '_') { + // In C++, it is idiomatic, but NOT standardized to define user-defined + // literals with a leading '_'. Omit user defined literals and standard + // reserved suffixes from transformation. + Formatted += Suffix.str(); + } else { + Formatted += transformComponent(Suffix, Style.NumericLiteralCase.Suffix); + } + } + + return Formatted; +} + +std::pair<tooling::Replacements, unsigned> +NumericLiteralCaseFixer::process(const Environment &Env, + const FormatStyle &Style) { + if (!isNumericLiteralCaseFixerNeeded(Style)) + return {}; + + const auto &SourceMgr = Env.getSourceManager(); + AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); + + const auto ID = Env.getFileID(); + const auto LangOpts = getFormattingLangOpts(Style); + Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); + Lex.SetCommentRetentionState(true); + + Token Tok; + tooling::Replacements Result; + + for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { + // Skip tokens that are too small to contain a formattable literal. + // Size=2 is the smallest possible literal that could contain formattable + // components, for example "1u". + auto Length = Tok.getLength(); + if (Length < 2) + continue; + + // Service clang-format off/on comments. + auto Location = Tok.getLocation(); + auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); + if (Tok.is(tok::comment)) { + if (isClangFormatOff(Text)) + Skip = true; + else if (isClangFormatOn(Text)) + Skip = false; + continue; + } + + if (Skip || Tok.isNot(tok::numeric_constant) || + !AffectedRangeMgr.affectsCharSourceRange( + CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { + continue; + } + + const auto Formatted = format(Text, Style); + if (Formatted != Text) { + cantFail(Result.add( + tooling::Replacement(SourceMgr, Location, Length, Formatted))); + } + } + + return {Result, 0}; +} + +} // namespace format +} // namespace clang diff --git a/clang/lib/Format/NumericLiteralCaseFixer.h b/clang/lib/Format/NumericLiteralCaseFixer.h new file mode 100644 index 0000000000000..ac3ac30d1d19a --- /dev/null +++ b/clang/lib/Format/NumericLiteralCaseFixer.h @@ -0,0 +1,32 @@ +//===--- NumericLiteralCaseFixer.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares NumericLiteralCaseFixer that standardizes character case +/// within numeric literals. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H +#define LLVM_CLANG_LIB_FORMAT_NUMERICLITERALCASEFIXER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +class NumericLiteralCaseFixer { +public: + std::pair<tooling::Replacements, unsigned> process(const Environment &Env, + const FormatStyle &Style); +}; + +} // end namespace format +} // end namespace clang + +#endif diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt index 5e5a7a0552993..03fff988d4663 100644 --- a/clang/unittests/Format/CMakeLists.txt +++ b/clang/unittests/Format/CMakeLists.txt @@ -28,6 +28,7 @@ add_distinct_clang_unittest(FormatTests MacroExpanderTest.cpp MatchFilePathTest.cpp NamespaceEndCommentsFixerTest.cpp + NumericLiteralCaseTest.cpp NumericLiteralInfoTest.cpp ObjCPropertyAttributeOrderFixerTest.cpp QualifierFixerTest.cpp diff --git a/clang/unittests/Format/NumericLiteralCaseTest.cpp b/clang/unittests/Format/NumericLiteralCaseTest.cpp new file mode 100644 index 0000000000000..ecd230d73f692 --- /dev/null +++ b/clang/unittests/Format/NumericLiteralCaseTest.cpp @@ -0,0 +1,346 @@ +//===- unittest/Format/NumericLiteralCaseTest.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatTestBase.h" + +#define DEBUG_TYPE "numeric-literal-case-test" + +namespace clang { +namespace format { +namespace test { +namespace { + +class NumericLiteralCaseTest : public FormatTestBase {}; + +TEST_F(NumericLiteralCaseTest, Prefix) { + constexpr StringRef Bin0("b = 0b0'10'010uL;"); + constexpr StringRef Bin1("b = 0B010'010Ul;"); + constexpr StringRef Hex0("b = 0xdead'BEEFuL;"); + constexpr StringRef Hex1("b = 0Xdead'BEEFUl;"); + verifyFormat(Bin0); + verifyFormat(Bin1); + verifyFormat(Hex0); + verifyFormat(Hex1); + + auto Style = getLLVMStyle(); + EXPECT_EQ(Style.NumericLiteralCase.Prefix, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.HexDigit, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.ExponentLetter, FormatStyle::NLCS_Leave); + EXPECT_EQ(Style.NumericLiteralCase.Suffix, FormatStyle::NLCS_Leave); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + verifyFormat("b = 0B0'10'010uL;", Bin0, Style); + verifyFormat(Bin1, Style); + verifyFormat("b = 0Xdead'BEEFuL;", Hex0, Style); + verifyFormat(Hex1, Style); + verifyFormat("i = 0XaBcD.a0Ebp123F;", Style); + verifyFormat("j = 0XaBcD.a0EbP123f;", Style); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + verifyFormat(Bin0, Style); + verifyFormat("b = 0b010'010Ul;", Bin1, Style); + verifyFormat(Hex0, Style); + verifyFormat("b = 0xdead'BEEFUl;", Hex1, Style); +} + +TEST_F(NumericLiteralCaseTest, HexDigit) { + constexpr StringRef A("a = 0xaBc0'123fuL;"); + constexpr StringRef B("b = 0XaBc0'123FUl;"); + constexpr StringRef C("c = 0xa'Bc.0p12'3f32;"); + constexpr StringRef D("d = 0xa'Bc.0P12'3F128;"); + constexpr StringRef E("e = 0b0011'00Ull;"); + constexpr StringRef F("f = 0B0100'000zu;"); + constexpr StringRef G("g = 0.123e-19f;"); + constexpr StringRef H("h = 0.12'3E-19F16;"); + constexpr StringRef I("i = 0x.0000aBcp12'3F128;"); + constexpr StringRef J("j = 0xaa1'fP12'3F128;"); + constexpr StringRef K("k = 0x0;"); + constexpr StringRef L("l = 0xA;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + verifyFormat(H); + verifyFormat(I); + verifyFormat(J); + verifyFormat(K); + verifyFormat(L); + + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + verifyFormat("a = 0xABC0'123FuL;", A, Style); + verifyFormat("b = 0XABC0'123FUl;", B, Style); + verifyFormat("c = 0xA'BC.0p12'3f32;", C, Style); + verifyFormat("d = 0xA'BC.0P12'3F128;", D, Style); + verifyFormat(E, Style); + verifyFormat(F, Style); + verifyFormat(G, Style); + verifyFormat(H, Style); + verifyFormat("i = 0x.0000ABCp12'3F128;", I, Style); + verifyFormat("j = 0xAA1'FP12'3F128;", J, Style); + verifyFormat(K, Style); + verifyFormat(L, Style); + + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + verifyFormat("a = 0xabc0'123fuL;", A, Style); + verifyFormat("b = 0Xabc0'123fUl;", B, Style); + verifyFormat("c = 0xa'bc.0p12'3f32;", C, Style); + verifyFormat("d = 0xa'bc.0P12'3F128;", D, Style); + verifyFormat(E, Style); + verifyFormat(F, Style); + verifyFormat(G, Style); + verifyFormat(H, Style); + verifyFormat("i = 0x.0000abcp12'3F128;", I, Style); + verifyFormat("j = 0xaa1'fP12'3F128;", J, Style); + verifyFormat(K, Style); + verifyFormat("l = 0xa;", Style); +} + +TEST_F(NumericLiteralCaseTest, ExponentLetter) { + constexpr StringRef A("a = .0'01e-19f;"); + constexpr StringRef B("b = .00'1E2F;"); + constexpr StringRef C("c = 10'2.e99;"); + constexpr StringRef D("d = 123.456E-1;"); + constexpr StringRef E("e = 0x12abEe3.456p-10'0;"); + constexpr StringRef F("f = 0x.deEfP23;"); + constexpr StringRef G("g = 0xe0E1.p-1;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + verifyFormat(A, Style); + verifyFormat("b = .00'1e2F;", B, Style); + verifyFormat(C, Style); + verifyFormat("d = 123.456e-1;", D, Style); + verifyFormat(E, Style); + verifyFormat("f = 0x.deEfp23;", F, Style); + verifyFormat(G, Style); + + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + verifyFormat("a = .0'01E-19f;", A, Style); + verifyFormat(B, Style); + verifyFormat("c = 10'2.E99;", C, Style); + verifyFormat(D, Style); + verifyFormat("e = 0x12abEe3.456P-10'0;", E, Style); + verifyFormat(F, Style); + verifyFormat("g = 0xe0E1.P-1;", G, Style); +} + +TEST_F(NumericLiteralCaseTest, IntegerSuffix) { + constexpr StringRef A("a = 102u;"); + constexpr StringRef B("b = 0177U;"); + constexpr StringRef C("c = 0b101'111llU;"); + constexpr StringRef D("d = 0xdead'BeefuZ;"); + constexpr StringRef E("e = 3lU;"); + constexpr StringRef F("f = 1zu;"); + constexpr StringRef G("g = 0uLL;"); + constexpr StringRef H("h = 10'233'213'0101uLL;"); + verifyFormat(A); + verifyFormat(B); + verifyFormat(C); + verifyFormat(D); + verifyFormat(E); + verifyFormat(F); + verifyFormat(G); + verifyFormat(H); + + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(A, Style); + verifyFormat("b = 0177u;", B, Style); + verifyFormat("c = 0b101'111llu;", C, Style); + verifyFormat("d = 0xdead'Beefuz;", D, Style); + verifyFormat("e = 3lu;", E, Style); + verifyFormat(F, Style); + verifyFormat("g = 0ull;", G, Style); + verifyFormat("h = 10'233'213'0101ull;", H, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat("a = 102U;", A, Style); + verifyFormat(B, Style); + verifyFormat("c = 0b101'111LLU;", C, Style); + verifyFormat("d = 0xdead'BeefUZ;", D, Style); + verifyFormat("e = 3LU;", E, Style); + verifyFormat("f = 1ZU;", F, Style); + verifyFormat("g = 0ULL;", G, Style); + verifyFormat("h = 10'233'213'0101ULL;", H, Style); +} + +TEST_F(NumericLiteralCaseTest, FloatingPointSuffix) { + auto Style = getLLVMStyle(); + // Floating point literals without suffixes. + constexpr std::array<StringRef, 6> FloatingPointStatements = { + "a = 0.", "b = 1.0", "c = .123'45E-10", + "d = 12'3.0e1", "e = 0Xa0eE.P10", "f = 0xeE01.aFf3p6", + }; + + // All legal floating-point literal suffixes defined in the C++23 standard in + // lowercase. + constexpr std::array<StringRef, 7> FloatingPointSuffixes = { + "f", "l", "f16", "f32", "f64", "f128", "bf16", + }; + + // Test all combinations of literals with suffixes. + for (const auto &Statement : FloatingPointStatements) { + for (const auto &Suffix : FloatingPointSuffixes) { + const auto LowerLine = Statement.str() + Suffix.str() + ";"; + const auto UpperLine = Statement.str() + Suffix.upper() + ";"; + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Leave; + verifyFormat(LowerLine, Style); + verifyFormat(UpperLine, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(LowerLine, Style); + verifyFormat(LowerLine, UpperLine, Style); + + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat(UpperLine, LowerLine, Style); + verifyFormat(UpperLine, Style); + } + } +} + +TEST_F(NumericLiteralCaseTest, CppStandardAndUserDefinedLiteralsAreUntouched) { + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + + // C++ user-defined suffixes begin with '_' or are reserved for the standard + // library. + constexpr StringRef UDLiterals("a = 12.if;\n" + "b = -3i;\n" + "c = 100'01il;\n" + "d = 100'0.12il;\n" + "e = 12h;\n" + "f = 0XABE12h;\n" + "g = 0XFA03min;\n" + "h = 0X12B4Ds;\n" + "i = 20.13E-1ms;\n" + "j = 20.13E-1us;\n" + "k = 20.13E-1ns;\n" + "l = 20.13E-1y;\n" + "m = 20.13E-1d;\n" + "n = 20.13E-1d;\n" + "o = 1d;\n" + "p = 102_ffl_lzlz;\n" + "q = 10.2_l;\n" + "r = 0XABDE.0'1P-23_f;\n" + "s = 102_foo_bar;\n" + "t = 123.456_felfz_ballpen;\n" + "u = 0XBEAD1_spacebar;"); + + verifyFormat(UDLiterals, Style); + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + verifyFormat(UDLiterals, Style); +} + +TEST_F(NumericLiteralCaseTest, FixRanges) { + auto Style = getLLVMStyle(); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + + constexpr StringRef CodeBlock("a = 0xFea3duLL;\n" + "b = 0X.aEbp-12f;\n" + "c = 0uLL;\n" + "// clang-format off\n" + "e = 0xBeAdu;\n" + "// clang-format on\n" + "g = 0xabCDu;\n" + "h = 0b010uL;\n" + "// clang-format off\n" + "i = 0B1010'000Zu;\n" + "// clang-format on\n" + "k = 0XaBuL;"); + + verifyFormat("a = 0xfea3dull;\n" + "b = 0x.aebp-12f;\n" + "c = 0ull;\n" + "// clang-format off\n" + "e = 0xBeAdu;\n" + "// clang-format on\n" + "g = 0xabcdu;\n" + "h = 0b010ul;\n" + "// clang-format off\n" + "i = 0B1010'000Zu;\n" + "// clang-format on\n" + "k = 0xabul;", + CodeBlock, Style); +} + +TEST_F(NumericLiteralCaseTest, UnderScoreSeparatorLanguages) { + auto Style = getLLVMStyle(); + + constexpr StringRef CodeBlock("a = 0xFea_3dl;\n" + "b = 0123_345;\n" + "c = 0b11____00lU;\n" + "d = 0XB_e_A_du;\n" + "e = 123_456.333__456e-10f;\n" + "f = .1_0E-10D;\n" + "g = 1_0.F;\n" + "h = 0B1_0;"); + auto TestUnderscore = [&](auto Language) { + Style.Language = Language; + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Upper; + verifyFormat("a = 0xFEA_3DL;\n" + "b = 0123_345;\n" + "c = 0b11____00LU;\n" + "d = 0xB_E_A_DU;\n" + "e = 123_456.333__456e-10F;\n" + "f = .1_0e-10D;\n" + "g = 1_0.F;\n" + "h = 0b1_0;", + CodeBlock, Style); + + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.HexDigit = FormatStyle::NLCS_Lower; + Style.NumericLiteralCase.ExponentLetter = FormatStyle::NLCS_Upper; + Style.NumericLiteralCase.Suffix = FormatStyle::NLCS_Lower; + + verifyFormat("a = 0Xfea_3dl;\n" + "b = 0123_345;\n" + "c = 0B11____00lu;\n" + "d = 0Xb_e_a_du;\n" + "e = 123_456.333__456E-10f;\n" + "f = .1_0E-10d;\n" + "g = 1_0.f;\n" + "h = 0B1_0;", + CodeBlock, Style); + }; + + TestUnderscore(FormatStyle::LK_CSharp); + TestUnderscore(FormatStyle::LK_Java); + TestUnderscore(FormatStyle::LK_JavaScript); + + Style.Language = FormatStyle::LK_JavaScript; + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Upper; + verifyFormat("o = 0O0_10_010;", "o = 0o0_10_010;", Style); + Style.NumericLiteralCase.Prefix = FormatStyle::NLCS_Lower; + verifyFormat("o = 0o0_10_010;", "o = 0O0_10_010;", Style); +} + +} // namespace +} // namespace test +} // namespace format +} // namespace clang >From 7803d3bf077456a16c3be7e3592a1796769b70a7 Mon Sep 17 00:00:00 2001 From: owenca <owenpi...@gmail.com> Date: Fri, 12 Sep 2025 01:08:38 -0700 Subject: [PATCH 2/2] Update clang/lib/Format/NumericLiteralCaseFixer.cpp --- clang/lib/Format/NumericLiteralCaseFixer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/NumericLiteralCaseFixer.cpp b/clang/lib/Format/NumericLiteralCaseFixer.cpp index 2bed6185a9d26..b58b3c7ee0189 100644 --- a/clang/lib/Format/NumericLiteralCaseFixer.cpp +++ b/clang/lib/Format/NumericLiteralCaseFixer.cpp @@ -61,7 +61,8 @@ transformComponent(StringRef Component, /// Matches against all suffixes reserved in the C++23 standard. static bool matchesReservedSuffix(StringRef Suffix) { static constexpr std::array<StringRef, 11> SortedReservedSuffixes = { - "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y"}; + "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y", + }; // This can be static_assert when we have access to constexpr is_sorted in // C++ 20. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits