[clang] [clang-format] Add an option to format integer and float literal case (PR #151590)

via cfe-commits Thu, 28 Aug 2025 21:11:59 -0700

================
@@ -0,0 +1,176 @@
+//===--- NumericLiteralCaseFixer.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements NumericLiteralCaseFixer that standardizes character
+/// case within numeric literals.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NumericLiteralCaseFixer.h"
+#include "NumericLiteralInfo.h"
+
+#include "llvm/ADT/StringExtras.h"
+
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+/// Returns true if the fixer has any work to do for \p Style: at least one
+/// NumericLiteralCase component must differ from NLCS_Leave and the language
+/// must be one of those listed in the switch below.
+static bool isNumericLiteralCaseFixerNeeded(const FormatStyle &Style) {
+  // If every component is configured as "leave", no literal can change.
+  const auto &Cases = Style.NumericLiteralCase;
+  const auto Leave = FormatStyle::NLCS_Leave;
+  const bool EverythingLeft = Cases.Prefix == Leave &&
+                              Cases.HexDigit == Leave &&
+                              Cases.ExponentLetter == Leave &&
+                              Cases.Suffix == Leave;
+  if (EverythingLeft)
+    return false;
+
+  // Only these languages are handled; anything else is skipped.
+  switch (Style.Language) {
+  case FormatStyle::LK_C:
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+  case FormatStyle::LK_CSharp:
+  case FormatStyle::LK_Java:
+  case FormatStyle::LK_JavaScript:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Returns a copy of \p Component converted according to \p ConfigValue:
+/// upper-cased for NLCS_Upper, lower-cased for NLCS_Lower, and unchanged for
+/// NLCS_Leave or any other value.
+static std::string
+transformComponent(StringRef Component,
+                   FormatStyle::NumericLiteralComponentStyle ConfigValue) {
+  if (ConfigValue == FormatStyle::NLCS_Upper)
+    return Component.upper();
+  if (ConfigValue == FormatStyle::NLCS_Lower)
+    return Component.lower();
+  // NLCS_Leave, and any value not handled above, keeps the text verbatim.
+  return Component.str();
+}
+
+/// Returns true if \p Suffix is exactly one of the literal suffixes reserved
+/// by the C++ standard library. The table covers all suffixes reserved in the
+/// C++23 standard.
+static bool matchesReservedSuffix(StringRef Suffix) {
+  static constexpr std::array<StringRef, 11> SortedReservedSuffixes = {
+      "d", "h", "i", "if", "il", "min", "ms", "ns", "s", "us", "y"};
+
+  // lower_bound() requires sorted input. This is checked at run time for now;
+  // it can become a static_assert once a constexpr is_sorted (C++20) is
+  // available.
+  assert(llvm::is_sorted(SortedReservedSuffixes) &&
+         "Must be sorted as precondition for lower_bound().");
+
+  // The first element not less than Suffix is a match only if it exists and
+  // compares equal.
+  const auto Candidate = llvm::lower_bound(SortedReservedSuffixes, Suffix);
+  return Candidate != SortedReservedSuffixes.cend() && *Candidate == Suffix;
+}
+
+/// Returns \p NumericLiteral with the letter case of each component adjusted
+/// according to Style.NumericLiteralCase.
+///
+/// Using the positions reported by NumericLiteralInfo, the literal is split
+/// into up to four consecutive pieces, each transformed with its own option:
+///   - prefix:   everything up to and including the base letter (Prefix),
+///   - digits:   up to the exponent letter, else the suffix, else the end
+///               (HexDigit),
+///   - exponent: from the exponent letter up to the suffix or the end
+///               (ExponentLetter),
+///   - suffix:   the remainder (Suffix).
+///
+/// A suffix starting with '_' is copied verbatim. In C-family languages
+/// (Style.isCpp()), a suffix reserved by the C++ standard library is copied
+/// verbatim as well. The reserved-suffix exemption is deliberately limited to
+/// those languages: e.g. `d` is the ordinary double suffix in Java and C#, so
+/// it must still follow the Suffix option there.
+static std::string format(StringRef NumericLiteral, const FormatStyle &Style) {
+  const bool IsCpp = Style.isCpp();
+  const char Separator = IsCpp ? '\'' : '_';
+  const NumericLiteralInfo Info(NumericLiteral, Separator);
+  const bool HasBaseLetter = Info.BaseLetterPos != llvm::StringRef::npos;
+  const bool HasExponent = Info.ExponentLetterPos != llvm::StringRef::npos;
+  const bool HasSuffix = Info.SuffixPos != llvm::StringRef::npos;
+
+  // Component boundaries; a missing component collapses onto the start of the
+  // next one so that the slices below stay contiguous.
+  const size_t DigitsBegin = HasBaseLetter ? 1 + Info.BaseLetterPos : 0;
+  const size_t SuffixBegin =
+      HasSuffix ? Info.SuffixPos : NumericLiteral.size();
+  const size_t DigitsEnd = HasExponent ? Info.ExponentLetterPos : SuffixBegin;
+
+  std::string Formatted;
+
+  if (HasBaseLetter) {
+    Formatted += transformComponent(NumericLiteral.take_front(DigitsBegin),
+                                    Style.NumericLiteralCase.Prefix);
+  }
+
+  // Reformat this slice as HexDigit whether or not the digit has hexadecimal
+  // characters because binary/decimal/octal digits are unchanged.
+  Formatted +=
+      transformComponent(NumericLiteral.slice(DigitsBegin, DigitsEnd),
+                         Style.NumericLiteralCase.HexDigit);
+
+  if (HasExponent) {
+    Formatted += transformComponent(
+        NumericLiteral.slice(Info.ExponentLetterPos, SuffixBegin),
+        Style.NumericLiteralCase.ExponentLetter);
+  }
+
+  if (HasSuffix) {
+    const StringRef Suffix = NumericLiteral.drop_front(Info.SuffixPos);
+    // In C++, it is idiomatic, but NOT standardized to define user-defined
+    // literals with a leading '_'. Omit user defined literals and standard
+    // reserved suffixes from transformation. The reserved list only applies
+    // to C-family languages.
+    const bool KeepVerbatim =
+        (IsCpp && matchesReservedSuffix(Suffix)) || Suffix.front() == '_';
+    Formatted += KeepVerbatim ? Suffix.str()
+                              : transformComponent(
+                                    Suffix, Style.NumericLiteralCase.Suffix);
+  }
+
+  return Formatted;
+}
+
+std::pair<tooling::Replacements, unsigned>
+NumericLiteralCaseFixer::process(const Environment &Env,
+                                 const FormatStyle &Style) {
+  if (!isNumericLiteralCaseFixerNeeded(Style))
+    return {};
+
+  const auto &SourceMgr = Env.getSourceManager();
+  AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
+
+  const auto ID = Env.getFileID();
+  const auto LangOpts = getFormattingLangOpts(Style);
+  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+  Lex.SetCommentRetentionState(true);
+
+  Token Tok;
+  tooling::Replacements Result;
+
+  for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
+    // Skip tokens that are too small to contain a formattable literal.
+    // Size=2 is the smallest possible literal that could contain formattable
+    // components, for example "1u".
+    auto Length = Tok.getLength();
+    if (Length < 2)
+      continue;
+
+    // Service clang-format off/on comments.
+    auto Location = Tok.getLocation();
+    auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
+    if (Tok.is(tok::comment)) {
+      if (isClangFormatOff(Text))
+        Skip = true;
+      else if (isClangFormatOn(Text))
+        Skip = false;
+      continue;
+    }
+
+    if (Skip || Tok.isNot(tok::numeric_constant) ||
+        !AffectedRangeMgr.affectsCharSourceRange(
+            CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
+      continue;
+    }
+
+    const auto Formatted = format(Text, Style);
----------------
owenca wrote:


```suggestion
    const auto &Formatted = format(Text, Style);
```

https://github.com/llvm/llvm-project/pull/151590
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-format] Add an option to format integer and float literal case (PR #151590)

Reply via email to