Author: Qiongsi Wu
Date: 2026-06-11T08:27:47-07:00
New Revision: 6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d

URL: 
https://github.com/llvm/llvm-project/commit/6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d
DIFF: 
https://github.com/llvm/llvm-project/commit/6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d.diff

LOG: [clang] Reland Adding an Atomic Line Logger (#195885) (#202428)

This PR relands #195885. The original PR caused a Windows [build
failure](https://github.com/llvm/llvm-project/pull/195885#issuecomment-4636690409)
because of differences in const capture support in MSVC and `clang`. The
unit test is fixed by capturing everything.

***********

This PR adds an atomic line logger to `clang`.

Situations have arisen where `clang` performs multi-threaded tasks (such
as dependency scanning), and race conditions may happen. Such race
conditions are difficult to debug using either `lldb` or with
`llvm::errs()`.

This logger provides atomic logging per line to a file on disk with time
stamps at each line to facilitate such investigations. Specifically, the
logger is designed with the following properties:

1. Each line is atomically written to the backing file. This avoids
concurrent writes making the output text interleaving.
2. Each line is prefixed with a timestamp, a process ID and a thread ID.
3. `LogLine` implements a `<<` operator to allow arbitrary printable
types to be piped into it.
4. The `LogLine`'s user does not need to check if it is setup or valid.
A LogLine is always valid and can always accept input from `<<`. It
becomes a no-op if the `LogLine` object is returned from a default
constructed `AtomicLineLogger`.
5. The write happens when a `LogLine` object goes out of scope.

The logger is inspired by the

[OnDiskCASLogger](https://github.com/llvm/llvm-project/blob/09abee845d2136630fc3f50524148daa55a740a8/llvm/include/llvm/CAS/OnDiskCASLogger.h#L33).
A followup PR https://github.com/llvm/llvm-project/pull/195896 wires up
this logger to clang's dependency scanning stack.

Assisted-by: claude-opus-4.6

rdar://39907408

Added: 
    clang/include/clang/Basic/AtomicLineLogger.h
    clang/lib/Basic/AtomicLineLogger.cpp
    clang/unittests/Basic/AtomicLineLoggerTest.cpp

Modified: 
    clang/lib/Basic/CMakeLists.txt
    clang/unittests/Basic/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/AtomicLineLogger.h 
b/clang/include/clang/Basic/AtomicLineLogger.h
new file mode 100644
index 0000000000000..4ede0bea98b50
--- /dev/null
+++ b/clang/include/clang/Basic/AtomicLineLogger.h
@@ -0,0 +1,67 @@
+//===- AtomicLineLogger.h ---------------------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines a logger where each line is written atomically to the file. It is
+/// safe to share a logger instance across threads.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_ATOMICLINELOGGER_H
+#define LLVM_CLANG_BASIC_ATOMICLINELOGGER_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+namespace clang {
+
+class LogLine {
+  SmallString<128> Buffer;
+  std::optional<llvm::raw_svector_ostream> FormattingOS;
+  raw_ostream *Dest = nullptr;
+
+public:
+  explicit LogLine(raw_ostream &Dest);
+  LogLine() {}
+  LogLine(LogLine &&Other);
+  LogLine(const LogLine &) = delete;
+  LogLine &operator=(const LogLine &) = delete;
+  LogLine &operator=(LogLine &&) = delete;
+
+  ~LogLine() {
+    if (Dest) {
+      assert(FormattingOS && "Cannot have uninitialized FormattingOS");
+      *FormattingOS << '\n';
+      Dest->write(Buffer.data(), Buffer.size());
+    }
+  }
+
+  template <typename T> LogLine &operator<<(const T &Val) {
+    if (Dest) {
+      assert(FormattingOS && "Cannot have uninitialized FormattingOS");
+      *FormattingOS << Val;
+    }
+    return *this;
+  }
+};
+
+class AtomicLineLogger {
+  std::unique_ptr<llvm::raw_fd_ostream> OS;
+
+public:
+  AtomicLineLogger() {}
+  AtomicLineLogger(StringRef LogFilePath);
+
+  LogLine log();
+};
+
+} // namespace clang
+
+#endif

diff  --git a/clang/lib/Basic/AtomicLineLogger.cpp 
b/clang/lib/Basic/AtomicLineLogger.cpp
new file mode 100644
index 0000000000000..bee79e7c00b6f
--- /dev/null
+++ b/clang/lib/Basic/AtomicLineLogger.cpp
@@ -0,0 +1,77 @@
+//===- AtomicLineLogger.cpp 
-----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the implementation of an AtomicLineLogger and the relevant
+// supporting classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/AtomicLineLogger.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#ifdef __APPLE__
+#include <sys/time.h>
+#endif
+
+using namespace clang;
+
+static uint64_t getTimestampMillis() {
+#ifdef __APPLE__
+  // Using chrono is roughly 50% slower.
+  struct timeval T;
+  gettimeofday(&T, 0);
+  return T.tv_sec * 1000 + T.tv_usec / 1000;
+#else
+  auto Time = std::chrono::system_clock::now();
+  auto Millis = std::chrono::duration_cast<std::chrono::milliseconds>(
+      Time.time_since_epoch());
+  return Millis.count();
+#endif
+}
+
+LogLine::LogLine(raw_ostream &Dest) : FormattingOS(Buffer), Dest(&Dest) {
+  auto Millis = getTimestampMillis();
+  assert(FormattingOS && "Cannot have unintialized FormattingOS");
+  *FormattingOS << llvm::format("[%lld.%0.3lld]", Millis / 1000, Millis % 
1000);
+  *FormattingOS << ' ' << llvm::sys::Process::getProcessId() << ' '
+                << llvm::get_threadid() << ": ";
+}
+
+LogLine::LogLine(LogLine &&Other)
+    : Buffer(std::move(Other.Buffer)), Dest(Other.Dest) {
+  if (Dest)
+    FormattingOS.emplace(Buffer);
+  Other.Dest = nullptr;
+}
+
+AtomicLineLogger::AtomicLineLogger(StringRef LogFilePath) {
+  if (LogFilePath.empty())
+    return;
+
+  std::error_code EC;
+  OS = std::make_unique<llvm::raw_fd_ostream>(
+      LogFilePath, EC, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::FA_Write,
+      llvm::sys::fs::OF_Append);
+  if (EC) {
+    llvm::errs() << "warning: unable to open log file '" << LogFilePath
+                 << "': " << EC.message() << "\n";
+    OS.reset();
+    return;
+  }
+
+  // We need to set the OS to unbuffered, so LogLine's destructor can write
+  // a single line as an atomic operation.
+  OS->SetUnbuffered();
+}
+
+LogLine AtomicLineLogger::log() {
+  if (OS)
+    return LogLine(*OS);
+  return LogLine();
+}

diff  --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index 350c77696bcc4..21851e79b9d15 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -57,6 +57,7 @@ endif()
 add_clang_library(clangBasic
   ASTSourceDescriptor.cpp
   Attributes.cpp
+  AtomicLineLogger.cpp
   Builtins.cpp
   CLWarnings.cpp
   CharInfo.cpp

diff  --git a/clang/unittests/Basic/AtomicLineLoggerTest.cpp 
b/clang/unittests/Basic/AtomicLineLoggerTest.cpp
new file mode 100644
index 0000000000000..4c5d8d4b1e521
--- /dev/null
+++ b/clang/unittests/Basic/AtomicLineLoggerTest.cpp
@@ -0,0 +1,212 @@
+//===- unittests/Basic/AtomicLineLoggerTest.cpp 
---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/AtomicLineLogger.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+#include <thread>
+
+using namespace clang;
+
+TEST(LogLineTest, NoOpLogLineProducesNoOutput) {
+  LogLine() << "stream to empty line";
+  // Check to make sure streaming into Empty does not lead to crashes.
+  EXPECT_TRUE(true);
+}
+
+TEST(LogLineTest, MoveConstructor) {
+  SmallString<128> Buffer;
+  llvm::raw_svector_ostream OS(Buffer);
+
+  {
+    LogLine Original(OS);
+    LogLine Moved(std::move(Original));
+    Moved << "after_move";
+  }
+
+  StringRef Output = OS.str();
+
+  // Only one line should be written (from Moved, not from Original).
+  EXPECT_EQ(Output.count('\n'), 1u);
+  EXPECT_TRUE(Output.contains("after_move"));
+}
+
+TEST(LogLineTest, ActiveLogLinePIDTIDMsg) {
+  SmallString<128> Buffer;
+  llvm::raw_svector_ostream OS(Buffer);
+  LogLine(OS) << "test_event: " << "some_file.pcm";
+
+  StringRef Output = OS.str();
+
+  // Ends with message + newline.
+  EXPECT_TRUE(Output.ends_with("test_event: some_file.pcm\n"));
+
+  // Prefix has the form: "<timestamp> <pid> <tid>: "
+  // Verify PID matches this process.
+  std::string ExpectedPID = std::to_string(llvm::sys::Process::getProcessId());
+  EXPECT_TRUE(Output.contains(ExpectedPID));
+
+  // Verify TID is present.
+  std::string ExpectedTID = std::to_string(llvm::get_threadid());
+  EXPECT_TRUE(Output.contains(ExpectedTID));
+}
+
+TEST(LogLineTest, ActiveLogLineTimestamp) {
+  SmallString<128> Buffer;
+  llvm::raw_svector_ostream OS(Buffer);
+  // Test that the timestamp generated is always sandwiched between Before
+  // and After to verify the correctness.
+  auto Before = std::chrono::duration_cast<std::chrono::milliseconds>(
+                    std::chrono::system_clock::now().time_since_epoch())
+                    .count();
+
+  LogLine(OS) << "test_event";
+
+  auto After = std::chrono::duration_cast<std::chrono::milliseconds>(
+                   std::chrono::system_clock::now().time_since_epoch())
+                   .count();
+
+  StringRef Output = OS.str();
+
+  // Extract timestamp from "[<seconds>.<millis>]" prefix.
+  ASSERT_TRUE(Output.starts_with("["));
+  size_t CloseBracket = Output.find(']');
+  ASSERT_NE(CloseBracket, StringRef::npos);
+  StringRef TimestampStr = Output.slice(1, CloseBracket);
+
+  // Parse "<seconds>.<millis>".
+  auto [SecStr, MillisStr] = TimestampStr.split('.');
+  uint64_t Seconds, Millis;
+  ASSERT_FALSE(SecStr.getAsInteger(10, Seconds));
+  ASSERT_FALSE(MillisStr.getAsInteger(10, Millis));
+
+  uint64_t LoggedMillis = Seconds * 1000 + Millis;
+  EXPECT_GE(LoggedMillis, (uint64_t)Before);
+  EXPECT_LE(LoggedMillis, (uint64_t)After);
+}
+
+TEST(AtomicLineLoggerTest, DisabledLoggerDoesNotCrash) {
+  AtomicLineLogger Logger;
+  Logger.log() << "this goes nowhere";
+
+  // An empty logger should not crash.
+  EXPECT_TRUE(true);
+}
+
+TEST(AtomicLineLoggerTest, SingleLineWrittenToFile) {
+  // Create a temp directory and build a log file path inside it.
+  llvm::unittest::TempDir Dir("atomic-logger-test", /*Unique=*/true);
+  SmallString<128> LogPath(Dir.path());
+  llvm::sys::path::append(LogPath, "test.log");
+
+  {
+    AtomicLineLogger Logger(LogPath);
+    Logger.log() << "pcm_write: module.pcm";
+  }
+  // Logger destroyed here. Log file is written to disk.
+
+  // Read the file back.
+  auto BufOrErr = llvm::MemoryBuffer::getFile(LogPath);
+  ASSERT_TRUE(BufOrErr) << "Failed to read log file";
+  StringRef Content = (*BufOrErr)->getBuffer();
+
+  // Verify the message is present and the line ends with a newline.
+  EXPECT_TRUE(Content.contains("pcm_write: module.pcm"));
+  EXPECT_TRUE(Content.ends_with("\n"));
+
+  // Verify there is exactly one line.
+  EXPECT_EQ(Content.count('\n'), 1u);
+}
+
+TEST(AtomicLineLoggerTest, ConcurrentWritesProduceCompleteLines) {
+  llvm::unittest::TempDir Dir("atomic-logger-concurrent", /*Unique=*/true);
+  SmallString<128> LogPath(Dir.path());
+  llvm::sys::path::append(LogPath, "concurrent.log");
+
+  // Testing concurrent writing of the log file.
+  // Each logged message starts with the string `thread_`, and the message is
+  // always 32 characters long.
+  constexpr unsigned NumThreads = 8;
+  constexpr unsigned LinesPerThread = 100;
+  constexpr unsigned MessageLen = 32;
+
+  {
+    AtomicLineLogger Logger(LogPath);
+
+    std::vector<std::thread> Threads;
+    for (unsigned I = 0; I < NumThreads; ++I) {
+      Threads.emplace_back([&] {
+        for (unsigned J = 0; J < LinesPerThread; ++J) {
+          SmallString<64> Msg;
+          llvm::raw_svector_ostream MsgOS(Msg);
+          MsgOS << "thread_" << llvm::format("%02u", I) << "_line_"
+                << llvm::format("%03u", J);
+          // Pad to fixed width.
+          while (Msg.size() < MessageLen)
+            MsgOS << '_';
+          Logger.log() << Msg;
+        }
+      });
+    }
+    for (auto &T : Threads)
+      T.join();
+  }
+  // Logger destroyed here. Log file is written to disk.
+
+  auto BufOrErr = llvm::MemoryBuffer::getFile(LogPath);
+  ASSERT_TRUE(BufOrErr) << "Failed to read log file";
+  StringRef Content = (*BufOrErr)->getBuffer();
+
+  SmallVector<StringRef> Lines;
+  Content.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+  EXPECT_EQ(Lines.size(), (size_t)(NumThreads * LinesPerThread));
+
+  for (const auto &Line : Lines) {
+    // For each line, we check the separator, message length, message start and
+    // the prefix format to make sure no lines are interleved.
+
+    // Split at ": " to separate prefix from message body.
+    auto [Prefix, Body] = Line.split(": ");
+    ASSERT_FALSE(Body.empty())
+        << "Malformed line (no ': ' separator): " << Line.str();
+
+    // Message body checks.
+    EXPECT_EQ(Body.size(), (size_t)MessageLen)
+        << "Wrong message length (interleaving?): " << Line.str();
+    EXPECT_TRUE(Body.starts_with("thread_"))
+        << "Corrupted message body: " << Line.str();
+
+    // Prefix format: "[<seconds>.<millis>] <pid> <tid>"
+    // Parse timestamp.
+    EXPECT_TRUE(Prefix.starts_with("[")) << "Missing '[': " << Prefix.str();
+    size_t CloseBracket = Prefix.find(']');
+    ASSERT_NE(CloseBracket, StringRef::npos) << "Missing ']': " << 
Prefix.str();
+
+    StringRef TimestampStr = Prefix.slice(1, CloseBracket);
+    auto [SecStr, MillisStr] = TimestampStr.split('.');
+    uint64_t Seconds, Millis;
+    EXPECT_FALSE(SecStr.getAsInteger(10, Seconds))
+        << "Bad seconds: " << SecStr.str();
+    EXPECT_FALSE(MillisStr.getAsInteger(10, Millis))
+        << "Bad millis: " << MillisStr.str();
+
+    // Parse PID and TID from the rest: " <pid> <tid>"
+    StringRef Rest = Prefix.substr(CloseBracket + 1).ltrim();
+    auto [PidStr, TidStr] = Rest.split(' ');
+    uint64_t Pid, Tid;
+    EXPECT_FALSE(PidStr.getAsInteger(10, Pid)) << "Bad PID: " << PidStr.str();
+    EXPECT_FALSE(TidStr.getAsInteger(10, Tid)) << "Bad TID: " << TidStr.str();
+
+    // PID should match this process.
+    EXPECT_EQ(Pid, (uint64_t)llvm::sys::Process::getProcessId());
+  }
+}

diff  --git a/clang/unittests/Basic/CMakeLists.txt 
b/clang/unittests/Basic/CMakeLists.txt
index 058243fd3fdba..32dce09866892 100644
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -1,6 +1,7 @@
 # Basic tests have few LLVM and Clang dependencies, so linking it as a
 # distinct target enables faster iteration times at low cost.
 add_distinct_clang_unittest(BasicTests
+  AtomicLineLoggerTest.cpp
   CharInfoTest.cpp
   DarwinSDKInfoTest.cpp
   DiagnosticTest.cpp


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to