Author: Qiongsi Wu Date: 2026-06-11T08:27:47-07:00 New Revision: 6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d
URL: https://github.com/llvm/llvm-project/commit/6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d DIFF: https://github.com/llvm/llvm-project/commit/6d4fd946e4486f13d6b4ea1cc54d2aae5ee2f03d.diff LOG: [clang] Reland Adding an Atomic Line Logger (#195885) (#202428) This PR relands #195885. The original PR caused a Windows [build failure](https://github.com/llvm/llvm-project/pull/195885#issuecomment-4636690409) because of differences in const capture support in MSVC and `clang`. The unit test is fixed by capturing everything. *********** This PR adds an atomic line logger to `clang`. Situations have arisen where `clang` performs multi-threaded tasks (such as dependency scanning), and race conditions may happen. Such race conditions are difficult to debug using either `lldb` or with `llvm::errs()`. This logger provides atomic logging per line to a file on disk with time stamps at each line to facilitate such investigations. Specifically, the logger is designed with the following properties: 1. Each line is atomically written to the backing file. This avoids concurrent writes making the output text interleaving. 2. Each line is prefixed with a timestamp, a process ID and a thread ID. 3. `LogLine` implements a `<<` operator to allow arbitrary printable types to be piped into it. 4. The `LogLine`'s user does not need to check if it is setup or valid. A LogLine is always valid and can always accept input from `<<`. It becomes a no-op if the `LogLine` object is returned from a default constructed `AtomicLineLogger`. 5. The write happens when a `LogLine` object goes out of scope. The logger is inspired by the [OnDiskCASLogger](https://github.com/llvm/llvm-project/blob/09abee845d2136630fc3f50524148daa55a740a8/llvm/include/llvm/CAS/OnDiskCASLogger.h#L33). A followup PR https://github.com/llvm/llvm-project/pull/195896 wires up this logger to clang's dependency scanning stack. Assisted-by: claude-opus-4.6 rdar://39907408 Added: clang/include/clang/Basic/AtomicLineLogger.h clang/lib/Basic/AtomicLineLogger.cpp clang/unittests/Basic/AtomicLineLoggerTest.cpp Modified: clang/lib/Basic/CMakeLists.txt clang/unittests/Basic/CMakeLists.txt Removed: ################################################################################ diff --git a/clang/include/clang/Basic/AtomicLineLogger.h b/clang/include/clang/Basic/AtomicLineLogger.h new file mode 100644 index 0000000000000..4ede0bea98b50 --- /dev/null +++ b/clang/include/clang/Basic/AtomicLineLogger.h @@ -0,0 +1,67 @@ +//===- AtomicLineLogger.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Defines a logger where each line is written atomically to the file. It is +/// safe to share a logger instance across threads. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_ATOMICLINELOGGER_H +#define LLVM_CLANG_BASIC_ATOMICLINELOGGER_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> + +namespace clang { + +class LogLine { + SmallString<128> Buffer; + std::optional<llvm::raw_svector_ostream> FormattingOS; + raw_ostream *Dest = nullptr; + +public: + explicit LogLine(raw_ostream &Dest); + LogLine() {} + LogLine(LogLine &&Other); + LogLine(const LogLine &) = delete; + LogLine &operator=(const LogLine &) = delete; + LogLine &operator=(LogLine &&) = delete; + + ~LogLine() { + if (Dest) { + assert(FormattingOS && "Cannot have uninitialized FormattingOS"); + *FormattingOS << '\n'; + Dest->write(Buffer.data(), Buffer.size()); + } + } + + template <typename T> LogLine &operator<<(const T &Val) { + if (Dest) { + assert(FormattingOS && "Cannot have uninitialized FormattingOS"); + *FormattingOS << Val; + } + return *this; + } +}; + +class AtomicLineLogger { + std::unique_ptr<llvm::raw_fd_ostream> OS; + +public: + AtomicLineLogger() {} + AtomicLineLogger(StringRef LogFilePath); + + LogLine log(); +}; + +} // namespace clang + +#endif diff --git a/clang/lib/Basic/AtomicLineLogger.cpp b/clang/lib/Basic/AtomicLineLogger.cpp new file mode 100644 index 0000000000000..bee79e7c00b6f --- /dev/null +++ b/clang/lib/Basic/AtomicLineLogger.cpp @@ -0,0 +1,77 @@ +//===- AtomicLineLogger.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the implementation of an AtomicLineLogger and the relevant +// supporting classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/AtomicLineLogger.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Threading.h" +#ifdef __APPLE__ +#include <sys/time.h> +#endif + +using namespace clang; + +static uint64_t getTimestampMillis() { +#ifdef __APPLE__ + // Using chrono is roughly 50% slower. + struct timeval T; + gettimeofday(&T, 0); + return T.tv_sec * 1000 + T.tv_usec / 1000; +#else + auto Time = std::chrono::system_clock::now(); + auto Millis = std::chrono::duration_cast<std::chrono::milliseconds>( + Time.time_since_epoch()); + return Millis.count(); +#endif +} + +LogLine::LogLine(raw_ostream &Dest) : FormattingOS(Buffer), Dest(&Dest) { + auto Millis = getTimestampMillis(); + assert(FormattingOS && "Cannot have unintialized FormattingOS"); + *FormattingOS << llvm::format("[%lld.%0.3lld]", Millis / 1000, Millis % 1000); + *FormattingOS << ' ' << llvm::sys::Process::getProcessId() << ' ' + << llvm::get_threadid() << ": "; +} + +LogLine::LogLine(LogLine &&Other) + : Buffer(std::move(Other.Buffer)), Dest(Other.Dest) { + if (Dest) + FormattingOS.emplace(Buffer); + Other.Dest = nullptr; +} + +AtomicLineLogger::AtomicLineLogger(StringRef LogFilePath) { + if (LogFilePath.empty()) + return; + + std::error_code EC; + OS = std::make_unique<llvm::raw_fd_ostream>( + LogFilePath, EC, llvm::sys::fs::CD_OpenAlways, llvm::sys::fs::FA_Write, + llvm::sys::fs::OF_Append); + if (EC) { + llvm::errs() << "warning: unable to open log file '" << LogFilePath + << "': " << EC.message() << "\n"; + OS.reset(); + return; + } + + // We need to set the OS to unbuffered, so LogLine's destructor can write + // a single line as an atomic operation. + OS->SetUnbuffered(); +} + +LogLine AtomicLineLogger::log() { + if (OS) + return LogLine(*OS); + return LogLine(); +} diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 350c77696bcc4..21851e79b9d15 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -57,6 +57,7 @@ endif() add_clang_library(clangBasic ASTSourceDescriptor.cpp Attributes.cpp + AtomicLineLogger.cpp Builtins.cpp CLWarnings.cpp CharInfo.cpp diff --git a/clang/unittests/Basic/AtomicLineLoggerTest.cpp b/clang/unittests/Basic/AtomicLineLoggerTest.cpp new file mode 100644 index 0000000000000..4c5d8d4b1e521 --- /dev/null +++ b/clang/unittests/Basic/AtomicLineLoggerTest.cpp @@ -0,0 +1,212 @@ +//===- unittests/Basic/AtomicLineLoggerTest.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/AtomicLineLogger.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Threading.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" +#include <thread> + +using namespace clang; + +TEST(LogLineTest, NoOpLogLineProducesNoOutput) { + LogLine() << "stream to empty line"; + // Check to make sure streaming into Empty does not lead to crashes. + EXPECT_TRUE(true); +} + +TEST(LogLineTest, MoveConstructor) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + + { + LogLine Original(OS); + LogLine Moved(std::move(Original)); + Moved << "after_move"; + } + + StringRef Output = OS.str(); + + // Only one line should be written (from Moved, not from Original). + EXPECT_EQ(Output.count('\n'), 1u); + EXPECT_TRUE(Output.contains("after_move")); +} + +TEST(LogLineTest, ActiveLogLinePIDTIDMsg) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + LogLine(OS) << "test_event: " << "some_file.pcm"; + + StringRef Output = OS.str(); + + // Ends with message + newline. + EXPECT_TRUE(Output.ends_with("test_event: some_file.pcm\n")); + + // Prefix has the form: "<timestamp> <pid> <tid>: " + // Verify PID matches this process. + std::string ExpectedPID = std::to_string(llvm::sys::Process::getProcessId()); + EXPECT_TRUE(Output.contains(ExpectedPID)); + + // Verify TID is present. + std::string ExpectedTID = std::to_string(llvm::get_threadid()); + EXPECT_TRUE(Output.contains(ExpectedTID)); +} + +TEST(LogLineTest, ActiveLogLineTimestamp) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + // Test that the timestamp generated is always sandwiched between Before + // and After to verify the correctness. + auto Before = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + LogLine(OS) << "test_event"; + + auto After = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + StringRef Output = OS.str(); + + // Extract timestamp from "[<seconds>.<millis>]" prefix. + ASSERT_TRUE(Output.starts_with("[")); + size_t CloseBracket = Output.find(']'); + ASSERT_NE(CloseBracket, StringRef::npos); + StringRef TimestampStr = Output.slice(1, CloseBracket); + + // Parse "<seconds>.<millis>". + auto [SecStr, MillisStr] = TimestampStr.split('.'); + uint64_t Seconds, Millis; + ASSERT_FALSE(SecStr.getAsInteger(10, Seconds)); + ASSERT_FALSE(MillisStr.getAsInteger(10, Millis)); + + uint64_t LoggedMillis = Seconds * 1000 + Millis; + EXPECT_GE(LoggedMillis, (uint64_t)Before); + EXPECT_LE(LoggedMillis, (uint64_t)After); +} + +TEST(AtomicLineLoggerTest, DisabledLoggerDoesNotCrash) { + AtomicLineLogger Logger; + Logger.log() << "this goes nowhere"; + + // An empty logger should not crash. + EXPECT_TRUE(true); +} + +TEST(AtomicLineLoggerTest, SingleLineWrittenToFile) { + // Create a temp directory and build a log file path inside it. + llvm::unittest::TempDir Dir("atomic-logger-test", /*Unique=*/true); + SmallString<128> LogPath(Dir.path()); + llvm::sys::path::append(LogPath, "test.log"); + + { + AtomicLineLogger Logger(LogPath); + Logger.log() << "pcm_write: module.pcm"; + } + // Logger destroyed here. Log file is written to disk. + + // Read the file back. + auto BufOrErr = llvm::MemoryBuffer::getFile(LogPath); + ASSERT_TRUE(BufOrErr) << "Failed to read log file"; + StringRef Content = (*BufOrErr)->getBuffer(); + + // Verify the message is present and the line ends with a newline. + EXPECT_TRUE(Content.contains("pcm_write: module.pcm")); + EXPECT_TRUE(Content.ends_with("\n")); + + // Verify there is exactly one line. + EXPECT_EQ(Content.count('\n'), 1u); +} + +TEST(AtomicLineLoggerTest, ConcurrentWritesProduceCompleteLines) { + llvm::unittest::TempDir Dir("atomic-logger-concurrent", /*Unique=*/true); + SmallString<128> LogPath(Dir.path()); + llvm::sys::path::append(LogPath, "concurrent.log"); + + // Testing concurrent writing of the log file. + // Each logged message starts with the string `thread_`, and the message is + // always 32 characters long. + constexpr unsigned NumThreads = 8; + constexpr unsigned LinesPerThread = 100; + constexpr unsigned MessageLen = 32; + + { + AtomicLineLogger Logger(LogPath); + + std::vector<std::thread> Threads; + for (unsigned I = 0; I < NumThreads; ++I) { + Threads.emplace_back([&] { + for (unsigned J = 0; J < LinesPerThread; ++J) { + SmallString<64> Msg; + llvm::raw_svector_ostream MsgOS(Msg); + MsgOS << "thread_" << llvm::format("%02u", I) << "_line_" + << llvm::format("%03u", J); + // Pad to fixed width. + while (Msg.size() < MessageLen) + MsgOS << '_'; + Logger.log() << Msg; + } + }); + } + for (auto &T : Threads) + T.join(); + } + // Logger destroyed here. Log file is written to disk. + + auto BufOrErr = llvm::MemoryBuffer::getFile(LogPath); + ASSERT_TRUE(BufOrErr) << "Failed to read log file"; + StringRef Content = (*BufOrErr)->getBuffer(); + + SmallVector<StringRef> Lines; + Content.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + EXPECT_EQ(Lines.size(), (size_t)(NumThreads * LinesPerThread)); + + for (const auto &Line : Lines) { + // For each line, we check the separator, message length, message start and + // the prefix format to make sure no lines are interleved. + + // Split at ": " to separate prefix from message body. + auto [Prefix, Body] = Line.split(": "); + ASSERT_FALSE(Body.empty()) + << "Malformed line (no ': ' separator): " << Line.str(); + + // Message body checks. + EXPECT_EQ(Body.size(), (size_t)MessageLen) + << "Wrong message length (interleaving?): " << Line.str(); + EXPECT_TRUE(Body.starts_with("thread_")) + << "Corrupted message body: " << Line.str(); + + // Prefix format: "[<seconds>.<millis>] <pid> <tid>" + // Parse timestamp. + EXPECT_TRUE(Prefix.starts_with("[")) << "Missing '[': " << Prefix.str(); + size_t CloseBracket = Prefix.find(']'); + ASSERT_NE(CloseBracket, StringRef::npos) << "Missing ']': " << Prefix.str(); + + StringRef TimestampStr = Prefix.slice(1, CloseBracket); + auto [SecStr, MillisStr] = TimestampStr.split('.'); + uint64_t Seconds, Millis; + EXPECT_FALSE(SecStr.getAsInteger(10, Seconds)) + << "Bad seconds: " << SecStr.str(); + EXPECT_FALSE(MillisStr.getAsInteger(10, Millis)) + << "Bad millis: " << MillisStr.str(); + + // Parse PID and TID from the rest: " <pid> <tid>" + StringRef Rest = Prefix.substr(CloseBracket + 1).ltrim(); + auto [PidStr, TidStr] = Rest.split(' '); + uint64_t Pid, Tid; + EXPECT_FALSE(PidStr.getAsInteger(10, Pid)) << "Bad PID: " << PidStr.str(); + EXPECT_FALSE(TidStr.getAsInteger(10, Tid)) << "Bad TID: " << TidStr.str(); + + // PID should match this process. + EXPECT_EQ(Pid, (uint64_t)llvm::sys::Process::getProcessId()); + } +} diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt index 058243fd3fdba..32dce09866892 100644 --- a/clang/unittests/Basic/CMakeLists.txt +++ b/clang/unittests/Basic/CMakeLists.txt @@ -1,6 +1,7 @@ # Basic tests have few LLVM and Clang dependencies, so linking it as a # distinct target enables faster iteration times at low cost. add_distinct_clang_unittest(BasicTests + AtomicLineLoggerTest.cpp CharInfoTest.cpp DarwinSDKInfoTest.cpp DiagnosticTest.cpp _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
