https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/182814
>From b657a41ab08dc112f47b10cd231db236c457fe7d Mon Sep 17 00:00:00 2001 From: Fangrui Song <[email protected]> Date: Mon, 16 Mar 2026 18:26:28 -0700 Subject: [PATCH 1/2] [CodeGen] Extract helpers from accumulateBitFields. NFC The new bit-field access unit algorithm in #65742 has a complex structure. This patch extracts two lambdas, eliminating a goto and reducing nesting. Also fix an accumulateBitfields typo. --- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 86 +++++++++++---------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index e9205c68c2812..53b8733fbed64 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -46,7 +46,7 @@ namespace { /// i24. This isn't always possible because i24 has storage size of 32 bit /// and if it is possible to use that extra byte of padding we must use [i8 x /// 3] instead of i24. This is computed when accumulating bitfields in -/// accumulateBitfields. +/// accumulateBitFields. /// C++ examples that require clipping: /// struct { int a : 24; char b; }; // a must be clipped, b goes at offset 3 /// struct A { int a : 24; ~A(); }; // a must be clipped because: @@ -510,6 +510,48 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, // cached here. CharUnits ScissorOffset = CharUnits::Zero(); + auto getLimitOffset = [&]() -> CharUnits { + for (auto Probe = Field; Probe != FieldEnd; ++Probe) + if (!isEmptyFieldForLayout(Context, *Probe)) { + // A member with storage sets the limit. + assert((getFieldBitOffset(*Probe) % CharBits) == 0 && + "Next storage is not byte-aligned"); + return bitsToCharUnits(getFieldBitOffset(*Probe)); + } + // We reached the end of the fields, determine the bounds of useable + // tail padding. As this can be complex for C++, we cache the result. + if (ScissorOffset.isZero()) { + ScissorOffset = calculateTailClippingOffset(isNonVirtualBaseType); + assert(!ScissorOffset.isZero() && "Tail clipping at zero"); + } + return ScissorOffset; + }; + + auto emitAccessUnit = [&](CharUnits BeginOffset, CharUnits EndOffset, + bool Clipped, RecordDecl::field_iterator First, + RecordDecl::field_iterator Last) { + CharUnits AccessSize = EndOffset - BeginOffset; + if (AccessSize.isZero()) + return; + // Add the storage member for the access unit to the record. The + // bitfields get the offset of their storage but come afterward and + // remain there after a stable sort. + llvm::Type *Type; + if (Clipped) { + assert(getSize(getIntNType(Context.toBits(AccessSize))) > AccessSize && + "Clipped access need not be clipped"); + Type = getByteArrayType(AccessSize); + } else { + Type = getIntNType(Context.toBits(AccessSize)); + assert(getSize(Type) == AccessSize && "Unclipped access must be clipped"); + } + Members.push_back(StorageInfo(BeginOffset, Type)); + for (; First != Last; ++First) + if (!First->isZeroLengthBitField()) + Members.push_back( + MemberInfo(BeginOffset, MemberInfo::Field, nullptr, *First)); + }; + // Data about the start of the span we're accumulating to create an access // unit from. Begin is the first bitfield of the span. If Begin is FieldEnd, // we've not got a current span. The span starts at the BeginOffset character @@ -632,24 +674,7 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, // the current span is. That's either the offset of the next field // with storage (which might be Field itself) or the end of the // non-reusable tail padding. - CharUnits LimitOffset; - for (auto Probe = Field; Probe != FieldEnd; ++Probe) - if (!isEmptyFieldForLayout(Context, *Probe)) { - // A member with storage sets the limit. - assert((getFieldBitOffset(*Probe) % CharBits) == 0 && - "Next storage is not byte-aligned"); - LimitOffset = bitsToCharUnits(getFieldBitOffset(*Probe)); - goto FoundLimit; - } - // We reached the end of the fields, determine the bounds of useable - // tail padding. As this can be complex for C++, we cache the result. - if (ScissorOffset.isZero()) { - ScissorOffset = calculateTailClippingOffset(isNonVirtualBaseType); - assert(!ScissorOffset.isZero() && "Tail clipping at zero"); - } - - LimitOffset = ScissorOffset; - FoundLimit:; + CharUnits LimitOffset = getLimitOffset(); CharUnits TypeSize = getSize(Type); if (BeginOffset + TypeSize <= LimitOffset) { @@ -679,28 +704,7 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, assert((Field == FieldEnd || !Field->isBitField() || (getFieldBitOffset(*Field) % CharBits) == 0) && "Installing but not at an aligned bitfield or limit"); - CharUnits AccessSize = BestEndOffset - BeginOffset; - if (!AccessSize.isZero()) { - // Add the storage member for the access unit to the record. The - // bitfields get the offset of their storage but come afterward and - // remain there after a stable sort. - llvm::Type *Type; - if (BestClipped) { - assert(getSize(getIntNType(Context.toBits(AccessSize))) > - AccessSize && - "Clipped access need not be clipped"); - Type = getByteArrayType(AccessSize); - } else { - Type = getIntNType(Context.toBits(AccessSize)); - assert(getSize(Type) == AccessSize && - "Unclipped access must be clipped"); - } - Members.push_back(StorageInfo(BeginOffset, Type)); - for (; Begin != BestEnd; ++Begin) - if (!Begin->isZeroLengthBitField()) - Members.push_back( - MemberInfo(BeginOffset, MemberInfo::Field, nullptr, *Begin)); - } + emitAccessUnit(BeginOffset, BestEndOffset, BestClipped, Begin, BestEnd); // Reset to start a new span. Field = BestEnd; Begin = FieldEnd; >From 83bcb90adbce74ad99a454f0620534fed640777e Mon Sep 17 00:00:00 2001 From: Fangrui Song <[email protected]> Date: Sun, 22 Feb 2026 21:59:04 -0800 Subject: [PATCH 2/2] [CodeGen] Refactor accumulateBitFields into two passes The current accumulateBitFields in clang/lib/CodeGen/CGRecordLayoutBuilder.cpp implements a "spans then merge" algorithm in a single interleaved loop. The loop is hard to follow with 6 levels of nesting and a goto. Split the single interleaved loop into two passes matching the conceptual algorithm: - Pass 1 walks bitfields and groups them into spans (maximal runs sharing mid-byte boundaries) - Pass 2 greedily merges spans into access units subject to register-width, alignment, and padding constraints. This eliminates 6 levels of nesting, the goto, and 5 separate `InstallBest` sites while preserving identical behavior. Drafted with Claude Code with significant manual edits. The agent context includes the code, bit-field-related tests, and https://maskray.me/blog/2026-02-22-bit-field-layout I spent a full day on this blog post and the patch. --- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 298 +++++++++----------- 1 file changed, 137 insertions(+), 161 deletions(-) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 53b8733fbed64..6bdc288e8fc85 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -502,22 +502,86 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, // *) it would cross a character-aigned zero-width bitfield, or // *) fine-grained bitfield access option is in effect. - CharUnits RegSize = - bitsToCharUnits(Context.getTargetInfo().getRegisterWidth()); unsigned CharBits = Context.getCharWidth(); + // ---- Pass 1: Walk the fields, grouping bitfields into "spans". + // A span is a maximal run of bitfields where each successive field starts + // mid-byte (sharing a byte with the previous one). A new span starts + // whenever a field begins at a byte boundary. Zero-width bitfields and + // non-bitfields act as barriers that prevent merging across spans. + struct BitFieldSpan { + RecordDecl::field_iterator Begin; + RecordDecl::field_iterator End; + // Byte offset of the span's first byte. + CharUnits Offset; + // Size in bits of the span. + uint64_t Size; + // True if a zero-width bitfield or non-bitfield follows this span (prevents + // merging with the next span). + bool Barrier; + }; + SmallVector<BitFieldSpan, 4> Spans; + { + RecordDecl::field_iterator SpanBegin = FieldEnd; + CharUnits SpanOffset; + uint64_t SpanSize = 0; + auto closeSpan = [&](bool IsBarrier) { + if (SpanBegin == FieldEnd) + return; + if (SpanSize) + Spans.push_back({SpanBegin, Field, SpanOffset, SpanSize, IsBarrier}); + SpanBegin = FieldEnd; + SpanSize = 0; + }; + + for (; Field != FieldEnd && Field->isBitField(); ++Field) { + uint64_t BitOffset = getFieldBitOffset(*Field); + if (SpanBegin == FieldEnd) { + // Beginning a new span. + assert((BitOffset % CharBits) == 0 && "Not at start of char"); + SpanBegin = Field; + SpanOffset = bitsToCharUnits(BitOffset); + SpanSize = 0; + } else if (BitOffset % CharBits != 0) { + // Bitfield occupies the same character as previous bitfield, it must + // be part of the same span. This can include zero-length bitfields, + // should the target not align them to character boundaries. Such + // non-alignment is at variance with the standards, which require + // zero-length bitfields be a barrier between access units. But of + // course we can't achieve that in the middle of a character. + assert(BitOffset == Context.toBits(SpanOffset) + SpanSize && + "Concatenating non-contiguous bitfields"); + } else { + // Bitfield potentially begins a new span. This includes + // zero-length bitfields on non-aligning targets that lie at character + // boundaries (those are barriers to merging). + bool IsBarrier = Field->isZeroLengthBitField(); + closeSpan(IsBarrier); + SpanBegin = Field; + SpanOffset = bitsToCharUnits(BitOffset); + SpanSize = 0; + } + + SpanSize += Field->getBitWidthValue(); + } + + // Close the final span. If we stopped because we hit a non-bitfield (or + // FieldEnd), that's a barrier. + closeSpan(/*IsBarrier=*/true); + } + // Limit of useable tail padding at end of the record. Computed lazily and // cached here. CharUnits ScissorOffset = CharUnits::Zero(); - - auto getLimitOffset = [&]() -> CharUnits { - for (auto Probe = Field; Probe != FieldEnd; ++Probe) + auto getLimitOffset = [&](const BitFieldSpan &Span) -> CharUnits { + for (auto Probe = Span.End; Probe != FieldEnd; ++Probe) { if (!isEmptyFieldForLayout(Context, *Probe)) { // A member with storage sets the limit. assert((getFieldBitOffset(*Probe) % CharBits) == 0 && "Next storage is not byte-aligned"); return bitsToCharUnits(getFieldBitOffset(*Probe)); } + } // We reached the end of the fields, determine the bounds of useable // tail padding. As this can be complex for C++, we cache the result. if (ScissorOffset.isZero()) { @@ -552,170 +616,82 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, MemberInfo(BeginOffset, MemberInfo::Field, nullptr, *First)); }; - // Data about the start of the span we're accumulating to create an access - // unit from. Begin is the first bitfield of the span. If Begin is FieldEnd, - // we've not got a current span. The span starts at the BeginOffset character - // boundary. BitSizeSinceBegin is the size (in bits) of the span -- this might - // include padding when we've advanced to a subsequent bitfield run. - RecordDecl::field_iterator Begin = FieldEnd; - CharUnits BeginOffset; - uint64_t BitSizeSinceBegin; - - // The (non-inclusive) end of the largest acceptable access unit we've found - // since Begin. If this is Begin, we're gathering the initial set of bitfields - // of a new span. BestEndOffset is the end of that acceptable access unit -- - // it might extend beyond the last character of the bitfield run, using - // available padding characters. - RecordDecl::field_iterator BestEnd = Begin; - CharUnits BestEndOffset; - bool BestClipped; // Whether the representation must be in a byte array. - - for (;;) { - // AtAlignedBoundary is true iff Field is the (potential) start of a new - // span (or the end of the bitfields). When true, LimitOffset is the - // character offset of that span and Barrier indicates whether the new - // span cannot be merged into the current one. - bool AtAlignedBoundary = false; - bool Barrier = false; - - if (Field != FieldEnd && Field->isBitField()) { - uint64_t BitOffset = getFieldBitOffset(*Field); - if (Begin == FieldEnd) { - // Beginning a new span. - Begin = Field; - BestEnd = Begin; - - assert((BitOffset % CharBits) == 0 && "Not at start of char"); - BeginOffset = bitsToCharUnits(BitOffset); - BitSizeSinceBegin = 0; - } else if ((BitOffset % CharBits) != 0) { - // Bitfield occupies the same character as previous bitfield, it must be - // part of the same span. This can include zero-length bitfields, should - // the target not align them to character boundaries. Such non-alignment - // is at variance with the standards, which require zero-length - // bitfields be a barrier between access units. But of course we can't - // achieve that in the middle of a character. - assert(BitOffset == Context.toBits(BeginOffset) + BitSizeSinceBegin && - "Concatenating non-contiguous bitfields"); - } else { - // Bitfield potentially begins a new span. This includes zero-length - // bitfields on non-aligning targets that lie at character boundaries - // (those are barriers to merging). - if (Field->isZeroLengthBitField()) - Barrier = true; - AtAlignedBoundary = true; - } - } else { - // We've reached the end of the bitfield run. Either we're done, or this - // is a barrier for the current span. - if (Begin == FieldEnd) + // ---- Pass 2: Merge spans and emit access units. + // For each span, try to widen the access unit by incorporating subsequent + // non-barrier spans, subject to register-width, alignment, and + // available-padding constraints. Note that this requires lookahead to one or + // more subsequent spans. For instance, consider a 2-byte span followed by + // 2 1-byte spans. We can merge that into a 4-byte access-unit, but we would + // not want to merge a 2-byte followed by a single 1-byte (and no available + // tail padding). Emit the best access unit found and advance past its spans. + const CharUnits RegSize = + bitsToCharUnits(Context.getTargetInfo().getRegisterWidth()); + for (unsigned I = 0, Size = Spans.size(); I != Size;) { + CharUnits BeginOffset = Spans[I].Offset; + // The (non-inclusive) end of the largest acceptable access unit we've found + // since I. BestEndOffset is the end of that access unit -- it might extend + // beyond the last character of the span, using available padding + // characters. + unsigned BestEnd = I; + CharUnits BestEndOffset; + // Whether the access unit must use a byte array [N x i8] instead of iN. + bool BestClipped = true; + for (unsigned J = I; J != Size; ++J) { + // Compute the byte-rounded access size from BeginOffset through span J. + CharUnits AccessSize = + bitsToCharUnits(Context.toBits(Spans[J].Offset - BeginOffset) + + Spans[J].Size + CharBits - 1); + // Accumulating this span would create a multi-register access unit, + // which would increase register pressure. + if (J > I && AccessSize > RegSize) break; - Barrier = true; - AtAlignedBoundary = true; - } + llvm::Type *Type = getIntNType(Context.toBits(AccessSize)); + if (!Context.getTargetInfo().hasCheapUnalignedBitFieldAccess()) { + // Unaligned accesses are expensive. Only accumulate if the new unit + // is naturally aligned. Otherwise install the best we have, which is + // either the initial access unit (can't do better), or a naturally + // aligned accumulation (since we would have already installed it if + // it wasn't naturally aligned). + CharUnits Align = getAlignment(Type); + if (Align > Layout.getAlignment() || // alignment exceeds the struct's + !BeginOffset.isMultipleOf(Align)) { // not naturally aligned + // However, if this is the very first span (J == I), we still need to + // record it as the best. Check whether it needs clipping. + if (J == I) { + BestEnd = J + 1; + BestEndOffset = BeginOffset + AccessSize; + BestClipped = (getSize(Type) != AccessSize); + } + break; + } + } - // InstallBest indicates whether we should create an access unit for the - // current best span: fields [Begin, BestEnd) occupying characters - // [BeginOffset, BestEndOffset). - bool InstallBest = false; - if (AtAlignedBoundary) { - // Field is the start of a new span or the end of the bitfields. The - // just-seen span now extends to BitSizeSinceBegin. - - // Determine if we can accumulate that just-seen span into the current - // accumulation. - CharUnits AccessSize = bitsToCharUnits(BitSizeSinceBegin + CharBits - 1); - if (BestEnd == Begin) { - // This is the initial run at the start of a new span. By definition, - // this is the best seen so far. - BestEnd = Field; + CharUnits TypeSize = getSize(Type); + if (BeginOffset + TypeSize <= getLimitOffset(Spans[J])) { + // There is space before LimitOffset to create a naturally-sized + // access unit. + BestEnd = J + 1; + BestEndOffset = BeginOffset + TypeSize; + BestClipped = false; + } else if (J == I) { + // First span doesn't fit naturally — record clipped. + BestEnd = J + 1; BestEndOffset = BeginOffset + AccessSize; - // Assume clipped until proven not below. BestClipped = true; - if (!BitSizeSinceBegin) - // A zero-sized initial span -- this will install nothing and reset - // for another. - InstallBest = true; - } else if (AccessSize > RegSize) - // Accumulating the just-seen span would create a multi-register access - // unit, which would increase register pressure. - InstallBest = true; - - if (!InstallBest) { - // Determine if accumulating the just-seen span will create an expensive - // access unit or not. - llvm::Type *Type = getIntNType(Context.toBits(AccessSize)); - if (!Context.getTargetInfo().hasCheapUnalignedBitFieldAccess()) { - // Unaligned accesses are expensive. Only accumulate if the new unit - // is naturally aligned. Otherwise install the best we have, which is - // either the initial access unit (can't do better), or a naturally - // aligned accumulation (since we would have already installed it if - // it wasn't naturally aligned). - CharUnits Align = getAlignment(Type); - if (Align > Layout.getAlignment()) - // The alignment required is greater than the containing structure - // itself. - InstallBest = true; - else if (!BeginOffset.isMultipleOf(Align)) - // The access unit is not at a naturally aligned offset within the - // structure. - InstallBest = true; - - if (InstallBest && BestEnd == Field) - // We're installing the first span, whose clipping was presumed - // above. Compute it correctly. - if (getSize(Type) == AccessSize) - BestClipped = false; - } - - if (!InstallBest) { - // Find the next used storage offset to determine what the limit of - // the current span is. That's either the offset of the next field - // with storage (which might be Field itself) or the end of the - // non-reusable tail padding. - CharUnits LimitOffset = getLimitOffset(); - - CharUnits TypeSize = getSize(Type); - if (BeginOffset + TypeSize <= LimitOffset) { - // There is space before LimitOffset to create a naturally-sized - // access unit. - BestEndOffset = BeginOffset + TypeSize; - BestEnd = Field; - BestClipped = false; - } - - if (Barrier) - // The next field is a barrier that we cannot merge across. - InstallBest = true; - else if (Types.getCodeGenOpts().FineGrainedBitfieldAccesses) - // Fine-grained access, so no merging of spans. - InstallBest = true; - else - // Otherwise, we're not installing. Update the bit size - // of the current span to go all the way to LimitOffset, which is - // the (aligned) offset of next bitfield to consider. - BitSizeSinceBegin = Context.toBits(LimitOffset - BeginOffset); - } } - } - if (InstallBest) { - assert((Field == FieldEnd || !Field->isBitField() || - (getFieldBitOffset(*Field) % CharBits) == 0) && - "Installing but not at an aligned bitfield or limit"); - emitAccessUnit(BeginOffset, BestEndOffset, BestClipped, Begin, BestEnd); - // Reset to start a new span. - Field = BestEnd; - Begin = FieldEnd; - } else { - assert(Field != FieldEnd && Field->isBitField() && - "Accumulating past end of bitfields"); - assert(!Barrier && "Accumulating across barrier"); - // Accumulate this bitfield into the current (potential) span. - BitSizeSinceBegin += Field->getBitWidthValue(); - ++Field; + // The accumulation is also prevented when FineGrainedBitfieldAccesses is + // in effect. + if (Spans[J].Barrier || + Types.getCodeGenOpts().FineGrainedBitfieldAccesses) + break; } + + assert(BestEnd > I && "Span must always produce a best"); + emitAccessUnit(BeginOffset, BestEndOffset, BestClipped, Spans[I].Begin, + Spans[BestEnd - 1].End); + I = BestEnd; } return Field; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
