Timm =?utf-8?q?Bäder?= <tbae...@redhat.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/116...@github.com>
https://github.com/tbaederr updated https://github.com/llvm/llvm-project/pull/116843 >From 441fd49350866110d458c247786d0f71096505ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Fri, 8 Nov 2024 14:37:56 +0100 Subject: [PATCH 1/2] [clang][bytecode] Handle bitcasts involving bitfields --- clang/lib/AST/ByteCode/BitcastBuffer.cpp | 88 ++++ clang/lib/AST/ByteCode/BitcastBuffer.h | 67 +++ clang/lib/AST/ByteCode/Boolean.h | 4 +- clang/lib/AST/ByteCode/Integral.h | 1 + .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 250 ++++------ clang/lib/AST/CMakeLists.txt | 1 + .../ByteCode/builtin-bit-cast-bitfields.cpp | 437 ++++++++++++++++++ clang/test/AST/ByteCode/builtin-bit-cast.cpp | 104 +---- .../unittests/AST/ByteCode/BitcastBuffer.cpp | 83 ++++ clang/unittests/AST/ByteCode/CMakeLists.txt | 1 + 10 files changed, 790 insertions(+), 246 deletions(-) create mode 100644 clang/lib/AST/ByteCode/BitcastBuffer.cpp create mode 100644 clang/lib/AST/ByteCode/BitcastBuffer.h create mode 100644 clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp create mode 100644 clang/unittests/AST/ByteCode/BitcastBuffer.cpp diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp new file mode 100644 index 00000000000000..093f2b2c224093 --- /dev/null +++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp @@ -0,0 +1,88 @@ +//===-------------------- Bitcastbuffer.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "BitcastBuffer.h" + +using namespace clang; +using namespace clang::interp; + +void BitcastBuffer::pushData(const std::byte *In, size_t BitOffset, + size_t BitWidth, Endian TargetEndianness) { + for (unsigned It = 0; It != BitWidth; ++It) { + bool BitValue = bitof(In, It); + if (!BitValue) + continue; + + unsigned DstBit; + if (TargetEndianness == Endian::Little) + DstBit = BitOffset + It; + else + DstBit = size() - BitOffset - BitWidth + It; + + unsigned DstByte = (DstBit / 8); + Data[DstByte] |= std::byte{1} << (DstBit % 8); + } +} + +std::unique_ptr<std::byte[]> +BitcastBuffer::copyBits(unsigned BitOffset, unsigned BitWidth, + unsigned FullBitWidth, Endian TargetEndianness) const { + assert(BitWidth <= FullBitWidth); + assert(fullByte(FullBitWidth)); + auto Out = std::make_unique<std::byte[]>(FullBitWidth / 8); + + for (unsigned It = 0; It != BitWidth; ++It) { + unsigned BitIndex; + if (TargetEndianness == Endian::Little) + BitIndex = BitOffset + It; + else + BitIndex = size() - BitWidth - BitOffset + It; + + bool BitValue = bitof(Data.get(), BitIndex); + if (!BitValue) + continue; + unsigned DstBit = It; + unsigned DstByte = (DstBit / 8); + Out[DstByte] |= std::byte{1} << (DstBit % 8); + } + + return Out; +} + +#if 0 + template<typename T> + static std::string hex(T t) { + std::stringstream stream; + stream << std::hex << (int)t; + return std::string(stream.str()); + } + + + void BitcastBuffer::dump(bool AsHex = true) const { + llvm::errs() << "LSB\n "; + unsigned LineLength = 0; + for (unsigned I = 0; I != (FinalBitSize / 8); ++I) { + std::byte B = Data[I]; + if (AsHex) { + std::stringstream stream; + stream << std::hex << (int)B; + llvm::errs() << stream.str(); + LineLength += stream.str().size() + 1; + } else { + llvm::errs() << std::bitset<8>((int)B).to_string(); + LineLength += 8 + 1; + // llvm::errs() << (int)B; + } + llvm::errs() << ' '; + } + llvm::errs() << '\n'; + + for (unsigned I = 0; I != LineLength; ++I) + llvm::errs() << ' '; + llvm::errs() << "MSB\n"; + } +#endif diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h new file mode 100644 index 00000000000000..0cc7d5909c47e4 --- /dev/null +++ b/clang/lib/AST/ByteCode/BitcastBuffer.h @@ -0,0 +1,67 @@ +//===--------------------- BitcastBuffer.h ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H +#define LLVM_CLANG_AST_INTERP_BITCAST_BUFFER_H + +#include <cassert> +#include <cstddef> +#include <memory> + +namespace clang { +namespace interp { + +enum class Endian { Little, Big }; + +/// Returns the value of the bit in the given sequence of bytes. +static inline bool bitof(const std::byte *B, unsigned BitIndex) { + return (B[BitIndex / 8] & (std::byte{1} << (BitIndex % 8))) != std::byte{0}; +} + +/// Returns whether \p N is a full byte offset or size. +static inline bool fullByte(unsigned N) { return N % 8 == 0; } + +/// Track what bits have been initialized to known values and which ones +/// have indeterminate value. +/// All offsets are in bits. +struct BitcastBuffer { + size_t FinalBitSize = 0; + std::unique_ptr<std::byte[]> Data; + + BitcastBuffer(size_t FinalBitSize) : FinalBitSize(FinalBitSize) { + assert(fullByte(FinalBitSize)); + unsigned ByteSize = FinalBitSize / 8; + Data = std::make_unique<std::byte[]>(ByteSize); + } + + /// Returns the buffer size in bits. + size_t size() const { return FinalBitSize; } + + /// Returns \c true if all bits in the buffer have been initialized. + bool allInitialized() const { + // FIXME: Implement. + return true; + } + + /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer. + /// \p TargetEndianness is the endianness of the target we're compiling for. + /// \p In must hold at least \p BitWidth many bits. + void pushData(const std::byte *In, size_t BitOffset, size_t BitWidth, + Endian TargetEndianness); + + /// Copy \p BitWidth bits at offset \p BitOffset from the buffer. + /// \p TargetEndianness is the endianness of the target we're compiling for. + /// + /// The returned output holds exactly (\p FullBitWidth / 8) bytes. + std::unique_ptr<std::byte[]> copyBits(unsigned BitOffset, unsigned BitWidth, + unsigned FullBitWidth, + Endian TargetEndianness) const; +}; + +} // namespace interp +} // namespace clang +#endif diff --git a/clang/lib/AST/ByteCode/Boolean.h b/clang/lib/AST/ByteCode/Boolean.h index 78d75e75c7531a..8380e85865ac55 100644 --- a/clang/lib/AST/ByteCode/Boolean.h +++ b/clang/lib/AST/ByteCode/Boolean.h @@ -82,9 +82,7 @@ class Boolean final { Boolean truncate(unsigned TruncBits) const { return *this; } static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) { - // Boolean width is currently always 8 for all supported targets. If this - // changes we need to get the bool width from the target info. - assert(BitWidth == 8); + // Just load the first byte. bool Val = static_cast<bool>(*Buff); return Boolean(Val); } diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index ca3674263aef4f..bb1688a8a7622c 100644 --- a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -181,6 +181,7 @@ template <unsigned Bits, bool Signed> class Integral final { } Integral truncate(unsigned TruncBits) const { + assert(TruncBits >= 1); if (TruncBits >= Bits) return *this; const ReprT BitMask = (ReprT(1) << ReprT(TruncBits)) - 1; diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index b1230f92ddf1d4..775c121a8ca3ec 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include "InterpBuiltinBitCast.h" +#include "BitcastBuffer.h" #include "Boolean.h" #include "Context.h" #include "Floating.h" @@ -21,6 +22,16 @@ using namespace clang; using namespace clang::interp; +/// Implement __builtin_bit_cast and related operations. +/// Since our internal representation for data is more complex than +/// something we can simply memcpy or memcmp, we first bitcast all the data +/// into a buffer, which we then later use to copy the data into the target. + +// TODO: +// - Try to minimize heap allocations. +// - Optimize the common case of only pushing and pulling full +// bytes to/from the buffer. + /// Used to iterate over pointer fields. using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t BitOffset, bool PackedBools)>; @@ -61,81 +72,12 @@ using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, } \ } while (0) -static bool bitof(std::byte B, unsigned BitIndex) { - return (B & (std::byte{1} << BitIndex)) != std::byte{0}; -} - static void swapBytes(std::byte *M, size_t N) { for (size_t I = 0; I != (N / 2); ++I) std::swap(M[I], M[N - 1 - I]); } -/// Track what bits have been initialized to known values and which ones -/// have indeterminate value. -/// All offsets are in bits. -struct BitcastBuffer { - size_t SizeInBits = 0; - llvm::SmallVector<std::byte> Data; - - BitcastBuffer() = default; - - size_t size() const { return SizeInBits; } - - const std::byte *data() const { return Data.data(); } - - std::byte *getBytes(unsigned BitOffset) const { - assert(BitOffset % 8 == 0); - assert(BitOffset < SizeInBits); - return const_cast<std::byte *>(data() + (BitOffset / 8)); - } - - bool allInitialized() const { - // FIXME: Implement. - return true; - } - - bool atByteBoundary() const { return (Data.size() * 8) == SizeInBits; } - - void pushBit(bool Value) { - if (atByteBoundary()) - Data.push_back(std::byte{0}); - - if (Value) - Data.back() |= (std::byte{1} << (SizeInBits % 8)); - ++SizeInBits; - } - - void pushData(const std::byte *data, size_t BitWidth, bool BigEndianTarget) { - bool OnlyFullBytes = BitWidth % 8 == 0; - unsigned NBytes = BitWidth / 8; - - size_t BitsHandled = 0; - // Read all full bytes first - for (size_t I = 0; I != NBytes; ++I) { - std::byte B = - BigEndianTarget ? data[NBytes - OnlyFullBytes - I] : data[I]; - for (unsigned X = 0; X != 8; ++X) { - pushBit(bitof(B, X)); - ++BitsHandled; - } - } - - if (BitsHandled == BitWidth) - return; - - // Rest of the bits. - assert((BitWidth - BitsHandled) < 8); - std::byte B = BigEndianTarget ? data[0] : data[NBytes]; - for (size_t I = 0, E = (BitWidth - BitsHandled); I != E; ++I) { - pushBit(bitof(B, I)); - ++BitsHandled; - } - - assert(BitsHandled == BitWidth); - } -}; - -/// We use this to recursively iterate over all fields and elemends of a pointer +/// We use this to recursively iterate over all fields and elements of a pointer /// and extract relevant data for a bitcast. static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, DataFunc F) { @@ -144,33 +86,30 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, // Primitives. if (FieldDesc->isPrimitive()) - return F(P, FieldDesc->getPrimType(), Offset, false); + return F(P, FieldDesc->getPrimType(), Offset, /*PackedBools=*/false); // Primitive arrays. if (FieldDesc->isPrimitiveArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); PrimType ElemT = *Ctx.classify(ElemType); // Special case, since the bools here are packed. bool PackedBools = FieldDesc->getType()->isExtVectorBoolType(); + unsigned NumElems = FieldDesc->getNumElems(); bool Ok = true; - for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; - Ok = Ok && F(P.atIndex(Index), ElemT, Offset, PackedBools); - Offset += ElemSizeInBits; + for (unsigned I = 0; I != NumElems; ++I) { + Ok = Ok && F(P.atIndex(I), ElemT, Offset, PackedBools); + Offset += PackedBools ? 1 : ElemSizeInBits; } return Ok; } // Composite arrays. if (FieldDesc->isCompositeArray()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); QualType ElemType = FieldDesc->getElemQualType(); size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { - unsigned Index = BigEndianTarget ? (FieldDesc->getNumElems() - 1 - I) : I; - enumerateData(P.atIndex(Index).narrow(), Ctx, Offset, F); + enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F); Offset += ElemSizeInBits; } return true; @@ -178,39 +117,23 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, // Records. if (FieldDesc->isRecord()) { - bool BigEndianTarget = Ctx.getASTContext().getTargetInfo().isBigEndian(); const Record *R = FieldDesc->ElemRecord; const ASTRecordLayout &Layout = Ctx.getASTContext().getASTRecordLayout(R->getDecl()); bool Ok = true; - auto enumerateFields = [&]() -> void { - for (unsigned I = 0, N = R->getNumFields(); I != N; ++I) { - const Record::Field *Fi = - R->getField(BigEndianTarget ? (N - 1 - I) : I); - Pointer Elem = P.atField(Fi->Offset); - size_t BitOffset = - Offset + Layout.getFieldOffset(Fi->Decl->getFieldIndex()); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - auto enumerateBases = [&]() -> void { - for (unsigned I = 0, N = R->getNumBases(); I != N; ++I) { - const Record::Base *B = R->getBase(BigEndianTarget ? (N - 1 - I) : I); - Pointer Elem = P.atField(B->Offset); - CharUnits ByteOffset = - Layout.getBaseClassOffset(cast<CXXRecordDecl>(B->Decl)); - size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); - Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); - } - }; - - if (BigEndianTarget) { - enumerateFields(); - enumerateBases(); - } else { - enumerateBases(); - enumerateFields(); + for (const Record::Field &Fi : R->fields()) { + Pointer Elem = P.atField(Fi.Offset); + size_t BitOffset = + Offset + Layout.getFieldOffset(Fi.Decl->getFieldIndex()); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + for (const Record::Base &B : R->bases()) { + Pointer Elem = P.atField(B.Offset); + CharUnits ByteOffset = + Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); + size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); } return Ok; @@ -295,27 +218,28 @@ static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, BitcastBuffer &Buffer, bool ReturnOnUninit) { const ASTContext &ASTCtx = Ctx.getASTContext(); - bool SwapData = (ASTCtx.getTargetInfo().isLittleEndian() != - llvm::sys::IsLittleEndianHost); - bool BigEndianTarget = ASTCtx.getTargetInfo().isBigEndian(); + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; return enumeratePointerFields( FromPtr, Ctx, [&](const Pointer &P, PrimType T, size_t BitOffset, bool PackedBools) -> bool { - if (!P.isInitialized()) { - assert(false && "Implement uninitialized value tracking"); - return ReturnOnUninit; - } + // if (!P.isInitialized()) { + // assert(false && "Implement uninitialized value tracking"); + // return ReturnOnUninit; + // } - assert(P.isInitialized()); + // assert(P.isInitialized()); // nullptr_t is a PT_Ptr for us, but it's still not std::is_pointer_v. if (T == PT_Ptr) assert(false && "Implement casting to pointer types"); CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); unsigned BitWidth = ASTCtx.toBits(ObjectReprChars); - llvm::SmallVector<std::byte> Buff(ObjectReprChars.getQuantity()); + unsigned FullBitWidth = BitWidth; + auto Buff = + std::make_unique<std::byte[]>(ObjectReprChars.getQuantity()); // Work around floating point types that contain unused padding bytes. // This is really just `long double` on x86, which is the only // fundamental type with padding bytes. @@ -323,34 +247,27 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, const Floating &F = P.deref<Floating>(); unsigned NumBits = llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics()); - assert(NumBits % 8 == 0); - assert(NumBits <= (ObjectReprChars.getQuantity() * 8)); - F.bitcastToMemory(Buff.data()); + assert(fullByte(NumBits)); + assert(NumBits <= FullBitWidth); + F.bitcastToMemory(Buff.get()); // Now, only (maybe) swap the actual size of the float, excluding the // padding bits. - if (SwapData) - swapBytes(Buff.data(), NumBits / 8); + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), NumBits / 8); } else { if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) - BitWidth = FD->getBitWidthValue(ASTCtx); + BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); else if (T == PT_Bool && PackedBools) BitWidth = 1; - BITCAST_TYPE_SWITCH(T, { - T Val = P.deref<T>(); - Val.bitcastToMemory(Buff.data()); - }); - if (SwapData) - swapBytes(Buff.data(), ObjectReprChars.getQuantity()); - } + BITCAST_TYPE_SWITCH(T, { P.deref<T>().bitcastToMemory(Buff.get()); }); - if (BitWidth != (Buff.size() * 8) && BigEndianTarget) { - Buffer.pushData(Buff.data() + (Buff.size() - 1 - (BitWidth / 8)), - BitWidth, BigEndianTarget); - } else { - Buffer.pushData(Buff.data(), BitWidth, BigEndianTarget); + if (llvm::sys::IsBigEndianHost) + swapBytes(Buff.get(), FullBitWidth / 8); } + + Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness); return true; }); } @@ -362,16 +279,21 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, assert(Ptr.isBlockPointer()); assert(Buff); - BitcastBuffer Buffer; + size_t BitSize = BuffSize * 8; + BitcastBuffer Buffer(BitSize); if (!CheckBitcastType(S, OpPC, Ptr.getType(), /*IsToType=*/false)) return false; bool Success = readPointerToBuffer(S.getContext(), Ptr, Buffer, /*ReturnOnUninit=*/false); - assert(Buffer.size() == BuffSize * 8); - HasIndeterminateBits = !Buffer.allInitialized(); - std::memcpy(Buff, Buffer.data(), BuffSize); + + const ASTContext &ASTCtx = S.getASTContext(); + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; + auto B = Buffer.copyBits(0, BitSize, BitSize, TargetEndianness); + + std::memcpy(Buff, B.get(), BuffSize); if (llvm::sys::IsBigEndianHost) swapBytes(Buff, BuffSize); @@ -393,43 +315,59 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, if (!CheckBitcastType(S, OpPC, FromType, /*IsToType=*/false)) return false; - BitcastBuffer Buffer; + const ASTContext &ASTCtx = S.getASTContext(); + + CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(ToType); + BitcastBuffer Buffer(ASTCtx.toBits(ObjectReprChars)); readPointerToBuffer(S.getContext(), FromPtr, Buffer, /*ReturnOnUninit=*/false); // Now read the values out of the buffer again and into ToPtr. - const ASTContext &ASTCtx = S.getASTContext(); - size_t BitOffset = 0; + Endian TargetEndianness = + ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; bool Success = enumeratePointerFields( ToPtr, S.getContext(), - [&](const Pointer &P, PrimType T, size_t _, bool PackedBools) -> bool { + [&](const Pointer &P, PrimType T, size_t BitOffset, + bool PackedBools) -> bool { + CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); + unsigned FullBitWidth = ASTCtx.toBits(ObjectReprChars); if (T == PT_Float) { - CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); const auto &Semantics = ASTCtx.getFloatTypeSemantics(P.getType()); unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics); - assert(NumBits % 8 == 0); - assert(NumBits <= ASTCtx.toBits(ObjectReprChars)); - std::byte *M = Buffer.getBytes(BitOffset); + assert(fullByte(NumBits)); + assert(NumBits <= FullBitWidth); + auto M = Buffer.copyBits(BitOffset, NumBits, FullBitWidth, + TargetEndianness); if (llvm::sys::IsBigEndianHost) - swapBytes(M, NumBits / 8); + swapBytes(M.get(), NumBits / 8); - P.deref<Floating>() = Floating::bitcastFromMemory(M, Semantics); + P.deref<Floating>() = Floating::bitcastFromMemory(M.get(), Semantics); P.initialize(); - BitOffset += ASTCtx.toBits(ObjectReprChars); return true; } - BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { - std::byte *M = Buffer.getBytes(BitOffset); + unsigned BitWidth; + if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) + BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); + else if (T == PT_Bool && PackedBools) + BitWidth = 1; + else + BitWidth = ASTCtx.toBits(ObjectReprChars); - if (llvm::sys::IsBigEndianHost) - swapBytes(M, T::bitWidth() / 8); + auto Memory = Buffer.copyBits(BitOffset, BitWidth, FullBitWidth, + TargetEndianness); + if (llvm::sys::IsBigEndianHost) + swapBytes(Memory.get(), FullBitWidth / 8); - P.deref<T>() = T::bitcastFromMemory(M, T::bitWidth()); - P.initialize(); - BitOffset += T::bitWidth(); + BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { + if (BitWidth > 0) + P.deref<T>() = T::bitcastFromMemory(Memory.get(), T::bitWidth()) + .truncate(BitWidth); + else + P.deref<T>() = T::zero(); }); + P.initialize(); return true; }); diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index 52c6a45de9a26c..cb13c5225b713b 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -65,6 +65,7 @@ add_clang_library(clangAST ExternalASTSource.cpp FormatString.cpp InheritViz.cpp + ByteCode/BitcastBuffer.cpp ByteCode/ByteCodeEmitter.cpp ByteCode/Compiler.cpp ByteCode/Context.cpp diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp new file mode 100644 index 00000000000000..1bb44f4f13443b --- /dev/null +++ b/clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp @@ -0,0 +1,437 @@ +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu -fexperimental-new-constant-interpreter %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s +// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64-unknown-unknown -mabi=ieeelongdouble %s + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define LITTLE_END 1 +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define LITTLE_END 0 +#else +# error "huh?" +#endif + +typedef decltype(nullptr) nullptr_t; +typedef __INTPTR_TYPE__ intptr_t; +typedef unsigned __INT16_TYPE__ uint16_t; +typedef unsigned __INT32_TYPE__ uint32_t; +typedef unsigned __INT64_TYPE__ uint64_t; + +static_assert(sizeof(int) == 4); +static_assert(sizeof(long long) == 8); + +template <class To, class From> +constexpr To bit_cast(const From &from) { + static_assert(sizeof(To) == sizeof(From)); + return __builtin_bit_cast(To, from); +} + +template <class Intermediate, class Init> +constexpr bool check_round_trip(const Init &init) { + return bit_cast<Init>(bit_cast<Intermediate>(init)) == init; +} + +template <class Intermediate, class Init> +constexpr Init round_trip(const Init &init) { + return bit_cast<Init>(bit_cast<Intermediate>(init)); +} + +namespace std { +enum byte : unsigned char {}; +} // namespace std + +template <int N, typename T = unsigned char, int Pad = 0> +struct bits { + T : Pad; + T bits : N; + + constexpr bool operator==(const T& rhs) const { + return bits == rhs; + } +}; + +template <int N, typename T, int P> +constexpr bool operator==(const struct bits<N, T, P>& lhs, const struct bits<N, T, P>& rhs) { + return lhs.bits == rhs.bits; +} + +template<int N> +struct bytes { + using size_t = unsigned int; + unsigned char d[N]; + + constexpr unsigned char operator[](size_t index) { + if (index < N) + return d[index]; + return -1; + } +}; + +namespace Sanity { + /// This is just one byte, and we extract 2 bits from it. + /// + /// 3 is 0000'0011. + /// For both LE and BE, the buffer will contain exactly that + /// byte, unaltered and not reordered in any way. It contains all 8 bits. + static_assert(__builtin_bit_cast(bits<2>, (unsigned char)3) == (LITTLE_END ? 3 : 0)); + + /// Similarly, we have one full byte of data, with the two most-significant + /// bits set: + /// 192 is 1100'0000 + static_assert(__builtin_bit_cast(bits<2>, (unsigned char)192) == (LITTLE_END ? 0 : 3)); + + + /// Here we are instead bitcasting two 1-bits into a destination of 8 bits. + /// On LE, we should pick the two least-significant bits. On BE, the opposite. + /// NOTE: Can't verify this with gcc. + constexpr auto B1 = bits<2>{3}; + static_assert(__builtin_bit_cast(unsigned char, B1) == (LITTLE_END ? 3 : 192)); + + /// This should be 0000'0110. + /// On LE, this should result in 6. + /// On BE, 1100'0000 = 192. + constexpr auto B2 = bits<3>{6}; + static_assert(__builtin_bit_cast(unsigned char, B2) == (LITTLE_END ? 6 : 192)); + + constexpr auto B3 = bits<4>{6}; + static_assert(__builtin_bit_cast(unsigned char, B3) == (LITTLE_END ? 6 : 96)); + + struct B { + std::byte b0 : 4; + std::byte b1 : 4; + }; + + /// We can properly decompose one byte (8 bit) int two 4-bit bitfields. + constexpr struct { unsigned char b0; } T = {0xee}; + constexpr B MB = __builtin_bit_cast(B, T); + static_assert(MB.b0 == 0xe); + static_assert(MB.b1 == 0xe); +} + +namespace BitFields { + struct BitFields { + unsigned a : 2; + unsigned b : 30; + }; + + constexpr unsigned A = __builtin_bit_cast(unsigned, BitFields{3, 16}); + static_assert(A == (LITTLE_END ? 67 : 3221225488)); + + struct S { + unsigned a : 2; + unsigned b : 28; + unsigned c : 2; + }; + + constexpr S s = __builtin_bit_cast(S, 0xFFFFFFFF); + static_assert(s.a == 3); + static_assert(s.b == 268435455); + static_assert(s.c == 3); + + void bitfield_indeterminate() { + struct BF { unsigned char z : 2; }; + enum byte : unsigned char {}; + + constexpr BF bf = {0x3}; + /// Requires bitcasts to composite types. + static_assert(bit_cast<bits<2>>(bf).bits == bf.z); + static_assert(bit_cast<unsigned char>(bf)); + + static_assert(__builtin_bit_cast(byte, bf)); + + struct M { + // ref-note@+1 {{subobject declared here}} + unsigned char mem[sizeof(BF)]; + }; + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{not initialized}} + constexpr M m = bit_cast<M>(bf); + + constexpr auto f = []() constexpr { + // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee}; + constexpr struct { unsigned short b1; unsigned char b0; } B = {0xc0ff, 0xee}; + return bit_cast<bytes<4>>(B); + }; + + static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee); + { + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{read of uninitialized object is not allowed in a constant expression}} + constexpr auto _bad = f()[3]; + } + + struct B { + unsigned short s0 : 8; + unsigned short s1 : 8; + std::byte b0 : 4; + std::byte b1 : 4; + std::byte b2 : 4; + }; + constexpr auto g = [f]() constexpr { + return bit_cast<B>(f()); + }; + static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe); + { + // ref-error@+2 {{initialized by a constant expression}} + // ref-note@+1 {{read of uninitialized object is not allowed in a constant expression}} + constexpr auto _bad = g().b2; + } + } +} + +namespace BoolVectors { + typedef bool bool32 __attribute__((ext_vector_type(32))); + constexpr auto v = bit_cast<bool32>(0xa1c0ffee); +#if LITTLE_END + static_assert(!v[0]); + static_assert(v[1]); + static_assert(v[2]); + static_assert(v[3]); + static_assert(!v[4]); + static_assert(v[5]); + static_assert(v[6]); + static_assert(v[7]); + + static_assert(v[8]); + static_assert(v[9]); + static_assert(v[10]); + static_assert(v[11]); + static_assert(v[12]); + static_assert(v[13]); + static_assert(v[14]); + static_assert(v[15]); + + static_assert(!v[16]); + static_assert(!v[17]); + static_assert(!v[18]); + static_assert(!v[19]); + static_assert(!v[20]); + static_assert(!v[21]); + static_assert(v[22]); + static_assert(v[23]); + + static_assert(v[24]); + static_assert(!v[25]); + static_assert(!v[26]); + static_assert(!v[27]); + static_assert(!v[28]); + static_assert(v[29]); + static_assert(!v[30]); + static_assert(v[31]); + +#else + static_assert(v[0]); + static_assert(!v[1]); + static_assert(v[2]); + static_assert(!v[3]); + static_assert(!v[4]); + static_assert(!v[5]); + static_assert(!v[6]); + static_assert(v[7]); + + static_assert(v[8]); + static_assert(v[9]); + static_assert(!v[10]); + static_assert(!v[11]); + static_assert(!v[12]); + static_assert(!v[13]); + static_assert(!v[14]); + static_assert(!v[15]); + + static_assert(v[16]); + static_assert(v[17]); + static_assert(v[18]); + static_assert(v[19]); + static_assert(v[20]); + static_assert(v[21]); + static_assert(v[22]); + static_assert(v[23]); + + static_assert(v[24]); + static_assert(v[25]); + static_assert(v[26]); + static_assert(!v[27]); + static_assert(v[28]); + static_assert(v[29]); + static_assert(v[30]); + static_assert(!v[31]); +#endif + + struct pad { + unsigned short s; + unsigned char c; + }; + + constexpr auto p = bit_cast<pad>(v); + static_assert(p.s == (LITTLE_END ? 0xffee : 0xa1c0)); + static_assert(p.c == (LITTLE_END ? 0xc0 : 0xff)); +} + +namespace TwoShorts { + struct B { + unsigned short s0 : 8; + unsigned short s1 : 8; + }; + constexpr struct { unsigned short b1;} T = {0xc0ff}; + constexpr B MB = __builtin_bit_cast(B, T); +#if LITTLE_END + static_assert(MB.s0 == 0xff); + static_assert(MB.s1 == 0xc0); +#else + static_assert(MB.s0 == 0xc0); + static_assert(MB.s1 == 0xff); + +#endif +} + +typedef bool bool8 __attribute__((ext_vector_type(8))); +typedef bool bool9 __attribute__((ext_vector_type(9))); +typedef bool bool16 __attribute__((ext_vector_type(16))); +typedef bool bool17 __attribute__((ext_vector_type(17))); +typedef bool bool32 __attribute__((ext_vector_type(32))); +typedef bool bool128 __attribute__((ext_vector_type(128))); + +static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), ""); +constexpr bool8 b8 = __builtin_bit_cast(bool8, 0x55); // both-error {{'__builtin_bit_cast' source type 'int' does not match destination type 'bool8' (vector of 8 'bool' values) (4 vs 1 bytes)}} +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)), ""); +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)), ""); +static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)), ""); + +static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4), ""); + +static_assert(check_round_trip<bool16>(static_cast<short>(0xCAFE)), ""); +static_assert(check_round_trip<bool32>(static_cast<int>(0xCAFEBABE)), ""); +static_assert(check_round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), ""); + +static_assert(bit_cast<bits<8, uint16_t, 7>, uint16_t>(0xcafe) == (LITTLE_END ? 0x95 : 0x7f)); +static_assert(bit_cast<bits<4, uint16_t, 10>, uint16_t>(0xcafe) == (LITTLE_END ? 0x2 : 0xf)); +static_assert(bit_cast<bits<4, uint32_t, 19>, uint32_t>(0xa1cafe) == (LITTLE_END ? 0x4 : 0x5)); + +struct S { + // little endian: + // MSB .... .... LSB + // |y| |x| + // + // big endian + // MSB .... .... LSB + // |x| |y| + + unsigned char x : 4; + unsigned char y : 4; + + constexpr bool operator==(S const &other) const { + return x == other.x && y == other.y; + } +}; + +constexpr S s{0xa, 0xb}; +static_assert(bit_cast<bits<8>>(s) == (LITTLE_END ? 0xba : 0xab)); +static_assert(bit_cast<bits<7>>(s) == (LITTLE_END + ? 0xba & 0x7f + : (0xab & 0xfe) >> 1)); + +static_assert(round_trip<bits<8>>(s) == s); + +struct R { + unsigned int r : 31; + unsigned int : 0; + unsigned int : 32; + constexpr bool operator==(R const &other) const { + return r == other.r; + } + }; +using T = bits<31, signed long long>; +constexpr R r{0x4ac0ffee}; +constexpr T t = bit_cast<T>(r); +static_assert(t == ((0xFFFFFFFF8 << 28) | 0x4ac0ffee)); // sign extension + +static_assert(round_trip<T>(r) == r); +static_assert(round_trip<R>(t) == t); + + +/// The oversized bitfield is an error on Windows and not just a warning. +#if !defined(_WIN32) +struct U { + // expected-warning@+1 {{exceeds the width of its type}} + uint32_t trunc : 33; + uint32_t u : 31; + constexpr bool operator==(U const &other) const { + return trunc == other.trunc && u == other.u; + } +}; +struct V { + uint64_t notrunc : 32; + uint64_t : 1; + uint64_t v : 31; + constexpr bool operator==(V const &other) const { + return notrunc == other.notrunc && v == other.v; + } +}; + +constexpr U u{static_cast<unsigned int>(~0), 0x4ac0ffee}; +constexpr V v = bit_cast<V>(u); +static_assert(v.v == 0x4ac0ffee); + +static_assert(round_trip<V>(u) == u); +static_assert(round_trip<U>(v) == v); + +constexpr auto w = bit_cast<bits<12, unsigned long, 33>>(u); +static_assert(w == (LITTLE_END + ? 0x4ac0ffee & 0xFFF + : (0x4ac0ffee & (0xFFF << (31 - 12))) >> (31-12) + )); +#endif + + +namespace NestedStructures { + struct J { + struct { + uint16_t k : 12; + } K; + struct { + uint16_t l : 4; + } L; + }; + + static_assert(sizeof(J) == 4); + constexpr J j = bit_cast<J>(0x8c0ffee5); + + static_assert(j.K.k == (LITTLE_END ? 0xee5 : 0x8c0)); + static_assert(j.L.l == 0xf /* yay symmetry */); + static_assert(bit_cast<bits<4, uint16_t, 16>>(j) == 0xf); + struct N { + bits<12, uint16_t> k; + uint16_t : 16; + }; + static_assert(bit_cast<N>(j).k == j.K.k); + + struct M { + bits<4, uint16_t, 0> m[2]; + constexpr bool operator==(const M& rhs) const { + return m[0] == rhs.m[0] && m[1] == rhs.m[1]; + }; + }; + #if LITTLE_END == 1 + constexpr uint16_t want[2] = {0x5, 0xf}; + #else + constexpr uint16_t want[2] = {0x8000, 0xf000}; + #endif + + static_assert(bit_cast<M>(j) == bit_cast<M>(want)); +} + +namespace Enums { + // ensure we're packed into the top 2 bits + constexpr int pad = LITTLE_END ? 6 : 0; + struct X + { + char : pad; + enum class direction: char { left, right, up, down } direction : 2; + }; + + constexpr X x = { X::direction::down }; + static_assert(bit_cast<bits<2, signed char, pad>>(x) == -1); + static_assert(bit_cast<bits<2, unsigned char, pad>>(x) == 3); + static_assert( + bit_cast<X>((unsigned char)0x40).direction == X::direction::right); +} diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index 0fecde59cd57ce..271c98ae072015 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -186,72 +186,6 @@ namespace bitint { // ref-note {{initializer of 'IB' is not a constant expression}} } -namespace BitFields { - struct BitFields { - unsigned a : 2; - unsigned b : 30; - }; - - constexpr unsigned A = __builtin_bit_cast(unsigned, BitFields{3, 16}); // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{not yet supported}} \ - // ref-note {{declared here}} - static_assert(A == (LITTLE_END ? 67 : 3221225488)); // ref-error {{not an integral constant expression}} \ - // ref-note {{initializer of 'A'}} - - - void bitfield_indeterminate() { - struct BF { unsigned char z : 2; }; - enum byte : unsigned char {}; - - constexpr BF bf = {0x3}; - /// Requires bitcasts to composite types. - // static_assert(bit_cast<bits<2>>(bf).bits == bf.z); - // static_assert(bit_cast<unsigned char>(bf)); - -#if 0 - // static_assert(__builtin_bit_cast(byte, bf)); - - struct M { - // expected-note@+1 {{subobject declared here}} - unsigned char mem[sizeof(BF)]; - }; - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{not initialized}} - constexpr M m = bit_cast<M>(bf); - - constexpr auto f = []() constexpr { - // bits<24, unsigned int, LITTLE_END ? 0 : 8> B = {0xc0ffee}; - constexpr struct { unsigned short b1; unsigned char b0; } B = {0xc0ff, 0xee}; - return bit_cast<bytes<4>>(B); - }; - - static_assert(f()[0] + f()[1] + f()[2] == 0xc0 + 0xff + 0xee); - { - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{read of uninitialized object is not allowed in a constant expression}} - constexpr auto _bad = f()[3]; - } - - struct B { - unsigned short s0 : 8; - unsigned short s1 : 8; - std::byte b0 : 4; - std::byte b1 : 4; - std::byte b2 : 4; - }; - constexpr auto g = [f]() constexpr { - return bit_cast<B>(f()); - }; - static_assert(g().s0 + g().s1 + g().b0 + g().b1 == 0xc0 + 0xff + 0xe + 0xe); - { - // expected-error@+2 {{initialized by a constant expression}} - // expected-note@+1 {{read of uninitialized object is not allowed in a constant expression}} - constexpr auto _bad = g().b2; - } -#endif - } -} - namespace Classes { class A { public: @@ -510,27 +444,6 @@ static_assert(bit_cast<unsigned long long>(test_vector) == (LITTLE_END static_assert(check_round_trip<uint2>(0xCAFEBABE0C05FEFEULL), ""); static_assert(check_round_trip<byte8>(0xCAFEBABE0C05FEFEULL), ""); -typedef bool bool8 __attribute__((ext_vector_type(8))); -typedef bool bool9 __attribute__((ext_vector_type(9))); -typedef bool bool16 __attribute__((ext_vector_type(16))); -typedef bool bool17 __attribute__((ext_vector_type(17))); -typedef bool bool32 __attribute__((ext_vector_type(32))); -typedef bool bool128 __attribute__((ext_vector_type(128))); - -static_assert(bit_cast<unsigned char>(bool8{1,0,1,0,1,0,1,0}) == (LITTLE_END ? 0x55 : 0xAA), ""); -constexpr bool8 b8 = __builtin_bit_cast(bool8, 0x55); // both-error {{'__builtin_bit_cast' source type 'int' does not match destination type 'bool8' (vector of 8 'bool' values) (4 vs 1 bytes)}} -#if 0 -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0)), ""); -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(1)), ""); -static_assert(check_round_trip<bool8>(static_cast<unsigned char>(0x55)), ""); - -static_assert(bit_cast<unsigned short>(bool16{1,1,1,1,1,0,0,0, 1,1,1,1,0,1,0,0}) == (LITTLE_END ? 0x2F1F : 0xF8F4), ""); - -static_assert(check_round_trip<bool16>(static_cast<short>(0xCAFE)), ""); -static_assert(check_round_trip<bool32>(static_cast<int>(0xCAFEBABE)), ""); -static_assert(check_round_trip<bool128>(static_cast<__int128_t>(0xCAFEBABE0C05FEFEULL)), ""); -#endif - #if 0 // expected-error@+2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}} // expected-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}} @@ -559,3 +472,20 @@ namespace test_complex { constexpr double D = __builtin_bit_cast(double, test_float_complex); constexpr int M = __builtin_bit_cast(int, test_int_complex); // both-error {{size of '__builtin_bit_cast' source type 'const _Complex unsigned int' does not match destination type 'int' (8 vs 4 bytes)}} } + + +namespace OversizedBitField { +#if defined(_WIN32) + /// This is an error (not just a warning) on Windows and the field ends up with a size of 1 instead of 4. +#else + typedef unsigned __INT16_TYPE__ uint16_t; + typedef unsigned __INT32_TYPE__ uint32_t; + struct S { + uint16_t a : 20; // both-warning {{exceeds the width of its type}} + }; + + static_assert(sizeof(S) == 4); + static_assert(__builtin_bit_cast(S, (uint32_t)32).a == (LITTLE_END ? 32 : 0)); // ref-error {{not an integral constant expression}} \ + // ref-note {{constexpr bit_cast involving bit-field is not yet supported}} +#endif +} diff --git a/clang/unittests/AST/ByteCode/BitcastBuffer.cpp b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp new file mode 100644 index 00000000000000..0f45d74fe5e208 --- /dev/null +++ b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp @@ -0,0 +1,83 @@ +#include "../../../lib/AST/ByteCode/BitcastBuffer.h" +#include "clang/AST/ASTContext.h" +#include "gtest/gtest.h" +#include <bitset> +#include <cassert> +#include <cmath> +#include <memory> +#include <string> + +using namespace clang; +using namespace clang::interp; + +TEST(BitcastBuffer, PushData) { + BitcastBuffer Buff1(sizeof(int) * 8); + + const unsigned V = 0xCAFEBABE; + std::byte Data[sizeof(V)]; + std::memcpy(Data, &V, sizeof(V)); + + Endian HostEndianness = + llvm::sys::IsLittleEndianHost ? Endian::Little : Endian::Big; + + Buff1.pushData(Data, 0, sizeof(V) * 8, HostEndianness); + + // The buffer is in host-endianness. + if (llvm::sys::IsLittleEndianHost) { + ASSERT_EQ(Buff1.Data[0], std::byte{0xbe}); + ASSERT_EQ(Buff1.Data[1], std::byte{0xba}); + ASSERT_EQ(Buff1.Data[2], std::byte{0xfe}); + ASSERT_EQ(Buff1.Data[3], std::byte{0xca}); + } else { + ASSERT_EQ(Buff1.Data[0], std::byte{0xca}); + ASSERT_EQ(Buff1.Data[1], std::byte{0xfe}); + ASSERT_EQ(Buff1.Data[2], std::byte{0xba}); + ASSERT_EQ(Buff1.Data[3], std::byte{0xbe}); + } + + { + unsigned V2; + auto D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + std::memcpy(&V2, D.get(), sizeof(V)); + ASSERT_EQ(V, V2); + + D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + std::memcpy(&V2, D.get(), sizeof(V)); + ASSERT_EQ(V, V2); + } + + BitcastBuffer Buff2(sizeof(int) * 8); + { + short s1 = 0xCAFE; + short s2 = 0xBABE; + std::byte sdata[2]; + + std::memcpy(sdata, &s1, sizeof(s1)); + Buff2.pushData(sdata, 0, sizeof(s1) * 8, HostEndianness); + std::memcpy(sdata, &s2, sizeof(s2)); + Buff2.pushData(sdata, sizeof(s1) * 8, sizeof(s2) * 8, HostEndianness); + } + + if (llvm::sys::IsLittleEndianHost) { + ASSERT_EQ(Buff2.Data[0], std::byte{0xfe}); + ASSERT_EQ(Buff2.Data[1], std::byte{0xca}); + ASSERT_EQ(Buff2.Data[2], std::byte{0xbe}); + ASSERT_EQ(Buff2.Data[3], std::byte{0xba}); + } else { + ASSERT_EQ(Buff2.Data[0], std::byte{0xba}); + ASSERT_EQ(Buff2.Data[1], std::byte{0xbe}); + ASSERT_EQ(Buff2.Data[2], std::byte{0xca}); + ASSERT_EQ(Buff2.Data[3], std::byte{0xfe}); + } + + { + unsigned V; + auto D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + std::memcpy(&V, D.get(), sizeof(V)); + ASSERT_EQ(V, 0xBABECAFE); + + D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + std::memcpy(&V, D.get(), sizeof(V)); + ASSERT_EQ(V, 0xBABECAFE); + } +} diff --git a/clang/unittests/AST/ByteCode/CMakeLists.txt b/clang/unittests/AST/ByteCode/CMakeLists.txt index ea727cdd4412be..b862fb4834fbdc 100644 --- a/clang/unittests/AST/ByteCode/CMakeLists.txt +++ b/clang/unittests/AST/ByteCode/CMakeLists.txt @@ -1,4 +1,5 @@ add_clang_unittest(InterpTests + BitcastBuffer.cpp Descriptor.cpp toAPValue.cpp ) >From 7d58afa720455ae7e8297d79abe21ccc92eeae8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Thu, 21 Nov 2024 09:52:18 +0100 Subject: [PATCH 2/2] Use Bits and Bytes structs --- clang/lib/AST/ByteCode/BitcastBuffer.cpp | 49 +++++++------ clang/lib/AST/ByteCode/BitcastBuffer.h | 49 +++++++++---- .../lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 68 ++++++++++--------- .../unittests/AST/ByteCode/BitcastBuffer.cpp | 22 +++--- 4 files changed, 110 insertions(+), 78 deletions(-) diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.cpp b/clang/lib/AST/ByteCode/BitcastBuffer.cpp index 093f2b2c224093..0cc97b0b6bf190 100644 --- a/clang/lib/AST/ByteCode/BitcastBuffer.cpp +++ b/clang/lib/AST/ByteCode/BitcastBuffer.cpp @@ -10,44 +10,51 @@ using namespace clang; using namespace clang::interp; -void BitcastBuffer::pushData(const std::byte *In, size_t BitOffset, - size_t BitWidth, Endian TargetEndianness) { - for (unsigned It = 0; It != BitWidth; ++It) { - bool BitValue = bitof(In, It); +/// Returns the value of the bit in the given sequence of bytes. +static inline bool bitof(const std::byte *B, Bits BitIndex) { + return (B[BitIndex.roundToBytes()] & + (std::byte{1} << BitIndex.getOffsetInByte())) != std::byte{0}; +} + +void BitcastBuffer::pushData(const std::byte *In, Bits BitOffset, Bits BitWidth, + Endian TargetEndianness) { + for (unsigned It = 0; It != BitWidth.getQuantity(); ++It) { + bool BitValue = bitof(In, Bits(It)); if (!BitValue) continue; - unsigned DstBit; + Bits DstBit; if (TargetEndianness == Endian::Little) - DstBit = BitOffset + It; + DstBit = BitOffset + Bits(It); else - DstBit = size() - BitOffset - BitWidth + It; + DstBit = size() - BitOffset - BitWidth + Bits(It); - unsigned DstByte = (DstBit / 8); - Data[DstByte] |= std::byte{1} << (DstBit % 8); + size_t DstByte = DstBit.roundToBytes(); + Data[DstByte] |= std::byte{1} << DstBit.getOffsetInByte(); } } std::unique_ptr<std::byte[]> -BitcastBuffer::copyBits(unsigned BitOffset, unsigned BitWidth, - unsigned FullBitWidth, Endian TargetEndianness) const { - assert(BitWidth <= FullBitWidth); - assert(fullByte(FullBitWidth)); - auto Out = std::make_unique<std::byte[]>(FullBitWidth / 8); +BitcastBuffer::copyBits(Bits BitOffset, Bits BitWidth, Bits FullBitWidth, + Endian TargetEndianness) const { + assert(BitWidth.getQuantity() <= FullBitWidth.getQuantity()); + assert(FullBitWidth.isFullByte()); + auto Out = std::make_unique<std::byte[]>(FullBitWidth.roundToBytes()); - for (unsigned It = 0; It != BitWidth; ++It) { - unsigned BitIndex; + for (unsigned It = 0; It != BitWidth.getQuantity(); ++It) { + Bits BitIndex; if (TargetEndianness == Endian::Little) - BitIndex = BitOffset + It; + BitIndex = BitOffset + Bits(It); else - BitIndex = size() - BitWidth - BitOffset + It; + BitIndex = size() - BitWidth - BitOffset + Bits(It); bool BitValue = bitof(Data.get(), BitIndex); if (!BitValue) continue; - unsigned DstBit = It; - unsigned DstByte = (DstBit / 8); - Out[DstByte] |= std::byte{1} << (DstBit % 8); + + Bits DstBit = Bits(It); + size_t DstByte = DstBit.roundToBytes(); + Out[DstByte] |= std::byte{1} << DstBit.getOffsetInByte(); } return Out; diff --git a/clang/lib/AST/ByteCode/BitcastBuffer.h b/clang/lib/AST/ByteCode/BitcastBuffer.h index 0cc7d5909c47e4..19a7e1151df4c3 100644 --- a/clang/lib/AST/ByteCode/BitcastBuffer.h +++ b/clang/lib/AST/ByteCode/BitcastBuffer.h @@ -17,29 +17,48 @@ namespace interp { enum class Endian { Little, Big }; -/// Returns the value of the bit in the given sequence of bytes. -static inline bool bitof(const std::byte *B, unsigned BitIndex) { - return (B[BitIndex / 8] & (std::byte{1} << (BitIndex % 8))) != std::byte{0}; -} +/// A quantity in bits. +struct Bits { + size_t N = 0; + Bits() = default; + static Bits zero() { return Bits(0); } + explicit Bits(size_t Quantity) : N(Quantity) {} + size_t getQuantity() const { return N; } + size_t roundToBytes() const { return N / 8; } + size_t getOffsetInByte() const { return N % 8; } + bool isFullByte() const { return N % 8 == 0; } + bool nonZero() const { return N != 0; } -/// Returns whether \p N is a full byte offset or size. -static inline bool fullByte(unsigned N) { return N % 8 == 0; } + Bits operator-(Bits Other) { return Bits(N - Other.N); } + Bits operator+(Bits Other) { return Bits(N + Other.N); } + Bits operator+=(size_t O) { + N += O; + return *this; + } +}; + +/// A quantity in bytes. +struct Bytes { + size_t N; + explicit Bytes(size_t Quantity) : N(Quantity) {} + size_t getQuantity() const { return N; } + Bits toBits() const { return Bits(N * 8); } +}; /// Track what bits have been initialized to known values and which ones /// have indeterminate value. -/// All offsets are in bits. struct BitcastBuffer { - size_t FinalBitSize = 0; + Bits FinalBitSize; std::unique_ptr<std::byte[]> Data; - BitcastBuffer(size_t FinalBitSize) : FinalBitSize(FinalBitSize) { - assert(fullByte(FinalBitSize)); - unsigned ByteSize = FinalBitSize / 8; + BitcastBuffer(Bits FinalBitSize) : FinalBitSize(FinalBitSize) { + assert(FinalBitSize.isFullByte()); + unsigned ByteSize = FinalBitSize.roundToBytes(); Data = std::make_unique<std::byte[]>(ByteSize); } /// Returns the buffer size in bits. - size_t size() const { return FinalBitSize; } + Bits size() const { return FinalBitSize; } /// Returns \c true if all bits in the buffer have been initialized. bool allInitialized() const { @@ -50,15 +69,15 @@ struct BitcastBuffer { /// Push \p BitWidth bits at \p BitOffset from \p In into the buffer. /// \p TargetEndianness is the endianness of the target we're compiling for. /// \p In must hold at least \p BitWidth many bits. - void pushData(const std::byte *In, size_t BitOffset, size_t BitWidth, + void pushData(const std::byte *In, Bits BitOffset, Bits BitWidth, Endian TargetEndianness); /// Copy \p BitWidth bits at offset \p BitOffset from the buffer. /// \p TargetEndianness is the endianness of the target we're compiling for. /// /// The returned output holds exactly (\p FullBitWidth / 8) bytes. - std::unique_ptr<std::byte[]> copyBits(unsigned BitOffset, unsigned BitWidth, - unsigned FullBitWidth, + std::unique_ptr<std::byte[]> copyBits(Bits BitOffset, Bits BitWidth, + Bits FullBitWidth, Endian TargetEndianness) const; }; diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index 775c121a8ca3ec..44bd69c0eefcec 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -34,7 +34,7 @@ using namespace clang::interp; /// Used to iterate over pointer fields. using DataFunc = llvm::function_ref<bool(const Pointer &P, PrimType Ty, - size_t BitOffset, bool PackedBools)>; + Bits BitOffset, bool PackedBools)>; #define BITCAST_TYPE_SWITCH(Expr, B) \ do { \ @@ -79,7 +79,7 @@ static void swapBytes(std::byte *M, size_t N) { /// We use this to recursively iterate over all fields and elements of a pointer /// and extract relevant data for a bitcast. -static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, +static bool enumerateData(const Pointer &P, const Context &Ctx, Bits Offset, DataFunc F) { const Descriptor *FieldDesc = P.getFieldDesc(); assert(FieldDesc); @@ -124,15 +124,15 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, for (const Record::Field &Fi : R->fields()) { Pointer Elem = P.atField(Fi.Offset); - size_t BitOffset = - Offset + Layout.getFieldOffset(Fi.Decl->getFieldIndex()); + Bits BitOffset = + Offset + Bits(Layout.getFieldOffset(Fi.Decl->getFieldIndex())); Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); } for (const Record::Base &B : R->bases()) { Pointer Elem = P.atField(B.Offset); CharUnits ByteOffset = Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); - size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); + Bits BitOffset = Offset + Bits(Ctx.getASTContext().toBits(ByteOffset)); Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); } @@ -144,7 +144,7 @@ static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, static bool enumeratePointerFields(const Pointer &P, const Context &Ctx, DataFunc F) { - return enumerateData(P, Ctx, 0, F); + return enumerateData(P, Ctx, Bits::zero(), F); } // This function is constexpr if and only if To, From, and the types of @@ -223,7 +223,7 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, return enumeratePointerFields( FromPtr, Ctx, - [&](const Pointer &P, PrimType T, size_t BitOffset, + [&](const Pointer &P, PrimType T, Bits BitOffset, bool PackedBools) -> bool { // if (!P.isInitialized()) { // assert(false && "Implement uninitialized value tracking"); @@ -236,8 +236,8 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, assert(false && "Implement casting to pointer types"); CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); - unsigned BitWidth = ASTCtx.toBits(ObjectReprChars); - unsigned FullBitWidth = BitWidth; + Bits BitWidth = Bits(ASTCtx.toBits(ObjectReprChars)); + Bits FullBitWidth = BitWidth; auto Buff = std::make_unique<std::byte[]>(ObjectReprChars.getQuantity()); // Work around floating point types that contain unused padding bytes. @@ -245,26 +245,27 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr, // fundamental type with padding bytes. if (T == PT_Float) { const Floating &F = P.deref<Floating>(); - unsigned NumBits = - llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics()); - assert(fullByte(NumBits)); - assert(NumBits <= FullBitWidth); + Bits NumBits = Bits( + llvm::APFloatBase::getSizeInBits(F.getAPFloat().getSemantics())); + assert(NumBits.isFullByte()); + assert(NumBits.getQuantity() <= FullBitWidth.getQuantity()); F.bitcastToMemory(Buff.get()); // Now, only (maybe) swap the actual size of the float, excluding the // padding bits. if (llvm::sys::IsBigEndianHost) - swapBytes(Buff.get(), NumBits / 8); + swapBytes(Buff.get(), NumBits.roundToBytes()); } else { if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) - BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); + BitWidth = Bits(std::min(FD->getBitWidthValue(ASTCtx), + (unsigned)FullBitWidth.getQuantity())); else if (T == PT_Bool && PackedBools) - BitWidth = 1; + BitWidth = Bits(1); BITCAST_TYPE_SWITCH(T, { P.deref<T>().bitcastToMemory(Buff.get()); }); if (llvm::sys::IsBigEndianHost) - swapBytes(Buff.get(), FullBitWidth / 8); + swapBytes(Buff.get(), FullBitWidth.roundToBytes()); } Buffer.pushData(Buff.get(), BitOffset, BitWidth, TargetEndianness); @@ -279,7 +280,7 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, assert(Ptr.isBlockPointer()); assert(Buff); - size_t BitSize = BuffSize * 8; + Bits BitSize = Bytes(BuffSize).toBits(); BitcastBuffer Buffer(BitSize); if (!CheckBitcastType(S, OpPC, Ptr.getType(), /*IsToType=*/false)) return false; @@ -291,7 +292,7 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, const ASTContext &ASTCtx = S.getASTContext(); Endian TargetEndianness = ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; - auto B = Buffer.copyBits(0, BitSize, BitSize, TargetEndianness); + auto B = Buffer.copyBits(Bits::zero(), BitSize, BitSize, TargetEndianness); std::memcpy(Buff, B.get(), BuffSize); @@ -318,7 +319,7 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, const ASTContext &ASTCtx = S.getASTContext(); CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(ToType); - BitcastBuffer Buffer(ASTCtx.toBits(ObjectReprChars)); + BitcastBuffer Buffer(Bits(ASTCtx.toBits(ObjectReprChars))); readPointerToBuffer(S.getContext(), FromPtr, Buffer, /*ReturnOnUninit=*/false); @@ -327,43 +328,44 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, ASTCtx.getTargetInfo().isLittleEndian() ? Endian::Little : Endian::Big; bool Success = enumeratePointerFields( ToPtr, S.getContext(), - [&](const Pointer &P, PrimType T, size_t BitOffset, + [&](const Pointer &P, PrimType T, Bits BitOffset, bool PackedBools) -> bool { CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(P.getType()); - unsigned FullBitWidth = ASTCtx.toBits(ObjectReprChars); + Bits FullBitWidth = Bits(ASTCtx.toBits(ObjectReprChars)); if (T == PT_Float) { const auto &Semantics = ASTCtx.getFloatTypeSemantics(P.getType()); - unsigned NumBits = llvm::APFloatBase::getSizeInBits(Semantics); - assert(fullByte(NumBits)); - assert(NumBits <= FullBitWidth); + Bits NumBits = Bits(llvm::APFloatBase::getSizeInBits(Semantics)); + assert(NumBits.isFullByte()); + assert(NumBits.getQuantity() <= FullBitWidth.getQuantity()); auto M = Buffer.copyBits(BitOffset, NumBits, FullBitWidth, TargetEndianness); if (llvm::sys::IsBigEndianHost) - swapBytes(M.get(), NumBits / 8); + swapBytes(M.get(), NumBits.roundToBytes()); P.deref<Floating>() = Floating::bitcastFromMemory(M.get(), Semantics); P.initialize(); return true; } - unsigned BitWidth; + Bits BitWidth; if (const FieldDecl *FD = P.getField(); FD && FD->isBitField()) - BitWidth = std::min(FD->getBitWidthValue(ASTCtx), FullBitWidth); + BitWidth = Bits(std::min(FD->getBitWidthValue(ASTCtx), + (unsigned)FullBitWidth.getQuantity())); else if (T == PT_Bool && PackedBools) - BitWidth = 1; + BitWidth = Bits(1); else - BitWidth = ASTCtx.toBits(ObjectReprChars); + BitWidth = FullBitWidth; auto Memory = Buffer.copyBits(BitOffset, BitWidth, FullBitWidth, TargetEndianness); if (llvm::sys::IsBigEndianHost) - swapBytes(Memory.get(), FullBitWidth / 8); + swapBytes(Memory.get(), FullBitWidth.roundToBytes()); BITCAST_TYPE_SWITCH_FIXED_SIZE(T, { - if (BitWidth > 0) + if (BitWidth.nonZero()) P.deref<T>() = T::bitcastFromMemory(Memory.get(), T::bitWidth()) - .truncate(BitWidth); + .truncate(BitWidth.getQuantity()); else P.deref<T>() = T::zero(); }); diff --git a/clang/unittests/AST/ByteCode/BitcastBuffer.cpp b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp index 0f45d74fe5e208..02c38a22013630 100644 --- a/clang/unittests/AST/ByteCode/BitcastBuffer.cpp +++ b/clang/unittests/AST/ByteCode/BitcastBuffer.cpp @@ -11,16 +11,17 @@ using namespace clang; using namespace clang::interp; TEST(BitcastBuffer, PushData) { - BitcastBuffer Buff1(sizeof(int) * 8); + BitcastBuffer Buff1(Bytes(sizeof(int)).toBits()); const unsigned V = 0xCAFEBABE; + Bits VSize = Bytes(sizeof(V)).toBits(); std::byte Data[sizeof(V)]; std::memcpy(Data, &V, sizeof(V)); Endian HostEndianness = llvm::sys::IsLittleEndianHost ? Endian::Little : Endian::Big; - Buff1.pushData(Data, 0, sizeof(V) * 8, HostEndianness); + Buff1.pushData(Data, Bits::zero(), VSize, HostEndianness); // The buffer is in host-endianness. if (llvm::sys::IsLittleEndianHost) { @@ -37,25 +38,26 @@ TEST(BitcastBuffer, PushData) { { unsigned V2; - auto D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + auto D = Buff1.copyBits(Bits::zero(), VSize, VSize, Endian::Little); std::memcpy(&V2, D.get(), sizeof(V)); ASSERT_EQ(V, V2); - D = Buff1.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + D = Buff1.copyBits(Bits::zero(), VSize, VSize, Endian::Big); std::memcpy(&V2, D.get(), sizeof(V)); ASSERT_EQ(V, V2); } - BitcastBuffer Buff2(sizeof(int) * 8); + BitcastBuffer Buff2(Bytes(sizeof(int)).toBits()); { short s1 = 0xCAFE; short s2 = 0xBABE; std::byte sdata[2]; std::memcpy(sdata, &s1, sizeof(s1)); - Buff2.pushData(sdata, 0, sizeof(s1) * 8, HostEndianness); + Buff2.pushData(sdata, Bits::zero(), Bits(sizeof(s1) * 8), HostEndianness); std::memcpy(sdata, &s2, sizeof(s2)); - Buff2.pushData(sdata, sizeof(s1) * 8, sizeof(s2) * 8, HostEndianness); + Buff2.pushData(sdata, Bits(sizeof(s1) * 8), Bits(sizeof(s2) * 8), + HostEndianness); } if (llvm::sys::IsLittleEndianHost) { @@ -72,11 +74,13 @@ TEST(BitcastBuffer, PushData) { { unsigned V; - auto D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Little); + auto D = Buff2.copyBits(Bits::zero(), Bits(sizeof(V) * 8), + Bits(sizeof(V) * 8), Endian::Little); std::memcpy(&V, D.get(), sizeof(V)); ASSERT_EQ(V, 0xBABECAFE); - D = Buff2.copyBits(0, sizeof(V) * 8, sizeof(V) * 8, Endian::Big); + D = Buff2.copyBits(Bits::zero(), Bits(sizeof(V) * 8), Bits(sizeof(V) * 8), + Endian::Big); std::memcpy(&V, D.get(), sizeof(V)); ASSERT_EQ(V, 0xBABECAFE); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits