llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Timm Baeder (tbaederr) <details> <summary>Changes</summary> This is a subset of #<!-- -->68288, with hopefully narrower scope. It does not support bitcasting to non-integral types yet. The tests are from #<!-- -->68288 and partially from #<!-- -->74775. The `BitcastBuffer` struct is currently always working in single bits, but I plan to (try to) optimize this for the common full-byte case. --- Patch is 36.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112126.diff 14 Files Affected: - (modified) clang/lib/AST/ByteCode/Boolean.h (+8) - (modified) clang/lib/AST/ByteCode/Compiler.cpp (+64) - (modified) clang/lib/AST/ByteCode/Compiler.h (+1) - (modified) clang/lib/AST/ByteCode/Floating.h (+5) - (modified) clang/lib/AST/ByteCode/Integral.h (+12) - (modified) clang/lib/AST/ByteCode/IntegralAP.h (+6) - (modified) clang/lib/AST/ByteCode/Interp.cpp (+17) - (modified) clang/lib/AST/ByteCode/Interp.h (+30) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+1) - (added) clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp (+336) - (added) clang/lib/AST/ByteCode/InterpBuiltinBitCast.h (+23) - (modified) clang/lib/AST/ByteCode/Opcodes.td (+6) - (modified) clang/lib/AST/CMakeLists.txt (+1) - (added) clang/test/AST/ByteCode/builtin-bit-cast.cpp (+347) ``````````diff diff --git a/clang/lib/AST/ByteCode/Boolean.h b/clang/lib/AST/ByteCode/Boolean.h index c568b557574e2b..14c61686d6c692 100644 --- a/clang/lib/AST/ByteCode/Boolean.h +++ b/clang/lib/AST/ByteCode/Boolean.h @@ -81,6 +81,14 @@ class Boolean final { Boolean truncate(unsigned TruncBits) const { return *this; } + static Boolean bitcastFromMemory(const std::byte *Buff, unsigned BitWidth) { + assert(BitWidth == 8); + bool Val = static_cast<bool>(*Buff); + return Boolean(Val); + } + + void bitcastToMemory(std::byte *Buff) { std::memcpy(Buff, &V, sizeof(V)); } + void print(llvm::raw_ostream &OS) const { OS << (V ? 
"true" : "false"); } std::string toDiagnosticString(const ASTContext &Ctx) const { std::string NameStr; diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index b2663714340b93..4315f77b0581f9 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -470,6 +470,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) { return this->emitDecayPtr(*FromT, *ToT, CE); } + case CK_LValueToRValueBitCast: + return this->emitBuiltinBitCast(CE); + case CK_IntegralToBoolean: case CK_FixedPointToBoolean: case CK_BooleanToSignedIntegral: @@ -6400,6 +6403,67 @@ bool Compiler<Emitter>::emitDestruction(const Descriptor *Desc, return this->emitRecordDestruction(Desc->ElemRecord, Loc); } +// This function is constexpr if and only if To, From, and the types of +// all subobjects of To and From are types T such that... +// (3.1) - is_union_v<T> is false; +// (3.2) - is_pointer_v<T> is false; +// (3.3) - is_member_pointer_v<T> is false; +// (3.4) - is_volatile_v<T> is false; and +// (3.5) - T has no non-static data members of reference type +template <class Emitter> +bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) { + const Expr *SubExpr = E->getSubExpr(); + QualType FromType = SubExpr->getType(); + QualType ToType = E->getType(); + std::optional<PrimType> ToT = classify(ToType); + + assert(!DiscardResult && "Implement"); + + if (ToType->isNullPtrType()) { + if (!this->discard(SubExpr)) + return false; + + return this->emitNullPtr(nullptr, E); + } + + if (FromType->isNullPtrType() && ToT) { + if (!this->discard(SubExpr)) + return false; + + return visitZeroInitializer(*ToT, ToType, E); + } + assert(!ToType->isReferenceType()); + + // Get a pointer to the value-to-cast on the stack. + if (!this->visit(SubExpr)) + return false; + + if (!ToT || ToT == PT_Ptr) { + // Conversion to an array or record type. 
+ assert(false && "Implement"); + } + assert(ToT); + + const llvm::fltSemantics *TargetSemantics = nullptr; + if (ToT == PT_Float) + TargetSemantics = &Ctx.getFloatSemantics(ToType); + + // Conversion to a primitive type. FromType can be another + // primitive type, or a record/array. + bool ToTypeIsUChar = (ToType->isSpecificBuiltinType(BuiltinType::UChar) || + ToType->isSpecificBuiltinType(BuiltinType::Char_U)); + uint32_t ResultBitWidth = std::max(Ctx.getBitWidth(ToType), 8u); + + if (!this->emitBitCast(*ToT, ToTypeIsUChar || ToType->isStdByteType(), + ResultBitWidth, TargetSemantics, E)) + return false; + + if (DiscardResult) + return this->emitPop(*ToT, E); + + return true; +} + namespace clang { namespace interp { diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 4253e7b3248c9f..5fbde244b1d9e9 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -373,6 +373,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>, unsigned collectBaseOffset(const QualType BaseType, const QualType DerivedType); bool emitLambdaStaticInvokerBody(const CXXMethodDecl *MD); + bool emitBuiltinBitCast(const CastExpr *E); bool compileConstructor(const CXXConstructorDecl *Ctor); bool compileDestructor(const CXXDestructorDecl *Dtor); diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h index 114487821880fb..be38e6991dad75 100644 --- a/clang/lib/AST/ByteCode/Floating.h +++ b/clang/lib/AST/ByteCode/Floating.h @@ -135,6 +135,11 @@ class Floating final { return Floating(APFloat(Sem, API)); } + void bitcastToMemory(std::byte *Buff) { + llvm::APInt API = F.bitcastToAPInt(); + llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8); + } + // === Serialization support === size_t bytesToSerialize() const { return sizeof(llvm::fltSemantics *) + diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index e06ec1669259da..9b07e86bbc8dd9 100644 --- 
a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -151,6 +151,18 @@ template <unsigned Bits, bool Signed> class Integral final { return Compare(V, RHS.V); } + void bitcastToMemory(std::byte *Dest) const { + std::memcpy(Dest, &V, sizeof(V)); + } + + static Integral bitcastFromMemory(const std::byte *Src, unsigned BitWidth) { + assert(BitWidth == sizeof(ReprT) * 8); + ReprT V; + + std::memcpy(&V, Src, sizeof(ReprT)); + return Integral(V); + } + std::string toDiagnosticString(const ASTContext &Ctx) const { std::string NameStr; llvm::raw_string_ostream OS(NameStr); diff --git a/clang/lib/AST/ByteCode/IntegralAP.h b/clang/lib/AST/ByteCode/IntegralAP.h index a4d656433344b7..58e413e4d06c12 100644 --- a/clang/lib/AST/ByteCode/IntegralAP.h +++ b/clang/lib/AST/ByteCode/IntegralAP.h @@ -173,6 +173,12 @@ template <bool Signed> class IntegralAP final { return IntegralAP<false>(Copy); } + void bitcastToMemory(std::byte *Dest) const { assert(false); } + + static IntegralAP bitcastFromMemory(const std::byte *Src, unsigned BitWidth) { + return IntegralAP(); + } + ComparisonCategoryResult compare(const IntegralAP &RHS) const { assert(Signed == RHS.isSigned()); assert(bitWidth() == RHS.bitWidth()); diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 95715655cc9bbd..0f830076f131af 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1539,6 +1539,23 @@ bool CastPointerIntegralAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { return true; } +bool CheckBitCast(InterpState &S, CodePtr OpPC, bool HasIndeterminateBits, + bool TargetIsUCharOrByte) { + // This is always fine. + if (!HasIndeterminateBits) + return true; + + // Indeterminate bits can only be bitcast to unsigned char or std::byte. 
+ if (TargetIsUCharOrByte) + return true; + + const Expr *E = S.Current->getExpr(OpPC); + QualType ExprType = E->getType(); + S.FFDiag(E, diag::note_constexpr_bit_cast_indet_dest) + << ExprType << S.getLangOpts().CharIsSigned << E->getSourceRange(); + return false; +} + // https://github.com/llvm/llvm-project/issues/102513 #if defined(_WIN32) && !defined(__clang__) && !defined(NDEBUG) #pragma optimize("", off) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index dece95971b7617..e41fe1b2e55e08 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -20,6 +20,7 @@ #include "Floating.h" #include "Function.h" #include "FunctionPointer.h" +#include "InterpBuiltinBitCast.h" #include "InterpFrame.h" #include "InterpStack.h" #include "InterpState.h" @@ -162,6 +163,8 @@ bool CallPtr(InterpState &S, CodePtr OpPC, uint32_t ArgSize, const CallExpr *CE); bool CheckLiteralType(InterpState &S, CodePtr OpPC, const Type *T); bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index); +bool CheckBitCast(InterpState &S, CodePtr OpPC, bool HasIndeterminateBits, + bool TargetIsUCharOrByte); template <typename T> static bool handleOverflow(InterpState &S, CodePtr OpPC, const T &SrcValue) { @@ -2995,6 +2998,33 @@ bool CheckNewTypeMismatchArray(InterpState &S, CodePtr OpPC, const Expr *E) { return CheckNewTypeMismatch(S, OpPC, E, static_cast<uint64_t>(Size)); } bool InvalidNewDeleteExpr(InterpState &S, CodePtr OpPC, const Expr *E); + +template <PrimType Name, class T = typename PrimConv<Name>::T> +inline bool BitCast(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte, + uint32_t ResultBitWidth, const llvm::fltSemantics *Sem) { + const Pointer &FromPtr = S.Stk.pop<Pointer>(); + + size_t BuffSize = ResultBitWidth / 8; + llvm::SmallVector<std::byte> Buff(BuffSize); + bool HasIndeterminateBits = false; + + if (!DoBitCast(S, OpPC, FromPtr, Buff.data(), BuffSize, HasIndeterminateBits)) + return false; + + if 
(!CheckBitCast(S, OpPC, HasIndeterminateBits, TargetIsUCharOrByte)) + return false; + + if constexpr (std::is_same_v<T, Floating>) { + assert(false && "Implement"); + // assert(Sem); + // S.Stk.push<Floating>(Floating::bitcastFromMemory(Buff.data(), *Sem)); + } else { + assert(!Sem); + S.Stk.push<T>(T::bitcastFromMemory(Buff.data(), ResultBitWidth)); + } + return true; +} + //===----------------------------------------------------------------------===// // Read opcode arguments //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ec27aebf84bd80..d0d569cb46381f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -10,6 +10,7 @@ #include "Compiler.h" #include "EvalEmitter.h" #include "Interp.h" +#include "InterpBuiltinBitCast.h" #include "PrimType.h" #include "clang/AST/OSLog.h" #include "clang/AST/RecordLayout.h" diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp new file mode 100644 index 00000000000000..656095a44c0f46 --- /dev/null +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -0,0 +1,336 @@ +//===-------------------- InterpBuiltinBitCast.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InterpBuiltinBitCast.h" +#include "Boolean.h" +#include "Context.h" +#include "FixedPoint.h" +#include "Floating.h" +#include "Integral.h" +#include "IntegralAP.h" +#include "InterpState.h" +#include "MemberPointer.h" +#include "Pointer.h" +#include "Record.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecordLayout.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/BitVector.h" +#include <cmath> + +using namespace clang; +using namespace clang::interp; + +/// Used to iterate over pointer fields. +using DataFunc = + llvm::function_ref<bool(const Pointer &P, PrimType Ty, size_t BitOffset)>; + +#define BITCAST_TYPE_SWITCH(Expr, B) \ + do { \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + TYPE_SWITCH_CASE(PT_IntAP, B) \ + TYPE_SWITCH_CASE(PT_IntAPS, B) \ + TYPE_SWITCH_CASE(PT_Bool, B) \ + default: \ + llvm_unreachable("Unhandled bitcast type"); \ + } \ + } while (0) + +/// Float is a special case that sometimes needs the floating point semantics +/// to be available. 
+#define BITCAST_TYPE_SWITCH_WITH_FLOAT(Expr, B) \ + do { \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + TYPE_SWITCH_CASE(PT_IntAP, B) \ + TYPE_SWITCH_CASE(PT_IntAPS, B) \ + TYPE_SWITCH_CASE(PT_Bool, B) \ + TYPE_SWITCH_CASE(PT_Float, B) \ + default: \ + llvm_unreachable("Unhandled bitcast type"); \ + } \ + } while (0) + +static void swapBytes(std::byte *M, size_t N) { + for (size_t I = 0; I != (N / 2); ++I) + std::swap(M[I], M[N - 1 - I]); +} + +/// Track what bits have been initialized to known values and which ones +/// have indeterminate value. +/// All offsets are in bits. +struct BitcastBuffer { + llvm::BitVector Initialized; + llvm::BitVector Data; + + BitcastBuffer() = default; + + size_t size() const { + assert(Initialized.size() == Data.size()); + return Initialized.size(); + } + + const std::byte *data() const { return getBytes(0); } + + const std::byte *getBytes(size_t BitOffset) const { + assert(BitOffset % 8 == 0); + return reinterpret_cast<const std::byte *>(Data.getData().data()) + + (BitOffset / 8); + } + + bool allInitialized() const { return Initialized.all(); } + + void pushData(const std::byte *data, size_t BitOffset, size_t BitWidth) { + assert(BitOffset >= Data.size()); + Data.reserve(BitOffset + BitWidth); + Initialized.reserve(BitOffset + BitWidth); + + // First, fill up the bit vector until BitOffset. The bits are all 0 + // but we record them as indeterminate. 
+ { + Data.resize(BitOffset, false); + Initialized.resize(BitOffset, false); + } + + size_t BitsHandled = 0; + // Read all full bytes first + for (size_t I = 0; I != BitWidth / 8; ++I) { + for (unsigned X = 0; X != 8; ++X) { + Data.push_back((data[I] & std::byte(1 << X)) != std::byte{0}); + Initialized.push_back(true); + ++BitsHandled; + } + } + + // Rest of the bits. + assert((BitWidth - BitsHandled) < 8); + for (size_t I = 0, E = (BitWidth - BitsHandled); I != E; ++I) { + Data.push_back((data[BitWidth / 8] & std::byte(1 << I)) != std::byte{0}); + Initialized.push_back(true); + ++BitsHandled; + } + } +}; + +/// We use this to recursively iterate over all fields and elements of a pointer +/// and extract relevant data for a bitcast. +static bool enumerateData(const Pointer &P, const Context &Ctx, size_t Offset, + DataFunc F) { + const Descriptor *FieldDesc = P.getFieldDesc(); + assert(FieldDesc); + + // Primitives. + if (FieldDesc->isPrimitive()) + return F(P, FieldDesc->getPrimType(), Offset); + + // Primitive arrays. + if (FieldDesc->isPrimitiveArray()) { + QualType ElemType = + FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType(); + size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); + PrimType ElemT = *Ctx.classify(ElemType); + bool Ok = true; + for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { + Ok = Ok && F(P.atIndex(I), ElemT, Offset); + Offset += ElemSizeInBits; + } + return Ok; + } + + // Composite arrays. + if (FieldDesc->isCompositeArray()) { + QualType ElemType = + FieldDesc->getType()->getAsArrayTypeUnsafe()->getElementType(); + size_t ElemSizeInBits = Ctx.getASTContext().getTypeSize(ElemType); + for (unsigned I = 0; I != FieldDesc->getNumElems(); ++I) { + enumerateData(P.atIndex(I).narrow(), Ctx, Offset, F); + Offset += ElemSizeInBits; + } + return true; + } + + // Records. 
+ if (FieldDesc->isRecord()) { + const Record *R = FieldDesc->ElemRecord; + const ASTRecordLayout &Layout = + Ctx.getASTContext().getASTRecordLayout(R->getDecl()); + bool Ok = true; + for (const auto &B : R->bases()) { + Pointer Elem = P.atField(B.Offset); + CharUnits ByteOffset = + Layout.getBaseClassOffset(cast<CXXRecordDecl>(B.Decl)); + size_t BitOffset = Offset + Ctx.getASTContext().toBits(ByteOffset); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + + for (unsigned I = 0; I != R->getNumFields(); ++I) { + const Record::Field *Fi = R->getField(I); + Pointer Elem = P.atField(Fi->Offset); + size_t BitOffset = Offset + Layout.getFieldOffset(I); + Ok = Ok && enumerateData(Elem, Ctx, BitOffset, F); + } + return Ok; + } + + llvm_unreachable("Unhandled data type"); +} + +static bool enumeratePointerFields(const Pointer &P, const Context &Ctx, + DataFunc F) { + return enumerateData(P, Ctx, 0, F); +} + +// This function is constexpr if and only if To, From, and the types of +// all subobjects of To and From are types T such that... +// (3.1) - is_union_v<T> is false; +// (3.2) - is_pointer_v<T> is false; +// (3.3) - is_member_pointer_v<T> is false; +// (3.4) - is_volatile_v<T> is false; and +// (3.5) - T has no non-static data members of reference type +// +// NOTE: This is a version of checkBitCastConstexprEligibilityType() in +// ExprConstant.cpp. 
+static bool CheckBitcastType(InterpState &S, CodePtr OpPC, QualType T, + bool IsToType) { + enum { + E_Union = 0, + E_Pointer, + E_MemberPointer, + E_Volatile, + E_Reference, + }; + enum { C_Member, C_Base }; + + auto diag = [&](int Reason) -> bool { + const Expr *E = S.Current->getExpr(OpPC); + S.FFDiag(E, diag::note_constexpr_bit_cast_invalid_type) + << static_cast<int>(IsToType) << (Reason == E_Reference) << Reason + << E->getSourceRange(); + return false; + }; + auto note = [&](int Construct, QualType NoteType, SourceRange NoteRange) { + S.Note(NoteRange.getBegin(), diag::note_constexpr_bit_cast_invalid_subtype) + << NoteType << Construct << T << NoteRange; + return false; + }; + + T = T.getCanonicalType(); + + if (T->isUnionType()) + return diag(E_Union); + if (T->isPointerType()) + return diag(E_Pointer); + if (T->isMemberPointerType()) + return diag(E_MemberPointer); + if (T.isVolatileQualified()) + return diag(E_Volatile); + + if (const RecordDecl *RD = T->getAsRecordDecl()) { + if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + for (const CXXBaseSpecifier &BS : CXXRD->bases()) { + if (!CheckBitcastType(S, OpPC, BS.getType(), IsToType)) + return note(C_Base, BS.getType(), BS.getBeginLoc()); + } + } + for (const FieldDecl *FD : RD->fields()) { + if (FD->getType()->isReferenceType()) + return diag(E_Reference); + if (!CheckBitcastType(S, OpPC, FD->getType(), IsToType)) + return note(C_Member, FD->getType(), FD->getSourceRange()); + } + } + + if (T->isArrayType() && + !CheckBitcastType(S, OpPC, S.getASTContext()... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/112126 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits