https://github.com/xTachyon updated https://github.com/llvm/llvm-project/pull/134551
>From bcd518bfba3a8969111a8cb84662861152946c8e Mon Sep 17 00:00:00 2001 From: Andrei Damian <[email protected]> Date: Sun, 6 Apr 2025 19:55:59 +0300 Subject: [PATCH 1/3] clang_EvalResult_getAsCXString impl --- clang/include/clang-c/CXString.h | 16 ++++++- clang/include/clang-c/Index.h | 17 +++++-- clang/tools/libclang/CIndex.cpp | 43 +++++++---------- clang/tools/libclang/CXString.cpp | 57 +++++++++++++++++------ clang/tools/libclang/libclang.map | 2 + clang/unittests/libclang/LibclangTest.cpp | 40 ++++++++++++++++ 6 files changed, 129 insertions(+), 46 deletions(-) diff --git a/clang/include/clang-c/CXString.h b/clang/include/clang-c/CXString.h index 63dce4d140ce2..14d2eebbc8733 100644 --- a/clang/include/clang-c/CXString.h +++ b/clang/include/clang-c/CXString.h @@ -16,6 +16,7 @@ #include "clang-c/ExternC.h" #include "clang-c/Platform.h" +#include <stddef.h> LLVM_CLANG_C_EXTERN_C_BEGIN @@ -44,6 +45,11 @@ typedef struct { unsigned Count; } CXStringSet; +typedef struct { + const char *string; + size_t length; +} CStringInfo; + /** * Retrieve the character data associated with the given string. * @@ -53,6 +59,15 @@ typedef struct { */ CINDEX_LINKAGE const char *clang_getCString(CXString string); +/** + * Retrieve the character data associated with the given string and its length. + * + * The returned length might be bigger than strlen(.string) if the string + * contains nul bytes. This function has the same requirements and guarantees as + * clang_getCString. + */ +CINDEX_LINKAGE CStringInfo clang_getCStringInfo(CXString string); + /** * Free the given string. 
*/ @@ -70,4 +85,3 @@ CINDEX_LINKAGE void clang_disposeStringSet(CXStringSet *set); LLVM_CLANG_C_EXTERN_C_END #endif - diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index f13d9c9307b40..6c71050f89acc 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -6062,12 +6062,21 @@ clang_EvalResult_getAsUnsigned(CXEvalResult E); CINDEX_LINKAGE double clang_EvalResult_getAsDouble(CXEvalResult E); /** - * Returns the evaluation result as a constant string if the - * kind is other than Int or float. User must not free this pointer, - * instead call clang_EvalResult_dispose on the CXEvalResult returned - * by clang_Cursor_Evaluate. + * This function behaves the same as clang_EvalResult_getAsCXString, with 2 + * exceptions: + * - the string literal will be truncated if a nul byte is found in the string. + * For this reason clang_EvalResult_getAsCXString is recommended. + * - the caller must not free this pointer; instead call + * clang_EvalResult_dispose on the CXEvalResult returned by + * clang_Cursor_Evaluate. */ CINDEX_LINKAGE const char *clang_EvalResult_getAsStr(CXEvalResult E); +/** + * Returns the evaluation result as a CXString if the + * kind is other than Int or float. This might include zero bytes. + * The caller is responsible for freeing the CXString using clang_disposeString. + */ +CINDEX_LINKAGE CXString clang_EvalResult_getAsCXString(CXEvalResult E); /** * Disposes the created Eval memory. 
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 15eec87652451..190b6b730e8bf 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -4571,13 +4571,13 @@ struct ExprEvalResult { unsigned long long unsignedVal; long long intVal; double floatVal; - char *stringVal; + CXString stringVal; } EvalData; bool IsUnsignedInt; ~ExprEvalResult() { if (EvalType != CXEval_UnExposed && EvalType != CXEval_Float && EvalType != CXEval_Int) { - delete[] EvalData.stringVal; + clang_disposeString(EvalData.stringVal); } } }; @@ -4633,7 +4633,15 @@ const char *clang_EvalResult_getAsStr(CXEvalResult E) { if (!E) { return nullptr; } - return ((ExprEvalResult *)E)->EvalData.stringVal; + return clang_getCString(((ExprEvalResult *)E)->EvalData.stringVal); +} + +CXString clang_EvalResult_getAsCXString(CXEvalResult E) { + if (!E) { + return cxstring::createNull(); + } + auto data = clang_getCStringInfo(((ExprEvalResult *)E)->EvalData.stringVal); + return cxstring::createDup(StringRef(data.string, data.length)); } static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { @@ -4697,10 +4705,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { result->EvalType = CXEval_StrLiteral; } - std::string strRef(StrE->getString().str()); - result->EvalData.stringVal = new char[strRef.size() + 1]; - strncpy(result->EvalData.stringVal, strRef.c_str(), strRef.size()); - result->EvalData.stringVal[strRef.size()] = '\0'; + result->EvalData.stringVal = cxstring::createDup(StrE->getString()); return result.release(); } } else if (expr->getStmtClass() == Stmt::ObjCStringLiteralClass || @@ -4717,10 +4722,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { result->EvalType = CXEval_StrLiteral; } - std::string strRef(StrE->getString().str()); - result->EvalData.stringVal = new char[strRef.size() + 1]; - strncpy(result->EvalData.stringVal, strRef.c_str(), strRef.size()); - 
result->EvalData.stringVal[strRef.size()] = '\0'; + result->EvalData.stringVal = cxstring::createDup(StrE->getString()); return result.release(); } @@ -4734,13 +4736,8 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { callExpr = static_cast<CallExpr *>(CC->getSubExpr()); StringLiteral *S = getCFSTR_value(callExpr); if (S) { - std::string strLiteral(S->getString().str()); result->EvalType = CXEval_CFStr; - - result->EvalData.stringVal = new char[strLiteral.size() + 1]; - strncpy(result->EvalData.stringVal, strLiteral.c_str(), - strLiteral.size()); - result->EvalData.stringVal[strLiteral.size()] = '\0'; + result->EvalData.stringVal = cxstring::createDup(S->getString()); return result.release(); } } @@ -4760,12 +4757,8 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { StringLiteral *S = getCFSTR_value(callExpr); if (S) { - std::string strLiteral(S->getString().str()); result->EvalType = CXEval_CFStr; - result->EvalData.stringVal = new char[strLiteral.size() + 1]; - strncpy(result->EvalData.stringVal, strLiteral.c_str(), - strLiteral.size()); - result->EvalData.stringVal[strLiteral.size()] = '\0'; + result->EvalData.stringVal = cxstring::createDup(S->getString()); return result.release(); } } @@ -4773,11 +4766,9 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) { DeclRefExpr *D = static_cast<DeclRefExpr *>(expr); ValueDecl *V = D->getDecl(); if (V->getKind() == Decl::Function) { - std::string strName = V->getNameAsString(); result->EvalType = CXEval_Other; - result->EvalData.stringVal = new char[strName.size() + 1]; - strncpy(result->EvalData.stringVal, strName.c_str(), strName.size()); - result->EvalData.stringVal[strName.size()] = '\0'; + result->EvalData.stringVal = + cxstring::createDup(StringRef(V->getNameAsString())); return result.release(); } } diff --git a/clang/tools/libclang/CXString.cpp b/clang/tools/libclang/CXString.cpp index aaa8f8eeb67a1..8ad981f5533dc 100644 --- 
a/clang/tools/libclang/CXString.cpp +++ b/clang/tools/libclang/CXString.cpp @@ -25,14 +25,19 @@ enum CXStringFlag { /// CXString contains a 'const char *' that it doesn't own. CXS_Unmanaged, - /// CXString contains a 'const char *' that it allocated with malloc(). - CXS_Malloc, + /// CXString contains a 'CStringImpl' that it allocated with malloc(). + CXS_MallocWithSize, /// CXString contains a CXStringBuf that needs to be returned to the /// CXStringPool. CXS_StringBuf }; +struct CStringImpl { + size_t length; + char buffer[sizeof(length)]; +}; + namespace clang { namespace cxstring { @@ -71,10 +76,7 @@ CXString createDup(const char *String) { if (String[0] == '\0') return createEmpty(); - CXString Str; - Str.data = strdup(String); - Str.private_flags = CXS_Malloc; - return Str; + return createDup(StringRef(String)); } CXString createRef(StringRef String) { @@ -91,12 +93,18 @@ CXString createRef(StringRef String) { } CXString createDup(StringRef String) { + auto toAllocate = + sizeof(size_t) + std::max(sizeof(size_t), String.size() + 1); + assert(toAllocate >= sizeof(CStringImpl)); + auto ptr = static_cast<CStringImpl *>(llvm::safe_malloc(toAllocate)); + + ptr->length = String.size(); + memcpy(ptr->buffer, String.data(), String.size()); + ptr->buffer[String.size()] = 0; + CXString Result; - char *Spelling = static_cast<char *>(llvm::safe_malloc(String.size() + 1)); - memmove(Spelling, String.data(), String.size()); - Spelling[String.size()] = 0; - Result.data = Spelling; - Result.private_flags = (unsigned) CXS_Malloc; + Result.data = ptr; + Result.private_flags = (unsigned)CXS_MallocWithSize; return Result; } @@ -164,19 +172,38 @@ const char *clang_getCString(CXString string) { return static_cast<const char *>(string.data); } +CStringInfo clang_getCStringInfo(CXString string) { + switch ((CXStringFlag)string.private_flags) { + case CXS_Unmanaged: { + auto ptr = static_cast<const char *>(string.data); + return {ptr, strlen(ptr)}; + } + case CXS_MallocWithSize: { + 
auto ptr = static_cast<const CStringImpl *>(string.data); + return {ptr->buffer, ptr->length}; + } + case CXS_StringBuf: { + auto ptr = static_cast<const cxstring::CXStringBuf *>(string.data); + return {ptr->Data.data(), ptr->Data.size()}; + } + } + llvm_unreachable("Invalid CXString::private_flags"); +} + void clang_disposeString(CXString string) { switch ((CXStringFlag) string.private_flags) { case CXS_Unmanaged: - break; - case CXS_Malloc: + return; + case CXS_MallocWithSize: if (string.data) free(const_cast<void *>(string.data)); - break; + return; case CXS_StringBuf: static_cast<cxstring::CXStringBuf *>( const_cast<void *>(string.data))->dispose(); - break; + return; } + llvm_unreachable("Invalid CXString::private_flags"); } void clang_disposeStringSet(CXStringSet *set) { diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map index 3d9d2e268a611..debbf83893e25 100644 --- a/clang/tools/libclang/libclang.map +++ b/clang/tools/libclang/libclang.map @@ -455,6 +455,8 @@ LLVM_21 { clang_Cursor_getGCCAssemblyNumClobbers; clang_Cursor_getGCCAssemblyClobber; clang_Cursor_isGCCAssemblyVolatile; + clang_getCStringInfo; + clang_EvalResult_getAsCXString; }; # Example of how to add a new symbol version entry. 
If you do add a new symbol diff --git a/clang/unittests/libclang/LibclangTest.cpp b/clang/unittests/libclang/LibclangTest.cpp index b2a87d240e56e..1ae56bbdc546b 100644 --- a/clang/unittests/libclang/LibclangTest.cpp +++ b/clang/unittests/libclang/LibclangTest.cpp @@ -623,6 +623,46 @@ TEST_F(LibclangParseTest, EvaluateChildExpression) { nullptr); } +TEST_F(LibclangParseTest, StringLiteralWithZeros) { + const char testSource[] = R"cpp( +const char str[] = "pika\0chu"; +)cpp"; + std::string fileName = "main.cpp"; + WriteFile(fileName, testSource); + + const char *Args[] = {"-xc++"}; + ClangTU = clang_parseTranslationUnit(Index, fileName.c_str(), Args, 1, + nullptr, 0, TUFlags); + + int nodes = 0; + + Traverse([&nodes](CXCursor cursor, CXCursor parent) -> CXChildVisitResult { + if (cursor.kind == CXCursor_StringLiteral) { + CXEvalResult RE = clang_Cursor_Evaluate(cursor); + EXPECT_NE(RE, nullptr); + EXPECT_EQ(clang_EvalResult_getKind(RE), CXEval_StrLiteral); + + const char expected[] = "pika\0chu"; + size_t expected_size = sizeof(expected) - 1; + + auto lit = clang_EvalResult_getAsCXString(RE); + auto str = clang_getCStringInfo(lit); + + EXPECT_TRUE(str.length == expected_size && + memcmp(str.string, expected, str.length) == 0); + + clang_disposeString(lit); + clang_EvalResult_dispose(RE); + + nodes++; + return CXChildVisit_Continue; + } + return CXChildVisit_Recurse; + }); + + EXPECT_EQ(nodes, 1); +} + class LibclangReparseTest : public LibclangParseTest { public: void DisplayDiagnostics() { >From 16b4c8fdeac5939334404947e0aa5f401baee2a9 Mon Sep 17 00:00:00 2001 From: Xiangfei Ding <[email protected]> Date: Thu, 29 Jan 2026 21:16:25 +0200 Subject: [PATCH 2/3] clang_EvalResult_getAsCXString impl 2 --- clang/include/clang-c/CXString.h | 2 + clang/tools/libclang/CIndex.cpp | 5 +- clang/tools/libclang/CXString.cpp | 89 ++++++++++++++++++------------- 3 files changed, 58 insertions(+), 38 deletions(-) diff --git a/clang/include/clang-c/CXString.h 
b/clang/include/clang-c/CXString.h index 14d2eebbc8733..db05656567e5d 100644 --- a/clang/include/clang-c/CXString.h +++ b/clang/include/clang-c/CXString.h @@ -14,6 +14,8 @@ #ifndef LLVM_CLANG_C_CXSTRING_H #define LLVM_CLANG_C_CXSTRING_H +#include <stddef.h> + #include "clang-c/ExternC.h" #include "clang-c/Platform.h" #include <stddef.h> diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 190b6b730e8bf..dc3eb1fcc3375 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -4565,6 +4565,7 @@ static StringLiteral *getCFSTR_value(CallExpr *callExpr) { return S; } +namespace { struct ExprEvalResult { CXEvalResultKind EvalType; union { @@ -4581,6 +4582,7 @@ struct ExprEvalResult { } } }; +} // end namespace void clang_EvalResult_dispose(CXEvalResult E) { delete static_cast<ExprEvalResult *>(E); @@ -4640,7 +4642,8 @@ CXString clang_EvalResult_getAsCXString(CXEvalResult E) { if (!E) { return cxstring::createNull(); } - auto data = clang_getCStringInfo(((ExprEvalResult *)E)->EvalData.stringVal); + auto data = clang_getCStringInfo( + static_cast<ExprEvalResult *>(E)->EvalData.stringVal); return cxstring::createDup(StringRef(data.string, data.length)); } diff --git a/clang/tools/libclang/CXString.cpp b/clang/tools/libclang/CXString.cpp index 8ad981f5533dc..a547213214114 100644 --- a/clang/tools/libclang/CXString.cpp +++ b/clang/tools/libclang/CXString.cpp @@ -14,16 +14,22 @@ #include "CXString.h" #include "CXTranslationUnit.h" +#include "clang-c/CXString.h" #include "clang-c/Index.h" -#include "clang/Frontend/ASTUnit.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TrailingObjects.h" using namespace clang; +namespace { /// Describes the kind of underlying data in CXString. -enum CXStringFlag { +enum CXStringFlag : unsigned { /// CXString contains a 'const char *' that it doesn't own. - CXS_Unmanaged, + CXS_Unmanaged = 0, + + /// CXString contains a 'const char *' that is allocated with malloc(). 
+ /// WARNING: do not use this variant outside c-index-test! + CXS_Malloc, /// CXString contains a 'CStringImpl' that it allocated with malloc(). CXS_MallocWithSize, @@ -33,10 +39,20 @@ enum CXStringFlag { CXS_StringBuf }; -struct CStringImpl { +struct CStringImpl final : llvm::TrailingObjects<CStringImpl, char> { size_t length; - char buffer[sizeof(length)]; + + CStringImpl(size_t len) : length(len) {} + + char *Buffer() { return getTrailingObjects(); } + const char *Buffer() const { return getTrailingObjects(); } + + static CStringImpl *Create(size_t length) { + void *Mem = llvm::safe_malloc(totalSizeToAlloc<char>(length + 1)); + return new (Mem) CStringImpl(length); + } }; +} // end namespace namespace clang { namespace cxstring { @@ -93,25 +109,21 @@ CXString createRef(StringRef String) { } CXString createDup(StringRef String) { - auto toAllocate = - sizeof(size_t) + std::max(sizeof(size_t), String.size() + 1); - assert(toAllocate >= sizeof(CStringImpl)); - auto ptr = static_cast<CStringImpl *>(llvm::safe_malloc(toAllocate)); - - ptr->length = String.size(); - memcpy(ptr->buffer, String.data(), String.size()); - ptr->buffer[String.size()] = 0; + auto *ptr = CStringImpl::Create(String.size()); + auto *buf = ptr->Buffer(); + memcpy(buf, String.data(), String.size()); + buf[String.size()] = 0; CXString Result; Result.data = ptr; - Result.private_flags = (unsigned)CXS_MallocWithSize; + Result.private_flags = static_cast<unsigned>(CXS_MallocWithSize); return Result; } CXString createCXString(CXStringBuf *buf) { CXString Str; Str.data = buf; - Str.private_flags = (unsigned) CXS_StringBuf; + Str.private_flags = static_cast<unsigned>(CXS_StringBuf); return Str; } @@ -155,7 +167,7 @@ void CXStringBuf::dispose() { } bool isManagedByPool(CXString str) { - return ((CXStringFlag) str.private_flags) == CXS_StringBuf; + return static_cast<CXStringFlag>(str.private_flags) == CXS_StringBuf; } } // end namespace cxstring @@ -166,24 +178,22 @@ bool isManagedByPool(CXString str) { 
//===----------------------------------------------------------------------===// const char *clang_getCString(CXString string) { - if (string.private_flags == (unsigned) CXS_StringBuf) { - return static_cast<const cxstring::CXStringBuf *>(string.data)->Data.data(); - } - return static_cast<const char *>(string.data); + return clang_getCStringInfo(string).string; } CStringInfo clang_getCStringInfo(CXString string) { - switch ((CXStringFlag)string.private_flags) { - case CXS_Unmanaged: { - auto ptr = static_cast<const char *>(string.data); + switch (static_cast<CXStringFlag>(string.private_flags)) { + case CXS_Unmanaged: + case CXS_Malloc: { + auto *ptr = static_cast<const char *>(string.data); return {ptr, strlen(ptr)}; } case CXS_MallocWithSize: { - auto ptr = static_cast<const CStringImpl *>(string.data); - return {ptr->buffer, ptr->length}; + auto *ptr = static_cast<const CStringImpl *>(string.data); + return {ptr->Buffer(), ptr->length}; } case CXS_StringBuf: { - auto ptr = static_cast<const cxstring::CXStringBuf *>(string.data); + auto *ptr = static_cast<const cxstring::CXStringBuf *>(string.data); return {ptr->Data.data(), ptr->Data.size()}; } } @@ -191,17 +201,22 @@ CStringInfo clang_getCStringInfo(CXString string) { } void clang_disposeString(CXString string) { - switch ((CXStringFlag) string.private_flags) { - case CXS_Unmanaged: - return; - case CXS_MallocWithSize: - if (string.data) - free(const_cast<void *>(string.data)); - return; - case CXS_StringBuf: - static_cast<cxstring::CXStringBuf *>( - const_cast<void *>(string.data))->dispose(); - return; + switch (static_cast<CXStringFlag>(string.private_flags)) { + case CXS_Unmanaged: + return; + case CXS_Malloc: + case CXS_MallocWithSize: + if (string.data) { + // Safety: + // - the malloc'ed string can be free'ed + // - CStringImpl was malloc'ed and has trivial destructor + free(const_cast<void *>(string.data)); + } + return; + case CXS_StringBuf: + static_cast<cxstring::CXStringBuf *>(const_cast<void 
*>(string.data)) + ->dispose(); + return; } llvm_unreachable("Invalid CXString::private_flags"); } >From 61ddbd8e98e25d48e1d5a2d602f89e413d18d6eb Mon Sep 17 00:00:00 2001 From: Andrei Damian <[email protected]> Date: Thu, 5 Feb 2026 22:58:17 +0200 Subject: [PATCH 3/3] clang_EvalResult_getAsCXString impl: fix duplicate include --- clang/include/clang-c/CXString.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/include/clang-c/CXString.h b/clang/include/clang-c/CXString.h index db05656567e5d..14d2eebbc8733 100644 --- a/clang/include/clang-c/CXString.h +++ b/clang/include/clang-c/CXString.h @@ -14,8 +14,6 @@ #ifndef LLVM_CLANG_C_CXSTRING_H #define LLVM_CLANG_C_CXSTRING_H -#include <stddef.h> - #include "clang-c/ExternC.h" #include "clang-c/Platform.h" #include <stddef.h> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
