hans created this revision.
hans added reviewers: thakis, rnk, arlosi.
Herald added subscribers: dexonsmith, hiraditya.
hans requested review of this revision.
Herald added projects: clang, LLVM.

According to the VS2022 release notes, newer MSVC versions appear to use 
SHA256 for these checksums (search for "SHA-256" in 
https://docs.microsoft.com/en-us/visualstudio/releases/2022/release-notes#17.0.0).

Since D75785 <https://reviews.llvm.org/D75785> laid the groundwork, let's hook 
it up.

While here, I noticed that llvm::SHA256, unlike llvm::MD5, doesn't have a 
method to get the hash as a hex string. We can use llvm::toHex() for that 
instead; toHex() could also be made more efficient, and llvm::MD5 could use it too.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116835

Files:
  clang/include/clang/Basic/LangOptions.h
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/lib/CodeGen/CGDebugInfo.h
  clang/test/CodeGen/debug-info-file-checksum.c
  llvm/include/llvm/ADT/StringExtras.h
  llvm/include/llvm/Support/MD5.h
  llvm/lib/Support/MD5.cpp

Index: llvm/lib/Support/MD5.cpp
===================================================================
--- llvm/lib/Support/MD5.cpp
+++ llvm/lib/Support/MD5.cpp
@@ -40,10 +40,9 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Endian.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
 #include <array>
 #include <cstdint>
 #include <cstring>
@@ -281,14 +280,12 @@
 
 SmallString<32> MD5::MD5Result::digest() const {
   SmallString<32> Str;
-  raw_svector_ostream Res(Str);
-  for (int i = 0; i < 16; ++i)
-    Res << format("%.2x", Bytes[i]);
+  toHex(Bytes, /*LowerCase*/ true, Str);
   return Str;
 }
 
-void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
-  Str = Result.digest();
+void MD5::stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str) {
+  toHex(Result.Bytes, /*LowerCase*/ true, Str);
 }
 
 std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
Index: llvm/include/llvm/Support/MD5.h
===================================================================
--- llvm/include/llvm/Support/MD5.h
+++ llvm/include/llvm/Support/MD5.h
@@ -88,7 +88,7 @@
 
   /// Translates the bytes in \p Res to a hex string that is
   /// deposited into \p Str. The result will be of length 32.
-  static void stringifyResult(MD5Result &Result, SmallString<32> &Str);
+  static void stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str);
 
   /// Computes the hash for a given bytes.
   static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
Index: llvm/include/llvm/ADT/StringExtras.h
===================================================================
--- llvm/include/llvm/ADT/StringExtras.h
+++ llvm/include/llvm/ADT/StringExtras.h
@@ -162,21 +162,26 @@
   return std::string(BufPtr, std::end(Buffer));
 }
 
-/// Convert buffer \p Input to its hexadecimal representation.
-/// The returned string is double the size of \p Input.
-inline std::string toHex(StringRef Input, bool LowerCase = false) {
+inline void toHex(ArrayRef<uint8_t> Input, bool LowerCase,
+                  SmallVectorImpl<char> &Output) {
   static const char *const LUT = "0123456789ABCDEF";
   const uint8_t Offset = LowerCase ? 32 : 0;
   size_t Length = Input.size();
+  Output.resize_for_overwrite(Length * 2);
 
-  std::string Output;
-  Output.reserve(2 * Length);
   for (size_t i = 0; i < Length; ++i) {
-    const unsigned char c = Input[i];
-    Output.push_back(LUT[c >> 4] | Offset);
-    Output.push_back(LUT[c & 15] | Offset);
+    const uint8_t c = Input[i];
+    Output[i * 2    ] = LUT[c >> 4] | Offset;
+    Output[i * 2 + 1] = LUT[c & 15] | Offset;
   }
-  return Output;
+}
+
+/// Convert buffer \p Input to its hexadecimal representation.
+/// The returned string is double the size of \p Input.
+inline std::string toHex(StringRef Input, bool LowerCase = false) {
+  SmallString<16> Output;
+  toHex(arrayRefFromStringRef(Input), LowerCase, Output);
+  return std::string(Output);
 }
 
 inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
Index: clang/test/CodeGen/debug-info-file-checksum.c
===================================================================
--- clang/test/CodeGen/debug-info-file-checksum.c
+++ clang/test/CodeGen/debug-info-file-checksum.c
@@ -16,3 +16,7 @@
 // RUN: %clang -emit-llvm -S -g -gcodeview -x c %S/Inputs/debug-info-file-checksum-line.cpp -o - | FileCheck %s --check-prefix CHECKSUM
 
 // CHECKSUM: !DIFile(filename: "{{.*}}debug-info-file-checksum-line.cpp", directory:{{.*}}, checksumkind: CSK_MD5, checksum: "7b568574d0e3c56c28e5e0234d1f4a06")
+
+// Later MSVC versions use SHA256.
+// RUN: %clang -target i686-pc-windows-msvc19.30.0 -emit-llvm -S -g -gcodeview -x c %S/Inputs/debug-info-file-checksum-line.cpp -o - | FileCheck %s --check-prefix SHA256
+// SHA256: !DIFile(filename: "{{.*}}debug-info-file-checksum-line.cpp", directory:{{.*}}, checksumkind: CSK_SHA256, checksum: "e28b727db04814680c02d0d2ec72bcd77f5e54131dafc34163bd1576e9428d42")
Index: clang/lib/CodeGen/CGDebugInfo.h
===================================================================
--- clang/lib/CodeGen/CGDebugInfo.h
+++ clang/lib/CodeGen/CGDebugInfo.h
@@ -617,7 +617,7 @@
 
   /// Compute the file checksum debug info for input file ID.
   Optional<llvm::DIFile::ChecksumKind>
-  computeChecksum(FileID FID, SmallString<32> &Checksum) const;
+  computeChecksum(FileID FID, SmallString<64> &Checksum) const;
 
   /// Get the source of the given file ID.
   Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
Index: clang/lib/CodeGen/CGDebugInfo.cpp
===================================================================
--- clang/lib/CodeGen/CGDebugInfo.cpp
+++ clang/lib/CodeGen/CGDebugInfo.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SHA256.h"
 #include "llvm/Support/TimeProfiler.h"
 using namespace clang;
 using namespace clang::CodeGen;
@@ -342,7 +343,7 @@
 }
 
 Optional<llvm::DIFile::ChecksumKind>
-CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const {
+CGDebugInfo::computeChecksum(FileID FID, SmallString<64> &Checksum) const {
   Checksum.clear();
 
   if (!CGM.getCodeGenOpts().EmitCodeView &&
@@ -354,6 +355,13 @@
   if (!MemBuffer)
     return None;
 
+  if (CGM.getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2022)) {
+    llvm::toHex(
+        llvm::SHA256::hash(llvm::arrayRefFromStringRef(MemBuffer->getBuffer())),
+        /*LowerCase*/ true, Checksum);
+    return llvm::DIFile::CSK_SHA256;
+  }
+
   llvm::MD5 Hash;
   llvm::MD5::MD5Result Result;
 
@@ -408,7 +416,7 @@
       return cast<llvm::DIFile>(V);
   }
 
-  SmallString<32> Checksum;
+  SmallString<64> Checksum;
 
   Optional<llvm::DIFile::ChecksumKind> CSKind = computeChecksum(FID, Checksum);
   Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
@@ -499,7 +507,7 @@
 }
 
 void CGDebugInfo::CreateCompileUnit() {
-  SmallString<32> Checksum;
+  SmallString<64> Checksum;
   Optional<llvm::DIFile::ChecksumKind> CSKind;
   Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
 
Index: clang/include/clang/Basic/LangOptions.h
===================================================================
--- clang/include/clang/Basic/LangOptions.h
+++ clang/include/clang/Basic/LangOptions.h
@@ -125,6 +125,7 @@
     MSVC2017_7 = 1914,
     MSVC2019 = 1920,
     MSVC2019_8 = 1928,
+    MSVC2022 = 1930,
   };
 
   enum SYCLMajorVersion {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to