commit:     093aafa4f80f05873a812a69f072413609da50ac
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Fri Oct 20 22:02:59 2023 +0000
Commit:     Benda XU <heroxbd <AT> gentoo <DOT> org>
CommitDate: Thu Dec 14 15:01:59 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=093aafa4

dev-util/hip: add extend-isa-compatibility-check patch

Bug: https://github.com/gentoo/gentoo/pull/33400
Signed-off-by: Benda Xu <heroxbd <AT> gentoo.org>
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>

 .../hip-5.7.1-extend-isa-compatibility-check.patch | 236 +++++++++++++++++++++
 dev-util/hip/hip-5.7.1-r1.ebuild                   |   1 +
 2 files changed, 237 insertions(+)

diff --git a/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch
new file mode 100644
index 000000000000..7853de693cde
--- /dev/null
+++ b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch
@@ -0,0 +1,236 @@
+Combined with matching changes in the rocr-runtime ebuild, this patch allows
+compatible kernels to be loaded whenever possible.
+For example, if AMDGPU_TARGETS is set to gfx1030 and an application
+is started on gfx1036, the gfx1030 kernel is loaded.
+
+Author: Cordell Bloor <c...@slerp.xyz>
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch
+--- a/hipamd/src/hip_code_object.cpp
++++ b/hipamd/src/hip_code_object.cpp
+@@ -390,47 +390,123 @@ static bool getTripleTargetID(std::string 
bundled_co_entry_id, const void* code_
+   return true;
+ }
+ 
+-static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
+-                                             std::string 
agent_triple_target_id) {
++struct GfxPattern {
++  std::string root;
++  std::string suffixes;
++};
++
++static bool matches(const GfxPattern& p, const std::string& s) {
++  if (p.root.size() + 1 != s.size()) {
++    return false;
++  }
++  if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) {
++    return false;
++  }
++  return p.suffixes.find(s[p.root.size()]) != std::string::npos;
++}
++
++static bool isGfx900EquivalentProcessor(const std::string& processor) {
++  return matches(GfxPattern{"gfx90", "029c"}, processor);
++}
++
++static bool isGfx900SupersetProcessor(const std::string& processor) {
++  return matches(GfxPattern{"gfx90", "0269c"}, processor);
++}
++
++static bool isGfx1030EquivalentProcessor(const std::string& processor) {
++  return matches(GfxPattern{"gfx103", "0123456"}, processor);
++}
++
++static bool isGfx1010EquivalentProcessor(const std::string& processor) {
++  return matches(GfxPattern{"gfx101", "02"}, processor);
++}
++
++static bool isGfx1010SupersetProcessor(const std::string& processor) {
++  return matches(GfxPattern{"gfx101", "0123"}, processor);
++}
++
++enum CompatibilityScore {
++  CS_EXACT_MATCH           = 1 << 4,
++  CS_PROCESSOR_MATCH       = 1 << 3,
++  CS_PROCESSOR_COMPATIBLE  = 1 << 2,
++  CS_XNACK_SPECIALIZED     = 1 << 1,
++  CS_SRAM_ECC_SPECIALIZED  = 1 << 0,
++  CS_INCOMPATIBLE          = 0,
++};
++
++static int getProcessorCompatibilityScore(const std::string& co_processor,
++                                          const std::string& agent_processor) 
{
++  if (co_processor == agent_processor)
++    return CS_PROCESSOR_MATCH;
++
++  if (isGfx900SupersetProcessor(agent_processor))
++    return isGfx900EquivalentProcessor(co_processor) ? 
CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++  if (isGfx1010SupersetProcessor(agent_processor))
++    return isGfx1010EquivalentProcessor(co_processor) ? 
CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++  if (isGfx1030EquivalentProcessor(agent_processor))
++    return isGfx1030EquivalentProcessor(co_processor) ? 
CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++  return CS_INCOMPATIBLE;
++}
++
++static int getCompatiblityScore(std::string co_triple_target_id,
++                                std::string agent_triple_target_id) {
+   // Primitive Check
+-  if (co_triple_target_id == agent_triple_target_id) return true;
++  if (co_triple_target_id == agent_triple_target_id) return CS_EXACT_MATCH;
+ 
+   // Parse code object triple target id
+   if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) 
{
+-    return false;
++    return CS_INCOMPATIBLE;
+   }
+ 
+   std::string co_processor;
+   char co_sram_ecc, co_xnack;
+   if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, 
co_xnack)) {
+-    return false;
++    return CS_INCOMPATIBLE;
+   }
+ 
+-  if (!co_triple_target_id.empty()) return false;
++  if (!co_triple_target_id.empty()) return CS_INCOMPATIBLE;
+ 
+   // Parse agent isa triple target id
+   if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + 
'-')) {
+-    return false;
++    return CS_INCOMPATIBLE;
+   }
+ 
+   std::string agent_isa_processor;
+   char isa_sram_ecc, isa_xnack;
+   if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, 
isa_sram_ecc, isa_xnack)) {
+-    return false;
++    return CS_INCOMPATIBLE;
+   }
+ 
+-  if (!agent_triple_target_id.empty()) return false;
++  if (!agent_triple_target_id.empty()) return CS_INCOMPATIBLE;
+ 
+   // Check for compatibility
+-  if (agent_isa_processor != co_processor) return false;
+-  if (co_sram_ecc != ' ') {
+-    if (co_sram_ecc != isa_sram_ecc) return false;
++  int processor_score = getProcessorCompatibilityScore(co_processor, 
agent_isa_processor);
++  if (processor_score == CS_INCOMPATIBLE) {
++    return CS_INCOMPATIBLE;
+   }
+-  if (co_xnack != ' ') {
+-    if (co_xnack != isa_xnack) return false;
++
++  int xnack_bonus;
++  if (co_xnack == ' ') {
++    xnack_bonus = 0;
++  } else if (co_xnack == isa_xnack) {
++    xnack_bonus = CS_XNACK_SPECIALIZED;
++  } else {
++    return CS_INCOMPATIBLE;
+   }
+ 
+-  return true;
++  int sram_ecc_bonus;
++  if (co_sram_ecc == ' ') {
++    sram_ecc_bonus = 0;
++  } else if (co_sram_ecc == isa_sram_ecc) {
++    sram_ecc_bonus = CS_SRAM_ECC_SPECIALIZED;
++  } else {
++    return CS_INCOMPATIBLE;
++  }
++
++  return processor_score + xnack_bonus + sram_ecc_bonus;
+ }
+ 
+ // This will be moved to COMGR eventually
+@@ -483,6 +559,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
+   for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
+     code_objs.push_back(std::make_pair(nullptr, 0));
+   }
++  std::vector<int> compatibility_score(agent_triple_target_ids.size());
+ 
+   const auto obheader = reinterpret_cast<const 
__ClangOffloadBundleHeader*>(data);
+   const auto* desc = &obheader->desc[0];
+@@ -495,17 +572,19 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
+         reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + 
desc->offset);
+     const size_t image_size = desc->size;
+ 
+-    if (num_code_objs == 0) break;
+     std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
+ 
+     std::string co_triple_target_id;
+     if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) 
continue;
+ 
+     for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
+-      if (code_objs[dev].first) continue;
+-      if (isCodeObjectCompatibleWithDevice(co_triple_target_id, 
agent_triple_target_ids[dev])) {
++      if (compatibility_score[dev] >= CS_PROCESSOR_MATCH) continue;
++      int score = getCompatiblityScore(co_triple_target_id, 
agent_triple_target_ids[dev]);
++      if (score > compatibility_score[dev]) {
++        compatibility_score[dev] = score;
++        if (!code_objs[dev].first)
++          --num_code_objs;
+         code_objs[dev] = std::make_pair(image, image_size);
+-        --num_code_objs;
+       }
+     }
+   }
+--- a/rocclr/device/device.cpp
++++ b/rocclr/device/device.cpp
+@@ -232,10 +232,49 @@ std::string Isa::isaName() const {
+   return std::string(hsaIsaNamePrefix) + targetId();
+ }
+ 
++template <class T, std::size_t N>
++static bool Contains(const std::array<T, N>& arr, const T& value) {
++  return std::find(std::begin(arr), std::end(arr), value) != std::end(arr);
++}
++
++static bool IsVersionCompatible(const Isa &codeObjectIsa,
++                                const Isa &agentIsa) {
++  if (codeObjectIsa.versionMajor() == agentIsa.versionMajor() &&
++      codeObjectIsa.versionMinor() == agentIsa.versionMinor()) {
++
++    if (codeObjectIsa.versionStepping() == agentIsa.versionStepping()) {
++      return true; // exact match
++    }
++
++    // The code object and the agent may sometimes be compatible if
++    // they differ only by stepping version.
++    if (codeObjectIsa.versionMajor() == 9 &&
++        codeObjectIsa.versionMinor() == 0) {
++      const std::array<uint32_t, 4> gfx900_equivalent = { 0, 2, 9, 12 };
++      const std::array<uint32_t, 5> gfx900_superset = { 0, 2, 6, 9, 12 };
++      if (Contains(gfx900_equivalent, codeObjectIsa.versionStepping()) &&
++          Contains(gfx900_superset, agentIsa.versionStepping())) {
++        return true; // gfx900 compatible object and agent
++      }
++    } else if (codeObjectIsa.versionMajor() == 10) {
++      if (codeObjectIsa.versionMinor() == 1) {
++        const std::array<uint32_t, 2> gfx1010_equivalent = { 0, 2 };
++        const std::array<uint32_t, 4> gfx1010_superset = { 0, 1, 2, 3 };
++        if (Contains(gfx1010_equivalent, codeObjectIsa.versionStepping()) &&
++            Contains(gfx1010_superset, agentIsa.versionStepping())) {
++          return true; // gfx1010 compatible object and agent
++        }
++      } else if (codeObjectIsa.versionMinor() == 3) {
++        return true; // gfx1030 compatible object and agent
++      }
++    }
++  }
++
++  return false;
++}
++
+ bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
+-  if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
+-      codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
+-      codeObjectIsa.versionStepping() != agentIsa.versionStepping())
++  if (!IsVersionCompatible(codeObjectIsa, agentIsa))
+     return false;
+ 
+   assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() 
&&

diff --git a/dev-util/hip/hip-5.7.1-r1.ebuild b/dev-util/hip/hip-5.7.1-r1.ebuild
index 25601091075f..7b1a593f23e5 100644
--- a/dev-util/hip/hip-5.7.1-r1.ebuild
+++ b/dev-util/hip/hip-5.7.1-r1.ebuild
@@ -46,6 +46,7 @@ PATCHES=(
        "${FILESDIR}/${PN}-5.7.1-exec-stack.patch"
        "${FILESDIR}/${PN}-5.7.1-disable-stack-protector.patch"
        "${FILESDIR}/${PN}-5.7.1-no_asan_doc.patch"
+       "${FILESDIR}/${PN}-5.7.1-extend-isa-compatibility-check.patch"
 )
 
 S="${WORKDIR}/clr-rocm-${PV}/"

Reply via email to