This is an automated email from the ASF dual-hosted git repository.

jroesch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 901dee5  [Vulkan] Check at codegen if the shader is within shared memory limits. (#8746)
901dee5 is described below

commit 901dee54e42d6393cde6eefcc25964db8e24e41d
Author: Lunderberg <[email protected]>
AuthorDate: Sat Aug 14 19:47:08 2021 -0500

    [Vulkan] Check at codegen if the shader is within shared memory limits. (#8746)
    
    Previously, a shader that did not respect the device's shared
    memory limits could cause a segfault during the call to
    `vkCreateComputePipelines`.
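    
    If the check fires on a device that actually supports the
    allocation, the limit can be overridden on the target, as the new
    error message suggests.  A minimal sketch (the 49152-byte value is
    a placeholder for the device's real limit):
    
        import tvm
    
        # Declare the device's shared-memory limit explicitly ...
        target = tvm.target.Target("vulkan -max_shared_memory_per_block=49152")
    
        # ... or query all device parameters from the local device
        # (device index 0).
        target = tvm.target.Target("vulkan -from_device=0")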
---
 src/target/spirv/codegen_spirv.cc                  | 12 +++++++++
 src/target/spirv/codegen_spirv.h                   |  4 +++
 src/target/spirv/spirv_support.cc                  |  3 +++
 src/target/spirv/spirv_support.h                   | 16 ++++++++++++
 .../python/unittest/test_target_codegen_vulkan.py  | 30 ++++++++++++++++++++++
 5 files changed, 65 insertions(+)

diff --git a/src/target/spirv/codegen_spirv.cc b/src/target/spirv/codegen_spirv.cc
index 42d0027..66952da 100644
--- a/src/target/spirv/codegen_spirv.cc
+++ b/src/target/spirv/codegen_spirv.cc
@@ -110,6 +110,14 @@ runtime::VulkanShader CodeGenSPIRV::BuildFunction(const PrimFunc& f, const std::
 
   builder_->CommitKernelFunction(func_ptr, name);
 
+  ICHECK_LE(shared_memory_bytes_used_, spirv_support_.max_shared_memory_per_block)
+      << "Vulkan shader " << name << " uses " << shared_memory_bytes_used_
+      << " bytes of shared memory, "
+      << "but target supports only " << 
spirv_support_.max_shared_memory_per_block << " bytes.  "
+      << "If the device supports this allocation, "
+      << "please add -max_shared_memory_per_block=NBYTES to the target, "
+      << "or query all device parameters by adding -from_device=0.";
+
   shader.data = builder_->Finalize();
   return shader;
 }
@@ -121,6 +129,7 @@ void CodeGenSPIRV::InitFuncState() {
   analyzer_.reset(new arith::Analyzer());
   builder_.reset(new spirv::IRBuilder(spirv_support_));
   builder_->InitHeader();
+  shared_memory_bytes_used_ = 0;
 }
 
 spirv::Value CodeGenSPIRV::GetThreadIndex(const IterVar& iv, const PrimExpr& extent) {
@@ -642,6 +651,9 @@ void CodeGenSPIRV::VisitStmt_(const AllocateNode* op) {
     // Shared memory
     buf =
        builder_->Allocate(etype, static_cast<uint32_t>(constant_size), spv::StorageClassWorkgroup);
+
+    size_t num_bytes = op->dtype.bytes() * op->dtype.lanes() * static_cast<uint32_t>(constant_size);
+    shared_memory_bytes_used_ += num_bytes;
   } else {
     LOG(FATAL) << "Can only allocate shared or local memory inside kernel";
   }
diff --git a/src/target/spirv/codegen_spirv.h b/src/target/spirv/codegen_spirv.h
index 8b14754..74b62e7 100644
--- a/src/target/spirv/codegen_spirv.h
+++ b/src/target/spirv/codegen_spirv.h
@@ -214,6 +214,10 @@ class CodeGenSPIRV : public ExprFunctor<spirv::Value(const PrimExpr&)>,
 
  // binding of let variables. Enables duplicate var defs that map to same value
  std::unordered_map<Var, const LetNode*, ObjectPtrHash, ObjectPtrEqual> let_binding_;
+
+  // Running total of the number of bytes of shared memory used.
+  // Checked against the max_shared_memory_per_block limit.
+  size_t shared_memory_bytes_used_{0};
 };
 
 }  // namespace codegen
diff --git a/src/target/spirv/spirv_support.cc b/src/target/spirv/spirv_support.cc
index 4a294d5..0f1207f 100644
--- a/src/target/spirv/spirv_support.cc
+++ b/src/target/spirv/spirv_support.cc
@@ -52,6 +52,9 @@ SPIRVSupport::SPIRVSupport(tvm::Target target) {
   if (target->GetAttr<Integer>("max_storage_buffer_range")) {
     max_storage_buffer_range = target->GetAttr<Integer>("max_storage_buffer_range").value();
   }
+  if (target->GetAttr<Integer>("max_shared_memory_per_block")) {
+    max_shared_memory_per_block = target->GetAttr<Integer>("max_shared_memory_per_block").value();
+  }
   if (target->GetAttr<Integer>("max_per_stage_descriptor_storage_buffer")) {
     max_per_stage_descriptor_storage_buffers =
         target->GetAttr<Integer>("max_per_stage_descriptor_storage_buffer").value();
diff --git a/src/target/spirv/spirv_support.h b/src/target/spirv/spirv_support.h
index 1497c7c..04d13cc 100644
--- a/src/target/spirv/spirv_support.h
+++ b/src/target/spirv/spirv_support.h
@@ -102,6 +102,22 @@ struct SPIRVSupport {
   uint32_t max_storage_buffer_range{1 << 27};
 
   /*!
+   * \brief The maximum amount of shared memory usable by a shader
+   *
+   * Vulkan extension: N/A
+   * Vulkan struct: VkPhysicalDeviceLimits
+   * Device Property: maxComputeSharedMemorySize
+   * SPV Extension name: N/A
+   * SPV Capability: N/A
+   *
+   * The maximum amount of shared memory (Workgroup scope) that may be
+   * allocated by a shader.  Default value is from Vulkan spec,
+   * "Required Limits" table.  Implementations may have a larger
+   * limit.
+   */
+  uint32_t max_shared_memory_per_block{16384};
+
+  /*!
   * \brief The maximum number of storage buffers accessible by a single shader.
    *
    * Vulkan struct: VkPhysicalDeviceLimits
diff --git a/tests/python/unittest/test_target_codegen_vulkan.py b/tests/python/unittest/test_target_codegen_vulkan.py
index 85e9cb1..01f734b 100644
--- a/tests/python/unittest/test_target_codegen_vulkan.py
+++ b/tests/python/unittest/test_target_codegen_vulkan.py
@@ -527,5 +527,35 @@ class TestVectorizedIndices:
         tvm.testing.assert_allclose(b.numpy(), b_np)
 
 
+@tvm.testing.parametrize_targets("vulkan -max_shared_memory_per_block=16384")
+def test_shared_mem_alloc(target, dev):
+    alloc_nbytes = 16384 * 2
+
+    def do_compute(ins, outs):
+        ib = tvm.tir.ir_builder.create()
+        out = ib.buffer_ptr(outs[0])
+
+        ib.scope_attr(te.thread_axis("blockIdx.x"), "thread_extent", 0)
+
+        array = ib.allocate("int32", (alloc_nbytes,), name="array", scope="shared")
+        array[0] = 0
+        out[0] = array[0]
+
+        return ib.get()
+
+    Out = te.extern(
+        shape=(1,),
+        inputs=[],
+        fcompute=do_compute,
+        dtype="int32",
+    )
+    s = te.create_schedule(Out.op)
+
+    # Codegen should raise error when allocating more memory than the
+    # target supports.
+    with pytest.raises(tvm.TVMError):
+        tvm.build(s, [Out], target)
+
+
 if __name__ == "__main__":
     sys.exit(pytest.main(sys.argv))
