================
@@ -351,12 +353,24 @@ Address 
CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
       "kernel_launch_params");
 
   auto KernelArgsSize = CGM.getDataLayout().getTypeAllocSize(KernelArgsTy);
+
+  // Avoid accounting the tail padding for CUDA.
+  auto KernelArgsSizeNoTailPadding = llvm::TypeSize::getZero();
+  if (auto N = KernelArgsTy->getNumElements()) {
+    auto *SL = CGM.getDataLayout().getStructLayout(KernelArgsTy);
+    KernelArgsSizeNoTailPadding = SL->getElementOffset(N - 1);
+    KernelArgsSizeNoTailPadding += CGM.getDataLayout().getTypeAllocSize(
+        KernelArgsTy->getElementType(N - 1));
----------------
kevinsala wrote:

If there is a cleaner way of getting this information, please let me know.

https://github.com/llvm/llvm-project/pull/156229
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to