[Bf-blender-cvs] [e6902d19a0d] master: Cycles: Allow Intel GPUs under Metal

Morteza Mostajab Wed, 19 Oct 2022 09:09:51 -0700

Commit: e6902d19a0d8b034e65f28df6dba914a876b08df
Author: Morteza Mostajab
Date:   Wed Oct 19 17:09:23 2022 +0100
Branches: master
https://developer.blender.org/rBe6902d19a0d8b034e65f28df6dba914a876b08df


Cycles: Allow Intel GPUs under Metal

Known Issues:
- Command buffer failures when using binary archives (binary archives are
disabled for Intel GPUs as a workaround)
- Wrong texture sampler being applied (to be addressed in the future)

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D16253

===================================================================

M       intern/cycles/device/metal/device_impl.mm
M       intern/cycles/device/metal/kernel.mm
M       intern/cycles/device/metal/util.mm
M       intern/cycles/kernel/device/metal/context_begin.h

===================================================================

diff --git a/intern/cycles/device/metal/device_impl.mm 
b/intern/cycles/device/metal/device_impl.mm
index 6a16d4bb3b4..82ad5d55ecd 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -339,6 +339,12 @@ bool MetalDevice::compile_and_load(MetalPipelineType 
pso_type)
 
   MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
 
+  if (@available(macos 13.0, *)) {
+    if (device_vendor == METAL_GPU_INTEL) {
+      [options setOptimizationLevel:MTLLibraryOptimizationLevelSize];
+    }
+  }
+
   options.fastMathEnabled = YES;
   if (@available(macOS 12.0, *)) {
     options.languageVersion = MTLLanguageVersion2_4;
diff --git a/intern/cycles/device/metal/kernel.mm 
b/intern/cycles/device/metal/kernel.mm
index 5e0cb6d18f4..8ccc50e57a3 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -317,6 +317,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
       }
     }
 
+    /* Workaround for Intel GPU having issue using Binary Archives */
+    MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
+    if (gpu_vendor == METAL_GPU_INTEL) {
+      return false;
+    }
+
     if (pso_type == PSO_GENERIC) {
       /* Archive the generic kernels. */
       return true;
diff --git a/intern/cycles/device/metal/util.mm 
b/intern/cycles/device/metal/util.mm
index 65c67c400fe..eb77aeb6a54 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -110,6 +110,10 @@ vector<id<MTLDevice>> const 
&MetalInfo::get_usable_devices()
       usable |= (vendor == METAL_GPU_AMD);
     }
 
+    if (@available(macos 13.0, *)) {
+      usable |= (vendor == METAL_GPU_INTEL);
+    }
+
     if (usable) {
       metal_printf("- %s\n", device_name.c_str());
       [device retain];
diff --git a/intern/cycles/kernel/device/metal/context_begin.h 
b/intern/cycles/kernel/device/metal/context_begin.h
index 99cb1e3826e..e75ec9cadec 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -34,21 +34,48 @@ class MetalKernelContext {
       kernel_assert(0);
       return 0;
     }
-
+    
+#ifdef __KERNEL_METAL_INTEL__
+    template<typename TextureType, typename CoordsType>
+    inline __attribute__((__always_inline__))
+    auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
+                                                  const uint tid, const uint 
sid,
+                                                  CoordsType coords) const
+    {
+      switch(sid) {
+        default:
+        case 0: return texture_array[tid].tex.sample(sampler(address::repeat, 
filter::nearest), coords);
+        case 1: return 
texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), 
coords);
+        case 2: return 
texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), 
coords);
+        case 3: return texture_array[tid].tex.sample(sampler(address::repeat, 
filter::linear), coords);
+        case 4: return 
texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), 
coords);
+        case 5: return 
texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), 
coords);
+      }
+    }
+#endif
+    
     // texture2d
     template<>
     inline __attribute__((__always_inline__))
     float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, 
float y) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return 
metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, 
y));
+#else
+      return 
ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, 
sid, float2(x, y));
+#endif
     }
     template<>
     inline __attribute__((__always_inline__))
     float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float 
y) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return 
metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, 
y)).x;
+#else
+      return 
ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, 
sid, float2(x, y)).x;
+#endif
     }
 
     // texture3d
@@ -57,14 +84,22 @@ class MetalKernelContext {
     float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, 
float y, float z) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return 
metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, 
y, z));
+#else
+      return 
ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, 
sid, float3(x, y, z));
+#endif
     }
     template<>
     inline __attribute__((__always_inline__))
     float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float 
y, float z) const {
       const uint tid(tex);
       const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
       return 
metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, 
y, z)).x;
+#else
+      return 
ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, 
sid, float3(x, y, z)).x;
+#endif
     }
 #    include "kernel/device/gpu/image.h"

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
List details, subscription details or unsubscribe:
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [e6902d19a0d] master: Cycles: Allow Intel GPUs under Metal

Reply via email to