Mesa (main): freedreno/devices: Separate device definition into base + gen features

GitLab Mirror Mon, 20 Nov 2023 18:09:57 -0800

Module: Mesa
Branch: main
Commit: e6bfe42b6e2b0243627d6c98cdb52073084213da
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6bfe42b6e2b0243627d6c98cdb52073084213da


Author: Danylo Piliaiev <dpilia...@igalia.com>
Date:   Thu Oct 26 18:48:35 2023 +0200

freedreno/devices: Separate device definition into base + gen features

After the introduction of A7XX it doesn't make sense to define base GPU
properties in A6xxGPUInfo. Now we move to a cleaner definition:
- a6xx_base + a6xx_genX - for A6XX
- a7xx_base + a7xx_xxx - for A7XX; there are no clearly identifiable
                         sub-gens at the moment.

Signed-off-by: Danylo Piliaiev <dpilia...@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25939>

---

 src/freedreno/common/freedreno_devices.py | 133 ++++++++++++++++++------------
 1 file changed, 80 insertions(+), 53 deletions(-)

diff --git a/src/freedreno/common/freedreno_devices.py 
b/src/freedreno/common/freedreno_devices.py
index 2bb2ad6f029..b67dc681fc4 100644
--- a/src/freedreno/common/freedreno_devices.py
+++ b/src/freedreno/common/freedreno_devices.py
@@ -143,7 +143,7 @@ class A6xxGPUInfo(GPUInfo):
     def __init__(self, chip, template, num_ccu,
                  tile_align_w, tile_align_h, num_vsc_pipes,
                  cs_shared_mem_size, wave_granularity, fibers_per_sp,
-                 magic_regs, raw_magic_regs = None, max_sets = 5):
+                 magic_regs, raw_magic_regs = None):
         super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
                          tile_align_w = tile_align_w,
                          tile_align_h = tile_align_h,
@@ -168,25 +168,6 @@ class A6xxGPUInfo(GPUInfo):
         if raw_magic_regs:
             self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]
 
-        # Things that earlier gens have and later gens remove, provide
-        # defaults here and let them be overridden by sub-gen template:
-        self.a6xx.has_cp_reg_write = True
-        self.a6xx.has_8bpp_ubwc = True
-
-        self.a6xx.has_gmem_fast_clear = True
-        self.a6xx.has_hw_multiview = True
-        self.a6xx.has_fs_tex_prefetch = True
-        self.a6xx.has_sampler_minmax = True
-
-        self.a6xx.sysmem_per_ccu_cache_size = 64 * 1024
-        self.a6xx.gmem_ccu_color_cache_fraction = 
CCUColorCacheFraction.QUARTER.value
-
-        self.a6xx.prim_alloc_threshold = 0x7
-
-        self.a6xx.vs_max_inputs_count = 32
-
-        self.a6xx.max_sets = max_sets
-
         templates = template if type(template) is list else [template]
         for template in templates:
             template.apply_props(self)
@@ -315,24 +296,36 @@ class A7XXProps(dict):
             setattr(gpu_info.a7xx, name, val)
 
 
+a6xx_base = A6XXProps(
+        has_cp_reg_write = True,
+        has_8bpp_ubwc = True,
+        has_gmem_fast_clear = True,
+        has_hw_multiview = True,
+        has_fs_tex_prefetch = True,
+        has_sampler_minmax = True,
+
+        supports_double_threadsize = True,
+
+        sysmem_per_ccu_cache_size = 64 * 1024,
+        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
+
+        prim_alloc_threshold = 0x7,
+        vs_max_inputs_count = 32,
+        max_sets = 5,
+    )
+
+
 # a6xx can be divided into distinct sub-generations, where certain device-
 # info parameters are keyed to the sub-generation.  These templates reduce
 # the copypaste
 
-# a615, a616, a618, a619, a620 and a630:
-a6xx_gen1 = A6XXProps(
-        reg_size_vec4 = 96,
+a6xx_gen1_low = A6XXProps(
+        reg_size_vec4 = 48,
         instr_cache_size = 64,
-        concurrent_resolve = False,
         indirect_draw_wfm_quirk = True,
         depth_bounds_require_depth_test_quirk = True,
-        supports_double_threadsize = True,
-    )
 
-# a605, a608, a610, 612
-a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
         has_gmem_fast_clear = False,
-        reg_size_vec4 = 48,
         has_hw_multiview = False,
         has_sampler_minmax = False,
         has_fs_tex_prefetch = False,
@@ -340,9 +333,15 @@ a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps(
         gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
         vs_max_inputs_count = 16,
         supports_double_threadsize = False,
-)})
+    )
+
+a6xx_gen1 = A6XXProps(
+        reg_size_vec4 = 96,
+        instr_cache_size = 64,
+        indirect_draw_wfm_quirk = True,
+        depth_bounds_require_depth_test_quirk = True,
+    )
 
-# a640, a680:
 a6xx_gen2 = A6XXProps(
         reg_size_vec4 = 96,
         instr_cache_size = 64, # TODO
@@ -352,10 +351,8 @@ a6xx_gen2 = A6XXProps(
         depth_bounds_require_depth_test_quirk = True, # TODO: check if true
         has_dp2acc = False, # TODO: check if true
         has_8bpp_ubwc = False,
-        supports_double_threadsize = True,
     )
 
-# a650:
 a6xx_gen3 = A6XXProps(
         reg_size_vec4 = 64,
         # Blob limits it to 128 but we hang with 128
@@ -373,10 +370,8 @@ a6xx_gen3 = A6XXProps(
         enable_lrz_fast_clear = True,
         lrz_track_quirk = True,
         has_per_view_viewport = True,
-        supports_double_threadsize = True,
     )
 
-# a635, a660:
 a6xx_gen4 = A6XXProps(
         reg_size_vec4 = 64,
         # Blob limits it to 128 but we hang with 128
@@ -398,7 +393,6 @@ a6xx_gen4 = A6XXProps(
         enable_lrz_fast_clear = True,
         has_lrz_dir_tracking = True,
         has_per_view_viewport = True,
-        supports_double_threadsize = True,
     )
 
 add_gpus([
@@ -408,7 +402,7 @@ add_gpus([
         GPUId(612), # TODO: Test it, based only on libwrapfake dumps
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen1_low,
+        [a6xx_base, a6xx_gen1_low],
         num_ccu = 1,
         tile_align_w = 32,
         tile_align_h = 16,
@@ -440,7 +434,7 @@ add_gpus([
         GPUId(619),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
         num_ccu = 1,
         tile_align_w = 32,
         tile_align_h = 32,
@@ -469,7 +463,7 @@ add_gpus([
         GPUId(620),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
         num_ccu = 1,
         tile_align_w = 32,
         tile_align_h = 16,
@@ -498,7 +492,7 @@ add_gpus([
         GPUId(630),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen1,
+        [a6xx_base, a6xx_gen1],
         num_ccu = 2,
         tile_align_w = 32,
         tile_align_h = 16,
@@ -527,7 +521,7 @@ add_gpus([
         GPUId(640),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen2,
+        [a6xx_base, a6xx_gen2],
         num_ccu = 2,
         tile_align_w = 32,
         tile_align_h = 16,
@@ -556,7 +550,7 @@ add_gpus([
         GPUId(680),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen2,
+        [a6xx_base, a6xx_gen2],
         num_ccu = 4,
         tile_align_w = 64,
         tile_align_h = 32,
@@ -585,7 +579,7 @@ add_gpus([
         GPUId(650),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen3,
+        [a6xx_base, a6xx_gen3],
         num_ccu = 3,
         tile_align_w = 96,
         tile_align_h = 16,
@@ -620,7 +614,7 @@ add_gpus([
         GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
         num_ccu = 2,
         tile_align_w = 32,
         tile_align_h = 16,
@@ -649,7 +643,7 @@ add_gpus([
         GPUId(660),
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
         num_ccu = 3,
         tile_align_w = 96,
         tile_align_h = 16,
@@ -679,7 +673,7 @@ add_gpus([
         GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin 
fallback
     ], A6xxGPUInfo(
         CHIP.A6XX,
-        a6xx_gen4,
+        [a6xx_base, a6xx_gen4],
         num_ccu = 8,
         tile_align_w = 64,
         tile_align_h = 32,
@@ -704,9 +698,45 @@ add_gpus([
         )
     ))
 
+# Based on a6xx_base + a6xx_gen4
+a7xx_base = A6XXProps(
+        has_gmem_fast_clear = True,
+        has_hw_multiview = True,
+        has_fs_tex_prefetch = True,
+        has_sampler_minmax = True,
+
+        supports_double_threadsize = True,
+
+        sysmem_per_ccu_cache_size = 64 * 1024,
+        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,
+
+        prim_alloc_threshold = 0x7,
+        vs_max_inputs_count = 32,
+        max_sets = 8,
+
+        reg_size_vec4 = 64,
+        # Blob limits it to 128 but we hang with 128
+        instr_cache_size = 127,
+        supports_multiview_mask = True,
+        has_z24uint_s8uint = True,
+        tess_use_shared = True,
+        storage_16bit = True,
+        has_tex_filter_cubic = True,
+        has_separate_chroma_filter = True,
+        has_sample_locations = True,
+        has_lpac = True,
+        has_shading_rate = True,
+        has_getfiberid = True,
+        has_dp2acc = True,
+        has_dp4acc = True,
+        enable_lrz_fast_clear = True,
+        has_lrz_dir_tracking = True,
+        has_per_view_viewport = True,
+    )
+
 a7xx_725 = A7XXProps(
         cmdbuf_start_a725_quirk = True,
-)
+    )
 
 a7xx_730 = A7XXProps()
 
@@ -761,7 +791,7 @@ add_gpus([
         GPUId(chip_id=0xffff07030002, name="FD725"),
     ], A6xxGPUInfo(
         CHIP.A7XX,
-        [a6xx_gen4, a7xx_725],
+        [a7xx_base, a7xx_725],
         num_ccu = 4,
         tile_align_w = 64,
         tile_align_h = 32,
@@ -771,7 +801,6 @@ add_gpus([
         fibers_per_sp = 128 * 2 * 16,
         magic_regs = a730_magic_regs,
         raw_magic_regs = a730_raw_magic_regs,
-        max_sets = 8,
     ))
 
 add_gpus([
@@ -779,7 +808,7 @@ add_gpus([
         GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin 
fallback
     ], A6xxGPUInfo(
         CHIP.A7XX,
-        [a6xx_gen4, a7xx_730],
+        [a7xx_base, a7xx_730],
         num_ccu = 4,
         tile_align_w = 64,
         tile_align_h = 32,
@@ -789,7 +818,6 @@ add_gpus([
         fibers_per_sp = 128 * 2 * 16,
         magic_regs = a730_magic_regs,
         raw_magic_regs = a730_raw_magic_regs,
-        max_sets = 8,
     ))
 
 add_gpus([
@@ -798,7 +826,7 @@ add_gpus([
         GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin 
fallback
     ], A6xxGPUInfo(
         CHIP.A7XX,
-        [a6xx_gen4, a7xx_740],
+        [a7xx_base, a7xx_740],
         num_ccu = 6,
         tile_align_w = 64,
         tile_align_h = 32,
@@ -853,7 +881,6 @@ add_gpus([
             [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
             [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
         ],
-        max_sets = 8,
     ))
 
 template = """\

Mesa (main): freedreno/devices: Separate device definition into base + gen features

Reply via email to