Module: Mesa Branch: main Commit: e6bfe42b6e2b0243627d6c98cdb52073084213da URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6bfe42b6e2b0243627d6c98cdb52073084213da
Author: Danylo Piliaiev <dpilia...@igalia.com> Date: Thu Oct 26 18:48:35 2023 +0200 freedreno/devices: Separate device definition into base + gen features After the introduction of A7XX it doesn't make sense to define base GPU properties in A6xxGPUInfo. Now we move to a cleaner definition: - a6xx_base + a6xx_genX - for A6XX - a7xx_base + a7xx_xxx - for A7XX, there are no clearly identifiable sub-gens at the moment. Signed-off-by: Danylo Piliaiev <dpilia...@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25939> --- src/freedreno/common/freedreno_devices.py | 133 ++++++++++++++++++------------ 1 file changed, 80 insertions(+), 53 deletions(-) diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 2bb2ad6f029..b67dc681fc4 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -143,7 +143,7 @@ class A6xxGPUInfo(GPUInfo): def __init__(self, chip, template, num_ccu, tile_align_w, tile_align_h, num_vsc_pipes, cs_shared_mem_size, wave_granularity, fibers_per_sp, - magic_regs, raw_magic_regs = None, max_sets = 5): + magic_regs, raw_magic_regs = None): super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4, tile_align_w = tile_align_w, tile_align_h = tile_align_h, @@ -168,25 +168,6 @@ class A6xxGPUInfo(GPUInfo): if raw_magic_regs: self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs] - # Things that earlier gens have and later gens remove, provide - # defaults here and let them be overridden by sub-gen template: - self.a6xx.has_cp_reg_write = True - self.a6xx.has_8bpp_ubwc = True - - self.a6xx.has_gmem_fast_clear = True - self.a6xx.has_hw_multiview = True - self.a6xx.has_fs_tex_prefetch = True - self.a6xx.has_sampler_minmax = True - - self.a6xx.sysmem_per_ccu_cache_size = 64 * 1024 - self.a6xx.gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value - - self.a6xx.prim_alloc_threshold = 0x7 - - 
self.a6xx.vs_max_inputs_count = 32 - - self.a6xx.max_sets = max_sets - templates = template if type(template) is list else [template] for template in templates: template.apply_props(self) @@ -315,24 +296,36 @@ class A7XXProps(dict): setattr(gpu_info.a7xx, name, val) +a6xx_base = A6XXProps( + has_cp_reg_write = True, + has_8bpp_ubwc = True, + has_gmem_fast_clear = True, + has_hw_multiview = True, + has_fs_tex_prefetch = True, + has_sampler_minmax = True, + + supports_double_threadsize = True, + + sysmem_per_ccu_cache_size = 64 * 1024, + gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value, + + prim_alloc_threshold = 0x7, + vs_max_inputs_count = 32, + max_sets = 5, + ) + + # a6xx can be divided into distinct sub-generations, where certain device- # info parameters are keyed to the sub-generation. These templates reduce # the copypaste -# a615, a616, a618, a619, a620 and a630: -a6xx_gen1 = A6XXProps( - reg_size_vec4 = 96, +a6xx_gen1_low = A6XXProps( + reg_size_vec4 = 48, instr_cache_size = 64, - concurrent_resolve = False, indirect_draw_wfm_quirk = True, depth_bounds_require_depth_test_quirk = True, - supports_double_threadsize = True, - ) -# a605, a608, a610, 612 -a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps( has_gmem_fast_clear = False, - reg_size_vec4 = 48, has_hw_multiview = False, has_sampler_minmax = False, has_fs_tex_prefetch = False, @@ -340,9 +333,15 @@ a6xx_gen1_low = A6XXProps({**a6xx_gen1, **A6XXProps( gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value, vs_max_inputs_count = 16, supports_double_threadsize = False, -)}) + ) + +a6xx_gen1 = A6XXProps( + reg_size_vec4 = 96, + instr_cache_size = 64, + indirect_draw_wfm_quirk = True, + depth_bounds_require_depth_test_quirk = True, + ) -# a640, a680: a6xx_gen2 = A6XXProps( reg_size_vec4 = 96, instr_cache_size = 64, # TODO @@ -352,10 +351,8 @@ a6xx_gen2 = A6XXProps( depth_bounds_require_depth_test_quirk = True, # TODO: check if true has_dp2acc = False, # TODO: check if true 
has_8bpp_ubwc = False, - supports_double_threadsize = True, ) -# a650: a6xx_gen3 = A6XXProps( reg_size_vec4 = 64, # Blob limits it to 128 but we hang with 128 @@ -373,10 +370,8 @@ a6xx_gen3 = A6XXProps( enable_lrz_fast_clear = True, lrz_track_quirk = True, has_per_view_viewport = True, - supports_double_threadsize = True, ) -# a635, a660: a6xx_gen4 = A6XXProps( reg_size_vec4 = 64, # Blob limits it to 128 but we hang with 128 @@ -398,7 +393,6 @@ a6xx_gen4 = A6XXProps( enable_lrz_fast_clear = True, has_lrz_dir_tracking = True, has_per_view_viewport = True, - supports_double_threadsize = True, ) add_gpus([ @@ -408,7 +402,7 @@ add_gpus([ GPUId(612), # TODO: Test it, based only on libwrapfake dumps ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen1_low, + [a6xx_base, a6xx_gen1_low], num_ccu = 1, tile_align_w = 32, tile_align_h = 16, @@ -440,7 +434,7 @@ add_gpus([ GPUId(619), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen1, + [a6xx_base, a6xx_gen1], num_ccu = 1, tile_align_w = 32, tile_align_h = 32, @@ -469,7 +463,7 @@ add_gpus([ GPUId(620), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen1, + [a6xx_base, a6xx_gen1], num_ccu = 1, tile_align_w = 32, tile_align_h = 16, @@ -498,7 +492,7 @@ add_gpus([ GPUId(630), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen1, + [a6xx_base, a6xx_gen1], num_ccu = 2, tile_align_w = 32, tile_align_h = 16, @@ -527,7 +521,7 @@ add_gpus([ GPUId(640), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen2, + [a6xx_base, a6xx_gen2], num_ccu = 2, tile_align_w = 32, tile_align_h = 16, @@ -556,7 +550,7 @@ add_gpus([ GPUId(680), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen2, + [a6xx_base, a6xx_gen2], num_ccu = 4, tile_align_w = 64, tile_align_h = 32, @@ -585,7 +579,7 @@ add_gpus([ GPUId(650), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen3, + [a6xx_base, a6xx_gen3], num_ccu = 3, tile_align_w = 96, tile_align_h = 16, @@ -620,7 +614,7 @@ add_gpus([ GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen4, + [a6xx_base, a6xx_gen4], num_ccu = 2, tile_align_w = 32, tile_align_h = 16, @@ 
-649,7 +643,7 @@ add_gpus([ GPUId(660), ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen4, + [a6xx_base, a6xx_gen4], num_ccu = 3, tile_align_w = 96, tile_align_h = 16, @@ -679,7 +673,7 @@ add_gpus([ GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback ], A6xxGPUInfo( CHIP.A6XX, - a6xx_gen4, + [a6xx_base, a6xx_gen4], num_ccu = 8, tile_align_w = 64, tile_align_h = 32, @@ -704,9 +698,45 @@ add_gpus([ ) )) +# Based on a6xx_base + a6xx_gen4 +a7xx_base = A6XXProps( + has_gmem_fast_clear = True, + has_hw_multiview = True, + has_fs_tex_prefetch = True, + has_sampler_minmax = True, + + supports_double_threadsize = True, + + sysmem_per_ccu_cache_size = 64 * 1024, + gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value, + + prim_alloc_threshold = 0x7, + vs_max_inputs_count = 32, + max_sets = 8, + + reg_size_vec4 = 64, + # Blob limits it to 128 but we hang with 128 + instr_cache_size = 127, + supports_multiview_mask = True, + has_z24uint_s8uint = True, + tess_use_shared = True, + storage_16bit = True, + has_tex_filter_cubic = True, + has_separate_chroma_filter = True, + has_sample_locations = True, + has_lpac = True, + has_shading_rate = True, + has_getfiberid = True, + has_dp2acc = True, + has_dp4acc = True, + enable_lrz_fast_clear = True, + has_lrz_dir_tracking = True, + has_per_view_viewport = True, + ) + a7xx_725 = A7XXProps( cmdbuf_start_a725_quirk = True, -) + ) a7xx_730 = A7XXProps() @@ -761,7 +791,7 @@ add_gpus([ GPUId(chip_id=0xffff07030002, name="FD725"), ], A6xxGPUInfo( CHIP.A7XX, - [a6xx_gen4, a7xx_725], + [a7xx_base, a7xx_725], num_ccu = 4, tile_align_w = 64, tile_align_h = 32, @@ -771,7 +801,6 @@ add_gpus([ fibers_per_sp = 128 * 2 * 16, magic_regs = a730_magic_regs, raw_magic_regs = a730_raw_magic_regs, - max_sets = 8, )) add_gpus([ @@ -779,7 +808,7 @@ add_gpus([ GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback ], A6xxGPUInfo( CHIP.A7XX, - [a6xx_gen4, a7xx_730], + [a7xx_base, a7xx_730], num_ccu = 4, 
tile_align_w = 64, tile_align_h = 32, @@ -789,7 +818,6 @@ add_gpus([ fibers_per_sp = 128 * 2 * 16, magic_regs = a730_magic_regs, raw_magic_regs = a730_raw_magic_regs, - max_sets = 8, )) add_gpus([ @@ -798,7 +826,7 @@ add_gpus([ GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback ], A6xxGPUInfo( CHIP.A7XX, - [a6xx_gen4, a7xx_740], + [a7xx_base, a7xx_740], num_ccu = 6, tile_align_w = 64, tile_align_h = 32, @@ -853,7 +881,6 @@ add_gpus([ [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], ], - max_sets = 8, )) template = """\