From: Marek Olšák <marek.ol...@amd.com>

Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>
---
 include/pci_ids/radeonsi_pci_ids.h              |  6 +++++
 src/amd/common/ac_llvm_util.c                   |  1 +
 src/amd/common/ac_surface.c                     | 33 +++++++++++++++++++------
 src/amd/common/amd_family.h                     |  1 +
 src/gallium/drivers/radeonsi/si_get.c           |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c          |  2 ++
 src/gallium/drivers/radeonsi/si_state.c         |  4 ++-
 src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
 8 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index 6a3594eabc9..62b130307a3 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,11 +219,17 @@ CHIPSET(0x699F, POLARIS12)
 CHIPSET(0x6860, VEGA10)
 CHIPSET(0x6861, VEGA10)
 CHIPSET(0x6862, VEGA10)
 CHIPSET(0x6863, VEGA10)
 CHIPSET(0x6864, VEGA10)
 CHIPSET(0x6867, VEGA10)
 CHIPSET(0x6868, VEGA10)
 CHIPSET(0x687F, VEGA10)
 CHIPSET(0x686C, VEGA10)
 
+CHIPSET(0x69A0, VEGA12)
+CHIPSET(0x69A1, VEGA12)
+CHIPSET(0x69A2, VEGA12)
+CHIPSET(0x69A3, VEGA12)
+CHIPSET(0x69AF, VEGA12)
+
 CHIPSET(0x15DD, RAVEN)
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index bb9e873af81..f3db1c5a4a4 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -107,20 +107,21 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
        case CHIP_FIJI:
                return "fiji";
        case CHIP_STONEY:
                return "stoney";
        case CHIP_POLARIS10:
                return "polaris10";
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                return "polaris11";
        case CHIP_VEGA10:
+       case CHIP_VEGA12:
        case CHIP_RAVEN:
                return "gfx900";
        default:
                return "";
        }
 }
 
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options)
 {
        assert(family >= CHIP_TAHITI);
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 603b7058bdc..12dfc0cb1f2 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -128,20 +128,24 @@ static void addrlib_family_rev_id(enum radeon_family family,
                *addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE);
                break;
        case CHIP_POLARIS12:
                *addrlib_family = FAMILY_VI;
                *addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE);
                break;
        case CHIP_VEGA10:
                *addrlib_family = FAMILY_AI;
                *addrlib_revid = get_first(AMDGPU_VEGA10_RANGE);
                break;
+       case CHIP_VEGA12:
+               *addrlib_family = FAMILY_AI;
+               *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
+               break;
        case CHIP_RAVEN:
                *addrlib_family = FAMILY_RV;
                *addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
                break;
        default:
                fprintf(stderr, "amdgpu: Unknown family.\n");
        }
 }
 
 static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
@@ -898,22 +902,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
        if (in->flags.depth) {
                assert(in->swizzleMode != ADDR_SW_LINEAR);
 
                /* HTILE */
                ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
                ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
 
                hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
                hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
 
-               hin.hTileFlags.pipeAligned = 1;
-               hin.hTileFlags.rbAligned = 1;
+               hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+               hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned;
                hin.depthFlags = in->flags;
                hin.swizzleMode = in->swizzleMode;
                hin.unalignedWidth = in->width;
                hin.unalignedHeight = in->height;
                hin.numSlices = in->numSlices;
                hin.numMipLevels = in->numMipLevels;
 
                ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
                if (ret != ADDR_OK)
                        return ret;
@@ -960,22 +964,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                    !compressed &&
                    in->swizzleMode != ADDR_SW_LINEAR) {
                        ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
                        ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
                        ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
 
                        din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
                        dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
                        dout.pMipInfo = meta_mip_info;
 
-                       din.dccKeyFlags.pipeAligned = 1;
-                       din.dccKeyFlags.rbAligned = 1;
+                       din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+                       din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
                        din.colorFlags = in->flags;
                        din.resourceType = in->resourceType;
                        din.swizzleMode = in->swizzleMode;
                        din.bpp = in->bpp;
                        din.unalignedWidth = in->width;
                        din.unalignedHeight = in->height;
                        din.numSlices = in->numSlices;
                        din.numFrags = in->numFrags;
                        din.numMipLevels = in->numMipLevels;
                        din.dataSurfaceSize = out.surfSize;
@@ -1081,22 +1085,28 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                }
 
                /* CMASK */
                if (in->swizzleMode != ADDR_SW_LINEAR) {
                        ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
                        ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
 
                        cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
                        cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
 
-                       cin.cMaskFlags.pipeAligned = 1;
-                       cin.cMaskFlags.rbAligned = 1;
+                       if (in->numSamples) {
+                               /* FMASK is always aligned. */
+                               cin.cMaskFlags.pipeAligned = 1;
+                               cin.cMaskFlags.rbAligned = 1;
+                       } else {
+                               cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+                               cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned;
+                       }
                        cin.colorFlags = in->flags;
                        cin.resourceType = in->resourceType;
                        cin.unalignedWidth = in->width;
                        cin.unalignedHeight = in->height;
                        cin.numSlices = in->numSlices;
 
                        if (in->numSamples > 1)
                                cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
                        else
                                cin.swizzleMode = in->swizzleMode;
@@ -1109,20 +1119,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
                        surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned;
                        surf->u.gfx9.cmask_size = cout.cmaskBytes;
                        surf->u.gfx9.cmask_alignment = cout.baseAlign;
                }
        }
 
        return 0;
 }
 
 static int gfx9_compute_surface(ADDR_HANDLE addrlib,
+                               const struct radeon_info *info,
                                const struct ac_surf_config *config,
                                enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
 {
        bool compressed;
        ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
        int r;
 
        assert(!(surf->flags & RADEON_SURF_FMASK));
 
@@ -1189,20 +1200,24 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        AddrSurfInfoIn.width = config->info.width;
        AddrSurfInfoIn.height = config->info.height;
 
        if (config->is_3d)
                AddrSurfInfoIn.numSlices = config->info.depth;
        else if (config->is_cube)
                AddrSurfInfoIn.numSlices = 6;
        else
                AddrSurfInfoIn.numSlices = config->info.array_size;
 
+       /* This is propagated to HTILE/DCC/CMASK. */
+       AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
+       AddrSurfInfoIn.flags.metaRbUnaligned = 0;
+
        switch (mode) {
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                assert(config->info.samples <= 1);
                assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
                AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
                break;
 
        case RADEON_SURF_MODE_1D:
        case RADEON_SURF_MODE_2D:
                if (surf->flags & RADEON_SURF_IMPORTED) {
@@ -1314,29 +1329,33 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
                case ADDR_SW_4KB_Z_X:
                case ADDR_SW_64KB_Z_X:
                case ADDR_SW_VAR_Z_X:
                        surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
                        break;
 
                default:
                        assert(0);
        }
 
+       /* Temporary workaround to prevent VM faults and hangs. */
+       if (info->family == CHIP_VEGA12)
+               surf->u.gfx9.fmask_size *= 8;
+
        return 0;
 }
 
 int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config,
                       enum radeon_surf_mode mode,
                       struct radeon_surf *surf)
 {
        int r;
 
        r = surf_config_sanity(config);
        if (r)
                return r;
 
        if (info->chip_class >= GFX9)
-               return gfx9_compute_surface(addrlib, config, mode, surf);
+               return gfx9_compute_surface(addrlib, info, config, mode, surf);
        else
                return gfx6_compute_surface(addrlib, info, config, mode, surf);
 }
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index c62d0aa527a..285111f2a2a 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -86,20 +86,21 @@ enum radeon_family {
     CHIP_MULLINS,
     CHIP_TONGA,
     CHIP_ICELAND,
     CHIP_CARRIZO,
     CHIP_FIJI,
     CHIP_STONEY,
     CHIP_POLARIS10,
     CHIP_POLARIS11,
     CHIP_POLARIS12,
     CHIP_VEGA10,
+    CHIP_VEGA12,
     CHIP_RAVEN,
     CHIP_LAST,
 };
 
 enum chip_class {
     CLASS_UNKNOWN = 0,
     R300,
     R400,
     R500,
     R600,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index b4ca5bea943..fc2be33b3e4 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -68,20 +68,21 @@ const char *si_get_family_name(const struct si_screen *sscreen)
        case CHIP_MULLINS: return "AMD MULLINS";
        case CHIP_TONGA: return "AMD TONGA";
        case CHIP_ICELAND: return "AMD ICELAND";
        case CHIP_CARRIZO: return "AMD CARRIZO";
        case CHIP_FIJI: return "AMD FIJI";
        case CHIP_POLARIS10: return "AMD POLARIS10";
        case CHIP_POLARIS11: return "AMD POLARIS11";
        case CHIP_POLARIS12: return "AMD POLARIS12";
        case CHIP_STONEY: return "AMD STONEY";
        case CHIP_VEGA10: return "AMD VEGA10";
+       case CHIP_VEGA12: return "AMD VEGA12";
        case CHIP_RAVEN: return "AMD RAVEN";
        default: return "AMD unknown";
        }
 }
 
 static bool si_have_tgsi_compute(struct si_screen *sscreen)
 {
        /* Old kernels disallowed some register writes for SI
         * that are used for indirect dispatches. */
        return (sscreen->info.chip_class >= CIK ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3d787d58cd1..fa9ee43389a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -821,20 +821,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                                            sscreen->info.family <= CHIP_POLARIS12) ||
                                           sscreen->info.family == CHIP_VEGA10 ||
                                           sscreen->info.family == CHIP_RAVEN;
        sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
                                        sscreen->info.family == CHIP_RAVEN;
 
        if (sscreen->debug_flags & DBG(DPBB)) {
                sscreen->dpbb_allowed = true;
        } else {
                /* Only enable primitive binning on Raven by default. */
+               /* TODO: Investigate if binning is profitable on Vega12. */
                sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
                                        !(sscreen->debug_flags & DBG(NO_DPBB));
        }
 
        if (sscreen->debug_flags & DBG(DFSM)) {
                sscreen->dfsm_allowed = sscreen->dpbb_allowed;
        } else {
                sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
                                        !(sscreen->debug_flags & DBG(NO_DFSM));
        }
@@ -848,20 +849,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
        /* Some chips have RB+ registers, but don't support RB+. Those must
         * always disable it.
         */
        if (sscreen->info.family == CHIP_STONEY ||
            sscreen->info.chip_class >= GFX9) {
                sscreen->has_rbplus = true;
 
                sscreen->rbplus_allowed =
                        !(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
                        (sscreen->info.family == CHIP_STONEY ||
+                        sscreen->info.family == CHIP_VEGA12 ||
                         sscreen->info.family == CHIP_RAVEN);
        }
 
        sscreen->dcc_msaa_allowed =
                !(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
                (sscreen->debug_flags & DBG(DCC_MSAA) ||
                 sscreen->info.chip_class == VI);
 
        sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1bfb3c34aa7..b4165a4669b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1668,21 +1668,22 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
                case PIPE_FORMAT_RGTC2_UNORM:
                case PIPE_FORMAT_LATC2_UNORM:
                        return V_008F14_IMG_DATA_FORMAT_BC5;
                default:
                        goto out_unknown;
                }
        }
 
        if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
            (sscreen->info.family == CHIP_STONEY ||
-            sscreen->info.chip_class >= GFX9)) {
+            sscreen->info.family == CHIP_VEGA10 ||
+            sscreen->info.family == CHIP_RAVEN)) {
                switch (format) {
                case PIPE_FORMAT_ETC1_RGB8:
                case PIPE_FORMAT_ETC2_RGB8:
                case PIPE_FORMAT_ETC2_SRGB8:
                        return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
                case PIPE_FORMAT_ETC2_RGB8A1:
                case PIPE_FORMAT_ETC2_SRGB8A1:
                        return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
                case PIPE_FORMAT_ETC2_RGBA8:
                case PIPE_FORMAT_ETC2_SRGBA8:
@@ -5038,20 +5039,21 @@ static void si_init_config(struct si_context *sctx)
        }
        si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
                      RADEON_PRIO_BORDER_COLORS);
 
        if (sctx->b.chip_class >= GFX9) {
                unsigned num_se = sscreen->info.max_se;
                unsigned pc_lines = 0;
 
                switch (sctx->b.family) {
                case CHIP_VEGA10:
+               case CHIP_VEGA12:
                        pc_lines = 4096;
                        break;
                case CHIP_RAVEN:
                        pc_lines = 1024;
                        break;
                default:
                        assert(0);
                }
 
                si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 686701d718f..0f50ea755cb 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -407,20 +407,21 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
                                         blend->blend_enable_4bit) != 0;
        }
 
        /* Tunable parameters. Also test with DFSM enabled/disabled. */
        unsigned context_states_per_bin; /* allowed range: [0, 5] */
        unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
        unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
        switch (sctx->b.family) {
        case CHIP_VEGA10:
+       case CHIP_VEGA12:
        case CHIP_RAVEN:
                /* Tuned for Raven. Vega might need different values. */
                context_states_per_bin = 5;
                persistent_states_per_bin = 31;
                fpovs_per_batch = 63;
                break;
        default:
                assert(0);
        }
 
-- 
2.15.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to