From: Marek Olšák <marek.ol...@amd.com>

Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>
---
 include/pci_ids/radeonsi_pci_ids.h              |  6 +++++
 src/amd/common/ac_llvm_util.c                   |  1 +
 src/amd/common/ac_surface.c                     | 33 +++++++++++++++++++------
 src/amd/common/amd_family.h                     |  1 +
 src/gallium/drivers/radeonsi/si_get.c           |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c          |  2 ++
 src/gallium/drivers/radeonsi/si_state.c         |  4 ++-
 src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
 8 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index 6a3594eabc9..62b130307a3 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -219,11 +219,17 @@ CHIPSET(0x699F, POLARIS12) CHIPSET(0x6860, VEGA10) CHIPSET(0x6861, VEGA10) CHIPSET(0x6862, VEGA10) CHIPSET(0x6863, VEGA10) CHIPSET(0x6864, VEGA10) CHIPSET(0x6867, VEGA10) CHIPSET(0x6868, VEGA10) CHIPSET(0x687F, VEGA10) CHIPSET(0x686C, VEGA10) +CHIPSET(0x69A0, VEGA12) +CHIPSET(0x69A1, VEGA12) +CHIPSET(0x69A2, VEGA12) +CHIPSET(0x69A3, VEGA12) +CHIPSET(0x69AF, VEGA12) + CHIPSET(0x15DD, RAVEN) diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index bb9e873af81..f3db1c5a4a4 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -107,20 +107,21 @@ const char *ac_get_llvm_processor_name(enum radeon_family family) case CHIP_FIJI: return "fiji"; case CHIP_STONEY: return "stoney"; case CHIP_POLARIS10: return "polaris10"; case CHIP_POLARIS11: case CHIP_POLARIS12: return "polaris11"; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_RAVEN: return "gfx900"; default: return ""; } } LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options) { assert(family >= CHIP_TAHITI); diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 603b7058bdc..12dfc0cb1f2 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -128,20 +128,24 @@ static void addrlib_family_rev_id(enum radeon_family family, *addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE); break; case CHIP_POLARIS12: *addrlib_family = FAMILY_VI; *addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE); break; case CHIP_VEGA10: *addrlib_family = FAMILY_AI; *addrlib_revid = get_first(AMDGPU_VEGA10_RANGE); break; + case CHIP_VEGA12: + *addrlib_family = FAMILY_AI; + *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE); + break; case CHIP_RAVEN: *addrlib_family = FAMILY_RV; 
*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE); break; default: fprintf(stderr, "amdgpu: Unknown family.\n"); } } static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput) @@ -898,22 +902,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, if (in->flags.depth) { assert(in->swizzleMode != ADDR_SW_LINEAR); /* HTILE */ ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); - hin.hTileFlags.pipeAligned = 1; - hin.hTileFlags.rbAligned = 1; + hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned; + hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned; hin.depthFlags = in->flags; hin.swizzleMode = in->swizzleMode; hin.unalignedWidth = in->width; hin.unalignedHeight = in->height; hin.numSlices = in->numSlices; hin.numMipLevels = in->numMipLevels; ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout); if (ret != ADDR_OK) return ret; @@ -960,22 +964,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, !compressed && in->swizzleMode != ADDR_SW_LINEAR) { ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {}; din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); dout.pMipInfo = meta_mip_info; - din.dccKeyFlags.pipeAligned = 1; - din.dccKeyFlags.rbAligned = 1; + din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; + din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; din.colorFlags = in->flags; din.resourceType = in->resourceType; din.swizzleMode = in->swizzleMode; din.bpp = in->bpp; din.unalignedWidth = in->width; din.unalignedHeight = in->height; din.numSlices = in->numSlices; din.numFrags = in->numFrags; din.numMipLevels = in->numMipLevels; din.dataSurfaceSize = out.surfSize; @@ -1081,22 +1085,28 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, } /* CMASK */ if 
(in->swizzleMode != ADDR_SW_LINEAR) { ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); - cin.cMaskFlags.pipeAligned = 1; - cin.cMaskFlags.rbAligned = 1; + if (in->numSamples) { + /* FMASK is always aligned. */ + cin.cMaskFlags.pipeAligned = 1; + cin.cMaskFlags.rbAligned = 1; + } else { + cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned; + cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned; + } cin.colorFlags = in->flags; cin.resourceType = in->resourceType; cin.unalignedWidth = in->width; cin.unalignedHeight = in->height; cin.numSlices = in->numSlices; if (in->numSamples > 1) cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode; else cin.swizzleMode = in->swizzleMode; @@ -1109,20 +1119,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib, surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned; surf->u.gfx9.cmask_size = cout.cmaskBytes; surf->u.gfx9.cmask_alignment = cout.baseAlign; } } return 0; } static int gfx9_compute_surface(ADDR_HANDLE addrlib, + const struct radeon_info *info, const struct ac_surf_config *config, enum radeon_surf_mode mode, struct radeon_surf *surf) { bool compressed; ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; int r; assert(!(surf->flags & RADEON_SURF_FMASK)); @@ -1189,20 +1200,24 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.width = config->info.width; AddrSurfInfoIn.height = config->info.height; if (config->is_3d) AddrSurfInfoIn.numSlices = config->info.depth; else if (config->is_cube) AddrSurfInfoIn.numSlices = 6; else AddrSurfInfoIn.numSlices = config->info.array_size; + /* This is propagated to HTILE/DCC/CMASK. 
*/ + AddrSurfInfoIn.flags.metaPipeUnaligned = 0; + AddrSurfInfoIn.flags.metaRbUnaligned = 0; + switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: assert(config->info.samples <= 1); assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; break; case RADEON_SURF_MODE_1D: case RADEON_SURF_MODE_2D: if (surf->flags & RADEON_SURF_IMPORTED) { @@ -1314,29 +1329,33 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib, case ADDR_SW_4KB_Z_X: case ADDR_SW_64KB_Z_X: case ADDR_SW_VAR_Z_X: surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH; break; default: assert(0); } + /* Temporary workaround to prevent VM faults and hangs. */ + if (info->family == CHIP_VEGA12) + surf->u.gfx9.fmask_size *= 8; + return 0; } int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info, const struct ac_surf_config *config, enum radeon_surf_mode mode, struct radeon_surf *surf) { int r; r = surf_config_sanity(config); if (r) return r; if (info->chip_class >= GFX9) - return gfx9_compute_surface(addrlib, config, mode, surf); + return gfx9_compute_surface(addrlib, info, config, mode, surf); else return gfx6_compute_surface(addrlib, info, config, mode, surf); } diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h index c62d0aa527a..285111f2a2a 100644 --- a/src/amd/common/amd_family.h +++ b/src/amd/common/amd_family.h @@ -86,20 +86,21 @@ enum radeon_family { CHIP_MULLINS, CHIP_TONGA, CHIP_ICELAND, CHIP_CARRIZO, CHIP_FIJI, CHIP_STONEY, CHIP_POLARIS10, CHIP_POLARIS11, CHIP_POLARIS12, CHIP_VEGA10, + CHIP_VEGA12, CHIP_RAVEN, CHIP_LAST, }; enum chip_class { CLASS_UNKNOWN = 0, R300, R400, R500, R600, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index b4ca5bea943..fc2be33b3e4 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -68,20 +68,21 @@ const char *si_get_family_name(const struct si_screen *sscreen) case CHIP_MULLINS: return "AMD MULLINS"; 
case CHIP_TONGA: return "AMD TONGA"; case CHIP_ICELAND: return "AMD ICELAND"; case CHIP_CARRIZO: return "AMD CARRIZO"; case CHIP_FIJI: return "AMD FIJI"; case CHIP_POLARIS10: return "AMD POLARIS10"; case CHIP_POLARIS11: return "AMD POLARIS11"; case CHIP_POLARIS12: return "AMD POLARIS12"; case CHIP_STONEY: return "AMD STONEY"; case CHIP_VEGA10: return "AMD VEGA10"; + case CHIP_VEGA12: return "AMD VEGA12"; case CHIP_RAVEN: return "AMD RAVEN"; default: return "AMD unknown"; } } static bool si_have_tgsi_compute(struct si_screen *sscreen) { /* Old kernels disallowed some register writes for SI * that are used for indirect dispatches. */ return (sscreen->info.chip_class >= CIK || diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3d787d58cd1..fa9ee43389a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -821,20 +821,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->info.family <= CHIP_POLARIS12) || sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; } else { /* Only enable primitive binning on Raven by default. */ + /* TODO: Investigate if binning is profitable on Vega12. */ sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN && !(sscreen->debug_flags & DBG(NO_DPBB)); } if (sscreen->debug_flags & DBG(DFSM)) { sscreen->dfsm_allowed = sscreen->dpbb_allowed; } else { sscreen->dfsm_allowed = sscreen->dpbb_allowed && !(sscreen->debug_flags & DBG(NO_DFSM)); } @@ -848,20 +849,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. 
*/ if (sscreen->info.family == CHIP_STONEY || sscreen->info.chip_class >= GFX9) { sscreen->has_rbplus = true; sscreen->rbplus_allowed = !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && (sscreen->info.family == CHIP_STONEY || + sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_RAVEN); } sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)) && (sscreen->debug_flags & DBG(DCC_MSAA) || sscreen->info.chip_class == VI); sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 1bfb3c34aa7..b4165a4669b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1668,21 +1668,22 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_LATC2_UNORM: return V_008F14_IMG_DATA_FORMAT_BC5; default: goto out_unknown; } } if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && (sscreen->info.family == CHIP_STONEY || - sscreen->info.chip_class >= GFX9)) { + sscreen->info.family == CHIP_VEGA10 || + sscreen->info.family == CHIP_RAVEN)) { switch (format) { case PIPE_FORMAT_ETC1_RGB8: case PIPE_FORMAT_ETC2_RGB8: case PIPE_FORMAT_ETC2_SRGB8: return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; case PIPE_FORMAT_ETC2_RGB8A1: case PIPE_FORMAT_ETC2_SRGB8A1: return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; case PIPE_FORMAT_ETC2_RGBA8: case PIPE_FORMAT_ETC2_SRGBA8: @@ -5038,20 +5039,21 @@ static void si_init_config(struct si_context *sctx) } si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, RADEON_PRIO_BORDER_COLORS); if (sctx->b.chip_class >= GFX9) { unsigned num_se = sscreen->info.max_se; unsigned pc_lines = 0; switch (sctx->b.family) { case CHIP_VEGA10: + case CHIP_VEGA12: pc_lines = 4096; break; case CHIP_RAVEN: pc_lines = 1024; break; default: assert(0); } si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, diff --git 
a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 686701d718f..0f50ea755cb 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -407,20 +407,21 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state) blend->blend_enable_4bit) != 0; } /* Tunable parameters. Also test with DFSM enabled/disabled. */ unsigned context_states_per_bin; /* allowed range: [0, 5] */ unsigned persistent_states_per_bin; /* allowed range: [0, 31] */ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */ switch (sctx->b.family) { case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_RAVEN: /* Tuned for Raven. Vega might need different values. */ context_states_per_bin = 5; persistent_states_per_bin = 31; fpovs_per_batch = 63; break; default: assert(0); } -- 2.15.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev