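From: Xavi Zhang <xavi.zh...@amd.com> Note: remove references to the degrade4Space surface flag and the degradeBaseLevel create/config flag, and use opt4Space instead. AddrLib::DegradeBaseLevel is renamed to AddrLib::OptimizeTileMode, which can now also select ADDR_TM_LINEAR_ALIGNED for height-1 surfaces, and EgBasedAddrLib::HwlDegradeBaseLevel additionally degrades when the 2D-tiled aligned width*height exceeds 1.5x the unaligned width*height. --- src/amd/addrlib/addrinterface.h | 6 ++-- src/amd/addrlib/core/addrcommon.h | 3 +- src/amd/addrlib/core/addrlib.cpp | 47 ++++++++++++++++---------- src/amd/addrlib/core/addrlib.h | 2 +- src/amd/addrlib/r800/egbaddrlib.cpp | 16 +++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++---- 6 files changed, 55 insertions(+), 31 deletions(-)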
diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h index d05c6ef..a50717c 100644 --- a/src/amd/addrlib/addrinterface.h +++ b/src/amd/addrlib/addrinterface.h @@ -239,23 +239,22 @@ typedef union _ADDR_CREATE_FLAGS { struct { UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and /// output structure UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 24; ///< Reserved bits for future use + UINT_32 reserved : 25; ///< Reserved bits for future use }; UINT_32 value; } ADDR_CREATE_FLAGS; /** *************************************************************************************************** * ADDR_REGISTER_VALUE * * @brief @@ -433,29 +432,28 @@ typedef union _ADDR_SURFACE_FLAGS UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil UINT_32 display : 1; ///< Flag indicates this should match display controller req. UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space /// i.e. 
save some memory but may lose performance UINT_32 prt : 1; ///< Flag for partially resident texture UINT_32 qbStereo : 1; ///< Quad buffer stereo surface UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs special alignment. /// This flag indicates we need to follow the alignment with /// CZ families or other ASICs under PX configuration + CZ. UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 reserved : 9; ///< Reserved bits + UINT_32 reserved : 10; ///< Reserved bits }; UINT_32 value; } ADDR_SURFACE_FLAGS; /** *************************************************************************************************** * ADDR_COMPUTE_SURFACE_INFO_INPUT * * @brief diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h index f996c9a..88cbad0 100644 --- a/src/amd/addrlib/core/addrcommon.h +++ b/src/amd/addrlib/core/addrcommon.h @@ -125,23 +125,22 @@ union ADDR_CONFIG_FLAGS /// There flags are set up by AddrLib inside thru AddrInitGlobalParamsFromRegister UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and /// output structure UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level 
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 22; ///< Reserved bits for future use + UINT_32 reserved : 23; ///< Reserved bits for future use }; UINT_32 value; }; /////////////////////////////////////////////////////////////////////////////////////////////////// // Platform specific debug break defines /////////////////////////////////////////////////////////////////////////////////////////////////// #if DEBUG #if defined(__GNUC__) diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp index 8cf4a24..b92568e 100644 --- a/src/amd/addrlib/core/addrlib.cpp +++ b/src/amd/addrlib/core/addrlib.cpp @@ -257,21 +257,20 @@ ADDR_E_RETURNCODE AddrLib::Create( { BOOL_32 initValid; // Pass createFlags to configFlags first since these flags may be overwritten pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; - pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel; pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); // Global parameters initialized and remaining configFlags bits are set as well initValid = pLib->HwlInitGlobalParams(pCreateIn); if (initValid) @@ -552,22 +551,22 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo( { 
AddrTileMode tileMode = localIn.tileMode; AddrTileType tileType = localIn.tileType; // HWL layer may override tile mode if necessary if (HwlOverrideTileMode(&localIn, &tileMode, &tileType)) { localIn.tileMode = tileMode; localIn.tileType = tileType; } - // Degrade base level if applicable - if (DegradeBaseLevel(&localIn, &tileMode)) + // Optimize tile mode if possible + if (OptimizeTileMode(&localIn, &tileMode)) { localIn.tileMode = tileMode; } } // Call main function to compute surface info if (returnCode == ADDR_OK) { returnCode = HwlComputeSurfaceInfo(&localIn, pOut); } @@ -3486,70 +3485,84 @@ VOID AddrLib::ComputeMipLevel( pIn->width = PowTwoAlign(pIn->width, 4); pIn->height = PowTwoAlign(pIn->height, 4); } } HwlComputeMipLevel(pIn); } /** *************************************************************************************************** -* AddrLib::DegradeBaseLevel +* AddrLib::OptimizeTileMode * * @brief -* Check if base level's tile mode can be degraded +* Check if base level's tile mode can be optimized (degraded) * @return * TRUE if degraded, also returns degraded tile mode (unchanged if not degraded) *************************************************************************************************** */ -BOOL_32 AddrLib::DegradeBaseLevel( +BOOL_32 AddrLib::OptimizeTileMode( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure for surface info AddrTileMode* pTileMode ///< [out] Degraded tile mode ) const { - BOOL_32 degraded = FALSE; AddrTileMode tileMode = pIn->tileMode; UINT_32 thickness = ComputeSurfaceThickness(tileMode); - if (m_configFlags.degradeBaseLevel) // This is a global setting + // Optimization can only be done on level 0 and samples <= 1 + if ((pIn->flags.opt4Space == TRUE) && + (pIn->mipLevel == 0) && + (pIn->numSamples <= 1) && + (pIn->flags.display == FALSE) && + (IsPrtTileMode(tileMode) == FALSE) && + (pIn->flags.prt == FALSE)) { - if (pIn->flags.degrade4Space && // Degradation per surface - pIn->mipLevel == 0 && - 
pIn->numSamples == 1 && - IsMacroTiled(tileMode)) + // Check if linear mode is optimal + if ((pIn->height == 1) && + (IsLinear(tileMode) == FALSE) && + (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) && + (pIn->flags.depth == FALSE) && + (pIn->flags.stencil == FALSE)) + { + tileMode = ADDR_TM_LINEAR_ALIGNED; + } + else if (IsMacroTiled(tileMode)) { if (HwlDegradeBaseLevel(pIn)) { - *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - degraded = TRUE; + tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; } else if (thickness > 1) { // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to // thinner modes, we should re-evaluate whether the corresponding thinner modes // need to be degraded. If so, we choose 1D thick mode instead. tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp); if (tileMode != pIn->tileMode) { ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn; input.tileMode = tileMode; if (HwlDegradeBaseLevel(&input)) { - *pTileMode = ADDR_TM_1D_TILED_THICK; - degraded = TRUE; + tileMode = ADDR_TM_1D_TILED_THICK; } } } } } - return degraded; + BOOL_32 optimized = (tileMode != pIn->tileMode); + if (optimized) + { + *pTileMode = tileMode; + } + return optimized; } /** *************************************************************************************************** * AddrLib::DegradeLargeThickTile * * @brief * Check if the thickness needs to be reduced if a tile is too large * @return * The degraded tile mode (unchanged if not degraded) diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h index 43c55ff..d693fd2 100644 --- a/src/amd/addrlib/core/addrlib.h +++ b/src/amd/addrlib/core/addrlib.h @@ -645,21 +645,21 @@ private: // VOID ComputeTileDataWidthAndHeight( UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, UINT_32* pMacroWidth, UINT_32* pMacroHeight) const; UINT_32 ComputeXmaskCoordYFromPipe( UINT_32 pipe, UINT_32 x) const; VOID 
SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); - BOOL_32 DegradeBaseLevel( + BOOL_32 OptimizeTileMode( const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const; protected: AddrLibClass m_class; ///< Store class type (HWL type) AddrChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h UINT_32 m_chipRevision; ///< Revision id from xxx_id.h UINT_32 m_version; ///< Current version diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp index abd1a79..5d80906 100644 --- a/src/amd/addrlib/r800/egbaddrlib.cpp +++ b/src/amd/addrlib/r800/egbaddrlib.cpp @@ -1151,20 +1151,36 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel( pIn->mipLevel, pIn->numSamples, &tileInfo, &baseAlign, &pitchAlign, &heightAlign); if (valid) { degrade = (pIn->width < pitchAlign || pIn->height < heightAlign); + // Check whether 2D tiling still has too much footprint + if (degrade == FALSE) + { + // Only check width and height as slices are aligned to thickness + UINT_64 unalignedSize = pIn->width * pIn->height; + + UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign); + UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign); + UINT_64 alignedSize = alignedPitch * alignedHeight; + + // alignedSize > 1.5 * unalignedSize + if (2 * alignedSize > 3 * unalignedSize) + { + degrade = TRUE; + } + } } else { degrade = TRUE; } return degrade; } /** diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index abe2b2a..8632f06 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -117,21 +117,20 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws) if (ws->info.chip_class == SI) { regValue.pMacroTileConfig = NULL; regValue.noOfMacroEntries = 0; } else { regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode; regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode); } createFlags.value = 0; 
createFlags.useTileIndex = 1; - createFlags.degradeBaseLevel = 1; createFlags.useHtileSliceAlign = 1; addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; addrCreateInput.chipFamily = ws->family; addrCreateInput.chipRevision = ws->rev_id; addrCreateInput.createFlags = createFlags; addrCreateInput.callbacks.allocSysMem = allocSysMem; addrCreateInput.callbacks.freeSysMem = freeSysMem; addrCreateInput.callbacks.debugPrint = 0; addrCreateInput.regValue = regValue; @@ -394,25 +393,24 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.depth = (flags & RADEON_SURF_ZBUFFER) != 0; AddrSurfInfoIn.flags.cube = tex->target == PIPE_TEXTURE_CUBE; AddrSurfInfoIn.flags.fmask = (flags & RADEON_SURF_FMASK) != 0; AddrSurfInfoIn.flags.display = (flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = tex->last_level > 0; AddrSurfInfoIn.flags.tcCompatible = (flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been * requested, because TC-compatible HTILE requires 2D tiling. */ - AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible && - !AddrSurfInfoIn.flags.fmask && - tex->nr_samples <= 1 && - (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); - AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space; + AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible && + !AddrSurfInfoIn.flags.fmask && + tex->nr_samples <= 1 && + (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE); /* DCC notes: * - If we add MSAA support, keep in mind that CB can't decompress 8bpp * with samples >= 4. * - Mipmapped array textures have low performance (discovered by a closed * driver team). 
*/ AddrSurfInfoIn.flags.dccCompatible = ws->info.chip_class >= VI && !(flags & RADEON_SURF_Z_OR_SBUFFER) && !(flags & RADEON_SURF_DISABLE_DCC) && @@ -440,21 +438,21 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, assert(!(flags & RADEON_SURF_FMASK)); /* If any of these parameters are incorrect, the calculation * will fail. */ AddrTileInfoIn.banks = surf->num_banks; AddrTileInfoIn.bankWidth = surf->bankw; AddrTileInfoIn.bankHeight = surf->bankh; AddrTileInfoIn.macroAspectRatio = surf->mtilea; AddrTileInfoIn.tileSplitBytes = surf->tile_split; AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */ - AddrSurfInfoIn.flags.degrade4Space = 0; + AddrSurfInfoIn.flags.opt4Space = 0; AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn; /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set * the tile index, because we are expected to know it if * we know the other parameters. * * This is something that can easily be fixed in Addrlib. * For now, just figure it out here. * Note that only 2D_TILE_THIN1 is handled here. */ -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev