From: Xavi Zhang <xavi.zh...@amd.com>

Note: this removes all references to the degrade4Space surface flag (and the degradeBaseLevel create flag); the opt4Space surface flag is used instead.
---
 src/amd/addrlib/addrinterface.h                |  6 ++--
 src/amd/addrlib/core/addrcommon.h              |  3 +-
 src/amd/addrlib/core/addrlib.cpp               | 47 ++++++++++++++++----------
 src/amd/addrlib/core/addrlib.h                 |  2 +-
 src/amd/addrlib/r800/egbaddrlib.cpp            | 16 +++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 12 +++----
 6 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/src/amd/addrlib/addrinterface.h b/src/amd/addrlib/addrinterface.h
index d05c6ef..a50717c 100644
--- a/src/amd/addrlib/addrinterface.h
+++ b/src/amd/addrlib/addrinterface.h
@@ -239,23 +239,22 @@ typedef union _ADDR_CREATE_FLAGS
 {
     struct
     {
         UINT_32 noCubeMipSlicesPad     : 1;    ///< Turn cubemap faces padding 
off
         UINT_32 fillSizeFields         : 1;    ///< If clients fill size 
fields in all input and
                                                ///  output structure
         UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in 
input valid
         UINT_32 useCombinedSwizzle     : 1;    ///< Use combined tile swizzle
         UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub 
level
         UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice 
alignment
-        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes 
automatically for base level
         UINT_32 allowLargeThickTile    : 1;    ///< Allow 
64*thickness*bytesPerPixel > rowSize
-        UINT_32 reserved               : 24;   ///< Reserved bits for future 
use
+        UINT_32 reserved               : 25;   ///< Reserved bits for future 
use
     };
 
     UINT_32 value;
 } ADDR_CREATE_FLAGS;
 
 /**
 
***************************************************************************************************
 *   ADDR_REGISTER_VALUE
 *
 *   @brief
@@ -433,29 +432,28 @@ typedef union _ADDR_SURFACE_FLAGS
         UINT_32 compressZ       : 1; ///< Flag indicates z buffer is compressed
         UINT_32 overlay         : 1; ///< Flag indicates this is an overlay 
surface
         UINT_32 noStencil       : 1; ///< Flag indicates this depth has no 
separate stencil
         UINT_32 display         : 1; ///< Flag indicates this should match 
display controller req.
         UINT_32 opt4Space       : 1; ///< Flag indicates this surface should 
be optimized for space
                                      ///  i.e. save some memory but may lose 
performance
         UINT_32 prt             : 1; ///< Flag for partially resident texture
         UINT_32 qbStereo        : 1; ///< Quad buffer stereo surface
         UINT_32 pow2Pad         : 1; ///< SI: Pad to pow2, must set for mipmap 
(include level0)
         UINT_32 interleaved     : 1; ///< Special flag for interleaved YUV 
surface padding
-        UINT_32 degrade4Space   : 1; ///< Degrade base level's tile mode to 
save memory
         UINT_32 tcCompatible    : 1; ///< Flag indicates surface needs to be 
shader readable
         UINT_32 dispTileType    : 1; ///< NI: force display Tiling for 128 bit 
shared resoruce
         UINT_32 dccCompatible   : 1; ///< VI: whether to support dcc fast clear
         UINT_32 czDispCompatible: 1; ///< SI+: CZ family has a HW bug needs 
special alignment.
                                      ///  This flag indicates we need to 
follow the alignment with
                                      ///  CZ families or other ASICs under PX 
configuration + CZ.
         UINT_32 nonSplit        : 1; ///< CI: depth texture should not be split
-        UINT_32 reserved        : 9; ///< Reserved bits
+        UINT_32 reserved        : 10; ///< Reserved bits
     };
 
     UINT_32 value;
 } ADDR_SURFACE_FLAGS;
 
 /**
 
***************************************************************************************************
 *   ADDR_COMPUTE_SURFACE_INFO_INPUT
 *
 *   @brief
diff --git a/src/amd/addrlib/core/addrcommon.h b/src/amd/addrlib/core/addrcommon.h
index f996c9a..88cbad0 100644
--- a/src/amd/addrlib/core/addrcommon.h
+++ b/src/amd/addrlib/core/addrcommon.h
@@ -125,23 +125,22 @@ union ADDR_CONFIG_FLAGS
         /// There flags are set up by AddrLib inside thru 
AddrInitGlobalParamsFromRegister
         UINT_32 optimalBankSwap        : 1;    ///< New bank tiling for RV770 
only
         UINT_32 noCubeMipSlicesPad     : 1;    ///< Disables faces padding for 
cubemap mipmaps
         UINT_32 fillSizeFields         : 1;    ///< If clients fill size 
fields in all input and
                                                ///  output structure
         UINT_32 ignoreTileInfo         : 1;    ///< Don't use tile info 
structure
         UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in 
input valid
         UINT_32 useCombinedSwizzle     : 1;    ///< Use combined swizzle
         UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub 
level
         UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice 
alignment
-        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes 
automatically for base level
         UINT_32 allowLargeThickTile    : 1;    ///< Allow 
64*thickness*bytesPerPixel > rowSize
-        UINT_32 reserved               : 22;   ///< Reserved bits for future 
use
+        UINT_32 reserved               : 23;   ///< Reserved bits for future 
use
     };
 
     UINT_32 value;
 };
 
 
///////////////////////////////////////////////////////////////////////////////////////////////////
 // Platform specific debug break defines
 
///////////////////////////////////////////////////////////////////////////////////////////////////
 #if DEBUG
     #if defined(__GNUC__)
diff --git a/src/amd/addrlib/core/addrlib.cpp b/src/amd/addrlib/core/addrlib.cpp
index 8cf4a24..b92568e 100644
--- a/src/amd/addrlib/core/addrlib.cpp
+++ b/src/amd/addrlib/core/addrlib.cpp
@@ -257,21 +257,20 @@ ADDR_E_RETURNCODE AddrLib::Create(
     {
         BOOL_32 initValid;
 
         // Pass createFlags to configFlags first since these flags may be 
overwritten
         pLib->m_configFlags.noCubeMipSlicesPad  = 
pCreateIn->createFlags.noCubeMipSlicesPad;
         pLib->m_configFlags.fillSizeFields      = 
pCreateIn->createFlags.fillSizeFields;
         pLib->m_configFlags.useTileIndex        = 
pCreateIn->createFlags.useTileIndex;
         pLib->m_configFlags.useCombinedSwizzle  = 
pCreateIn->createFlags.useCombinedSwizzle;
         pLib->m_configFlags.checkLast2DLevel    = 
pCreateIn->createFlags.checkLast2DLevel;
         pLib->m_configFlags.useHtileSliceAlign  = 
pCreateIn->createFlags.useHtileSliceAlign;
-        pLib->m_configFlags.degradeBaseLevel    = 
pCreateIn->createFlags.degradeBaseLevel;
         pLib->m_configFlags.allowLargeThickTile = 
pCreateIn->createFlags.allowLargeThickTile;
 
         pLib->SetAddrChipFamily(pCreateIn->chipFamily, 
pCreateIn->chipRevision);
 
         pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
 
         // Global parameters initialized and remaining configFlags bits are 
set as well
         initValid = pLib->HwlInitGlobalParams(pCreateIn);
 
         if (initValid)
@@ -552,22 +551,22 @@ ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
         {
             AddrTileMode tileMode = localIn.tileMode;
             AddrTileType tileType = localIn.tileType;
 
             // HWL layer may override tile mode if necessary
             if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
             {
                 localIn.tileMode = tileMode;
                 localIn.tileType = tileType;
             }
-            // Degrade base level if applicable
-            if (DegradeBaseLevel(&localIn, &tileMode))
+            // Optimize tile mode if possible
+            if (OptimizeTileMode(&localIn, &tileMode))
             {
                 localIn.tileMode = tileMode;
             }
         }
 
         // Call main function to compute surface info
         if (returnCode == ADDR_OK)
         {
             returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
         }
@@ -3486,70 +3485,84 @@ VOID AddrLib::ComputeMipLevel(
             pIn->width = PowTwoAlign(pIn->width, 4);
             pIn->height = PowTwoAlign(pIn->height, 4);
         }
     }
 
     HwlComputeMipLevel(pIn);
 }
 
 /**
 
***************************************************************************************************
-*   AddrLib::DegradeBaseLevel
+*   AddrLib::OptimizeTileMode
 *
 *   @brief
-*       Check if base level's tile mode can be degraded
+*       Check if base level's tile mode can be optimized (degraded)
 *   @return
 *       TRUE if degraded, also returns degraded tile mode (unchanged if not 
degraded)
 
***************************************************************************************************
 */
-BOOL_32 AddrLib::DegradeBaseLevel(
+BOOL_32 AddrLib::OptimizeTileMode(
     const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input 
structure for surface info
     AddrTileMode*                           pTileMode   ///< [out] Degraded 
tile mode
     ) const
 {
-    BOOL_32 degraded = FALSE;
     AddrTileMode tileMode = pIn->tileMode;
     UINT_32 thickness = ComputeSurfaceThickness(tileMode);
 
-    if (m_configFlags.degradeBaseLevel) // This is a global setting
+    // Optimization can only be done on level 0 and samples <= 1
+    if ((pIn->flags.opt4Space == TRUE)      &&
+        (pIn->mipLevel == 0)                &&
+        (pIn->numSamples <= 1)              &&
+        (pIn->flags.display == FALSE)       &&
+        (IsPrtTileMode(tileMode) == FALSE)  &&
+        (pIn->flags.prt == FALSE))
     {
-        if (pIn->flags.degrade4Space        && // Degradation per surface
-            pIn->mipLevel == 0              &&
-            pIn->numSamples == 1            &&
-            IsMacroTiled(tileMode))
+        // Check if linear mode is optimal
+        if ((pIn->height == 1) &&
+            (IsLinear(tileMode) == FALSE) &&
+            (AddrElemLib::IsBlockCompressed(pIn->format) == FALSE) &&
+            (pIn->flags.depth == FALSE) &&
+            (pIn->flags.stencil == FALSE))
+        {
+            tileMode = ADDR_TM_LINEAR_ALIGNED;
+        }
+        else if (IsMacroTiled(tileMode))
         {
             if (HwlDegradeBaseLevel(pIn))
             {
-                *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : 
ADDR_TM_1D_TILED_THICK;
-                degraded = TRUE;
+                tileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : 
ADDR_TM_1D_TILED_THICK;
             }
             else if (thickness > 1)
             {
                 // As in the following HwlComputeSurfaceInfo, thick modes may 
be degraded to
                 // thinner modes, we should re-evaluate whether the 
corresponding thinner modes
                 // need to be degraded. If so, we choose 1D thick mode instead.
                 tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp);
                 if (tileMode != pIn->tileMode)
                 {
                     ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn;
                     input.tileMode = tileMode;
                     if (HwlDegradeBaseLevel(&input))
                     {
-                        *pTileMode = ADDR_TM_1D_TILED_THICK;
-                        degraded = TRUE;
+                        tileMode = ADDR_TM_1D_TILED_THICK;
                     }
                 }
             }
         }
     }
 
-    return degraded;
+    BOOL_32 optimized = (tileMode != pIn->tileMode);
+    if (optimized)
+    {
+        *pTileMode = tileMode;
+    }
+    return optimized;
 }
 
 /**
 
***************************************************************************************************
 *   AddrLib::DegradeLargeThickTile
 *
 *   @brief
 *       Check if the thickness needs to be reduced if a tile is too large
 *   @return
 *       The degraded tile mode (unchanged if not degraded)
diff --git a/src/amd/addrlib/core/addrlib.h b/src/amd/addrlib/core/addrlib.h
index 43c55ff..d693fd2 100644
--- a/src/amd/addrlib/core/addrlib.h
+++ b/src/amd/addrlib/core/addrlib.h
@@ -645,21 +645,21 @@ private:
     //
     VOID    ComputeTileDataWidthAndHeight(
         UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
         UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
 
     UINT_32 ComputeXmaskCoordYFromPipe(
         UINT_32 pipe, UINT_32 x) const;
 
     VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
 
-    BOOL_32 DegradeBaseLevel(
+    BOOL_32 OptimizeTileMode(
         const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) 
const;
 
 protected:
     AddrLibClass        m_class;        ///< Store class type (HWL type)
 
     AddrChipFamily      m_chipFamily;   ///< Chip family translated from the 
one in atiid.h
 
     UINT_32             m_chipRevision; ///< Revision id from xxx_id.h
 
     UINT_32             m_version;      ///< Current version
diff --git a/src/amd/addrlib/r800/egbaddrlib.cpp b/src/amd/addrlib/r800/egbaddrlib.cpp
index abd1a79..5d80906 100644
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -1151,20 +1151,36 @@ BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
                                                pIn->mipLevel,
                                                pIn->numSamples,
                                                &tileInfo,
                                                &baseAlign,
                                                &pitchAlign,
                                                &heightAlign);
 
     if (valid)
     {
         degrade = (pIn->width < pitchAlign || pIn->height < heightAlign);
+        // Check whether 2D tiling still has too much footprint
+        if (degrade == FALSE)
+        {
+            // Only check width and height as slices are aligned to thickness
+            UINT_64 unalignedSize = pIn->width * pIn->height;
+
+            UINT_32 alignedPitch = PowTwoAlign(pIn->width, pitchAlign);
+            UINT_32 alignedHeight = PowTwoAlign(pIn->height, heightAlign);
+            UINT_64 alignedSize = alignedPitch * alignedHeight;
+
+            // alignedSize > 1.5 * unalignedSize
+            if (2 * alignedSize > 3 * unalignedSize)
+            {
+                degrade = TRUE;
+            }
+        }
     }
     else
     {
         degrade = TRUE;
     }
 
     return degrade;
 }
 
 /**
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index abe2b2a..8632f06 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -117,21 +117,20 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
    if (ws->info.chip_class == SI) {
       regValue.pMacroTileConfig = NULL;
       regValue.noOfMacroEntries = 0;
    } else {
       regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
       regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
    }
 
    createFlags.value = 0;
    createFlags.useTileIndex = 1;
-   createFlags.degradeBaseLevel = 1;
    createFlags.useHtileSliceAlign = 1;
 
    addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
    addrCreateInput.chipFamily = ws->family;
    addrCreateInput.chipRevision = ws->rev_id;
    addrCreateInput.createFlags = createFlags;
    addrCreateInput.callbacks.allocSysMem = allocSysMem;
    addrCreateInput.callbacks.freeSysMem = freeSysMem;
    addrCreateInput.callbacks.debugPrint = 0;
    addrCreateInput.regValue = regValue;
@@ -394,25 +393,24 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoIn.flags.depth = (flags & RADEON_SURF_ZBUFFER) != 0;
    AddrSurfInfoIn.flags.cube = tex->target == PIPE_TEXTURE_CUBE;
    AddrSurfInfoIn.flags.fmask = (flags & RADEON_SURF_FMASK) != 0;
    AddrSurfInfoIn.flags.display = (flags & RADEON_SURF_SCANOUT) != 0;
    AddrSurfInfoIn.flags.pow2Pad = tex->last_level > 0;
    AddrSurfInfoIn.flags.tcCompatible = (flags & 
RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
 
    /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
     * requested, because TC-compatible HTILE requires 2D tiling.
     */
-   AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible &&
-                                        !AddrSurfInfoIn.flags.fmask &&
-                                        tex->nr_samples <= 1 &&
-                                        (flags & 
RADEON_SURF_OPTIMIZE_FOR_SPACE);
-   AddrSurfInfoIn.flags.opt4Space = AddrSurfInfoIn.flags.degrade4Space;
+   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
+                                    !AddrSurfInfoIn.flags.fmask &&
+                                    tex->nr_samples <= 1 &&
+                                    (flags & RADEON_SURF_OPTIMIZE_FOR_SPACE);
 
    /* DCC notes:
     * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
     *   with samples >= 4.
     * - Mipmapped array textures have low performance (discovered by a closed
     *   driver team).
     */
    AddrSurfInfoIn.flags.dccCompatible = ws->info.chip_class >= VI &&
                                         !(flags & RADEON_SURF_Z_OR_SBUFFER) &&
                                         !(flags & RADEON_SURF_DISABLE_DCC) &&
@@ -440,21 +438,21 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
       assert(!(flags & RADEON_SURF_FMASK));
 
       /* If any of these parameters are incorrect, the calculation
        * will fail. */
       AddrTileInfoIn.banks = surf->num_banks;
       AddrTileInfoIn.bankWidth = surf->bankw;
       AddrTileInfoIn.bankHeight = surf->bankh;
       AddrTileInfoIn.macroAspectRatio = surf->mtilea;
       AddrTileInfoIn.tileSplitBytes = surf->tile_split;
       AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to 
GB_TILE_MODE */
-      AddrSurfInfoIn.flags.degrade4Space = 0;
+      AddrSurfInfoIn.flags.opt4Space = 0;
       AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
 
       /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
        * the tile index, because we are expected to know it if
        * we know the other parameters.
        *
        * This is something that can easily be fixed in Addrlib.
        * For now, just figure it out here.
        * Note that only 2D_TILE_THIN1 is handled here.
        */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to