Allows us to use the 3D engine for memory management
and allows us to use vram beyond the BAR aperture.

v2: fix copy paste typo
Reported-by: Nils Wallm?nius <nils.wallmenius at gmail.com>

Signed-off-by: Alex Deucher <alexdeucher at gmail.com>
---
 drivers/gpu/drm/radeon/cayman_blit_shaders.c |  326 ++++++++++++++++-
 drivers/gpu/drm/radeon/cayman_blit_shaders.h |    3 +
 drivers/gpu/drm/radeon/evergreen_blit_kms.c  |  505 ++++++++++++++------------
 drivers/gpu/drm/radeon/ni.c                  |   13 +-
 drivers/gpu/drm/radeon/radeon_asic.c         |    6 +-
 5 files changed, 598 insertions(+), 255 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c 
b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
index e148ab0..7b4eeb7 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
@@ -39,17 +39,335 @@

 const u32 cayman_default_state[] =
 {
-       /* XXX fill in additional blit state */
+       0xc0066900,
+       0x00000000,
+       0x00000060, /* DB_RENDER_CONTROL */
+       0x00000000, /* DB_COUNT_CONTROL */
+       0x00000000, /* DB_DEPTH_VIEW */
+       0x0000002a, /* DB_RENDER_OVERRIDE */
+       0x00000000, /* DB_RENDER_OVERRIDE2 */
+       0x00000000, /* DB_HTILE_DATA_BASE */

        0xc0026900,
-       0x00000316,
-       0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-       0x00000010, /*  */
+       0x0000000a,
+       0x00000000, /* DB_STENCIL_CLEAR */
+       0x00000000, /* DB_DEPTH_CLEAR */
+
+       0xc0036900,
+       0x0000000f,
+       0x00000000, /* DB_DEPTH_INFO */
+       0x00000000, /* DB_Z_INFO */
+       0x00000000, /* DB_STENCIL_INFO */
+
+       0xc0016900,
+       0x00000080,
+       0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+       0xc00d6900,
+       0x00000083,
+       0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+       0x00000000, /* PA_SC_CLIPRECT_0_TL */
+       0x20002000, /* PA_SC_CLIPRECT_0_BR */
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0xaaaaaaaa, /* PA_SC_EDGERULE */
+       0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+       0x0000000f, /* CB_TARGET_MASK */
+       0x0000000f, /* CB_SHADER_MASK */
+
+       0xc0226900,
+       0x00000094,
+       0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+       0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+       0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+       0xc0016900,
+       0x000000d4,
+       0x00000000, /* SX_MISC */

        0xc0026900,
        0x000000d9,
        0x00000000, /* CP_RINGID */
        0x00000000, /* CP_VMID */
+
+       0xc0096900,
+       0x00000100,
+       0x00ffffff, /* VGT_MAX_VTX_INDX */
+       0x00000000, /* VGT_MIN_VTX_INDX */
+       0x00000000, /* VGT_INDX_OFFSET */
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+       0x00000000, /* SX_ALPHA_TEST_CONTROL */
+       0x00000000, /* CB_BLEND_RED */
+       0x00000000, /* CB_BLEND_GREEN */
+       0x00000000, /* CB_BLEND_BLUE */
+       0x00000000, /* CB_BLEND_ALPHA */
+
+       0xc0016900,
+       0x00000187,
+       0x00000100, /* SPI_VS_OUT_ID_0 */
+
+       0xc0026900,
+       0x00000191,
+       0x00000100, /* SPI_PS_INPUT_CNTL_0 */
+       0x00000101, /* SPI_PS_INPUT_CNTL_1 */
+
+       0xc0016900,
+       0x000001b1,
+       0x00000000, /* SPI_VS_OUT_CONFIG */
+
+       0xc0106900,
+       0x000001b3,
+       0x20000001, /* SPI_PS_IN_CONTROL_0 */
+       0x00000000, /* SPI_PS_IN_CONTROL_1 */
+       0x00000000, /* SPI_INTERP_CONTROL_0 */
+       0x00000000, /* SPI_INPUT_Z */
+       0x00000000, /* SPI_FOG_CNTL */
+       0x00100000, /* SPI_BARYC_CNTL */
+       0x00000000, /* SPI_PS_IN_CONTROL_2 */
+       0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
+       0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
+       0x00000000, /* SPI_GPR_MGMT */
+       0x00000000, /* SPI_LDS_MGMT */
+       0x00000000, /* SPI_STACK_MGMT */
+       0x00000000, /* SPI_WAVE_MGMT_1 */
+       0x00000000, /* SPI_WAVE_MGMT_2 */
+
+       0xc0016900,
+       0x000001e0,
+       0x00000000, /* CB_BLEND0_CONTROL */
+
+       0xc00e6900,
+       0x00000200,
+       0x00000000, /* DB_DEPTH_CONTROL */
+       0x00000000, /* DB_EQAA */
+       0x00cc0010, /* CB_COLOR_CONTROL */
+       0x00000210, /* DB_SHADER_CONTROL */
+       0x00010000, /* PA_CL_CLIP_CNTL */
+       0x00000004, /* PA_SU_SC_MODE_CNTL */
+       0x00000100, /* PA_CL_VTE_CNTL */
+       0x00000000, /* PA_CL_VS_OUT_CNTL */
+       0x00000000, /* PA_CL_NANINF_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+       0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+       0x00000000, /*  */
+       0x00000000, /*  */
+
+       0xc0026900,
+       0x00000229,
+       0x00000000, /* SQ_PGM_START_FS */
+       0x00000000,
+
+       0xc0016900,
+       0x0000023b,
+       0x00000000, /* SQ_LDS_ALLOC_PS */
+
+       0xc0066900,
+       0x00000240,
+       0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0046900,
+       0x00000247,
+       0x00000000, /* SQ_GS_VERT_ITEMSIZE */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0116900,
+       0x00000280,
+       0x00000000, /* PA_SU_POINT_SIZE */
+       0x00000000, /* PA_SU_POINT_MINMAX */
+       0x00000008, /* PA_SU_LINE_CNTL */
+       0x00000000, /* PA_SC_LINE_STIPPLE */
+       0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+       0x00000000, /* VGT_HOS_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000, /* VGT_GS_MODE */
+
+       0xc0026900,
+       0x00000292,
+       0x00000000, /* PA_SC_MODE_CNTL_0 */
+       0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+       0xc0016900,
+       0x000002a1,
+       0x00000000, /* VGT_PRIMITIVEID_EN */
+
+       0xc0016900,
+       0x000002a5,
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+       0xc0026900,
+       0x000002a8,
+       0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+       0x00000000,
+
+       0xc0026900,
+       0x000002ad,
+       0x00000000, /* VGT_REUSE_OFF */
+       0x00000000,
+
+       0xc0016900,
+       0x000002d5,
+       0x00000000, /* VGT_SHADER_STAGES_EN */
+
+       0xc0016900,
+       0x000002dc,
+       0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+       0xc0066900,
+       0x000002de,
+       0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0026900,
+       0x000002e5,
+       0x00000000, /* VGT_STRMOUT_CONFIG */
+       0x00000000,
+
+       0xc01b6900,
+       0x000002f5,
+       0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+       0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+       0x00000000, /* PA_SC_LINE_CNTL */
+       0x00000000, /* PA_SC_AA_CONFIG */
+       0x00000005, /* PA_SU_VTX_CNTL */
+       0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+       0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+       0xffffffff,
+
+       0xc0026900,
+       0x00000316,
+       0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+       0x00000010, /*  */
+};
+
+const u32 cayman_vs[] =
+{
+       0x00000004,
+       0x80400400,
+       0x0000a03c,
+       0x95000688,
+       0x00004000,
+       0x15000688,
+       0x00000000,
+       0x88000000,
+       0x04000000,
+       0x67961001,
+#ifdef __BIG_ENDIAN
+       0x00020000,
+#else
+       0x00000000,
+#endif
+       0x00000000,
+       0x04000000,
+       0x67961000,
+#ifdef __BIG_ENDIAN
+       0x00020008,
+#else
+       0x00000008,
+#endif
+       0x00000000,
+};
+
+const u32 cayman_ps[] =
+{
+       0x00000004,
+       0xa00c0000,
+       0x00000008,
+       0x80400000,
+       0x00000000,
+       0x95000688,
+       0x00000000,
+       0x88000000,
+       0x00380400,
+       0x00146b10,
+       0x00380000,
+       0x20146b10,
+       0x00380400,
+       0x40146b00,
+       0x80380000,
+       0x60146b00,
+       0x00000010,
+       0x000d1000,
+       0xb0800000,
+       0x00000000,
 };

+const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
+const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h 
b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
index 33b75e5..f5d0e9a 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
@@ -25,8 +25,11 @@
 #ifndef CAYMAN_BLIT_SHADERS_H
 #define CAYMAN_BLIT_SHADERS_H

+extern const u32 cayman_ps[];
+extern const u32 cayman_vs[];
 extern const u32 cayman_default_state[];

+extern const u32 cayman_ps_size, cayman_vs_size;
 extern const u32 cayman_default_size;

 #endif
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c 
b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 4086729..a60ad28 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -31,6 +31,7 @@

 #include "evergreend.h"
 #include "evergreen_blit_shaders.h"
+#include "cayman_blit_shaders.h"

 #define DI_PT_RECTLIST        0x11
 #define DI_INDEX_SIZE_16_BIT  0x0
@@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev)
        u64 gpu_addr;
        int dwords;

-       switch (rdev->family) {
-       case CHIP_CEDAR:
-       default:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 96;
-               num_vs_threads = 16;
-               num_gs_threads = 16;
-               num_es_threads = 16;
-               num_hs_threads = 16;
-               num_ls_threads = 16;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_REDWOOD:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_JUNIPER:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_CYPRESS:
-       case CHIP_HEMLOCK:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_PALM:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 96;
-               num_vs_threads = 16;
-               num_gs_threads = 16;
-               num_es_threads = 16;
-               num_hs_threads = 16;
-               num_ls_threads = 16;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_BARTS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 85;
-               num_vs_stack_entries = 85;
-               num_gs_stack_entries = 85;
-               num_es_stack_entries = 85;
-               num_hs_stack_entries = 85;
-               num_ls_stack_entries = 85;
-               break;
-       case CHIP_TURKS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 20;
-               num_gs_threads = 20;
-               num_es_threads = 20;
-               num_hs_threads = 20;
-               num_ls_threads = 20;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       case CHIP_CAICOS:
-               num_ps_gprs = 93;
-               num_vs_gprs = 46;
-               num_temp_gprs = 4;
-               num_gs_gprs = 31;
-               num_es_gprs = 31;
-               num_hs_gprs = 23;
-               num_ls_gprs = 23;
-               num_ps_threads = 128;
-               num_vs_threads = 10;
-               num_gs_threads = 10;
-               num_es_threads = 10;
-               num_hs_threads = 10;
-               num_ls_threads = 10;
-               num_ps_stack_entries = 42;
-               num_vs_stack_entries = 42;
-               num_gs_stack_entries = 42;
-               num_es_stack_entries = 42;
-               num_hs_stack_entries = 42;
-               num_ls_stack_entries = 42;
-               break;
-       }
-
-       if ((rdev->family == CHIP_CEDAR) ||
-           (rdev->family == CHIP_PALM) ||
-           (rdev->family == CHIP_CAICOS))
-               sq_config = 0;
-       else
-               sq_config = VC_ENABLE;
-
-       sq_config |= (EXPORT_SRC_C |
-                     CS_PRIO(0) |
-                     LS_PRIO(0) |
-                     HS_PRIO(0) |
-                     PS_PRIO(0) |
-                     VS_PRIO(1) |
-                     GS_PRIO(2) |
-                     ES_PRIO(3));
-
-       sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-                                 NUM_VS_GPRS(num_vs_gprs) |
-                                 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-       sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-                                 NUM_ES_GPRS(num_es_gprs));
-       sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
-                                 NUM_LS_GPRS(num_ls_gprs));
-       sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-                                  NUM_VS_THREADS(num_vs_threads) |
-                                  NUM_GS_THREADS(num_gs_threads) |
-                                  NUM_ES_THREADS(num_es_threads));
-       sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
-                                    NUM_LS_THREADS(num_ls_threads));
-       sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-                                   NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-       sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-                                   NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-       sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
-                                   NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
-
        /* set clear context state */
        radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(rdev, 0);

-       /* disable dyn gprs */
-       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-       radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - 
PACKET3_SET_CONFIG_REG_START) >> 2);
-       radeon_ring_write(rdev, 0);
+       if (rdev->family < CHIP_CAYMAN) {
+               switch (rdev->family) {
+               case CHIP_CEDAR:
+               default:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 16;
+                       num_gs_threads = 16;
+                       num_es_threads = 16;
+                       num_hs_threads = 16;
+                       num_ls_threads = 16;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_REDWOOD:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_JUNIPER:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_CYPRESS:
+               case CHIP_HEMLOCK:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_PALM:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 96;
+                       num_vs_threads = 16;
+                       num_gs_threads = 16;
+                       num_es_threads = 16;
+                       num_hs_threads = 16;
+                       num_ls_threads = 16;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_BARTS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 85;
+                       num_vs_stack_entries = 85;
+                       num_gs_stack_entries = 85;
+                       num_es_stack_entries = 85;
+                       num_hs_stack_entries = 85;
+                       num_ls_stack_entries = 85;
+                       break;
+               case CHIP_TURKS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 20;
+                       num_gs_threads = 20;
+                       num_es_threads = 20;
+                       num_hs_threads = 20;
+                       num_ls_threads = 20;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               case CHIP_CAICOS:
+                       num_ps_gprs = 93;
+                       num_vs_gprs = 46;
+                       num_temp_gprs = 4;
+                       num_gs_gprs = 31;
+                       num_es_gprs = 31;
+                       num_hs_gprs = 23;
+                       num_ls_gprs = 23;
+                       num_ps_threads = 128;
+                       num_vs_threads = 10;
+                       num_gs_threads = 10;
+                       num_es_threads = 10;
+                       num_hs_threads = 10;
+                       num_ls_threads = 10;
+                       num_ps_stack_entries = 42;
+                       num_vs_stack_entries = 42;
+                       num_gs_stack_entries = 42;
+                       num_es_stack_entries = 42;
+                       num_hs_stack_entries = 42;
+                       num_ls_stack_entries = 42;
+                       break;
+               }

-       /* SQ config */
-       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-       radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 
2);
-       radeon_ring_write(rdev, sq_config);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
-       radeon_ring_write(rdev, 0);
-       radeon_ring_write(rdev, 0);
-       radeon_ring_write(rdev, sq_thread_resource_mgmt);
-       radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
-       radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+               if ((rdev->family == CHIP_CEDAR) ||
+                   (rdev->family == CHIP_PALM) ||
+                   (rdev->family == CHIP_CAICOS))
+                       sq_config = 0;
+               else
+                       sq_config = VC_ENABLE;
+
+               sq_config |= (EXPORT_SRC_C |
+                             CS_PRIO(0) |
+                             LS_PRIO(0) |
+                             HS_PRIO(0) |
+                             PS_PRIO(0) |
+                             VS_PRIO(1) |
+                             GS_PRIO(2) |
+                             ES_PRIO(3));
+
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+                                         NUM_VS_GPRS(num_vs_gprs) |
+                                         NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+                                         NUM_ES_GPRS(num_es_gprs));
+               sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
+                                         NUM_LS_GPRS(num_ls_gprs));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+                                          NUM_VS_THREADS(num_vs_threads) |
+                                          NUM_GS_THREADS(num_gs_threads) |
+                                          NUM_ES_THREADS(num_es_threads));
+               sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
+                                            NUM_LS_THREADS(num_ls_threads));
+               sq_stack_resource_mgmt_1 = 
(NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+                                           
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+               sq_stack_resource_mgmt_2 = 
(NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+                                           
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+               sq_stack_resource_mgmt_3 = 
(NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
+                                           
NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
+
+               /* disable dyn gprs */
+               radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - 
PACKET3_SET_CONFIG_REG_START) >> 2);
+               radeon_ring_write(rdev, 0);
+
+               /* SQ config */
+               radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
+               radeon_ring_write(rdev, (SQ_CONFIG - 
PACKET3_SET_CONFIG_REG_START) >> 2);
+               radeon_ring_write(rdev, sq_config);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
+               radeon_ring_write(rdev, 0);
+               radeon_ring_write(rdev, 0);
+               radeon_ring_write(rdev, sq_thread_resource_mgmt);
+               radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+               radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+       }

        /* CONTEXT_CONTROL */
        radeon_ring_write(rdev, 0xc0012800);
@@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
        mutex_init(&rdev->r600_blit.mutex);
        rdev->r600_blit.state_offset = 0;

-       rdev->r600_blit.state_len = evergreen_default_size;
+       if (rdev->family < CHIP_CAYMAN)
+               rdev->r600_blit.state_len = evergreen_default_size;
+       else
+               rdev->r600_blit.state_len = cayman_default_size;

        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
@@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.vs_offset = obj_size;
-       obj_size += evergreen_vs_size * 4;
+       if (rdev->family < CHIP_CAYMAN)
+               obj_size += evergreen_vs_size * 4;
+       else
+               obj_size += cayman_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.ps_offset = obj_size;
-       obj_size += evergreen_ps_size * 4;
+       if (rdev->family < CHIP_CAYMAN)
+               obj_size += evergreen_ps_size * 4;
+       else
+               obj_size += cayman_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);

        r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, 
RADEON_GEM_DOMAIN_VRAM,
@@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
                return r;
        }

-       memcpy_toio(ptr + rdev->r600_blit.state_offset,
-                   evergreen_default_state, rdev->r600_blit.state_len * 4);
-
-       if (num_packet2s)
-               memcpy_toio(ptr + rdev->r600_blit.state_offset + 
(rdev->r600_blit.state_len * 4),
-                           packet2s, num_packet2s * 4);
-       for (i = 0; i < evergreen_vs_size; i++)
-               *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 
4) = cpu_to_le32(evergreen_vs[i]);
-       for (i = 0; i < evergreen_ps_size; i++)
-               *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 
4) = cpu_to_le32(evergreen_ps[i]);
+       if (rdev->family < CHIP_CAYMAN) {
+               memcpy_toio(ptr + rdev->r600_blit.state_offset,
+                           evergreen_default_state, rdev->r600_blit.state_len 
* 4);
+
+               if (num_packet2s)
+                       memcpy_toio(ptr + rdev->r600_blit.state_offset + 
(rdev->r600_blit.state_len * 4),
+                                   packet2s, num_packet2s * 4);
+               for (i = 0; i < evergreen_vs_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset 
+ i * 4) = cpu_to_le32(evergreen_vs[i]);
+               for (i = 0; i < evergreen_ps_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset 
+ i * 4) = cpu_to_le32(evergreen_ps[i]);
+       } else {
+               memcpy_toio(ptr + rdev->r600_blit.state_offset,
+                           cayman_default_state, rdev->r600_blit.state_len * 
4);
+
+               if (num_packet2s)
+                       memcpy_toio(ptr + rdev->r600_blit.state_offset + 
(rdev->r600_blit.state_len * 4),
+                                   packet2s, num_packet2s * 4);
+               for (i = 0; i < cayman_vs_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset 
+ i * 4) = cpu_to_le32(cayman_vs[i]);
+               for (i = 0; i < cayman_ps_size; i++)
+                       *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset 
+ i * 4) = cpu_to_le32(cayman_ps[i]);
+       }
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index b205ba1..16caafe 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
                return r;
        cayman_gpu_init(rdev);

-#if 0
-       r = cayman_blit_init(rdev);
+       r = evergreen_blit_init(rdev);
        if (r) {
-               cayman_blit_fini(rdev);
+               evergreen_blit_fini(rdev);
                rdev->asic->copy = NULL;
                dev_warn(rdev->dev, "failed blitter (%d) falling back to 
memcpy\n", r);
        }
-#endif

        /* allocate wb buffer */
        r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)

 int cayman_suspend(struct radeon_device *rdev)
 {
-       /* int r; */
+       int r;

        /* FIXME: we should wait for ring to be empty */
        cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
        radeon_wb_disable(rdev);
        cayman_pcie_gart_disable(rdev);

-#if 0
        /* unpin shaders bo */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (likely(r == 0)) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
-#endif
+
        return 0;
 }

@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)

 void cayman_fini(struct radeon_device *rdev)
 {
-       /* cayman_blit_fini(rdev); */
+       evergreen_blit_fini(rdev);
        cayman_cp_fini(rdev);
        r600_irq_fini(rdev);
        radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c
index d948265..b9b3c2a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
        .get_vblank_counter = &evergreen_get_vblank_counter,
        .fence_ring_emit = &r600_fence_ring_emit,
        .cs_parse = &evergreen_cs_parse,
-       .copy_blit = NULL,
-       .copy_dma = NULL,
-       .copy = NULL,
+       .copy_blit = &evergreen_copy_blit,
+       .copy_dma = &evergreen_copy_blit,
+       .copy = &evergreen_copy_blit,
        .get_engine_clock = &radeon_atom_get_engine_clock,
        .set_engine_clock = &radeon_atom_set_engine_clock,
        .get_memory_clock = &radeon_atom_get_memory_clock,
-- 
1.7.1.1

Reply via email to