On Tue, Nov 15, 2016 at 05:53:56PM -0800, Kenneth Graunke wrote: > Signed-off-by: Kenneth Graunke <[email protected]> > --- > src/intel/common/gen_device_info.c | 144 > +++++++++++++++++++++-------------- > src/intel/common/gen_device_info.h | 10 ++- > src/intel/vulkan/genX_pipeline.c | 12 ++- > src/mesa/drivers/dri/i965/gen6_urb.c | 8 +- > src/mesa/drivers/dri/i965/gen7_urb.c | 26 ++++--- > 5 files changed, 122 insertions(+), 78 deletions(-) > > diff --git a/src/intel/common/gen_device_info.c > b/src/intel/common/gen_device_info.c > index b8d9227..b351da1 100644 > --- a/src/intel/common/gen_device_info.c > +++ b/src/intel/common/gen_device_info.c > @@ -24,6 +24,7 @@ > #include <stdio.h> > #include <stdlib.h> > #include "gen_device_info.h" > +#include "compiler/shader_enums.h" > > static const struct gen_device_info gen_device_info_i965 = { > .gen = 4, > @@ -81,8 +82,10 @@ static const struct gen_device_info > gen_device_info_snb_gt1 = { > .urb = { > .size = 32, > .min_vs_entries = 24, > - .max_vs_entries = 256, > - .max_gs_entries = 256, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 256, > + [MESA_SHADER_GEOMETRY] = 256, > + }, > }, > }; > > @@ -101,8 +104,10 @@ static const struct gen_device_info > gen_device_info_snb_gt2 = { > .urb = { > .size = 64, > .min_vs_entries = 24, > - .max_vs_entries = 256, > - .max_gs_entries = 256, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 256, > + [MESA_SHADER_GEOMETRY] = 256, > + }, > }, > }; > > @@ -126,11 +131,13 @@ static const struct gen_device_info > gen_device_info_ivb_gt1 = { > .urb = { > .size = 128, > .min_vs_entries = 32, > - .max_vs_entries = 512, > - .max_tcs_entries = 32, > .min_ds_entries = 10, > - .max_tes_entries = 288, > - .max_gs_entries = 192, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 512, > + [MESA_SHADER_TESS_CTRL] = 32, > + [MESA_SHADER_TESS_EVAL] = 288, > + [MESA_SHADER_GEOMETRY] = 192, > + }, > }, > }; > > @@ -146,11 +153,13 @@ static const struct gen_device_info > gen_device_info_ivb_gt2 = { > 
.urb = { > .size = 256, > .min_vs_entries = 32, > - .max_vs_entries = 704, > - .max_tcs_entries = 64, > .min_ds_entries = 10, > - .max_tes_entries = 448, > - .max_gs_entries = 320, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 704, > + [MESA_SHADER_TESS_CTRL] = 64, > + [MESA_SHADER_TESS_EVAL] = 448, > + [MESA_SHADER_GEOMETRY] = 320, > + }, > }, > }; > > @@ -167,11 +176,13 @@ static const struct gen_device_info gen_device_info_byt > = { > .urb = { > .size = 128, > .min_vs_entries = 32, > - .max_vs_entries = 512, > - .max_tcs_entries = 32, > .min_ds_entries = 10, > - .max_tes_entries = 288, > - .max_gs_entries = 192, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 512, > + [MESA_SHADER_TESS_CTRL] = 32, > + [MESA_SHADER_TESS_EVAL] = 288, > + [MESA_SHADER_GEOMETRY] = 192, > + }, > }, > }; > > @@ -193,11 +204,13 @@ static const struct gen_device_info > gen_device_info_hsw_gt1 = { > .urb = { > .size = 128, > .min_vs_entries = 32, > - .max_vs_entries = 640, > - .max_tcs_entries = 64, > .min_ds_entries = 10, > - .max_tes_entries = 384, > - .max_gs_entries = 256, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 640, > + [MESA_SHADER_TESS_CTRL] = 64, > + [MESA_SHADER_TESS_EVAL] = 384, > + [MESA_SHADER_GEOMETRY] = 256, > + }, > }, > }; > > @@ -213,11 +226,13 @@ static const struct gen_device_info > gen_device_info_hsw_gt2 = { > .urb = { > .size = 256, > .min_vs_entries = 64, > - .max_vs_entries = 1664, > - .max_tcs_entries = 128, > .min_ds_entries = 10, > - .max_tes_entries = 960, > - .max_gs_entries = 640, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 1664, > + [MESA_SHADER_TESS_CTRL] = 128, > + [MESA_SHADER_TESS_EVAL] = 960, > + [MESA_SHADER_GEOMETRY] = 640, > + }, > }, > }; > > @@ -233,11 +248,13 @@ static const struct gen_device_info > gen_device_info_hsw_gt3 = { > .urb = { > .size = 512, > .min_vs_entries = 64, > - .max_vs_entries = 1664, > - .max_tcs_entries = 128, > .min_ds_entries = 10, > - .max_tes_entries = 960, > - .max_gs_entries = 640, > + .max_entries = { > + 
[MESA_SHADER_VERTEX] = 1664, > + [MESA_SHADER_TESS_CTRL] = 128, > + [MESA_SHADER_TESS_EVAL] = 960, > + [MESA_SHADER_GEOMETRY] = 640, > + }, > }, > }; > > @@ -263,11 +280,13 @@ static const struct gen_device_info > gen_device_info_bdw_gt1 = { > .urb = { > .size = 192, > .min_vs_entries = 64, > - .max_vs_entries = 2560, > - .max_tcs_entries = 504, > .min_ds_entries = 34, > - .max_tes_entries = 1536, > - .max_gs_entries = 960, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 2560, > + [MESA_SHADER_TESS_CTRL] = 504, > + [MESA_SHADER_TESS_EVAL] = 1536, > + [MESA_SHADER_GEOMETRY] = 960, > + }, > } > }; > > @@ -278,11 +297,13 @@ static const struct gen_device_info > gen_device_info_bdw_gt2 = { > .urb = { > .size = 384, > .min_vs_entries = 64, > - .max_vs_entries = 2560, > - .max_tcs_entries = 504, > .min_ds_entries = 34, > - .max_tes_entries = 1536, > - .max_gs_entries = 960, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 2560, > + [MESA_SHADER_TESS_CTRL] = 504, > + [MESA_SHADER_TESS_EVAL] = 1536, > + [MESA_SHADER_GEOMETRY] = 960, > + }, > } > }; > > @@ -293,11 +314,13 @@ static const struct gen_device_info > gen_device_info_bdw_gt3 = { > .urb = { > .size = 384, > .min_vs_entries = 64, > - .max_vs_entries = 2560, > - .max_tcs_entries = 504, > .min_ds_entries = 34, > - .max_tes_entries = 1536, > - .max_gs_entries = 960, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 2560, > + [MESA_SHADER_TESS_CTRL] = 504, > + [MESA_SHADER_TESS_EVAL] = 1536, > + [MESA_SHADER_GEOMETRY] = 960, > + }, > } > }; > > @@ -314,11 +337,13 @@ static const struct gen_device_info gen_device_info_chv > = { > .urb = { > .size = 192, > .min_vs_entries = 34, > - .max_vs_entries = 640, > - .max_tcs_entries = 80, > .min_ds_entries = 34, > - .max_tes_entries = 384, > - .max_gs_entries = 256, > + .max_entries = { > + [MESA_SHADER_VERTEX] = 640, > + [MESA_SHADER_TESS_CTRL] = 80, > + [MESA_SHADER_TESS_EVAL] = 384, > + [MESA_SHADER_GEOMETRY] = 256, > + }, > } > }; > > @@ -339,11 +364,13 @@ static const struct 
gen_device_info gen_device_info_chv > = { > .urb = { \ > .size = 384, \ > .min_vs_entries = 64, \ > - .max_vs_entries = 1856, \ > - .max_tcs_entries = 672, \ > .min_ds_entries = 34, \ > - .max_tes_entries = 1120, \ > - .max_gs_entries = 640, \ > + .max_entries = { \ > + [MESA_SHADER_VERTEX] = 1856, \ > + [MESA_SHADER_TESS_CTRL] = 672, \ > + [MESA_SHADER_TESS_EVAL] = 1120, \ > + [MESA_SHADER_GEOMETRY] = 640, \ > + }, \ > } > > #define GEN9_LP_FEATURES \ > @@ -361,10 +388,12 @@ static const struct gen_device_info gen_device_info_chv > = { > .size = 192, \ > .min_vs_entries = 34, \ > .min_ds_entries = 34, \ > - .max_vs_entries = 704, \ > - .max_tcs_entries = 256, \ > - .max_tes_entries = 416, \ > - .max_gs_entries = 256, \ > + .max_entries = { \ > + [MESA_SHADER_VERTEX] = 704, \ > + [MESA_SHADER_TESS_CTRL] = 256, \ > + [MESA_SHADER_TESS_EVAL] = 416, \ > + [MESA_SHADER_GEOMETRY] = 256, \ > + }, \ > } > > #define GEN9_LP_FEATURES_2X6 \ > @@ -378,10 +407,12 @@ static const struct gen_device_info gen_device_info_chv > = { > .size = 128, \ > .min_vs_entries = 34, \ > .min_ds_entries = 34, \ > - .max_vs_entries = 352, \ > - .max_tcs_entries = 128, \ > - .max_tes_entries = 208, \ > - .max_gs_entries = 128, \ > + .max_entries = { \ > + [MESA_SHADER_VERTEX] = 352, \ > + [MESA_SHADER_TESS_CTRL] = 128, \ > + [MESA_SHADER_TESS_EVAL] = 208, \ > + [MESA_SHADER_GEOMETRY] = 128, \ > + }, \ > } > > static const struct gen_device_info gen_device_info_skl_gt1 = { > @@ -421,6 +452,7 @@ static const struct gen_device_info gen_device_info_bxt = > { > static const struct gen_device_info gen_device_info_bxt_2x6 = { > GEN9_LP_FEATURES_2X6 > }; > + > /* > * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. > * There's no KBL entry. Using the default SKL (GEN9) GS entries value. 
> diff --git a/src/intel/common/gen_device_info.h > b/src/intel/common/gen_device_info.h > index 10324e6..3125a68 100644 > --- a/src/intel/common/gen_device_info.h > +++ b/src/intel/common/gen_device_info.h > @@ -135,12 +135,14 @@ struct gen_device_info > * urb.size = URB Size (kbytes) / slice count > */ > unsigned size; > + > unsigned min_vs_entries; > - unsigned max_vs_entries; > - unsigned max_tcs_entries; > unsigned min_ds_entries; > - unsigned max_tes_entries; > - unsigned max_gs_entries; > + > + /** > + * The maximum number of URB entries. See the 3DSTATE_URB_<XS> docs. > + */ > + unsigned max_entries[4];
I had to go and check "shader_enums.h" to see whether all the MESA_SHADER_* values used here actually fit in a four-element array, and they do. I'm still wondering whether we should tie the array size to the enum somehow, perhaps as "max_entries[MESA_SHADER_GEOMETRY + 1]". What do you think? > } urb; > /** @} */ > }; > diff --git a/src/intel/vulkan/genX_pipeline.c > b/src/intel/vulkan/genX_pipeline.c > index 0af37e4..6dd9f4f 100644 > --- a/src/intel/vulkan/genX_pipeline.c > +++ b/src/intel/vulkan/genX_pipeline.c > @@ -237,7 +237,8 @@ genX(emit_urb_setup)(struct anv_device *device, struct > anv_batch *batch, > ALIGN(device->info.urb.min_vs_entries * vs_entry_size_bytes, > chunk_size_bytes) / chunk_size_bytes; > unsigned vs_wants = > - ALIGN(device->info.urb.max_vs_entries * vs_entry_size_bytes, > + ALIGN(device->info.urb.max_entries[MESA_SHADER_VERTEX] * > + vs_entry_size_bytes, > chunk_size_bytes) / chunk_size_bytes - vs_chunks; > > unsigned gs_chunks = 0; > @@ -254,7 +255,8 @@ genX(emit_urb_setup)(struct anv_device *device, struct > anv_batch *batch, > gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, > chunk_size_bytes) / chunk_size_bytes; > gs_wants = > - ALIGN(device->info.urb.max_gs_entries * gs_entry_size_bytes, > + ALIGN(device->info.urb.max_entries[MESA_SHADER_GEOMETRY] * > + gs_entry_size_bytes, > chunk_size_bytes) / chunk_size_bytes - gs_chunks; > } > > @@ -289,8 +291,10 @@ genX(emit_urb_setup)(struct anv_device *device, struct > anv_batch *batch, > /* Since we rounded up when computing *_wants, this may be slightly more > * than the maximum allowed amount, so correct for that. > */ > - nr_vs_entries = MIN2(nr_vs_entries, device->info.urb.max_vs_entries); > - nr_gs_entries = MIN2(nr_gs_entries, device->info.urb.max_gs_entries); > + nr_vs_entries = MIN2(nr_vs_entries, > + device->info.urb.max_entries[MESA_SHADER_VERTEX]); > + nr_gs_entries = MIN2(nr_gs_entries, > + device->info.urb.max_entries[MESA_SHADER_GEOMETRY]); > > /* Ensure that we program a multiple of the granularity. 
*/ > nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); > diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c > b/src/mesa/drivers/dri/i965/gen6_urb.c > index 3658c38..b91d7fa 100644 > --- a/src/mesa/drivers/dri/i965/gen6_urb.c > +++ b/src/mesa/drivers/dri/i965/gen6_urb.c > @@ -64,11 +64,11 @@ gen6_upload_urb(struct brw_context *brw, unsigned vs_size, > } > > /* Then clamp to the maximum allowed by the hardware */ > - if (nr_vs_entries > devinfo->urb.max_vs_entries) > - nr_vs_entries = devinfo->urb.max_vs_entries; > + if (nr_vs_entries > devinfo->urb.max_entries[MESA_SHADER_VERTEX]) > + nr_vs_entries = devinfo->urb.max_entries[MESA_SHADER_VERTEX]; > > - if (nr_gs_entries > devinfo->urb.max_gs_entries) > - nr_gs_entries = devinfo->urb.max_gs_entries; > + if (nr_gs_entries > devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]) > + nr_gs_entries = devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]; > > /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */ > brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); > diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c > b/src/mesa/drivers/dri/i965/gen7_urb.c > index b60bd23..ca347b4 100644 > --- a/src/mesa/drivers/dri/i965/gen7_urb.c > +++ b/src/mesa/drivers/dri/i965/gen7_urb.c > @@ -291,7 +291,8 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size, > unsigned vs_chunks = > DIV_ROUND_UP(vs_min_entries * vs_entry_size_bytes, chunk_size_bytes); > unsigned vs_wants = > - DIV_ROUND_UP(devinfo->urb.max_vs_entries * vs_entry_size_bytes, > + DIV_ROUND_UP(devinfo->urb.max_entries[MESA_SHADER_VERTEX] * > + vs_entry_size_bytes, > chunk_size_bytes) - vs_chunks; > > unsigned gs_chunks = 0; > @@ -307,7 +308,8 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size, > */ > gs_chunks = DIV_ROUND_UP(MAX2(gs_granularity, 2) * gs_entry_size_bytes, > chunk_size_bytes); > - gs_wants = DIV_ROUND_UP(devinfo->urb.max_gs_entries * > gs_entry_size_bytes, > + gs_wants = 
DIV_ROUND_UP(devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] > * > + gs_entry_size_bytes, > chunk_size_bytes) - gs_chunks; > } > > @@ -321,15 +323,15 @@ gen7_upload_urb(struct brw_context *brw, unsigned > vs_size, > DIV_ROUND_UP(hs_granularity * hs_entry_size_bytes, > chunk_size_bytes); > hs_wants = > - DIV_ROUND_UP(devinfo->urb.max_tcs_entries * hs_entry_size_bytes, > - chunk_size_bytes) - hs_chunks; > + DIV_ROUND_UP(devinfo->urb.max_entries[MESA_SHADER_TESS_CTRL] * > + hs_entry_size_bytes, chunk_size_bytes) - hs_chunks; > > ds_chunks = > DIV_ROUND_UP(devinfo->urb.min_ds_entries * ds_entry_size_bytes, > chunk_size_bytes); > ds_wants = > - DIV_ROUND_UP(devinfo->urb.max_tes_entries * ds_entry_size_bytes, > - chunk_size_bytes) - ds_chunks; > + DIV_ROUND_UP(devinfo->urb.max_entries[MESA_SHADER_TESS_EVAL] * > + ds_entry_size_bytes, chunk_size_bytes) - ds_chunks; > } > > /* There should always be enough URB space to satisfy the minimum > @@ -385,10 +387,14 @@ gen7_upload_urb(struct brw_context *brw, unsigned > vs_size, > /* Since we rounded up when computing *_wants, this may be slightly more > * than the maximum allowed amount, so correct for that. > */ > - nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); > - nr_hs_entries = MIN2(nr_hs_entries, devinfo->urb.max_tcs_entries); > - nr_ds_entries = MIN2(nr_ds_entries, devinfo->urb.max_tes_entries); > - nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); > + nr_vs_entries = > + MIN2(nr_vs_entries, devinfo->urb.max_entries[MESA_SHADER_VERTEX]); > + nr_hs_entries = > + MIN2(nr_hs_entries, devinfo->urb.max_entries[MESA_SHADER_TESS_CTRL]); > + nr_ds_entries = > + MIN2(nr_ds_entries, devinfo->urb.max_entries[MESA_SHADER_TESS_EVAL]); > + nr_gs_entries = > + MIN2(nr_gs_entries, devinfo->urb.max_entries[MESA_SHADER_GEOMETRY]); > > /* Ensure that we program a multiple of the granularity. 
*/ > nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); > -- > 2.10.2 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
