[Mesa-dev] [PATCH] intel/compiler/fs/icl: Use dummy masked urb write for tess eval
One cannot write the URB arbitrarily and therefore the message has to be carefully constructed. The clever tricks originate from Kenneth and Jason, I'm just writing the patch. Fixes GPU hangs on ICL with Vulkan CTS. CC: Kenneth Graunke CC: Jason Ekstrand CC: Anuj Phogat CC: Clayton Craft Signed-off-by: Topi Pohjolainen --- src/intel/compiler/brw_fs_visitor.cpp | 51 ++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index af9f803fb68..6509868f1c3 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -821,7 +821,13 @@ fs_visitor::emit_urb_writes(const fs_reg _vertex_count) header_size); fs_inst *inst = abld.emit(opcode, reg_undef, payload); - inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY; + + /* For ICL WA 1805992985 one needs additional write in the end. */ + if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL) +inst->eot = false; + else +inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY; + inst->mlen = length + header_size; inst->offset = urb_offset; urb_offset = starting_urb_offset + slot + 1; @@ -857,6 +863,49 @@ fs_visitor::emit_urb_writes(const fs_reg _vertex_count) inst->mlen = 2; inst->offset = 1; return; + } + + /* ICL WA 1805992985: +* +* ICLLP GPU hangs on one of tessellation vkcts tests with DS not done. The +* send cycle, which is a urb write with an eot must be 4 phases long and +* all 8 lanes must valid. +*/ + if (devinfo->gen == 11 && stage == MESA_SHADER_TESS_EVAL) { + fs_reg payload = fs_reg(VGRF, alloc.allocate(6), BRW_REGISTER_TYPE_UD); + + /* Workaround requires all 8 channels (lanes) to be valid. This is + * understood to mean they all need to be alive. First trick is to find + * a live channel and copy its urb handle for all the other channels to + * make sure all handles are valid. 
+ */ + bld.exec_all().MOV(payload, bld.emit_uniformize(urb_handle)); + + /* Second trick is to use masked URB write where one can tell the HW to + * actually write data only for selected channels even though all are + * active. + * Third trick is to take advantage of the must-be-zero (MBZ) area in + * the very beginning of the URB. + * + * One masks data to be written only for the first channel and uses + * offset zero explicitly to land data to the MBZ area avoiding trashing + * any other part of the URB. + * + * Since the WA says that the write needs to be 4 phases long one uses + * 4 slots data. All are explicitly zeros in order to to keep the MBZ + * area written as zeros. + */ + bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0x1u)); + bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u)); + bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u)); + bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u)); + bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u)); + + fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, + reg_undef, payload); + inst->eot = true; + inst->mlen = 6; + inst->offset = 0; } } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/isl: Align clear color buffer to full cacheline
From: Rafael Antognolli Fixes MCS fast clear gpu hangs with Vulkan CTS on ICL in CI. CC: Anuj Phogat CC: Kenneth Graunke Tested-by: Topi Pohjolainen Signed-off-by: Rafael Antognolli --- src/intel/isl/isl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 6b9e6c9e0f0..acfed5119ba 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -122,7 +122,8 @@ isl_device_init(struct isl_device *dev, dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4; dev->ss.align = isl_align(dev->ss.size, 32); - dev->ss.clear_color_state_size = CLEAR_COLOR_length(info) * 4; + dev->ss.clear_color_state_size = + isl_align(CLEAR_COLOR_length(info) * 4, 64); dev->ss.clear_color_state_offset = RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4; -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler/icl: Use tcs barrier id bits 24:30 instead of 24:27
Similarly to 1cc17fb731466c68586915acbb916586457b19bc Fixes gpu hangs with dEQP-VK.tessellation.shader_input_output.barrier CC: Anuj Phogat CC: Clayton Craft Signed-off-by: Topi Pohjolainen --- src/intel/compiler/brw_fs_nir.cpp | 21 +++-- 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 747529e72d8..ee8274de65a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2458,15 +2458,24 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder , bld.exec_all().MOV(m0, brw_imm_ud(0u)); /* Copy "Barrier ID" from r0.2, bits 16:13 */ - chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(INTEL_MASK(16, 13))); + if (devinfo->gen < 11) { + chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(16, 13))); - /* Shift it up to bits 27:24. */ - chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); + /* Shift it up to bits 27:24. */ + chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); + } else { + chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(30, 24))); + } /* Set the Barrier Count and the enable bit */ - chanbld.OR(m0_2, m0_2, - brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15))); + if (devinfo->gen < 11) + chanbld.OR(m0_2, m0_2, +brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15))); + else + chanbld.OR(m0_2, m0_2, +brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15))); bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); break; -- 2.13.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 61/61] nir: Document precision lowering pass
--- src/compiler/nir/nir_lower_precision.cpp | 106 ++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_lower_precision.cpp b/src/compiler/nir/nir_lower_precision.cpp index 3d05fa2b3c9..9647fb4d6a9 100644 --- a/src/compiler/nir/nir_lower_precision.cpp +++ b/src/compiler/nir/nir_lower_precision.cpp @@ -21,8 +21,110 @@ * IN THE SOFTWARE. */ -/* TODO: Introduce helpers in C++ space for examining GLSL types and make - * this file just C. +/* + * This lowering pass seeks to change precision for float values and for the + * expressions producing them. Decision making follows the rules described in + * GLES 3.2 Specification and section 4.7.3 Precision Qualifiers. Shortly the + * idea is that an arithmetic expression can be performed in 16-bit precision + * if and only if all its operands are either already fixed to 16-bits or are + * such that compiler is free to use either 32 or 16-bit precision. + * + * First step is to go over the variables as these are the only things that + * are marked by the shader author with explicit instructions if high + * precision is needed (or lower precision allowed respectively). This + * implementation sets precision unconditionally to 16-bits whenever allowed + * by the shader author. (This may not produce the most optimal end result + * but is a design choice to keep the complexity at bay). + * + * This is followed by manipulation of instructions themselves. Variable + * derefs and intrinsics dealing with derefs are straight-forward. For them + * one only needs to consult the variables themselves and adjust the precision + * of the instruction in question accordingly. + * + * At this point things get more complex as the rest are dependent on context. + * Precision for texturing return values (sample values) and constant loads + * depend on the needs of consuming expressions. As there may be need for + * both 16 and 32-bit precision, one cannot simply just set them as 16-bits. 
+ * There may be, for example, two separate multiplications of a sample value + * S. One multiplying it with 32-bit value A and the other with 16-bit + * value B. (Recall that the rules mandate that if one of the source operands + * has full precision then the rest need to have full precision as well). + * Hence A * S requires the sample value S with full precision. This in turn + * means that texturing needs to return full precision and needs to convert + * sample value as to 16-bits for the other multiplication (B * S). + * NOTE: Hardware may have capability for mixed mode instructions and it is + * left for the backend to drop any unnecessary conversions. + * + * Here the implementation leaves all texturing and constant load operations + * to 32-bit precision until all instructions are analysed. Instead it inserts + * conversions from 32-bits to 16-bits for expressions that can operate with + * lower precision. In the example above, the multiplication of the sample + * with 16-bit value B would become C = B * f2f16(S). This is important for + * the analysis of the rest of the instructions. Once the pass examines + * expressions consuming C the pass can allow these expressions with lower + * precision if all the operands are allowed in lower precision. If one had + * left C with 32-bit precision it would have prevented the use of 16-bit + * precision in the consuming expressions even though all other operands would + * have allowed that. + * + * Once all instructions are examined there is separate pass that goes thru + * all the uses of texturing return values. If all are happy with lower + * precision, the pass removes the conversions (f2f16) and switches the + * texturing itself to directly return 16-bit samples (given that hardware + * support 16-bit sample values of course). + * + * For input varyings marked with lower precision there is an alternative to + * uploading 16-bit values into the shader. 
One can load them using full + * precision but immediately convert them into 16-bits before they are used. + * This allows one to perform all calculations based on them in 16-bit + * precision but still keep the upload mechanism intact in the backend. + * + * TODO: + * + * 1) There is still a major flaw: logic is against the rules as it considers + *arithmetic expressions without regard to their consuming expressions. + *As alus at nir level are just sub-expressions of larger expressions they + *are subject to the uses and shouldn't be examined just based on their + *own sources. + *One should recursively examine uses until either a fixed search depth + *(heuristic to avoid runtime explosion) is reached or it becomes clear which + *precision is needed. Naturally there may be both low and high precision + *uses. In order to keep things simple one could just force all lower + *precision uses to high in case even one high precision use is found or + *the search depth boundary is hit. + * + * 2)
[Mesa-dev] intel: WIP: Support for using 16-bits for mediump
Here is version 2 of adding support for 16-bit float instructions in the shader compiler. Unlike the first version, which did all the analysis at the GLSL level, this one adds the notion of precision to NIR variables and does the analysis and precision lowering at the NIR level. This lives in: gitlab.freedesktop.org:tpohjola/mesa and branch fp16. This is now mature enough to be able to use 16-bit precision for all instructions except a few special cases for gfxbench trex and alu2. (Unfortunately I'm not seeing any performance benefit. This is not that surprising as I got to the same point with the glsl-based solution and was able to measure the performance already back then). Hence I thought it is time to share it. While this is still work-in-progress I didn't want to flood the list with the full set of patches but instead included the very last one, where I try to outline the logic and its current shortcomings. There is also a short list of TODO items. In addition to those I need to examine a couple of Intel-specific misrenderings. I haven't gotten that deep yet but it looks like I'm missing something with 16-bit inot and mad/mac lowered interpolation. Unfortunately I get corrupted rendering only with hardware while the simulator is happy. Mostly I'm worried about how to test all of this properly. I haven't written any unit tests but that is high on my list. This is mostly because I've been uncertain about my design choices. So far I've used shader runner tests that I've written for specific cases. These are useful for development purposes but don't bring much value for regression testing. 
Alejandro Piñeiro (1): intel/compiler/fs: Use half_precision data_format on 16-bit fb writes Jose Maria Casanova Crespo (2): intel/compiler/fs: Include support for RT data_format bit intel/compiler/disasm: Show half-precision data_format on rt_writes Topi Pohjolainen (58): intel/compiler/fs: Set 16-bit sampler return format intel/compiler/disasm: Show half-precision for sampler messages intel/compiler/fs: Skip tex-inst early in conversion lowering intel/compiler/fs: Support for dumping 16-bit IMM values intel/compiler: Allow 16-bit math intel/compiler/fs: Add helpers for 16-bit null regs intel/compiler/fs: Use two SIMD8 instructions for 16-bit math intel/compiler/fs: Use 16-bit null dest with 16-bit math intel/compiler/fs: Use 16-bit null dest with 16-bit compare intel/compiler/fs: Add 16-bit type support for nir_if intel/compiler/eu: Prepare 3-src-op for 16-bit sources intel/compiler/eu: Prepare 3-src-op for 16-bit dst intel/compiler/eu: Allow 3-src-op with mixed precision (HF/F) sources intel/compiler/disasm: Print mixed precision 3-src types correctly intel/compiler/disasm: Print 16-bit IMM values intel/compiler/fs: Support for combining 16-bit immediates intel/compiler/fs: Set tex type for generator to flag fp16 intel/compiler/fs: Use component_size() instead of open coded intel/compiler/fs: Add register padding support intel/compiler/fs: Pad 16-bit texture return payloads intel/compiler/fs: Pad 16-bit output (store/fb write) payloads intel/compiler/fs: Pad 16-bit nir vec* components into full reg intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg intel/compiler/fs: Pad 16-bit const loads into full regs intel/compiler/fs: Pad 16-bit load payload lowering nir: Lower also 16-bit lrp() if needed intel/compiler: Lower 16-bit lrp() nir: Recognize f232(f216(x)) as x nir: Recognize f216(f232(x)) as x nir: Store variable precision when translating from glsl glsl: Set default precision for builtin variables i965: Prepare uniform mapping for 16-bit values 
i965: Support for uploading 16-bit uniforms from 32-bit store intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms intel/compiler: Tell compiler if lower precision is supported nir: Add lowering pass for variables marked mediump nir: Add pass for deref precision lowering nir: Add pass for alu precision lowering nir: Add precision conversion for load/store_deref nir: Add precision conversion for sources of texturing ops nir: Don't set destination size 16 for booleans nir: Add precision lowering for texture samples nir: Add support for non-fixed precision nir: Don't try to alter precision of boolean sources nir: Add support for variable sized booleans nir: Add support for lowering phi precision intel/compiler/fs: Prepare alu dest type for 16-bit booleans nir: Add lowering pass setting 16-bit boolean destinations nir: Add lowering pass turning b2f(i2i32(x)) into b2f(x) nir: Adjust integer precision for alus operating with 16-bit srcs nir: Replace b2f(x) with b2f(i2i32(x)) for 16-bit x nir: Adjust precision for discard_if nir: Allow input varyings to be converted to lower precision nir: Replace 16-bit src[0] for bcsel i2i32(src[0]) nir: Replace 16-bit nir_if condition with i2i32(condition) Revert "intel/compiler: fix 16-bit comparisons" intel/comp
[Mesa-dev] intel/icl: RFC: Two hardware workarounds
These don't seem to fix anything (hence RFC). Moreover, vertex combining is not documented to harm anything. I thought it better to have them on the list anyway. CC: Anuj Phogat Topi Pohjolainen (2): intel/icl: Disable combining of vertices from separate instances intel/isl/icl: Use halign == 8 instead 4 hw workaround src/intel/blorp/blorp_genX_exec.h | 6 src/intel/isl/isl_gen8.c | 35 +++ src/intel/vulkan/genX_pipeline.c | 6 src/mesa/drivers/dri/i965/genX_state_upload.c | 6 4 files changed, 53 insertions(+) -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] intel/isl/icl: Use halign == 8 instead 4 hw workaround
CC: Jason Ekstrand CC: Nanley Chery CC: Anuj Phogat Signed-off-by: Topi Pohjolainen --- src/intel/isl/isl_gen8.c | 35 +++ 1 file changed, 35 insertions(+) diff --git a/src/intel/isl/isl_gen8.c b/src/intel/isl/isl_gen8.c index 2199b8d22d..f9a424dd48 100644 --- a/src/intel/isl/isl_gen8.c +++ b/src/intel/isl/isl_gen8.c @@ -87,6 +87,38 @@ isl_gen8_choose_msaa_layout(const struct isl_device *dev, return true; } +static void +gen11_wa_1604596806(const struct isl_surf_init_info *restrict info, +enum isl_tiling tiling, const uint32_t bpb, +struct isl_extent3d *align_el) + +{ + /* Don't try to apply the workaround for depth or stencil. See the Ice Lake +* BSpec: Shared Functions - vol5c Shared Functions - RENDER_SURFACE_STATE: +* +* This field is intended to be set to HALIGN_8 only if the surface was +* rendered as a depth buffer with Z16 format or a stencil buffer. In this +* case it must be set to HALIGN_8 since these surfaces support only +* alignment of 8. For Z32 formats it must be set ot HALIGN_4. Use of +* HALIGN_8 for other surfaces is supported, but increases memory usage. +*/ + if (info->usage & (ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT)) + return; + + /* See the Ice Lake BSpec: GEN:BUG:1604596806 : Pixel Corruption in +* subspan combining (8x4 combining) scenarios if halign=4 +* +* Shared Functions - vol5c Shared Functions - RENDER_SURFACE_STATE: +* +* For surface format = 32 bpp, num_multisamples = 1 , MIpcount > 0 and +* surface walk = TiledY, HALIGN must be programmed to 8 +*/ + if (tiling == ISL_TILING_Y0 && bpb == 32 && info->samples == 1) { + assert(align_el->w == 4); + align_el->w = 8; + } +} + void isl_gen8_choose_image_alignment_el(const struct isl_device *dev, const struct isl_surf_init_info *restrict info, @@ -174,4 +206,7 @@ isl_gen8_choose_image_alignment_el(const struct isl_device *dev, const uint32_t halign = needs_halign16 ? 
16 : 4; *image_align_el = isl_extent3d(halign, valign, 1); + + if (!needs_halign16 && dev->info->gen == 11) + gen11_wa_1604596806(info, tiling, fmtl->bpb, image_align_el); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] intel/icl: Disable combining of vertices from separate instances
This is new hardware feature, and should be disabled until it is clear our VS kernels are prepared for it. Thread payload has new bit (See Bspec: Pipeline Stages - 3D Pipeline Geometry - Vertex Shader (VS) Stage - Payloads - SIMD8 Payload [BDW+]) that vertex shaders could consult. CC: Jason Ekstrand CC: Kenneth Graunke CC: Anuj Phogat Signed-off-by: Topi Pohjolainen --- src/intel/blorp/blorp_genX_exec.h | 6 ++ src/intel/vulkan/genX_pipeline.c | 6 ++ src/mesa/drivers/dri/i965/genX_state_upload.c | 6 ++ 3 files changed, 18 insertions(+) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 50341ab0ec..10865b9c15 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -629,6 +629,12 @@ blorp_emit_vs_config(struct blorp_batch *batch, #if GEN_GEN >= 8 vs.SIMD8DispatchEnable = vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; +#endif +#if GEN_GEN >= 11 + /* TODO: Disable combining of instances until it is clear VS kernels + * are prepared for it. + */ + vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable; #endif } } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 33f1f7832a..9762fc78b5 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1157,6 +1157,12 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) vs.SIMD8DispatchEnable = vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8; #endif +#if GEN_GEN >= 11 + /* TODO: Disable combining of instances until it is clear VS kernels + * are prepared for it. 
+ */ + vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable; +#endif assert(!vs_prog_data->base.base.use_alt_mode); #if GEN_GEN < 11 diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 740cb0c4d2..9198a2953a 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -2277,6 +2277,12 @@ genX(upload_vs_state)(struct brw_context *brw) vs.UserClipDistanceCullTestEnableBitmask = vue_prog_data->cull_distance_mask; +#endif +#if GEN_GEN >= 11 + /* TODO: Disable combining of instances until it is clear VS kernels + * are prepared for it. + */ + vs.SIMD8SingleInstanceDispatchEnable = vs.SIMD8DispatchEnable; #endif } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler/icl: Use invocation id bits 22:16 instead of 23:17
Identifier bits in the dispatch header have changed. See Bspec: SINGLE_PATCH Payload: 3D Pipeline Stages - 3D Pipeline Geometry - Hull Shader (HS) Stage IVB+ - Payloads IVB+ Fixes: KHR-GL46.tessellation_shader.tessellation_shader_tc_barriers.barrier_guarded_read_write_calls CC: Anuj Phogat CC: Mark Janes Signed-off-by: Topi Pohjolainen --- src/intel/compiler/brw_fs.cpp | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 23a25fedca5..757147b01ec 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6593,14 +6593,18 @@ fs_visitor::run_tcs_single_patch() if (tcs_prog_data->instances == 1) { invocation_id = channels_ud; } else { + const unsigned invocation_id_mask = devinfo->gen >= 11 ? + INTEL_MASK(22, 16) : INTEL_MASK(23, 17); + const unsigned invocation_id_shift = devinfo->gen >= 11 ? 16 : 17; + invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD); /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */ fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD); fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)), - brw_imm_ud(INTEL_MASK(23, 17))); - bld.SHR(instance_times_8, t, brw_imm_ud(17 - 3)); + brw_imm_ud(invocation_id_mask)); + bld.SHR(instance_times_8, t, brw_imm_ud(invocation_id_shift - 3)); bld.ADD(invocation_id, instance_times_8, channels_ud); } -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler/icl: Use barrier id bits 24:30 instead of 24:27, 31
Fixes gpu hangs with Carchase and Manhattan. Cc: Anuj Phogat Signed-off-by: Topi Pohjolainen --- src/intel/compiler/brw_fs_visitor.cpp | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index cd2abbb5960..51a0ca2374a 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -791,9 +791,19 @@ fs_visitor::emit_cs_terminate() void fs_visitor::emit_barrier() { - assert(devinfo->gen >= 7); - const uint32_t barrier_id_mask = - devinfo->gen >= 9 ? 0x8f00u : 0x0f00u; + uint32_t barrier_id_mask; + switch (devinfo->gen) { + case 7: + case 8: + barrier_id_mask = 0x0f00u; break; + case 9: + case 10: + barrier_id_mask = 0x8f00u; break; + case 11: + barrier_id_mask = 0x7f00u; break; + default: + unreachable("barrier is only available on gen >= 7"); + } /* We are getting the barrier ID from the compute shader header */ assert(stage == MESA_SHADER_COMPUTE); -- 2.17.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/decoder: Use gen_group::dw_length when available
Otherwise gen_group_get_length() will try to use first fields of, for example, CC_VIEWPORT and SF_CLIP to determine the group size. These packets are not present in the state with full header but simply with their contents while equivalent state pointers (3DSTATE_VIEWPORT_STATE_POINTERS_CC and 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP respectively) telling their starting points. Before: --- 0xfffef540: 0x7823: 3DSTATE_VIEWPORT_STATE_POINTERS_CC 0xfffef540: 0x7823 : Dword 0 DWord Length: 0 0xfffef544: 0x0180 : Dword 1 CC Viewport Pointer: 0x0180 CC_VIEWPORT 0 0xfffea180: 0x : Dword 0 Minimum Depth: 0.00 0xfffef548: 0x7821: 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP 0xfffef548: 0x7821 : Dword 0 DWord Length: 0 0xfffef54c: 0x01c0 : Dword 1 SF Clip Viewport Pointer: 0x01c0 SF_CLIP_VIEWPORT 0 0xfffea1c0: 0x4100 : Dword 0 Viewport Matrix Element m00: 0.00 0xfffea1c4: 0x3f00 : Dword 1 Viewport Matrix Element m11: 0.50 0xfffef550: 0x7824: 3DSTATE_BLEND_STATE_POINTERS After: -- 0xfffef540: 0x7823: 3DSTATE_VIEWPORT_STATE_POINTERS_CC 0xfffef540: 0x7823 : Dword 0 DWord Length: 0 0xfffef544: 0x0180 : Dword 1 CC Viewport Pointer: 0x0180 CC_VIEWPORT 0 0xfffea180: 0x : Dword 0 Minimum Depth: 0.00 0xfffea184: 0x3f80 : Dword 1 Maximum Depth: 1.00 0xfffef548: 0x7821: 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP 0xfffef548: 0x7821 : Dword 0 DWord Length: 0 0xfffef54c: 0x01c0 : Dword 1 SF Clip Viewport Pointer: 0x01c0 SF_CLIP_VIEWPORT 0 0xfffea1c0: 0x4100 : Dword 0 Viewport Matrix Element m00: 0.00 0xfffea1c4: 0x3f00 : Dword 1 Viewport Matrix Element m11: 0.50 0xfffea1c8: 0x3f00 : Dword 2 Viewport Matrix Element m22: 0.50 0xfffea1cc: 0x4100 : Dword 3 Viewport Matrix Element m30: 8.00 0xfffea1d0: 0x3f00 : Dword 4 Viewport Matrix Element m31: 0.50 0xfffea1d4: 0x3f00 : Dword 5 Viewport Matrix Element m32: 0.50 0xfffea1d8: 0x : Dword 6 0xfffea1dc: 0x : Dword 7 0xfffea1e0: 0xc500 : Dword 8 X Min Clip Guardband: -2048.00 0xfffea1e4: 0x4500 : Dword 9 X Max Clip Guardband: 2048.00 0xfffea1e8: 0xc700 : 
Dword 10 Y Min Clip Guardband: -32768.00 0xfffea1ec: 0x4700 : Dword 11 Y Max Clip Guardband: 32768.00 0xfffea1f0: 0x : Dword 12 X Min ViewPort: 0.00 0xfffea1f4: 0x4170 : Dword 13 X Max ViewPort: 15.00 0xfffea1f8: 0x : Dword 14 Y Min ViewPort: 0.00 0xfffea1fc: 0x : Dword 15 Y Max ViewPort: 0.00 CC: Lionel Landwerlin <lionel.g.landwer...@intel.com> CC: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/common/gen_decoder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c index 1b8123b..cc212cc 100644 --- a/src/intel/common/gen_decoder.c +++ b/src/intel/common/gen_decoder.c @@ -713,6 +713,9 @@ gen_group_find_field(struct gen_group *group, const char *name) int gen_group_get_length(struct gen_group *group, const uint32_t *p) { + if (group->dw_length) + return group->dw_length; + uint32_t h = p[0]; uint32_t type = field_value(h, 29, 31); -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/urb/cnl: Apply gen7 CS stall
This didn't actually help the failing tests I'm looking at but hopefully has teeth elsewhere. CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Jordan Justen <jordan.l.jus...@intel.com> CC: Anuj Phogat <anuj.pho...@gmail.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/gen7_urb.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 2e5f8e6..9e12657 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -145,8 +145,15 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, * in the ring after this instruction. * * No such restriction exists for Haswell or Baytrail. +* +* From the CNL Bspec, Windower - +* 3DSTATE_PUSH_CONSTANT_ALLOC_PS/VS/GS/DS/HS: +* +* This command must be followed by a PIPE_CONTROL with CS Stall bit +* set. */ - if (devinfo->gen < 8 && !devinfo->is_haswell && !devinfo->is_baytrail) + if ((devinfo->gen < 8 && !devinfo->is_haswell && !devinfo->is_baytrail) || + devinfo->gen >= 10) gen7_emit_cs_stall_flush(brw); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/miptree: Initialize mcs buffer only until clear color
Otherwise even the clear color gets initialised to 0xFF. This allows enabling of color fast clears on ICL without regressing multisampling tests. CC: Rafael Antognolli <rafael.antogno...@intel.com> CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Nanley Chery <nanley.g.ch...@intel.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 89074a6..25f901d 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1680,7 +1680,12 @@ intel_miptree_init_mcs(struct brw_context *brw, return; } void *data = map; - memset(data, init_value, mt->mcs_buf->size); + + /* Only initialize until clear color (if present). */ + const unsigned aux_size = mt->mcs_buf->clear_color_offset ? +mt->mcs_buf->clear_color_offset : +mt->mcs_buf->size; + memset(data, init_value, aux_size); brw_bo_unmap(mt->mcs_buf->bo); } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Assert base format before truncating to unsigned short
CID: 1433709 Fixes: ca721b3d8: mesa: use GLenum16 in a few more places CC: Marek Olšák <marek.ol...@amd.com> CC: Brian Paul <bri...@vmware.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/main/teximage.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 8f53510..f560512 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -845,6 +845,7 @@ _mesa_init_teximage_fields_ms(struct gl_context *ctx, mesa_format format, GLuint numSamples, GLboolean fixedSampleLocations) { + const GLint base_format =_mesa_base_tex_format(ctx, internalFormat); GLenum target; assert(img); assert(width >= 0); @@ -852,8 +853,8 @@ _mesa_init_teximage_fields_ms(struct gl_context *ctx, assert(depth >= 0); target = img->TexObject->Target; - img->_BaseFormat = _mesa_base_tex_format( ctx, internalFormat ); - assert(img->_BaseFormat != -1); + assert(base_format != -1); + img->_BaseFormat = (GLenum16)base_format; img->InternalFormat = internalFormat; img->Border = border; img->Width = width; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/dev: Assert the number of slices is not zero
Fixes: c1900f5b intel: devinfo: add helper functions to fill... CID: 1433511 CC: Lionel Landwerlin <lionel.g.landwer...@intel.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/dev/gen_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index f7cb94f..dfeab6e 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -1047,7 +1047,7 @@ gen_device_info_update_from_topology(struct gen_device_info *devinfo, /* We expect the total number of EUs to be uniformly distributed throughout * the subslices. */ - assert((n_eus % n_subslices) == 0); + assert(n_subslices && (n_eus % n_subslices) == 0); devinfo->num_eu_per_subslice = n_eus / n_subslices; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: Check if u_vector_init() succeeds
However, it only fails when running out of memory. Now, if we are going to check that, we should be consistent and check the allocation of the worklist as well. On the other hand, there are other places where we don't check for allocation failures. Therefore I'm not sure whether we should bother here either. Coverity complains, but I can just as well mark it as ignored. CID: 1433512 Fixes: edb18564c7 nir: Initial implementation of a nir_instr_worklist CC: Thomas Helland <thomashellan...@gmail.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/nir/nir_worklist.h | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h index e376908..3fb391f 100644 --- a/src/compiler/nir/nir_worklist.h +++ b/src/compiler/nir/nir_worklist.h @@ -105,8 +105,15 @@ typedef struct { static inline nir_instr_worklist * nir_instr_worklist_create() { nir_instr_worklist *wl = malloc(sizeof(nir_instr_worklist)); - u_vector_init(&wl->instr_vec, sizeof(struct nir_instr *), - sizeof(struct nir_instr *) * 8); + if (!wl) + return NULL; + + if (!u_vector_init(&wl->instr_vec, sizeof(struct nir_instr *), + sizeof(struct nir_instr *) * 8)) { + free(wl); + return NULL; + } + return wl; } -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/blorp/hiz: Emit CC viewport
Otherwise the simulator for ICL complains that: B-spec CC_ViewPort Minimum Depth cannot be greater than Maximum Depth CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_genX_exec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 992bc99..e16d10c 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1570,6 +1570,7 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, * emit 3DSTATE_MULTISAMPLE. */ blorp_emit_3dstate_multisample(batch, params); + blorp_emit_cc_viewport(batch); /* If we can't alter the depth stencil config and multiple layers are * involved, the HiZ op will fail. This is because the op requires that a -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] intel/isl: Add support for enabling clear color conversion
CC: Rafael Antognolli <rafael.antogno...@intel.com> CC: Jordan Justen <jordan.l.jus...@intel.com> CC: Jason Ekstrand <ja...@jlekstrand.net> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/isl/isl.h | 6 ++ src/intel/isl/isl_surface_state.c | 11 +++ 2 files changed, 17 insertions(+) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index d65c621..ee89e07 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1316,6 +1316,12 @@ struct isl_surf_fill_state_info { uint64_t clear_address; /** +* On gen11+, tells if the hardware should write the given clear color out +* for sampler and display engine in native format. +*/ + bool clear_color_conversion_enable; + + /** * Surface write disables for gen4-5 */ isl_channel_mask_t write_disables; diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index 77931f2..83a 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -637,6 +637,17 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, #endif if (info->aux_usage != ISL_AUX_USAGE_NONE) { +#if GEN_GEN >= 11 + /* From the Ice Lake BSpec, RENDER_SURFACE_STATE: + * + * Enables Pixel backend hw to convert clear values into native format + * and write back to clear address, so that display and sampler can use + * the converted value for resolving fast cleared RTs. + */ + s.ClearColorConversionEnable = info->clear_color_conversion_enable; +#else + assert(!info->clear_color_conversion_enable); +#endif #if GEN_GEN >= 10 s.ClearValueAddressEnable = true; s.ClearValueAddress = info->clear_address; -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] intel/blorp/icl: Enable clear color conversion when fast clearing
CC: Rafael Antognolli <rafael.antogno...@intel.com> CC: Jordan Justen <jordan.l.jus...@intel.com> CC: Jason Ekstrand <ja...@jlekstrand.net> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_genX_exec.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 721f02a..53c6b2a 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1319,6 +1319,8 @@ blorp_emit_surface_state(struct blorp_batch *batch, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, .mocs = surface->addr.mocs, .clear_color = surface->clear_color, + .clear_color_conversion_enable = + GEN_GEN >= 11 && op == ISL_AUX_OP_FAST_CLEAR, .write_disables = write_disable_mask); blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, -- 2.7.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Don't try to disable render buffers for compute
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104546 CC: xinghua@intel.com Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_draw.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 7e29dcfd4e8..626cd3fdb70 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -441,8 +441,10 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering) tex_obj = intel_texture_object(u->TexObj); if (tex_obj && tex_obj->mt) { - intel_disable_rb_aux_buffer(brw, tex_obj->mt, 0, ~0, - "as a shader image"); + if (rendering) { + intel_disable_rb_aux_buffer(brw, tex_obj->mt, 0, ~0, + "as a shader image"); + } intel_miptree_prepare_image(brw, tex_obj->mt); -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v3 01/11] framework: Check for vulkan availability
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4259ec832..c90109907 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,8 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows") endif() ENDIF() +pkg_check_modules(LIBVULKAN QUIET vulkan) + IF(PIGLIT_HAS_GLX) option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON) ELSE() -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] v3: ext_memory_object: Test sampling memory exported from Vulkan
Here is a revision taking into account feedback from Andres and Fredrik. Many thanks to both; I hope I didn't miss anything. CC: Andres Rodriguez <andre...@gmail.com> CC: Fredrik Hoeglund <fred...@kde.org> CC: Jason Ekstrand <ja...@jlekstrand.net> Topi Pohjolainen (11): framework: Check for vulkan availability framework: HACK: Read glslc path from env ext_memory_object: Add script for turning glsl into spirv c-array ext_memory_object: Support for setting up vulkan device ext_memory_object: Support for drawing with vulkan ext_memory_object: Support for setting up vulkan framebuffer ext_memory_object: Add tex layout command line ext_memory_object: Support for importing vulkan memory ext_memory_object: Support for creating simple vulkan pipelines ext_memory_object: Add helper for image type support ext_memory_object: Test render with vulkan and sample with gl CMakeLists.txt | 3 + tests/spec/ext_memory_object/CMakeLists.gl.txt | 18 + tests/spec/ext_memory_object/common.c | 167 + tests/spec/ext_memory_object/common.h | 51 ++ .../compile_and_dump_glsl_as_spirv.py | 139 + tests/spec/ext_memory_object/vk_common.c | 670 + tests/spec/ext_memory_object/vk_common.h | 176 ++ .../ext_memory_object/vk_export_image_as_tex.c | 219 +++ tests/spec/ext_memory_object/vk_fb.c | 346 +++ tests/spec/ext_memory_object/vk_fragcoord.fs | 7 + tests/spec/ext_memory_object/vk_fragcoord.vs | 8 + 11 files changed, 1804 insertions(+) create mode 100644 tests/spec/ext_memory_object/common.c create mode 100644 tests/spec/ext_memory_object/common.h create mode 100644 tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py create mode 100644 tests/spec/ext_memory_object/vk_common.c create mode 100644 tests/spec/ext_memory_object/vk_common.h create mode 100644 tests/spec/ext_memory_object/vk_export_image_as_tex.c create mode 100644 tests/spec/ext_memory_object/vk_fb.c create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.fs create mode 100644 tests/spec/ext_memory_object/vk_fragcoord.vs 
-- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v3 03/11] ext_memory_object: Add script for turning glsl into spirv c-array
This is a stripped-down version of glsl_scraper.py found in crucible. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- .../compile_and_dump_glsl_as_spirv.py | 139 + 1 file changed, 139 insertions(+) create mode 100644 tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py diff --git a/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py new file mode 100644 index 0..b7fdeafe2 --- /dev/null +++ b/tests/spec/ext_memory_object/compile_and_dump_glsl_as_spirv.py @@ -0,0 +1,139 @@ +#! /usr/bin/env python3 + +import argparse +import io +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +from textwrap import dedent + +class ShaderCompileError(RuntimeError): +def __init__(self, *args): +super(ShaderCompileError, self).__init__(*args) + +class Shader: +def __init__(self, stage, infname): +self.stage = stage +self.infname = infname +self.dwords = None +self.var_prefix = os.path.basename(infname).replace('.', '_') + +def __run_glslc(self, extra_args=[]): +stage_flag = '-fshader-stage=' + self.stage + +with subprocess.Popen([glslc] + extra_args + + [stage_flag, '-std=430core', '-o', '-', + self.infname], + stdout = subprocess.PIPE, + stderr = subprocess.PIPE, + stdin = subprocess.PIPE) as proc: + +out, err = proc.communicate(timeout=30) + +if proc.returncode != 0: +# Unfortunately, glslang dumps errors to standard out. 
+# However, since we don't really want to count on that, +# we'll grab the output of both +message = out.decode('utf-8') + '\n' + err.decode('utf-8') +raise ShaderCompileError(message.strip()) + +return out + +def compile(self): +def dwords(f): +while True: +dword_str = f.read(4) +if not dword_str: +return +assert len(dword_str) == 4 +yield struct.unpack('I', dword_str)[0] + +spirv = self.__run_glslc() +self.dwords = list(dwords(io.BytesIO(spirv))) +self.assembly = str(self.__run_glslc(['-S']), 'utf-8') + +def _dump_glsl_code(self, f, var_name): +# First dump the GLSL source as strings +f.write('static const char {0}[] ='.format(var_name)) +f.write('\n"#version 330\\n"') + +infile = open_file(self.infname, 'r') +for line in infile: +f.write('\n"{0}\\n"'.format(line.strip('\n'))) +f.write(';\n\n') + +def _dump_spirv_code(self, f, var_name): +f.write('/* SPIR-V Assembly:\n') +f.write(' *\n') +for line in self.assembly.splitlines(): +f.write(' * ' + line + '\n') +f.write(' */\n') + +f.write('static const uint32_t {0}[] = {{'.format(var_name)) +line_start = 0 +while line_start < len(self.dwords): +f.write('\n') +for i in range(line_start, min(line_start + 6, len(self.dwords))): +f.write(' 0x{:08x},'.format(self.dwords[i])) +line_start += 6 +f.write('\n};\n') + +def dump_c_code(self, f): +self._dump_glsl_code(f, self.var_prefix + '_glsl_src') +self._dump_spirv_code(f, self.var_prefix + '_spir_v_src') + +def parse_args(): +description = dedent("""\ +This program compiles the given glsl source file into SPIR-V and +writes it to another C file as an array of 32-bit words. 
+ +If '-' is passed as the input file or output file, stdin or stdout +will be used instead of a file on disc.""") + +p = argparse.ArgumentParser( +description=description, +formatter_class=argparse.RawDescriptionHelpFormatter) +p.add_argument('-o', '--outfile', default='-', +help='Output to the given file (default: stdout).') +p.add_argument('--with-glslc', metavar='PATH', +default='glslc', +dest='glslc', +help='Full path to the glslc shader compiler.') +p.add_argument('--stage', dest='stage') +p.add_argument('infile', metavar='INFILE') + +return p.parse_args() + +def open_file(name, mode): +if name == '-': +if mode == 'w': +return sys.stdout +elif mode == 'r': +return sys.stdin +else: +assert False +else: +return open(name, mode) + +args = parse_args() +outfname = args.outfile +glslc = args.glslc + +shader = Shader(args.stage, args.infile) +shader.compile() + +with ope
[Mesa-dev] [v3 02/11] framework: HACK: Read glslc path from env
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c90109907..767b90add 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,6 +174,7 @@ ELSEIF(${CMAKE_SYSTEM_NAME} MATCHES "Windows") ENDIF() pkg_check_modules(LIBVULKAN QUIET vulkan) +set(GLSLC $ENV{GLSLC}) IF(PIGLIT_HAS_GLX) option(PIGLIT_BUILD_GLX_TESTS "Build tests that require GLX" ON) -- 2.14.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug
This is just drafting some thoughts and only compile tested. CC: "Rogovin, Kevin"--- src/mesa/drivers/dri/i965/brw_blorp.c | 8 + src/mesa/drivers/dri/i965/brw_context.h | 10 ++ src/mesa/drivers/dri/i965/brw_draw.c| 54 - 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 680121b6ab..b3f84ab8ca 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -186,11 +186,19 @@ blorp_surf_for_miptree(struct brw_context *brw, surf->aux_addr.buffer = mt->hiz_buf->bo; surf->aux_addr.offset = mt->hiz_buf->offset; } + + if (!is_render_target && brw->screen->devinfo.gen == 9) + gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX); } else { surf->aux_addr = (struct blorp_address) { .buffer = NULL, }; memset(>clear_color, 0, sizeof(surf->clear_color)); + + if (!is_render_target && brw->screen->devinfo.gen == 9 && + (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 || + mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)) + gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5); } assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == (surf->aux_addr.buffer == NULL)); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0670483806..44602c23c0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -165,6 +165,11 @@ enum brw_cache_id { BRW_MAX_CACHE }; +enum gen9_astc5x5_wa_tex_type { + GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0, + GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1, +}; + enum brw_state_id { /* brw_cache_ids must come first - see brw_program_cache.c */ BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -1262,6 +1267,8 @@ struct brw_context */ bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS]; + enum gen9_astc5x5_wa_tex_type gen9_sampler_wa_tex_mask; + __DRIcontext *driContext; struct intel_screen *screen; }; @@ -1286,6 +1293,9 @@ void 
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); void intel_prepare_render(struct brw_context *brw); +void gen9_astc5x5_sampler_wa(struct brw_context *brw, + enum gen9_astc5x5_wa_tex_type curr_mask); + void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering); void intel_resolve_for_dri2_flush(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 7e29dcfd4e..929f806eb3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -371,6 +371,50 @@ intel_disable_rb_aux_buffer(struct brw_context *brw, return found; } +static enum gen9_astc5x5_wa_tex_type +gen9_astc5x5_wa_get_tex_mask(const struct brw_context *brw) +{ + enum gen9_astc5x5_wa_tex_type mask = 0; + const struct gl_context *ctx = >ctx; + const struct intel_texture_object *tex_obj; + + const int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!ctx->Texture.Unit[i]._Current) +continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) +continue; + + if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE) + mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; + + if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 || + tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) + mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX; + } + + return mask; +} + +/* TODO: Do we actually need this both ways: astc5x5 followed by aux + * and vice-versa? Or is only one direction problematic? 
+ */ +void +gen9_astc5x5_sampler_wa(struct brw_context *brw, +enum gen9_astc5x5_wa_tex_type curr_mask) +{ + if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); + + if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5)) + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); + + brw->gen9_sampler_wa_tex_mask = curr_mask; +} + /** * \brief Resolve buffers before drawing. * @@ -383,6 +427,12 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering) struct gl_context *ctx = >ctx; struct intel_texture_object *tex_obj; + const enum gen9_astc5x5_wa_tex_type curr_wa_mask = + (brw->screen->devinfo.gen == 9) ? gen9_astc5x5_wa_get_tex_mask(brw) : 0; + + if (brw->screen->devinfo.gen == 9) + gen9_astc5x5_sampler_wa(brw,
[Mesa-dev] [PATCH 44/51] glsl: WIP: Add lowering pass for treating mediump as float16
At least the following need more thought: 1) Converting right-hand-side of assignments from 16-bits to 32-bits - More correct thing to do is to treat rhs as 32-bits latest in the expression producing the value 2) Texture arguments except coordinates are not handled at all - Moreover, coordinates are always converted into 32-bits due to logic missing in the Intel compiler backend. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/Makefile.sources | 1 + src/compiler/glsl/ir_optimization.h | 1 + src/compiler/glsl/lower_mediump.cpp | 273 3 files changed, 275 insertions(+) create mode 100644 src/compiler/glsl/lower_mediump.cpp diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 2ab8e163a2..47bde4fb78 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -94,6 +94,7 @@ LIBGLSL_FILES = \ glsl/lower_int64.cpp \ glsl/lower_jumps.cpp \ glsl/lower_mat_op_to_vec.cpp \ + glsl/lower_mediump.cpp \ glsl/lower_noise.cpp \ glsl/lower_offset_array.cpp \ glsl/lower_packed_varyings.cpp \ diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 2b8c195151..09c4d664e0 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -132,6 +132,7 @@ bool do_vec_index_to_swizzle(exec_list *instructions); bool lower_discard(exec_list *instructions); void lower_discard_flow(exec_list *instructions); bool lower_instructions(exec_list *instructions, unsigned what_to_lower); +bool lower_mediump(struct gl_linked_shader *shader); bool lower_noise(exec_list *instructions); bool lower_variable_index_to_cond_assign(gl_shader_stage stage, exec_list *instructions, bool lower_input, bool lower_output, diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp new file mode 100644 index 00..89eed8b294 --- /dev/null +++ b/src/compiler/glsl/lower_mediump.cpp @@ -0,0 +1,273 @@ +/* + * Copyright 2017 Intel Corporation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file lower_mediump.cpp + * + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ast.h" + +static const glsl_type * +get_mediump(const glsl_type *highp) +{ + if (highp->is_float()) + return glsl_type::get_instance(GLSL_TYPE_FLOAT16, + highp->vector_elements, + highp->matrix_columns); + + if (highp->is_array() && highp->fields.array->is_float()) + return glsl_type::get_array_instance( +glsl_type::get_instance(GLSL_TYPE_FLOAT16, +highp->fields.array->vector_elements, +highp->fields.array->matrix_columns), +highp->length); + + return highp; +} + +static bool +is_16_bit(const ir_rvalue *ir) +{ + return ir->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16; +} + +static bool +refers_16_bit_float(const ir_rvalue *ir) +{ + ir_variable *var = ir->variable_referenced(); + + /* Only variables have the mediump property, constants need conversion. */ + if (!var) + return false; + + return var->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16; +} + +static ir_rvalue * +convert(ir_rvalue *ir, enum ir_expression_operation op) +{ + if (ir->ir_type == ir_type_constant) { + assert(op == ir_unop_f2h); + ir->type = get_mediump(ir->type); + return ir; + } + + void *ctx = ralloc_parent(ir); + return new(ctx) ir_expression(op, ir); +} + +class lower_mediump_visitor : public ir_rvalu
[Mesa-dev] [PATCH 29/51] intel/compiler/fs: Add register padding support
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 3 ++- src/intel/compiler/brw_fs.h| 3 ++- src/intel/compiler/brw_fs_builder.h| 25 ++--- src/intel/compiler/brw_fs_copy_propagation.cpp | 1 + src/intel/compiler/brw_fs_nir.cpp | 9 +++-- src/intel/compiler/brw_ir_fs.h | 3 +++ 6 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index cedfde5096..9c3410b698 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -440,6 +440,7 @@ fs_reg::fs_reg(struct ::brw_reg reg) : { this->offset = 0; this->stride = 1; + this->pad_per_component = 0; if (this->file == IMM && (this->type != BRW_REGISTER_TYPE_V && this->type != BRW_REGISTER_TYPE_UV && @@ -467,7 +468,7 @@ fs_reg::component_size(unsigned width) const const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride : hstride == 0 ? 0 : 1 << (hstride - 1)); - return MAX2(width * stride, 1) * type_sz(type); + return (MAX2(width * stride, 1) * (type_sz(type)) + pad_per_component); } /** diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 30557324d5..d9c4f737e6 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -231,7 +231,8 @@ public: nir_jump_instr *instr); fs_reg get_nir_src(const nir_src ); fs_reg get_nir_src_imm(const nir_src ); - fs_reg get_nir_dest(const nir_dest ); + fs_reg get_nir_dest(const nir_dest , + bool pad_components_to_full_registers = false); fs_reg get_nir_image_deref(const nir_deref_var *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder , const fs_inst , diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 633086c64b..804d52e5df 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -182,17 +182,28 @@ namespace brw { * component in this IR). 
*/ dst_reg - vgrf(enum brw_reg_type type, unsigned n = 1) const + vgrf(enum brw_reg_type type, + unsigned n = 1, + bool pad_components_to_full_registers = false) const { assert(dispatch_width() <= 32); - if (n > 0) -return dst_reg(VGRF, shader->alloc.allocate( - DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), - REG_SIZE)), - type); - else + if (n == 0) return retype(null_reg_ud(), type); + + const unsigned pad_per_component = +(pad_components_to_full_registers && + type_sz(type) == 2 && + dispatch_width() == 8) ? (REG_SIZE / 2) : 0; + const unsigned size = +n * ((type_sz(type) * dispatch_width()) + pad_per_component); + const unsigned nr = shader->alloc.allocate( +DIV_ROUND_UP(size, REG_SIZE)); + + dst_reg dst = dst_reg(VGRF, nr, type); + dst.pad_per_component = pad_per_component; + + return dst; } /** diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index ed2511ecfa..637a1de6ae 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -447,6 +447,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].file = entry->src.file; inst->src[arg].nr = entry->src.nr; inst->src[arg].stride *= entry->src.stride; + inst->src[arg].pad_per_component = entry->src.pad_per_component; inst->saturate = inst->saturate || entry->saturate; /* Compute the offset of inst->src[arg] relative to entry->dst */ diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 16e8dfc186..35e78b134a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -357,6 +357,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) unsigned size = array_elems * reg->num_components; const brw_reg_type reg_type = brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F); + + /* TODO: Consider if 16-bit component padding is needed. 
*/ + nir_locals[reg->index] = bld.vgrf(reg_type, size); } @@ -1602,13 +1605,15 @@ fs_visitor::get_nir_src_imm(const nir_src ) } fs_reg -fs_visitor::get_nir_dest(const nir_dest ) +fs_visitor::get_nir_dest(co
[Mesa-dev] [PATCH 45/51] glsl: Use 16-bit constants if operation is otherwise 16-bit
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 43 - 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index 89eed8b294..0276e74d6e 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -67,6 +67,25 @@ refers_16_bit_float(const ir_rvalue *ir) return var->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16; } +static bool +is_constant(const ir_rvalue *ir) +{ + if (ir->ir_type == ir_type_constant) + return true; + + if (ir->ir_type != ir_type_expression) + return false; + + const ir_expression *expr = (const ir_expression *)ir; + + for (unsigned i = 0; i < expr->num_operands; i++) { + if (!is_constant(expr->operands[i])) + return false; + } + + return true; +} + static ir_rvalue * convert(ir_rvalue *ir, enum ir_expression_operation op) { @@ -99,6 +118,7 @@ private: bool can_be_lowered(const ir_variable *var) const; void retype_to_float16(const glsl_type **t); + void retype_to_float16(ir_rvalue *ir); }; bool @@ -119,6 +139,22 @@ lower_mediump_visitor::retype_to_float16(const glsl_type **t) *t = mediump; } +void +lower_mediump_visitor::retype_to_float16(ir_rvalue *ir) +{ + retype_to_float16(>type); + + if (ir->ir_type != ir_type_expression) + return; + + const ir_expression *expr = (const ir_expression *)ir; + + for (unsigned i = 0; i < expr->num_operands; i++) { + assert(is_constant(expr->operands[i])); + retype_to_float16(>operands[i]->type); + } +} + ir_visitor_status lower_mediump_visitor::visit(ir_variable *ir) { @@ -228,7 +264,7 @@ lower_mediump_visitor::visit_leave(ir_expression *ir) for (unsigned i = 0; i < ir->num_operands; i++) { if (is_16_bit(ir->operands[i])) has_16_bit_src = true; - else + else if (!is_constant(ir->operands[i])) has_32_bit_src = true; } @@ -240,6 +276,11 @@ lower_mediump_visitor::visit_leave(ir_expression *ir) */ if (!has_32_bit_src && ir->operation != 
ir_triop_lrp) { + for (unsigned i = 0; i < ir->num_operands; i++) { + if (is_constant(ir->operands[i])) +retype_to_float16(ir->operands[i]); + } + retype_to_float16(>type); return visit_continue; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 41/51] intel/compiler/eu: Take stride into account in 16-bit ops
This is needed when converting from F -> HF. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_eu_validate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 6ee6b4ffbe..735ea6 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -459,6 +459,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf exec_type_size == 8 && dst_type_size == 4) dst_type_size = 8; + if (exec_type_size == 4 && dst_type_size == 2 && dst_stride == 2) + dst_type_size = 4; + if (exec_type_size > dst_type_size) { ERROR_IF(dst_stride * dst_type_size != exec_type_size, "Destination stride must be equal to the ratio of the sizes of " -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/51] intel/compiler: Add support for loading 16-bit constants
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index a973c18203..65a5bfa49a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1515,6 +1515,11 @@ fs_visitor::nir_emit_load_const(const fs_builder , fs_reg reg = bld.vgrf(reg_type, instr->def.num_components); switch (instr->def.bit_size) { + case 16: + for (unsigned i = 0; i < instr->def.num_components; i++) + bld.MOV(offset(reg, bld, i), brw_imm_w(instr->value.i16[i])); + break; + case 32: for (unsigned i = 0; i < instr->def.num_components; i++) bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i])); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 51/51] i965/fs: Lower gles mediump floats into 16-bits
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_link.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index d18521e792..89ccbb06b5 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -134,6 +134,9 @@ process_glsl_ir(struct brw_context *brw, lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + if (shader_prog->IsES && shader->Stage == MESA_SHADER_FRAGMENT) + lower_mediump(shader); + validate_ir_tree(shader->ir); /* Now that we've finished altering the linked IR, reparent any live IR back -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 48/51] glsl: HACK: Treat input varyings as 16-bits by conversion
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index 094ab4e743..45cf75b53c 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -92,6 +92,20 @@ refers_16_bit_float(const ir_rvalue *ir) } static bool +defers_input_varying(const ir_rvalue *ir) +{ + ir_variable *var = ir->variable_referenced(); + if (!var) + return false; + + if (var->data.mode != ir_var_shader_in) + return false; + + return var->data.precision == ast_precision_low || + var->data.precision == ast_precision_medium; +} + +static bool is_constant(const ir_rvalue *ir) { if (ir->ir_type == ir_type_constant) @@ -152,6 +166,13 @@ lower_mediump_visitor::can_be_lowered(const ir_variable *var) const if (!var->type->get_scalar_type()->is_float()) return false; + /* TODO: Intel compiler backend isn't prepared for interpolated 16-bit +* varyings. Input varyings are instead converted to 16-bits before +* use. 
+*/ + if (var->data.mode == ir_var_shader_in) + return false; + return var->data.precision == ast_precision_low || var->data.precision == ast_precision_medium; } @@ -309,7 +330,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir) for (unsigned i = 0; i < ir->num_operands; i++) { if (is_16_bit(ir->operands[i])) has_16_bit_src = true; - else if (!is_constant(ir->operands[i])) + else if (!is_constant(ir->operands[i]) && + !defers_input_varying(ir->operands[i])) has_32_bit_src = true; } @@ -324,6 +346,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir) for (unsigned i = 0; i < ir->num_operands; i++) { if (is_constant(ir->operands[i])) retype_to_float16(ir->operands[i]); + else if (defers_input_varying(ir->operands[i])) +ir->operands[i] = convert(ir->operands[i], ir_unop_f2h); } retype_to_float16(>type); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 33/51] intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index cbb1c118d2..64243312b9 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3881,7 +3881,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr { fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) - dest = get_nir_dest(instr->dest); + dest = get_nir_dest(instr->dest, true /* pad components to full regs */); switch (instr->intrinsic) { case nir_intrinsic_image_load: -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/51] intel/compiler/disasm: Print fp16 also for sampler messages
This is what render target write does. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_disasm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index da2a5d78dd..fbb18b0f26 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -1621,6 +1621,11 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, brw_inst_sampler_msg_type(devinfo, inst), ); err |= control(file, "sampler simd mode", gen5_sampler_simd_mode, brw_inst_sampler_simd_mode(devinfo, inst), ); + if ((devinfo->gen >= 9 || devinfo->is_cherryview) && + brw_inst_data_format(devinfo, inst)) { + string(file, " HP"); + } + format(file, " Surface = %"PRIu64" Sampler = %"PRIu64, brw_inst_binding_table_index(devinfo, inst), brw_inst_sampler(devinfo, inst)); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 37/51] intel/compiler/fs: Consider original sizes when retyping alu ops
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 30 -- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index baa84b0f3c..d28ed57eca 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -655,6 +655,26 @@ emit_find_msb_using_lzd(const fs_builder , inst->src[0].negate = true; } +static enum brw_reg_type +get_nir_alu_dest_type(const struct gen_device_info *devinfo, + const nir_alu_instr *instr, unsigned size) +{ + brw_reg_type res = brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_op_infos[instr->op].output_type | + nir_dest_bit_size(instr->dest.dest))); + return brw_reg_type_from_bit_size(size * 8, res); +} + +static enum brw_reg_type +get_nir_alu_src_type(const struct gen_device_info *devinfo, + const nir_alu_instr *instr, unsigned i, unsigned size) +{ + brw_reg_type res = brw_type_for_nir_type(devinfo, + (nir_alu_type)(nir_op_infos[instr->op].input_types[i] | + nir_src_bit_size(instr->src[i].src))); + return brw_reg_type_from_bit_size(size * 8, res); +} + void fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) { @@ -662,16 +682,14 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) fs_inst *inst; fs_reg result = get_nir_alu_dest(instr); - result.type = brw_type_for_nir_type(devinfo, - (nir_alu_type)(nir_op_infos[instr->op].output_type | - nir_dest_bit_size(instr->dest.dest))); + result.type = get_nir_alu_dest_type(devinfo, instr, + brw_reg_type_to_size(result.type)); fs_reg op[4]; for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { op[i] = get_nir_src(instr->src[i].src); - op[i].type = brw_type_for_nir_type(devinfo, - (nir_alu_type)(nir_op_infos[instr->op].input_types[i] | -nir_src_bit_size(instr->src[i].src))); + op[i].type = get_nir_alu_src_type(devinfo, instr, i, +brw_reg_type_to_size(op[i].type)); op[i].abs = instr->src[i].abs; op[i].negate = 
instr->src[i].negate; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 30/51] intel/compiler/fs: Pad 16-bit texture return payloads
This is to tell offset and read/write calculators enough to work correctly with 16-bit texture payloads. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 35e78b134a..6d9b272a57 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4949,7 +4949,22 @@ fs_visitor::nir_emit_texture(const fs_builder , nir_tex_instr *instr) } } - fs_reg dst = bld.vgrf(brw_type_for_nir_type(devinfo, instr->dest_type), 4); + const enum brw_reg_type dst_type = + brw_type_for_nir_type(devinfo, instr->dest_type); + + /* In case of 16-bit return format one needs to prepare for 4 registers +* regardless of the dispatch width: +* +* From SKL PRM Vol. 7 Page 131, Return Format = 16-bit: +* +* A SIMD8* writeback message with Return Format of 16-bit consists of +* up to 4 destination registers). +* +* Therefore tell builder to give full register per component even in +* case of 16-bit size and SIMD8. +*/ + const bool pad_components_to_full_registers = true; + fs_reg dst = bld.vgrf(dst_type, 4, pad_components_to_full_registers); fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); inst->offset = header_bits; @@ -4987,7 +5002,9 @@ fs_visitor::nir_emit_texture(const fs_builder , nir_tex_instr *instr) bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE); } - bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0); + bld.LOAD_PAYLOAD(get_nir_dest(instr->dest, + pad_components_to_full_registers), +nir_dest, dest_size, 0); } void -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 39/51] intel/compiler/fs: Consider logic ops on 16-bit booleans
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 70 ++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 2a32b1449a..aff592c354 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1662,7 +1662,75 @@ fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr) * one component per register. */ const bool pad_components_to_full_register = true; - return get_nir_dest(instr->dest.dest, pad_components_to_full_register); + + switch (instr->op) { + case nir_op_flt: + case nir_op_fge: + case nir_op_feq: + case nir_op_fne: { + assert(instr->dest.dest.is_ssa); + + if (nir_src_bit_size(instr->src[0].src) > 16) + return get_nir_dest(instr->dest.dest); + + assert(nir_src_bit_size(instr->src[0].src) == 16 && + nir_src_bit_size(instr->src[1].src) == 16); + + /* Destination type for comparison operations is boolean which NIR + * treats as having 32-bit size. If, however, sources are 16-bit + * hardware will produce 16-bit result (0x/0x). Therefore set + * the destination type accordingly. + */ + nir_ssa_values[instr->dest.dest.ssa.index] = + bld.vgrf(BRW_REGISTER_TYPE_HF, + instr->dest.dest.ssa.num_components, + pad_components_to_full_register); + return nir_ssa_values[instr->dest.dest.ssa.index]; + } + case nir_op_inot: + case nir_op_ixor: + case nir_op_ior: + case nir_op_iand: { + assert(instr->dest.dest.is_ssa); + + const fs_reg src0 = get_nir_src(instr->src[0].src); + const fs_reg src1 = get_nir_src(instr->src[0].src); + + /* TODO: This specifically prepares for mixed precision operations which + * in principle shouldn't happen. There is, however, corner case + * when this is possible. As NIR doesn't consider how booleans + * are produced, we may end up here with one source operand + * produced from an operation with 32-bit sources and another from + * 16-bits. 
+ * This is handled by marking this operation as producing 16-bits + * and relying on nir_emit_alu() to adjust the 32-bit source + * operand to 16-bits with stride == 2. Recall that 32-bit + * booleans are just 0x00000000/0xffffffff and it suffices to read + * only the lower 16-bits. + * WARN: This blindly assumes that mixed precision integer source + * operands represent boolean values. There is no way of checking + * if that holds. + */ + if (brw_reg_type_to_size(src0.type) > 2 && + brw_reg_type_to_size(src1.type) > 2) + return get_nir_dest(instr->dest.dest); + + /* Translation from GLSL to NIR produces logical operations with + * integer operands even when operands are booleans. See handling + * of ir_binop_bit_*. + * As hardware will produce 16-bit results when the sources are 16-bit + * set the destination type accordingly. + */ + nir_ssa_values[instr->dest.dest.ssa.index] = + bld.vgrf(BRW_REGISTER_TYPE_W, + instr->dest.dest.ssa.num_components, + pad_components_to_full_register); + return nir_ssa_values[instr->dest.dest.ssa.index]; + } + default: + return get_nir_dest(instr->dest.dest, + pad_components_to_full_register); + } } fs_reg -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 35/51] intel/compiler/fs: Pad 16-bit payload lowering
Otherwise copy propagation fails when write sizes differ. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 5 - src/intel/compiler/brw_ir_fs.h | 13 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 9c3410b698..8e77248470 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3450,7 +3450,10 @@ fs_visitor::lower_load_payload() for (uint8_t i = inst->header_size; i < inst->sources; i++) { if (inst->src[i].file != BAD_FILE) -ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); +ibld.MOV(retype_pad_to_full_register( +dst, dispatch_width, inst->src[i].type), + inst->src[i]); + if (type_sz(inst->src[i].type) == 2) dst = byte_offset(dst, REG_SIZE); else diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index b4a1d7ef5a..fe7f7c4be7 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -72,6 +72,19 @@ retype(fs_reg reg, enum brw_reg_type type) } static inline fs_reg +retype_pad_to_full_register(fs_reg reg, unsigned dispatch_width, +enum brw_reg_type type) +{ + reg.type = type; + + assert(reg.pad_per_component == 0); + if (dispatch_width == 8 && type_sz(reg.type) == 2) + reg.pad_per_component = REG_SIZE / 2; + + return reg; +} + +static inline fs_reg byte_offset(fs_reg reg, unsigned delta) { switch (reg.file) { -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 32/51] intel/compiler/fs: Pad 16-bit nir vec* components into full reg
This allows quite a bit of infra to be kept as is, such as liveness analysis, copy propagation and dead code elimination. Here one deals with virtual register space and this doesn't prevent from packing more than one component into one hardware register later on. That is entirely matter of register allocator working with sub-registers. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_nir.cpp | 19 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d9c4f737e6..b23d2b1733 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -233,6 +233,7 @@ public: fs_reg get_nir_src_imm(const nir_src ); fs_reg get_nir_dest(const nir_dest , bool pad_components_to_full_registers = false); + fs_reg get_nir_alu_dest(const nir_alu_instr *instr); fs_reg get_nir_image_deref(const nir_deref_var *deref); fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder , const fs_inst , diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d3125d7dcd..cbb1c118d2 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -656,7 +656,7 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; fs_inst *inst; - fs_reg result = get_nir_dest(instr->dest.dest); + fs_reg result = get_nir_alu_dest(instr); result.type = brw_type_for_nir_type(devinfo, (nir_alu_type)(nir_op_infos[instr->op].output_type | nir_dest_bit_size(instr->dest.dest))); @@ -1624,6 +1624,23 @@ fs_visitor::get_nir_dest(const nir_dest , } fs_reg +fs_visitor::get_nir_alu_dest(const nir_alu_instr *instr) +{ + /* With data type size =< 16 bits one can fit two or more components +* into one register. 
In virtual register space this doesn't really add +* any value but requires things such as liveness analysis, +* copy propagation and dead code elimination to be updated to work with +* sub-register regions. +* +* Therefore instead allocate full padded registers per component. This +* doesn't prevent the final hardware register allocator from packing more than +* one component per register. +*/ + const bool pad_components_to_full_register = true; + return get_nir_dest(instr->dest.dest, pad_components_to_full_register); +} + +fs_reg fs_visitor::get_nir_image_deref(const nir_deref_var *deref) { fs_reg image(UNIFORM, deref->var->data.driver_location / 4, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 27/51] intel/compiler/fs: Set tex type for generator to flag fp16
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5751bb0ad7..0d415e2393 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2601,7 +2601,15 @@ fs_visitor::opt_sampler_eot() tex_inst->offset |= fb_write->target << 24; tex_inst->eot = true; - tex_inst->dst = ibld.null_reg_ud(); + + /* Set the null destination type specifically so that generator knows to +* flag half precision flag. +*/ + if (tex_inst->dst.type == BRW_REGISTER_TYPE_HF) + tex_inst->dst = ibld.null_reg_hf(); + else + tex_inst->dst = ibld.null_reg_ud(); + tex_inst->size_written = 0; fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 46/51] glsl: Lower float conversions to mediump
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 26 ++ 1 file changed, 26 insertions(+) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index 0276e74d6e..07f1f1ba9d 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -55,6 +55,30 @@ is_16_bit(const ir_rvalue *ir) return ir->type->get_scalar_type()->base_type == GLSL_TYPE_FLOAT16; } +static void +retype_x2f_x2f16(ir_rvalue *ir) +{ + if (ir->ir_type != ir_type_expression) + return; + + ir_expression *expr = (ir_expression *)ir; + switch (expr->operation) { + case ir_unop_i2f: + expr->operation = ir_unop_i2h; + break; + case ir_unop_b2f: + expr->operation = ir_unop_b2h; + break; + case ir_unop_u2f: + expr->operation = ir_unop_u2h; + break; + default: + return; + } + + ir->type = get_mediump(ir->type); +} + static bool refers_16_bit_float(const ir_rvalue *ir) { @@ -259,6 +283,8 @@ lower_mediump_visitor::visit_leave(ir_expression *ir) { ir_rvalue_visitor::visit_leave(ir); + retype_x2f_x2f16(ir); + bool has_32_bit_src = false; bool has_16_bit_src = false; for (unsigned i = 0; i < ir->num_operands; i++) { -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 50/51] glsl: HACK: Lower all temporary float variables to 16-bits
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index bae18c9bfb..73b8aa577c 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -184,6 +184,17 @@ lower_mediump_visitor::can_be_lowered(const ir_variable *var) const var->data.how_declared == ir_var_declared_implicitly) return true; + /* Like builtins, temporary variables don't have precision +* qualifiers either. Lower them by default. +* +* TODO: Surrounding expressions should really be examined to tell if +* full precision is needed. Moreover, these can be referred to from +* multiple locations. If any requires full precision, then all +* expressions involved would need to operate on full precision? +*/ + if (var->data.mode == ir_var_temporary) + return true; + return var->data.precision == ast_precision_low || var->data.precision == ast_precision_medium; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 43/51] intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms
--- src/intel/compiler/brw_fs_nir.cpp | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 2060a3139d..631bbf7f92 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4164,7 +4164,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , nir_intrinsic_instr *instr src.offset = const_offset->u32[0]; for (unsigned j = 0; j < instr->num_components; j++) { -bld.MOV(offset(dest, bld, j), offset(src, bld, j)); +/* Currently 16-bit uniforms occupy 32-bit slot. */ +const unsigned src_offset = + src.type == BRW_REGISTER_TYPE_HF ? 2 * j : j; + +bld.MOV(offset(dest, bld, j), offset(src, bld, src_offset)); } } else { fs_reg indirect = retype(get_nir_src(instr->src[0]), -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/51] intel/compiler: Prepare for glsl mediump float uniforms
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_shader.cpp | 13 + src/mesa/drivers/dri/i965/brw_program.c | 10 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 234b5a11c1..cc9297772b 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -78,6 +78,19 @@ type_size_scalar(const struct glsl_type *type) return 0; } +/* Variant of type_size_scalar() taking into account that GL core and api + * don't deal with 16-bit uniforms but with 32-bit. Only compiler backend can + * work with reduced precision if desired. + */ +extern "C" int +uniform_storage_type_size_scalar(const struct glsl_type *type) +{ + if (type->base_type == GLSL_TYPE_FLOAT16) + return type->components(); + + return type_size_scalar(type); +} + enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) { diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 755d4973cc..4573d9d303 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -47,12 +47,20 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" +int uniform_storage_type_size_scalar(const struct glsl_type *type); + +static int +uniform_storage_type_size_scalar_bytes(const struct glsl_type *type) +{ + return uniform_storage_type_size_scalar(type) * 4; +} + static bool brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) { if (is_scalar) { nir_assign_var_locations(>uniforms, >num_uniforms, - type_size_scalar_bytes); + uniform_storage_type_size_scalar_bytes); return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0); } else { nir_assign_var_locations(>uniforms, >num_uniforms, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 49/51] glsl: HACK: Lower builtin float outputs to 16-bits by default
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index 45cf75b53c..bae18c9bfb 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -173,6 +173,17 @@ lower_mediump_visitor::can_be_lowered(const ir_variable *var) const if (var->data.mode == ir_var_shader_in) return false; + /* Builtin outputs such as gl_FragColor don't have precision qualifier. +* Lower them by default. +* +* TODO: If this gets assigned with full precision value, output would +* need to be in full precision instead of the value being converted +* to 16-bits? +*/ + if (var->data.mode == ir_var_shader_out && + var->data.how_declared == ir_var_declared_implicitly) + return true; + return var->data.precision == ast_precision_low || var->data.precision == ast_precision_medium; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 38/51] intel/compiler/fs: Use original reg size when retyping nir src
In the case of boolean types the values may be given in 16 bits, whereas NIR unconditionally regards them as 32-bit. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index d28ed57eca..2a32b1449a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1604,8 +1604,9 @@ fs_visitor::get_nir_src(const nir_src ) * default to an integer type - instructions that need floating point * semantics will set this to F if they need to */ - reg.type = brw_reg_type_from_bit_size(nir_src_bit_size(src), -BRW_REGISTER_TYPE_D); + reg.type = brw_reg_type_from_bit_size( +brw_reg_type_to_size(reg.type) * 8, +BRW_REGISTER_TYPE_D); } return reg; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 26/51] intel/compiler/fs: Set 16-bit sampler return format
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_generator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 20d018e1fe..610a545cd8 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1051,6 +1051,9 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src brw_inst_set_eot(p->devinfo, brw_last_inst, true); brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); } + + if (dst.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_data_format(p->devinfo, brw_last_inst, 1); } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/51] intel/compiler/fs: Support for dumping 16-bit IMM values
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 5 + 1 file changed, 5 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 694fcc1919..1b972972c1 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -39,6 +39,7 @@ #include "compiler/glsl_types.h" #include "compiler/nir/nir_builder.h" #include "program/prog_parameter.h" +#include "util/half_float.h" using namespace brw; @@ -5532,6 +5533,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) break; case IMM: switch (inst->src[i].type) { + case BRW_REGISTER_TYPE_HF: +fprintf(file, "%-gHF", +_mesa_half_to_float((uint16_t)inst->src[i].ud)); +break; case BRW_REGISTER_TYPE_F: fprintf(file, "%-gf", inst->src[i].f); break; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 31/51] intel/compiler/fs: Pad 16-bit output (store/fb write) payloads
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 6d9b272a57..d3125d7dcd 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3254,7 +3254,7 @@ alloc_temporary(const fs_builder , unsigned size, fs_reg *regs, unsigned n, } else { const brw_reg_type type = is_16bit ? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_F; - const fs_reg tmp = bld.vgrf(type, size); + const fs_reg tmp = bld.vgrf(type, size, is_16bit); for (unsigned i = 0; i < n; i++) regs[i] = tmp; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 42/51] i965: WIP: Support for uploading 16-bit uniforms from 32-bit store
At this point 16-bit uniforms still take full 32-bit slots in the pull/push constant buffers and in shader deployment payload. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_compiler.h | 9 + src/intel/compiler/brw_fs.cpp | 12 src/intel/compiler/brw_fs_nir.cpp | 2 ++ src/intel/compiler/brw_fs_visitor.cpp | 1 + src/intel/compiler/brw_vec4.cpp | 8 src/intel/compiler/brw_vec4_gs_visitor.cpp | 8 src/intel/compiler/brw_vec4_visitor.cpp | 4 src/mesa/drivers/dri/i965/brw_cs.c | 2 ++ src/mesa/drivers/dri/i965/brw_curbe.c | 2 ++ src/mesa/drivers/dri/i965/brw_disk_cache.c | 14 ++ src/mesa/drivers/dri/i965/brw_gs.c | 2 ++ src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 10 ++ src/mesa/drivers/dri/i965/brw_program.c | 2 ++ src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_tcs.c | 2 ++ src/mesa/drivers/dri/i965/brw_tes.c | 2 ++ src/mesa/drivers/dri/i965/brw_vs.c | 2 ++ src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 - 19 files changed, 101 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index cdd61aae6c..7b43c4a135 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -613,6 +613,12 @@ struct brw_stage_prog_data { */ uint32_t *param; uint32_t *pull_param; + + /* Tells for GLSL backend if conversion from 32-bit store to, for example, +* 16-bits is required. 
+*/ + unsigned char *param_type; /* enum glsl_base_type */ + unsigned char *pull_param_type; /* enum glsl_base_type */ }; static inline uint32_t * @@ -621,6 +627,9 @@ brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data, { unsigned old_nr_params = prog_data->nr_params; prog_data->nr_params += nr_new_params; + prog_data->param_type = reralloc(ralloc_parent(prog_data->param_type), +prog_data->param_type, unsigned char, +prog_data->nr_params); prog_data->param = reralloc(ralloc_parent(prog_data->param), prog_data->param, uint32_t, prog_data->nr_params); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 8e77248470..3ca1d4cbc7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2102,19 +2102,26 @@ fs_visitor::assign_constant_locations() * create two new arrays for push/pull params. */ uint32_t *param = stage_prog_data->param; + unsigned char *param_type = stage_prog_data->param_type; stage_prog_data->nr_params = num_push_constants; if (num_push_constants) { stage_prog_data->param = ralloc_array(mem_ctx, uint32_t, num_push_constants); + stage_prog_data->param_type = ralloc_array(mem_ctx, unsigned char, + num_push_constants); } else { stage_prog_data->param = NULL; + stage_prog_data->param_type = NULL; } assert(stage_prog_data->nr_pull_params == 0); assert(stage_prog_data->pull_param == NULL); + assert(stage_prog_data->pull_param_type == NULL); if (num_pull_constants > 0) { stage_prog_data->nr_pull_params = num_pull_constants; stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t, num_pull_constants); + stage_prog_data->pull_param_type = ralloc_array(NULL, unsigned char, + num_pull_constants); } /* Now that we know how many regular uniforms we'll push, reduce the @@ -2143,11 +2150,16 @@ fs_visitor::assign_constant_locations() uint32_t value = param[i]; if (pull_constant_loc[i] != -1) { stage_prog_data->pull_param[pull_constant_loc[i]] = value; + 
stage_prog_data->pull_param_type[pull_constant_loc[i]] = +param_type[i]; } else if (push_constant_loc[i] != -1) { stage_prog_data->param[push_constant_loc[i]] = value; + stage_prog_data->param_type[push_constant_loc[i]] = +param_type[i]; } } ralloc_free(param); + ralloc_free(param_type); } bool diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 43127e00e8..2060a3139d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -120,9 +120,11 @@ fs_visitor::nir_setup_uniforms() * on the list.
[Mesa-dev] [PATCH 47/51] glsl: HACK: Force texture return into 16-bits
and convert coordinates unconditionally to 32-bits. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/lower_mediump.cpp | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/compiler/glsl/lower_mediump.cpp b/src/compiler/glsl/lower_mediump.cpp index 07f1f1ba9d..094ab4e743 100644 --- a/src/compiler/glsl/lower_mediump.cpp +++ b/src/compiler/glsl/lower_mediump.cpp @@ -132,6 +132,7 @@ public: virtual ir_visitor_status visit_leave(ir_assignment *ir); virtual ir_visitor_status visit_leave(ir_expression *ir); + virtual ir_visitor_status visit_leave(ir_texture *ir); virtual ir_visitor_status visit_leave(ir_swizzle *ir); virtual void handle_rvalue(ir_rvalue **rvalue); @@ -238,6 +239,24 @@ lower_mediump_visitor::visit_leave(ir_assignment *ir) } ir_visitor_status +lower_mediump_visitor::visit_leave(ir_texture *ir) +{ + ir_rvalue_visitor::visit_leave(ir); + + /* HACK: Intel compiler backend isn't prepared for 16-bit texture +* arguments. +* TODO: Convert the rest of the operands. +*/ + if (is_16_bit(ir->coordinate)) + ir->coordinate = convert(ir->coordinate, ir_unop_h2f); + + if (ir->type->is_float()) + retype_to_float16(>type); + + return visit_continue; +} + +ir_visitor_status lower_mediump_visitor::visit_leave(ir_swizzle *ir) { ir_rvalue_visitor::visit_leave(ir); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 34/51] intel/compiler/fs: Pad 16-bit const loads into full regs
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 64243312b9..c455fa4e27 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1519,7 +1519,8 @@ fs_visitor::nir_emit_load_const(const fs_builder , { const brw_reg_type reg_type = brw_reg_type_from_bit_size(instr->def.bit_size, BRW_REGISTER_TYPE_D); - fs_reg reg = bld.vgrf(reg_type, instr->def.num_components); + fs_reg reg = bld.vgrf(reg_type, instr->def.num_components, + true /* pad components to full regs */); switch (instr->def.bit_size) { case 16: -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 28/51] intel/compiler/fs: Use component_size() instead of open coded
This prepares for the following patch, which will add 16-bit tex/fb write payload padding support. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 2 +- src/intel/compiler/brw_fs_copy_propagation.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0d415e2393..cedfde5096 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -639,7 +639,7 @@ bool fs_inst::is_partial_write() const { return ((this->predicate && this->opcode != BRW_OPCODE_SEL) || - (this->exec_size * type_sz(this->dst.type)) < 32 || + dst.component_size(exec_size) < 32 || !this->dst.is_contiguous() || this->dst.offset % REG_SIZE != 0); } diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 470eaeec4f..ed2511ecfa 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -801,8 +801,8 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, for (int i = 0; i < inst->sources; i++) { int effective_width = i < inst->header_size ? 8 : inst->exec_size; assert(effective_width * MAX2(4, type_sz(inst->src[i].type)) % REG_SIZE == 0); -const unsigned size_written = effective_width * - type_sz(inst->src[i].type); +const unsigned size_written = + inst->src[i].component_size(effective_width); if (inst->src[i].file == VGRF) { acp_entry *entry = rzalloc(copy_prop_ctx, acp_entry); entry->dst = byte_offset(inst->dst, offset); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 24/51] intel/compiler: Add support for negating 16-bit floats
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_shader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index cc9297772b..3a83f55f28 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -653,7 +653,8 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) case BRW_REGISTER_TYPE_V: assert(!"unimplemented: negate UV/V immediate"); case BRW_REGISTER_TYPE_HF: - assert(!"unimplemented: negate HF immediate"); + reg->ud ^= 0x8000; + return true; } return false; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 18/51] intel/compiler: Allow 16-bit math
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_eu_emit.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 1507968e6c..87b144e871 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -1921,8 +1921,10 @@ void gen6_math(struct brw_codegen *p, assert(src1.file == BRW_GENERAL_REGISTER_FILE || (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); } else { - assert(src0.type == BRW_REGISTER_TYPE_F); - assert(src1.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_F || + src0.type == BRW_REGISTER_TYPE_HF); + assert(src1.type == BRW_REGISTER_TYPE_F || + src1.type == BRW_REGISTER_TYPE_HF); } /* Source modifiers are ignored for extended math instructions on Gen6. */ -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 40/51] intel/compiler/fs: Prepare 16-bit and/or/xor for 32-bit src
In the GLSL->NIR translation, logic operations with boolean typed operands are treated as operating on integer operands. The values of the operands can therefore be 0xFFFFFFFF/0x00000000 when they are produced with a 32-bit execution type, or 0xFFFF/0x0000 in the 16-bit case. This patch allows 16-bit logic operations to use 32-bit boolean types as sources. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 21 + 1 file changed, 21 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index aff592c354..43127e00e8 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1127,6 +1127,13 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) break; case nir_op_ixor: if (devinfo->gen >= 8) { + if (brw_reg_type_to_size(result.type) == 2) { +op[0] = subscript(op[0], + brw_reg_type_from_bit_size(16, op[0].type), 0); +op[1] = subscript(op[1], + brw_reg_type_from_bit_size(16, op[1].type), 0); + } + op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } @@ -1134,6 +1141,13 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) break; case nir_op_ior: if (devinfo->gen >= 8) { + if (brw_reg_type_to_size(result.type) == 2) { +op[0] = subscript(op[0], + brw_reg_type_from_bit_size(16, op[0].type), 0); +op[1] = subscript(op[1], + brw_reg_type_from_bit_size(16, op[1].type), 0); + } + op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } @@ -1141,6 +1155,13 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) break; case nir_op_iand: if (devinfo->gen >= 8) { + if (brw_reg_type_to_size(result.type) == 2) { +op[0] = subscript(op[0], + brw_reg_type_from_bit_size(16, op[0].type), 0); +op[1] = subscript(op[1], + brw_reg_type_from_bit_size(16, op[1].type), 0); + } + op[0] = resolve_source_modifiers(op[0]); op[1] = resolve_source_modifiers(op[1]); } -- 2.11.0 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 21/51] intel/compiler/fs: Use 16-bit null dest with 16-bit math
Even though this doesn't seem to alter anything other than the instruction dump, it is more consistent. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_generator.cpp | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 03fd34c00a..20d018e1fe 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1918,8 +1918,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) if (devinfo->gen >= 6) { assert(inst->mlen == 0); assert(devinfo->gen >= 7 || inst->exec_size == 8); + +struct brw_reg null_reg = brw_null_reg(); +if (brw_reg_type_to_size(dst.type) == 2) + null_reg = retype(null_reg, BRW_REGISTER_TYPE_HF); + gen6_math(p, dst, brw_math_function(inst->opcode), - src[0], brw_null_reg()); + src[0], null_reg); } else { assert(inst->mlen >= 1); assert(devinfo->gen == 5 || devinfo->is_g4x || inst->exec_size == 8); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 36/51] intel/compiler/fs: Prepare nir_emit_if() for 16-bit sources
Comparison operations using 16-bit sources produce 16-bit results (0xFFFF/0x0000) instead of 32-bit results (0xFFFFFFFF/0x00000000). Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c455fa4e27..baa84b0f3c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -396,10 +396,15 @@ fs_visitor::nir_emit_cf_list(exec_list *list) void fs_visitor::nir_emit_if(nir_if *if_stmt) { + const fs_reg src = get_nir_src(if_stmt->condition); + fs_inst *inst; + /* first, put the condition into f0 */ - fs_inst *inst = bld.MOV(bld.null_reg_d(), -retype(get_nir_src(if_stmt->condition), - BRW_REGISTER_TYPE_D)); + if (brw_reg_type_to_size(src.type) == 2) + inst = bld.MOV(bld.null_reg_w(), retype(src, BRW_REGISTER_TYPE_W)); + else + inst = bld.MOV(bld.null_reg_d(), retype(src, BRW_REGISTER_TYPE_D)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; bld.IF(BRW_PREDICATE_NORMAL); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/51] intel/compiler/disasm: Print 16-bit IMM values
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_disasm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index c752e15331..da2a5d78dd 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -1286,7 +1286,8 @@ imm(FILE *file, const struct gen_device_info *devinfo, enum brw_reg_type type, format(file, "%-gDF", brw_inst_imm_df(devinfo, inst)); break; case BRW_REGISTER_TYPE_HF: - string(file, "Half Float IMM"); + format(file, "%-gHF", + _mesa_half_to_float((uint16_t) brw_inst_imm_ud(devinfo, inst))); break; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 22/51] intel/compiler/fs: Use 16-bit null dest with 16-bit compare
Otherwise EU-emitter will deduce wrong execution size when examining source types and finding 32-bit wide register. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_nir.cpp | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 65a5bfa49a..16e8dfc186 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -25,6 +25,7 @@ #include "brw_fs.h" #include "brw_fs_surface_builder.h" #include "brw_nir.h" +#include "util/half_float.h" using namespace brw; using namespace brw::surface_access; @@ -1446,7 +1447,10 @@ fs_visitor::nir_emit_alu(const fs_builder , nir_alu_instr *instr) if (optimize_frontfacing_ternary(instr, result)) return; - bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); + if (brw_reg_type_to_size(op[0].type) == 2) + bld.CMP(bld.null_reg_w(), op[0], brw_imm_w(0), BRW_CONDITIONAL_NZ); + else + bld.CMP(bld.null_reg_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ); inst = bld.SEL(result, op[1], op[2]); inst->predicate = BRW_PREDICATE_NORMAL; break; @@ -3410,8 +3414,14 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder , */ fs_inst *cmp; if (instr->intrinsic == nir_intrinsic_discard_if) { - cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), - brw_imm_d(0), BRW_CONDITIONAL_Z); + const fs_reg src = get_nir_src(instr->src[0]); + + if (brw_reg_type_to_size(src.type) == 2) +cmp = bld.CMP(bld.null_reg_hf(), get_nir_src(instr->src[0]), + brw_imm_w(0), BRW_CONDITIONAL_Z); + else +cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]), + brw_imm_d(0), BRW_CONDITIONAL_Z); } else { fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/51] glsl: Enable 16-bit texturing in nir-conversion
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/glsl_to_nir.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index c0adf744e0..b16efa6555 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2057,6 +2057,9 @@ nir_visitor::visit(ir_texture *ir) case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_FLOAT16: + instr->dest_type = nir_type_float16; + break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 25/51] intel/compiler/fs: Support for combining 16-bit immediates
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_combine_constants.cpp | 84 + 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index e0c95d379b..5772ffb94a 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -36,6 +36,7 @@ #include "brw_fs.h" #include "brw_cfg.h" +#include "util/half_float.h" using namespace brw; @@ -95,6 +96,15 @@ link(void *mem_ctx, fs_reg *reg) return >link; } +union imm_val { + double df; + uint64_t u64; + int64_t d64; + float f; + int d; + unsigned ud; +}; + /** * Information about an immediate value. */ @@ -114,8 +124,10 @@ struct imm { */ exec_list *uses; - /** The immediate value. We currently only handle floats. */ - float val; + enum brw_reg_type type; + + /** The immediate value. We currently handle floats and half floats. */ + union imm_val val; /** * The GRF register and subregister number where we've decided to store the @@ -145,10 +157,10 @@ struct table { }; static struct imm * -find_imm(struct table *table, float val) +find_imm(struct table *table, enum brw_reg_type type, union imm_val val) { for (int i = 0; i < table->len; i++) { - if (table->imm[i].val == val) { + if (table->imm[i].val.u64 == val.u64 && table->imm[i].type == type) { return >imm[i]; } } @@ -190,6 +202,33 @@ compare(const void *_a, const void *_b) return a->first_use_ip - b->first_use_ip; } +static uint16_t +fabs_f16(uint16_t hf) +{ + return _mesa_float_to_half(fabs(_mesa_half_to_float(hf))); +} + +static union imm_val +get_val(const struct gen_device_info *devinfo, fs_inst *inst, unsigned i) +{ + union imm_val res = { 0 }; + + switch (inst->src[i].type) { + case BRW_REGISTER_TYPE_F: + res.f = !inst->can_do_source_mods(devinfo) ? 
+ inst->src[i].f : fabs(inst->src[i].f); + break; + case BRW_REGISTER_TYPE_HF: + res.ud = !inst->can_do_source_mods(devinfo) ? + inst->src[i].ud : fabs_f16(inst->src[i].ud); + break; + default: + unreachable("unsupported immediate type"); + } + + return res; +} + bool fs_visitor::opt_combine_constants() { @@ -215,12 +254,12 @@ fs_visitor::opt_combine_constants() for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file != IMM || - inst->src[i].type != BRW_REGISTER_TYPE_F) + (inst->src[i].type != BRW_REGISTER_TYPE_F && + inst->src[i].type != BRW_REGISTER_TYPE_HF)) continue; - float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f : - fabs(inst->src[i].f); - struct imm *imm = find_imm(, val); + union imm_val val = get_val(devinfo, inst, i); + struct imm *imm = find_imm(, inst->src[i].type, val); if (imm) { bblock_t *intersection = cfg_t::intersect(block, imm->block); @@ -238,6 +277,7 @@ fs_visitor::opt_combine_constants() imm->uses = new(const_ctx) exec_list(); imm->uses->push_tail(link(const_ctx, >src[i])); imm->val = val; +imm->type = inst->src[i].type; imm->uses_by_coissue = could_coissue(devinfo, inst); imm->must_promote = must_promote_imm(devinfo, inst); imm->first_use_ip = ip; @@ -278,7 +318,14 @@ fs_visitor::opt_combine_constants() imm->block->last_non_control_flow_inst()->next); const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0); - ibld.MOV(reg, brw_imm_f(imm->val)); + if (imm->type == BRW_REGISTER_TYPE_F) + ibld.MOV(reg, brw_imm_f(imm->val.f)); + else if (imm->type == BRW_REGISTER_TYPE_HF) { + ibld.MOV(retype(reg, BRW_REGISTER_TYPE_HF), + retype(brw_imm_ud(imm->val.ud), BRW_REGISTER_TYPE_HF)); + } else + unreachable("unsupported immediate type"); + imm->nr = reg.nr; imm->subreg_offset = reg.offset; @@ -298,9 +345,19 @@ fs_visitor::opt_combine_constants() reg->nr = table.imm[i].nr; reg->offset = table.imm[i].subreg_offset; reg->stride = 0; - reg->negate = signbit(reg->f) != signbit(table.imm[i].val); - assert((isnan(reg->f) && 
isnan(table.imm[i].val)) || -fabsf(reg->f) == fabs(table.imm[i].val)); + reg->negate = signbit(reg->f) != signbit(table.imm[i].val.f); + + switch (table.imm[i].type) { + case BRW_REGISTER_TYPE_F: +
[Mesa-dev] [PATCH 23/51] intel/compiler: Prepare for 16-bit 3-src ops
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_eu_emit.c | 21 + src/intel/compiler/brw_inst.h | 4 src/intel/compiler/brw_reg_type.c | 2 ++ 3 files changed, 27 insertions(+) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 87b144e871..fb8d5b5513 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -810,6 +810,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, assert(dest.file == BRW_GENERAL_REGISTER_FILE || dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.type == BRW_REGISTER_TYPE_F || + dest.type == BRW_REGISTER_TYPE_HF || dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || dest.type == BRW_REGISTER_TYPE_UD); @@ -857,6 +858,21 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, */ brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + + if (dest.type == BRW_REGISTER_TYPE_HF) { +/* From the Bspec: Instruction types + * + * Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit). 
+ */ +brw_inst_set_3src_src1_type(devinfo, inst, 1); +brw_inst_set_3src_src2_type(devinfo, inst, 1); + } } } @@ -902,11 +918,16 @@ brw_inst *brw_##OP(struct brw_codegen *p, \ struct brw_reg src2) \ { \ assert(dest.type == BRW_REGISTER_TYPE_F || \ + dest.type == BRW_REGISTER_TYPE_HF || \ dest.type == BRW_REGISTER_TYPE_DF); \ if (dest.type == BRW_REGISTER_TYPE_F) { \ assert(src0.type == BRW_REGISTER_TYPE_F); \ assert(src1.type == BRW_REGISTER_TYPE_F); \ assert(src2.type == BRW_REGISTER_TYPE_F); \ + } else if (dest.type == BRW_REGISTER_TYPE_HF) { \ + assert(src0.type == BRW_REGISTER_TYPE_HF);\ + assert(src1.type == BRW_REGISTER_TYPE_HF);\ + assert(src2.type == BRW_REGISTER_TYPE_HF);\ } else if (dest.type == BRW_REGISTER_TYPE_DF) { \ assert(src0.type == BRW_REGISTER_TYPE_DF);\ assert(src1.type == BRW_REGISTER_TYPE_DF);\ diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 2501d6adff..c295a2b3ff 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -222,6 +222,10 @@ F8(3src_src1_negate,39, 39, 40, 40) F8(3src_src1_abs, 38, 38, 39, 39) F8(3src_src0_negate,37, 37, 38, 38) F8(3src_src0_abs, 36, 36, 37, 37) + +F(3src_src2_type, 36, 36) +F(3src_src1_type, 35, 35) + F8(3src_a16_flag_reg_nr,34, 34, 33, 33) F8(3src_a16_flag_subreg_nr, 33, 33, 32, 32) FF(3src_a16_dst_reg_file, diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c index b7fff0867f..55956ef563 100644 --- a/src/intel/compiler/brw_reg_type.c +++ b/src/intel/compiler/brw_reg_type.c @@ -93,6 +93,7 @@ enum hw_3src_reg_type { GEN7_3SRC_TYPE_D = 1, GEN7_3SRC_TYPE_UD = 2, GEN7_3SRC_TYPE_DF = 3, + GEN7_3SRC_TYPE_HF = 4, /** When ExecutionDatatype is 1: @{ */ GEN10_ALIGN1_3SRC_REG_TYPE_HF = 0b000, @@ -120,6 +121,7 @@ static const struct hw_3src_type { [BRW_REGISTER_TYPE_D] = { GEN7_3SRC_TYPE_D }, [BRW_REGISTER_TYPE_UD] = { GEN7_3SRC_TYPE_UD }, [BRW_REGISTER_TYPE_DF] = { GEN7_3SRC_TYPE_DF }, + [BRW_REGISTER_TYPE_HF] = { GEN7_3SRC_TYPE_HF }, 
}, gen10_hw_3src_align1_type[] = { #define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x [0 ... BRW_REGISTER_TYPE_LAST] = { INVALID }, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/51] glsl: Allow 16-bit neg() and dot()
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/ir_validate.cpp | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index a20f52e527..735e862141 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -263,7 +263,8 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT || ir->operands[0]->type->is_float() || ir->operands[0]->type->is_double() || - ir->operands[0]->type->base_type == GLSL_TYPE_INT64); + ir->operands[0]->type->base_type == GLSL_TYPE_INT64 || + ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); assert(ir->type == ir->operands[0]->type); break; @@ -742,9 +743,11 @@ ir_validate::visit_leave(ir_expression *ir) case ir_binop_dot: assert(ir->type == glsl_type::float_type || - ir->type == glsl_type::double_type); + ir->type == glsl_type::double_type || + ir->type->base_type == GLSL_TYPE_FLOAT16); assert(ir->operands[0]->type->is_float() || - ir->operands[0]->type->is_double()); + ir->operands[0]->type->is_double() || + ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); assert(ir->operands[0]->type->is_vector()); assert(ir->operands[0]->type == ir->operands[1]->type); break; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 19/51] intel/compiler/fs: Add helpers for 16-bit null regs
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs_builder.h | 12 1 file changed, 12 insertions(+) diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 87394bc17b..633086c64b 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -205,6 +205,12 @@ namespace brw { } dst_reg + null_reg_hf() const + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_HF)); + } + + dst_reg null_reg_df() const { return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF)); @@ -219,6 +225,12 @@ namespace brw { return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } + dst_reg + null_reg_w() const + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_W)); + } + /** * Create a null register of unsigned integer type. */ -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 20/51] intel/compiler/fs: Use two SIMD8 instructions for 16-bit math
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 3c70231be8..5751bb0ad7 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4903,6 +4903,15 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: + /* From the SKL PRM Vol 2, math - Extended Math Function: + * + * The execution size must be no more than 8 when half-floats are used + * in source or destination operand. + */ + if (inst->src[0].type == BRW_REGISTER_TYPE_HF || + inst->dst.type == BRW_REGISTER_TYPE_HF) + return MIN2(8, inst->exec_size); + /* Unary extended math instructions are limited to SIMD8 on Gen4 and * Gen6. */ @@ -4911,6 +4920,15 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, MIN2(8, inst->exec_size)); case SHADER_OPCODE_POW: + /* From the SKL PRM Vol 2, math - Extended Math Function: + * + * The execution size must be no more than 8 when half-floats are used + * in source or destination operand. + */ + if (inst->src[0].type == BRW_REGISTER_TYPE_HF || + inst->dst.type == BRW_REGISTER_TYPE_HF) + return MIN2(8, inst->exec_size); + /* SIMD16 is only allowed on Gen7+. */ return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) : MIN2(8, inst->exec_size)); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/51] intel/compiler: Move type_size_scalar() into brw_shader.cpp
The next patch will add another variant, and in order not to make brw_fs.cpp any bigger than it already is, add both in brw_shader.cpp instead. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 48 --- src/intel/compiler/brw_shader.cpp | 48 +++ 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 1b972972c1..3c70231be8 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -470,54 +470,6 @@ fs_reg::component_size(unsigned width) const return MAX2(width * stride, 1) * type_sz(type); } -extern "C" int -type_size_scalar(const struct glsl_type *type) -{ - unsigned int size, i; - - switch (type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - return type->components(); - case GLSL_TYPE_UINT16: - case GLSL_TYPE_INT16: - case GLSL_TYPE_FLOAT16: - return DIV_ROUND_UP(type->components(), 2); - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_UINT64: - case GLSL_TYPE_INT64: - return type->components() * 2; - case GLSL_TYPE_ARRAY: - return type_size_scalar(type->fields.array) * type->length; - case GLSL_TYPE_STRUCT: - size = 0; - for (i = 0; i < type->length; i++) { -size += type_size_scalar(type->fields.structure[i].type); - } - return size; - case GLSL_TYPE_SAMPLER: - /* Samplers take up no register space, since they're baked in at - * link time. - */ - return 0; - case GLSL_TYPE_ATOMIC_UINT: - return 0; - case GLSL_TYPE_SUBROUTINE: - return 1; - case GLSL_TYPE_IMAGE: - return BRW_IMAGE_PARAM_SIZE; - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_FUNCTION: - unreachable("not reached"); - } - - return 0; -} - /** * Create a MOV to read the timestamp register. 
* diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 74b52976d7..234b5a11c1 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -30,6 +30,54 @@ #include "main/uniforms.h" #include "util/macros.h" +extern "C" int +type_size_scalar(const struct glsl_type *type) +{ + unsigned int size, i; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return type->components(); + case GLSL_TYPE_UINT16: + case GLSL_TYPE_INT16: + case GLSL_TYPE_FLOAT16: + return DIV_ROUND_UP(type->components(), 2); + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_UINT64: + case GLSL_TYPE_INT64: + return type->components() * 2; + case GLSL_TYPE_ARRAY: + return type_size_scalar(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size_scalar(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up no register space, since they're baked in at + * link time. + */ + return 0; + case GLSL_TYPE_ATOMIC_UINT: + return 0; + case GLSL_TYPE_SUBROUTINE: + return 1; + case GLSL_TYPE_IMAGE: + return BRW_IMAGE_PARAM_SIZE; + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: + unreachable("not reached"); + } + + return 0; +} + enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) { -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/51] glsl: Allow 16-bit math
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/ir_validate.cpp | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index 735e862141..d246af866d 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -272,7 +272,8 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_rsq: case ir_unop_sqrt: assert(ir->type->is_float() || - ir->type->is_double()); + ir->type->is_double() || + ir->type->base_type == GLSL_TYPE_FLOAT16); assert(ir->type == ir->operands[0]->type); break; @@ -281,7 +282,9 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_exp2: case ir_unop_log2: case ir_unop_saturate: - assert(ir->operands[0]->type->is_float()); + assert(ir->operands[0]->type->is_float() || + (ir->operands[0]->type->get_scalar_type()->base_type == + GLSL_TYPE_FLOAT16)); assert(ir->type == ir->operands[0]->type); break; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/51] nir: Add 16-bit float support into algebraic opts
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/nir/nir_search.c | 4 1 file changed, 4 insertions(+) diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index dec56fee74..3b28da4a3f 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -27,6 +27,7 @@ #include #include "nir_search.h" +#include "util/half_float.h" struct match_state { bool inexact_match; @@ -194,6 +195,9 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, for (unsigned i = 0; i < num_components; ++i) { double val; switch (load->def.bit_size) { +case 16: + val = _mesa_half_to_float(load->value.u16[new_swizzle[i]]); + break; case 32: val = load->value.f32[new_swizzle[i]]; break; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/51] glsl: Add conversion ops to/from 16-bit floats
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/glsl_to_nir.cpp| 2 ++ src/compiler/glsl/ir.cpp | 8 src/compiler/glsl/ir_expression_operation.py | 5 + src/compiler/glsl/ir_validate.cpp| 8 src/mesa/program/ir_to_mesa.cpp | 2 ++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +++ 6 files changed, 28 insertions(+) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 289f8be031..14c358465b 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1561,6 +1561,8 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_d2b: case ir_unop_i2d: case ir_unop_u2d: + case ir_unop_h2f: + case ir_unop_f2h: case ir_unop_i642i: case ir_unop_i642u: case ir_unop_i642f: diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 2c61dd9d64..a901ec5683 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -281,6 +281,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_i2f: case ir_unop_u2f: case ir_unop_d2f: + case ir_unop_h2f: case ir_unop_bitcast_i2f: case ir_unop_bitcast_u2f: case ir_unop_i642f: @@ -334,6 +335,13 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this->type = glsl_type::get_instance(GLSL_TYPE_UINT64, op0->type->vector_elements, 1); break; + + case ir_unop_f2h: + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16, + op0->type->vector_elements, 1); + break; + + case ir_unop_noise: this->type = glsl_type::float_type; break; diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py index d8542925a0..3158533c02 100644 --- a/src/compiler/glsl/ir_expression_operation.py +++ b/src/compiler/glsl/ir_expression_operation.py @@ -82,6 +82,7 @@ int_type = type("int", "i", "GLSL_TYPE_INT") uint64_type = type("uint64_t", "u64", "GLSL_TYPE_UINT64") int64_type = type("int64_t", "i64", "GLSL_TYPE_INT64") float_type = type("float", "f", "GLSL_TYPE_FLOAT") +float16_t_type = 
type("float16_t_type", "f", "GLSL_TYPE_FLOAT16") double_type = type("double", "d", "GLSL_TYPE_DOUBLE") bool_type = type("bool", "b", "GLSL_TYPE_BOOL") @@ -460,6 +461,10 @@ ir_expression_operation = [ operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, c_expression="{src0}"), # Double-to-boolean conversion. operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, c_expression="{src0} != 0.0"), + # hafl-to-float conversion. + operation("h2f", 1, source_types=(float16_t_type,), dest_type=float_type, c_expression="{src0}"), + # hafl-to-float conversion. + operation("f2h", 1, source_types=(float_type,), dest_type=float16_t_type, c_expression="{src0}"), # 'Bit-identical int-to-float "conversion" operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"), # 'Bit-identical float-to-int "conversion" diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index aa07f8aea6..29e3cda865 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -595,6 +595,14 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->is_double()); assert(ir->type->is_boolean()); break; + case ir_unop_h2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); + assert(ir->type->is_float()); + break; + case ir_unop_f2h: + assert(ir->operands[0]->type->is_float()); + assert(ir->type->base_type == GLSL_TYPE_FLOAT16); + break; case ir_unop_frexp_sig: assert(ir->operands[0]->type->is_float() || diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index ac12389f70..d57e50366e 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1313,6 +1313,8 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_d2u: case ir_unop_u2d: case ir_unop_d2b: + case ir_unop_h2f: + case ir_unop_f2h: case ir_unop_frexp_sig: case ir_unop_frexp_exp: assert(!"not supported"); diff --git 
a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0772b73627..f8cb94c7dc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/s
[Mesa-dev] i965: Kicking off fp16 glsl support
lues there were 16-bit, backend would still need to know types. My feeling is that we just need to rewrite a fair amount of the Intel push/pull constant setup. 5) Patches 44-50 are all about the GLSL lowering pass. This is really work-in-progress. What I have here is a crude attempt to do everything in one pass. It also has several hacks working around shortcomings in the Intel backend. The short story is that there are quite a few things which don't have a precision, and the compiler needs to analyze expressions recursively in order to know what precision to use. Take, for example, variables that don't have a precision but are referred to from multiple locations. These require the compiler to examine all the expressions involved and use full precision for the variable even if only one of the expressions requires it. This in turn alters the requirements in the other expressions - the compiler would need to emit conversions for them. And I don't think this can be done cleanly in one pass. I also realized that there may be cases where the compiler would need to use full precision instead of half in order to emit optimal code. Such shaders sound just evil and I don't even want to think about that now. There is more than enough work to get even the rules covered... This series doesn't touch the hardware register allocator - it still allocates one full register per 16-bit float component even in case of SIMD8. Patches can be found in the following branch (rebased on current master and Igalia's work): git://people.freedesktop.org/~tpohjola/mesa:16_bit_gles There are also some simple shader runner tests I wrote along the way: git://people.freedesktop.org/~tpohjola/piglit:fp16 All feedback is very welcome. I'm prepared to keep on working on this if people find it useful. Personally I'd be curious to add fp16 for pln() and lrp() and see if 16-bits could beat 32-bits performance-wise. Proper push/pull constant support is another thing on the list. 
Hardware register allocator with sub-register support sounds both interesting and scary. CC: Jose Maria Casanova Crespo <jmcasan...@igalia.com> CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Kenneth Graunke <kenn...@whitecape.org> CC: Matt Turner <matts...@gmail.com> CC: Ian Romanick <i...@freedesktop.org> CC: Francisco Jerez <curroje...@riseup.net> Topi Pohjolainen (51): nir: Prepare constant folding for 16-bits nir: Prepare constant lowering for 16-bits constants nir: Add 16-bit float support into algebraic opts glsl: Print 16-bit constants nir: Print 16-bit constants glsl: Add support for 16-bit float constants in nir-conversion glsl: Add conversion ops to/from 16-bit floats glsl: Add more conversion ops to/from 16-bit floats glsl: Allow 16-bit neg() and dot() glsl: Allow 16-bit math glsl: Enable 16-bit texturing in nir-conversion intel/compiler/disasm: Print 16-bit IMM values intel/compiler/disasm: Print fp16 also for sampler messages intel/compiler/fs: Support for dumping 16-bit IMM values intel/compiler: Add support for loading 16-bit constants intel/compiler: Move type_size_scalar() into brw_shader.cpp intel/compiler: Prepare for glsl mediump float uniforms intel/compiler: Allow 16-bit math intel/compiler/fs: Add helpers for 16-bit null regs intel/compiler/fs: Use two SIMD8 instructions for 16-bit math intel/compiler/fs: Use 16-bit null dest with 16-bit math intel/compiler/fs: Use 16-bit null dest with 16-bit compare intel/compiler: Prepare for 16-bit 3-src ops intel/compiler: Add support for negating 16-bit floats intel/compiler/fs: Support for combining 16-bit immediates intel/compiler/fs: Set 16-bit sampler return format intel/compiler/fs: Set tex type for generator to flag fp16 intel/compiler/fs: Use component_size() instead of open coded intel/compiler/fs: Add register padding support intel/compiler/fs: Pad 16-bit texture return payloads intel/compiler/fs: Pad 16-bit output (store/fb write) payloads intel/compiler/fs: Pad 16-bit nir vec* components into 
full reg intel/compiler/fs: Pad 16-bit nir intrinsic dest into full reg intel/compiler/fs: Pad 16-bit const loads into full regs intel/compiler/fs: Pad 16-bit payload lowering intel/compiler/fs: Prepare nir_emit_if() for 16-bit sources intel/compiler/fs: Consider original sizes when retyping alu ops intel/compiler/fs: Use original reg size when retyping nir src intel/compiler/fs: Consider logic ops on 16-bit booleans intel/compiler/fs: Prepare 16-bit and/or/xor for 32-bit src intel/compiler/eu: Take stride into account in 16-bit ops i965: WIP: Support for uploading 16-bit uniforms from 32-bit store intel/compiler/fs: WIP: Use 32-bit slots for 16-bit uniforms glsl: WIP: Add lowering pass for treating mediump as float16 glsl: Use 16-bit constants if operation is otherwise 16-bit glsl: Lower float conversions to mediump glsl: HACK: Force texture re
[Mesa-dev] [PATCH 05/51] nir: Print 16-bit constants
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/nir/nir_print.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index fcc8025346..9ed23a74bb 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -27,6 +27,7 @@ #include "nir.h" #include "compiler/shader_enums.h" +#include "util/half_float.h" #include #include #include /* for PRIx64 macro */ @@ -842,6 +843,10 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state) if (instr->def.bit_size == 64) fprintf(fp, "0x%16" PRIx64 " /* %f */", instr->value.u64[i], instr->value.f64[i]); + else if (instr->def.bit_size == 16) + fprintf(fp, "0x%04x /* %f */", + instr->value.u16[i], + _mesa_half_to_float(instr->value.u16[i])); else fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]); } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/51] glsl: Add support for 16-bit float constants in nir-conversion
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/glsl_to_nir.cpp | 9 + 1 file changed, 9 insertions(+) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 1e636225c1..289f8be031 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -32,6 +32,7 @@ #include "compiler/nir/nir_control_flow.h" #include "compiler/nir/nir_builder.h" #include "main/imports.h" +#include "util/half_float.h" /* * pass to lower GLSL IR to NIR @@ -245,6 +246,14 @@ constant_copy(ir_constant *ir, void *mem_ctx) break; + case GLSL_TYPE_FLOAT16: + for (unsigned c = 0; c < cols; c++) { + for (unsigned r = 0; r < rows; r++) +ret->values[c].u16[r] = + _mesa_float_to_half(ir->value.f[c * rows + r]); + } + break; + case GLSL_TYPE_FLOAT: for (unsigned c = 0; c < cols; c++) { for (unsigned r = 0; r < rows; r++) -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/51] glsl: Add more conversion ops to/from 16-bit floats
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/glsl/glsl_to_nir.cpp| 6 ++ src/compiler/glsl/ir_expression_operation.py | 16 ++-- src/compiler/glsl/ir_validate.cpp| 24 src/mesa/program/ir_to_mesa.cpp | 6 ++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 ++ 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 14c358465b..c0adf744e0 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1563,6 +1563,12 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_u2d: case ir_unop_h2f: case ir_unop_f2h: + case ir_unop_h2u: + case ir_unop_u2h: + case ir_unop_h2i: + case ir_unop_i2h: + case ir_unop_h2b: + case ir_unop_b2h: case ir_unop_i642i: case ir_unop_i642u: case ir_unop_i642f: diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py index 3158533c02..0316d1a82d 100644 --- a/src/compiler/glsl/ir_expression_operation.py +++ b/src/compiler/glsl/ir_expression_operation.py @@ -461,10 +461,22 @@ ir_expression_operation = [ operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, c_expression="{src0}"), # Double-to-boolean conversion. operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, c_expression="{src0} != 0.0"), - # hafl-to-float conversion. + # half-to-float conversion. operation("h2f", 1, source_types=(float16_t_type,), dest_type=float_type, c_expression="{src0}"), - # hafl-to-float conversion. + # float-to-half conversion. operation("f2h", 1, source_types=(float_type,), dest_type=float16_t_type, c_expression="{src0}"), + # half-to-unsigned conversion. + operation("h2u", 1, source_types=(float16_t_type,), dest_type=uint_type, c_expression="{src0}"), + # unsigned-to-half conversion. + operation("u2h", 1, source_types=(uint_type,), dest_type=float16_t_type, c_expression="{src0}"), + # half-to-integer conversion. 
+ operation("h2i", 1, source_types=(float16_t_type,), dest_type=int_type, c_expression="{src0}"), + # integer-to-half conversion. + operation("i2h", 1, source_types=(int_type,), dest_type=float16_t_type, c_expression="{src0}"), + # half-to-boolean conversion. + operation("h2b", 1, source_types=(float16_t_type,), dest_type=bool_type, c_expression="{src0} != 0.0"), + # boolean-to-half conversion. + operation("b2h", 1, source_types=(bool_type,), dest_type=float16_t_type, c_expression="{src0} ? 1.0F : 0.0F"), # 'Bit-identical int-to-float "conversion" operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"), # 'Bit-identical float-to-int "conversion" diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index 29e3cda865..a20f52e527 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -603,6 +603,30 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->is_float()); assert(ir->type->base_type == GLSL_TYPE_FLOAT16); break; + case ir_unop_h2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2h: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT16); + break; + case ir_unop_h2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_i2h: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT16); + break; + case ir_unop_h2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT16); + assert(ir->type->is_boolean()); + break; + case ir_unop_b2h: + assert(ir->operands[0]->type->is_boolean()); + assert(ir->type->base_type == GLSL_TYPE_FLOAT16); + break; case ir_unop_frexp_sig: assert(ir->operands[0]->type->is_float() || diff --git 
a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d57e50366e..286b9e07bf 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1315,6 +1315,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_d2b: case ir_unop_h2f: case ir_unop_f2h: + case ir_unop_h2u: + c
[Mesa-dev] [PATCH 01/51] nir: Prepare constant folding for 16-bits
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/nir/nir_opt_constant_folding.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index d6be807b3d..b63660ea4d 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -78,6 +78,8 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) j++) { if (load_const->def.bit_size == 64) src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]]; + else if (load_const->def.bit_size == 16) +src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]]; else src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]]; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/51] glsl: Print 16-bit constants
--- src/compiler/glsl/ir_print_visitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/glsl/ir_print_visitor.cpp b/src/compiler/glsl/ir_print_visitor.cpp index ea14cdeb6c..ab9a35d73f 100644 --- a/src/compiler/glsl/ir_print_visitor.cpp +++ b/src/compiler/glsl/ir_print_visitor.cpp @@ -482,6 +482,7 @@ void ir_print_visitor::visit(ir_constant *ir) case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break; case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break; case GLSL_TYPE_FLOAT: +case GLSL_TYPE_FLOAT16: if (ir->value.f[i] == 0.0f) /* 0.0 == -0.0, so print with %f to get the proper sign. */ fprintf(f, "%f", ir->value.f[i]); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/51] nir: Prepare constant lowering for 16-bits constants
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/compiler/nir/nir_lower_load_const_to_scalar.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c index e494facfd2..76eb1d3a12 100644 --- a/src/compiler/nir/nir_lower_load_const_to_scalar.c +++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c @@ -52,9 +52,13 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower) nir_load_const_instr_create(b.shader, 1, lower->def.bit_size); if (lower->def.bit_size == 64) load_comp->value.f64[0] = lower->value.f64[i]; + else if (lower->def.bit_size == 16) + load_comp->value.u16[0] = lower->value.u16[i]; else load_comp->value.u32[0] = lower->value.u32[i]; - assert(lower->def.bit_size == 64 || lower->def.bit_size == 32); + assert(lower->def.bit_size == 64 || + lower->def.bit_size == 32 || + lower->def.bit_size == 16); nir_builder_instr_insert(, _comp->instr); loads[i] = _comp->def; } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround
Fixes intermittent GPU hangs on Broxton with an Intel internal test case. There are plenty of similar fragment shaders in piglit that do not use any varyings and any uniforms. According to the documentation special timing is needed between pipeline stages. Apparently we just don't hit that with piglit. Even with the failing test case one doesn't always get the hang. Moreover, according to the error states the hang happens significantly later than the execution of the problematic shader. There are multiple render cycles (primitive submissions) in between. I've also seen error states where the ACTHD points outside the batch. Almost as if the hardware writes somewhere that gets used later on. That would also explain why piglit doesn't suffer from this - most tests kick off one render cycle and any corruption is left unseen. v2 (Ken): Instead of enabling push constants, enable one of the inputs (PSIZ). v3 (Ken, Jason): Use LAYER instead making vulkan emit_3dstate_sbe() happy. CC: Kenneth Graunke <kenn...@whitecape.org> CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Eero Tamminen <eero.t.tammi...@intel.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_fs.cpp | 29 + 1 file changed, 29 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 30e8841242..2c6dc1e5a0 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6164,6 +6164,31 @@ fs_visitor::run_gs() return !failed; } +/* From the SKL PRM, Volume 16, Workarounds: + * + * 0877 3D Pixel Shader Hang possible when pixel shader dispatched with + * only header phases (R0-R2) + * + * WA: Enable a non-header phase (e.g. push constant) when dispatch would + * have been header only. + * + * Instead of enabling push constants one can alternatively enable one of the + * inputs. Here one simply chooses point size which shouldn't impose much + * overhead. 
+ */ +static void +gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data) +{ + if (wm_prog_data->num_varying_inputs) + return; + + if (wm_prog_data->base.curb_read_length) + return; + + wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0; + wm_prog_data->num_varying_inputs = 1; +} + bool fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) { @@ -6227,6 +6252,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) optimize(); assign_curb_setup(); + + if (devinfo->gen >= 9) + gen9_ps_header_only_workaround(wm_prog_data); + assign_urb_setup(); fixup_3src_null_dest(); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler/gen9: Pixel shader header only workaround
Fixes intermittent GPU hangs on Broxton with an Intel internal test case. There are plenty of similar fragment shaders in piglit that do not use any varyings or uniforms. According to the documentation, special timing is needed between pipeline stages. Apparently we just don't hit that with piglit. Even with the failing test case one doesn't always get the hang. Moreover, according to the error states the hang happens significantly later than the execution of the problematic shader. There are multiple render cycles (primitive submissions) in between. I've also seen error states where the ACTHD points outside the batch. Almost as if the hardware writes somewhere that gets used later on. That would also explain why piglit doesn't suffer from this - most tests kick off one render cycle and any corruption is left unseen. For clarity I chose to make the decision in the compiler only and mark it with a boolean. In principle, constant loaders could make the same decision by examining num_varying_inputs along with push constant details. Alternatively, tweaking nr_params in the compiler would allow the GL driver to be kept as is if one did, for example: static const gl_constant_value zero = { 0 }; wm_prog_data->base.param[0] = &zero; wm_prog_data->base.nr_params = 1; This, however, doesn't work for Vulkan which would still need some logic to be added in anv_cmd_buffer_push_constants(). In the end I thought future debugging is probably easier when the explicit boolean tells about this corner case. 
CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Eero Tamminen <eero.t.tammi...@intel.com> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_compiler.h | 7 src/intel/compiler/brw_fs.cpp | 46 + src/intel/vulkan/anv_cmd_buffer.c | 22 ++-- src/intel/vulkan/genX_pipeline.c| 6 +++- src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 +++-- src/mesa/drivers/dri/i965/genX_state_upload.c | 3 +- 6 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 6753a8daf0..8a1c8c85ac 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -622,6 +622,13 @@ struct brw_wm_prog_data { bool contains_noperspective_varying; /** +* Tell constant uplaoders, gen6_upload_push_constants() and +* anv_cmd_buffer_push_constants(), that workaround is needed. +* See gen9_ps_header_only_workaround(). +*/ + bool needs_gen9_ps_header_only_workaround; + + /** * Mask of which interpolation modes are required by the fragment shader. * Used in hardware setup on gen6+. */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index eb9b4c3890..5f4271fb59 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6159,6 +6159,48 @@ fs_visitor::run_gs() return !failed; } +/* From the SKL PRM, Volume 16, Workarounds: + * + * 0877 3D Pixel Shader Hang possible when pixel shader dispatched with + * only header phases (R0-R2) + * + * WA: Enable a non-header phase (e.g. push constant) when dispatch would + * have been header only. + * + * Additionally from the SKL PRM, Volume 2a, Command Reference, + * 3DSTATE_PS and Push Constant Enable: + * + * This field must be enabled if the sum of the PS Constant Buffer [3:0] + * Read Length fields in 3DSTATE_CONSTANT_PS is nonzero, and must be + * disabled if the sum is zero. + * + * Therefore one needs to prepare register space for minimum amount of + * constants to be uploaded. 
+ * + * Here it is assumed that assign_curb_setup() has determined the total amount + * of constants (uniforms + ubos) and therefore it is safe to examine if the + * workaround is needed. + */ +static void +gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data, + int *first_non_payload_grf) +{ + if (wm_prog_data->num_varying_inputs) + return; + + if (wm_prog_data->base.curb_read_length) + return; + + assert(wm_prog_data->base.nr_params == 0); + + wm_prog_data->needs_gen9_ps_header_only_workaround = true; + + const unsigned wa_upload_size = DIV_ROUND_UP(1, 8); + + wm_prog_data->base.curb_read_length = wa_upload_size; + *first_non_payload_grf += wa_upload_size; +} + bool fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) { @@ -6222,6 +6264,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) optimize(); assign_curb_setup(); + + if (devinfo->gen >= 9) + gen9_ps_header_only_workaround(wm_prog_data, _non_payload_grf); + assign_urb_setup(); fixup_3src_null_dest(); diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 3b59af8f6f..07d45bd5d4 100644 ---
[Mesa-dev] [PATCH] i965/screen: Check that given format is valid
CID: 1418110 Fixes: 939b53d3325 "i965/screen: Implement queryDmaBufFormatModifierAttirbs" CC: Jason Ekstrand <ja...@jlekstrand.net> Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/intel_screen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index bc2bba00b6..f85d1ba51d 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -830,6 +830,8 @@ intel_query_format_modifier_attribs(__DRIscreen *dri_screen, { struct intel_screen *screen = dri_screen->driverPrivate; const struct intel_image_format *f = intel_image_format_lookup(fourcc); + if (f == NULL) + return false; if (!modifier_is_supported(>devinfo, f, 0, modifier)) return false; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] i965: Two possible bug fixes
While debugging one internal workload I've been trying various things. Here are two of those. I'm not aware of them actually fixing anything but... CC: Mark Janes <mark.a.ja...@intel.com> Topi Pohjolainen (3): i965/gen8: Remove unused gen8_emit_3dstate_multisample() intel/blorp/hiz: Always set sample number i965: Disable stencil cache optimization combining two 4x2 blocks src/intel/blorp/blorp_genX_exec.h | 11 +++ src/mesa/drivers/dri/i965/brw_context.h| 1 - src/mesa/drivers/dri/i965/brw_defines.h| 5 - src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen8_multisample_state.c | 16 5 files changed, 16 insertions(+), 18 deletions(-) -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] i965/gen8: Remove unused gen8_emit_3dstate_multisample()
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_context.h| 1 - src/mesa/drivers/dri/i965/gen8_multisample_state.c | 16 2 files changed, 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 92fc16de13..bd56ffc819 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1510,7 +1510,6 @@ void gen6_set_sample_maps(struct gl_context *ctx); /* gen8_multisample_state.c */ -void gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samp); void gen8_emit_3dstate_sample_pattern(struct brw_context *brw); /* gen7_urb.c */ diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c index 7a31a5df4a..3afa586275 100644 --- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c @@ -28,22 +28,6 @@ #include "brw_multisample_state.h" /** - * 3DSTATE_MULTISAMPLE - */ -void -gen8_emit_3dstate_multisample(struct brw_context *brw, unsigned num_samples) -{ - assert(num_samples <= 16); - - unsigned log2_samples = ffs(MAX2(num_samples, 1)) - 1; - - BEGIN_BATCH(2); - OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE << 16 | (2 - 2)); - OUT_BATCH(MS_PIXEL_LOCATION_CENTER | log2_samples << 1); - ADVANCE_BATCH(); -} - -/** * 3DSTATE_SAMPLE_PATTERN */ void -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] i965: Disable stencil cache optimization combining two 4x2 blocks
From the BDW PRM, Volume 15, Workarounds: KMD Wa4x4STCOptimizationDisable HIZ/STC hang in hawx frames. W/A: Disable 4x4 RCPFE STC optimization and therefore only send one valid 4x4 to STC on 4x4 interface. This will require setting bit 6 of reg. 0x7004. Must be done at boot and all save/restore paths. From the SKL PRM, Volume 16, Workarounds: 0556 KMD Wa4x4STCOptimizationDisable HIZ/STC hang in hawx frames. W/A: Disable 4 x4 RCPFE STC optimization and therefore only send one valid 4x4 to STC on 4x4 interface. This will require setting bit 6 of reg. 0x7004. Must be done at boot and all save/restore paths. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_defines.h | 5 - src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 4abb790612..248512e01a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1611,11 +1611,14 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN7_CACHE_MODE_1 0x7004 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) +# define GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE (1 << 6) # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11) # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13) # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) # define GEN8_HIZ_PMA_MASK_BITS \ - REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) + REG_MASK(GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE | \ +GEN8_HIZ_NP_PMA_FIX_ENABLE | \ +GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) #define GEN7_GT_MODE0x7008 # define GEN9_SUBSLICE_HASHING_8x8 (0 << 8) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7b31aad170..4149a3d5d4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -71,6 +71,7 @@ 
brw_upload_initial_gpu_state(struct brw_context *brw) OUT_BATCH(GEN7_CACHE_MODE_1); OUT_BATCH(REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) | REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) | +REG_MASK(GEN8_4X4_RCPFE_STC_OPTIMIZATION_DISABLE) | GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE | GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC); ADVANCE_BATCH(); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] intel/blorp/hiz: Always set sample number
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_genX_exec.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 5f9a8ab4a5..5389262098 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1454,6 +1454,17 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, if (params->stencil.enabled) assert(params->hiz_op == BLORP_HIZ_OP_DEPTH_CLEAR); + /* From the BDW PRM Volume 2, 3DSTATE_WM_HZ_OP: +* +* 3DSTATE_MULTISAMPLE packet must be used prior to this packet to change +* the Number of Multisamples. This packet must not be used to change +* Number of Multisamples in a rendering sequence. +* +* Since HIZ may be the first thing in a batch buffer, play safe and always +* emit 3DSTATE_MULTISAMPLE. +*/ + blorp_emit_3dstate_multisample(batch, params); + /* If we can't alter the depth stencil config and multiple layers are * involved, the HiZ op will fail. This is because the op requires that a * new config is emitted for each additional layer. -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/compiler: Cast reg types explicitly
Makes coverity happier. CC: Matt Turner <matts...@gmail.com> CID: 1416799 Fixes: c1ac1a3d25 (i965: Add a brw_hw_type_to_reg_type() function) Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/compiler/brw_reg_type.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c index a0f674f0d7..98c4cf7234 100644 --- a/src/intel/compiler/brw_reg_type.c +++ b/src/intel/compiler/brw_reg_type.c @@ -111,13 +111,13 @@ brw_hw_type_to_reg_type(const struct gen_device_info *devinfo, { if (file == BRW_IMMEDIATE_VALUE) { for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) { - if (gen4_hw_type[i].imm_type == hw_type) { + if (gen4_hw_type[i].imm_type == (enum hw_imm_type)hw_type) { return i; } } } else { for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) { - if (gen4_hw_type[i].reg_type == hw_type) { + if (gen4_hw_type[i].reg_type == (enum hw_reg_type)hw_type) { return i; } } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [v2] intel/blorp: Adjust intra-tile x when faking rgb with red-only
v2 (Jason): Adjust directly in surf_fake_rgb_with_red() Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101910 CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Mark Janes <mark.a.ja...@intel.com> CC: mesa-sta...@lists.freedesktop.org Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_blit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index ed00516373..35008cbbb0 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1549,6 +1549,7 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev, info->surf.logical_level0_px.width *= 3; info->surf.phys_level0_sa.width *= 3; + info->tile_x_sa *= 3; *x *= 3; *width *= 3; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] intel/blorp: Adjust intra-tile x when faking rgb with red-only
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101910 CC: Jason Ekstrand <ja...@jlekstrand.net> CC: Mark Janes <mark.a.ja...@intel.com> CC: mesa-sta...@lists.freedesktop.org Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_blit.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index ed00516373..db93d0f585 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1839,6 +1839,12 @@ try_blorp_blit(struct blorp_batch *batch, surf_get_intratile_offset_px(>dst, >wm_inputs.dst_offset.x, >wm_inputs.dst_offset.y); + + if (wm_prog_key->dst_rgb) { + /* See surf_fake_rgb_with_red() */ + params->wm_inputs.dst_offset.x *= 3; + } + params->x0 += params->wm_inputs.dst_offset.x; params->y0 += params->wm_inputs.dst_offset.y; params->x1 += params->wm_inputs.dst_offset.x; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] i965/miptree: Use isl_image_offset
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_blorp.c| 2 +- src/mesa/drivers/dri/i965/brw_context.c | 1 - src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 +++ src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 49 +++- src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 19 + src/mesa/drivers/dri/i965/intel_pixel_draw.c | 3 +- src/mesa/drivers/dri/i965/intel_pixel_read.c | 2 +- src/mesa/drivers/dri/i965/intel_tex.c| 3 +- src/mesa/drivers/dri/i965/intel_tex_image.c | 2 +- 9 files changed, 36 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index b2987ca4fa..ebe4a051f4 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -149,7 +149,7 @@ blorp_surf_for_miptree(struct brw_context *brw, surf->surf = >surf; surf->addr = (struct blorp_address) { .buffer = mt->bo, - .offset = mt->offset, + .offset = mt->offset.tile_aligned_byte_offset, .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : I915_GEM_DOMAIN_SAMPLER, .write_domain = is_render_target ? 
I915_GEM_DOMAIN_RENDER : 0, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d0b22d4342..ddd50a16fc 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -1513,7 +1513,6 @@ intel_process_dri2_buffer(struct brw_context *brw, intel_miptree_create_for_bo(brw, bo, intel_rb_format(rb), - 0, drawable->w, drawable->h, 1, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 2da0984c0f..86e903888c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -135,12 +135,7 @@ brw_emit_surface_state(struct brw_context *brw, uint32_t mocs, uint32_t *surf_offset, int surf_index, unsigned read_domains, unsigned write_domains) { - struct isl_image_offset offset = { - .tile_aligned_byte_offset = mt->offset, - .intra_tile_x = mt->level[0].level_x, - .intra_tile_y = mt->level[0].level_y - }; - + struct isl_image_offset offset = mt->offset; struct isl_surf surf; get_isl_surf(brw, mt, target, , , ); @@ -1648,8 +1643,10 @@ update_image_surface(struct brw_context *brw, if (format == ISL_FORMAT_RAW) { brw_emit_buffer_surface_state( - brw, surf_offset, mt->bo, mt->offset, - format, mt->bo->size - mt->offset, 1 /* pitch */, + brw, surf_offset, mt->bo, mt->offset.tile_aligned_byte_offset, + format, + mt->bo->size - mt->offset.tile_aligned_byte_offset, + 1 /* pitch */, access != GL_READ_ONLY); } else { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index cab888f04d..d0546851b4 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -757,8 +757,6 @@ intel_miptree_create(struct brw_context *brw, if (!mt) return NULL; - mt->offset = 0; - if (!intel_miptree_alloc_aux(brw, mt)) { intel_miptree_release(); return NULL; @@ -771,7 +769,6 @@ struct 
intel_mipmap_tree * intel_miptree_create_for_bo(struct brw_context *brw, struct brw_bo *bo, mesa_format format, -uint32_t offset, uint32_t width, uint32_t height, uint32_t depth, @@ -817,12 +814,6 @@ intel_miptree_create_for_bo(struct brw_context *brw, brw_bo_get_tiling(bo, , ); - /* Nothing will be able to use this miptree with the BO if the offset isn't -* aligned. -*/ - if (tiling != I915_TILING_NONE) - assert(offset % 4096 == 0); - /* miptrees can't handle negative pitch. If you need flipping of images, * that's outside of the scope of the mt. */ @@ -845,7 +836,6 @@ intel_miptree_create_for_bo(struct brw_context *brw, brw_bo_reference(bo); mt->bo = bo; - mt->offset = offset; if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) intel_miptree_choose_aux_usage(brw, mt); @@ -874,13 +864,13 @@ miptree_create_for_planar_image(struct brw_context *brw, */ struct
[Mesa-dev] [PATCH 1/6] i965/miptree: Take import tile offset along with intra-tile x, y
Imported miptrees represent single images in buffer objects that themselves may contain multiple images (full mipmaps or arrays). In such a case there may be an offset which consists of a pointer to a tile and x,y coordinates giving the start position within that tile. Until now callers got only the intra-tile x,y offsets but applied the tile-aligned byte offset directly themselves. This patch drops applying the byte offset separately and returns it from intel_miptree_get_tile_offsets() along with the intra-tile offsets. Note that intel_renderbuffer_get_tile_offsets() calls intel_miptree_get_tile_offsets(). Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 17 + 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index a0ca6ddf98..abf1d29678 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -99,11 +99,11 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, */ assert(brw->has_surface_tile_offset); assert(view->levels == 1 && view->array_len == 1); - assert(*tile_x == 0 && *tile_y == 0); + assert(*tile_x == 0 && *tile_y == 0 && *offset == 0); - *offset += intel_miptree_get_tile_offsets(mt, view->base_level, - view->base_array_layer, - tile_x, tile_y); + *offset = intel_miptree_get_tile_offsets(mt, view->base_level, +view->base_array_layer, +tile_x, tile_y); /* Minify the logical dimensions of the texture. 
*/ const unsigned l = view->base_level - mt->first_level; @@ -976,9 +976,8 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, format << BRW_SURFACE_FORMAT_SHIFT); /* reloc */ - assert(mt->offset % mt->cpp == 0); - surf[1] = (intel_renderbuffer_get_tile_offsets(irb, _x, _y) + - mt->bo->offset64 + mt->offset); + surf[1] = intel_renderbuffer_get_tile_offsets(irb, _x, _y) + +mt->bo->offset64; surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index ed7cb8e215..1b42edd285 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1343,6 +1343,23 @@ intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt, uint32_t *tile_x, uint32_t *tile_y) { + /* First consider the special case where caller wants the very first slice. +* In such case there is only possible import offset to consider. This +* consists of tile aligned byte offset and intra tile x,y coordinates. +*/ + if (level == 0 && slice == 0) { + *tile_x = mt->level[0].level_x; + *tile_y = mt->level[0].level_y; + return mt->offset; + } + + /* Only single slices can be imported - mipmapped and arrayed always +* start from the beginning of the underlying buffer object. +*/ + assert(mt->offset == 0); + assert(mt->level[0].level_x == 0); + assert(mt->level[0].level_y == 0); + uint32_t x, y; uint32_t mask_x, mask_y; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] i965/miptree: Use isl_image_offset in get_tile_offsets()
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 54 ++-- src/mesa/drivers/dri/i965/intel_fbo.h| 14 +++--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 28 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 8 ++-- src/mesa/drivers/dri/i965/intel_screen.c | 9 ++-- 5 files changed, 61 insertions(+), 52 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index abf1d29678..2da0984c0f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -77,8 +77,7 @@ uint32_t rb_mocs[] = { static void get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, GLenum target, struct isl_view *view, - uint32_t *tile_x, uint32_t *tile_y, - uint32_t *offset, struct isl_surf *surf) + struct isl_image_offset *surf_offset, struct isl_surf *surf) { *surf = mt->surf; @@ -99,11 +98,12 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, */ assert(brw->has_surface_tile_offset); assert(view->levels == 1 && view->array_len == 1); - assert(*tile_x == 0 && *tile_y == 0 && *offset == 0); + assert(surf_offset->intra_tile_x == 0 && + surf_offset->intra_tile_y == 0 && + surf_offset->tile_aligned_byte_offset == 0); - *offset = intel_miptree_get_tile_offsets(mt, view->base_level, -view->base_array_layer, -tile_x, tile_y); + intel_miptree_get_tile_offsets(mt, view->base_level, + view->base_array_layer, surf_offset); /* Minify the logical dimensions of the texture. 
*/ const unsigned l = view->base_level - mt->first_level; @@ -135,13 +135,15 @@ brw_emit_surface_state(struct brw_context *brw, uint32_t mocs, uint32_t *surf_offset, int surf_index, unsigned read_domains, unsigned write_domains) { - uint32_t tile_x = mt->level[0].level_x; - uint32_t tile_y = mt->level[0].level_y; - uint32_t offset = mt->offset; + struct isl_image_offset offset = { + .tile_aligned_byte_offset = mt->offset, + .intra_tile_x = mt->level[0].level_x, + .intra_tile_y = mt->level[0].level_y + }; struct isl_surf surf; - get_isl_surf(brw, mt, target, , _x, _y, , ); + get_isl_surf(brw, mt, target, , , ); union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } }; @@ -180,14 +182,17 @@ brw_emit_surface_state(struct brw_context *brw, surf_offset); isl_surf_fill_state(>isl_dev, state, .surf = >surf, .view = , - .address = mt->bo->offset64 + offset, + .address = mt->bo->offset64 + + offset.tile_aligned_byte_offset, .aux_surf = aux_surf, .aux_usage = aux_usage, .aux_address = aux_offset, .mocs = mocs, .clear_color = clear_color, - .x_offset_sa = tile_x, .y_offset_sa = tile_y); + .x_offset_sa = offset.intra_tile_x, + .y_offset_sa = offset.intra_tile_y); brw_emit_reloc(>batch, *surf_offset + brw->isl_dev.ss.addr_offset, - mt->bo, offset, read_domains, write_domains); + mt->bo, offset.tile_aligned_byte_offset, + read_domains, write_domains); if (aux_surf) { /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the @@ -938,7 +943,7 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; uint32_t *surf; - uint32_t tile_x, tile_y; + struct isl_image_offset image_offset; enum isl_format format; uint32_t offset; /* _NEW_BUFFERS */ @@ -949,9 +954,9 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, assert(!(flags & INTEL_AUX_BUFFER_DISABLED)); if (rb->TexImage && !brw->has_surface_tile_offset) { - intel_renderbuffer_get_tile_offsets(irb, _x, 
_y); + intel_renderbuffer_get_tile_offsets(irb, _offset); - if (tile_x != 0 || tile_y != 0) { + if (image_offset.intra_tile_x != 0 || image_offset.intra_tile_y != 0) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your renderbuffer * as a miptree and used the fragile lod/array_index/etc. controls to @@ -975,9 +980,10 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, surf[
[Mesa-dev] [PATCH 5/6] i965/blit: Let _intratile_offset_el() resolve image offset
Image offset is really a triple: aligned byte offset, intra-tile x and intra-tile y. Taking intra-tile offsets into account on the caller side of emit_miptree_blit() and then applying the tile-aligned byte offset in emit_miptree_blit() is confusing. Now both are handled in a single location: get_blit_intratile_offset_el(). Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/intel_blit.c | 106 ++--- 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index e7338bdf46..d308cfb416 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -42,6 +42,7 @@ static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, int x, int y, int width, int height); static GLuint translate_raster_op(GLenum logicop) @@ -164,15 +165,68 @@ intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) static void get_blit_intratile_offset_el(const struct brw_context *brw, - struct intel_mipmap_tree *mt, + const struct intel_mipmap_tree *mt, + unsigned level, unsigned layer, uint32_t total_x_offset_el, uint32_t total_y_offset_el, struct isl_image_offset *image_offset) { + struct isl_image_offset base_offset; + intel_miptree_get_tile_offsets(mt, level, layer, _offset); + + /* Given offsets are relative to the start of the slice and we need the +* offset that is relative to the beginning of the buffer. +* +* Image offset is really a triple: aligned byte offset, intra-tile +* x and intra-tile y. In order to simply add the page aligned offset of +* the start of the image and the page aligned offset of the given position +* (total_x_offset_el, total_y_offset_el) within the image, we need to +* first augment the position within the image with the intra-tile start +* position (x,y) of the image itself. 
Otherwise we might get intra-tile +* offsets that don't actually fit into one page: +* +* +-+ page N +*/ \ | | +* d_1 | | | +*\ / | | +* + + start of the image +* | | +* | | +* +-+ page N + 1 +* | | +* . . +* . . +* +-+ page N + M +*/ \ | | +* d_2 | | | +*\ / | | +* + + (total_x_offset_el, +* | | total_y_offset_el) +* | | +* +-+ page N + M + 1 +* +* Consider a case where d_1 + d_2 > sizeof(page). If one calculates the +* triple separately for the start of the image and for the position +* within the image, one gets page aligned of N + M and d_1 + d_2. If in +* turn one takes d_1 into account as offsetting the position _within_ the +* image, one gets page aligned of N + M + 1 and d_1 + d_2 - sizeof(page) +* where 0 <= d_1 + d_2 - sizeof(page) < sizeof(page). +*/ + total_x_offset_el += base_offset.intra_tile_x; + total_y_offset_el += base_offset.intra_tile_y; + isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->cpp * 8, mt->surf.row_pitch, total_x_offset_el, total_y_offset_el, image_offset); + + /* Finally add the byte offset of the page aligned start of the image to +* the page aligned offset with the image giving page aligned offset +* relative to the beginning of the buffer. +*/ + image_offset->tile_aligned_byte_offset += + base_offset.tile_aligned_byte_offset; + if (mt->surf.tiling == ISL_TILING_LINEAR) { /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: * @@ -197,8 +251,10 @@ get_blit_intratile_offset_el(const struct brw_context *brw, static
[Mesa-dev] [PATCH 4/6] i965/miptree: Use isl instead of local offset calculator
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_misc_state.c| 20 +++--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 38 +++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 4 --- 3 files changed, 19 insertions(+), 43 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1e3be784c5..983fc0c736 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -189,10 +189,22 @@ rebase_depth_stencil(struct brw_context *brw, struct intel_renderbuffer *irb, brw->depthstencil.tile_x = tile_x; brw->depthstencil.tile_y = tile_y; - brw->depthstencil.depth_offset = intel_miptree_get_aligned_offset( - irb->mt, - irb->draw_x & ~tile_mask_x, - irb->draw_y & ~tile_mask_y); + + struct isl_image_offset image_offset; + isl_tiling_get_intratile_offset_el(irb->mt->surf.tiling, + irb->mt->cpp * 8, + irb->mt->surf.row_pitch, + irb->draw_x & ~tile_mask_x, + irb->draw_y & ~tile_mask_y, + _offset); + + brw->depthstencil.depth_offset = image_offset.tile_aligned_byte_offset; + + /* Given x and y were already masked to provide aligned offset. Therefore +* there should be no intra tile offset. +*/ + assert(image_offset.intra_tile_x == 0); + assert(image_offset.intra_tile_y == 0); return false; } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 37024c011d..cab888f04d 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1300,34 +1300,6 @@ intel_get_tile_masks(enum isl_tiling tiling, uint32_t cpp, } /** - * Compute the offset (in bytes) from the start of the BO to the given x - * and y coordinate. For tiled BOs, caller must ensure that x and y are - * multiples of the tile size. 
- */ -uint32_t -intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt, - uint32_t x, uint32_t y) -{ - int cpp = mt->cpp; - uint32_t pitch = mt->surf.row_pitch; - - switch (mt->surf.tiling) { - default: - unreachable("not reached"); - case ISL_TILING_LINEAR: - return y * pitch + x * cpp; - case ISL_TILING_X: - assert((x % (512 / cpp)) == 0); - assert((y % 8) == 0); - return y * pitch + x / (512 / cpp) * 4096; - case ISL_TILING_Y0: - assert((x % (128 / cpp)) == 0); - assert((y % 32) == 0); - return y * pitch + x / (128 / cpp) * 4096; - } -} - -/** * Rendering with tiled buffers requires that the base address of the buffer * be aligned to a page boundary. For renderbuffers, and sometimes with * textures, we may want the surface to point at a texture image level that @@ -1361,15 +1333,11 @@ intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt, assert(mt->level[0].level_y == 0); uint32_t x, y; - uint32_t mask_x, mask_y; - - intel_get_tile_masks(mt->surf.tiling, mt->cpp, _x, _y); intel_miptree_get_image_offset(mt, level, slice, , ); - image_offset->intra_tile_x = x & mask_x; - image_offset->intra_tile_y = y & mask_y; - image_offset->tile_aligned_byte_offset = - intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y); + isl_tiling_get_intratile_offset_el(mt->surf.tiling, + mt->cpp * 8, mt->surf.row_pitch, + x, y, image_offset); } static void diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index b8d36b35e0..d9d2ce9ee2 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -468,10 +468,6 @@ intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt, unsigned level, unsigned slice, struct isl_image_offset *image_offset); -uint32_t -intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt, - uint32_t x, uint32_t y); - void intel_miptree_copy_slice(struct brw_context *brw, struct intel_mipmap_tree *src_mt, -- 
2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] intel/isl: Introduce tiled image offset
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/blorp/blorp_blit.c | 19 -- src/intel/isl/isl.c| 44 +++ src/intel/isl/isl.h| 29 +++-- src/mesa/drivers/dri/i965/intel_blit.c | 47 +++--- 4 files changed, 73 insertions(+), 66 deletions(-) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index ed00516373..db675dc1e4 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1405,12 +1405,14 @@ blorp_surf_convert_to_single_slice(const struct isl_device *isl_dev, else layer = info->view.base_array_layer; - uint32_t byte_offset; + struct isl_image_offset image_offset; isl_surf_get_image_surf(isl_dev, >surf, info->view.base_level, layer, z, >surf, - _offset, >tile_x_sa, >tile_y_sa); - info->addr.offset += byte_offset; + _offset); + info->addr.offset += image_offset.tile_aligned_byte_offset; + info->tile_x_sa = image_offset.intra_tile_x; + info->tile_y_sa = image_offset.intra_tile_y; uint32_t tile_x_px, tile_y_px; surf_get_intratile_offset_px(info, _x_px, _y_px); @@ -1905,7 +1907,8 @@ shrink_surface_params(const struct isl_device *dev, struct brw_blorp_surface_info *info, double *x0, double *x1, double *y0, double *y1) { - uint32_t byte_offset, x_offset_sa, y_offset_sa, size; + uint32_t x_offset_sa, y_offset_sa, size; + struct isl_image_offset image_offset; struct isl_extent2d px_size_sa; int adjust; @@ -1922,10 +1925,10 @@ shrink_surface_params(const struct isl_device *dev, isl_tiling_get_intratile_offset_sa(info->surf.tiling, info->surf.format, info->surf.row_pitch, x_offset_sa, y_offset_sa, - _offset, - >tile_x_sa, >tile_y_sa); - - info->addr.offset += byte_offset; + _offset); + info->addr.offset += image_offset.tile_aligned_byte_offset; + info->tile_x_sa = image_offset.intra_tile_x; + info->tile_y_sa = image_offset.intra_tile_y; adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0; *x0 += adjust; diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 5e3d279b0b..8431d18639 100644 --- 
a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -2295,25 +2295,25 @@ isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, _x_offset_el, _y_offset_el); - uint32_t x_offset_el, y_offset_el; + struct isl_image_offset image_offset; isl_tiling_get_intratile_offset_el(surf->tiling, fmtl->bpb, surf->row_pitch, total_x_offset_el, total_y_offset_el, - offset_B, - _offset_el, - _offset_el); + _offset); + + *offset_B = image_offset.tile_aligned_byte_offset; if (x_offset_sa) { - *x_offset_sa = x_offset_el * fmtl->bw; + *x_offset_sa = image_offset.intra_tile_x * fmtl->bw; } else { - assert(x_offset_el == 0); + assert(image_offset.intra_tile_x == 0); } if (y_offset_sa) { - *y_offset_sa = y_offset_el * fmtl->bh; + *y_offset_sa = image_offset.intra_tile_y * fmtl->bh; } else { - assert(y_offset_el == 0); + assert(image_offset.intra_tile_y == 0); } } @@ -2324,17 +2324,15 @@ isl_surf_get_image_surf(const struct isl_device *dev, uint32_t logical_array_layer, uint32_t logical_z_offset_px, struct isl_surf *image_surf, -uint32_t *offset_B, -uint32_t *x_offset_sa, -uint32_t *y_offset_sa) +struct isl_image_offset *offset) { isl_surf_get_image_offset_B_tile_sa(surf, level, logical_array_layer, logical_z_offset_px, - offset_B, - x_offset_sa, - y_offset_sa); + >tile_aligned_byte_offset, + >intra_tile_x, + >intra_tile_y); /* Even for cube maps there will be
[Mesa-dev] i965/miptree: Rework import offsets
Offsets to tiled images consist of two parts: tile offset and intra-tile x,y coordinates giving the start position within the tile. Until now these have been split into different parts of the miptree: intel_mipmap_tree::offset giving the tile offset and intel_mipmap_tree::level[0].level_x/y giving the intra-tile coordinates. Moreover, logic in various places has been split in a similar fashion. This patch set brings the two parts closer to each other. CC: Jason Ekstrand <ja...@jlekstrand.net> Topi Pohjolainen (6): i965/miptree: Take import tile offset along with intra-tile x,y intel/isl: Introduce tiled image offset i965/miptree: Use isl_image_offset in get_tile_offsets() i965/miptree: Use isl instead of local offset calculator i965/blit: Let _intratile_offset_el() resolve image offset i965/miptree: Use isl_image_offset src/intel/blorp/blorp_blit.c | 19 +-- src/intel/isl/isl.c | 44 --- src/intel/isl/isl.h | 29 +++-- src/mesa/drivers/dri/i965/brw_blorp.c| 2 +- src/mesa/drivers/dri/i965/brw_context.c | 1 - src/mesa/drivers/dri/i965/brw_misc_state.c | 20 +++- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 58 - src/mesa/drivers/dri/i965/intel_blit.c | 145 +++ src/mesa/drivers/dri/i965/intel_fbo.h| 14 +-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 106 +++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 29 + src/mesa/drivers/dri/i965/intel_pixel_draw.c | 3 +- src/mesa/drivers/dri/i965/intel_pixel_read.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 9 +- src/mesa/drivers/dri/i965/intel_tex.c | 3 +- src/mesa/drivers/dri/i965/intel_tex_image.c | 2 +- 16 files changed, 254 insertions(+), 232 deletions(-) -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/17] squash: i965/gen4: Force x-tiling for color surfaces
This is what brw_miptree_choose_tiling() currently does even though blorp is available. Before enabling y-tiling one needs to fix, for example, batch wrapping caused by mipmap offsets not being tile aligned anymore and intel_renderbuffer_move_to_temp() kicking in where it didn't before because x-tiling met the alignment constraints. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 7b2f98cc1b..af5d37bc47 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -953,8 +953,13 @@ miptree_create(struct brw_context *brw, if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) alloc_flags |= BO_ALLOC_FOR_RENDER; - const isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ? + isl_tiling_flags_t tiling_flags = force_linear_tiling(layout_flags) ? ISL_TILING_LINEAR_BIT : ISL_TILING_ANY_MASK; + + /* TODO: This used to be because there wasn't BLORP to handle Y-tiling. */ + if (brw->gen < 6) + tiling_flags &= ~ISL_TILING_Y0_BIT; + struct intel_mipmap_tree *mt = make_surface( brw, target, format, first_level, last_level, -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/17] i965/miptree: Check tex image allocation failures
allowing a graceful failure instead of a crash on an assert later on. This can be hit, for example, on SNB when trying to allocate 8kx8k CUBE_MAP against isl: x-tiled buffer size becomes 2421161984 exceeding the maximum of 1 << 31 == 2147483648. Another way to hit this on SNB is with multisampling of formats wider than 64 bits. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/intel_tex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 82e25fc5ea..7ce2ceb9a2 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -95,6 +95,8 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj, intel_image, 1 /* samples */); + if (!intel_image->mt) + return false; /* Even if the object currently has a mipmap tree associated * with it, this one is a more likely candidate to represent the -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/17] main/teximage: Even on failure use valid format for init()
Otherwise init_teximage_fields_ms() (called by _mesa_init_teximage_fields()) will always assert because it can't find a valid base format. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/main/teximage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 5e13025ed1..2132aaee76 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -5772,7 +5772,7 @@ texture_image_multisample(struct gl_context *ctx, GLuint dims, * like, but being tidy is good. */ _mesa_init_teximage_fields(ctx, texImage, - 0, 0, 0, 0, GL_NONE, MESA_FORMAT_NONE); + 0, 0, 0, 0, internalformat, texFormat); } } -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/17] intel/isl/gen7: Allow msaa with 128-bit formats
These formats are already allowed by the i965 GL driver, and the feature seems to work just fine. There are tests for multisampled rendering in piglit: tests/spec/ext_framebuffer_multisample which can be patched to try GL_RGBA16F/32F/16I/16UI/32I/32UI in addition to GL_RGBA/8I. IvyBridge passed all tests with all sample numbers and even with 128-bit formats. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/intel/isl/isl_format.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index a9f9c6be73..435b0d003a 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -554,16 +554,19 @@ isl_format_supports_multisampling(const struct gen_device_info *devinfo, * - any compressed texture format (BC*) * - any YCRCB* format * -* The restriction on the format's size is removed on Broadwell. Also, -* there is an exception for HiZ which we treat as a compressed format and -* is allowed to be multisampled on Broadwell and earlier. +* The restriction on the format's size is removed on Broadwell. Moreover, +* empirically it looks that even IvyBridge can handle multisampled surfaces +* with format sizes all the way to 128-bits (RGBA32F, RGBA32I, RGBA32UI). +* +* Also, there is an exception for HiZ which we treat as a compressed +* format and is allowed to be multisampled on Broadwell and earlier. */ if (format == ISL_FORMAT_HIZ) { /* On SKL+, HiZ is always single-sampled even when the primary surface * is multisampled. See also isl_surf_get_hiz_surf(). */ return devinfo->gen <= 8; - } else if (devinfo->gen < 8 && isl_format_get_layout(format)->bpb > 64) { + } else if (devinfo->gen < 7 && isl_format_get_layout(format)->bpb > 64) { return false; } else if (isl_format_is_compressed(format)) { return false; -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/17] i965/miptree: Drop miptree_array_layout in get_isl_dim_layout()
This was only needed for checking gen6 stencil which is already using isl. One could delete GEN6_HIZ_STENCIL layout altogether but that will be gone with the rest after a while anyway. The dim_layout converter is needed even after transition to isl when setting up surface states - see brw_emit_surface_state(). Hence dropping the unneeded argument separately. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 5 +++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 11 --- src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 3 +-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 45ac106f3f..e9a50b89eb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -88,9 +88,10 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, surf->dim = get_isl_surf_dim(target); } + assert(mt->array_layout != GEN6_HIZ_STENCIL); + const enum isl_dim_layout dim_layout = - get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target, - mt->array_layout); + get_isl_dim_layout(&brw->screen->devinfo, mt->surf.tiling, target); if (surf->dim_layout == dim_layout) return; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 73637b0fc5..3a2395b030 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -3819,12 +3819,8 @@ get_isl_surf_dim(GLenum target) enum isl_dim_layout get_isl_dim_layout(const struct gen_device_info *devinfo, - enum isl_tiling tiling, GLenum target, - enum miptree_array_layout array_layout) + enum isl_tiling tiling, GLenum target) { - if (array_layout == GEN6_HIZ_STENCIL) - return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ; - switch (target) { case GL_TEXTURE_1D: case GL_TEXTURE_1D_ARRAY: @@ -3865,10 +3861,11 @@ 
intel_miptree_get_isl_surf(struct brw_context *brw, const struct intel_mipmap_tree *mt, struct isl_surf *surf) { + assert(mt->array_layout != GEN6_HIZ_STENCIL); + surf->dim = get_isl_surf_dim(mt->target); surf->dim_layout = get_isl_dim_layout(&brw->screen->devinfo, - mt->surf.tiling, mt->target, - mt->array_layout); + mt->surf.tiling, mt->target); surf->msaa_layout = mt->surf.msaa_layout; surf->tiling = intel_miptree_get_isl_tiling(mt); surf->row_pitch = mt->surf.row_pitch; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index e7872ff96c..7de7f86eee 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -668,8 +668,7 @@ get_isl_surf_dim(GLenum target); enum isl_dim_layout get_isl_dim_layout(const struct gen_device_info *devinfo, - enum isl_tiling tiling, - GLenum target, enum miptree_array_layout array_layout); + enum isl_tiling tiling, GLenum target); enum isl_tiling intel_miptree_get_isl_tiling(const struct intel_mipmap_tree *mt); -- 2.11.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 17/17] i965/miptree: Clean-up unused
Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/Makefile.sources | 1 - src/mesa/drivers/dri/i965/brw_blorp.c| 8 +- src/mesa/drivers/dri/i965/brw_tex_layout.c | 735 --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 23 +- src/mesa/drivers/dri/i965/gen6_depth_state.c | 8 +- src/mesa/drivers/dri/i965/gen7_misc_state.c | 8 +- src/mesa/drivers/dri/i965/gen8_depth_state.c | 8 +- src/mesa/drivers/dri/i965/intel_blit.c | 33 +- src/mesa/drivers/dri/i965/intel_fbo.c| 44 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c| 583 ++ src/mesa/drivers/dri/i965/intel_mipmap_tree.h| 241 src/mesa/drivers/dri/i965/intel_screen.c | 13 +- src/mesa/drivers/dri/i965/intel_tex_image.c | 29 +- src/mesa/drivers/dri/i965/intel_tex_subimage.c | 8 +- 14 files changed, 96 insertions(+), 1646 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_tex_layout.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 431712f76e..425c883de8 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -51,7 +51,6 @@ i965_FILES = \ brw_tcs_surface_state.c \ brw_tes.c \ brw_tes_surface_state.c \ - brw_tex_layout.c \ brw_urb.c \ brw_util.c \ brw_util.h \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 474dfc61c1..e50173d442 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -147,13 +147,7 @@ blorp_surf_for_miptree(struct brw_context *brw, intel_miptree_check_level_layer(mt, *level, start_layer + i); } - if (mt->surf.size > 0) { - surf->surf = &mt->surf; - } else { - intel_miptree_get_isl_surf(brw, mt, &tmp_surfs[0]); - surf->surf = &tmp_surfs[0]; - } - + surf->surf = &mt->surf; surf->addr = (struct blorp_address) { .buffer = mt->bo, .offset = mt->offset, diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c deleted file mode 100644 
index f3b5a17c88..00 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ /dev/null @@ -1,735 +0,0 @@ -/* - * Copyright 2006 VMware, Inc. - * Copyright © 2006 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file brw_tex_layout.cpp - * - * Code to lay out images in a mipmap tree. 
- * - * \author Keith Whitwell <kei...@vmware.com> - * \author Michel Dänzer <daen...@vmware.com> - */ - -#include "intel_mipmap_tree.h" -#include "brw_context.h" -#include "main/macros.h" -#include "main/glformats.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE - -static unsigned int -intel_horizontal_texture_alignment_unit(struct brw_context *brw, -struct intel_mipmap_tree *mt, -uint32_t layout_flags) -{ - if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) - return 16; - - /** -* +--+ -* || alignment unit width ("i") | -* | Surface Property |-| -* || 915 | 965 | ILK | SNB | IVB | -* +--+ -* | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 | -* | BC1-5 compressed format (DXTn/S3TC)| 4 | 4 | 4 | 4 | 4 | -* | FXT1 compressed format| 8