Re: [Mesa-dev] [PATCH 1/5] mesa/bufferobj: make _mesa_delete_buffer_object externally accessible

2016-01-06 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Wed, Jan 6, 2016 at 3:53 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> gl_buffer_object has grown more complicated and requires cleanup. Using this
> function from drivers will be more future-proof.
> ---
>  src/mesa/main/bufferobj.c | 2 +-
>  src/mesa/main/bufferobj.h | 4 
>  2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
> index 8a9f9b6..4a098ac 100644
> --- a/src/mesa/main/bufferobj.c
> +++ b/src/mesa/main/bufferobj.c
> @@ -447,7 +447,7 @@ _mesa_new_buffer_object(struct gl_context *ctx, GLuint 
> name)
>   *
>   * Default callback for the \c dd_function_table::DeleteBuffer() hook.
>   */
> -static void
> +void
>  _mesa_delete_buffer_object(struct gl_context *ctx,
> struct gl_buffer_object *bufObj)
>  {
> diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h
> index 3eac96d..a5bfe88 100644
> --- a/src/mesa/main/bufferobj.h
> +++ b/src/mesa/main/bufferobj.h
> @@ -109,6 +109,10 @@ _mesa_initialize_buffer_object(struct gl_context *ctx,
> GLuint name);
>
>  extern void
> +_mesa_delete_buffer_object(struct gl_context *ctx,
> +   struct gl_buffer_object *bufObj);
> +
> +extern void
>  _mesa_reference_buffer_object_(struct gl_context *ctx,
> struct gl_buffer_object **ptr,
> struct gl_buffer_object *bufObj);
> --
> 2.5.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations

2016-01-06 Thread Timothy Arceri
Previously each member was counted as using a single slot;
count_attribute_slots() fixes the count for array and struct members.

Also don't assign a negative value to the unsigned expl_location variable.
---

 Fixes these new piglit tests:
   http://patchwork.freedesktop.org/patch/69531/

 src/glsl/ast_to_hir.cpp | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 0197cdc..50d5e22 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -6408,12 +6408,13 @@ ast_process_struct_or_iface_block_members(exec_list 
*instructions,
 if (process_qualifier_constant(state, , "location",
qual->location, _location)) {
fields[i].location = VARYING_SLOT_VAR0 + qual_location;
-   expl_location = fields[i].location + 1;
+   expl_location = fields[i].location +
+  fields[i].type->count_attribute_slots(false);
 }
  } else {
 if (layout && layout->flags.q.explicit_location) {
fields[i].location = expl_location;
-   expl_location = expl_location + 1;
+   expl_location += fields[i].type->count_attribute_slots(false);
 } else {
fields[i].location = -1;
 }
@@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list *instructions,
 
state->struct_specifier_depth++;
 
-   unsigned expl_location = -1;
+   unsigned expl_location = 0;
if (layout && layout->flags.q.explicit_location) {
   if (!process_qualifier_constant(state, , "location",
   layout->location, _location)) {
@@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list *instructions,
   return NULL;
}
 
-   unsigned expl_location = -1;
+   unsigned expl_location = 0;
if (layout.flags.q.explicit_location) {
   if (!process_qualifier_constant(state, , "location",
   layout.location, _location)) {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93577

--- Comment #4 from Médéric Boquien  ---
Thanks for the investigation. Is there a way I can find the list of extensions
they are using? That way I can keep an eye on upcoming Mesa versions and try
again when everything is in place.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] tgsi/scan: set which color components are read by a fragment shader

2016-01-06 Thread Nicolai Hähnle

On 05.01.2016 20:46, Marek Olšák wrote:

From: Marek Olšák 

This will be used by radeonsi.
---
  src/gallium/auxiliary/tgsi/tgsi_scan.c | 30 ++
  src/gallium/auxiliary/tgsi/tgsi_scan.h |  1 +
  2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e3a6fb0..6ea32ee 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -187,14 +187,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}

if (procType == TGSI_PROCESSOR_FRAGMENT &&
- !src->Register.Indirect &&
- info->input_semantic_name[src->Register.Index] ==
- TGSI_SEMANTIC_POSITION &&
-  (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
-   src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
- info->reads_z = TRUE;
+  !src->Register.Indirect) {
+ unsigned name =
+info->input_semantic_name[src->Register.Index];
+ unsigned index =
+info->input_semantic_index[src->Register.Index];


Move index down into the TGSI_SEMANTIC_COLOR branch? Either way,

Reviewed-by: Nicolai Hähnle 


+
+ if (name == TGSI_SEMANTIC_POSITION &&
+ (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
+  src->Register.SwizzleW == TGSI_SWIZZLE_Z))
+info->reads_z = TRUE;
+
+ if (name == TGSI_SEMANTIC_COLOR) {
+unsigned mask =
+  (1 << src->Register.SwizzleX) |
+  (1 << src->Register.SwizzleY) |
+  (1 << src->Register.SwizzleZ) |
+  (1 << src->Register.SwizzleW);
+
+info->colors_read |= mask << (index * 4);
+ }
}
 }

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h 
b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index a3e4378..b0b423a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -77,6 +77,7 @@ struct tgsi_shader_info

 uint opcode_count[TGSI_OPCODE_LAST];  /**< opcode histogram */

+   ubyte colors_read; /**< which color components are read by the FS */
 ubyte colors_written;
 boolean reads_position; /**< does fragment shader read position? */
 boolean reads_z; /**< does fragment shader read depth? */


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] tgsi/scan: fix tgsi_shader_info::reads_z

2016-01-06 Thread Nicolai Hähnle

Patches 1 & 2 are

Reviewed-by: Nicolai Hähnle 

On 05.01.2016 20:46, Marek Olšák wrote:

From: Marek Olšák 

This has no users in Mesa.
---
  src/gallium/auxiliary/tgsi/tgsi_scan.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c 
b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e3feed9..e3a6fb0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -187,8 +187,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
}

if (procType == TGSI_PROCESSOR_FRAGMENT &&
-  info->reads_position &&
-  src->Register.Index == 0 &&
+ !src->Register.Indirect &&
+ info->input_semantic_name[src->Register.Index] ==
+ TGSI_SEMANTIC_POSITION &&
(src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
 src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
 src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93577

Jose Fonseca  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |NOTABUG

--- Comment #3 from Jose Fonseca  ---
(In reply to Médéric Boquien from comment #0)
> Note that the producers of Total War: Attila explicitly state that they only
> support the NVidia binary driver at the exclusion of everything else, but
> they do not state what's missing/buggy in the Mesa drivers.

It seems they require at least OpenGL 4.3 core profile support.

But given they make no attempt to check for errors it seems a lost cause. I
wouldn't be surprised if they used proprietary NVIDIA extensions without
checking.

I don't think there's a bug here: if they specifically state they only support
the NVIDIA blob, there's nothing we can do short of emulating the NVIDIA blob
(extensions etc.)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/23] radeonsi: export "undef" values for undefined PS outputs

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index e08a076..73a34ac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1323,11 +1323,12 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,

LLVMInt32TypeInContext(base->gallivm->context),
pack_args, 2,
LLVMReadNoneAttribute | 
LLVMNoUnwindAttribute);
-   args[chan + 7] = args[chan + 5] =
+   args[chan + 5] =
LLVMBuildBitCast(base->gallivm->builder,
 packed,
 
LLVMFloatTypeInContext(base->gallivm->context),
 "");
+   args[chan + 7] = base->undef;
}
} else
memcpy([5], values, sizeof(values[0]) * 4);
@@ -2119,10 +2120,10 @@ static void si_export_mrt_z(struct 
lp_build_tgsi_context *bld_base,
args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
 
args[4] = uint->zero; /* COMP flag */
-   args[5] = base->zero; /* R, depth */
-   args[6] = base->zero; /* G, stencil test value[0:7], stencil op 
value[8:15] */
-   args[7] = base->zero; /* B, sample mask */
-   args[8] = base->zero; /* A, alpha to mask */
+   args[5] = base->undef; /* R, depth */
+   args[6] = base->undef; /* G, stencil test value[0:7], stencil op 
value[8:15] */
+   args[7] = base->undef; /* B, sample mask */
+   args[8] = base->undef; /* A, alpha to mask */
 
if (depth) {
args[5] = depth;
@@ -2173,10 +2174,10 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
args[2] = uint->one; /* DONE bit */
args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_MRT);
args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
-   args[5] = uint->zero; /* R */
-   args[6] = uint->zero; /* G */
-   args[7] = uint->zero; /* B */
-   args[8] = uint->zero; /* A */
+   args[5] = uint->undef; /* R */
+   args[6] = uint->undef; /* G */
+   args[7] = uint->undef; /* B */
+   args[8] = uint->undef; /* A */
 
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
   
LLVMVoidTypeInContext(base->gallivm->context),
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/23] radeonsi: add struct si_shader_config

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

There will be 1 config per variant, which will be a union of configs
from {prolog, main, epilog}. For now, just add the structure.
---
 src/gallium/drivers/radeonsi/si_compute.c   | 24 ++--
 src/gallium/drivers/radeonsi/si_shader.c| 31 +++
 src/gallium/drivers/radeonsi/si_shader.h| 23 +++-
 src/gallium/drivers/radeonsi/si_state_draw.c|  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 50 -
 5 files changed, 68 insertions(+), 64 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 1c4d6b3..8edf4ad 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -68,7 +68,7 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
unsigned scratch_bytes_needed;
 
si_shader_binary_read_config(>shader, offset);
-   scratch_bytes_needed = program->shader.scratch_bytes_per_wave;
+   scratch_bytes_needed = 
program->shader.config.scratch_bytes_per_wave;
scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
}
 
@@ -86,7 +86,7 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
 * to the maximum bytes needed, so it can compute the stride
 * correctly.
 */
-   program->shader.scratch_bytes_per_wave = scratch_bytes;
+   program->shader.config.scratch_bytes_per_wave = scratch_bytes;
 
/* Patch the shader with the scratch buffer address. */
si_shader_apply_scratch_relocs(sctx,
@@ -281,12 +281,12 @@ static void si_launch_grid(
 
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program->input_size);
 
-   if (shader->scratch_bytes_per_wave > 0) {
+   if (shader->config.scratch_bytes_per_wave > 0) {
 
COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
bytes; "
"Total Scratch: %u bytes\n", num_waves_for_scratch,
-   shader->scratch_bytes_per_wave,
-   shader->scratch_bytes_per_wave *
+   shader->config.scratch_bytes_per_wave,
+   shader->config.scratch_bytes_per_wave *
num_waves_for_scratch);
 
radeon_add_to_buffer_list(>b, >b.gfx,
@@ -313,7 +313,7 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, 
scratch_buffer_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 12,
S_008F04_BASE_ADDRESS_HI(scratch_buffer_va >> 32)
-   |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64));
+   |  S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64));
 
si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
@@ -361,9 +361,9 @@ static void si_launch_grid(
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
 
-   si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
+   si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->config.rsrc1);
 
-   lds_blocks = shader->lds_size;
+   lds_blocks = shader->config.lds_size;
/* XXX: We are over allocating LDS.  For SI, the shader reports LDS in
 * blocks of 256 bytes, so if there are 4 bytes lds allocated in
 * the shader and 4 bytes allocated by the state tracker, then
@@ -377,10 +377,10 @@ static void si_launch_grid(
 
assert(lds_blocks <= 0xFF);
 
-   shader->rsrc2 &= C_00B84C_LDS_SIZE;
-   shader->rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
+   shader->config.rsrc2 &= C_00B84C_LDS_SIZE;
+   shader->config.rsrc2 |=  S_00B84C_LDS_SIZE(lds_blocks);
 
-   si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2);
+   si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->config.rsrc2);
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
 
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
@@ -402,7 +402,7 @@ static void si_launch_grid(
 * COMPUTE_PGM_RSRC2.SCRATCH_EN is enabled.
 */
S_00B860_WAVES(num_waves_for_scratch)
-   | S_00B860_WAVESIZE(shader->scratch_bytes_per_wave >> 10))
+   | S_00B860_WAVESIZE(shader->config.scratch_bytes_per_wave >> 
10))
;
 
si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f6a5051..c468ee3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3728,25 +3728,25 @@ void si_shader_binary_read_config(struct si_shader 
*shader,
  

[Mesa-dev] [PATCH 15/23] radeonsi: always keep shader code, rodata, and relocs in memory

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

We won't compile shaders in draw calls, but we will concatenate shader
binaries according to states in draw calls, so keep the binaries.
---
 src/gallium/drivers/radeonsi/si_shader.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index de117d9..abc1652 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3908,14 +3908,9 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
return r;
 
FREE(shader->binary.config);
-   FREE(shader->binary.rodata);
FREE(shader->binary.global_symbol_offsets);
-   if (shader->config.scratch_bytes_per_wave == 0) {
-   FREE(shader->binary.code);
-   FREE(shader->binary.relocs);
-   memset(>binary, 0,
-  offsetof(struct radeon_shader_binary, disasm_string));
-   }
+   shader->binary.config = NULL;
+   shader->binary.global_symbol_offsets = NULL;
return r;
 }
 
@@ -4228,6 +4223,7 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(>bo, NULL);
 
FREE(shader->binary.code);
+   FREE(shader->binary.rodata);
FREE(shader->binary.relocs);
FREE(shader->binary.disasm_string);
 }
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/23] radeonsi: move MRTZ export into a separate function

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 113 +--
 1 file changed, 62 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8441fb4..e08a076 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2100,6 +2100,59 @@ static void si_llvm_emit_vs_epilogue(struct 
lp_build_tgsi_context * bld_base)
FREE(outputs);
 }
 
+static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
+  LLVMValueRef depth, LLVMValueRef stencil,
+  LLVMValueRef samplemask)
+{
+   struct si_screen *sscreen = si_shader_context(bld_base)->screen;
+   struct lp_build_context *base = _base->base;
+   struct lp_build_context *uint = _base->uint_bld;
+   LLVMValueRef args[9];
+   unsigned mask = 0;
+
+   assert(depth || stencil || samplemask);
+
+   args[1] = uint->one; /* whether the EXEC mask is valid */
+   args[2] = uint->one; /* DONE bit */
+
+   /* Specify the target we are exporting */
+   args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
+
+   args[4] = uint->zero; /* COMP flag */
+   args[5] = base->zero; /* R, depth */
+   args[6] = base->zero; /* G, stencil test value[0:7], stencil op 
value[8:15] */
+   args[7] = base->zero; /* B, sample mask */
+   args[8] = base->zero; /* A, alpha to mask */
+
+   if (depth) {
+   args[5] = depth;
+   mask |= 0x1;
+   }
+
+   if (stencil) {
+   args[6] = stencil;
+   mask |= 0x2;
+   }
+
+   if (samplemask) {
+   args[7] = samplemask;
+   mask |= 0x4;
+   }
+
+   /* SI (except OLAND) has a bug that it only looks
+* at the X writemask component. */
+   if (sscreen->b.chip_class == SI &&
+   sscreen->b.family != CHIP_OLAND)
+   mask |= 0x1;
+
+   /* Specify which components to enable */
+   args[0] = lp_build_const_int32(base->gallivm, mask);
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+}
+
 static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
 {
struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
@@ -2109,7 +2162,7 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = >selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
LLVMValueRef args[9];
-   int depth_index = -1, stencil_index = -1, samplemask_index = -1;
+   LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int last_color_export = -1;
int i;
 
@@ -2148,13 +2201,16 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
/* Select the correct target */
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
-   depth_index = i;
+   depth = LLVMBuildLoad(builder,
+ 
si_shader_ctx->radeon_bld.soa.outputs[i][2], "");
continue;
case TGSI_SEMANTIC_STENCIL:
-   stencil_index = i;
+   stencil = LLVMBuildLoad(builder,
+   
si_shader_ctx->radeon_bld.soa.outputs[i][1], "");
continue;
case TGSI_SEMANTIC_SAMPLEMASK:
-   samplemask_index = i;
+   samplemask = LLVMBuildLoad(builder,
+  
si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
continue;
case TGSI_SEMANTIC_COLOR:
target = V_008DFC_SQ_EXP_MRT + semantic_index;
@@ -2214,53 +2270,8 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
   args, 9, 0);
}
 
-   if (depth_index >= 0 || stencil_index >= 0 || samplemask_index >= 0) {
-   LLVMValueRef out_ptr;
-   unsigned mask = 0;
-
-   args[1] = uint->one; /* whether the EXEC mask is valid */
-   args[2] = uint->one; /* DONE bit */
-
-   /* Specify the target we are exporting */
-   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_MRTZ);
-
-   args[4] = uint->zero; /* COMP flag */
-   args[5] = base->zero; /* R, depth */
-   args[6] = base->zero; /* G, stencil test value[0:7], stencil op 
value[8:15] */
-   args[7] = base->zero; /* B, sample mask */
-   args[8] = base->zero; /* 

[Mesa-dev] [PATCH 18/23] radeonsi: add si_shader_destroy_binary

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 14 +-
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 770f5b7..0773fff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4219,6 +4219,14 @@ out:
return r;
 }
 
+void si_shader_destroy_binary(struct radeon_shader_binary *binary)
+{
+   FREE(binary->code);
+   FREE(binary->rodata);
+   FREE(binary->relocs);
+   FREE(binary->disasm_string);
+}
+
 void si_shader_destroy(struct si_shader *shader)
 {
if (shader->gs_copy_shader) {
@@ -4230,9 +4238,5 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(>scratch_bo, NULL);
 
r600_resource_reference(>bo, NULL);
-
-   FREE(shader->binary.code);
-   FREE(shader->binary.rodata);
-   FREE(shader->binary.relocs);
-   FREE(shader->binary.disasm_string);
+   si_shader_destroy_binary(>binary);
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 2220fc7..780383c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -341,6 +341,7 @@ int si_compile_llvm(struct si_screen *sscreen,
struct pipe_debug_callback *debug,
unsigned processor);
 void si_shader_destroy(struct si_shader *shader);
+void si_shader_destroy_binary(struct radeon_shader_binary *binary);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
 void si_shader_binary_read(struct si_screen *sscreen,
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/23] radeonsi: move NULL exporting into a separate function

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c | 37 +++-
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f60b560..f6a5051 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2186,34 +2186,41 @@ static void si_export_mrt_color(struct 
lp_build_tgsi_context *bld_base,
   args, 9, 0);
 }
 
+static void si_export_null(struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_context *base = _base->base;
+   struct lp_build_context *uint = _base->uint_bld;
+   LLVMValueRef args[9];
+
+   args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels 
*/
+   args[1] = uint->one; /* whether the EXEC mask is valid */
+   args[2] = uint->one; /* DONE bit */
+   args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
+   args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+   args[5] = uint->undef; /* R */
+   args[6] = uint->undef; /* G */
+   args[7] = uint->undef; /* B */
+   args[8] = uint->undef; /* A */
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+}
+
 static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
 {
struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
struct si_shader * shader = si_shader_ctx->shader;
struct lp_build_context * base = _base->base;
-   struct lp_build_context * uint = _base->uint_bld;
struct tgsi_shader_info *info = >selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
-   LLVMValueRef args[9];
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int last_color_export = -1;
int i;
 
/* If there are no outputs, add a dummy export. */
if (!info->num_outputs) {
-   args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled 
channels */
-   args[1] = uint->one; /* whether the EXEC mask is valid */
-   args[2] = uint->one; /* DONE bit */
-   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
-   args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
-   args[5] = uint->undef; /* R */
-   args[6] = uint->undef; /* G */
-   args[7] = uint->undef; /* B */
-   args[8] = uint->undef; /* A */
-
-   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
-  
LLVMVoidTypeInContext(base->gallivm->context),
-  args, 9, 0);
+   si_export_null(bld_base);
return;
}
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/23] radeonsi: move si_shader_binary_upload out of si_compile_llvm

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  1 +
 src/gallium/drivers/radeonsi/si_shader.c  | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index a543c55..aedea8e 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -124,6 +124,7 @@ static void *si_create_compute_state(
 code, 
header->num_bytes);
si_compile_llvm(sctx->screen, >kernels[i], 
sctx->tm,
mod, >b.debug, 
TGSI_PROCESSOR_COMPUTE);
+   si_shader_binary_upload(sctx->screen, 
>kernels[i]);
LLVMDisposeModule(mod);
}
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index abc1652..91473a7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3903,10 +3903,6 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
si_shader_binary_read(sscreen, >binary, >config,
  debug, processor);
 
-   r = si_shader_binary_upload(sscreen, shader);
-   if (r)
-   return r;
-
FREE(shader->binary.config);
FREE(shader->binary.global_symbol_offsets);
shader->binary.config = NULL;
@@ -3987,6 +3983,8 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
r = si_compile_llvm(sscreen, si_shader_ctx->shader,
si_shader_ctx->tm, bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
+   if (!r)
+   r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
 
radeon_llvm_dispose(_shader_ctx->radeon_bld);
 
@@ -4187,6 +4185,12 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}
 
+   r = si_shader_binary_upload(sscreen, shader);
+   if (r) {
+   fprintf(stderr, "LLVM failed to upload shader\n");
+   goto out;
+   }
+
radeon_llvm_dispose(_shader_ctx.radeon_bld);
 
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/23] radeonsi: use EXP_NULL for pixel shaders without outputs

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

This never happens currently.
---
 src/gallium/drivers/radeonsi/si_shader.c| 2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 13e5140..4204db0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2153,7 +2153,7 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled 
channels */
args[1] = uint->one; /* whether the EXEC mask is valid */
args[2] = uint->one; /* DONE bit */
-   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_MRT);
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_NULL);
args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
args[5] = uint->undef; /* R */
args[6] = uint->undef; /* G */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 68ba7ec..af21f3e 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -428,11 +428,8 @@ static void si_shader_ps(struct si_shader *shader)
colors_written = info->colors_written;
export_16bpc = shader->key.ps.export_16bpc;
 
-   if (!info->num_outputs) {
-   colors_written = 0x1; /* dummy export */
-   export_16bpc = 0;
-   } else if (info->colors_written == 0x1 &&
-  info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
+   if (info->colors_written == 0x1 &&
+   info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1;
}
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/23] radeonsi: don't pass si_shader to si_shader_binary_read_config

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  5 +++--
 src/gallium/drivers/radeonsi/si_shader.c  | 28 ++--
 src/gallium/drivers/radeonsi/si_shader.h  |  3 ++-
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 8edf4ad..7aedd39 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -67,7 +67,8 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
program->shader.binary.global_symbol_offsets[i];
unsigned scratch_bytes_needed;
 
-   si_shader_binary_read_config(>shader, offset);
+   si_shader_binary_read_config(>shader.binary,
+>shader.config, offset);
scratch_bytes_needed = 
program->shader.config.scratch_bytes_per_wave;
scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
}
@@ -260,7 +261,7 @@ static void si_launch_grid(
 
 #if HAVE_LLVM >= 0x0306
/* Read the config information */
-   si_shader_binary_read_config(shader, pc);
+   si_shader_binary_read_config(>binary, >config, pc);
 #endif
 
/* Upload the kernel arguments */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index c468ee3..f9e61a2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3708,19 +3708,19 @@ static void preload_ring_buffers(struct 
si_shader_context *si_shader_ctx)
}
 }
 
-void si_shader_binary_read_config(struct si_shader *shader,
+void si_shader_binary_read_config(struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
  unsigned symbol_offset)
 {
unsigned i;
const unsigned char *config =
-   radeon_shader_binary_config_start(>binary,
-   symbol_offset);
+   radeon_shader_binary_config_start(binary, symbol_offset);
 
/* XXX: We may be able to emit some of these values directly rather than
 * extracting fields to be emitted later.
 */
 
-   for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
+   for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
@@ -3728,25 +3728,25 @@ void si_shader_binary_read_config(struct si_shader 
*shader,
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
-   shader->config.num_sgprs = 
MAX2(shader->config.num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
-   shader->config.num_vgprs = 
MAX2(shader->config.num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
-   shader->config.float_mode =  G_00B028_FLOAT_MODE(value);
-   shader->config.rsrc1 = value;
+   conf->num_sgprs = MAX2(conf->num_sgprs, 
(G_00B028_SGPRS(value) + 1) * 8);
+   conf->num_vgprs = MAX2(conf->num_vgprs, 
(G_00B028_VGPRS(value) + 1) * 4);
+   conf->float_mode =  G_00B028_FLOAT_MODE(value);
+   conf->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
-   shader->config.lds_size = MAX2(shader->config.lds_size, 
G_00B02C_EXTRA_LDS_SIZE(value));
+   conf->lds_size = MAX2(conf->lds_size, 
G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
-   shader->config.lds_size = MAX2(shader->config.lds_size, 
G_00B84C_LDS_SIZE(value));
-   shader->config.rsrc2 = value;
+   conf->lds_size = MAX2(conf->lds_size, 
G_00B84C_LDS_SIZE(value));
+   conf->rsrc2 = value;
break;
case R_0286CC_SPI_PS_INPUT_ENA:
-   shader->config.spi_ps_input_ena = value;
+   conf->spi_ps_input_ena = value;
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
-   shader->config.scratch_bytes_per_wave =
+   conf->scratch_bytes_per_wave =
G_00B860_WAVESIZE(value) * 256 * 4 * 1;
break;
default:
@@ -3858,7 +3858,7 @@ void 

[Mesa-dev] [PATCH 08/23] radeonsi: only use LLVMBuildLoad once when updating color outputs at the end

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

without LLVMBuildStore.

So:
- do LLVMBuildLoad
- update the values as necessary
- export
---
 src/gallium/drivers/radeonsi/si_shader.c | 67 ++--
 1 file changed, 20 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 73a34ac..13e5140 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1334,24 +1334,8 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
memcpy([5], values, sizeof(values[0]) * 4);
 }
 
-/* Load from output pointers and initialize arguments for the shader export 
intrinsic */
-static void si_llvm_init_export_args_load(struct lp_build_tgsi_context 
*bld_base,
- LLVMValueRef *out_ptr,
- unsigned target,
- LLVMValueRef *args)
-{
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef values[4];
-   int i;
-
-   for (i = 0; i < 4; i++)
-   values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], "");
-
-   si_llvm_init_export_args(bld_base, values, target, args);
-}
-
 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha_ptr)
+ LLVMValueRef alpha)
 {
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -1363,8 +1347,7 @@ static void si_alpha_test(struct lp_build_tgsi_context 
*bld_base,
LLVMValueRef alpha_pass =
lp_build_cmp(_base->base,
 si_shader_ctx->shader->key.ps.alpha_func,
-LLVMBuildLoad(gallivm->builder, alpha_ptr, 
""),
-alpha_ref);
+alpha, alpha_ref);
LLVMValueRef arg =
lp_build_select(_base->base,
alpha_pass,
@@ -1383,12 +1366,12 @@ static void si_alpha_test(struct lp_build_tgsi_context 
*bld_base,
}
 }
 
-static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context 
*bld_base,
- LLVMValueRef alpha_ptr)
+static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context 
*bld_base,
+ LLVMValueRef alpha)
 {
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef coverage, alpha;
+   LLVMValueRef coverage;
 
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -1406,9 +1389,7 @@ static void si_scale_alpha_by_sample_mask(struct 
lp_build_tgsi_context *bld_base
 lp_build_const_float(gallivm,
1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
 
-   alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
-   alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
-   LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
+   return LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
 }
 
 static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
@@ -2196,8 +2177,8 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
-   unsigned target;
-   LLVMValueRef alpha_ptr;
+   unsigned target, j;
+   LLVMValueRef color[4] = {};
 
/* Select the correct target */
switch (semantic_name) {
@@ -2215,29 +2196,24 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
continue;
case TGSI_SEMANTIC_COLOR:
target = V_008DFC_SQ_EXP_MRT + semantic_index;
-   alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
 
-   if (si_shader_ctx->shader->key.ps.clamp_color) {
-   for (int j = 0; j < 4; j++) {
-   LLVMValueRef ptr = 
si_shader_ctx->radeon_bld.soa.outputs[i][j];
-   LLVMValueRef result = 
LLVMBuildLoad(builder, ptr, "");
+   for (j = 0; j < 4; j++)
+   color[j] = LLVMBuildLoad(builder,
+

[Mesa-dev] [PATCH 10/23] radeonsi: move MRT color exporting into a separate function

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

This will be used by a fragment shader epilog.
---
 src/gallium/drivers/radeonsi/si_shader.c | 93 +++-
 1 file changed, 55 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4204db0..f60b560 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2135,6 +2135,57 @@ static void si_export_mrt_z(struct lp_build_tgsi_context 
*bld_base,
   args, 9, 0);
 }
 
+static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+   LLVMValueRef *color, unsigned index,
+   bool is_last)
+{
+   struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+   struct lp_build_context *base = &bld_base->base;
+   LLVMValueRef args[9];
+   int i;
+
+   /* Clamp color */
+   if (si_shader_ctx->shader->key.ps.clamp_color)
+   for (i = 0; i < 4; i++)
+   color[i] = radeon_llvm_saturate(bld_base, color[i]);
+
+   /* Alpha to one */
+   if (si_shader_ctx->shader->key.ps.alpha_to_one)
+   color[3] = base->one;
+
+   /* Alpha test */
+   if (index == 0 &&
+   si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+   si_alpha_test(bld_base, color[3]);
+
+   /* Line & polygon smoothing */
+   if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+   color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+   if (index == 0 &&
+   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; 
c++) {
+   si_llvm_init_export_args(bld_base, color,
+V_008DFC_SQ_EXP_MRT + c, args);
+   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+   }
+   }
+
+   /* Export */
+   si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+args);
+   if (is_last) {
+   args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is 
valid */
+   args[2] = bld_base->uint_bld.one; /* DONE bit */
+   }
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+}
+
 static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
 {
struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
@@ -2177,7 +2228,7 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
-   unsigned target, j;
+   unsigned j;
LLVMValueRef color[4] = {};
 
/* Select the correct target */
@@ -2195,53 +2246,19 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
   
si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
continue;
case TGSI_SEMANTIC_COLOR:
-   target = V_008DFC_SQ_EXP_MRT + semantic_index;
-
for (j = 0; j < 4; j++)
color[j] = LLVMBuildLoad(builder,
 
si_shader_ctx->radeon_bld.soa.outputs[i][j], "");
 
-   if (si_shader_ctx->shader->key.ps.clamp_color)
-   for (j = 0; j < 4; j++)
-   color[j] = 
radeon_llvm_saturate(bld_base, color[j]);
-
-   if (si_shader_ctx->shader->key.ps.alpha_to_one)
-   color[3] = base->one;
-
-   if (semantic_index == 0 &&
-   si_shader_ctx->shader->key.ps.alpha_func != 
PIPE_FUNC_ALWAYS)
-   si_alpha_test(bld_base, color[3]);
-
-   if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
-   color[3] = 
si_scale_alpha_by_sample_mask(bld_base, color[3]);
-   break;
+   si_export_mrt_color(bld_base, color, semantic_index,
+   last_color_export == i);
+   continue;
default:

[Mesa-dev] [PATCH 00/23] RadeonSI: Restructuring shader code generation part 2

2016-01-06 Thread Marek Olšák
Hi,

These boring patches focus on restructuring pixel shader output handling and 
code around si_compile_llvm (config, dumping, etc.). They are mostly code 
movements and dividing functions into smaller ones, so that they can be re-used 
by pixel shader epilog compilation code.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/23] radeonsi: set SPI color formats and CB_SHADER_MASK outside of compilation

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c| 12 +-
 src/gallium/drivers/radeonsi/si_shader.h|  2 --
 src/gallium/drivers/radeonsi/si_state_shaders.c | 30 ++---
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f322c4e..85113c0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1302,18 +1302,8 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
int cbuf = target - V_008DFC_SQ_EXP_MRT;
 
-   if (cbuf >= 0 && cbuf < 8) {
+   if (cbuf >= 0 && cbuf < 8)
compressed = 
(si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
-
-   if (compressed)
-   si_shader_ctx->shader->spi_shader_col_format |=
-   V_028714_SPI_SHADER_FP16_ABGR << (4 * 
cbuf);
-   else
-   si_shader_ctx->shader->spi_shader_col_format |=
-   V_028714_SPI_SHADER_32_ABGR << (4 * 
cbuf);
-
-   si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * 
cbuf);
-   }
}
 
/* Set COMPR flag */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 3d14c79..b89d3b2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -277,8 +277,6 @@ struct si_shader {
unsignedspi_ps_input_ena;
unsignedfloat_mode;
unsignedscratch_bytes_per_wave;
-   unsignedspi_shader_col_format;
-   unsignedcb_shader_mask;
union si_shader_key key;
 
unsignednparam;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4b007ec..b08b035f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -387,6 +387,8 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = >selector->info;
struct si_pm4_state *pm4;
unsigned i, spi_ps_in_control;
+   unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
+   unsigned colors_written, export_16bpc;
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = 0;
uint64_t va;
@@ -422,12 +424,35 @@ static void si_shader_ps(struct si_shader *shader)
}
}
 
+   /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
+   colors_written = info->colors_written;
+   export_16bpc = shader->key.ps.export_16bpc;
+
+   if (info->colors_written == 0x0) {
+   colors_written = 0x1; /* dummy export */
+   export_16bpc = 0;
+   } else if (info->colors_written == 0x1 &&
+  info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
+   colors_written |= (1 << (shader->key.ps.last_cbuf + 1)) - 1;
+   }
+
+   while (colors_written) {
+   i = u_bit_scan(&colors_written);
+   if (export_16bpc & (1 << i))
+   spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR 
<< (4 * i);
+   else
+   spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << 
(4 * i);
+   cb_shader_mask |= 0xf << (4 * i);
+   }
+
+   /* Set interpolation controls. */
has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->spi_ps_input_ena) ||
   G_0286CC_LINEAR_CENTROID_ENA(shader->spi_ps_input_ena);
 
spi_ps_in_control = S_0286D8_NUM_INTERP(shader->nparam) |
S_0286D8_BC_OPTIMIZE_DISABLE(has_centroid);
 
+   /* Set registers. */
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
@@ -437,9 +462,8 @@ static void si_shader_ps(struct si_shader *shader)
   info->writes_z ? V_028710_SPI_SHADER_32_R :
   V_028710_SPI_SHADER_ZERO);
 
-   si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
-  shader->spi_shader_col_format);
-   si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
+   si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, 
spi_shader_col_format);
+   si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, cb_shader_mask);
 
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, 
RADEON_PRIO_USER_SHADER);
-- 
2.1.4

[Mesa-dev] [PATCH 01/23] radeonsi: determine DB_SHADER_CONTROL outside of shader compilation

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

because the API pixel shader binary will not emulate alpha test one day,
so the KILL_ENABLE bit must be determined elsewhere.
---
 src/gallium/drivers/radeonsi/si_shader.c| 20 
 src/gallium/drivers/radeonsi/si_shader.h|  5 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 43 +
 3 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 426f40f..4b49f9d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1390,8 +1390,6 @@ static void si_alpha_test(struct lp_build_tgsi_context 
*bld_base,
LLVMVoidTypeInContext(gallivm->context),
NULL, 0, 0);
}
-
-   si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
 }
 
 static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context 
*bld_base,
@@ -2229,22 +2227,18 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
out_ptr = 
si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
args[5] = LLVMBuildLoad(base->gallivm->builder, 
out_ptr, "");
mask |= 0x1;
-   si_shader_ctx->shader->db_shader_control |= 
S_02880C_Z_EXPORT_ENABLE(1);
}
 
if (stencil_index >= 0) {
out_ptr = 
si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
args[6] = LLVMBuildLoad(base->gallivm->builder, 
out_ptr, "");
mask |= 0x2;
-   si_shader_ctx->shader->db_shader_control |=
-   S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
}
 
if (samplemask_index >= 0) {
out_ptr = 
si_shader_ctx->radeon_bld.soa.outputs[samplemask_index][0];
args[7] = LLVMBuildLoad(base->gallivm->builder, 
out_ptr, "");
mask |= 0x4;
-   si_shader_ctx->shader->db_shader_control |= 
S_02880C_MASK_EXPORT_ENABLE(1);
}
 
/* SI (except OLAND) has a bug that it only looks
@@ -4113,9 +4107,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
if (sel->type != PIPE_SHADER_COMPUTE)
shader->dx10_clamp_mode = true;
 
-   if (sel->info.uses_kill)
-   shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
-
shader->uses_instanceid = sel->info.uses_instanceid;
bld_base->info = poly_stipple ? _shader_info : >info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
@@ -4190,17 +4181,6 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
case TGSI_PROCESSOR_FRAGMENT:
si_shader_ctx.radeon_bld.load_input = declare_input_fs;
bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
-
-   switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
-   case TGSI_FS_DEPTH_LAYOUT_GREATER:
-   shader->db_shader_control |=
-   
S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
-   break;
-   case TGSI_FS_DEPTH_LAYOUT_LESS:
-   shader->db_shader_control |=
-   
S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
-   break;
-   }
break;
default:
assert(!"Unsupported shader type");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index d377a2a..067704f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -201,6 +201,7 @@ struct si_shader_selector {
boolforces_persample_interp_for_persp;
boolforces_persample_interp_for_linear;
 
+   /* GS parameters. */
unsignedesgs_itemsize;
unsignedgs_input_verts_per_prim;
unsignedgs_output_prim;
@@ -210,6 +211,9 @@ struct si_shader_selector {
unsignedgsvs_vertex_size;
unsignedmax_gsvs_emit_size;
 
+   /* PS parameters. */
+   unsigneddb_shader_control;
+
/* masks of "get_unique_index" bits */
uint64_toutputs_written;
uint32_tpatch_outputs_written;
@@ -275,7 +279,6 @@ struct si_shader {
unsignedscratch_bytes_per_wave;
unsignedspi_shader_col_format;
unsignedspi_shader_z_format;
-   unsigneddb_shader_control;
unsignedcb_shader_mask;
   

[Mesa-dev] [PATCH 02/23] radeonsi: determine SPI_SHADER_Z_FORMAT outside of shader compilation

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_shader.c| 7 ---
 src/gallium/drivers/radeonsi/si_shader.h| 1 -
 src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++-
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4b49f9d..b7c44b9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2247,13 +2247,6 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
si_shader_ctx->screen->b.family != CHIP_OLAND)
mask |= 0x1;
 
-   if (samplemask_index >= 0)
-   si_shader_ctx->shader->spi_shader_z_format = 
V_028710_SPI_SHADER_32_ABGR;
-   else if (stencil_index >= 0)
-   si_shader_ctx->shader->spi_shader_z_format = 
V_028710_SPI_SHADER_32_GR;
-   else
-   si_shader_ctx->shader->spi_shader_z_format = 
V_028710_SPI_SHADER_32_R;
-
/* Specify which components to enable */
args[0] = lp_build_const_int32(base->gallivm, mask);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 067704f..3d14c79 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -278,7 +278,6 @@ struct si_shader {
unsignedfloat_mode;
unsignedscratch_bytes_per_wave;
unsignedspi_shader_col_format;
-   unsignedspi_shader_z_format;
unsignedcb_shader_mask;
union si_shader_key key;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 41e331b..61db8ef 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -431,7 +431,12 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);
 
-   si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 
shader->spi_shader_z_format);
+   si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT,
+  info->writes_samplemask ? V_028710_SPI_SHADER_32_ABGR :
+  info->writes_stencil ? V_028710_SPI_SHADER_32_GR :
+  info->writes_z ? V_028710_SPI_SHADER_32_R :
+  V_028710_SPI_SHADER_ZERO);
+
si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
   shader->spi_shader_col_format);
si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/23] radeonsi: write all MRTs only if there is exactly one output

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

This doesn't fix a known bug, but better safe than sorry.

Also, simplify the expression in si_shader.c.
---
 src/gallium/drivers/radeonsi/si_shader.c| 5 ++---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b7c44b9..f322c4e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2189,9 +2189,8 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
/* This instruction will be emitted at the end of the 
shader. */
memcpy(last_args, args, sizeof(args));
 
-   /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
-   if 
(shader->selector->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
-   semantic_index == 0 &&
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is 
true. */
+   if (semantic_index == 0 &&
si_shader_ctx->shader->key.ps.last_cbuf > 0) {
for (int c = 1; c <= 
si_shader_ctx->shader->key.ps.last_cbuf; c++) {
si_llvm_init_export_args_load(bld_base,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 61db8ef..4b007ec 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -552,8 +552,10 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
-   if 
(sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+   if 
(sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
+   sel->info.colors_written == 0x1)
key->ps.last_cbuf = 
MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
+
key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
 
if (rs) {
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93577

--- Comment #2 from Jose Fonseca  ---
(In reply to Michel Dänzer from comment #1)
> Reassigning to Mesa core since it happens with the i965 driver as well, but
> it looks like it might be a game bug.
> 
> BTW, an apitrace is only useful if replaying it reproduces the problem.

Actually the apitrace shows the problem:

2347 glXCreateContextAttribsARB(dpy = 0x6c00ec0, config = 0x6d081a0,
share_context = NULL, direct = True, attrib_list =
{GLX_CONTEXT_MAJOR_VERSION_ARB, 4, GLX_CONTEXT_MINOR_VERSION_ARB, 3,
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, 0}) = NULL
2348 glXMakeCurrent(dpy = 0x6c00ec0, drawable = 0, ctx = NULL) = True
2350 glGenTextures(n = 1, textures = &0)
...
2595 glMapBufferRange(target = GL_PIXEL_PACK_BUFFER, offset = 0, length = 0,
access = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
GL_MAP_FLUSH_EXPLICIT_BIT) = NULL

The game requests a 4.3 context, it doesn't get one, but happily proceeds using
a null context as if nothing happened ... until it gets a NULL glMapBufferRange
and segfaults.

glretrace skips gl calls with a NULL context (because on Windows the
OPENGL32.DLL silently drops them, so quite a few Windows apps inadvertently do
it when shutting down; glretrace therefore needs to ignore them when replaying
on Linux to prevent crashes).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/23] radeonsi: don't pass si_shader to si_shader_binary_read

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  3 ++-
 src/gallium/drivers/radeonsi/si_shader.c  | 23 ---
 src/gallium/drivers/radeonsi/si_shader.h  |  7 +--
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 7aedd39..a543c55 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -136,7 +136,8 @@ static void *si_create_compute_state(
 * the shader code to the GPU.
 */
init_scratch_buffer(sctx, program);
-   si_shader_binary_read(sctx->screen, >shader, >b.debug,
+   si_shader_binary_read(sctx->screen, >shader.binary,
+ >shader.config, >b.debug,
  TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, >shader);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f9e61a2..de117d9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3853,12 +3853,13 @@ static void si_shader_dump_disassembly(const struct 
radeon_shader_binary *binary
}
 }
 
-void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
-  struct pipe_debug_callback *debug, unsigned 
processor)
+void si_shader_binary_read(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor)
 {
-   const struct radeon_shader_binary *binary = >binary;
-
-   si_shader_binary_read_config(>binary, >config, 0);
+   si_shader_binary_read_config(binary, conf, 0);
 
if (r600_can_dump_shader(>b, processor)) {
if (!(sscreen->b.debug_flags & DBG_NO_ASM))
@@ -3867,15 +3868,14 @@ void si_shader_binary_read(struct si_screen *sscreen, 
struct si_shader *shader,
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
"Scratch: %d bytes per wave\n\n",
-   shader->config.num_sgprs, shader->config.num_vgprs, 
binary->code_size,
-   shader->config.lds_size, 
shader->config.scratch_bytes_per_wave);
+   conf->num_sgprs, conf->num_vgprs, binary->code_size,
+   conf->lds_size, conf->scratch_bytes_per_wave);
}
 
pipe_debug_message(debug, SHADER_INFO,
   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d 
LDS: %d Scratch: %d",
-  shader->config.num_sgprs, shader->config.num_vgprs,
-  binary->code_size, shader->config.lds_size,
-  shader->config.scratch_bytes_per_wave);
+  conf->num_sgprs, conf->num_vgprs, binary->code_size,
+  conf->lds_size, conf->scratch_bytes_per_wave);
 }
 
 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
@@ -3900,7 +3900,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
return r;
}
 
-   si_shader_binary_read(sscreen, shader, debug, processor);
+   si_shader_binary_read(sscreen, >binary, >config,
+ debug, processor);
 
r = si_shader_binary_upload(sscreen, shader);
if (r)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 93d5af6..b0abacc 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -339,8 +339,11 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
-void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
-  struct pipe_debug_callback *debug, unsigned 
processor);
+void si_shader_binary_read(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 22/23] radeonsi: move si_shader_dump call out of si_compile_llvm

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  3 +++
 src/gallium/drivers/radeonsi/si_shader.c  | 10 --
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 2380242..ffac656 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -125,6 +125,9 @@ static void *si_create_compute_state(
si_compile_llvm(sctx->screen, 
>kernels[i].binary,
>kernels[i].config, sctx->tm,
mod, >b.debug, 
TGSI_PROCESSOR_COMPUTE);
+   si_shader_dump(sctx->screen, 
>kernels[i].binary,
+  >kernels[i].config,
+  >b.debug, TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, 
>kernels[i]);
LLVMDisposeModule(mod);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index fea5b14..58d16cf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3913,7 +3913,6 @@ int si_compile_llvm(struct si_screen *sscreen,
}
 
si_shader_binary_read_config(binary, conf, 0);
-   si_shader_dump(sscreen, binary, conf, debug, processor);
 
FREE(binary->config);
FREE(binary->global_symbol_offsets);
@@ -3996,8 +3995,12 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
_shader_ctx->shader->config, si_shader_ctx->tm,
bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
-   if (!r)
+   if (!r) {
+   si_shader_dump(sscreen, _shader_ctx->shader->binary,
+  _shader_ctx->shader->config, debug,
+  TGSI_PROCESSOR_GEOMETRY);
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
+   }
 
radeon_llvm_dispose(_shader_ctx->radeon_bld);
 
@@ -4199,6 +4202,9 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}
 
+   si_shader_dump(sscreen, >binary, >config,
+  debug, si_shader_ctx.type);
+
r = si_shader_binary_upload(sscreen, shader);
if (r) {
fprintf(stderr, "LLVM failed to upload shader\n");
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/23] radeonsi: don't pass si_shader to si_compile_llvm

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  3 ++-
 src/gallium/drivers/radeonsi/si_shader.c  | 33 ++-
 src/gallium/drivers/radeonsi/si_shader.h  | 10 +++---
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index aedea8e..3562bd8 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -122,7 +122,8 @@ static void *si_create_compute_state(
for (i = 0; i < program->num_kernels; i++) {
LLVMModuleRef mod = 
radeon_llvm_get_kernel_module(program->llvm_ctx, i,
 code, 
header->num_bytes);
-   si_compile_llvm(sctx->screen, >kernels[i], 
sctx->tm,
+   si_compile_llvm(sctx->screen, 
>kernels[i].binary,
+   >kernels[i].config, sctx->tm,
mod, >b.debug, 
TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, 
>kernels[i]);
LLVMDisposeModule(mod);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 91473a7..770f5b7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3878,9 +3878,13 @@ void si_shader_binary_read(struct si_screen *sscreen,
   conf->lds_size, conf->scratch_bytes_per_wave);
 }
 
-int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-   LLVMTargetMachineRef tm, LLVMModuleRef mod,
-   struct pipe_debug_callback *debug, unsigned processor)
+int si_compile_llvm(struct si_screen *sscreen,
+   struct radeon_shader_binary *binary,
+   struct si_shader_config *conf,
+   LLVMTargetMachineRef tm,
+   LLVMModuleRef mod,
+   struct pipe_debug_callback *debug,
+   unsigned processor)
 {
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
@@ -3892,21 +3896,20 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
LLVMDumpModule(mod);
}
 
-   if (!si_replace_shader(count, >binary)) {
-   r = radeon_llvm_compile(mod, >binary,
+   if (!si_replace_shader(count, binary)) {
+   r = radeon_llvm_compile(mod, binary,
r600_get_llvm_processor_name(sscreen->b.family), tm,
debug);
if (r)
return r;
}
 
-   si_shader_binary_read(sscreen, >binary, >config,
- debug, processor);
+   si_shader_binary_read(sscreen, binary, conf, debug, processor);
 
-   FREE(shader->binary.config);
-   FREE(shader->binary.global_symbol_offsets);
-   shader->binary.config = NULL;
-   shader->binary.global_symbol_offsets = NULL;
+   FREE(binary->config);
+   FREE(binary->global_symbol_offsets);
+   binary->config = NULL;
+   binary->global_symbol_offsets = NULL;
return r;
 }
 
@@ -3980,8 +3983,9 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
if (dump)
fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
 
-   r = si_compile_llvm(sscreen, si_shader_ctx->shader,
-   si_shader_ctx->tm, bld_base->base.gallivm->module,
+   r = si_compile_llvm(sscreen, _shader_ctx->shader->binary,
+   _shader_ctx->shader->config, si_shader_ctx->tm,
+   bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
if (!r)
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
@@ -4179,7 +4183,8 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
radeon_llvm_finalize_module(_shader_ctx.radeon_bld);
 
mod = bld_base->base.gallivm->module;
-   r = si_compile_llvm(sscreen, shader, tm, mod, debug, 
si_shader_ctx.type);
+   r = si_compile_llvm(sscreen, >binary, >config, tm,
+   mod, debug, si_shader_ctx.type);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
goto out;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index b0abacc..2220fc7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -333,9 +333,13 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
 struct si_shader *shader,
 struct pipe_debug_callback *debug);
 void si_dump_shader_key(unsigned shader, 

[Mesa-dev] [PATCH 23/23] radeonsi: adjust the parameters of si_shader_dump

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

The function will be extended to dump all binaries shaders will consist of,
so si_shader* makes sense here.
---
 src/gallium/drivers/radeonsi/si_compute.c |  6 ++
 src/gallium/drivers/radeonsi/si_shader.c  | 18 +++---
 src/gallium/drivers/radeonsi/si_shader.h  |  7 ++-
 3 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index ffac656..5a08cbf 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -125,8 +125,7 @@ static void *si_create_compute_state(
si_compile_llvm(sctx->screen, 
>kernels[i].binary,
>kernels[i].config, sctx->tm,
mod, >b.debug, 
TGSI_PROCESSOR_COMPUTE);
-   si_shader_dump(sctx->screen, 
>kernels[i].binary,
-  >kernels[i].config,
+   si_shader_dump(sctx->screen, >kernels[i],
   >b.debug, TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, 
>kernels[i]);
LLVMDisposeModule(mod);
@@ -143,8 +142,7 @@ static void *si_create_compute_state(
init_scratch_buffer(sctx, program);
si_shader_binary_read_config(>shader.binary,
 >shader.config, 0);
-   si_shader_dump(sctx->screen, >shader.binary,
-  >shader.config, >b.debug,
+   si_shader_dump(sctx->screen, >shader, >b.debug,
   TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, >shader);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 58d16cf..b1a9a1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3873,17 +3873,15 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
   conf->lds_size, conf->scratch_bytes_per_wave);
 }
 
-void si_shader_dump(struct si_screen *sscreen,
-   struct radeon_shader_binary *binary,
-   struct si_shader_config *conf,
-   struct pipe_debug_callback *debug,
-   unsigned processor)
+void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
+   struct pipe_debug_callback *debug, unsigned processor)
 {
if (r600_can_dump_shader(&sscreen->b, processor))
if (!(sscreen->b.debug_flags & DBG_NO_ASM))
-   si_shader_dump_disassembly(binary, debug);
+   si_shader_dump_disassembly(>binary, debug);
 
-   si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
+   si_shader_dump_stats(sscreen, >config,
+shader->binary.code_size, debug, processor);
 }
 
 int si_compile_llvm(struct si_screen *sscreen,
@@ -3996,8 +3994,7 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
if (!r) {
-   si_shader_dump(sscreen, _shader_ctx->shader->binary,
-  _shader_ctx->shader->config, debug,
+   si_shader_dump(sscreen, si_shader_ctx->shader, debug,
   TGSI_PROCESSOR_GEOMETRY);
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
}
@@ -4202,8 +4199,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}
 
-   si_shader_dump(sscreen, >binary, >config,
-  debug, si_shader_ctx.type);
+   si_shader_dump(sscreen, shader, debug, si_shader_ctx.type);
 
r = si_shader_binary_upload(sscreen, shader);
if (r) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 712bcd9..1635358 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -344,11 +344,8 @@ void si_shader_destroy(struct si_shader *shader);
 void si_shader_destroy_binary(struct radeon_shader_binary *binary);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
-void si_shader_dump(struct si_screen *sscreen,
-   struct radeon_shader_binary *binary,
-   struct si_shader_config *conf,
-   struct pipe_debug_callback *debug,
-   unsigned processor);
+void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
+   struct pipe_debug_callback *debug, unsigned processor);
 void si_shader_apply_scratch_relocs(struct si_context 

[Mesa-dev] [PATCH 21/23] radeonsi: inline si_shader_binary_read

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c | 4 ++--
 src/gallium/drivers/radeonsi/si_shader.c  | 8 +---
 src/gallium/drivers/radeonsi/si_shader.h  | 2 --
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index ffa941b..2380242 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -138,8 +138,8 @@ static void *si_create_compute_state(
 * the shader code to the GPU.
 */
init_scratch_buffer(sctx, program);
-   si_shader_binary_read(>shader.binary,
- >shader.config);
+   si_shader_binary_read_config(>shader.binary,
+>shader.config, 0);
si_shader_dump(sctx->screen, >shader.binary,
   >shader.config, >b.debug,
   TGSI_PROCESSOR_COMPUTE);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 1f334af..fea5b14 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3886,12 +3886,6 @@ void si_shader_dump(struct si_screen *sscreen,
si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
 }
 
-void si_shader_binary_read(struct radeon_shader_binary *binary,
-  struct si_shader_config *conf)
-{
-   si_shader_binary_read_config(binary, conf, 0);
-}
-
 int si_compile_llvm(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,
@@ -3918,7 +3912,7 @@ int si_compile_llvm(struct si_screen *sscreen,
return r;
}
 
-   si_shader_binary_read(binary, conf);
+   si_shader_binary_read_config(binary, conf, 0);
si_shader_dump(sscreen, binary, conf, debug, processor);
 
FREE(binary->config);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 51dfcd0..712bcd9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -344,8 +344,6 @@ void si_shader_destroy(struct si_shader *shader);
 void si_shader_destroy_binary(struct radeon_shader_binary *binary);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
-void si_shader_binary_read(struct radeon_shader_binary *binary,
-  struct si_shader_config *conf);
 void si_shader_dump(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/23] radeonsi: simplify setting the DONE bit for PS exports

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

First find out what the last export is and simply set the DONE bit there.
---
 src/gallium/drivers/radeonsi/si_shader.c| 126 ++--
 src/gallium/drivers/radeonsi/si_state_shaders.c |   2 +-
 2 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 85113c0..8441fb4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2109,10 +2109,36 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = >selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
LLVMValueRef args[9];
-   LLVMValueRef last_args[9] = { 0 };
int depth_index = -1, stencil_index = -1, samplemask_index = -1;
+   int last_color_export = -1;
int i;
 
+   /* If there are no outputs, add a dummy export. */
+   if (!info->num_outputs) {
+   args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled 
channels */
+   args[1] = uint->one; /* whether the EXEC mask is valid */
+   args[2] = uint->one; /* DONE bit */
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_MRT);
+   args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+   args[5] = uint->zero; /* R */
+   args[6] = uint->zero; /* G */
+   args[7] = uint->zero; /* B */
+   args[8] = uint->zero; /* A */
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+   return;
+   }
+
+   /* Determine the last export. If MRTZ is present, it's always last.
+* Otherwise, find the last color export.
+*/
+   if (!info->writes_z && !info->writes_stencil && 
!info->writes_samplemask)
+   for (i = 0; i < info->num_outputs; i++)
+   if (info->output_semantic_name[i] == 
TGSI_SEMANTIC_COLOR)
+   last_color_export = i;
+
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
@@ -2157,56 +2183,48 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
 
break;
default:
-   target = 0;
fprintf(stderr,
"Warning: SI unhandled fs output type:%d\n",
semantic_name);
+   continue;
}
 
-   si_llvm_init_export_args_load(bld_base,
- 
si_shader_ctx->radeon_bld.soa.outputs[i],
- target, args);
-
-   if (semantic_name == TGSI_SEMANTIC_COLOR) {
-   /* If there is an export instruction waiting to be 
emitted, do so now. */
-   if (last_args[0]) {
-   lp_build_intrinsic(base->gallivm->builder,
-  "llvm.SI.export",
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+   if (semantic_index == 0 &&
+   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   for (int c = 1; c <= 
si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+   si_llvm_init_export_args_load(bld_base,
+ 
si_shader_ctx->radeon_bld.soa.outputs[i],
+ 
V_008DFC_SQ_EXP_MRT + c, args);
+   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
   
LLVMVoidTypeInContext(base->gallivm->context),
-  last_args, 9, 0);
+  args, 9, 0);
}
+   }
 
-   /* This instruction will be emitted at the end of the 
shader. */
-   memcpy(last_args, args, sizeof(args));
-
-   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is 
true. */
-   if (semantic_index == 0 &&
-   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
-   for (int c = 1; c <= 
si_shader_ctx->shader->key.ps.last_cbuf; c++) {
-   si_llvm_init_export_args_load(bld_base,
- 

[Mesa-dev] [PATCH 20/23] radeonsi: move si_shader_dump call out of si_shader_binary_read

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_compute.c |  8 +---
 src/gallium/drivers/radeonsi/si_shader.c  | 21 +
 src/gallium/drivers/radeonsi/si_shader.h  | 12 +++-
 3 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 3562bd8..ffa941b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -138,9 +138,11 @@ static void *si_create_compute_state(
 * the shader code to the GPU.
 */
init_scratch_buffer(sctx, program);
-   si_shader_binary_read(sctx->screen, >shader.binary,
- >shader.config, >b.debug,
- TGSI_PROCESSOR_COMPUTE);
+   si_shader_binary_read(>shader.binary,
+ >shader.config);
+   si_shader_dump(sctx->screen, >shader.binary,
+  >shader.config, >b.debug,
+  TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, >shader);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 95cdf8a..1f334af 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3873,11 +3873,11 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
   conf->lds_size, conf->scratch_bytes_per_wave);
 }
 
-static void si_shader_dump(struct si_screen *sscreen,
-  struct radeon_shader_binary *binary,
-  struct si_shader_config *conf,
-  struct pipe_debug_callback *debug,
-  unsigned processor)
+void si_shader_dump(struct si_screen *sscreen,
+   struct radeon_shader_binary *binary,
+   struct si_shader_config *conf,
+   struct pipe_debug_callback *debug,
+   unsigned processor)
 {
if (r600_can_dump_shader(>b, processor))
if (!(sscreen->b.debug_flags & DBG_NO_ASM))
@@ -3886,14 +3886,10 @@ static void si_shader_dump(struct si_screen *sscreen,
si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
 }
 
-void si_shader_binary_read(struct si_screen *sscreen,
-  struct radeon_shader_binary *binary,
-  struct si_shader_config *conf,
-  struct pipe_debug_callback *debug,
-  unsigned processor)
+void si_shader_binary_read(struct radeon_shader_binary *binary,
+  struct si_shader_config *conf)
 {
si_shader_binary_read_config(binary, conf, 0);
-   si_shader_dump(sscreen, binary, conf, debug, processor);
 }
 
 int si_compile_llvm(struct si_screen *sscreen,
@@ -3922,7 +3918,8 @@ int si_compile_llvm(struct si_screen *sscreen,
return r;
}
 
-   si_shader_binary_read(sscreen, binary, conf, debug, processor);
+   si_shader_binary_read(binary, conf);
+   si_shader_dump(sscreen, binary, conf, debug, processor);
 
FREE(binary->config);
FREE(binary->global_symbol_offsets);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 780383c..51dfcd0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -344,11 +344,13 @@ void si_shader_destroy(struct si_shader *shader);
 void si_shader_destroy_binary(struct radeon_shader_binary *binary);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
-void si_shader_binary_read(struct si_screen *sscreen,
-  struct radeon_shader_binary *binary,
-  struct si_shader_config *conf,
-  struct pipe_debug_callback *debug,
-  unsigned processor);
+void si_shader_binary_read(struct radeon_shader_binary *binary,
+  struct si_shader_config *conf);
+void si_shader_dump(struct si_screen *sscreen,
+   struct radeon_shader_binary *binary,
+   struct si_shader_config *conf,
+   struct pipe_debug_callback *debug,
+   unsigned processor);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/23] radeonsi: separate shader dumping code to si_shader_dump and *_dump_stats

2016-01-06 Thread Marek Olšák
From: Marek Olšák 

Eventually, I'd like to dump stats for several combined binaries, which is
why you don't see a binary parameter in si_shader_dump_stats.
---
 src/gallium/drivers/radeonsi/si_shader.c | 42 +++-
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0773fff..95cdf8a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3853,31 +3853,49 @@ static void si_shader_dump_disassembly(const struct 
radeon_shader_binary *binary
}
 }
 
-void si_shader_binary_read(struct si_screen *sscreen,
-  struct radeon_shader_binary *binary,
-  struct si_shader_config *conf,
-  struct pipe_debug_callback *debug,
-  unsigned processor)
+static void si_shader_dump_stats(struct si_screen *sscreen,
+struct si_shader_config *conf,
+unsigned code_size,
+struct pipe_debug_callback *debug,
+unsigned processor)
 {
-   si_shader_binary_read_config(binary, conf, 0);
-
if (r600_can_dump_shader(>b, processor)) {
-   if (!(sscreen->b.debug_flags & DBG_NO_ASM))
-   si_shader_dump_disassembly(binary, debug);
-
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
"Scratch: %d bytes per wave\n\n",
-   conf->num_sgprs, conf->num_vgprs, binary->code_size,
+   conf->num_sgprs, conf->num_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave);
}
 
pipe_debug_message(debug, SHADER_INFO,
   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d 
LDS: %d Scratch: %d",
-  conf->num_sgprs, conf->num_vgprs, binary->code_size,
+  conf->num_sgprs, conf->num_vgprs, code_size,
   conf->lds_size, conf->scratch_bytes_per_wave);
 }
 
+static void si_shader_dump(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor)
+{
+   if (r600_can_dump_shader(>b, processor))
+   if (!(sscreen->b.debug_flags & DBG_NO_ASM))
+   si_shader_dump_disassembly(binary, debug);
+
+   si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
+}
+
+void si_shader_binary_read(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor)
+{
+   si_shader_binary_read_config(binary, conf, 0);
+   si_shader_dump(sscreen, binary, conf, debug, processor);
+}
+
 int si_compile_llvm(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] llvmpipe: scale up bounding box planes to subpixel precision

2016-01-06 Thread Brian Paul

Intricate stuff.  As long as testing was successful, looks good to me.

For both, Reviewed-by: Brian Paul 

On 01/05/2016 05:06 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

Otherwise some planes we get in rasterization have subpixel precision, others
not. Doesn't matter so far, but will soon. (OpenGL actually supports viewports
with subpixel accuracy, so could even do bounding box calcs with that).
---
  src/gallium/drivers/llvmpipe/lp_setup_line.c  | 20 ++--
  src/gallium/drivers/llvmpipe/lp_setup_point.c | 20 ++--
  src/gallium/drivers/llvmpipe/lp_setup_tri.c   | 20 ++--
  3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c 
b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index a0de599..f425825 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -713,24 +713,24 @@ try_setup_line( struct lp_setup_context *setup,
const struct u_rect *scissor =
   &setup->scissors[viewport_index];

-  plane[4].dcdx = -1;
+  plane[4].dcdx = -1 << 8;
plane[4].dcdy = 0;
-  plane[4].c = 1-scissor->x0;
-  plane[4].eo = 1;
+  plane[4].c = (1-scissor->x0) << 8;
+  plane[4].eo = 1 << 8;

-  plane[5].dcdx = 1;
+  plane[5].dcdx = 1 << 8;
plane[5].dcdy = 0;
-  plane[5].c = scissor->x1+1;
+  plane[5].c = (scissor->x1+1) << 8;
plane[5].eo = 0;

plane[6].dcdx = 0;
-  plane[6].dcdy = 1;
-  plane[6].c = 1-scissor->y0;
-  plane[6].eo = 1;
+  plane[6].dcdy = 1 << 8;
+  plane[6].c = (1-scissor->y0) << 8;
+  plane[6].eo = 1 << 8;

plane[7].dcdx = 0;
-  plane[7].dcdy = -1;
-  plane[7].c = scissor->y1+1;
+  plane[7].dcdy = -1 << 8;
+  plane[7].c = (scissor->y1+1) << 8;
plane[7].eo = 0;
 }

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c 
b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 14c389f..ddb6f0e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -492,24 +492,24 @@ try_setup_point( struct lp_setup_context *setup,
 {
struct lp_rast_plane *plane = GET_PLANES(point);

-  plane[0].dcdx = -1;
+  plane[0].dcdx = -1 << 8;
plane[0].dcdy = 0;
-  plane[0].c = 1-bbox.x0;
-  plane[0].eo = 1;
+  plane[0].c = (1-bbox.x0) << 8;
+  plane[0].eo = 1 << 8;

-  plane[1].dcdx = 1;
+  plane[1].dcdx = 1 << 8;
plane[1].dcdy = 0;
-  plane[1].c = bbox.x1+1;
+  plane[1].c = (bbox.x1+1) << 8;
plane[1].eo = 0;

plane[2].dcdx = 0;
-  plane[2].dcdy = 1;
-  plane[2].c = 1-bbox.y0;
-  plane[2].eo = 1;
+  plane[2].dcdy = 1 << 8;
+  plane[2].c = (1-bbox.y0) << 8;
+  plane[2].eo = 1 << 8;

plane[3].dcdx = 0;
-  plane[3].dcdy = -1;
-  plane[3].c = bbox.y1+1;
+  plane[3].dcdy = -1 << 8;
+  plane[3].c = (bbox.y1+1) << 8;
plane[3].eo = 0;
 }

diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c 
b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index fefd1c1..5ad4ac1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -576,24 +576,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
 if (nr_planes == 7) {
const struct u_rect *scissor = &setup->scissors[viewport_index];

-  plane[3].dcdx = -1;
+  plane[3].dcdx = -1 << 8;
plane[3].dcdy = 0;
-  plane[3].c = 1-scissor->x0;
-  plane[3].eo = 1;
+  plane[3].c = (1-scissor->x0) << 8;
+  plane[3].eo = 1 << 8;

-  plane[4].dcdx = 1;
+  plane[4].dcdx = 1 << 8;
plane[4].dcdy = 0;
-  plane[4].c = scissor->x1+1;
+  plane[4].c = (scissor->x1+1) << 8;
plane[4].eo = 0;

plane[5].dcdx = 0;
-  plane[5].dcdy = 1;
-  plane[5].c = 1-scissor->y0;
-  plane[5].eo = 1;
+  plane[5].dcdy = 1 << 8;
+  plane[5].c = (1-scissor->y0) << 8;
+  plane[5].eo = 1 << 8;

plane[6].dcdx = 0;
-  plane[6].dcdy = -1;
-  plane[6].c = scissor->y1+1;
+  plane[6].dcdy = -1 << 8;
+  plane[6].c = (scissor->y1+1) << 8;
plane[6].eo = 0;
 }




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449
Bug 77449 depends on bug 76664, which changed state.

Bug 76664 Summary: Metro: Last Light segfaults very often in level 10 (swamp) 
on loading last checkpoint
https://bugs.freedesktop.org/show_bug.cgi?id=76664

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |WONTFIX

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations

2016-01-06 Thread Ilia Mirkin
On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri
 wrote:
> Previously each member was being counted as using a single slot,
> count_attribute_slots() fixes the count for array and struct members.
>
> Also don't assign a negative to the unsigned expl_location variable.
> ---
>
>  Fixes these new piglit tests:
>http://patchwork.freedesktop.org/patch/69531/
>
>  src/glsl/ast_to_hir.cpp | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index 0197cdc..50d5e22 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -6408,12 +6408,13 @@ ast_process_struct_or_iface_block_members(exec_list 
> *instructions,
>  if (process_qualifier_constant(state, &loc, "location",
> qual->location, &qual_location)) {
> fields[i].location = VARYING_SLOT_VAR0 + qual_location;
> -   expl_location = fields[i].location + 1;
> +   expl_location = fields[i].location +
> +  fields[i].type->count_attribute_slots(false);
>  }
>   } else {
>  if (layout && layout->flags.q.explicit_location) {
> fields[i].location = expl_location;
> -   expl_location = expl_location + 1;
> +   expl_location += fields[i].type->count_attribute_slots(false);
>  } else {
> fields[i].location = -1;
>  }
> @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list *instructions,
>
> state->struct_specifier_depth++;
>
> -   unsigned expl_location = -1;
> +   unsigned expl_location = 0;
> if (layout && layout->flags.q.explicit_location) {
>if (!process_qualifier_constant(state, , "location",
>layout->location, _location)) {
> @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list *instructions,
>return NULL;
> }
>
> -   unsigned expl_location = -1;
> +   unsigned expl_location = 0;

There are a number of places that check for location != -1 as a sanity
check... won't this defeat that?

> if (layout.flags.q.explicit_location) {
>if (!process_qualifier_constant(state, , "location",
>layout.location, _location)) {
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/23] radeonsi: simplify setting the DONE bit for PS exports

2016-01-06 Thread Nicolai Hähnle

Patches 1-5 are

Reviewed-by: Nicolai Hähnle 

On 06.01.2016 07:41, Marek Olšák wrote:

From: Marek Olšák 

First find out what the last export is and simply set the DONE bit there.
---
  src/gallium/drivers/radeonsi/si_shader.c| 126 ++--
  src/gallium/drivers/radeonsi/si_state_shaders.c |   2 +-
  2 files changed, 55 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 85113c0..8441fb4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2109,10 +2109,36 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = >selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
LLVMValueRef args[9];
-   LLVMValueRef last_args[9] = { 0 };
int depth_index = -1, stencil_index = -1, samplemask_index = -1;
+   int last_color_export = -1;
int i;

+   /* If there are no outputs, add a dummy export. */
+   if (!info->num_outputs) {
+   args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled 
channels */
+   args[1] = uint->one; /* whether the EXEC mask is valid */
+   args[2] = uint->one; /* DONE bit */
+   args[3] = lp_build_const_int32(base->gallivm, 
V_008DFC_SQ_EXP_MRT);
+   args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+   args[5] = uint->zero; /* R */
+   args[6] = uint->zero; /* G */
+   args[7] = uint->zero; /* B */
+   args[8] = uint->zero; /* A */
+
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+   return;
+   }
+
+   /* Determine the last export. If MRTZ is present, it's always last.
+* Otherwise, find the last color export.
+*/
+   if (!info->writes_z && !info->writes_stencil && 
!info->writes_samplemask)
+   for (i = 0; i < info->num_outputs; i++)
+   if (info->output_semantic_name[i] == 
TGSI_SEMANTIC_COLOR)
+   last_color_export = i;
+
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
@@ -2157,56 +2183,48 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)

break;
default:
-   target = 0;
fprintf(stderr,
"Warning: SI unhandled fs output type:%d\n",
semantic_name);
+   continue;
}

-   si_llvm_init_export_args_load(bld_base,
- 
si_shader_ctx->radeon_bld.soa.outputs[i],
- target, args);
-
-   if (semantic_name == TGSI_SEMANTIC_COLOR) {
-   /* If there is an export instruction waiting to be 
emitted, do so now. */
-   if (last_args[0]) {
-   lp_build_intrinsic(base->gallivm->builder,
-  "llvm.SI.export",
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+   if (semantic_index == 0 &&
+   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   for (int c = 1; c <= 
si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+   si_llvm_init_export_args_load(bld_base,
+ 
si_shader_ctx->radeon_bld.soa.outputs[i],
+ 
V_008DFC_SQ_EXP_MRT + c, args);
+   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
   
LLVMVoidTypeInContext(base->gallivm->context),
-  last_args, 9, 0);
+  args, 9, 0);
}
+   }

-   /* This instruction will be emitted at the end of the 
shader. */
-   memcpy(last_args, args, sizeof(args));
-
-   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is 
true. */
-   if (semantic_index == 0 &&
-   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
-   for (int c = 1; c <= 
si_shader_ctx->shader->key.ps.last_cbuf; c++) {
-   

Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization

2016-01-06 Thread Roland Scheidegger
Am 06.01.2016 um 17:31 schrieb Jose Fonseca:
> On 06/01/16 16:26, Jose Fonseca wrote:
>> On 06/01/16 00:06, srol...@vmware.com wrote:
>>> From: Roland Scheidegger 
>>>
>>> The trick here is to recognize that in the c + n * dcdx calculations,
>>> not only can the lower FIXED_ORDER bits not change (as the dcdx values
>>> have those all zero) but that this means the sign bit of the
>>> calculations
>>> cannot be different as well, that is
>>> sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)).
>>> That shaves off more than enough bits to never require 64bit masks.
>>> A shifted plane c value could still easily exceed 32 bits, however
>>> since we
>>> throw out planes which are trivial accept even before binning (and
>>> similarly
>>> don't even get to see tris for which there was a trivial reject
>>> plane)) this
>>> is never a problem.
>>> The idea isn't all that revolutionary, in fact something similar was
>>> tried
>>> ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the
>>> values were
>>> only 32 bit anyway. I believe now it didn't quite work then because the
>>> adjustment needed for testing trivial reject / partial masks wasn't
>>> handled
>>> correctly.
>>> This still keeps the separate 32/64 bit paths for now, as the 32 bit
>>> one still
>>> looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo
>>> unscaled
>>> from setup which would be a good reason to ditch the 32 bit path, we'd
>>> need to
>>> change the special-purpose rasterization functions for small tris).
>>>
>>> This passes piglit triangle-rasterization (-fbo -auto -max_size
>>> -subpixelbits 8). It still fails triangle-rasterization-overdraw
>>> -max_size
>>> (no change, fails everything at position 2048 - interestingly for
>>> softpipe,
>>> nvidia maxwell 1 blob, and amd evergreen open-source drivers the test
>>> fails
>>> as well but at 4096 - seems like we're missing a float mantissa bit
>>> somewhere!).
>>
>> I don't think that's how the test is supposed to be run.
>>
>> If you do an apitrace, you'll see the test creates a fbo with 1000x1000,
>> a viewport with 16Kx16K, and does a readpixels of 4Kx4K...
> 
> The problem is that the generic "-fbo" option is not useful for this, as
> we can't reliably resize it after the fact.
> 
> Take a look at tests/general/triangle-rasterization.cpp -- it has a
> different option "-use_fbo" that creates its own fbo.
OK I was running that the wrong way too I think. This one still passes
with -max_size -use_fbo -subpixelbits 8 (takes _forever_ though - all
due to convert_ubyte in readpixel path...)

triangle-rasterization-overdraw with just -auto passes. The max_size
parameter is a bit confusing since it won't do anything at
all without -fbo as piglit_width/height will just get overwritten to
window_width/height (and with fbo it will just fail badly). Increasing
the window size manually to 8192/8192 won't really work either as the
size will be cut down to screen size. However, increasing this and then
use -fbo actually does the right thing. And passes.
Would be nice if piglit could pick up those size parameters _after_
piglit_init...

Roland


> Jose
> 
> 
>>
>> Jose
>>
>>> ---
>>>   src/gallium/drivers/llvmpipe/lp_rast_tri.c |  84
>>> +--
>>>   src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107
>>> +
>>>   2 files changed, 133 insertions(+), 58 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
>>> b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
>>> index c9b9221..a4dd6ef 100644
>>> --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
>>> +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
>>> @@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task,
>>>   }
>>>
>>>   static inline unsigned
>>> -build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
>>> +build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy)
>>>   {
>>>  unsigned mask = 0;
>>>
>>> -   int64_t c0 = c;
>>> -   int64_t c1 = c0 + dcdy;
>>> -   int64_t c2 = c1 + dcdy;
>>> -   int64_t c3 = c2 + dcdy;
>>> -
>>> -   mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
>>> -   mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
>>> -   mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
>>> -   mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
>>> -   mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
>>> -   mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
>>> -   mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
>>> -   mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
>>> -   mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
>>> -   mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
>>> -   mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
>>> -   mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
>>> -   mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
>>> -   mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
>>> -   mask |= ((c3 + 2 * dcdx) >> 

[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449

Ernst Sjöstrand  changed:

   What|Removed |Added

 CC||ern...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449

Ernst Sjöstrand  changed:

   What|Removed |Added

 Depends on||92944

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 23/28] glsl: add pack varying to resource list for vertex input / fragment output

2016-01-06 Thread Anders Jonsson
On 2015-12-29 06:00, Timothy Arceri wrote:
> This is needed now that we pack these type of varyings when they have a
> component layout qualifier.
> ---
>  src/glsl/linker.cpp | 15 ---
>  1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index 44dd7f0..52a326a 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -3763,13 +3763,14 @@ build_program_resource_list(struct gl_shader_program 
> *shProg)
> if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
>return;
>  
> -   /* Program interface needs to expose varyings in case of SSO. */
> -   if (shProg->SeparateShader) {
> -  if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT))
> - return;
> -  if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT))
> - return;
> -   }
> +   /* Program interface needs to expose varyings in case of SSO, or in case 
> of
> +* vertex inputs/fragement outputs that are packed unsing the component

s/fragement/fragment

s/unsing/using

> +* layout qualifier.
> +*/
> +   if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT))
> +  return;
> +   if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT))
> +  return;
>  
> if (!add_fragdata_arrays(shProg))
>return;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] st/mesa: check state->mesa in early return check in st_validate_state()

2016-01-06 Thread Brian Paul
We were checking the dirty->st flags but not the dirty->mesa flags.
When we took the early return, we didn't clear the dirty->mesa flags
so the next time we called st_validate_state() we'd often flush the
glBitmap cache.  And since st_validate_state() is called from
st_Bitmap(), it meant we flushed the bitmap cache for every glBitmap()
call.

This change seems to recover most of the performance loss observed
with the ipers demo on llvmpipe since commit 36c93a6fae27561.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_atom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 43dbadd..c1a9d00 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -188,7 +188,7 @@ void st_validate_state( struct st_context *st )
 
st_manager_validate_framebuffers(st);
 
-   if (state->st == 0)
+   if (state->st == 0 && state->mesa == 0)
   return;
 
/*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/r600: Replace ALIGN_DIVUP with DIV_ROUND_UP

2016-01-06 Thread Krzysztof A. Sobiecki
Nicolai Hähnle  writes:

> On 30.12.2015 13:44, Krzysztof A. Sobiecki wrote:
>> Nicolai Hähnle  writes:
>>
>>> On 30.12.2015 08:42, Krzysztof A. Sobiecki wrote:
 Nicolai Hähnle  writes:

> On 29.12.2015 14:27, Krzysztof A. Sobiecki wrote:
>> From: Krzysztof Sobiecki 
>>
>> ALIGN_DIVUP is a driver specific(r600g) macro that duplicates 
>> DIV_ROUND_UP functionality.
> >> Replacing it with DIV_ROUND_UP eliminates this problem.
>
> Those macros are actually slightly different, and the assembly
> generated by the ALIGN_DIVUP looks clearly better to me.
>
> I remember seeing a very long thread about this not so long ago - what
> was the resolution there?
>
> Cheers,
> Nicolai
>
 I would like to remove ALIGN_DIVUP first and then debate which
 implementation DIV_ROUND_UP should use.

 btw. I prefer 1 + ((x - 1) / y)
>>>
>>> That produces an incorrect result when x is an unsigned type and equal
>>> to 0 -- and that is something that existing code definitely relies on.
>>>
>>> Cheers,
>>> Nicolai
>>>
>> Then what about (x / y) + (x % y != 0)
>
> Generates similar assembly to the DIV_ROUND_UP version.
>
> Anyway, now that I look at it again I'd say just go ahead and add my
> R-b. Yes, the assembly looks slightly worse, but only slightly, and
> avoiding surprises with overflows down the line seems like a good
> idea.
>
> Cheers,
> Nicolai
>
I don't have commit access, can you push it, sorry.
-- 
X was an interactive protocol: 
alpha blending a full-screen image looked like slugs racing down the monitor. 
http://www.keithp.com/~keithp/talks/usenix2000/render.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/23] RadeonSI: Restructuring shader code generation part 2

2016-01-06 Thread Tom Stellard
On Wed, Jan 06, 2016 at 01:41:22PM +0100, Marek Olšák wrote:
> Hi,
> 
> These boring patches focus on restructuring pixel shader output handling and 
> code around si_compile_llvm (config, dumping, etc.). They are mostly code 
> movements and dividing functions into smaller ones, so that they can be 
> re-used by pixel shader epilog compilation code.
> 
> Please review.

These all look OK to me.

-Tom
> 
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] st/mesa: use GLbitfield in st_state_flags, add comments

2016-01-06 Thread Brian Paul
Use GLbitfield instead of GLuint to be consistent with other variables.
---
 src/mesa/state_tracker/st_context.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 35c8932..91b0f97 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -65,8 +65,8 @@ struct u_upload_mgr;
 
 
 struct st_state_flags {
-   GLuint mesa;
-   uint64_t st;
+   GLbitfield mesa;  /**< Mask of _NEW_x flags */
+   uint64_t st;  /**< Mask of ST_NEW_x flags */
 };
 
 struct st_tracked_state {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] s/GLuint/GLbitfield/ for st_invalidate_state() parameter

2016-01-06 Thread Brian Paul
To match dd_function_table::UpdateState().
---
 src/mesa/state_tracker/st_context.c | 2 +-
 src/mesa/state_tracker/st_context.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index 31cc99d..e12c166 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -99,7 +99,7 @@ static void st_Enable(struct gl_context * ctx, GLenum cap, 
GLboolean state)
 /**
  * Called via ctx->Driver.UpdateState()
  */
-void st_invalidate_state(struct gl_context * ctx, GLuint new_state)
+void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
 {
struct st_context *st = st_context(ctx);
 
diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 276fa63..35c8932 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -252,7 +252,7 @@ struct st_framebuffer
 extern void st_init_driver_functions(struct pipe_screen *screen,
  struct dd_function_table *functions);
 
-void st_invalidate_state(struct gl_context * ctx, GLuint new_state);
+void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state);
 
 
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] st/mesa: be more careful about state validation in st_Bitmap()

2016-01-06 Thread Brian Paul
If the only dirty state is mesa's _NEW_PROGRAM_CONSTANTS flag, we can
skip state validation before drawing a bitmap since that state doesn't
affect bitmap rendering.

This further increases the performance of the ipers demo on llvmpipe
to about what it was before commit 36c93a6fae27561.
---
 src/mesa/state_tracker/st_cb_bitmap.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c 
b/src/mesa/state_tracker/st_cb_bitmap.c
index c2cbcbd..191f144 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -622,7 +622,14 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
if (width == 0 || height == 0)
   return;
 
-   st_validate_state(st);
+   /* We only need to validate state if the st dirty flags are set or
+* any non-_NEW_PROGRAM_CONSTANTS mesa flags are set.  The VS we use
+* for bitmap drawing uses no constants and the FS constants are
+* explicitly uploaded in the draw_bitmap_quad() function.
+*/
+   if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) {
+  st_validate_state(st);
+   }
 
if (!st->bitmap.vs) {
   /* create pass-through vertex shader now */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization

2016-01-06 Thread Jose Fonseca

On 06/01/16 16:26, Jose Fonseca wrote:

On 06/01/16 00:06, srol...@vmware.com wrote:

From: Roland Scheidegger 

The trick here is to recognize that in the c + n * dcdx calculations,
not only can the lower FIXED_ORDER bits not change (as the dcdx values
have those all zero) but that this means the sign bit of the calculations
cannot be different as well, that is
sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)).
That shaves off more than enough bits to never require 64bit masks.
A shifted plane c value could still easily exceed 32 bits, however
since we
throw out planes which are trivial accept even before binning (and
similarly
don't even get to see tris for which there was a trivial reject
plane) this
is never a problem.
The idea isn't all that revolutionary, in fact something similar was
tried
ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the
values were
only 32 bit anyway. I believe now it didn't quite work then because the
adjustment needed for testing trivial reject / partial masks wasn't
handled
correctly.
This still keeps the separate 32/64 bit paths for now, as the 32 bit
one still
looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo
unscaled
from setup which would be a good reason to ditch the 32 bit path, we'd
need to
change the special-purpose rasterization functions for small tris).

This passes piglit triangle-rasterization (-fbo -auto -max_size
-subpixelbits 8). It still fails triangle-rasterization-overdraw
-max_size
(no change, fails everything at position 2048 - interestingly for
softpipe,
nvidia maxwell 1 blob, and amd evergreen open-source drivers the test
fails
as well but at 4096 - seems like we're missing a float mantissa bit
somewhere!).


I don't think that's how the test is supposed to be run.

If you do an apitrace, you'll see the test creates a fbo with 1000x1000,
a viewport with 16Kx16K, and does a readpixels of 4Kx4K...


The problem is that the generic "-fbo" option is not useful for this, as 
we can't reliably resize it after the fact.


Take a look at tests/general/triangle-rasterization.cpp -- it has a 
different option "-use_fbo" that creates its own fbo.


Jose




Jose


---
  src/gallium/drivers/llvmpipe/lp_rast_tri.c |  84
+--
  src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107
+
  2 files changed, 133 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index c9b9221..a4dd6ef 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task,
  }

  static inline unsigned
-build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
+build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy)
  {
 unsigned mask = 0;

-   int64_t c0 = c;
-   int64_t c1 = c0 + dcdy;
-   int64_t c2 = c1 + dcdy;
-   int64_t c3 = c2 + dcdy;
-
-   mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
-   mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
-   mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
-   mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
-   mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
-   mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
-   mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
-   mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
-   mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
-   mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
-   mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
-   mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
-   mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
-   mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
-   mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
-   mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);
+   int32_t c0 = c;
+   int32_t c1 = c0 + dcdy;
+   int32_t c2 = c1 + dcdy;
+   int32_t c3 = c2 + dcdy;
+
+   mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0);
+   mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1);
+   mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2);
+   mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3);
+   mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4);
+   mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5);
+   mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6);
+   mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7);
+   mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8);
+   mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9);
+   mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10);
+   mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11);
+   mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12);
+   mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13);
+   mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14);
+   mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15);

 return mask;
  }


  static inline void
-build_masks(int64_t c,
-int64_t cdiff,
-int64_t dcdx,
-int64_t dcdy,
-unsigned *outmask,
- 

[Mesa-dev] [PATCH 2/3] st/mesa: move bitmap cache flushing out of state validation

2016-01-06 Thread Brian Paul
Just do it where needed (before drawing, clearing, etc).
---
 src/mesa/state_tracker/st_atom.c  | 4 
 src/mesa/state_tracker/st_cb_clear.c  | 3 +++
 src/mesa/state_tracker/st_cb_drawpixels.c | 5 +
 src/mesa/state_tracker/st_cb_drawtex.c| 3 +++
 src/mesa/state_tracker/st_draw.c  | 3 +++
 src/mesa/state_tracker/st_draw_feedback.c | 3 +++
 6 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index c1a9d00..337213c 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -33,7 +33,6 @@
 #include "pipe/p_defines.h"
 #include "st_context.h"
 #include "st_atom.h"
-#include "st_cb_bitmap.h"
 #include "st_program.h"
 #include "st_manager.h"
 
@@ -181,9 +180,6 @@ void st_validate_state( struct st_context *st )
 
check_attrib_edgeflag(st);
 
-   if (state->mesa)
-  st_flush_bitmap_cache(st);
-
check_program_state( st );
 
st_manager_validate_framebuffers(st);
diff --git a/src/mesa/state_tracker/st_cb_clear.c 
b/src/mesa/state_tracker/st_cb_clear.c
index e09f5ec..7b6d10e 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -41,6 +41,7 @@
 #include "program/prog_instruction.h"
 #include "st_context.h"
 #include "st_atom.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_clear.h"
 #include "st_cb_fbo.h"
 #include "st_format.h"
@@ -466,6 +467,8 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
GLbitfield clear_buffers = 0x0;
GLuint i;
 
+   st_flush_bitmap_cache(st);
+
/* This makes sure the pipe has the latest scissor, etc values */
st_validate_state( st );
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c 
b/src/mesa/state_tracker/st_cb_drawpixels.c
index 86e8a55..7ed52dd 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -50,6 +50,7 @@
 
 #include "st_atom.h"
 #include "st_atom_constbuf.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_drawpixels.h"
 #include "st_cb_readpixels.h"
 #include "st_cb_fbo.h"
@@ -1063,6 +1064,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
/* Mesa state should be up to date by now */
assert(ctx->NewState == 0x0);
 
+   st_flush_bitmap_cache(st);
+
st_validate_state(st);
 
/* Limit the size of the glDrawPixels to the max texture size.
@@ -1422,6 +1425,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint 
srcy,
GLint readX, readY, readW, readH;
struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
 
+   st_flush_bitmap_cache(st);
+
st_validate_state(st);
 
if (type == GL_DEPTH_STENCIL) {
diff --git a/src/mesa/state_tracker/st_cb_drawtex.c 
b/src/mesa/state_tracker/st_cb_drawtex.c
index b3e4b5b..e6ab77f 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/src/mesa/state_tracker/st_cb_drawtex.c
@@ -21,6 +21,7 @@
 
 #include "st_context.h"
 #include "st_atom.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_drawtex.h"
 
 #include "pipe/p_context.h"
@@ -113,6 +114,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, 
GLfloat z,
struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
unsigned offset;
 
+   st_flush_bitmap_cache(st);
+
st_validate_state(st);
 
/* determine if we need vertex color */
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 63b4622..d7a9716 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -48,6 +48,7 @@
 
 #include "st_context.h"
 #include "st_atom.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_bufferobjects.h"
 #include "st_cb_xformfb.h"
 #include "st_debug.h"
@@ -197,6 +198,8 @@ st_draw_vbo(struct gl_context *ctx,
/* Mesa core state should have been validated already */
assert(ctx->NewState == 0x0);
 
+   st_flush_bitmap_cache(st);
+
/* Validate state. */
if (st->dirty.st || ctx->NewDriverState) {
   st_validate_state(st);
diff --git a/src/mesa/state_tracker/st_draw_feedback.c 
b/src/mesa/state_tracker/st_draw_feedback.c
index 88c10a8..b6e6dea 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -33,6 +33,7 @@
 
 #include "st_context.h"
 #include "st_atom.h"
+#include "st_cb_bitmap.h"
 #include "st_cb_bufferobjects.h"
 #include "st_draw.h"
 #include "st_program.h"
@@ -137,6 +138,8 @@ st_feedback_draw_vbo(struct gl_context *ctx,
 
assert(draw);
 
+   st_flush_bitmap_cache(st);
+
st_validate_state(st);
 
if (!index_bounds_valid)
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/mesa: check texture target in allocate_full_mipmap()

2016-01-06 Thread Brian Paul
Some kinds of textures never have mipmaps.  3D textures seldom have
mipmaps.
---
 src/mesa/state_tracker/st_cb_texture.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 867d4da..f8b3679 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -404,6 +404,16 @@ static boolean
 allocate_full_mipmap(const struct st_texture_object *stObj,
  const struct st_texture_image *stImage)
 {
+   switch (stObj->base.Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_BUFFER:
+   case GL_TEXTURE_EXTERNAL_OES:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+  /* these texture types cannot be mipmapped */
+  return FALSE;
+   }
+
if (stImage->base.Level > 0 || stObj->base.GenerateMipmap)
   return TRUE;
 
@@ -420,6 +430,10 @@ allocate_full_mipmap(const struct st_texture_object *stObj,
   /* not a mipmap minification filter */
   return FALSE;
 
+   if (stObj->base.Target == GL_TEXTURE_3D)
+  /* 3D textures are seldom mipmapped */
+  return FALSE;
+
return TRUE;
 }
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] st/mesa: move mipmap allocation check logic into a function

2016-01-06 Thread Brian Paul
Better readability and easier to extend.
---
 src/mesa/state_tracker/st_cb_texture.c | 54 ++
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 62f149a..867d4da 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -388,6 +388,43 @@ guess_base_level_size(GLenum target,
 
 
 /**
+ * Try to determine whether we should allocate memory for a full texture
+ * mipmap.  The problem is when we get a glTexImage(level=0) call, we
+ * can't immediately know if other mipmap levels are coming next.  Here
+ * we try to guess whether to allocate memory for a mipmap or just the
+ * 0th level.
+ *
+ * If we guess incorrectly here we'll later reallocate the right amount of
+ * memory either in st_AllocTextureImageBuffer() or st_finalize_texture().
+ *
+ * \param stObj  the texture object we're going to allocate memory for.
+ * \param stImage  describes the incoming image which we need to store.
+ */
+static boolean
+allocate_full_mipmap(const struct st_texture_object *stObj,
+ const struct st_texture_image *stImage)
+{
+   if (stImage->base.Level > 0 || stObj->base.GenerateMipmap)
+  return TRUE;
+
+   if (stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
+   stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT)
+  /* depth/stencil textures are seldom mipmapped */
+  return FALSE;
+
+   if (stObj->base.BaseLevel == 0 && stObj->base.MaxLevel == 0)
+  return FALSE;
+
+   if (stObj->base.Sampler.MinFilter == GL_NEAREST ||
+   stObj->base.Sampler.MinFilter == GL_LINEAR)
+  /* not a mipmap minification filter */
+  return FALSE;
+
+   return TRUE;
+}
+
+
+/**
  * Try to allocate a pipe_resource object for the given st_texture_object.
  *
  * We use the given st_texture_image as a clue to determine the size of the
@@ -431,22 +468,15 @@ guess_and_alloc_texture(struct st_context *st,
 * to re-allocating a texture buffer with space for more (or fewer)
 * mipmap levels later.
 */
-   if ((stObj->base.Sampler.MinFilter == GL_NEAREST ||
-stObj->base.Sampler.MinFilter == GL_LINEAR ||
-(stObj->base.BaseLevel == 0 &&
- stObj->base.MaxLevel == 0) ||
-stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
-stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
-   !stObj->base.GenerateMipmap &&
-   stImage->base.Level == 0) {
-  /* only alloc space for a single mipmap level */
-  lastLevel = 0;
-   }
-   else {
+   if (allocate_full_mipmap(stObj, stImage)) {
   /* alloc space for a full mipmap */
   lastLevel = _mesa_get_tex_max_num_levels(stObj->base.Target,
width, height, depth) - 1;
}
+   else {
+  /* only alloc space for a single mipmap level */
+  lastLevel = 0;
+   }
 
/* Save the level=0 dimensions */
stObj->width0 = width;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] wgl: Rudimentary wglUseFontBitmaps sample.

2016-01-06 Thread Jose Fonseca
It uses SYSTEM_FONT which actually creates some challenges when emulating
wglUseFontBitmaps:  in spite of what 
https://msdn.microsoft.com/en-us/library/windows/desktop/dd374392.aspx
implies, GetGlyphOutline(GGO_BITMAP) does not seem to work with certain
fonts.  The only solution is to draw the font characters with a HBITMAP
like the old Mesa fxwgl.c code used to do.  That too, seems to be the way
that opengl32.dll implements wglUseFontBitmaps.
---
 src/wgl/CMakeLists.txt |   2 +
 src/wgl/wglfont.c  | 103 +
 2 files changed, 105 insertions(+)
 create mode 100644 src/wgl/wglfont.c

diff --git a/src/wgl/CMakeLists.txt b/src/wgl/CMakeLists.txt
index 0229ac7..cb50cca 100644
--- a/src/wgl/CMakeLists.txt
+++ b/src/wgl/CMakeLists.txt
@@ -16,6 +16,7 @@ set_target_properties (wgl_sharedtex_mt PROPERTIES 
OUTPUT_NAME sharedtex_mt)
 add_executable (wglinfo wglinfo.c 
${CMAKE_SOURCE_DIR}/src/xdemos/glinfo_common.c)
 add_executable (wglcontext wglcontext.c)
 add_executable (wincopy WIN32 wincopy.c wglutil.c)
+add_executable (wglfont wglfont.c)
 
 install (
TARGETS
@@ -23,6 +24,7 @@ install (
wgl_sharedtex_mt
wglinfo
wglcontext
+   wglfont
wincopy
DESTINATION wgl)
 
diff --git a/src/wgl/wglfont.c b/src/wgl/wglfont.c
new file mode 100644
index 000..86c5f88
--- /dev/null
+++ b/src/wgl/wglfont.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015, VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <windows.h>
+#include <stdlib.h>
+#include <GL/gl.h>
+
+int
+main(int argc, char *argv[])
+{
+   WNDCLASS wc;
+   HWND hwnd;
+   HDC hdc;
+   PIXELFORMATDESCRIPTOR pfd;
+   int iPixelFormat;
+   HGLRC hglrc;
+
+   ZeroMemory(&wc, sizeof wc);
+   wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW;
+   wc.lpfnWndProc = DefWindowProc;
+   wc.hIcon = LoadIcon(NULL, IDI_APPLICATION);
+   wc.hCursor = LoadCursor(NULL, IDC_ARROW);
+   wc.hbrBackground = (HBRUSH) (COLOR_BTNFACE + 1);
+   wc.lpszClassName = "wglfont";
+
+   if (!RegisterClass(&wc)) {
+  abort();
+   }
+
+   hwnd = CreateWindowEx(0,
+ wc.lpszClassName,
+ "wglfont",
+ WS_VISIBLE | WS_CLIPSIBLINGS | WS_CLIPCHILDREN | 
WS_TILEDWINDOW,
+ CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, 
CW_USEDEFAULT,
+ NULL, NULL,
+ wc.hInstance,
+ NULL);
+   if (!hwnd) {
+  abort();
+   }
+
+   hdc = GetDC(hwnd);
+   if (!hdc) {
+  abort();
+   }
+
+   ZeroMemory(&pfd, sizeof pfd);
+   pfd.nSize = sizeof pfd;
+   pfd.nVersion = 1;
+   pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL;
+   pfd.iPixelType = PFD_TYPE_RGBA;
+   pfd.cColorBits = 24;
+   pfd.cDepthBits = 24;
+   pfd.iLayerType = PFD_MAIN_PLANE;
+
+   iPixelFormat = ChoosePixelFormat(hdc, &pfd);
+   if (!iPixelFormat) {
+  abort();
+   }
+
+   if (!SetPixelFormat(hdc, iPixelFormat, &pfd)) {
+  abort();
+   }
+
+   hglrc = wglCreateContext(hdc);
+   if (!hglrc) {
+  abort();
+   }
+
+   wglMakeCurrent(hdc, hglrc);
+
+   SelectObject(hdc, GetStockObject(SYSTEM_FONT));
+
+   wglUseFontBitmaps(hdc, 0, 255, 1000);
+
+   glListBase(1000);
+
+   glCallLists(12, GL_UNSIGNED_BYTE, "Hello World!");
+
+   SwapBuffers(hdc);
+
+   Sleep(1000);
+
+   return 0;
+}
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization

2016-01-06 Thread Jose Fonseca

On 06/01/16 00:06, srol...@vmware.com wrote:

From: Roland Scheidegger 

The trick here is to recognize that in the c + n * dcdx calculations,
not only can the lower FIXED_ORDER bits not change (as the dcdx values
have those all zero) but that this means the sign bit of the calculations
cannot be different as well, that is
sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)).
That shaves off more than enough bits to never require 64bit masks.
A shifted plane c value could still easily exceed 32 bits, however since we
throw out planes which are trivial accept even before binning (and similarly
don't even get to see tris for which there was a trivial reject plane) this
is never a problem.
The idea isn't all that revolutionary, in fact something similar was tried
ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the values were
only 32 bit anyway. I believe now it didn't quite work then because the
adjustment needed for testing trivial reject / partial masks wasn't handled
correctly.
This still keeps the separate 32/64 bit paths for now, as the 32 bit one still
looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo unscaled
from setup which would be a good reason to ditch the 32 bit path, we'd need to
change the special-purpose rasterization functions for small tris).

This passes piglit triangle-rasterization (-fbo -auto -max_size
-subpixelbits 8). It still fails triangle-rasterization-overdraw -max_size
(no change, fails everything at position 2048 - interestingly for softpipe,
nvidia maxwell 1 blob, and amd evergreen open-source drivers the test fails
as well but at 4096 - seems like we're missing a float mantissa bit
somewhere!).


I don't think that's how the test is supposed to be run.

If you do an apitrace, you'll see the test creates a fbo with 1000x1000, 
a viewport with 16Kx16K, and does a readpixels of 4Kx4K...


Jose


---
  src/gallium/drivers/llvmpipe/lp_rast_tri.c |  84 +--
  src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107 +
  2 files changed, 133 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c 
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index c9b9221..a4dd6ef 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task,
  }

  static inline unsigned
-build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
+build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy)
  {
 unsigned mask = 0;

-   int64_t c0 = c;
-   int64_t c1 = c0 + dcdy;
-   int64_t c2 = c1 + dcdy;
-   int64_t c3 = c2 + dcdy;
-
-   mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
-   mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
-   mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
-   mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
-   mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
-   mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
-   mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
-   mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
-   mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
-   mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
-   mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
-   mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
-   mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
-   mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
-   mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
-   mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);
+   int32_t c0 = c;
+   int32_t c1 = c0 + dcdy;
+   int32_t c2 = c1 + dcdy;
+   int32_t c3 = c2 + dcdy;
+
+   mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0);
+   mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1);
+   mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2);
+   mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3);
+   mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4);
+   mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5);
+   mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6);
+   mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7);
+   mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8);
+   mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9);
+   mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10);
+   mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11);
+   mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12);
+   mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13);
+   mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14);
+   mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15);

 return mask;
  }


  static inline void
-build_masks(int64_t c,
-int64_t cdiff,
-int64_t dcdx,
-int64_t dcdy,
-   unsigned *outmask,
-   unsigned *partmask)
+build_masks(int32_t c,
+int32_t cdiff,
+int32_t dcdx,
+int32_t dcdy,
+unsigned *outmask,
+unsigned *partmask)
  {
 *outmask |= build_mask_linear(c, dcdx, dcdy);
 *partmask |= 

Re: [Mesa-dev] [PATCH] wgl: Rudimentary wglUseFontBitmaps sample.

2016-01-06 Thread Brian Paul

The copyright line could be bumped to 2016.

Reviewed-by: Brian Paul 

On 01/06/2016 09:21 AM, Jose Fonseca wrote:

It uses SYSTEM_FONT which actually creates some challenges when emulating
wglUseFontBitmaps:  in spite of what 
https://msdn.microsoft.com/en-us/library/windows/desktop/dd374392.aspx
implies, GetGlyphOutline(GGO_BITMAP) does not seem to work with certain
fonts.  The only solution is to draw the font characters with a HBITMAP
like the old Mesa fxwgl.c code used to do.  That too, seems to be the way
that opengl32.dll implements wglUseFontBitmaps.
---
  src/wgl/CMakeLists.txt |   2 +
  src/wgl/wglfont.c  | 103 +
  2 files changed, 105 insertions(+)
  create mode 100644 src/wgl/wglfont.c

diff --git a/src/wgl/CMakeLists.txt b/src/wgl/CMakeLists.txt
index 0229ac7..cb50cca 100644
--- a/src/wgl/CMakeLists.txt
+++ b/src/wgl/CMakeLists.txt
@@ -16,6 +16,7 @@ set_target_properties (wgl_sharedtex_mt PROPERTIES 
OUTPUT_NAME sharedtex_mt)
  add_executable (wglinfo wglinfo.c 
${CMAKE_SOURCE_DIR}/src/xdemos/glinfo_common.c)
  add_executable (wglcontext wglcontext.c)
  add_executable (wincopy WIN32 wincopy.c wglutil.c)
+add_executable (wglfont wglfont.c)

  install (
TARGETS
@@ -23,6 +24,7 @@ install (
wgl_sharedtex_mt
wglinfo
wglcontext
+   wglfont
wincopy
DESTINATION wgl)

diff --git a/src/wgl/wglfont.c b/src/wgl/wglfont.c
new file mode 100644
index 000..86c5f88
--- /dev/null
+++ b/src/wgl/wglfont.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015, VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+int
+main(int argc, char *argv[])
+{
+   WNDCLASS wc;
+   HWND hwnd;
+   HDC hdc;
+   PIXELFORMATDESCRIPTOR pfd;
+   int iPixelFormat;
+   HGLRC hglrc;
+
+   ZeroMemory(&wc, sizeof wc);
+   wc.style = CS_OWNDC | CS_HREDRAW | CS_VREDRAW;
+   wc.lpfnWndProc = DefWindowProc;
+   wc.hIcon = LoadIcon(NULL, IDI_APPLICATION);
+   wc.hCursor = LoadCursor(NULL, IDC_ARROW);
+   wc.hbrBackground = (HBRUSH) (COLOR_BTNFACE + 1);
+   wc.lpszClassName = "wglfont";
+
+   if (!RegisterClass(&wc)) {
+  abort();
+   }
+
+   hwnd = CreateWindowEx(0,
+ wc.lpszClassName,
+ "wglfont",
+ WS_VISIBLE | WS_CLIPSIBLINGS | WS_CLIPCHILDREN | 
WS_TILEDWINDOW,
+ CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, 
CW_USEDEFAULT,
+ NULL, NULL,
+ wc.hInstance,
+ NULL);
+   if (!hwnd) {
+  abort();
+   }
+
+   hdc = GetDC(hwnd);
+   if (!hdc) {
+  abort();
+   }
+
+   ZeroMemory(&pfd, sizeof pfd);
+   pfd.nSize = sizeof pfd;
+   pfd.nVersion = 1;
+   pfd.dwFlags = PFD_DOUBLEBUFFER | PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL;
+   pfd.iPixelType = PFD_TYPE_RGBA;
+   pfd.cColorBits = 24;
+   pfd.cDepthBits = 24;
+   pfd.iLayerType = PFD_MAIN_PLANE;
+
+   iPixelFormat = ChoosePixelFormat(hdc, &pfd);
+   if (!iPixelFormat) {
+  abort();
+   }
+
+   if (!SetPixelFormat(hdc, iPixelFormat, &pfd)) {
+  abort();
+   }
+
+   hglrc = wglCreateContext(hdc);
+   if (!hglrc) {
+  abort();
+   }
+
+   wglMakeCurrent(hdc, hglrc);
+
+   SelectObject(hdc, GetStockObject(SYSTEM_FONT));
+
+   wglUseFontBitmaps(hdc, 0, 255, 1000);
+
+   glListBase(1000);
+
+   glCallLists(12, GL_UNSIGNED_BYTE, "Hello World!");
+
+   SwapBuffers(hdc);
+
+   Sleep(1000);
+
+   return 0;
+}



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] vbo: s/GLuint/GLbitfield/ for state bitmasks

2016-01-06 Thread Brian Paul
---
 src/mesa/vbo/vbo.h | 2 +-
 src/mesa/vbo/vbo_context.c | 2 +-
 src/mesa/vbo/vbo_exec.c| 2 +-
 src/mesa/vbo/vbo_exec.h| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index cef3b8c..dd9b428 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -78,7 +78,7 @@ struct _mesa_index_buffer {
 
 GLboolean _vbo_CreateContext( struct gl_context *ctx );
 void _vbo_DestroyContext( struct gl_context *ctx );
-void _vbo_InvalidateState( struct gl_context *ctx, GLuint new_state );
+void _vbo_InvalidateState( struct gl_context *ctx, GLbitfield new_state );
 
 
 void
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index 5e1a760..19b35a4 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -186,7 +186,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx )
 }
 
 
-void _vbo_InvalidateState( struct gl_context *ctx, GLuint new_state )
+void _vbo_InvalidateState( struct gl_context *ctx, GLbitfield new_state )
 {
vbo_exec_invalidate_state(ctx, new_state);
 }
diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c
index a301c6c..4db4f40 100644
--- a/src/mesa/vbo/vbo_exec.c
+++ b/src/mesa/vbo/vbo_exec.c
@@ -73,7 +73,7 @@ void vbo_exec_destroy( struct gl_context *ctx )
  * invoked according to the state flags.  That will have to wait for a
  * mesa rework:
  */ 
-void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state )
+void vbo_exec_invalidate_state( struct gl_context *ctx, GLbitfield new_state )
 {
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = &vbo->exec;
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h
index a80b2c9..27bff4a 100644
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -146,7 +146,7 @@ struct vbo_exec_context
  */
 void vbo_exec_init( struct gl_context *ctx );
 void vbo_exec_destroy( struct gl_context *ctx );
-void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
+void vbo_exec_invalidate_state( struct gl_context *ctx, GLbitfield new_state );
 
 
 /* Internal functions:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] main: s/GLuint/GLbitfield for state bitmasks

2016-01-06 Thread Brian Paul
---
 src/mesa/main/api_arrayelt.c | 4 ++--
 src/mesa/main/api_arrayelt.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 92d8238..c84db5f 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -65,7 +65,7 @@ typedef struct {
 typedef struct {
AEarray arrays[32];
AEattrib attribs[VERT_ATTRIB_MAX + 1];
-   GLuint NewState;
+   GLbitfield NewState;
 
/* List of VBOs we need to map before executing ArrayElements */
struct gl_buffer_object *vbo[VERT_ATTRIB_MAX];
@@ -1802,7 +1802,7 @@ _ae_ArrayElement(GLint elt)
 
 
 void
-_ae_invalidate_state(struct gl_context *ctx, GLuint new_state)
+_ae_invalidate_state(struct gl_context *ctx, GLbitfield new_state)
 {
AEcontext *actx = AE_CONTEXT(ctx);
 
diff --git a/src/mesa/main/api_arrayelt.h b/src/mesa/main/api_arrayelt.h
index 39fdeb9..03cd9ec 100644
--- a/src/mesa/main/api_arrayelt.h
+++ b/src/mesa/main/api_arrayelt.h
@@ -33,7 +33,7 @@
 
 extern GLboolean _ae_create_context( struct gl_context *ctx );
 extern void _ae_destroy_context( struct gl_context *ctx );
-extern void _ae_invalidate_state( struct gl_context *ctx, GLuint new_state );
+extern void _ae_invalidate_state( struct gl_context *ctx, GLbitfield new_state 
);
 extern void GLAPIENTRY _ae_ArrayElement( GLint elt );
 
 /* May optionally be called before a batch of element calls:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nir/algebraic: Add more lowering

2016-01-06 Thread Jason Ekstrand
This commit adds lowering options for the following opcodes:

 - nir_op_fmod
 - nir_op_bitfield_insert
 - nir_op_uadd_carry
 - nir_op_usub_borrow
---
 src/glsl/nir/nir.h| 4 
 src/glsl/nir/nir_opt_algebraic.py | 6 ++
 2 files changed, 10 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 59f6f68..61e51da 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1509,6 +1509,10 @@ typedef struct nir_shader_compiler_options {
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
+   bool lower_fmod;
+   bool lower_bitfield_insert;
+   bool lower_uadd_carry;
+   bool lower_usub_borrow;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
/** lowers fsub and isub to fadd+fneg and iadd+ineg. */
diff --git a/src/glsl/nir/nir_opt_algebraic.py 
b/src/glsl/nir/nir_opt_algebraic.py
index a5a4841..f4a8632 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -222,6 +222,12 @@ optimizations = [
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
+
+   # Misc. lowering
+   (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b, 
'options->lower_fmod'),
+   (('bitfield_insert', a, b, c, d), ('bfi', ('bfm', d, c), b, a), 
'options->lower_bitfield_insert'),
+   (('uadd_carry', a, b), ('ult', ('iadd', a, b), a), 
'options->lower_uadd_carry'),
+   (('usub_borrow', a, b), ('ult', a, b), 'options->lower_usub_borrow'),
 ]
 
 # Add optimizations to handle the case where the result of a ternary is
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965/compiler: Enable more lowering in NIR

2016-01-06 Thread Jason Ekstrand
We don't need these for GLSL or ARB, but we need them for SPIR-V
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6d15c60..4ae403c 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -104,6 +104,11 @@ brw_compiler_create(void *mem_ctx, const struct 
brw_device_info *devinfo)
nir_options->lower_ffma = true;
nir_options->lower_sub = true;
nir_options->lower_fdiv = true;
+   nir_options->lower_scmp = true;
+   nir_options->lower_fmod = true;
+   nir_options->lower_bitfield_insert = true;
+   nir_options->lower_uadd_carry = true;
+   nir_options->lower_usub_borrow = true;
 
/* In the vec4 backend, our dpN instruction replicates its result to all
 * the components of a vec4.  We would like NIR to give us replicated fdot
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages

2016-01-06 Thread Timothy Arceri
On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote:
> On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri
>  wrote:
> > Tessellation shaders treat varyings as shared memory and
> > invocations
> > can access each others varyings therefore we can't use the existing
> > method to lower them.
> 
> That's not strictly true... this is only true of tess control outputs
> (which can be written by the current invocation, but also read in by
> other invocations, effectively acting as a shared memory -- both true
> of per-invocation outputs as well as per-patch outputs). Does that
> information change this patch at all?

I don't think so. The problem is that the current packing code works
like this:

- Change vars to be packed to temporaries, create new packed varyings.
- Copy *all* values from the new packed input varying to the
 temporaries at the start of main.
- Copy *all* values from the temporaries to the new packed output vars
at the end of main (or before emit for GS).

As well as the invocations stomping on each other this results in 32
(GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just copies
the full array.

The current packing just doesn't work well for tessellation, it's easier
to just disable it for tessellation and do it all using a different
method rather than trying to mix and match.


> 
> > 
> > This adds a check for these stages as following patches will
> > allow explicit locations to be lowered even when the driver and
> > existing
> > tessellation checks ask for it to be disabled, we do this to enable
> > support
> > for the component layout qualifier.
> > ---
> >  src/glsl/lower_packed_varyings.cpp | 62 +-
> > 
> >  1 file changed, 34 insertions(+), 28 deletions(-)
> > 
> > diff --git a/src/glsl/lower_packed_varyings.cpp
> > b/src/glsl/lower_packed_varyings.cpp
> > index 2899846..e4e9a35 100644
> > --- a/src/glsl/lower_packed_varyings.cpp
> > +++ b/src/glsl/lower_packed_varyings.cpp
> > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned
> > locations_used,
> >ir_variable_mode mode, unsigned
> > gs_input_vertices,
> >gl_shader *shader, bool
> > disable_varying_packing)
> >  {
> > -   exec_list *instructions = shader->ir;
> > ir_function *main_func = shader->symbols->get_function("main");
> > exec_list void_parameters;
> > ir_function_signature *main_func_sig
> >= main_func->matching_signature(NULL, _parameters,
> > false);
> > -   exec_list new_instructions, new_variables;
> > -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used,
> > mode,
> > - gs_input_vertices,
> > - _instructions,
> > - _variables,
> > - disable_varying_packing);
> > -   visitor.run(shader);
> > -   if (mode == ir_var_shader_out) {
> > -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > - /* For geometry shaders, outputs need to be lowered
> > before each call
> > -  * to EmitVertex()
> > -  */
> > - lower_packed_varyings_gs_splicer splicer(mem_ctx,
> > _instructions);
> > -
> > - /* Add all the variables in first. */
> > - main_func_sig->body.head->insert_before(_variables);
> > 
> > - /* Now update all the EmitVertex instances */
> > - splicer.run(instructions);
> > +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
> > + shader->Stage == MESA_SHADER_TESS_EVAL)) {
> > +  exec_list *instructions = shader->ir;
> > +  exec_list new_instructions, new_variables;
> > +
> > +  lower_packed_varyings_visitor visitor(mem_ctx,
> > locations_used, mode,
> > +gs_input_vertices,
> > +_instructions,
> > +_variables,
> > +   
> >  disable_varying_packing);
> > +  visitor.run(shader);
> > +  if (mode == ir_var_shader_out) {
> > + if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > +/* For geometry shaders, outputs need to be lowered
> > before each
> > + * call to EmitVertex()
> > + */
> > +lower_packed_varyings_gs_splicer splicer(mem_ctx,
> > +
> >  _instructions);
> > +
> > +/* Add all the variables in first. */
> > +main_func_sig->body.head
> > ->insert_before(_variables);
> > +
> > +/* Now update all the EmitVertex instances */
> > +splicer.run(instructions);
> > + } else {
> > +/* For other shader types, outputs need to be lowered
> > at the end
> > + * of main()
> > + */
> > +main_func_sig->body.append_list(_variables);
> > +

[Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code

2016-01-06 Thread Timothy Arceri
Following patches will allow packing of varyings with explicit locations
via the component layout qualifier. Moving the rules here will enable
us to call an alternate path for packing tessellation stages with
explicit locations.
---
 V2: move the tessellation packing rules, allow TES output to be packed.

 src/glsl/link_varyings.cpp | 17 ++
 src/glsl/lower_packed_varyings.cpp | 63 +-
 2 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index be662bc..69e24e3 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context *ctx,
   assert(!ctx->Extensions.EXT_transform_feedback);
}
 
-   /* Tessellation shaders treat inputs and outputs as shared memory and can
-* access inputs and outputs of other invocations.
-* Therefore, they can't be lowered to temps easily (and definitely not
-* efficiently).
-*/
-   bool disable_varying_packing =
-  ctx->Const.DisableVaryingPacking ||
-  (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
-  (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
-  (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
-
-   varying_matches matches(disable_varying_packing,
+   varying_matches matches(ctx->Const.DisableVaryingPacking,
producer ? producer->Stage : (gl_shader_stage)-1,
consumer ? consumer->Stage : (gl_shader_stage)-1);
hash_table *tfeedback_candidates
@@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context *ctx,
 
if (producer) {
   lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
-0, producer, disable_varying_packing);
+0, producer, ctx->Const.DisableVaryingPacking);
}
 
if (consumer) {
   lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
 consumer_vertices, consumer,
-disable_varying_packing);
+ctx->Const.DisableVaryingPacking);
}
 
return true;
diff --git a/src/glsl/lower_packed_varyings.cpp 
b/src/glsl/lower_packed_varyings.cpp
index 2899846..4723c2b 100644
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned 
locations_used,
   ir_variable_mode mode, unsigned gs_input_vertices,
   gl_shader *shader, bool disable_varying_packing)
 {
-   exec_list *instructions = shader->ir;
ir_function *main_func = shader->symbols->get_function("main");
exec_list void_parameters;
ir_function_signature *main_func_sig
   = main_func->matching_signature(NULL, _parameters, false);
-   exec_list new_instructions, new_variables;
-   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
- gs_input_vertices,
- _instructions,
- _variables,
- disable_varying_packing);
-   visitor.run(shader);
-   if (mode == ir_var_shader_out) {
-  if (shader->Stage == MESA_SHADER_GEOMETRY) {
- /* For geometry shaders, outputs need to be lowered before each call
-  * to EmitVertex()
-  */
- lower_packed_varyings_gs_splicer splicer(mem_ctx, _instructions);
-
- /* Add all the variables in first. */
- main_func_sig->body.head->insert_before(_variables);
 
- /* Now update all the EmitVertex instances */
- splicer.run(instructions);
+   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
+ (shader->Stage == MESA_SHADER_TESS_EVAL &&
+  mode == ir_var_shader_in))) {
+  exec_list *instructions = shader->ir;
+  exec_list new_instructions, new_variables;
+
+  lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
+gs_input_vertices,
+_instructions,
+_variables,
+disable_varying_packing);
+  visitor.run(shader);
+  if (mode == ir_var_shader_out) {
+ if (shader->Stage == MESA_SHADER_GEOMETRY) {
+/* For geometry shaders, outputs need to be lowered before each
+ * call to EmitVertex()
+ */
+lower_packed_varyings_gs_splicer splicer(mem_ctx,
+ _instructions);
+
+/* Add all the variables in first. */
+main_func_sig->body.head->insert_before(_variables);
+
+/* Now update all the EmitVertex instances */
+splicer.run(instructions);
+ } else {
+/* For 

Re: [Mesa-dev] [PATCH] draw: fix line stippling with unfilled prims

2016-01-06 Thread Brian Paul


Reviewed-by: Brian Paul 


On 01/06/2016 03:26 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

The unfilled stage was not filling in the prim header, and the line stage
then decided to reset the stipple counter or not based on the uninitialized
data. This causes some failures in conform linestipple test (albeit quite
randomly happening depending on environment).
So fill in the prim header in the unfilled stage - I am not entirely sure
if anybody really needs determinant after that stage, but there's at least
later stages (wide line for instance) which copy over the determinant as well.
---
  src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 56 +
  1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 8e6435c..b9ded14 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage,
  }


-static void point( struct draw_stage *stage,
-  struct vertex_header *v0 )
+static void point(struct draw_stage *stage,
+  struct prim_header *header,
+  struct vertex_header *v0)
  {
 struct prim_header tmp;
+   tmp.det = header->det;
+   tmp.flags = 0;
 tmp.v[0] = v0;
-   stage->next->point( stage->next, &tmp );
+   stage->next->point(stage->next, &tmp);
  }

-static void line( struct draw_stage *stage,
- struct vertex_header *v0,
- struct vertex_header *v1 )
+static void line(struct draw_stage *stage,
+ struct prim_header *header,
+ struct vertex_header *v0,
+ struct vertex_header *v1)
  {
 struct prim_header tmp;
+   tmp.det = header->det;
+   tmp.flags = 0;
 tmp.v[0] = v0;
 tmp.v[1] = v1;
-   stage->next->line( stage->next, &tmp );
+   stage->next->line(stage->next, &tmp);
  }


-static void points( struct draw_stage *stage,
-   struct prim_header *header )
+static void points(struct draw_stage *stage,
+   struct prim_header *header)
  {
 struct vertex_header *v0 = header->v[0];
 struct vertex_header *v1 = header->v[1];
@@ -114,27 +120,41 @@ static void points( struct draw_stage *stage,

 inject_front_face_info(stage, header);

-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, 
v0 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, 
v1 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, 
v2 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
+  point(stage, header, v0);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
+  point(stage, header, v1);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
+  point(stage, header, v2);
  }


-static void lines( struct draw_stage *stage,
-  struct prim_header *header )
+static void lines(struct draw_stage *stage,
+  struct prim_header *header)
  {
 struct vertex_header *v0 = header->v[0];
 struct vertex_header *v1 = header->v[1];
 struct vertex_header *v2 = header->v[2];

 if (header->flags & DRAW_PIPE_RESET_STIPPLE)
-  stage->next->reset_stipple_counter( stage->next );
+  /*
+   * XXX could revisit this. The only stage which cares is the line
+   * stipple stage. Could just emit correct reset flags here and not
+   * bother about all the calling through reset_stipple_counter
+   * stages. Though technically it is necessary if line stipple is
+   * handled by the driver, but this is not actually hooked up when
+   * using vbuf (vbuf stage reset_stipple_counter does nothing).
+   */
+  stage->next->reset_stipple_counter(stage->next);

 inject_front_face_info(stage, header);

-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, 
v2, v0 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, 
v0, v1 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, 
v1, v2 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
+  line(stage, header, v2, v0);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
+  line(stage, header, v0, v1);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
+  line(stage, header, v1, v2);
  }





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages

2016-01-06 Thread Timothy Arceri
On Tue, 2015-12-29 at 17:38 +1100, eocallag...@alterapraxis.com wrote:
> On 2015-12-29 16:00, Timothy Arceri wrote:
> > Tessellation shaders treat varyings as shared memory and
> > invocations
> > can access each others varyings therefore we can't use the existing
> > method to lower them.
> > 
> > This adds a check for these stages as following patches will
> > allow explicit locations to be lowered even when the driver and 
> > existing
> > tesselation checks ask for it to be disabled, we do this to enable 
> > support
> > for the component layout qualifier.
> 
> I find this a little hard to read and understand, could you brush it
> up 
> a bit
> please if that's ok?

Sure I'll try again :)


> 
> > ---
> >  src/glsl/lower_packed_varyings.cpp | 62 
> > +-
> >  1 file changed, 34 insertions(+), 28 deletions(-)
> > 
> > diff --git a/src/glsl/lower_packed_varyings.cpp
> > b/src/glsl/lower_packed_varyings.cpp
> > index 2899846..e4e9a35 100644
> > --- a/src/glsl/lower_packed_varyings.cpp
> > +++ b/src/glsl/lower_packed_varyings.cpp
> > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned
> > locations_used,
> >ir_variable_mode mode, unsigned 
> > gs_input_vertices,
> >gl_shader *shader, bool
> > disable_varying_packing)
> >  {
> > -   exec_list *instructions = shader->ir;
> > ir_function *main_func = shader->symbols->get_function("main");
> > exec_list void_parameters;
> > ir_function_signature *main_func_sig
> >= main_func->matching_signature(NULL, _parameters,
> > false);
> > -   exec_list new_instructions, new_variables;
> > -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, 
> > mode,
> > - gs_input_vertices,
> > - _instructions,
> > - _variables,
> > - disable_varying_packing);
> > -   visitor.run(shader);
> > -   if (mode == ir_var_shader_out) {
> > -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > - /* For geometry shaders, outputs need to be lowered
> > before 
> > each call
> > -  * to EmitVertex()
> > -  */
> > - lower_packed_varyings_gs_splicer splicer(mem_ctx, 
> > _instructions);
> > -
> > - /* Add all the variables in first. */
> > - main_func_sig->body.head->insert_before(_variables);
> > 
> > - /* Now update all the EmitVertex instances */
> > - splicer.run(instructions);
> > +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
> > + shader->Stage == MESA_SHADER_TESS_EVAL)) {
> > +  exec_list *instructions = shader->ir;
> > +  exec_list new_instructions, new_variables;
> > +
> > +  lower_packed_varyings_visitor visitor(mem_ctx,
> > locations_used, 
> > mode,
> > +gs_input_vertices,
> > +_instructions,
> > +_variables,
> > +   
> >  disable_varying_packing);
> > +  visitor.run(shader);
> > +  if (mode == ir_var_shader_out) {
> > + if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > +/* For geometry shaders, outputs need to be lowered
> > before 
> > each
> > + * call to EmitVertex()
> > + */
> > +lower_packed_varyings_gs_splicer splicer(mem_ctx,
> > + 
> > _instructions);
> > +
> > +/* Add all the variables in first. */
> > +main_func_sig->body.head
> > ->insert_before(_variables);
> > +
> > +/* Now update all the EmitVertex instances */
> > +splicer.run(instructions);
> > + } else {
> > +/* For other shader types, outputs need to be lowered
> > at 
> > the end
> > + * of main()
> > + */
> > +main_func_sig->body.append_list(_variables);
> > +main_func_sig->body.append_list(_instructions);
> > + }
> >} else {
> > - /* For other shader types, outputs need to be lowered at
> > the 
> > end of
> > -  * main()
> > -  */
> > - main_func_sig->body.append_list(_variables);
> > - main_func_sig->body.append_list(_instructions);
> > + /* Shader inputs need to be lowered at the beginning of 
> > main() */
> > + main_func_sig->body.head
> > ->insert_before(_instructions);
> > + main_func_sig->body.head->insert_before(_variables);
> >}
> > -   } else {
> > -  /* Shader inputs need to be lowered at the beginning of
> > main() 
> > */
> > -  main_func_sig->body.head->insert_before(_instructions);
> > -  main_func_sig->body.head->insert_before(_variables);
> > }
> >  }
> 
> ___
> mesa-dev 

Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages

2016-01-06 Thread Ilia Mirkin
On Wed, Jan 6, 2016 at 6:40 PM, Timothy Arceri
 wrote:
> On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote:
>> On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri
>>  wrote:
>> > Tessellation shaders treat varyings as shared memory and
>> > invocations
>> > can access each others varyings therefore we can't use the existing
>> > method to lower them.
>>
>> That's not strictly true... this is only true of tess control outputs
>> (which can be written by the current invocation, but also read in by
>> other invocations, effectively acting as a shared memory -- both true
>> of per-invocation outputs as well as per-patch outputs). Does that
>> information change this patch at all?
>
> I don't think so. The problem is that the current packing code works
> like this:
>
> - Change vars to be packed to temporaries, create new packed varyings.
> - Copy *all* values from the new packed input varying to the
>  temporaries at the start of main.
> - Copy *all* values from the temporaries to the new packed output vars
> at the end of main (or before emit for GS).
>
> As well as the invocations stomping on each other this results in 32
> (GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just copies
> the full array.

Presumably it also does this for GS? Although it's a lot more common
for a single GS invocation to consume

>
> The current packing just doesn't work well for tessellation; it's easier
> to just disable it for tessellation and do it all using a different
> method rather than trying to mix and match.

I thought it already *was* disabled... but I think you still have to
have packing on TES outputs, because (a) your arguments against don't
apply and (b) it might feed into transform feedback, which i have
faint recollections must go through packing.

>
>
>>
>> >
>> > This adds a check for these stages as following patches will
>> > allow explicit locations to be lowered even when the driver and
>> > existing
>> > tesselation checks ask for it to be disabled, we do this to enable
>> > support
>> > for the component layout qualifier.
>> > ---
>> >  src/glsl/lower_packed_varyings.cpp | 62 +-
>> > 
>> >  1 file changed, 34 insertions(+), 28 deletions(-)
>> >
>> > diff --git a/src/glsl/lower_packed_varyings.cpp
>> > b/src/glsl/lower_packed_varyings.cpp
>> > index 2899846..e4e9a35 100644
>> > --- a/src/glsl/lower_packed_varyings.cpp
>> > +++ b/src/glsl/lower_packed_varyings.cpp
>> > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned
>> > locations_used,
>> >ir_variable_mode mode, unsigned
>> > gs_input_vertices,
>> >gl_shader *shader, bool
>> > disable_varying_packing)
>> >  {
>> > -   exec_list *instructions = shader->ir;
>> > ir_function *main_func = shader->symbols->get_function("main");
>> > exec_list void_parameters;
>> > ir_function_signature *main_func_sig
>> >= main_func->matching_signature(NULL, _parameters,
>> > false);
>> > -   exec_list new_instructions, new_variables;
>> > -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used,
>> > mode,
>> > - gs_input_vertices,
>> > - _instructions,
>> > - _variables,
>> > - disable_varying_packing);
>> > -   visitor.run(shader);
>> > -   if (mode == ir_var_shader_out) {
>> > -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
>> > - /* For geometry shaders, outputs need to be lowered
>> > before each call
>> > -  * to EmitVertex()
>> > -  */
>> > - lower_packed_varyings_gs_splicer splicer(mem_ctx,
>> > _instructions);
>> > -
>> > - /* Add all the variables in first. */
>> > - main_func_sig->body.head->insert_before(_variables);
>> >
>> > - /* Now update all the EmitVertex instances */
>> > - splicer.run(instructions);
>> > +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
>> > + shader->Stage == MESA_SHADER_TESS_EVAL)) {
>> > +  exec_list *instructions = shader->ir;
>> > +  exec_list new_instructions, new_variables;
>> > +
>> > +  lower_packed_varyings_visitor visitor(mem_ctx,
>> > locations_used, mode,
>> > +gs_input_vertices,
>> > +_instructions,
>> > +_variables,
>> > +
>> >  disable_varying_packing);
>> > +  visitor.run(shader);
>> > +  if (mode == ir_var_shader_out) {
>> > + if (shader->Stage == MESA_SHADER_GEOMETRY) {
>> > +/* For geometry shaders, outputs need to be lowered
>> > before each
>> > + * call to EmitVertex()
>> > + */
>> > +lower_packed_varyings_gs_splicer splicer(mem_ctx,
>> > +
>> >  _instructions);
>> > +
>> > +

Re: [Mesa-dev] [PATCH] glsl: replace null check with assert

2016-01-06 Thread Timothy Arceri
Ping.

Just looking over some older patches of mine. I made this change both
as a clean-up and because the constant expression evaluation code
is some of the most expensive in the compiler, and this was right in the
hot path for some of the AoA tests until I improved some other
optimisation to avoid calling it so much.

It's not going to make much of a difference but with enhanced layouts
also now making use of the constant expression path it would still be
good to land this.

On Tue, 2015-07-14 at 23:30 +1000, Timothy Arceri wrote:
> This was added in 54f583a20 since then error handling has improved.
> 
> The test this was added to fix now fails earlier since 01822706ec
> ---
>  src/glsl/ir_constant_expression.cpp | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/src/glsl/ir_constant_expression.cpp
> b/src/glsl/ir_constant_expression.cpp
> index 171b8e9..5732867 100644
> --- a/src/glsl/ir_constant_expression.cpp
> +++ b/src/glsl/ir_constant_expression.cpp
> @@ -1857,9 +1857,7 @@ ir_swizzle::constant_expression_value(struct
> hash_table *variable_context)
>  ir_constant *
>  ir_dereference_variable::constant_expression_value(struct hash_table
> *variable_context)
>  {
> -   /* This may occur during compile and var->type is
> glsl_type::error_type */
> -   if (!var)
> -  return NULL;
> +   assert(var);
>  
> /* Give priority to the context hashtable, if it exists */
> if (variable_context) {
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations

2016-01-06 Thread Ilia Mirkin
On Wed, Jan 6, 2016 at 4:32 PM, Timothy Arceri
 wrote:
> On Wed, 2016-01-06 at 09:46 -0500, Ilia Mirkin wrote:
>> On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri
>>  wrote:
>> > Previously each member was being counted as using a single slot,
>> > count_attribute_slots() fixes the count for array and struct
>> > members.
>> >
>> > Also don't assign a negative value to the unsigned expl_location
>> > variable.
>> > ---
>> >
>> >  Fixes these new piglit tests:
>> >http://patchwork.freedesktop.org/patch/69531/
>> >
>> >  src/glsl/ast_to_hir.cpp | 9 +
>> >  1 file changed, 5 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
>> > index 0197cdc..50d5e22 100644
>> > --- a/src/glsl/ast_to_hir.cpp
>> > +++ b/src/glsl/ast_to_hir.cpp
>> > @@ -6408,12 +6408,13 @@
>> > ast_process_struct_or_iface_block_members(exec_list *instructions,
>> >  if (process_qualifier_constant(state, &loc,
>> > "location",
>> > qual->location,
>> > &qual_location)) {
>> > fields[i].location = VARYING_SLOT_VAR0 +
>> > qual_location;
>> > -   expl_location = fields[i].location + 1;
>> > +   expl_location = fields[i].location +
>> > +  fields[i].type->count_attribute_slots(false);
>> >  }
>> >   } else {
>> >  if (layout && layout->flags.q.explicit_location) {
>> > fields[i].location = expl_location;
>> > -   expl_location = expl_location + 1;
>> > +   expl_location += fields[i].type
>> > ->count_attribute_slots(false);
>> >  } else {
>> > fields[i].location = -1;
>> >  }
>> > @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list
>> > *instructions,
>> >
>> > state->struct_specifier_depth++;
>> >
>> > -   unsigned expl_location = -1;
>> > +   unsigned expl_location = 0;
>> > if (layout && layout->flags.q.explicit_location) {
>> >if (!process_qualifier_constant(state, &loc, "location",
>> >layout->location,
>> > &expl_location)) {
>> > @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list
>> > *instructions,
>> >return NULL;
>> > }
>> >
>> > -   unsigned expl_location = -1;
>> > +   unsigned expl_location = 0;
>>
>> There are a number of places that check for location != -1 as a
>> sanity
>> check... won't this defeat that?
>
> No because we only use expl_location when the explicit location flag is
> set and if there is an error we don't copy the value from
> expl_location.
>
> I believe I initialised it to stop gcc complaining although I just
> tried removing this and it no longer complains so I guess I can just
> remove the initialisation altogether.
>
> Are you happy with the change otherwise?

Oh I see what's going on now. I took a much more careful look at the
surrounding logic and I think switching expl_location to be init to 0
is fine -- if it's set on the layout it'll be initialized, otherwise
it will never be used. Basically "expl_location" is "what is the
current location that we should assign the next variable to when
there's no explicit location listed on the var, but there is one on
the block".

So actually as originally sent, your patch is

Reviewed-by: Ilia Mirkin 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] nir/opcodes: Fix the folding expression for usub_borrow

2016-01-06 Thread Jason Ekstrand
---
 src/glsl/nir/nir_opcodes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 1cd01a4..4bc6d16 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -366,7 +366,7 @@ binop_convert("uadd_carry", tbool, tuint, commutative, 
"src0 + src1 < src0")
 # returns a boolean representing the borrow resulting from the subtraction
 # of the two unsigned arguments.
 
-binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0")
+binop_convert("usub_borrow", tbool, tuint, "", "src0 < src1")
 
 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
 binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages

2016-01-06 Thread Ilia Mirkin
On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri
 wrote:
> Tessellation shaders treat varyings as shared memory and invocations
> can access each other's varyings; therefore we can't use the existing
> method to lower them.

That's not strictly true... this is only true of tess control outputs
(which can be written by the current invocation, but also read in by
other invocations, effectively acting as a shared memory -- both true
of per-invocation outputs as well as per-patch outputs). Does that
information change this patch at all?

>
> This adds a check for these stages as following patches will
> allow explicit locations to be lowered even when the driver and existing
> tessellation checks ask for it to be disabled; we do this to enable support
> for the component layout qualifier.
> ---
>  src/glsl/lower_packed_varyings.cpp | 62 
> +-
>  1 file changed, 34 insertions(+), 28 deletions(-)
>
> diff --git a/src/glsl/lower_packed_varyings.cpp 
> b/src/glsl/lower_packed_varyings.cpp
> index 2899846..e4e9a35 100644
> --- a/src/glsl/lower_packed_varyings.cpp
> +++ b/src/glsl/lower_packed_varyings.cpp
> @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx, unsigned 
> locations_used,
>ir_variable_mode mode, unsigned gs_input_vertices,
>gl_shader *shader, bool disable_varying_packing)
>  {
> -   exec_list *instructions = shader->ir;
> ir_function *main_func = shader->symbols->get_function("main");
> exec_list void_parameters;
> ir_function_signature *main_func_sig
>= main_func->matching_signature(NULL, &void_parameters, false);
> -   exec_list new_instructions, new_variables;
> -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
> - gs_input_vertices,
> - &new_instructions,
> - &new_variables,
> - disable_varying_packing);
> -   visitor.run(shader);
> -   if (mode == ir_var_shader_out) {
> -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> - /* For geometry shaders, outputs need to be lowered before each call
> -  * to EmitVertex()
> -  */
> - lower_packed_varyings_gs_splicer splicer(mem_ctx, 
> &new_instructions);
> -
> - /* Add all the variables in first. */
> - main_func_sig->body.head->insert_before(&new_variables);
>
> - /* Now update all the EmitVertex instances */
> - splicer.run(instructions);
> +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
> + shader->Stage == MESA_SHADER_TESS_EVAL)) {
> +  exec_list *instructions = shader->ir;
> +  exec_list new_instructions, new_variables;
> +
> +  lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
> +gs_input_vertices,
> +&new_instructions,
> +&new_variables,
> +disable_varying_packing);
> +  visitor.run(shader);
> +  if (mode == ir_var_shader_out) {
> + if (shader->Stage == MESA_SHADER_GEOMETRY) {
> +/* For geometry shaders, outputs need to be lowered before each
> + * call to EmitVertex()
> + */
> +lower_packed_varyings_gs_splicer splicer(mem_ctx,
> + &new_instructions);
> +
> +/* Add all the variables in first. */
> +main_func_sig->body.head->insert_before(&new_variables);
> +
> +/* Now update all the EmitVertex instances */
> +splicer.run(instructions);
> + } else {
> +/* For other shader types, outputs need to be lowered at the end
> + * of main()
> + */
> +main_func_sig->body.append_list(&new_variables);
> +main_func_sig->body.append_list(&new_instructions);
> + }
>} else {
> - /* For other shader types, outputs need to be lowered at the end of
> -  * main()
> -  */
> - main_func_sig->body.append_list(&new_variables);
> - main_func_sig->body.append_list(&new_instructions);
> + /* Shader inputs need to be lowered at the beginning of main() */
> + main_func_sig->body.head->insert_before(&new_instructions);
> + main_func_sig->body.head->insert_before(&new_variables);
>}
> -   } else {
> -  /* Shader inputs need to be lowered at the beginning of main() */
> -  main_func_sig->body.head->insert_before(&new_instructions);
> -  main_func_sig->body.head->insert_before(&new_variables);
> }
>  }
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 16/28] glsl: don't pack tessellation stages like we do other stages

2016-01-06 Thread Timothy Arceri
On Wed, 2016-01-06 at 18:45 -0500, Ilia Mirkin wrote:
> On Wed, Jan 6, 2016 at 6:40 PM, Timothy Arceri
>  wrote:
> > On Wed, 2016-01-06 at 17:50 -0500, Ilia Mirkin wrote:
> > > On Tue, Dec 29, 2015 at 12:00 AM, Timothy Arceri
> > >  wrote:
> > > > Tessellation shaders treat varyings as shared memory and
> > > > invocations
> > > > can access each others varyings therefore we can't use the
> > > > existing
> > > > method to lower them.
> > > 
> > > That's not strictly true... this is only true of tess control
> > > outputs
> > > (which can be written by the current invocation, but also read in
> > > by
> > > other invocations, effectively acting as a shared memory -- both
> > > true
> > > of per-invocation outputs as well as per-patch outputs). Does
> > > that
> > > information change this patch at all?
> > 
> > I don't think so. The problem is that the current packing code
> > works
> > like this:
> > 
> > - Change vars to be packed to temporaries, create new packed
> > varyings.
> > - Copy *all* values from the new packed input varying to the
> >  temporaries at the start of main.
> > - Copy *all* values from the temporaries to the new packed output
> > vars
> > at the end of main (or before emit for GS).
> > 
> > As well as the invocations stomping on each other this results in
> > 32
> > (GL_MAX_PATCH_VERTICES?) copies for each TCS input as it just
> > copies
> > the full array.
> 
> Presumably it also does this for GS? Although it's a lot more common
> for a single GS invocation to consume

Right. I thought about changing GS to do it different also but until
the backend can clean this up better it would likely make things even
worse.

> 
> > 
> > > The current packing just doesn't work well for tessellation; it's
> > > easier
> > > to just disable it for tessellation and do it all using a different
> > > method rather than trying to mix and match.
> 
> I thought it already *was* disabled... but I think you still have to
> have packing on TES outputs, because (a) your arguments against don't
> apply and (b) it might feed into transform feedback, which i have
> faint recollections must go through packing.

Yeah, it's a bit of a mess. Gallium tries to always disable packing
unless transform feedback is enabled. Are there any Gallium drivers
where it's not enabled?

Then there is code that disables it for tessellation (except TES
outputs). As far as I understand it, yes, varyings for transform feedback
must go through packing, in which case I do need to allow these to be
lowered for TES outputs. Thanks for pointing it out; I will change this.


> 
> > 
> > 
> > > 
> > > > 
> > > > This adds a check for these stages as following patches will
> > > > allow explicit locations to be lowered even when the driver and
> > > > existing
> > > > tesselation checks ask for it to be disabled, we do this to
> > > > enable
> > > > support
> > > > for the component layout qualifier.
> > > > ---
> > > >  src/glsl/lower_packed_varyings.cpp | 62 +-
> > > > 
> > > > 
> > > >  1 file changed, 34 insertions(+), 28 deletions(-)
> > > > 
> > > > diff --git a/src/glsl/lower_packed_varyings.cpp
> > > > b/src/glsl/lower_packed_varyings.cpp
> > > > index 2899846..e4e9a35 100644
> > > > --- a/src/glsl/lower_packed_varyings.cpp
> > > > +++ b/src/glsl/lower_packed_varyings.cpp
> > > > @@ -737,40 +737,46 @@ lower_packed_varyings(void *mem_ctx,
> > > > unsigned
> > > > locations_used,
> > > >ir_variable_mode mode, unsigned
> > > > gs_input_vertices,
> > > >gl_shader *shader, bool
> > > > disable_varying_packing)
> > > >  {
> > > > -   exec_list *instructions = shader->ir;
> > > > ir_function *main_func = shader->symbols
> > > > ->get_function("main");
> > > > exec_list void_parameters;
> > > > ir_function_signature *main_func_sig
> > > >= main_func->matching_signature(NULL, _parameters,
> > > > false);
> > > > -   exec_list new_instructions, new_variables;
> > > > -   lower_packed_varyings_visitor visitor(mem_ctx,
> > > > locations_used,
> > > > mode,
> > > > - gs_input_vertices,
> > > > - _instructions,
> > > > - _variables,
> > > > -
> > > >  disable_varying_packing);
> > > > -   visitor.run(shader);
> > > > -   if (mode == ir_var_shader_out) {
> > > > -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > > > - /* For geometry shaders, outputs need to be lowered
> > > > before each call
> > > > -  * to EmitVertex()
> > > > -  */
> > > > - lower_packed_varyings_gs_splicer splicer(mem_ctx,
> > > > _instructions);
> > > > -
> > > > - /* Add all the variables in first. */
> > > > - main_func_sig->body.head
> > > > ->insert_before(_variables);
> > > > 
> > > > - /* Now update all the 

Re: [Mesa-dev] [PATCH 3/3] llvmpipe: add sse code for fixed position calculation

2016-01-06 Thread Roland Scheidegger
Am 04.01.2016 um 20:38 schrieb Jose Fonseca:
> On 02/01/16 20:39, srol...@vmware.com wrote:
>> From: Roland Scheidegger 
>>
>> This is quite a few fewer instructions, although it still does the two
>> 64bit muls with scalar C code (they'd need way more shuffles, plus a
>> fixup for the signed mul, so it totally doesn't seem worth it - x86 can
>> do 32x32->64bit signed scalar muls natively just fine after all, even
>> on 32bit).
>>
>> (This still doesn't have a measurable performance impact in reality,
>> although
>> profiler seems to say time spent in setup indeed has gone down by 10%
>> or so
>> overall.)
>> ---
>>   src/gallium/drivers/llvmpipe/lp_setup_tri.c | 58
>> +
>>   1 file changed, 50 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
>> b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
>> index cb1d715..fefd1c1 100644
>> --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
>> +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
>> @@ -65,11 +65,11 @@ fixed_to_float(int a)
>>   struct fixed_position {
>>  int32_t x[4];
>>  int32_t y[4];
>> -   int64_t area;
>>  int32_t dx01;
>>  int32_t dy01;
>>  int32_t dx20;
>>  int32_t dy20;
>> +   int64_t area;
>>   };
>>
>>
>> @@ -866,29 +866,71 @@ static void retry_triangle_ccw( struct
>> lp_setup_context *setup,
>>
>>   /**
>>* Calculate fixed position data for a triangle
>> + * It is unfortunate we need to do that here (as we need area
>> + * calculated in fixed point), as there's quite some code duplication
>> + * to what is done in the jit setup prog.
>>*/
>>   static inline void
>> -calc_fixed_position( struct lp_setup_context *setup,
>> - struct fixed_position* position,
>> - const float (*v0)[4],
>> - const float (*v1)[4],
>> - const float (*v2)[4])
>> +calc_fixed_position(struct lp_setup_context *setup,
>> +struct fixed_position* position,
>> +const float (*v0)[4],
>> +const float (*v1)[4],
>> +const float (*v2)[4])
>>   {
>> +   /*
>> +* The rounding may not be quite the same with PIPE_ARCH_SSE
>> +* (util_iround right now only does nearest/even on x87,
>> +* otherwise nearest/away-from-zero).
>> +* Both should be acceptable, I think.
>> +*/
>> +#if defined(PIPE_ARCH_SSE)
>> +   __m128d v0r, v1r, v2r;
>> +   __m128 vxy0xy2, vxy1xy0;
>> +   __m128i vxy0xy2i, vxy1xy0i;
>> +   __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
>> +   __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
>> +   __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
>> +   v0r = _mm_load_sd((const double *)v0[0]);
>> +   v1r = _mm_load_sd((const double *)v1[0]);
>> +   v2r = _mm_load_sd((const double *)v2[0]);
>> +   vxy0xy2 = (__m128)_mm_unpacklo_pd(v0r, v2r);
>> +   vxy1xy0 = (__m128)_mm_unpacklo_pd(v1r, v0r);
>> +   vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
>> +   vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
>> +   vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
>> +   vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
>> +   vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
>> +   vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
>> +   dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
>> +   _mm_store_si128((__m128i *)&position->dx01, dxdy0120);
>> +   /*
>> +* For the mul, would need some more shuffles, plus emulation
>> +* for the signed mul (without sse41), so don't bother.
>> +*/
>> +   x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
>> +   x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
>> +   x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
>> +   y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
>> +   _mm_store_si128((__m128i *)&position->x[0], x0120);
>> +   _mm_store_si128((__m128i *)&position->y[0], y0120);
>> +
>> +#else
>>  position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
>>  position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
>>  position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
>> -   position->x[3] = 0;
>> +   position->x[3] = 0; // should be unused
>>
>>  position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
>>  position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
>>  position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
>> -   position->y[3] = 0;
>> +   position->y[3] = 0; // should be unused
>>
>>  position->dx01 = position->x[0] - position->x[1];
>>  position->dy01 = position->y[0] - position->y[1];
>>
>>  position->dx20 = position->x[2] - position->x[0];
>>  position->dy20 = position->y[2] - position->y[0];
>> +#endif
>>
>>  position->area = IMUL64(position->dx01, position->dy20) -
>>IMUL64(position->dx20, position->dy01);
>>
> 
> LGTM too.
> 
> Reviewed-by: Jose Fonseca 

Hmm actually I suppose I didn't do enough testing with that. This fails
one piglit (completely 

Re: [Mesa-dev] [PATCH 1/4] vbo: create a new draw function interface for indirect draws

2016-01-06 Thread Ian Romanick
On 01/04/2016 07:22 PM, Ilia Mirkin wrote:
> Sure, no problem. Do you think you'll have time to look at it in the
> next day or two though?

Yes.  I'll review it on Thursday.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code

2016-01-06 Thread Timothy Arceri
Whoops, the subject line should be [PATCH V2 16/28]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code

2016-01-06 Thread Timothy Arceri
On Wed, 2016-01-06 at 20:19 -0500, Ilia Mirkin wrote:
> On Wed, Jan 6, 2016 at 8:00 PM, Timothy Arceri
>  wrote:
> > Following patches will allow packing of varyings with explicit
> > locations
> > via the component layout qualifier. Moving the rules here will
> > enable
> > us to call an alternate path for packing tessellation stages with
> > explicit locations.
> > ---
> >  V2: move the tessellation packing rules, allow TES output to be
> > packed.
> > 
> >  src/glsl/link_varyings.cpp | 17 ++
> >  src/glsl/lower_packed_varyings.cpp | 63 +-
> > 
> >  2 files changed, 38 insertions(+), 42 deletions(-)
> > 
> > diff --git a/src/glsl/link_varyings.cpp
> > b/src/glsl/link_varyings.cpp
> > index be662bc..69e24e3 100644
> > --- a/src/glsl/link_varyings.cpp
> > +++ b/src/glsl/link_varyings.cpp
> > @@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context
> > *ctx,
> >assert(!ctx->Extensions.EXT_transform_feedback);
> > }
> > 
> > -   /* Tessellation shaders treat inputs and outputs as shared
> > memory and can
> > -* access inputs and outputs of other invocations.
> > -* Therefore, they can't be lowered to temps easily (and
> > definitely not
> > -* efficiently).
> > -*/
> > -   bool disable_varying_packing =
> > -  ctx->Const.DisableVaryingPacking ||
> > -  (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
> > -  (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
> > -  (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
> > -
> > -   varying_matches matches(disable_varying_packing,
> > +   varying_matches matches(ctx->Const.DisableVaryingPacking,
> > producer ? producer->Stage :
> > (gl_shader_stage)-1,
> > consumer ? consumer->Stage :
> > (gl_shader_stage)-1);
> > hash_table *tfeedback_candidates
> > @@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context
> > *ctx,
> > 
> > if (producer) {
> >lower_packed_varyings(mem_ctx, slots_used,
> > ir_var_shader_out,
> > -0, producer, disable_varying_packing);
> > +0, producer, ctx
> > ->Const.DisableVaryingPacking);
> > }
> > 
> > if (consumer) {
> >lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
> >  consumer_vertices, consumer,
> > -disable_varying_packing);
> > +ctx->Const.DisableVaryingPacking);
> > }
> > 
> > return true;
> > diff --git a/src/glsl/lower_packed_varyings.cpp
> > b/src/glsl/lower_packed_varyings.cpp
> > index 2899846..4723c2b 100644
> > --- a/src/glsl/lower_packed_varyings.cpp
> > +++ b/src/glsl/lower_packed_varyings.cpp
> > @@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned
> > locations_used,
> >ir_variable_mode mode, unsigned
> > gs_input_vertices,
> >gl_shader *shader, bool
> > disable_varying_packing)
> >  {
> > -   exec_list *instructions = shader->ir;
> > ir_function *main_func = shader->symbols->get_function("main");
> > exec_list void_parameters;
> > ir_function_signature *main_func_sig
> >= main_func->matching_signature(NULL, &void_parameters,
> > false);
> > -   exec_list new_instructions, new_variables;
> > -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used,
> > mode,
> > - gs_input_vertices,
> > - &new_instructions,
> > - &new_variables,
> > - disable_varying_packing);
> > -   visitor.run(shader);
> > -   if (mode == ir_var_shader_out) {
> > -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> > - /* For geometry shaders, outputs need to be lowered
> > before each call
> > -  * to EmitVertex()
> > -  */
> > - lower_packed_varyings_gs_splicer splicer(mem_ctx,
> > &new_instructions);
> > -
> > - /* Add all the variables in first. */
> > - main_func_sig->body.head->insert_before(&new_variables);
> > 
> > - /* Now update all the EmitVertex instances */
> > - splicer.run(instructions);
> > +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
> > + (shader->Stage == MESA_SHADER_TESS_EVAL &&
> > +  mode == ir_var_shader_in))) {
> 
> To match what was being done before (and IMHO much more readable), you
> might do something like
> 
> if (shader->Stage == TESS_CTRL || (shader->stage == TESS_EVAL && mode
> == in))
>   disable_varying_packing = true;
> 
> That would be equivalent to what was being done before right? Or if
> you don't want the disable_varying_packing thing to be set to true in
> that case, you could just return... would still be easier to read and
> avoid the extra indent.
> 
>   -ilia

The indenting is to reduce 

Re: [Mesa-dev] [PATCH 09/23] radeonsi: use EXP_NULL for pixel shaders without outputs

2016-01-06 Thread Michel Dänzer
On 06.01.2016 21:41, Marek Olšák wrote:
> From: Marek Olšák 
> 
> This never happens currently.

So this change is untested? If so, how confident are you that this won't
break if it ever actually gets hit? :)


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: quieten compiler warning about out-of-bounds access

2016-01-06 Thread Ilia Mirkin
On Tue, Jan 5, 2016 at 8:30 PM, Jason Ekstrand  wrote:
>
> On Jan 4, 2016 8:30 PM, "Ilia Mirkin"  wrote:
>>
>> gcc 4.9.3 shows the following error:
>>
>> brw_vue_map.c:260:20: warning: array subscript is above array bounds
>> [-Warray-bounds]
>> return brw_names[slot - VARYING_SLOT_MAX];
>>
>> This is because BRW_VARYING_SLOT_COUNT is a valid value for the enum
>> type. Adding an assert will generate no additional code but will teach
>> the compiler to not complain.
>
> What if you build in release mode?

You get the warning :)

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 16/35] glsl: move packing rules for tessellation stages into the packing code

2016-01-06 Thread Ilia Mirkin
On Wed, Jan 6, 2016 at 8:00 PM, Timothy Arceri
 wrote:
> Following patches will allow packing of varyings with explicit locations
> via the component layout qualifier. Moving the rules here will enable
> us to call an alternate path for packing tessellation stages with
> explicit locations.
> ---
>  V2: move the tessellation packing rules, allow TES output to be packed.
>
>  src/glsl/link_varyings.cpp | 17 ++
>  src/glsl/lower_packed_varyings.cpp | 63 
> +-
>  2 files changed, 38 insertions(+), 42 deletions(-)
>
> diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
> index be662bc..69e24e3 100644
> --- a/src/glsl/link_varyings.cpp
> +++ b/src/glsl/link_varyings.cpp
> @@ -1640,18 +1640,7 @@ assign_varying_locations(struct gl_context *ctx,
>assert(!ctx->Extensions.EXT_transform_feedback);
> }
>
> -   /* Tessellation shaders treat inputs and outputs as shared memory and can
> -* access inputs and outputs of other invocations.
> -* Therefore, they can't be lowered to temps easily (and definitely not
> -* efficiently).
> -*/
> -   bool disable_varying_packing =
> -  ctx->Const.DisableVaryingPacking ||
> -  (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
> -  (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
> -  (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
> -
> -   varying_matches matches(disable_varying_packing,
> +   varying_matches matches(ctx->Const.DisableVaryingPacking,
> producer ? producer->Stage : (gl_shader_stage)-1,
> consumer ? consumer->Stage : (gl_shader_stage)-1);
> hash_table *tfeedback_candidates
> @@ -1864,13 +1853,13 @@ assign_varying_locations(struct gl_context *ctx,
>
> if (producer) {
>lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
> -0, producer, disable_varying_packing);
> +0, producer, ctx->Const.DisableVaryingPacking);
> }
>
> if (consumer) {
>lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
>  consumer_vertices, consumer,
> -disable_varying_packing);
> +ctx->Const.DisableVaryingPacking);
> }
>
> return true;
> diff --git a/src/glsl/lower_packed_varyings.cpp 
> b/src/glsl/lower_packed_varyings.cpp
> index 2899846..4723c2b 100644
> --- a/src/glsl/lower_packed_varyings.cpp
> +++ b/src/glsl/lower_packed_varyings.cpp
> @@ -737,40 +737,47 @@ lower_packed_varyings(void *mem_ctx, unsigned 
> locations_used,
>ir_variable_mode mode, unsigned gs_input_vertices,
>gl_shader *shader, bool disable_varying_packing)
>  {
> -   exec_list *instructions = shader->ir;
> ir_function *main_func = shader->symbols->get_function("main");
> exec_list void_parameters;
> ir_function_signature *main_func_sig
>= main_func->matching_signature(NULL, &void_parameters, false);
> -   exec_list new_instructions, new_variables;
> -   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
> - gs_input_vertices,
> - &new_instructions,
> - &new_variables,
> - disable_varying_packing);
> -   visitor.run(shader);
> -   if (mode == ir_var_shader_out) {
> -  if (shader->Stage == MESA_SHADER_GEOMETRY) {
> - /* For geometry shaders, outputs need to be lowered before each call
> -  * to EmitVertex()
> -  */
> - lower_packed_varyings_gs_splicer splicer(mem_ctx, 
> &new_instructions);
> -
> - /* Add all the variables in first. */
> - main_func_sig->body.head->insert_before(&new_variables);
>
> - /* Now update all the EmitVertex instances */
> - splicer.run(instructions);
> +   if (!(shader->Stage == MESA_SHADER_TESS_CTRL ||
> + (shader->Stage == MESA_SHADER_TESS_EVAL &&
> +  mode == ir_var_shader_in))) {

To match what was being done before (and IMHO much more readable), you
might do something like

if (shader->Stage == TESS_CTRL || (shader->stage == TESS_EVAL && mode == in))
  disable_varying_packing = true;

That would be equivalent to what was being done before right? Or if
you don't want the disable_varying_packing thing to be set to true in
that case, you could just return... would still be easier to read and
avoid the extra indent.

  -ilia

> +  exec_list *instructions = shader->ir;
> +  exec_list new_instructions, new_variables;
> +
> +  lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
> +gs_input_vertices,
> +&new_instructions,
> +

[Mesa-dev] [PATCH] i965: Explicitly write the "TR DS Cache Disable" bit at TCS EOT.

2016-01-06 Thread Kenneth Graunke
Bit 0 of the Patch Header is "TR DS Cache Disable".  Setting that bit
disables the DS Cache for tessellator-output topologies resulting in
stitch-transition regions (but leaves it enabled for other cases).

We probably shouldn't leave this to chance - the URB could contain
garbage - which could result in the cache randomly being turned on
or off.

This patch makes the final EOT write 0 to the first DWord (which
only contains this one bit).  This ensures the cache is always on.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 5 -
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp   | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c6a52c5..20e6305 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -274,9 +274,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_POW:
+   case TCS_OPCODE_THREAD_END:
   return 2;
case VS_OPCODE_URB_WRITE:
-   case TCS_OPCODE_THREAD_END:
   return 1;
case VS_OPCODE_PULL_CONSTANT_LOAD:
   return 2;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 6b03a1c..7ae1059 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -981,15 +981,18 @@ generate_tcs_thread_end(struct brw_codegen *p, 
vec4_instruction *inst)
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, header, brw_imm_ud(0));
+   brw_MOV(p, get_element_ud(header, 5), brw_imm_ud(WRITEMASK_X << 8));
brw_MOV(p, get_element_ud(header, 0),
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
+   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), brw_imm_ud(0u));
brw_pop_insn_state(p);
 
brw_urb_WRITE(p,
  brw_null_reg(), /* dest */
  inst->base_mrf, /* starting mrf reg nr */
  header,
- BRW_URB_WRITE_EOT | inst->urb_write_flags,
+ BRW_URB_WRITE_EOT | BRW_URB_WRITE_OWORD |
+ BRW_URB_WRITE_USE_CHANNEL_MASKS,
  inst->mlen,
  0,  /* response len */
  0,  /* urb destination offset */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index fb6ca8e..a65a633 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -205,7 +205,7 @@ vec4_tcs_visitor::emit_thread_end()
 
inst = emit(TCS_OPCODE_THREAD_END);
inst->base_mrf = 14;
-   inst->mlen = 1;
+   inst->mlen = 2;
 }
 
 
-- 
2.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] draw: fix line stippling with unfilled prims

2016-01-06 Thread sroland
From: Roland Scheidegger 

The unfilled stage was not filling in the prim header, and the line stage
then decided to reset the stipple counter or not based on the uninitialized
data. This causes some failures in conform linestipple test (albeit quite
randomly happening depending on environment).
So fill in the prim header in the unfilled stage - I am not entirely sure
if anybody really needs determinant after that stage, but there's at least
later stages (wide line for instance) which copy over the determinant as well.
---
 src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 56 +
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c 
b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 8e6435c..b9ded14 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -86,27 +86,33 @@ inject_front_face_info(struct draw_stage *stage,
 }
 

-static void point( struct draw_stage *stage,
-  struct vertex_header *v0 )
+static void point(struct draw_stage *stage,
+  struct prim_header *header,
+  struct vertex_header *v0)
 {
struct prim_header tmp;
+   tmp.det = header->det;
+   tmp.flags = 0;
tmp.v[0] = v0;
-   stage->next->point( stage->next, &tmp );
+   stage->next->point(stage->next, &tmp);
 }
 
-static void line( struct draw_stage *stage,
- struct vertex_header *v0,
- struct vertex_header *v1 )
+static void line(struct draw_stage *stage,
+ struct prim_header *header,
+ struct vertex_header *v0,
+ struct vertex_header *v1)
 {
struct prim_header tmp;
+   tmp.det = header->det;
+   tmp.flags = 0;
tmp.v[0] = v0;
tmp.v[1] = v1;
-   stage->next->line( stage->next, &tmp );
+   stage->next->line(stage->next, &tmp);
 }
 
 
-static void points( struct draw_stage *stage,
-   struct prim_header *header )
+static void points(struct draw_stage *stage,
+   struct prim_header *header)
 {
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
@@ -114,27 +120,41 @@ static void points( struct draw_stage *stage,
 
inject_front_face_info(stage, header);
 
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, 
v0 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, 
v1 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, 
v2 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
+  point(stage, header, v0);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
+  point(stage, header, v1);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
+  point(stage, header, v2);
 }
 
 
-static void lines( struct draw_stage *stage,
-  struct prim_header *header )
+static void lines(struct draw_stage *stage,
+  struct prim_header *header)
 {
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
struct vertex_header *v2 = header->v[2];
 
if (header->flags & DRAW_PIPE_RESET_STIPPLE)
-  stage->next->reset_stipple_counter( stage->next );
+  /*
+   * XXX could revisit this. The only stage which cares is the line
+   * stipple stage. Could just emit correct reset flags here and not
+   * bother about all the calling through reset_stipple_counter
+   * stages. Though technically it is necessary if line stipple is
+   * handled by the driver, but this is not actually hooked up when
+   * using vbuf (vbuf stage reset_stipple_counter does nothing).
+   */
+  stage->next->reset_stipple_counter(stage->next);
 
inject_front_face_info(stage, header);
 
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, 
v2, v0 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, 
v0, v1 );
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, 
v1, v2 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
+  line(stage, header, v2, v0);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
+  line(stage, header, v0, v1);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
+  line(stage, header, v1, v2);
 }
 
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: don't touch degree on physreg RIG nodes

2016-01-06 Thread Ilia Mirkin
These nodes don't go through reduction, so we shouldn't be increasing
their degrees.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91895
Signed-off-by: Ilia Mirkin 
Cc: "11.0 11.1" 
---

I would like to see a *bunch* of testing on this before merging it... RA-land
is far from my expertise. However it does fix the shaders in the original bug
and doesn't regress the few additional things that I tried.

 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index cd8c42c..f1ffcba 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1129,9 +1129,11 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
 void
 GCRA::RIG_Node::addInterference(RIG_Node *node)
 {
-   this->degree += relDegree[node->colors][colors];
-   node->degree += relDegree[colors][node->colors];
-
+   // don't add degree for physregs since they won't go through simplify()
+   if (this->reg < 0)
+  this->degree += relDegree[node->colors][colors];
+   if (node->reg < 0)
+  node->degree += relDegree[colors][node->colors];
this->attach(node, Graph::Edge::CROSS);
 }
 
-- 
2.4.10

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/28] glsl: validate and store component layout qualifier in GLSL IR

2016-01-06 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> We make use of the existing IR field location_frac used for tracking
> component locations.
> ---
>  src/glsl/ast_to_hir.cpp | 38 ++
>  src/glsl/ir.h   |  5 +
>  2 files changed, 43 insertions(+)
>
> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> index 1091c02..bb35d72 100644
> --- a/src/glsl/ast_to_hir.cpp
> +++ b/src/glsl/ast_to_hir.cpp
> @@ -3075,10 +3075,42 @@ apply_layout_qualifier_to_variable(const struct 
> ast_type_qualifier *qual,
>
> if (qual->flags.q.explicit_location) {
>apply_explicit_location(qual, var, state, loc);
> +
> +  if (qual->flags.q.explicit_component) {
> + unsigned qual_component;
> + if (process_qualifier_constant(state, loc, "component",
> +qual->component, &qual_component)) {
> +const glsl_type *type = var->type->without_array();
> +unsigned components = type->component_slots();
> +
> +if (type->is_matrix() || type->is_record()) {
> +   _mesa_glsl_error(loc, state, "component layout qualifier "
> +"cannot be applied to a matrix, a structure, 
> "
> +"a block, or an array containing any of "
> +"these.");
> +} else if (qual_component != 0 &&
> +(qual_component + components - 1) > 3) {
> +   _mesa_glsl_error(loc, state, "component overflow (%u > 3)",
> +(qual_component + components - 1));
> +} else if (qual_component == 1 && type->is_double()) {
> +   /* We don't bother checking for 3 as it should be caught by 
> the
> +* overflow check above.
> +*/
> +   _mesa_glsl_error(loc, state, "doubles cannot begin at "
> +"component 1 or 3");
> +} else {
> +   var->data.explicit_component = true;
> +   var->data.location_frac = qual_component;
> +}
> + }
> +  }
> } else if (qual->flags.q.explicit_index) {
>if (!qual->flags.q.subroutine_def)
>   _mesa_glsl_error(loc, state,
>"explicit index requires explicit location");
> +   } else if (qual->flags.q.explicit_component) {
> +  _mesa_glsl_error(loc, state,
> +   "explicit component requires explicit location");
> }
>
> if (qual->flags.q.explicit_binding) {
> @@ -6660,6 +6692,12 @@ ast_interface_block::hir(exec_list *instructions,
> "Interface block sets both readonly and writeonly");
> }
>
> +   if (this->layout.flags.q.explicit_component) {
> +  _mesa_glsl_error(&loc, state, "component layout qualifier cannot be "
> +   "applied to a matrix, a structure, a block, or an "
> +   "array containing any of these.");
> +   }
> +
> unsigned qual_stream;
> if (!process_qualifier_constant(state, &loc, "stream",
> this->layout.stream,
> &qual_stream) ||
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index 159f94d..d604a1f 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -714,6 +714,11 @@ public:
>unsigned explicit_binding:1;
>
>/**
> +   * Was an initial component explicitly set in the shader?
> +   */
> +  unsigned explicit_component:1;
> +
> +  /**
> * Does this variable have an initializer?
> *
> * This is used by the linker to cross-validiate initializers of global
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/28] glsl: fix overlapping of varying locations for arrays and structs

2016-01-06 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> Previously we were only reserving a single location for arrays and
> structs.
>
> We also didn't take into account implicit locations clashing with
> explicit locations when assigning locations for their arrays or
> structs.
>
> This patch fixes both issues.
>
> V5: fix regression for patch inputs/outputs in tessellation shaders
> V4: just use count_attribute_slots() to get the number of slots,
> also calculate the correct number of slots to reserve for gs and
> tess stages by making use of the new get_varying_type() helper.
> V3: handle arrays of structs
> V2: also fix for arrays of arrays and structs.
> ---
>  src/glsl/link_varyings.cpp | 80 
> +++---
>  1 file changed, 68 insertions(+), 12 deletions(-)
>
> diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
> index d9550df..34e8418 100644
> --- a/src/glsl/link_varyings.cpp
> +++ b/src/glsl/link_varyings.cpp
> @@ -825,7 +825,8 @@ public:
> gl_shader_stage consumer_stage);
> ~varying_matches();
> void record(ir_variable *producer_var, ir_variable *consumer_var);
> -   unsigned assign_locations(uint64_t reserved_slots, bool separate_shader);
> +   unsigned assign_locations(struct gl_shader_program *prog,
> + uint64_t reserved_slots, bool separate_shader);
> void store_locations() const;
>
>  private:
> @@ -1042,7 +1043,9 @@ varying_matches::record(ir_variable *producer_var, 
> ir_variable *consumer_var)
>   * passed to varying_matches::record().
>   */
>  unsigned
> -varying_matches::assign_locations(uint64_t reserved_slots, bool 
> separate_shader)
> +varying_matches::assign_locations(struct gl_shader_program *prog,
> +  uint64_t reserved_slots,
> +  bool separate_shader)
>  {
> /* We disable varying sorting for separate shader programs for the
>  * following reasons:
> @@ -1079,10 +1082,21 @@ varying_matches::assign_locations(uint64_t 
> reserved_slots, bool separate_shader)
> for (unsigned i = 0; i < this->num_matches; i++) {
>unsigned *location = _location;
>
> -  if ((this->matches[i].consumer_var &&
> -   this->matches[i].consumer_var->data.patch) ||
> -  (this->matches[i].producer_var &&
> -   this->matches[i].producer_var->data.patch))
> +  const ir_variable *var;
> +  const glsl_type *type;
> +  bool is_vertex_input = false;
> +  if (matches[i].consumer_var) {
> + var = matches[i].consumer_var;
> + type = get_varying_type(var, consumer_stage);
> + is_vertex_input = false;
This is not required. is_vertex_input is already initialized to false.
> + if (consumer_stage == MESA_SHADER_VERTEX)
> +is_vertex_input = true;
> +  } else {
> + var = matches[i].producer_var;
> + type = get_varying_type(var, producer_stage);
> +  }
> +
> +  if (var->data.patch)
>   location = _patch_location;
>
>/* Advance to the next slot if this varying has a different packing
> @@ -1094,9 +1108,45 @@ varying_matches::assign_locations(uint64_t 
> reserved_slots, bool separate_shader)
>!= this->matches[i].packing_class) {
>   *location = ALIGN(*location, 4);
>}
> -  while ((*location < MAX_VARYING * 4u) &&
> -(reserved_slots & (1u << *location / 4u))) {
> - *location = ALIGN(*location + 1, 4);
> +
> +  unsigned num_elements =  type->count_attribute_slots(is_vertex_input);
> +  unsigned slot_end = this->disable_varying_packing ? 4 :
> + type->without_array()->vector_elements;
> +  slot_end += *location - 1;
> +
> +  /* FIXME: We could be smarter in the below code and loop back over
> +   * trying to fill any locations that we skipped because we couldn't 
> pack
> +   * the varying between an explicit location. For now just let the user
> +   * hit the linking error if we run out of room and suggest they use
> +   * explicit locations.
> +   */
> +  for (unsigned j = 0; j < num_elements; j++) {
> + while ((slot_end < MAX_VARYING * 4u) &&
> +((reserved_slots & (1u << *location / 4u) ||
> + (reserved_slots & (1u << slot_end / 4u))))) {
> +
> +*location = ALIGN(*location + 1, 4);
> +slot_end = *location;
> +
> +/* reset the counter and try again */
> +j = 0;
> + }
> +
> + /* Increase the slot to make sure there is enough room for next
> +  * array element.
> +  */
> + if (this->disable_varying_packing)
> +slot_end += 4;
> + else
> +slot_end += type->without_array()->vector_elements;
> +  }
> +
> +  if (!var->data.patch && *location >= MAX_VARYING * 4u) {
> + linker_error(prog, "insufficient 

Re: [Mesa-dev] [PATCH 07/28] glsl: don't try adding build-ins to explicit locations bitmask

2016-01-06 Thread Anuj Phogat
s/build-ins/built-ins in commit message

On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> ---
>  src/glsl/link_varyings.cpp | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
> index 34e8418..ee7cae0 100644
> --- a/src/glsl/link_varyings.cpp
> +++ b/src/glsl/link_varyings.cpp
> @@ -1530,7 +1530,9 @@ reserved_varying_slot(struct gl_shader *stage, 
> ir_variable_mode io_mode)
> foreach_in_list(ir_instruction, node, stage->ir) {
>ir_variable *const var = node->as_variable();
>
> -  if (var == NULL || var->data.mode != io_mode || 
> !var->data.explicit_location)
> +  if (var == NULL || var->data.mode != io_mode ||
> +  !var->data.explicit_location ||
> +  var->data.location < VARYING_SLOT_VAR0)
>   continue;
>
>var_slot = var->data.location - VARYING_SLOT_VAR0;
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/28] glsl: parse component layout qualifier

2016-01-06 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> ---
>  src/glsl/ast.h  | 14 ++
>  src/glsl/ast_type.cpp   |  3 +++
>  src/glsl/glsl_parser.yy | 11 +++
>  3 files changed, 28 insertions(+)
>
> diff --git a/src/glsl/ast.h b/src/glsl/ast.h
> index f8ab0b7..e22deed 100644
> --- a/src/glsl/ast.h
> +++ b/src/glsl/ast.h
> @@ -490,6 +490,12 @@ struct ast_type_qualifier {
>   */
>  unsigned explicit_index:1;
>
> +/**
> + * Flag set if GL_ARB_enhanced_layouts "component" layout
> + * qualifier is used.
> + */
> +unsigned explicit_component:1;
> +
>   /**
>* Flag set if GL_ARB_shading_language_420pack "binding" layout
>* qualifier is used.
> @@ -595,6 +601,14 @@ struct ast_type_qualifier {
>  */
> ast_expression *index;
>
> +   /**
> +* Component specified via GL_ARB_enhanced_layouts
> +*
> +* \note
> +* This field is only valid if \c explicit_component is set.
> +*/
> +   ast_expression *component;
> +
> /** Maximum output vertices in GLSL 1.50 geometry shaders. */
> ast_layout_expression *max_vertices;
>
> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
> index 8643b7b..7330a34 100644
> --- a/src/glsl/ast_type.cpp
> +++ b/src/glsl/ast_type.cpp
> @@ -273,6 +273,9 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
> if (q.flags.q.explicit_index)
>this->index = q.index;
>
> +  if (q.flags.q.explicit_component)
> +  this->component = q.component;
> +
> if (q.flags.q.explicit_binding)
>this->binding = q.binding;
>
> diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
> index 51796a6..6b634f2 100644
> --- a/src/glsl/glsl_parser.yy
> +++ b/src/glsl/glsl_parser.yy
> @@ -1476,6 +1476,17 @@ layout_qualifier_id:
>   $$.location = $3;
>}
>
> +  if (match_layout_qualifier("component", $1, state) == 0) {
> + if (!state->has_enhanced_layouts()) {
> +_mesa_glsl_error(& @1, state,
> + "component qualifier requires "
> + "GLSL 4.40 or ARB_enhanced_layouts");
> + } else {
> +$$.flags.q.explicit_component = 1;
> +$$.component = $3;
> + }
> +  }
> +
>if (match_layout_qualifier("index", $1, state) == 0) {
>   if (state->es_shader && !state->EXT_blend_func_extended_enable) {
>  _mesa_glsl_error(& @3, state, "index layout qualifier requires 
> EXT_blend_func_extended");
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/28] glsl: fix cross validation for explicit locations on structs and arrays

2016-01-06 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> ---
>  src/glsl/link_varyings.cpp | 43 ++-
>  1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
> index ee7cae0..dea8741 100644
> --- a/src/glsl/link_varyings.cpp
> +++ b/src/glsl/link_varyings.cpp
> @@ -239,18 +239,24 @@ cross_validate_outputs_to_inputs(struct 
> gl_shader_program *prog,
>   /* User-defined varyings with explicit locations are handled
>* differently because they do not need to have matching names.
>*/
> - const unsigned idx = var->data.location - VARYING_SLOT_VAR0;
> + const glsl_type *type = get_varying_type(var, producer->Stage);
> + unsigned num_elements = type->count_attribute_slots(false);
> + unsigned idx = var->data.location - VARYING_SLOT_VAR0;
> + unsigned slot_limit = idx + num_elements;
>
> - if (explicit_locations[idx] != NULL) {
> -linker_error(prog,
> + while(idx < slot_limit) {
> +if (explicit_locations[idx] != NULL) {
> +   linker_error(prog,
>   "%s shader has multiple outputs explicitly "
>   "assigned to location %d\n",
>   _mesa_shader_stage_to_string(producer->Stage),
>   idx);
> -return;
> - }
> +   return;
> +}
>
> - explicit_locations[idx] = var;
> +explicit_locations[idx] = var;
> +idx++;
> + }
>}
> }
>
> @@ -298,14 +304,25 @@ cross_validate_outputs_to_inputs(struct 
> gl_shader_program *prog,
>   ir_variable *output = NULL;
>   if (input->data.explicit_location
>   && input->data.location >= VARYING_SLOT_VAR0) {
> -output = explicit_locations[input->data.location - 
> VARYING_SLOT_VAR0];
>
> -if (output == NULL) {
> -   linker_error(prog,
> -"%s shader input `%s' with explicit location "
> -"has no matching output\n",
> -_mesa_shader_stage_to_string(consumer->Stage),
> -input->name);
> +const glsl_type *type = get_varying_type(input, consumer->Stage);
> +unsigned num_elements = type->count_attribute_slots(false);
> +unsigned idx = input->data.location - VARYING_SLOT_VAR0;
> +unsigned slot_limit = idx + num_elements;
> +
> +while(idx < slot_limit) {
> +   output = explicit_locations[idx];
> +
> +   if (output == NULL ||
> +   input->data.location != output->data.location) {
> +  linker_error(prog,
> +   "%s shader input `%s' with explicit location "
> +   "has no matching output\n",
> +   _mesa_shader_stage_to_string(consumer->Stage),
> +   input->name);
> +  break;
> +   }
> +   idx++;
>  }
>   } else {
>  output = parameters.get_variable(input->name);
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] st/mesa: check state->mesa in early return check in st_validate_state()

2016-01-06 Thread Jose Fonseca

On 06/01/16 16:50, Brian Paul wrote:

We were checking the dirty->st flags but not the dirty->mesa flags.
When we took the early return, we didn't clear the dirty->mesa flags
so the next time we called st_validate_state() we'd often flush the
glBitmap cache.  And since st_validate_state() is called from
st_Bitmap(), it meant we flushed the bitmap cache for every glBitmap()
call.

This change seems to recover most of the performance loss observed
with the ipers demo on llvmpipe since commit 36c93a6fae27561.

Cc: mesa-sta...@lists.freedesktop.org
---
  src/mesa/state_tracker/st_atom.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 43dbadd..c1a9d00 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -188,7 +188,7 @@ void st_validate_state( struct st_context *st )

 st_manager_validate_framebuffers(st);

-   if (state->st == 0)
+   if (state->st == 0 && state->mesa == 0)
return;

 /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/



Good stuff.

Series looks good to me.

Reviewed-by: Jose Fonseca 

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: fix varying slot allocation for blocks and structs with explicit locations

2016-01-06 Thread Timothy Arceri
On Wed, 2016-01-06 at 09:46 -0500, Ilia Mirkin wrote:
> On Wed, Jan 6, 2016 at 4:32 AM, Timothy Arceri
>  wrote:
> > Previously each member was being counted as using a single slot,
> > count_attribute_slots() fixes the count for array and struct
> > members.
> > 
> > > Also don't assign a negative value to the unsigned expl_location
> > variable.
> > ---
> > 
> >  Fixes these new piglit tests:
> >http://patchwork.freedesktop.org/patch/69531/
> > 
> >  src/glsl/ast_to_hir.cpp | 9 +
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
> > index 0197cdc..50d5e22 100644
> > --- a/src/glsl/ast_to_hir.cpp
> > +++ b/src/glsl/ast_to_hir.cpp
> > @@ -6408,12 +6408,13 @@
> > ast_process_struct_or_iface_block_members(exec_list *instructions,
> >  if (process_qualifier_constant(state, ,
> > "location",
> > qual->location,
> > _location)) {
> > fields[i].location = VARYING_SLOT_VAR0 +
> > qual_location;
> > -   expl_location = fields[i].location + 1;
> > +   expl_location = fields[i].location +
> > +  fields[i].type->count_attribute_slots(false);
> >  }
> >   } else {
> >  if (layout && layout->flags.q.explicit_location) {
> > fields[i].location = expl_location;
> > -   expl_location = expl_location + 1;
> > +   expl_location += fields[i].type
> > ->count_attribute_slots(false);
> >  } else {
> > fields[i].location = -1;
> >  }
> > @@ -6570,7 +6571,7 @@ ast_struct_specifier::hir(exec_list
> > *instructions,
> > 
> > state->struct_specifier_depth++;
> > 
> > -   unsigned expl_location = -1;
> > +   unsigned expl_location = 0;
> > if (layout && layout->flags.q.explicit_location) {
> >if (!process_qualifier_constant(state, , "location",
> >layout->location,
> > _location)) {
> > @@ -6763,7 +6764,7 @@ ast_interface_block::hir(exec_list
> > *instructions,
> >return NULL;
> > }
> > 
> > -   unsigned expl_location = -1;
> > +   unsigned expl_location = 0;
> 
> There are a number of places that check for location != -1 as a
> sanity
> check... won't this defeat that?

No because we only use expl_location when the explicit location flag is
set and if there is an error we don't copy the value from
expl_location.

I believe I initialised it to stop gcc complaining although I just
tried removing this and it no longer complains so I guess I can just
remove the initialisation altogether.

Are you happy with the change otherwise?

> 
> > if (layout.flags.q.explicit_location) {
> >if (!process_qualifier_constant(state, , "location",
> >layout.location,
> > _location)) {
> > --
> > 2.4.3
> > 
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] main: s/GLuint/GLbitfield for state bitmasks

2016-01-06 Thread Jose Fonseca

On 06/01/16 16:51, Brian Paul wrote:

---
  src/mesa/main/api_arrayelt.c | 4 ++--
  src/mesa/main/api_arrayelt.h | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 92d8238..c84db5f 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -65,7 +65,7 @@ typedef struct {
  typedef struct {
 AEarray arrays[32];
 AEattrib attribs[VERT_ATTRIB_MAX + 1];
-   GLuint NewState;
+   GLbitfield NewState;

 /* List of VBOs we need to map before executing ArrayElements */
 struct gl_buffer_object *vbo[VERT_ATTRIB_MAX];
@@ -1802,7 +1802,7 @@ _ae_ArrayElement(GLint elt)


  void
-_ae_invalidate_state(struct gl_context *ctx, GLuint new_state)
+_ae_invalidate_state(struct gl_context *ctx, GLbitfield new_state)
  {
 AEcontext *actx = AE_CONTEXT(ctx);

diff --git a/src/mesa/main/api_arrayelt.h b/src/mesa/main/api_arrayelt.h
index 39fdeb9..03cd9ec 100644
--- a/src/mesa/main/api_arrayelt.h
+++ b/src/mesa/main/api_arrayelt.h
@@ -33,7 +33,7 @@

  extern GLboolean _ae_create_context( struct gl_context *ctx );
  extern void _ae_destroy_context( struct gl_context *ctx );
-extern void _ae_invalidate_state( struct gl_context *ctx, GLuint new_state );
+extern void _ae_invalidate_state( struct gl_context *ctx, GLbitfield new_state 
);
  extern void GLAPIENTRY _ae_ArrayElement( GLint elt );

  /* May optionally be called before a batch of element calls:



Series is

Reviewed-by: Jose Fonseca 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] st/mesa: check texture target in allocate_full_mipmap()

2016-01-06 Thread Jose Fonseca

On 06/01/16 17:10, Brian Paul wrote:

Some kinds of textures never have mipmaps.  3D textures seldom have
mipmaps.
---
  src/mesa/state_tracker/st_cb_texture.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 867d4da..f8b3679 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -404,6 +404,16 @@ static boolean
  allocate_full_mipmap(const struct st_texture_object *stObj,
   const struct st_texture_image *stImage)
  {
+   switch (stObj->base.Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_BUFFER:
+   case GL_TEXTURE_EXTERNAL_OES:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+  /* these texture types cannot be mipmapped */
+  return FALSE;
+   }
+


Makes sense.


 if (stImage->base.Level > 0 || stObj->base.GenerateMipmap)
return TRUE;

@@ -420,6 +430,10 @@ allocate_full_mipmap(const struct st_texture_object *stObj,
/* not a mipmap minification filter */
return FALSE;

+   if (stObj->base.Target == GL_TEXTURE_3D)
+  /* 3D textures are seldom mipmapped */
+  return FALSE;
+
 return TRUE;
  }




I don't have much first-hand experience with how 3D apps use 3D volumes,
but I'd imagine that in most cases they would want to use mipmaps to
avoid aliasing effects, unless memory is tight.


Anyway, my understanding is that this function just makes a guess, and it's
OK to mis-guess.  So, series is


Reviewed-by: Jose Fonseca 

Jose


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/28] glsl: fix overlapping of varying locations for arrays and structs

2016-01-06 Thread Timothy Arceri
Thanks a lot for reviewing these :)

Are you able to take a quick look at patch 5? You seem to have missed it.
I'd like to push these bug fixes as soon as possible.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] llvmpipe: avoid most 64 bit math in rasterization

2016-01-06 Thread Jose Fonseca

On 06/01/16 18:18, Roland Scheidegger wrote:

Am 06.01.2016 um 17:31 schrieb Jose Fonseca:

On 06/01/16 16:26, Jose Fonseca wrote:

On 06/01/16 00:06, srol...@vmware.com wrote:

From: Roland Scheidegger 

The trick here is to recognize that in the c + n * dcdx calculations,
not only can the lower FIXED_ORDER bits not change (as the dcdx values
have those all zero) but that this means the sign bit of the
calculations
cannot be different as well, that is
sign(c + n*dcdx) == sign((c >> FIXED_ORDER) + n*(dcdx >> FIXED_ORDER)).
That shaves off more than enough bits to never require 64bit masks.
A shifted plane c value could still easily exceed 32 bits, however
since we
throw out planes which are trivial accept even before binning (and
similarly
don't even get to see tris for which there was a trivial reject
plane) this
is never a problem.
The idea isn't all that revolutionary, in fact something similar was
tried
ages ago (9773722c2b09d5f0615a47cecf4347859474dc56) back when the
values were
only 32 bit anyway. I believe now it didn't quite work then because the
adjustment needed for testing trivial reject / partial masks wasn't
handled
correctly.
This still keeps the separate 32/64 bit paths for now, as the 32 bit
one still
looks minimally simpler (and also because if we'd pass in dcdx/dcdy/eo
unscaled
from setup which would be a good reason to ditch the 32 bit path, we'd
need to
change the special-purpose rasterization functions for small tris).

This passes piglit triangle-rasterization (-fbo -auto -max_size
-subpixelbits 8). It still fails triangle-rasterization-overdraw
-max_size
(no change, fails everything at position 2048 - interestingly for
softpipe,
nvidia maxwell 1 blob, and amd evergreen open-source drivers the test
fails
as well but at 4096 - seems like we're missing a float mantissa bit
somewhere!).


I don't think that's how the test is supposed to be run.

If you do an apitrace, you'll see the test creates a fbo with 1000x1000,
a viewport with 16Kx16K, and does a readpixels of 4Kx4K...


The problem is that the generic "-fbo" option is not useful for this, as
we can't reliably resize it after the fact.

Take a look at tests/general/triangle-rasterization.cpp -- it has a
different option "-use_fbo" that creates its own fbo.

OK I was running that the wrong way too I think. This one still passes
with -max_size -use_fbo -subpixelbits 8 (takes _forever_ though - all
due to convert_ubyte in readpixel path...)

triangle-rasterization-overdraw with just -auto passes. The max_size
parameter is a bit confusing since it won't do anything at
all without -fbo as piglit_width/height will just get overwritten to
window_width/height (and with fbo it will just fail badly). Increasing
the window size manually to 8192/8192 won't really work either as the
size will be cut down to screen size. However, increasing this and then
use -fbo actually does the right thing. And passes.


Sounds great then.

I can't spot anything wrong with the change:

Reviewed-by: Jose Fonseca 


Would be nice if piglit could pick up those size parameters _after_
piglit_init...


It might be worthwhile to modify the piglit test to bail when the passed 
options are bound to not work.


Jose




Roland



Jose




Jose


---
   src/gallium/drivers/llvmpipe/lp_rast_tri.c |  84
+--
   src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 107
+
   2 files changed, 133 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index c9b9221..a4dd6ef 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -64,43 +64,43 @@ block_full_16(struct lp_rasterizer_task *task,
   }

   static inline unsigned
-build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
+build_mask_linear(int32_t c, int32_t dcdx, int32_t dcdy)
   {
  unsigned mask = 0;

-   int64_t c0 = c;
-   int64_t c1 = c0 + dcdy;
-   int64_t c2 = c1 + dcdy;
-   int64_t c3 = c2 + dcdy;
-
-   mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
-   mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
-   mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
-   mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
-   mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
-   mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
-   mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
-   mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
-   mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
-   mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
-   mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
-   mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
-   mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
-   mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
-   mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
-   mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 

Re: [Mesa-dev] [PATCH 05/28] glsl: create helper to remove outer vertex index array used by some stages

2016-01-06 Thread Anuj Phogat
On Mon, Dec 28, 2015 at 9:00 PM, Timothy Arceri
 wrote:
> This will be used in the following patch for calculating array sizes correctly
> when reserving explicit varying locations.
> ---
>  src/glsl/link_varyings.cpp | 36 ++--
>  1 file changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
> index 2ff4552..d9550df 100644
> --- a/src/glsl/link_varyings.cpp
> +++ b/src/glsl/link_varyings.cpp
> @@ -41,6 +41,29 @@
>
>
>  /**
> + * Get the varying type stripped of the outermost array if we're processing
> + * a stage whose varyings are arrays indexed by a vertex number (such as
> + * geometry shader inputs).
> + */
> +static const glsl_type *
> +get_varying_type(const ir_variable *var, gl_shader_stage stage)
> +{
> +   const glsl_type *type = var->type;
> +
> +   if (!var->data.patch &&
> +   ((var->data.mode == ir_var_shader_out &&
> + stage == MESA_SHADER_TESS_CTRL) ||
> +(var->data.mode == ir_var_shader_in &&
> + (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
> +  stage == MESA_SHADER_GEOMETRY {
> +  assert(type->is_array());
> +  type = type->fields.array;
> +   }
> +
> +   return type;
> +}
> +
> +/**
>   * Validate the types and qualifiers of an output from one stage against the
>   * matching input to another stage.
>   */
> @@ -981,18 +1004,11 @@ varying_matches::record(ir_variable *producer_var, 
> ir_variable *consumer_var)
> this->matches[this->num_matches].packing_order
>= this->compute_packing_order(var);
> if (this->disable_varying_packing) {
> -  const struct glsl_type *type = var->type;
>unsigned slots;
> +  gl_shader_stage stage =
> + (producer_var != NULL) ? producer_stage : consumer_stage;
>
> -  /* Some shader stages have 2-dimensional varyings. Use the inner type. 
> */
> -  if (!var->data.patch &&
> -  ((var == producer_var && producer_stage == MESA_SHADER_TESS_CTRL) 
> ||
> -   (var == consumer_var && (consumer_stage == MESA_SHADER_TESS_CTRL 
> ||
> -consumer_stage == MESA_SHADER_TESS_EVAL 
> ||
> -consumer_stage == 
> MESA_SHADER_GEOMETRY {
> - assert(type->is_array());
> - type = type->fields.array;
> -  }
> +  const glsl_type *type = get_varying_type(var, stage);
>
>if (type->is_array()) {
>   slots = 1;
> --
> 2.4.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 23/23] radeonsi: adjust the parameters of si_shader_dump

2016-01-06 Thread Nicolai Hähnle

I like the code structure established by this series. Patches 20-23 are

Reviewed-by: Nicolai Hähnle 

On 06.01.2016 07:41, Marek Olšák wrote:

From: Marek Olšák 

The function will be extended to dump all binaries shaders will consist of,
so si_shader* makes sense here.
---
  src/gallium/drivers/radeonsi/si_compute.c |  6 ++
  src/gallium/drivers/radeonsi/si_shader.c  | 18 +++---
  src/gallium/drivers/radeonsi/si_shader.h  |  7 ++-
  3 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index ffac656..5a08cbf 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -125,8 +125,7 @@ static void *si_create_compute_state(
si_compile_llvm(sctx->screen, 
>kernels[i].binary,
>kernels[i].config, sctx->tm,
mod, >b.debug, 
TGSI_PROCESSOR_COMPUTE);
-   si_shader_dump(sctx->screen, 
>kernels[i].binary,
-  >kernels[i].config,
+   si_shader_dump(sctx->screen, >kernels[i],
   >b.debug, TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, 
>kernels[i]);
LLVMDisposeModule(mod);
@@ -143,8 +142,7 @@ static void *si_create_compute_state(
init_scratch_buffer(sctx, program);
si_shader_binary_read_config(>shader.binary,
 >shader.config, 0);
-   si_shader_dump(sctx->screen, >shader.binary,
-  >shader.config, >b.debug,
+   si_shader_dump(sctx->screen, >shader, >b.debug,
   TGSI_PROCESSOR_COMPUTE);
si_shader_binary_upload(sctx->screen, >shader);

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 58d16cf..b1a9a1e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3873,17 +3873,15 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
   conf->lds_size, conf->scratch_bytes_per_wave);
  }

-void si_shader_dump(struct si_screen *sscreen,
-   struct radeon_shader_binary *binary,
-   struct si_shader_config *conf,
-   struct pipe_debug_callback *debug,
-   unsigned processor)
+void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
+   struct pipe_debug_callback *debug, unsigned processor)
  {
if (r600_can_dump_shader(>b, processor))
if (!(sscreen->b.debug_flags & DBG_NO_ASM))
-   si_shader_dump_disassembly(binary, debug);
+   si_shader_dump_disassembly(>binary, debug);

-   si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
+   si_shader_dump_stats(sscreen, >config,
+shader->binary.code_size, debug, processor);
  }

  int si_compile_llvm(struct si_screen *sscreen,
@@ -3996,8 +3994,7 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
if (!r) {
-   si_shader_dump(sscreen, _shader_ctx->shader->binary,
-  _shader_ctx->shader->config, debug,
+   si_shader_dump(sscreen, si_shader_ctx->shader, debug,
   TGSI_PROCESSOR_GEOMETRY);
r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
}
@@ -4202,8 +4199,7 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
goto out;
}

-   si_shader_dump(sscreen, >binary, >config,
-  debug, si_shader_ctx.type);
+   si_shader_dump(sscreen, shader, debug, si_shader_ctx.type);

r = si_shader_binary_upload(sscreen, shader);
if (r) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 712bcd9..1635358 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -344,11 +344,8 @@ void si_shader_destroy(struct si_shader *shader);
  void si_shader_destroy_binary(struct radeon_shader_binary *binary);
  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
index);
  int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
-void si_shader_dump(struct si_screen *sscreen,
-   struct radeon_shader_binary *binary,
-   struct si_shader_config *conf,
-   struct pipe_debug_callback *debug,
-   unsigned processor);
+void si_shader_dump(struct 

Re: [Mesa-dev] [PATCH] gallium/r600: Replace ALIGN_DIVUP with DIV_ROUND_UP

2016-01-06 Thread Nicolai Hähnle

Pushed.

On 06.01.2016 12:10, Krzysztof A. Sobiecki wrote:

Nicolai Hähnle  writes:


On 30.12.2015 13:44, Krzysztof A. Sobiecki wrote:

Nicolai Hähnle  writes:


On 30.12.2015 08:42, Krzysztof A. Sobiecki wrote:

Nicolai Hähnle  writes:


On 29.12.2015 14:27, Krzysztof A. Sobiecki wrote:

From: Krzysztof Sobiecki 

ALIGN_DIVUP is a driver-specific (r600g) macro that duplicates DIV_ROUND_UP
functionality.
Replacing it with DIV_ROUND_UP eliminates this duplication.


Those macros are actually slightly different, and the assembly
generated by the ALIGN_DIVUP looks clearly better to me.

I remember seeing a very long thread about this not so long ago - what
was the resolution there?

Cheers,
Nicolai


I would like to remove ALIGN_DIVUP first and then debate which
implementation DIV_ROUND_UP should use.

btw. I prefer 1 + ((x - 1) / y)


That produces an incorrect result when x is an unsigned type and equal
to 0 -- and that is something that existing code definitely relies on.

Cheers,
Nicolai


Then what about (x / y) + (x % y != 0)


Generates similar assembly to the DIV_ROUND_UP version.

Anyway, now that I look at it again I'd say just go ahead and add my
R-b. Yes, the assembly looks slightly worse, but only slightly, and
avoiding surprises with overflows down the line seems like a good
idea.

Cheers,
Nicolai


I don't have commit access, can you push it, sorry.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/23] radeonsi: move MRT color exporting into a separate function

2016-01-06 Thread Nicolai Hähnle

On 06.01.2016 07:41, Marek Olšák wrote:

From: Marek Olšák 

This will be used by a fragment shader epilog.
---
  src/gallium/drivers/radeonsi/si_shader.c | 93 +++-
  1 file changed, 55 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4204db0..f60b560 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2135,6 +2135,57 @@ static void si_export_mrt_z(struct lp_build_tgsi_context 
*bld_base,
   args, 9, 0);
  }

+static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+   LLVMValueRef *color, unsigned index,
+   bool is_last)
+{
+   struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+   struct lp_build_context *base = _base->base;
+   LLVMValueRef args[9];
+   int i;
+
+   /* Clamp color */
+   if (si_shader_ctx->shader->key.ps.clamp_color)
+   for (i = 0; i < 4; i++)
+   color[i] = radeon_llvm_saturate(bld_base, color[i]);
+
+   /* Alpha to one */
+   if (si_shader_ctx->shader->key.ps.alpha_to_one)
+   color[3] = base->one;
+
+   /* Alpha test */
+   if (index == 0 &&
+   si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+   si_alpha_test(bld_base, color[3]);
+
+   /* Line & polygon smoothing */
+   if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+   color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+
+   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+   if (index == 0 &&
+   si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+   for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; 
c++) {
+   si_llvm_init_export_args(bld_base, color,
+V_008DFC_SQ_EXP_MRT + c, args);
+   lp_build_intrinsic(base->gallivm->builder, 
"llvm.SI.export",
+  
LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+   }
+   }
+
+   /* Export */
+   si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+args);
+   if (is_last) {
+   args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is 
valid */
+   args[2] = bld_base->uint_bld.one; /* DONE bit */
+   }
+   lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+  LLVMVoidTypeInContext(base->gallivm->context),
+  args, 9, 0);
+}
+
  static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
  {
struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
@@ -2177,7 +2228,7 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
-   unsigned target, j;
+   unsigned j;
LLVMValueRef color[4] = {};

/* Select the correct target */
@@ -2195,53 +2246,19 @@ static void si_llvm_emit_fs_epilogue(struct 
lp_build_tgsi_context * bld_base)
   
si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
continue;
case TGSI_SEMANTIC_COLOR:
-   target = V_008DFC_SQ_EXP_MRT + semantic_index;
-
for (j = 0; j < 4; j++)
color[j] = LLVMBuildLoad(builder,
 
si_shader_ctx->radeon_bld.soa.outputs[i][j], "");

-   if (si_shader_ctx->shader->key.ps.clamp_color)
-   for (j = 0; j < 4; j++)
-   color[j] = 
radeon_llvm_saturate(bld_base, color[j]);
-
-   if (si_shader_ctx->shader->key.ps.alpha_to_one)
-   color[3] = base->one;
-
-   if (semantic_index == 0 &&
-   si_shader_ctx->shader->key.ps.alpha_func != 
PIPE_FUNC_ALWAYS)
-   si_alpha_test(bld_base, color[3]);
-
-   if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
-   color[3] = 
si_scale_alpha_by_sample_mask(bld_base, color[3]);
-   break;
+   si_export_mrt_color(bld_base, color, semantic_index,
+   last_color_export == i);
+   continue;
  

[Mesa-dev] [Bug 93577] Total war: Attila crashes at startup

2016-01-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=93577

--- Comment #5 from ArneJ  ---
You're right, it needs a GL 4.3 Core Context. I was able to start the game with
the following launch options in steam:

MESA_GL_VERSION_OVERRIDE=4.3 MESA_GLSL_VERSION_OVERRIDE=430 %command%

It runs quite well with medium settings at 1920x1080 on my R9 270X with mesa
11.1.0 (I just tested a little bit of the prologue). I didn't see any issues so
it's quite possible that the game requests a 4.3 context but doesn't need any
extensions that are not available on radeonsi.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/23] radeonsi: separate shader dumping code to si_shader_dump and *_dump_stats

2016-01-06 Thread Nicolai Hähnle

On 06.01.2016 07:41, Marek Olšák wrote:

From: Marek Olšák 

Eventually, I'd like to dump stats for several combined binaries, which is
why you don't see a binary parameter in si_shader_dump_stats.
---
  src/gallium/drivers/radeonsi/si_shader.c | 42 +++-
  1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 0773fff..95cdf8a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3853,31 +3853,49 @@ static void si_shader_dump_disassembly(const struct 
radeon_shader_binary *binary
}
  }

-void si_shader_binary_read(struct si_screen *sscreen,
-  struct radeon_shader_binary *binary,
-  struct si_shader_config *conf,
-  struct pipe_debug_callback *debug,
-  unsigned processor)
+static void si_shader_dump_stats(struct si_screen *sscreen,
+struct si_shader_config *conf,
+unsigned code_size,
+struct pipe_debug_callback *debug,
+unsigned processor)
  {
-   si_shader_binary_read_config(binary, conf, 0);
-
if (r600_can_dump_shader(>b, processor)) {
-   if (!(sscreen->b.debug_flags & DBG_NO_ASM))
-   si_shader_dump_disassembly(binary, debug);
-
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d 
blocks\n"
"Scratch: %d bytes per wave\n\n",
-   conf->num_sgprs, conf->num_vgprs, binary->code_size,
+   conf->num_sgprs, conf->num_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave);
}

pipe_debug_message(debug, SHADER_INFO,
   "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d 
Scratch: %d",
-  conf->num_sgprs, conf->num_vgprs, binary->code_size,
+  conf->num_sgprs, conf->num_vgprs, code_size,
   conf->lds_size, conf->scratch_bytes_per_wave);
  }

+static void si_shader_dump(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor)
+{
+   if (r600_can_dump_shader(>b, processor))
+   if (!(sscreen->b.debug_flags & DBG_NO_ASM))
+   si_shader_dump_disassembly(binary, debug);


I prefer to have braces around an if-block that spans multiple lines, 
but that's a bit of a bike-sheddy quibble. Either way, patches 11-19 are


Reviewed-by: Nicolai Hähnle 


+
+   si_shader_dump_stats(sscreen, conf, binary->code_size, debug, 
processor);
+}
+
+void si_shader_binary_read(struct si_screen *sscreen,
+  struct radeon_shader_binary *binary,
+  struct si_shader_config *conf,
+  struct pipe_debug_callback *debug,
+  unsigned processor)
+{
+   si_shader_binary_read_config(binary, conf, 0);
+   si_shader_dump(sscreen, binary, conf, debug, processor);
+}
+
  int si_compile_llvm(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev