Re: [Mesa-dev] [PATCH 29/29] vbo: Use a bitmask to track the active arrays in vbo_save*.

2016-06-22 Thread Mathias Fröhlich
On Monday, June 20, 2016 11:42:19 Mark Janes wrote:
> Mathias Fröhlich  writes:
> 
> > On Monday, June 20, 2016 10:33:42 Mark Janes wrote:
> >> mathias.froehl...@gmx.net writes:
> >> 
> >> > From: Mathias Fröhlich 
> >> >
> >> > The use of a bitmask makes functions iterating only active
> >> > attributes less visible in profiles.
> >> >
> >> > Signed-off-by: Mathias Fröhlich 
> >> > ---
> >> >  src/mesa/vbo/vbo_save.h  |  2 ++
> >> >  src/mesa/vbo/vbo_save_api.c  | 70 
> >> > ++--
> >> >  src/mesa/vbo/vbo_save_draw.c | 55 ++
> >> >  3 files changed, 72 insertions(+), 55 deletions(-)
> >> >
> >> > diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
> >> > index 8032db8..e3b86bc 100644
> >> > --- a/src/mesa/vbo/vbo_save.h
> >> > +++ b/src/mesa/vbo/vbo_save.h
> >> > @@ -61,6 +61,7 @@ struct vbo_save_copied_vtx {
> >> >   * compiled using the fallback opcode mechanism provided by dlist.c.
> >> >   */
> >> >  struct vbo_save_vertex_list {
> >> > +   GLbitfield64 enabled;/**< mask of enabled vbo arrays. */
> >> > GLubyte attrsz[VBO_ATTRIB_MAX];
> >> > GLenum attrtype[VBO_ATTRIB_MAX];
> >> > GLuint vertex_size;  /**< size in GLfloats */
> >> > @@ -126,6 +127,7 @@ struct vbo_save_context {
> >> > struct gl_client_array arrays[VBO_ATTRIB_MAX];
> >> > const struct gl_client_array *inputs[VBO_ATTRIB_MAX];
> >> >  
> >> > +   GLbitfield64 enabled;/**< mask of enabled vbo arrays. */
> >> > GLubyte attrsz[VBO_ATTRIB_MAX];  /**< 1, 2, 3 or 4 */
> >> > GLenum attrtype[VBO_ATTRIB_MAX];  /**< GL_FLOAT, GL_INT, etc */
> >> > GLubyte active_sz[VBO_ATTRIB_MAX];  /**< 1, 2, 3 or 4 */
> >> > diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
> >> > index 97a1dfd..b178060 100644
> >> > --- a/src/mesa/vbo/vbo_save_api.c
> >> > +++ b/src/mesa/vbo/vbo_save_api.c
> >> > @@ -429,6 +429,7 @@ _save_compile_vertex_list(struct gl_context *ctx)
> >> >  
> >> > /* Duplicate our template, increment refcounts to the storage 
> >> > structs:
> >> >  */
> >> > +   node->enabled = save->enabled;
> >> > memcpy(node->attrsz, save->attrsz, sizeof(node->attrsz));
> >> > memcpy(node->attrtype, save->attrtype, sizeof(node->attrtype));
> >> > node->vertex_size = save->vertex_size;
> >> > @@ -624,14 +625,16 @@ static void
> >> >  _save_copy_to_current(struct gl_context *ctx)
> >> >  {
> >> > struct vbo_save_context *save = _context(ctx)->save;
> >> > -   GLuint i;
> >> > +   GLbitfield64 enabled = save->enabled & 
> >> > (~BITFIELD64_BIT(VBO_ATTRIB_POS));
> >> >  
> >> > -   for (i = VBO_ATTRIB_POS + 1; i < VBO_ATTRIB_MAX; i++) {
> >> > -  if (save->attrsz[i]) {
> >> > - save->currentsz[i][0] = save->attrsz[i];
> >> > - COPY_CLEAN_4V_TYPE_AS_UNION(save->current[i], save->attrsz[i],
> >> > - save->attrptr[i], 
> >> > save->attrtype[i]);
> >> > -  }
> >> > +   while (enabled) {
> >> > +  int i = ffsll(enabled) - 1;
> >> > +  enabled ^= BITFIELD64_BIT(i);
> >> > +  assert(save->attrsz[i]);
> >> > +
> >> > +  save->currentsz[i][0] = save->attrsz[i];
> >> > +  COPY_CLEAN_4V_TYPE_AS_UNION(save->current[i], save->attrsz[i],
> >> > +  save->attrptr[i], save->attrtype[i]);
> >> > }
> >> >  }
> >> >  
> >> > @@ -640,9 +643,12 @@ static void
> >> >  _save_copy_from_current(struct gl_context *ctx)
> >> >  {
> >> > struct vbo_save_context *save = _context(ctx)->save;
> >> > -   GLint i;
> >> > +   GLbitfield64 enabled = save->enabled & 
> >> > (~BITFIELD64_BIT(VBO_ATTRIB_POS));
> >> > +
> >> > +   while (enabled) {
> >> > +  int i = ffsll(enabled) - 1;
> >> > +  enabled ^= BITFIELD64_BIT(i);
> >> >  
> >> > -   for (i = VBO_ATTRIB_POS + 1; i < VBO_ATTRIB_MAX; i++) {
> >> >switch (save->attrsz[i]) {
> >> >case 4:
> >> >   save->attrptr[i][3] = save->current[i][3];
> >> > @@ -652,7 +658,9 @@ _save_copy_from_current(struct gl_context *ctx)
> >> >   save->attrptr[i][1] = save->current[i][1];
> >> >case 1:
> >> >   save->attrptr[i][0] = save->current[i][0];
> >> > + break;
> >> >case 0:
> >> > + assert(0);
> >> >   break;
> >> >}
> >> > }
> >> > @@ -691,6 +699,7 @@ _save_upgrade_vertex(struct gl_context *ctx, GLuint 
> >> > attr, GLuint newsz)
> >> >  */
> >> > oldsz = save->attrsz[attr];
> >> > save->attrsz[attr] = newsz;
> >> > +   save->enabled |= BITFIELD64_BIT(attr);
> >> >  
> >> > save->vertex_size += newsz - oldsz;
> >> > save->max_vert = ((VBO_SAVE_BUFFER_SIZE - save->vertex_store->used) /
> >> > @@ -723,7 +732,6 @@ _save_upgrade_vertex(struct gl_context *ctx, GLuint 
> >> > attr, GLuint newsz)
> >> > if (save->copied.nr) {
> >> >const fi_type *data = save->copied.buffer;
> >> 

Re: [Mesa-dev] [PATCH] if if is is, then then the the do do for for an a

2016-06-22 Thread Matt Turner
On Wed, Jun 22, 2016 at 10:05 PM, Giuseppe Bilotta
 wrote:
> Miscellaneous repetitions in commits and fly-by typo fixes.

How about an actual commit title?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] if if is is, then then the the do do for for an a

2016-06-22 Thread Giuseppe Bilotta
Miscellaneous repetitions in commits and fly-by typo fixes.

Signed-off-by: Giuseppe Bilotta 
---
 include/GL/mesa_glinterop.h   | 6 +++---
 src/compiler/glsl/glsl_to_nir.cpp | 2 +-
 src/compiler/nir/nir.h| 2 +-
 src/compiler/nir/nir_intrinsics.h | 4 ++--
 src/compiler/nir/nir_lower_vars_to_ssa.c  | 2 +-
 src/compiler/nir/nir_lower_wpos_ytransform.c  | 2 +-
 src/compiler/nir/nir_opt_dead_cf.c| 2 +-
 src/compiler/spirv/spirv_to_nir.c | 2 +-
 src/gallium/docs/source/context.rst   | 2 +-
 src/gallium/drivers/freedreno/freedreno_texture.h | 2 +-
 src/gallium/drivers/nouveau/nouveau_buffer.c  | 2 +-
 src/gallium/drivers/r300/compiler/radeon_dataflow.c   | 2 +-
 src/gallium/drivers/r300/compiler/radeon_vert_fc.c| 2 +-
 src/gallium/drivers/svga/svga_format.c| 2 +-
 src/gallium/drivers/swr/rasterizer/core/backend.h | 4 ++--
 src/gallium/drivers/swr/rasterizer/core/state.h   | 2 +-
 src/gallium/drivers/swr/rasterizer/core/threads.cpp   | 2 +-
 src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c | 2 +-
 src/gallium/drivers/vc4/vc4_qir_schedule.c| 2 +-
 src/gallium/state_trackers/nine/device9.c | 2 +-
 src/gbm/main/gbm.c| 2 +-
 src/gtest/src/gtest.cc| 2 +-
 src/mesa/drivers/dri/i965/brw_device_info.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 2 +-
 src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c | 2 +-
 src/mesa/drivers/dri/i965/brw_performance_monitor.c   | 2 +-
 src/mesa/drivers/x11/xm_buffer.c  | 2 +-
 src/mesa/main/texobj.c| 2 +-
 src/mesa/state_tracker/st_cb_fbo.c| 2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp| 2 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.c  | 2 +-
 31 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
index c0c20d6..383d7f9 100644
--- a/include/GL/mesa_glinterop.h
+++ b/include/GL/mesa_glinterop.h
@@ -97,7 +97,7 @@ struct mesa_glinterop_device_info {
/* The callee will overwrite it if it supports a lower version.
 *
 * The caller should check the value and access up-to the version supported
-* by the the callee.
+* by the callee.
 */
/* NOTE: Do not use the MESA_GLINTEROP_DEVICE_INFO_VERSION macro */
uint32_t version;
@@ -125,7 +125,7 @@ struct mesa_glinterop_export_in {
/* The callee will overwrite it if it supports a lower version.
 *
 * The caller should check the value and access up-to the version supported
-* by the the callee.
+* by the callee.
 */
/* NOTE: Do not use the MESA_GLINTEROP_EXPORT_IN_VERSION macro */
uint32_t version;
@@ -190,7 +190,7 @@ struct mesa_glinterop_export_out {
/* The callee will overwrite it if it supports a lower version.
 *
 * The caller should check the value and access up-to the version supported
-* by the the callee.
+* by the callee.
 */
/* NOTE: Do not use the MESA_GLINTEROP_EXPORT_OUT_VERSION macro */
uint32_t version;
diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 16d0c1d..a22fd5b 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1950,7 +1950,7 @@ void
 nir_visitor::visit(ir_constant *ir)
 {
/*
-* We don't know if this variable is an an array or struct that gets
+* We don't know if this variable is an array or struct that gets
 * dereferenced, so do the safe thing an make it a variable with a
 * constant initializer and return a dereference.
 */
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 1725ee3..94dee4d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -804,7 +804,7 @@ typedef struct {
 } nir_deref_var;
 
 /* This enum describes how the array is referenced.  If the deref is
- * direct then the base_offset is used.  If the deref is indirect then then
+ * direct then the base_offset is used.  If the deref is indirect then
  * offset is given by base_offset + indirect.  If the deref is a wildcard
  * then the deref refers to all of the elements of the array at the same
  * time.  Wildcard dereferences are only ever allowed in copy_var
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 6f86c9f..ae253f0 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -48,9 +48,9 @@ INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
 
 /*
  * Interpolation of input.  The interp_var_at* intrinsics are similar to the
- * load_var intrinsic acting an a shader input except that they interpolate
+ * load_var intrinsic 

[Mesa-dev] [Bug 96573] Micro freezes resulting in crash

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96573

Kenneth Graunke  changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |i...@freedesktop.org
   |org |
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
  Component|Mesa core   |Drivers/DRI/i965

--- Comment #1 from Kenneth Graunke  ---
When this happens, can you check the output of "dmesg"?  Do you see messages
about GPU hangs?  If so, please attach /sys/class/drm/card0/error.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Avoid division by zero.

2016-06-22 Thread Kenneth Graunke
On Wednesday, June 22, 2016 7:05:04 PM PDT Matt Turner wrote:
> From: Ardinartsev Nikita 
> 
> Fixes regression introduced by af5ca43f2676bff7499f93277f908b681cb821d0
> 
> Reviewed-by: Matt Turner 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95419
> ---
>  src/mesa/drivers/dri/i965/gen7_urb.c | 26 +++---
>  1 file changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
> b/src/mesa/drivers/dri/i965/gen7_urb.c
> index 387ed2e..797d1b6 100644
> --- a/src/mesa/drivers/dri/i965/gen7_urb.c
> +++ b/src/mesa/drivers/dri/i965/gen7_urb.c
> @@ -300,17 +300,21 @@ gen7_upload_urb(struct brw_context *brw)
>remaining_space -= vs_additional;
>total_wants -= vs_wants;
>  
> -  unsigned hs_additional = (unsigned)
> - round(hs_wants * (((double) remaining_space) / total_wants));
> -  hs_chunks += hs_additional;
> -  remaining_space -= hs_additional;
> -  total_wants -= hs_wants;
> -
> -  unsigned ds_additional = (unsigned)
> - round(ds_wants * (((double) remaining_space) / total_wants));
> -  ds_chunks += ds_additional;
> -  remaining_space -= ds_additional;
> -  total_wants -= ds_wants;
> +  if (total_wants > 0) {
> + unsigned hs_additional = (unsigned)
> +round(hs_wants * (((double) remaining_space) / total_wants));
> + hs_chunks += hs_additional;
> + remaining_space -= hs_additional;
> + total_wants -= hs_wants;
> +  }
> +
> +  if (total_wants > 0) {
> + unsigned ds_additional = (unsigned)
> +round(ds_wants * (((double) remaining_space) / total_wants));
> + ds_chunks += ds_additional;
> + remaining_space -= ds_additional;
> + total_wants -= ds_wants;
> +  }
>  
>gs_chunks += remaining_space;
> }
> 

Right, the total_wants may have been entirely VS, at which point HS and
DS won't need anything.  Looks good to me!

Cc: "12.0 11.2" 
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-22 Thread Francisco Jerez
Jan Vesely  writes:

> On Wed, 2016-06-22 at 17:07 -0700, Francisco Jerez wrote:
>> Jan Vesely  writes:
>> 
>> > On Mon, 2016-06-13 at 17:24 -0700, Francisco Jerez wrote:
>> > > Serge Martin  writes:
>> > > 
>> > > > This fixes getting the size of a struct arg. vec3 types still
>> > > > work
>> > > > ok.
>> > > > Only built-in args need to have power of two alignment,
>> > > > getTypeAllocSize
>> > > > reports the correct size.
>> > > > ---
>> > > >  src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
>> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
>> > > > 
>> > > > diff --git
>> > > > a/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > > > b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > > > index 03487d6..9af51539 100644
>> > > > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > > > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > > > @@ -472,7 +472,8 @@ namespace {
>> > > >   // aligned to the next larger power of two".  We need
>> > > > this
>> > > >   // alignment for three element vectors, which have
>> > > >   // non-power-of-2 store size.
>> > > > - const unsigned arg_api_size =
>> > > > util_next_power_of_two(arg_store_size);
>> > > > + const unsigned arg_api_size = arg_type->isStructTy()
>> > > > ?
>> > > > +   arg_store_size :
>> > > > util_next_power_of_two(arg_store_size);
>> > > >  
>> > > Hm...  Isn't this still going to be broken if you pass a struct
>> > > argument
>> > > to a kernel function and the alignment of any of the struct
>> > > members
>> > > doesn't match the target-specific data layout?  Not sure we can
>> > > fix
>> > > this
>> > > sensibly without requiring the target's data layout to match the
>> > > CL
>> > > API
>> > > exactly.  Any suggestions Tom?
>> > 
>> > according to 6.7.2.1 compilers can arbitrarily insert padding
>> > between
>> > struct members (except at the beginning).
>> 
>> What spec version are you looking at?  My CL spec doesn't have any
>> section labeled 6.7.2.1.
>
> c99 specs, I did not find anything specific for CLC (it might be that I
> just need to look harder). CLC 2.0 adds additional constraint that you
> can't use address space qualifiers.
>

I'd expect that whatever the CL spec says regarding the memory layout of
CLC types (e.g. section 6.1.5 which specifies the usual alignment rules
for CL types and section 6.11.1 and 6.11.3 which specify various
variable and type declaration attributes giving finer control over the
alignment of variable and struct member declarations) fully overrides
the C99 spec.

>> 
>> > Even if size/alignment of individual members match CL API exactly,
>> > there's no guarantee that the structure layout/size will be the
>> > same.
>> > 
>> How can you exchange structured data with a CL kernel then, assuming
>> that the layout of structure types in memory is fully unspecified as
>> you
>> say?
>
> that is my point. My understanding is that it relies on a silent
> assumption that both CLC and the host compiler will create the same
> structure layout given the same structure elements.
>
> big endian host can create:
> struct foo {
>   cl_int a;
>   // 16 bit padding;
>   cl_short b;
>   cl_int c;
> };

I don't think this is a valid representation of the structure according
to CL rules; my understanding is that if your host happens to lay out
structure fields in this way you have to either marshal things manually
or use compiler-specific attributes to get the host compiler to put
things at the right location (according to CL API rules).  I believe
that Khronos' cl_platform.h specifies alignment attributes in the cl_*
host-side typedefs specifically for this purpose.

> while little endian device could create:
> struct foo {
>   int a;
>   short b;
>   // 16 bit padding
>   int c;
> };
>
> If cl_short/short alignment is 2bytes, the above structures and all the
> members have the same size/alignment, yet are not compatible.
>
> Am I missing something that would prevent the above?
>
> Jan
>
>> 
>> > Jan
>> > 
>> > > 
>> > > >   llvm::Type *target_type = arg_type->isIntegerTy() ?
>> > > > TD.getSmallestLegalIntType(mod->getContext(),
>> > > > arg_store_size * 8)
>> > > > -- 
>> > > > 2.5.5
>> > -- 
>> > 
>> > Jan Vesely 
> -- 
>
> Jan Vesely 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeonsi: emit 1/sqrt for RSQ

2016-06-22 Thread Michel Dänzer
On 22.06.2016 22:13, Marek Olšák wrote:
> From: Marek Olšák 
> 
> We don't need the clamped version and we don't have to use any intrinsic.
> 
> Stats on Tonga:
> 
> 15382 shaders in 9128 tests
> Totals:
> SGPRS: 1230560 -> 1230560 (0.00 %)
> VGPRS: 469577 -> 462504 (-1.51 %)
> Code Size: 22089908 -> 21730052 (-1.63 %) bytes
> LDS: 598 -> 598 (0.00 %) blocks
> Scratch: 283648 -> 281600 (-0.72 %) bytes per wave
> Max Waves: 125664 -> 126969 (1.04 %)
> Wait states: 0 -> 0 (0.00 %)
> 
> Totals from affected shaders:
> SGPRS: 547280 -> 547280 (0.00 %)
> VGPRS: 269132 -> 262059 (-2.63 %)
> Code Size: 15709604 -> 15349748 (-2.29 %) bytes
> LDS: 198 -> 198 (0.00 %) blocks
> Scratch: 74752 -> 72704 (-2.74 %) bytes per wave
> Max Waves: 47840 -> 49145 (2.73 %)
> Wait states: 0 -> 0 (0.00 %)

Nice. This series is

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Michel Dänzer
On 22.06.2016 21:04, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Do not rely on the git sha1:
>  - its current truncated form makes it less unique
>  - it does not account for local (Vulkan or otherwise) changes
> 
> Use a timestamp produced at the time of build. It's perfectly unique,
> unless someone explicitly tinkers with their system clock. Even then
> chances of producing the exact same one are very small, if not zero.
> 
> Cc: Jason Ekstrand 
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Emil Velikov 
> ---
> The current approach uses seconds since the Epoch, but if people prefer we
> can use nanoseconds, a combination of the two, and/or something else.
> ---
>  src/intel/vulkan/.gitignore   |  2 ++
>  src/intel/vulkan/Makefile.am  | 15 ++-
>  src/intel/vulkan/anv_device.c |  4 ++--
>  3 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
> index a496146..7ef6a48 100644
> --- a/src/intel/vulkan/.gitignore
> +++ b/src/intel/vulkan/.gitignore
> @@ -2,3 +2,5 @@
>  /anv_entrypoints.c
>  /anv_entrypoints.h
>  /dev_icd.json
> +/anv_timestamp.h.tmp
> +/anv_timestamp.h
> diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
> index 4d9ff90..8332ae5 100644
> --- a/src/intel/vulkan/Makefile.am
> +++ b/src/intel/vulkan/Makefile.am
> @@ -131,7 +131,20 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
> $(vulkan_include_HEADERS)
>   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
>   $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>  
> -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
> +
> +.PHONY: anv_timestamp.h.tmp
> +anv_timestamp.h.tmp:
> + $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@

This conflicts with efforts to make builds reproducible, see e.g.
https://wiki.debian.org/ReproducibleBuilds . One technique used by such
efforts is to replace any timestamps with all 0s.


Would it be possible to generate a hash over all source files listed in
any Makefile.sources, or something like that?


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] radeonsi: extract IB and bo list saving into separate functions

2016-06-22 Thread Ilia Mirkin
On Wed, Jun 22, 2016 at 6:01 AM, Michael Schellenberger Costa
 wrote:
> Hi Nicolai
>
> Am 22.06.2016 um 11:40 schrieb Nicolai Hähnle:
>> From: Nicolai Hähnle 
>>
>> ---
>>  src/gallium/drivers/radeon/r600_pipe_common.c | 53 
>> +++
>>  src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++
>>  src/gallium/drivers/radeonsi/si_debug.c   | 39 +---
>>  src/gallium/drivers/radeonsi/si_hw_context.c  | 25 +
>>  src/gallium/drivers/radeonsi/si_pipe.c|  8 +---
>>  src/gallium/drivers/radeonsi/si_pipe.h|  5 +--
>>  6 files changed, 88 insertions(+), 54 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
>> b/src/gallium/drivers/radeon/r600_pipe_common.c
>> index fa9f70d..ee70a1a 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>> @@ -302,6 +302,59 @@ static void r600_flush_dma_ring(void *ctx, unsigned 
>> flags,
>>   rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
>>  }
>>
>> +/**
>> + * Store a linearized copy of all chunks of \p cs together with the buffer
>> + * list in \p saved.
>> + */
>> +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
>> + struct radeon_saved_cs *saved)
>> +{
>> + void *buf;
>> + unsigned i;
>> +
>> + /* Save the IB chunks. */
>> + saved->num_dw = cs->prev_dw + cs->current.cdw;
>> + saved->ib = MALLOC(4 * saved->num_dw);
>> + if (!saved->ib)
>> + goto oom;
>> +
>> + buf = saved->ib;
>> + for (i = 0; i < cs->num_prev; ++i) {
>> + memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
>> + buf += cs->prev[i].cdw;
>> + }
>> + memcpy(buf, cs->current.buf, cs->current.cdw * 4);
>> +
>> + /* Save the buffer list. */
>> + saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
>> + saved->bo_list = CALLOC(saved->bo_count,
>> + sizeof(saved->bo_list[0]));
>> + if (!saved->bo_list) {
>> + FREE(saved->ib);
>> + goto oom;
>> + }
>> + ws->cs_get_buffer_list(cs, saved->bo_list);
>> +
>> + return;
>> +
>> +oom:
>> + fprintf(stderr, "%s: out of memory\n", __func__);
>> + memset(saved, 0, sizeof(*saved));
> Is that Goto really worth it? It costs you one extra line of code and
> obfuscates things.

goto is a pretty common way to do error handling in C. This is
perfectly fine, and, in fact, preferable to an alternative that
duplicates the code.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Avoid division by zero.

2016-06-22 Thread Matt Turner
From: Ardinartsev Nikita 

Fixes regression introduced by af5ca43f2676bff7499f93277f908b681cb821d0

Reviewed-by: Matt Turner 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95419
---
 src/mesa/drivers/dri/i965/gen7_urb.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 387ed2e..797d1b6 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -300,17 +300,21 @@ gen7_upload_urb(struct brw_context *brw)
   remaining_space -= vs_additional;
   total_wants -= vs_wants;
 
-  unsigned hs_additional = (unsigned)
- round(hs_wants * (((double) remaining_space) / total_wants));
-  hs_chunks += hs_additional;
-  remaining_space -= hs_additional;
-  total_wants -= hs_wants;
-
-  unsigned ds_additional = (unsigned)
- round(ds_wants * (((double) remaining_space) / total_wants));
-  ds_chunks += ds_additional;
-  remaining_space -= ds_additional;
-  total_wants -= ds_wants;
+  if (total_wants > 0) {
+ unsigned hs_additional = (unsigned)
+round(hs_wants * (((double) remaining_space) / total_wants));
+ hs_chunks += hs_additional;
+ remaining_space -= hs_additional;
+ total_wants -= hs_wants;
+  }
+
+  if (total_wants > 0) {
+ unsigned ds_additional = (unsigned)
+round(ds_wants * (((double) remaining_space) / total_wants));
+ ds_chunks += ds_additional;
+ remaining_space -= ds_additional;
+ total_wants -= ds_wants;
+  }
 
   gs_chunks += remaining_space;
}
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/gen4: Pull texture formats from the texture object not the miptree

2016-06-22 Thread Rhys Kidd
On 22 June 2016 at 15:07, Jason Ekstrand  wrote:

> This makes texture views sort-of work.  It doesn't add full texture view
> support for gen4-5 but it is enough to fix the GL_ARB_copy_image formats
> piglit test on Iron Lake.
>
> Signed-off-by: Jason Ekstrand 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83036
> Cc: "11.1 11.2 12.0" 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 133a944..b07bf19 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -335,7 +335,7 @@ brw_update_texture_surface(struct gl_context *ctx,
> surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
>   6 * 4, 32, surf_offset);
>
> -   uint32_t tex_format = translate_tex_format(brw, mt->format,
> +   uint32_t tex_format = translate_tex_format(brw, intelObj->_Format,
>sampler->sRGBDecode);
>
> if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
>
>
For what it is worth, this series is:

Reviewed-by: Rhys Kidd 
Tested-by: Rhys Kidd 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] radeonsi: extract IB and bo list saving into separate functions

2016-06-22 Thread Michael Schellenberger Costa
Hi Nicolai

Am 22.06.2016 um 11:40 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle 
> 
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c | 53 
> +++
>  src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++
>  src/gallium/drivers/radeonsi/si_debug.c   | 39 +---
>  src/gallium/drivers/radeonsi/si_hw_context.c  | 25 +
>  src/gallium/drivers/radeonsi/si_pipe.c|  8 +---
>  src/gallium/drivers/radeonsi/si_pipe.h|  5 +--
>  6 files changed, 88 insertions(+), 54 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index fa9f70d..ee70a1a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -302,6 +302,59 @@ static void r600_flush_dma_ring(void *ctx, unsigned 
> flags,
>   rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
>  }
>  
> +/**
> + * Store a linearized copy of all chunks of \p cs together with the buffer
> + * list in \p saved.
> + */
> +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> + struct radeon_saved_cs *saved)
> +{
> + void *buf;
> + unsigned i;
> +
> + /* Save the IB chunks. */
> + saved->num_dw = cs->prev_dw + cs->current.cdw;
> + saved->ib = MALLOC(4 * saved->num_dw);
> + if (!saved->ib)
> + goto oom;
> +
> + buf = saved->ib;
> + for (i = 0; i < cs->num_prev; ++i) {
> + memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
> + buf += cs->prev[i].cdw;
> + }
> + memcpy(buf, cs->current.buf, cs->current.cdw * 4);
> +
> + /* Save the buffer list. */
> + saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
> + saved->bo_list = CALLOC(saved->bo_count,
> + sizeof(saved->bo_list[0]));
> + if (!saved->bo_list) {
> + FREE(saved->ib);
> + goto oom;
> + }
> + ws->cs_get_buffer_list(cs, saved->bo_list);
> +
> + return;
> +
> +oom:
> + fprintf(stderr, "%s: out of memory\n", __func__);
> + memset(saved, 0, sizeof(*saved));
Is that Goto really worth it? It costs you one extra line of code and
obfuscates things.
--Michael

> +}
> +
> +void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
> +{
> + unsigned i;
> +
> + FREE(saved->ib);
> +
> + for (i = 0; i < saved->bo_count; i++)
> + pb_reference(>bo_list[i].buf, NULL);
> + FREE(saved->bo_list);
> +
> + memset(saved, 0, sizeof(*saved));
> +}
> +
>  static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
>  {
>   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index fb6d1a5..a83908d 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -458,6 +458,15 @@ struct r600_ring {
> struct pipe_fence_handle **fence);
>  };
>  
> +/* Saved CS data for debugging features. */
> +struct radeon_saved_cs {
> + uint32_t*ib;
> + unsignednum_dw;
> +
> + struct radeon_bo_list_item  *bo_list;
> + unsignedbo_count;
> +};
> +
>  struct r600_common_context {
>   struct pipe_context b; /* base class */
>  
> @@ -623,6 +632,9 @@ const char *r600_get_llvm_processor_name(enum 
> radeon_family family);
>  void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
>struct r600_resource *dst, struct r600_resource *src);
>  void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
> +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> + struct radeon_saved_cs *saved);
> +void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
>  
>  /* r600_gpu_load.c */
>  void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
> b/src/gallium/drivers/radeonsi/si_debug.c
> index b551c72..176a195 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -508,7 +508,7 @@ static void si_dump_last_ib(struct si_context *sctx, FILE 
> *f)
>  {
>   int last_trace_id = -1;
>  
> - if (!sctx->last_ib)
> + if (!sctx->last_gfx.ib)
>   return;
>  
>   if (sctx->last_trace_buf) {
> @@ -533,11 +533,8 @@ static void si_dump_last_ib(struct si_context *sctx, 
> FILE *f)
>   sctx->init_config_gs_rings->ndw,
>   -1, "IB2: Init GS rings");
>  
> - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
> + si_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
>   last_trace_id, "IB");
> - 

[Mesa-dev] [PATCH 2/2] clover: Constify llvm::Function handling

2016-06-22 Thread Jan Vesely
Signed-off-by: Jan Vesely 
---

This can be squashed with the previous one

 .../state_trackers/clover/llvm/invocation.cpp  | 28 ++
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index db748b4..41064bd 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -276,12 +276,12 @@ namespace {
 #endif
}
 
-   std::vector
-   find_kernels(llvm::Module *mod) {
-  std::vector kernels;
+   std::vector
+   find_kernels(const llvm::Module *mod) {
+  std::vector kernels;
 #if HAVE_LLVM >= 0x0309
-  auto  = mod->getFunctionList();
-  for_each(list.begin(), list.end(), [&](llvm::Function ){
+  const auto  = mod->getFunctionList();
+  for_each(list.begin(), list.end(), [&](const llvm::Function ){
  if (f.getMetadata("kernel_arg_type"))
kernels.push_back();
   });
@@ -293,7 +293,7 @@ namespace {
   // require that we return an error here, but there will be an error if
   // the user tries to pass this program to a clCreateKernel() call.
   if (!kernel_node) {
- return std::vector();
+ return kernels;
   }
 
   kernels.reserve(kernel_node->getNumOperands());
@@ -317,7 +317,7 @@ namespace {
   llvm::PassManager PM;
 #endif
 
-  const std::vector kernels = find_kernels(mod);
+  const std::vector kernels = find_kernels(mod);
 
   // Add a function internalizer pass.
   //
@@ -618,7 +618,7 @@ namespace {
}
 
module
-   build_module_llvm(llvm::Module *mod,
+   build_module_llvm(const llvm::Module *mod,
  clang::LangAS::Map& address_spaces) {
 
   module m;
@@ -632,7 +632,7 @@ namespace {
   bitcode_ostream.flush();
 #endif
 
-  const std::vector kernels = find_kernels(mod);
+  const std::vector kernels = find_kernels(mod);
   for (unsigned i = 0; i < kernels.size(); ++i) {
  std::string kernel_name = kernels[i]->getName();
  std::vector args =
@@ -735,7 +735,7 @@ namespace {
 
std::map
get_kernel_offsets(std::vector ,
-  const std::vector ,
+  const std::vector ,
   std::string _log) {
 
   // One of the libelf implementations
@@ -786,9 +786,7 @@ namespace {
   // Determine the offsets for each kernel
   for (int i = 0; (symbol = gelf_getsym(symtab_data, i, )); i++) {
  char *name = elf_strptr(elf, symtab_header.sh_link, symbol->st_name);
- for (std::vector::const_iterator it = 
kernels.begin(),
-  e = kernels.end(); it != e; ++it) {
-llvm::Function *f = *it;
+ for (const llvm::Function *f : kernels) {
 if (f->getName() == std::string(name))
kernel_offsets[f->getName()] = symbol->st_value;
  }
@@ -799,11 +797,11 @@ namespace {
 
module
build_module_native(std::vector ,
-   llvm::Module *mod,
+   const llvm::Module *mod,
const clang::LangAS::Map _spaces,
std::string _log) {
 
-  const std::vector kernels = find_kernels(mod);
+  const std::vector kernels = find_kernels(mod);
 
   std::map kernel_offsets =
 get_kernel_offsets(code, kernels, r_log);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] clover: Fix kernel metadata retrieval after clang r273425

2016-06-22 Thread Jan Vesely
Signed-off-by: Jan Vesely 
---
 .../state_trackers/clover/llvm/invocation.cpp  | 35 +++---
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 57e..db748b4 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -277,7 +277,16 @@ namespace {
}
 
std::vector
-   find_kernels(const llvm::Module *mod) {
+   find_kernels(llvm::Module *mod) {
+  std::vector kernels;
+#if HAVE_LLVM >= 0x0309
+  auto  = mod->getFunctionList();
+  for_each(list.begin(), list.end(), [&](llvm::Function ){
+ if (f.getMetadata("kernel_arg_type"))
+   kernels.push_back();
+  });
+  return kernels;
+#endif
   const llvm::NamedMDNode *kernel_node =
  mod->getNamedMetadata("opencl.kernels");
   // This means there are no kernels in the program.  The spec does not
@@ -287,7 +296,6 @@ namespace {
  return std::vector();
   }
 
-  std::vector kernels;
   kernels.reserve(kernel_node->getNumOperands());
   for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
 #if HAVE_LLVM >= 0x0306
@@ -373,8 +381,27 @@ namespace {
   kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
  type_name(type_name_), access_qual(access_qual_) {}
};
+#if HAVE_LLVM >= 0x0309
+   std::vector
+   get_kernel_arg_md(const llvm::Function *kernel_func) {
 
-#if HAVE_LLVM >= 0x0306
+  size_t num_args = kernel_func->getArgumentList().size();
+
+  auto aq = kernel_func->getMetadata("kernel_arg_access_qual");
+  auto ty = kernel_func->getMetadata("kernel_arg_type");
+
+  std::vector res;
+  res.reserve(num_args);
+  for (size_t i = 0; i < num_args; ++i) {
+ res.push_back(kernel_arg_md(
+llvm::cast(ty->getOperand(i))->getString(),
+llvm::cast(aq->getOperand(i))->getString()));
+  }
+
+  return res;
+   }
+
+#elif HAVE_LLVM >= 0x0306
 
const llvm::MDNode *
get_kernel_metadata(const llvm::Function *kernel_func) {
@@ -772,7 +799,7 @@ namespace {
 
module
build_module_native(std::vector ,
-   const llvm::Module *mod,
+   llvm::Module *mod,
const clang::LangAS::Map _spaces,
std::string _log) {
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Combine 3DSTATE_STREAMOUT emitters and genX_sol_state atoms.

2016-06-22 Thread Kenneth Graunke
On Wednesday, June 22, 2016 5:21:34 PM PDT Jason Ekstrand wrote:
> On Wed, Jun 22, 2016 at 4:26 PM, Kenneth Graunke 
> wrote:
> 
> > They're basically the same.  Let's avoid the code duplication.
> >
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/mesa/drivers/dri/i965/brw_state.h|  2 +-
> >  src/mesa/drivers/dri/i965/brw_state_upload.c |  2 +-
> >  src/mesa/drivers/dri/i965/gen7_sol_state.c   | 32 --
> >  src/mesa/drivers/dri/i965/gen8_sol_state.c   | 90
> > +---
> >  4 files changed, 31 insertions(+), 95 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_state.h
> > b/src/mesa/drivers/dri/i965/brw_state.h
> > index b29412e..a16e876 100644
> > --- a/src/mesa/drivers/dri/i965/brw_state.h
> > +++ b/src/mesa/drivers/dri/i965/brw_state.h
> > @@ -166,7 +166,6 @@ extern const struct brw_tracked_state gen8_wm_state;
> >  extern const struct brw_tracked_state gen8_raster_state;
> >  extern const struct brw_tracked_state gen8_sbe_state;
> >  extern const struct brw_tracked_state gen8_sf_state;
> > -extern const struct brw_tracked_state gen8_sol_state;
> >  extern const struct brw_tracked_state gen8_sf_clip_viewport;
> >  extern const struct brw_tracked_state gen8_vertices;
> >  extern const struct brw_tracked_state gen8_vf_topology;
> > @@ -303,6 +302,7 @@ void gen8_upload_ps_extra(struct brw_context *brw,
> >  /* gen7_sol_state.c */
> >  void gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
> >const struct brw_vue_map *vue_map);
> > +void gen8_upload_3dstate_so_buffers(struct brw_context *brw);
> >
> >  /* gen8_surface_state.c */
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c
> > b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > index 0b47ebe..4a20821 100644
> > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> > @@ -337,7 +337,7 @@ static const struct brw_tracked_state
> > *gen8_render_atoms[] =
> > _te_state,
> > _ds_state,
> > _gs_state,
> > -   _sol_state,
> > +   _sol_state,
> > _clip_state,
> > _raster_state,
> > _sbe_state,
> > diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > index 4749cc8..8fcc591 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> > @@ -222,7 +222,9 @@ upload_3dstate_streamout(struct brw_context *brw, bool
> > active,
> > /* BRW_NEW_TRANSFORM_FEEDBACK */
> > struct gl_transform_feedback_object *xfb_obj =
> >ctx->TransformFeedback.CurrentObject;
> > -   uint32_t dw1 = 0, dw2 = 0;
> > +   const struct gl_transform_feedback_info *linked_xfb_info =
> > +  _obj->shader_program->LinkedTransformFeedback;
> > +   uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0;
> > int i;
> >
> > if (active) {
> >
> 
> In here (not visible) gen7 sets a bunch of enable bits that don't exist on
> gen8.  That should be inside of an "if (brw->gen <= 7)" guard.

D'oh...thanks.  Fixed locally.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-22 Thread Jan Vesely
On Wed, 2016-06-22 at 17:07 -0700, Francisco Jerez wrote:
> Jan Vesely  writes:
> 
> > On Mon, 2016-06-13 at 17:24 -0700, Francisco Jerez wrote:
> > > Serge Martin  writes:
> > > 
> > > > > This fixes getting the size of a struct arg. vec3 types still
> > > > > work
> > > > > ok.
> > > > > Only built-in args need to have power of two alignment,
> > > > > getTypeAllocSize
> > > > > reports the correct size.
> > > > ---
> > > >  src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git
> > > > a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > > > b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > > > index 03487d6..9af51539 100644
> > > > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > > > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > > > @@ -472,7 +472,8 @@ namespace {
> > > >   // aligned to the next larger power of two".  We need
> > > > this
> > > >   // alignment for three element vectors, which have
> > > >   // non-power-of-2 store size.
> > > > - const unsigned arg_api_size =
> > > > util_next_power_of_two(arg_store_size);
> > > > + const unsigned arg_api_size = arg_type->isStructTy()
> > > > ?
> > > > +   arg_store_size :
> > > > util_next_power_of_two(arg_store_size);
> > > >  
> > > Hm...  Isn't this still going to be broken if you pass a struct
> > > argument
> > > to a kernel function and the alignment of any of the struct
> > > members
> > > doesn't match the target-specific data layout?  Not sure we can
> > > fix
> > > this
> > > sensibly without requiring the target's data layout to match the
> > > CL
> > > API
> > > exactly.  Any suggestions Tom?
> > 
> > according to 6.7.2.1 compilers can arbitrarily insert padding
> > between
> > struct members (except at the beginning).
> 
> What spec version are you looking at?  My CL spec doesn't have any
> section labeled 6.7.2.1.

The C99 spec; I did not find anything specific for CLC (it might be that I
just need to look harder). CLC 2.0 adds the additional constraint that you
can't use address space qualifiers.

> 
> > Even if size/alignment of individual members match CL API exactly,
> > there's no guarantee that the structure layout/size will be the
> > same.
> > 
> How can you exchange structured data with a CL kernel then, assuming
> that the layout of structure types in memory is fully unspecified as
> you
> say?

that is my point. My understanding is that it relies on a silent
assumption that both CLC and the host compiler will create the same
structure layout given the same structure elements.

big endian host can create:
struct foo {
cl_int a;
// 16 bit padding;
cl_short b;
cl_int c;
};
while little endian device could create:
struct foo {
int a;
short b;
// 16 bit padding
int c;
};

If cl_short/short alignment is 2 bytes, the above structures and all the
members have the same size/alignment, yet the structures are not compatible.

Am I missing something that would prevent the above?

Jan

> 
> > Jan
> > 
> > > 
> > > >   llvm::Type *target_type = arg_type->isIntegerTy() ?
> > > > TD.getSmallestLegalIntType(mod->getContext(),
> > > > arg_store_size * 8)
> > > > -- 
> > > > 2.5.5
> > -- 
> > 
> > Jan Vesely 
-- 

Jan Vesely 

signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] glsl: Split arrays even in the presence of whole-array copies.

2016-06-22 Thread Kenneth Graunke
On Wednesday, June 22, 2016 2:23:00 PM PDT Timothy Arceri wrote:
> On Tue, 2016-06-21 at 20:02 -0700, Kenneth Graunke wrote:
> > Previously, we failed to split constant arrays.  Code such as
> > 
> >int[2] numbers = int[](1, 2);
> > 
> > would generate a whole-array assignment:
> > 
> >   (assign () (var_ref numbers)
> >  (constant (array int 4) (constant int 1) (constant int
> > 2)))
> > 
> > opt_array_splitting generally tried to visit ir_dereference_array
> > nodes,
> > and avoid recursing into the inner ir_dereference_variable.  So if it
> > ever saw a ir_dereference_variable, it assumed this was a whole-array
> > read and bailed.  However, in the above case, there's no array deref,
> > and we can totally handle it - we just have to "unroll" the
> > assignment,
> > creating assignments for each element.
> > 
> > This was mitigated by the fact that we constant propagate whole
> > arrays,
> > so a dereference of a single component would usually get the desired
> > single value anyway.  However, I plan to stop doing that shortly;
> > early experiments with disabling constant propagation of arrays
> > revealed this shortcoming.
> > 
> > This patch causes some arrays in Gl32GSCloth's geometry shaders to be
> > split, which allows other optimizations to eliminate unused GS
> > inputs.
> > The VS then doesn't have to write them, which eliminates the entire
> > VS
> > (5 -> 2 instructions).  It still renders correctly.
> > 
> > No other change in shader-db.
> > 
> > Cc: mesa-sta...@lists.freedesktop.org
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/compiler/glsl/opt_array_splitting.cpp | 56
> > +++
> >  1 file changed, 56 insertions(+)
> > 
> > diff --git a/src/compiler/glsl/opt_array_splitting.cpp
> > b/src/compiler/glsl/opt_array_splitting.cpp
> > index a294da5..9faeb87 100644
> > --- a/src/compiler/glsl/opt_array_splitting.cpp
> > +++ b/src/compiler/glsl/opt_array_splitting.cpp
> > @@ -93,6 +93,7 @@ public:
> > {
> >this->mem_ctx = ralloc_context(NULL);
> >this->variable_list.make_empty();
> > +  this->in_whole_array_copy = false;
> > }
> >  
> > ~ir_array_reference_visitor(void)
> > @@ -104,6 +105,8 @@ public:
> >  
> > virtual ir_visitor_status visit(ir_variable *);
> > virtual ir_visitor_status visit(ir_dereference_variable *);
> > +   virtual ir_visitor_status visit_enter(ir_assignment *);
> > +   virtual ir_visitor_status visit_leave(ir_assignment *);
> > virtual ir_visitor_status visit_enter(ir_dereference_array *);
> > virtual ir_visitor_status visit_enter(ir_function_signature *);
> >  
> > @@ -113,6 +116,8 @@ public:
> > exec_list variable_list;
> >  
> > void *mem_ctx;
> > +
> > +   bool in_whole_array_copy;
> >  };
> >  
> >  } /* namespace */
> > @@ -158,10 +163,34 @@ ir_array_reference_visitor::visit(ir_variable
> > *ir)
> >  }
> >  
> >  ir_visitor_status
> > +ir_array_reference_visitor::visit_enter(ir_assignment *ir)
> > +{
> > +   in_whole_array_copy =
> > +  ir->lhs->type->is_array() && !ir->lhs->type-
> > >is_array_of_arrays() &&
> > +  ir->whole_variable_written();
> 
> Maybe a TODO for AoA support? I assume we would just need to do some
> kind of recursive call in the new code below or is there more to it? If
> there is more to it?

Heh, good point - I mostly didn't want to think about it, and originally
didn't have code to handle it.  But I added the recursive call (the
assign_i->accept()) while fixing bugs.  It seems like it should work,
as we just unroll & split one level of arrays.  Jenkins is happy.

So I'll drop the !AOA check.  Thanks!

> 
> > +
> > +   return visit_continue;
> > +}
> > +
> > +ir_visitor_status
> > +ir_array_reference_visitor::visit_leave(ir_assignment *ir)
> > +{
> > +   in_whole_array_copy = false;
> > +
> > +   return visit_continue;
> > +}
> > +
> > +ir_visitor_status
> >  ir_array_reference_visitor::visit(ir_dereference_variable *ir)
> >  {
> > variable_entry *entry = this->get_variable_entry(ir->var);
> >  
> > +   /* Ignore whole-array assignments on the LHS.  We can split those
> > +* by "unrolling" the assignment into component-wise assignments.
> > +*/
> 
> Instead of "Ignore", maybe "Allow" or "Allow splitting of" or something
> like that; I think it makes it easier to understand what's going on.

Yeah, that's much better.  I've changed it to "Allow".

> Otherwise patch 1-2 are:
> 
> Reviewed-by: Timothy Arceri 

Thanks!


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Combine 3DSTATE_STREAMOUT emitters and genX_sol_state atoms.

2016-06-22 Thread Jason Ekstrand
On Wed, Jun 22, 2016 at 4:26 PM, Kenneth Graunke 
wrote:

> They're basically the same.  Let's avoid the code duplication.
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_state.h|  2 +-
>  src/mesa/drivers/dri/i965/brw_state_upload.c |  2 +-
>  src/mesa/drivers/dri/i965/gen7_sol_state.c   | 32 --
>  src/mesa/drivers/dri/i965/gen8_sol_state.c   | 90
> +---
>  4 files changed, 31 insertions(+), 95 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
> b/src/mesa/drivers/dri/i965/brw_state.h
> index b29412e..a16e876 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -166,7 +166,6 @@ extern const struct brw_tracked_state gen8_wm_state;
>  extern const struct brw_tracked_state gen8_raster_state;
>  extern const struct brw_tracked_state gen8_sbe_state;
>  extern const struct brw_tracked_state gen8_sf_state;
> -extern const struct brw_tracked_state gen8_sol_state;
>  extern const struct brw_tracked_state gen8_sf_clip_viewport;
>  extern const struct brw_tracked_state gen8_vertices;
>  extern const struct brw_tracked_state gen8_vf_topology;
> @@ -303,6 +302,7 @@ void gen8_upload_ps_extra(struct brw_context *brw,
>  /* gen7_sol_state.c */
>  void gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
>const struct brw_vue_map *vue_map);
> +void gen8_upload_3dstate_so_buffers(struct brw_context *brw);
>
>  /* gen8_surface_state.c */
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c
> b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 0b47ebe..4a20821 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -337,7 +337,7 @@ static const struct brw_tracked_state
> *gen8_render_atoms[] =
> _te_state,
> _ds_state,
> _gs_state,
> -   _sol_state,
> +   _sol_state,
> _clip_state,
> _raster_state,
> _sbe_state,
> diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> index 4749cc8..8fcc591 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
> @@ -222,7 +222,9 @@ upload_3dstate_streamout(struct brw_context *brw, bool
> active,
> /* BRW_NEW_TRANSFORM_FEEDBACK */
> struct gl_transform_feedback_object *xfb_obj =
>ctx->TransformFeedback.CurrentObject;
> -   uint32_t dw1 = 0, dw2 = 0;
> +   const struct gl_transform_feedback_info *linked_xfb_info =
> +  _obj->shader_program->LinkedTransformFeedback;
> +   uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0;
> int i;
>
> if (active) {
>

In here (not visible) gen7 sets a bunch of enable bits that don't exist on
gen8.  That should be inside of an "if (brw->gen <= 7)" guard.


> @@ -258,12 +260,30 @@ upload_3dstate_streamout(struct brw_context *brw,
> bool active,
>
>dw2 |= SET_FIELD(urb_entry_read_offset,
> SO_STREAM_3_VERTEX_READ_OFFSET);
>dw2 |= SET_FIELD(urb_entry_read_length - 1,
> SO_STREAM_3_VERTEX_READ_LENGTH);
> +
> +  if (brw->gen >= 8) {
> +/* Set buffer pitches; 0 means unbound. */
> +if (xfb_obj->Buffers[0])
> +   dw3 |= linked_xfb_info->Buffers[0].Stride * 4;
> +if (xfb_obj->Buffers[1])
> +   dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16;
> +if (xfb_obj->Buffers[2])
> +   dw4 |= linked_xfb_info->Buffers[2].Stride * 4;
> +if (xfb_obj->Buffers[3])
> +   dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16;
> +  }
> }
>
> -   BEGIN_BATCH(3);
> -   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
> +   const int dwords = brw->gen >= 8 ? 5 : 3;
> +
> +   BEGIN_BATCH(dwords);
> +   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (dwords - 2));
> OUT_BATCH(dw1);
> OUT_BATCH(dw2);
> +   if (dwords > 3) {
> +  OUT_BATCH(dw3);
> +  OUT_BATCH(dw4);
> +   }
> ADVANCE_BATCH();
>  }
>
> @@ -275,7 +295,11 @@ upload_sol_state(struct brw_context *brw)
> bool active = _mesa_is_xfb_active_and_unpaused(ctx);
>
> if (active) {
> -  upload_3dstate_so_buffers(brw);
> +  if (brw->gen >= 8)
> + gen8_upload_3dstate_so_buffers(brw);
> +  else
> + upload_3dstate_so_buffers(brw);
> +
>/* BRW_NEW_VUE_MAP_GEOM_OUT */
>gen7_upload_3dstate_so_decl_list(brw, >vue_map_geom_out);
> }
> diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c
> b/src/mesa/drivers/dri/i965/gen8_sol_state.c
> index a72f5e1..21cc129 100644
> --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c
> @@ -35,7 +35,7 @@
>  #include "intel_buffer_objects.h"
>  #include "main/transformfeedback.h"
>
> -static void
> +void
>  gen8_upload_3dstate_so_buffers(struct brw_context *brw)
>  {
> struct gl_context *ctx = >ctx;
> @@ -93,91 +93,3 @@ 

Re: [Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-22 Thread Francisco Jerez
Jan Vesely  writes:

> On Mon, 2016-06-13 at 17:24 -0700, Francisco Jerez wrote:
>> Serge Martin  writes:
>> 
>> > This fixes getting the size of a struct arg. vec3 types still work
>> > ok.
>> > Only built-in args need to have power of two alignment,
>> > getTypeAllocSize
>> > reports the correct size.
>> > ---
>> >  src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
>> >  1 file changed, 2 insertions(+), 1 deletion(-)
>> > 
>> > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > index 03487d6..9af51539 100644
>> > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
>> > @@ -472,7 +472,8 @@ namespace {
>> >   // aligned to the next larger power of two".  We need
>> > this
>> >   // alignment for three element vectors, which have
>> >   // non-power-of-2 store size.
>> > - const unsigned arg_api_size =
>> > util_next_power_of_two(arg_store_size);
>> > + const unsigned arg_api_size = arg_type->isStructTy() ?
>> > +   arg_store_size :
>> > util_next_power_of_two(arg_store_size);
>> >  
>> Hm...  Isn't this still going to be broken if you pass a struct
>> argument
>> to a kernel function and the alignment of any of the struct members
>> doesn't match the target-specific data layout?  Not sure we can fix
>> this
>> sensibly without requiring the target's data layout to match the CL
>> API
>> exactly.  Any suggestions Tom?
>
> according to 6.7.2.1 compilers can arbitrarily insert padding between
> struct members (except at the beginning).

What spec version are you looking at?  My CL spec doesn't have any
section labeled 6.7.2.1.

> Even if size/alignment of individual members match CL API exactly,
> there's no guarantee that the structure layout/size will be the same.
>
How can you exchange structured data with a CL kernel then, assuming
that the layout of structure types in memory is fully unspecified as you
say?

> Jan
>
>> 
>> >   llvm::Type *target_type = arg_type->isIntegerTy() ?
>> > TD.getSmallestLegalIntType(mod->getContext(),
>> > arg_store_size * 8)
>> > -- 
>> > 2.5.5
> -- 
>
> Jan Vesely 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Combine 3DSTATE_STREAMOUT emitters and genX_sol_state atoms.

2016-06-22 Thread Kenneth Graunke
They're basically the same.  Let's avoid the code duplication.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_state.h|  2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c |  2 +-
 src/mesa/drivers/dri/i965/gen7_sol_state.c   | 32 --
 src/mesa/drivers/dri/i965/gen8_sol_state.c   | 90 +---
 4 files changed, 31 insertions(+), 95 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index b29412e..a16e876 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -166,7 +166,6 @@ extern const struct brw_tracked_state gen8_wm_state;
 extern const struct brw_tracked_state gen8_raster_state;
 extern const struct brw_tracked_state gen8_sbe_state;
 extern const struct brw_tracked_state gen8_sf_state;
-extern const struct brw_tracked_state gen8_sol_state;
 extern const struct brw_tracked_state gen8_sf_clip_viewport;
 extern const struct brw_tracked_state gen8_vertices;
 extern const struct brw_tracked_state gen8_vf_topology;
@@ -303,6 +302,7 @@ void gen8_upload_ps_extra(struct brw_context *brw,
 /* gen7_sol_state.c */
 void gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
   const struct brw_vue_map *vue_map);
+void gen8_upload_3dstate_so_buffers(struct brw_context *brw);
 
 /* gen8_surface_state.c */
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 0b47ebe..4a20821 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -337,7 +337,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
_te_state,
_ds_state,
_gs_state,
-   _sol_state,
+   _sol_state,
_clip_state,
_raster_state,
_sbe_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 4749cc8..8fcc591 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -222,7 +222,9 @@ upload_3dstate_streamout(struct brw_context *brw, bool 
active,
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
   ctx->TransformFeedback.CurrentObject;
-   uint32_t dw1 = 0, dw2 = 0;
+   const struct gl_transform_feedback_info *linked_xfb_info =
+  _obj->shader_program->LinkedTransformFeedback;
+   uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0;
int i;
 
if (active) {
@@ -258,12 +260,30 @@ upload_3dstate_streamout(struct brw_context *brw, bool 
active,
 
   dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET);
   dw2 |= SET_FIELD(urb_entry_read_length - 1, 
SO_STREAM_3_VERTEX_READ_LENGTH);
+
+  if (brw->gen >= 8) {
+/* Set buffer pitches; 0 means unbound. */
+if (xfb_obj->Buffers[0])
+   dw3 |= linked_xfb_info->Buffers[0].Stride * 4;
+if (xfb_obj->Buffers[1])
+   dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16;
+if (xfb_obj->Buffers[2])
+   dw4 |= linked_xfb_info->Buffers[2].Stride * 4;
+if (xfb_obj->Buffers[3])
+   dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16;
+  }
}
 
-   BEGIN_BATCH(3);
-   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
+   const int dwords = brw->gen >= 8 ? 5 : 3;
+
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (dwords - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
+   if (dwords > 3) {
+  OUT_BATCH(dw3);
+  OUT_BATCH(dw4);
+   }
ADVANCE_BATCH();
 }
 
@@ -275,7 +295,11 @@ upload_sol_state(struct brw_context *brw)
bool active = _mesa_is_xfb_active_and_unpaused(ctx);
 
if (active) {
-  upload_3dstate_so_buffers(brw);
+  if (brw->gen >= 8)
+ gen8_upload_3dstate_so_buffers(brw);
+  else
+ upload_3dstate_so_buffers(brw);
+
   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   gen7_upload_3dstate_so_decl_list(brw, >vue_map_geom_out);
}
diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c 
b/src/mesa/drivers/dri/i965/gen8_sol_state.c
index a72f5e1..21cc129 100644
--- a/src/mesa/drivers/dri/i965/gen8_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c
@@ -35,7 +35,7 @@
 #include "intel_buffer_objects.h"
 #include "main/transformfeedback.h"
 
-static void
+void
 gen8_upload_3dstate_so_buffers(struct brw_context *brw)
 {
struct gl_context *ctx = >ctx;
@@ -93,91 +93,3 @@ gen8_upload_3dstate_so_buffers(struct brw_context *brw)
}
brw_obj->zero_offsets = false;
 }
-
-static void
-gen8_upload_3dstate_streamout(struct brw_context *brw, bool active,
-  struct brw_vue_map *vue_map)
-{
-   struct gl_context *ctx = >ctx;
-
-   /* BRW_NEW_TRANSFORM_FEEDBACK */
-   struct gl_transform_feedback_object *xfb_obj =
-  ctx->TransformFeedback.CurrentObject;
-   const struct gl_transform_feedback_info *linked_xfb_info =
-  

[Mesa-dev] [PATCH 2/2] i965: Implement rasterizer discard via SOL unless required for queries.

2016-06-22 Thread Kenneth Graunke
We currently use CL_INVOCATION_COUNT for the GL_PRIMITIVES_GENERATED
query, which involves passing all primitives to the clipper.  When
rasterizer discard is enabled, we program the clipper in REJECT_ALL
mode, rather than using the SOL stage's "Rendering Disable" feature.

See commit f09b91f78247409f54c975f56cb10d5f350fe64e for an explanation
of why we implement GL_PRIMITIVES_GENERATED this way.

Apparently the SOL stage's "Rendering Disable" feature is a lot faster
than having the clipper reject all primitives.  It's safe to use when
no GL_PRIMITIVES_GENERATED query is active, as we don't care about
CL_INVOCATION_COUNT incrementing.

This patch makes us use SO_RENDERING_DISABLE when no query is active,
but continues falling back to the clipper in REJECT_ALL mode when the
queries are enabled.  It brings back the perf_debug for the clipper
case (which I removed in commit 1f9445ff57b, thinking it wasn't useful).

Improves performance in Gl32GSCloth by 84.8303% +/- 2.07132% (n = 10)
on my Broadwell GT2 laptop.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/gen6_queryobj.c  |  4 
 src/mesa/drivers/dri/i965/gen7_sol_state.c | 17 +
 2 files changed, 21 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c 
b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index f36f095..96db5e9 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -307,6 +307,8 @@ gen6_begin_query(struct gl_context *ctx, struct 
gl_query_object *q)
 
case GL_PRIMITIVES_GENERATED:
   write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
+  if (query->Base.Stream == 0)
+ ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
   break;
 
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
@@ -359,6 +361,8 @@ gen6_end_query(struct gl_context *ctx, struct 
gl_query_object *q)
 
case GL_PRIMITIVES_GENERATED:
   write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
+  if (query->Base.Stream == 0)
+ ctx->NewDriverState |= BRW_NEW_RASTERIZER_DISCARD;
   break;
 
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c 
b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index 8fcc591..6c747fa 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -214,6 +214,12 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
ADVANCE_BATCH();
 }
 
+static bool
+query_active(struct gl_query_object *q)
+{
+   return q && q->Active;
+}
+
 static void
 upload_3dstate_streamout(struct brw_context *brw, bool active,
 const struct brw_vue_map *vue_map)
@@ -235,6 +241,16 @@ upload_3dstate_streamout(struct brw_context *brw, bool 
active,
   dw1 |= SO_FUNCTION_ENABLE;
   dw1 |= SO_STATISTICS_ENABLE;
 
+  /* BRW_NEW_RASTERIZER_DISCARD */
+  if (ctx->RasterDiscard) {
+ if (!query_active(ctx->Query.PrimitivesGenerated[0])) {
+dw1 |= SO_RENDERING_DISABLE;
+ } else {
+perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
+   "query active relies on the clipper.");
+ }
+  }
+
   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
 dw1 |= SO_REORDER_TRAILING;
@@ -317,6 +333,7 @@ const struct brw_tracked_state gen7_sol_state = {
   .mesa  = _NEW_LIGHT,
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
+   BRW_NEW_RASTERIZER_DISCARD |
BRW_NEW_VUE_MAP_GEOM_OUT |
BRW_NEW_TRANSFORM_FEEDBACK,
},
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] clover: fix getting struct args api size

2016-06-22 Thread Jan Vesely
On Mon, 2016-06-13 at 17:24 -0700, Francisco Jerez wrote:
> Serge Martin  writes:
> 
> > This fixes getting the size of a struct arg. vec3 types still work
> > ok.
> > Only built-in args need to have power of two alignment,
> > getTypeAllocSize
> > reports the correct size.
> > ---
> >  src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > index 03487d6..9af51539 100644
> > --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> > @@ -472,7 +472,8 @@ namespace {
> >   // aligned to the next larger power of two".  We need
> > this
> >   // alignment for three element vectors, which have
> >   // non-power-of-2 store size.
> > - const unsigned arg_api_size =
> > util_next_power_of_two(arg_store_size);
> > + const unsigned arg_api_size = arg_type->isStructTy() ?
> > +   arg_store_size :
> > util_next_power_of_two(arg_store_size);
> >  
> Hm...  Isn't this still going to be broken if you pass a struct
> argument
> to a kernel function and the alignment of any of the struct members
> doesn't match the target-specific data layout?  Not sure we can fix
> this
> sensibly without requiring the target's data layout to match the CL
> API
> exactly.  Any suggestions Tom?

According to section 6.7.2.1 of the C standard, compilers can arbitrarily
insert padding between struct members (except at the beginning). Even if
the size/alignment of individual members matches the CL API exactly,
there's no guarantee that the structure layout/size will be the same.

Jan

> 
> >   llvm::Type *target_type = arg_type->isIntegerTy() ?
> > TD.getSmallestLegalIntType(mod->getContext(),
> > arg_store_size * 8)
> > -- 
> > 2.5.5
-- 

Jan Vesely 

signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] anv/cmd_buffer: Set depth/stencil extent based on the image

2016-06-22 Thread Chad Versace
On Fri 17 Jun 2016, Jason Ekstrand wrote:
> It used to be based on the framebuffer which isn't quite right.
> 
> Signed-off-by: Jason Ekstrand 
> Cc: Chad Versace 
> Cc: "12.0" 

Yes, please.
Reviewed-by: Chad Versace 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/64] i965/blorp: Only set src_z for gen8+ 3D textures

2016-06-22 Thread Chad Versace
On Tue 21 Jun 2016, Pohjolainen, Topi wrote:
> On Mon, Jun 20, 2016 at 08:52:50PM -0700, Kenneth Graunke wrote:
> > On Friday, June 17, 2016 4:55:41 PM PDT Jason Ekstrand wrote:
> > > On Thu, Jun 16, 2016 at 10:08 AM, Chad Versace 
> > > wrote:
> > > 
> > > > On Sat 11 Jun 2016, Jason Ekstrand wrote:

> > > > Enlighten me. Why does blorp use 3D surfaces on gen >= 8 but not
> > > > earlier?


> > > History?  TBH, I'm not really sure.  Probably because SKL 3-D is different
> > > but you'd have to ask topi to be sure.
> > 
> > Historically, we did everything via tile x/y offsets, and so BLORP
> > worked that way too.  However, we never applied any offsets to the
> > auxiliary surfaces (i.e. CMS).  This seems fairly sketchy, and got
> > us into trouble when we ported it to Gen8.  As a hack, falling back
> > from CMS to UMS made things work out.
> > 
> > Rather than try and figure out proper offset calculations for CCS_D,
> > CCS_E, and HiZ, Topi decided to just access everything using level/layer
> > like we do for normal GL.  It seemed a lot safer, and worked out well.
> > 
> > Gen6 never uses CMS, so it works out.  Topi seemed to think that Gen7
> > can't do CMS for array textures (though I can't seem to find that code
> > to confirm), so it might work out.  I'm still not sure why Gen7 works.
> 
> I need to dig some more for this, I can't remember either.
> 
> > 
> > Honestly, I think it'd be better to convert Gen6-7 to be like Gen8+.

Yes, I agree. We should avoid XOffset/YOffset when possible.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [BRANCH] Gallium Radeon cleanups

2016-06-22 Thread Marek Olšák
On Thu, Jun 23, 2016 at 12:16 AM, Vedran Miletić  wrote:
> On 06/22/2016 08:49 PM, Marek Olšák wrote:
>>
>> Hi,
>>
>> The top 6 patches in this branch are cleanups I'd like to push. I'm
>> not sending them to the list because they are kinda boring. (and there
>> is another series with 6 patches already)
>>
>> https://cgit.freedesktop.org/~mareko/mesa/log/?h=radeon-cleanups
>>
>> Commits:
>>   gallium/radeon: use r600_resource_reference
>>   gallium/radeon: add and use r600_texture_reference
>>   gallium/radeon/winsyses: boolean -> bool, TRUE -> true, FALSE ->
>> false
>>   gallium/radeon: boolean -> bool, TRUE -> true, FALSE -> false
>>   radeonsi: boolean -> bool, TRUE -> true, FALSE -> false
>>   radeonsi: make si_is_format_supported static
>>
>> Please review.
>>
>
> I presume the next series will address boolean-> bool in si_state.c. This
> series is

Those are Gallium interfaces that can't be changed just in one driver.
The boolean -> bool transition within the driver is really complete
with this series.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] Gallium multithreaded queue - cleanup and multiple threads per queue

2016-06-22 Thread Marek Olšák
On Thu, Jun 23, 2016 at 12:29 AM, Marek Olšák  wrote:
> On Tue, Jun 21, 2016 at 4:47 PM, Nicolai Hähnle  wrote:
>> On 21.06.2016 14:17, Marek Olšák wrote:
>>>
>>> Hi,
>>>
>>> This improves u_queue to be more usable in more cases.
>>>
>>> With this, the size of the queue (maximum number of jobs waiting) is
>>> configurable as well as the number of threads executing the jobs. The
>>> semaphores are ditched in favor of simpler condvars, and multiple waiters on
>>> fences are allowed as well.
>>>
>>> This is a prerequisite for a later series that will add multithreaded
>>> shader compilation into radeonsi.
>>
>>
>> I like the overall cleanup, but I'd appreciate it if you could rearrange
>> patch #3 and #5 a little to avoid the temporary deadlock. No need to risk
>> someone running into that while bisecting.
>>
>> I think it would suffice to add queue->lock locking around the
>> queue->kill_threads already in patch #3, but tbh I'd also be fine with the
>> slightly less-work option of squashing the two patches together.
>
> To be honest, I'm not able to see the deadlock now. Maybe I should
> remove the comment.
>
> Strictly speaking, patch 3 doesn't change anything with respect to
> locking. Either the deadlock is not there or it has always been there.

If I fix up calloc, free, and snprintf, does it have your Rb?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] Gallium multithreaded queue - cleanup and multiple threads per queue

2016-06-22 Thread Marek Olšák
On Tue, Jun 21, 2016 at 4:47 PM, Nicolai Hähnle  wrote:
> On 21.06.2016 14:17, Marek Olšák wrote:
>>
>> Hi,
>>
>> This improves u_queue to be more usable in more cases.
>>
>> With this, the size of the queue (maximum number of jobs waiting) is
>> configurable as well as the number of threads executing the jobs. The
>> semaphores are ditched in favor of simpler condvars, and multiple waiters on
>> fences are allowed as well.
>>
>> This is a prerequisite for a later series that will add multithreaded
>> shader compilation into radeonsi.
>
>
> I like the overall cleanup, but I'd appreciate it if you could rearrange
> patch #3 and #5 a little to avoid the temporary deadlock. No need to risk
> someone running into that while bisecting.
>
> I think it would suffice to add queue->lock locking around the
> queue->kill_threads already in patch #3, but tbh I'd also be fine with the
> slightly less-work option of squashing the two patches together.

To be honest, I'm not able to see the deadlock now. Maybe I should
remove the comment.

Strictly speaking, patch 3 doesn't change anything with respect to
locking. Either the deadlock is not there or it has always been there.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] clover: conditionally use MESA_GIT_SHA1

2016-06-22 Thread Vedran Miletić

On 06/22/2016 02:04 PM, Emil Velikov wrote:

From: Emil Velikov 

Considering how hard/annoying it was for many peoples' workflow to
properly generate the macro, it will be demoted to conditionally
available with follow-up commits.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Vedran Miletić 
Cc: Francisco Jerez 
Signed-off-by: Emil Velikov 
---
 src/gallium/state_trackers/clover/api/device.cpp   | 6 +-
 src/gallium/state_trackers/clover/api/platform.cpp | 5 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index ed323e0..7ad01d9 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -302,7 +302,11 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,

case CL_DEVICE_VERSION:
   buf.as_string() = "OpenCL 1.1 Mesa " PACKAGE_VERSION
-" (" MESA_GIT_SHA1 ")";
+#ifdef MESA_GIT_SHA1
+" (" MESA_GIT_SHA1 ")"
+


This newline is unnecessary.


+#endif
+   ;
   break;

case CL_DEVICE_EXTENSIONS:
diff --git a/src/gallium/state_trackers/clover/api/platform.cpp 
b/src/gallium/state_trackers/clover/api/platform.cpp
index cdb8a99..b1b1fdf 100644
--- a/src/gallium/state_trackers/clover/api/platform.cpp
+++ b/src/gallium/state_trackers/clover/api/platform.cpp
@@ -59,7 +59,10 @@ clover::GetPlatformInfo(cl_platform_id d_platform, 
cl_platform_info param,

case CL_PLATFORM_VERSION:
   buf.as_string() = "OpenCL 1.1 Mesa " PACKAGE_VERSION
-" (" MESA_GIT_SHA1 ")";
+#ifdef MESA_GIT_SHA1
+" (" MESA_GIT_SHA1 ")"
+#endif
+;
   break;

case CL_PLATFORM_NAME:



With the comment above addressed, this patch is:

Reviewed-by: Vedran Miletić 

--
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [BRANCH] Gallium Radeon cleanups

2016-06-22 Thread Vedran Miletić

On 06/22/2016 08:49 PM, Marek Olšák wrote:

Hi,

The top 6 patches in this branch are cleanups I'd like to push. I'm
not sending them to the list because they are kinda boring. (and there
is another series with 6 patches already)

https://cgit.freedesktop.org/~mareko/mesa/log/?h=radeon-cleanups

Commits:
  gallium/radeon: use r600_resource_reference
  gallium/radeon: add and use r600_texture_reference
  gallium/radeon/winsyses: boolean -> bool, TRUE -> true, FALSE -> false
  gallium/radeon: boolean -> bool, TRUE -> true, FALSE -> false
  radeonsi: boolean -> bool, TRUE -> true, FALSE -> false
  radeonsi: make si_is_format_supported static

Please review.



I presume the next series will address boolean-> bool in si_state.c. 
This series is


Reviewed-by: Vedran Miletić 


Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



--
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: keep using v_rcp_f32 for division in future LLVM

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/radeon_llvm.h   |  3 +++
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 28 --
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index ec16def..61afa7a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -101,6 +101,9 @@ struct radeon_llvm_context {
LLVMValueRef main_fn;
LLVMTypeRef return_type;
 
+   unsigned fpmath_md_kind;
+   LLVMValueRef fpmath_md_2p5_ulp;
+
struct gallivm_state gallivm;
 };
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d395208..79ab4ef 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1523,19 +1523,34 @@ static void emit_up2h(const struct lp_build_tgsi_action 
*action,
}
 }
 
+static void emit_fdiv(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+
+   emit_data->output[emit_data->chan] =
+   LLVMBuildFDiv(bld_base->base.gallivm->builder,
+ emit_data->args[0], emit_data->args[1], "");
+
+   /* Use v_rcp_f32 instead of precise division. */
+   LLVMSetMetadata(emit_data->output[emit_data->chan],
+   ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+}
+
 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
  * the target machine. f64 needs global unsafe math flags to get rsq. */
 static void emit_rsq(const struct lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data)
 {
-   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef sqrt =
lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
 emit_data->args[0]);
 
emit_data->output[emit_data->chan] =
-   LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
+   lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+ bld_base->base.one, sqrt);
 }
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char 
*triple)
@@ -1586,6 +1601,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx, const char *trip
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
 
+   /* metadata allowing 2.5 ULP */
+   ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
+  "fpmath", 6);
+   LLVMValueRef arg = LLVMMDStringInContext(ctx->gallivm.context,
+"float 2.5", 9);
+   ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
+&arg, 1);
+
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
 
@@ -1615,6 +1638,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx, const char *trip
bld_base->op_actions[TGSI_OPCODE_DFMA].emit = 
build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+   bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

2016-06-22 Thread Zhang, Boyuan
OK, so I added a get-parameters call for each firmware version, and moved all 
the value assignments to the firmware-specific files. As a result, changes made 
for a specific version won't affect other versions. For future firmware versions, 
we can still use the same structure but assign different values in the 
version-specific calls. Please see the new patch set I just sent.

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: June-22-16 11:55 AM
To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
Subject: Re: [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

Am 22.06.2016 um 17:43 schrieb Zhang, Boyuan:
>> We should write the encode structure directly without the use of the
>> RVCE_CS() macros.
>>
>> Otherwise all of that doesn't make much sense and is just another layer of 
>> abstraction.
> Unlike UVD, where the firmware takes the address of the IB structure, the VCE 
> firmware directly takes the values of the IB, not the address. The encode 
> structure here is used for storing values. We need this layer because we 
> want to assign different values to some of the IB fields in VAAPI which 
> previously had hardcoded values for OMX. Therefore, we still want to keep the 
> RVCE_CS() macros. By keeping them, all firmware versions can work; even if the 
> structure changes between different firmware versions, it still works because 
> we only take the values of the IB, not the structure itself.

And exactly that's what we don't want.

Each firmware version should have a complete separate implementation of mapping 
the values from the pipe description into the binary representation of the IB.

Otherwise we would need to test with all the older firmware versions as well 
when we make a change.

Adding different values to the IB is also possible completely without the 
structure by just using the values from the picture descriptor directly.

Regards,
Christian.

>
> Regards,
> Boyuan
>
> -Original Message-
> From: Christian König [mailto:deathsim...@vodafone.de]
> Sent: June-22-16 3:34 AM
> To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
> Subject: Re: [PATCH 3/3] radeon/vce: use vce structures for vce_52 
> firmware
>
> Am 21.06.2016 um 16:50 schrieb Boyuan Zhang:
>> Signed-off-by: Boyuan Zhang 
>> ---
>>src/gallium/drivers/radeon/radeon_vce.c| 171 +++
>>src/gallium/drivers/radeon/radeon_vce.h|   1 +
>>src/gallium/drivers/radeon/radeon_vce_52.c | 447 
>> +++--
>>3 files changed, 533 insertions(+), 86 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/radeon_vce.c
>> b/src/gallium/drivers/radeon/radeon_vce.c
>> index e16e0cf..0d96085 100644
>> --- a/src/gallium/drivers/radeon/radeon_vce.c
>> +++ b/src/gallium/drivers/radeon/radeon_vce.c
>> @@ -139,6 +139,176 @@ static void sort_cpb(struct rvce_encoder *enc)
>>  }
>>}
>>
>> +static void get_rate_control_param(struct rvce_encoder *enc, struct 
>> +pipe_h264_enc_picture_desc *pic) {
> Move all of this into the firmware specific file. Don't add anything to the 
> common file since we don't want to implement this for the older firmware 
> versions.
>
>> +enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method;
>> +enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate;
>> +enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate;
>> +enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames;
>> +enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames;
>> +enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames;
>> +enc->enc_pic.rc.gop_size = pic->gop_size;
>> +enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num;
>> +enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den;
>> +enc->enc_pic.rc.max_qp = 51;
>> +
>> +if (pic->enable_low_level_control == true) {
>> +enc->enc_pic.rc.vbv_buffer_size = 2000;
>> +if (pic->rate_ctrl.frame_rate_num == 0)
>> +enc->enc_pic.rc.frame_rate_num = 30;
>> +if (pic->rate_ctrl.frame_rate_den == 0)
>> +enc->enc_pic.rc.frame_rate_den = 1;
>> +enc->enc_pic.rc.vbv_buf_lv = 48;
>> +enc->enc_pic.rc.fill_data_enable = 1;
>> +enc->enc_pic.rc.enforce_hrd = 1;
>> +enc->enc_pic.rc.target_bits_picture = 
>> enc->enc_pic.rc.target_bitrate / enc->enc_pic.rc.frame_rate_num;
>> +enc->enc_pic.rc.peak_bits_picture_integer = 
>> enc->enc_pic.rc.peak_bitrate / enc->enc_pic.rc.frame_rate_num;
>> +enc->enc_pic.rc.peak_bits_picture_fraction = 0;
>> +} else {
>> +enc->enc_pic.rc.vbv_buffer_size = 
>> pic->rate_ctrl.vbv_buffer_size;
>> +enc->enc_pic.rc.vbv_buf_lv = 0;
>> +enc->enc_pic.rc.fill_data_enable = 0;
>> +enc->enc_pic.rc.enforce_hrd = 0;
>> +enc->enc_pic.rc.target_bits_picture = 
>> pic->rate_ctrl.target_bits_picture;
>> +

Re: [Mesa-dev] [PATCH 2/3] vl: add parameters for VAAPI encode

2016-06-22 Thread Zhang, Boyuan
Agree, please see the new patch set I just sent.

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: June-22-16 3:29 AM
To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
Subject: Re: [PATCH 2/3] vl: add parameters for VAAPI encode

Am 21.06.2016 um 16:50 schrieb Boyuan Zhang:
> Signed-off-by: Boyuan Zhang 

Please move that patch to the end of the series. E.g. implement the existing 
interface first, then add the new one and make the changes to support the new 
one.

Regards,
Christian.

> ---
>   src/gallium/include/pipe/p_video_state.h | 13 +
>   1 file changed, 13 insertions(+)
>
> diff --git a/src/gallium/include/pipe/p_video_state.h 
> b/src/gallium/include/pipe/p_video_state.h
> index d353be6..d519d17 100644
> --- a/src/gallium/include/pipe/p_video_state.h
> +++ b/src/gallium/include/pipe/p_video_state.h
> @@ -131,6 +131,7 @@ enum pipe_h264_enc_rate_control_method
>   struct pipe_picture_desc
>   {
>  enum pipe_video_profile profile;
> +   enum pipe_video_entrypoint entry_point;
>   };
>   
>   struct pipe_quant_matrix
> @@ -369,11 +370,23 @@ struct pipe_h264_enc_picture_desc
>   
>  enum pipe_h264_enc_picture_type picture_type;
>  unsigned frame_num;
> +   unsigned frame_num_cnt;
> +   unsigned p_remain;
> +   unsigned i_remain;
> +   unsigned idr_pic_id;
> +   unsigned gop_cnt;
>  unsigned pic_order_cnt;
>  unsigned ref_idx_l0;
>  unsigned ref_idx_l1;
> +   unsigned gop_size;
>   
>  bool not_referenced;
> +   bool is_idr;
> +   bool has_ref_pic_list;
> +   bool enable_low_level_control;
> +   unsigned int ref_pic_list_0[32];
> +   unsigned int ref_pic_list_1[32];
> +   unsigned int frame_idx[32];
>   };
>   
>   struct pipe_h265_sps

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] radeon/vce: add vce structures

2016-06-22 Thread Boyuan Zhang
Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vce.h | 297 
 1 file changed, 297 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vce.h 
b/src/gallium/drivers/radeon/radeon_vce.h
index e438148..da61285 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -65,6 +65,303 @@ struct rvce_cpb_slot {
unsignedpic_order_cnt;
 };
 
+struct rvce_rate_control {
+   uint32_trc_method;
+   uint32_ttarget_bitrate;
+   uint32_tpeak_bitrate;
+   uint32_tframe_rate_num;
+   uint32_tgop_size;
+   uint32_tquant_i_frames;
+   uint32_tquant_p_frames;
+   uint32_tquant_b_frames;
+   uint32_tvbv_buffer_size;
+   uint32_tframe_rate_den;
+   uint32_tvbv_buf_lv;
+   uint32_tmax_au_size;
+   uint32_tqp_initial_mode;
+   uint32_ttarget_bits_picture;
+   uint32_tpeak_bits_picture_integer;
+   uint32_tpeak_bits_picture_fraction;
+   uint32_tmin_qp;
+   uint32_tmax_qp;
+   uint32_tskip_frame_enable;
+   uint32_tfill_data_enable;
+   uint32_tenforce_hrd;
+   uint32_tb_pics_delta_qp;
+   uint32_tref_b_pics_delta_qp;
+   uint32_trc_reinit_disable;
+   uint32_tenc_lcvbr_init_qp_flag;
+   uint32_tlcvbrsatd_based_nonlinear_bit_budget_flag;
+};
+
+struct rvce_motion_estimation {
+   uint32_tenc_ime_decimation_search;
+   uint32_tmotion_est_half_pixel;
+   uint32_tmotion_est_quarter_pixel;
+   uint32_tdisable_favor_pmv_point;
+   uint32_tforce_zero_point_center;
+   uint32_tlsmvert;
+   uint32_tenc_search_range_x;
+   uint32_tenc_search_range_y;
+   uint32_tenc_search1_range_x;
+   uint32_tenc_search1_range_y;
+   uint32_tdisable_16x16_frame1;
+   uint32_tdisable_satd;
+   uint32_tenable_amd;
+   uint32_tenc_disable_sub_mode;
+   uint32_tenc_ime_skip_x;
+   uint32_tenc_ime_skip_y;
+   uint32_tenc_en_ime_overw_dis_subm;
+   uint32_tenc_ime_overw_dis_subm_no;
+   uint32_tenc_ime2_search_range_x;
+   uint32_tenc_ime2_search_range_y;
+   uint32_tparallel_mode_speedup_enable;
+   uint32_tfme0_enc_disable_sub_mode;
+   uint32_tfme1_enc_disable_sub_mode;
+   uint32_time_sw_speedup_enable;
+};
+
+struct rvce_pic_control {
+   uint32_tenc_use_constrained_intra_pred;
+   uint32_tenc_cabac_enable;
+   uint32_tenc_cabac_idc;
+   uint32_tenc_loop_filter_disable;
+   int32_t enc_lf_beta_offset;
+   int32_t enc_lf_alpha_c0_offset;
+   uint32_tenc_crop_left_offset;
+   uint32_tenc_crop_right_offset;
+   uint32_tenc_crop_top_offset;
+   uint32_tenc_crop_bottom_offset;
+   uint32_tenc_num_mbs_per_slice;
+   uint32_tenc_intra_refresh_num_mbs_per_slot;
+   uint32_tenc_force_intra_refresh;
+   uint32_tenc_force_imb_period;
+   uint32_tenc_pic_order_cnt_type;
+   uint32_tlog2_max_pic_order_cnt_lsb_minus4;
+   uint32_tenc_sps_id;
+   uint32_tenc_pps_id;
+   uint32_tenc_constraint_set_flags;
+   uint32_tenc_b_pic_pattern;
+   uint32_tweight_pred_mode_b_picture;
+   uint32_tenc_number_of_reference_frames;
+   uint32_tenc_max_num_ref_frames;
+   uint32_tenc_num_default_active_ref_l0;
+   uint32_tenc_num_default_active_ref_l1;
+   uint32_tenc_slice_mode;
+   uint32_tenc_max_slice_size;
+};
+
+struct rvce_task_info {
+   uint32_toffset_of_next_task_info;
+   uint32_ttask_operation;
+   uint32_treference_picture_dependency;
+   uint32_tcollocate_flag_dependency;
+   uint32_tfeedback_index;
+   

[Mesa-dev] [PATCH 3/4] vl: add parameters for VAAPI encode

2016-06-22 Thread Boyuan Zhang
Signed-off-by: Boyuan Zhang 
---
 src/gallium/include/pipe/p_video_state.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index d353be6..2f2a4d9 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -369,11 +369,23 @@ struct pipe_h264_enc_picture_desc
 
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
+   unsigned frame_num_cnt;
+   unsigned p_remain;
+   unsigned i_remain;
+   unsigned idr_pic_id;
+   unsigned gop_cnt;
unsigned pic_order_cnt;
unsigned ref_idx_l0;
unsigned ref_idx_l1;
+   unsigned gop_size;
 
bool not_referenced;
+   bool is_idr;
+   bool has_ref_pic_list;
+   bool enable_low_level_control;
+   unsigned int ref_pic_list_0[32];
+   unsigned int ref_pic_list_1[32];
+   unsigned int frame_idx[32];
 };
 
 struct pipe_h265_sps
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeon/vce: use vce structure for vce 52 firmware

2016-06-22 Thread Boyuan Zhang
Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vce.c|   4 +
 src/gallium/drivers/radeon/radeon_vce.h|  17 +
 src/gallium/drivers/radeon/radeon_vce_40_2_2.c |   4 +
 src/gallium/drivers/radeon/radeon_vce_50.c |   4 +
 src/gallium/drivers/radeon/radeon_vce_52.c | 566 +
 5 files changed, 509 insertions(+), 86 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index e16e0cf..e8aac8e 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -267,6 +267,7 @@ static void rvce_begin_frame(struct pipe_video_codec 
*encoder,
enc->pic.quant_b_frames != pic->quant_b_frames;
 
enc->pic = *pic;
+   get_pic_param(enc, pic);
 
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
@@ -474,6 +475,7 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
radeon_vce_40_2_2_init(enc);
+   get_pic_param = radeon_vce_40_2_2_get_param;
break;
 
case FW_50_0_1:
@@ -481,11 +483,13 @@ struct pipe_video_codec *rvce_create_encoder(struct 
pipe_context *context,
case FW_50_10_2:
case FW_50_17_3:
radeon_vce_50_init(enc);
+   get_pic_param = radeon_vce_50_get_param;
break;
 
case FW_52_0_3:
case FW_52_4_3:
radeon_vce_52_init(enc);
+   get_pic_param = radeon_vce_52_get_param;
break;
 
default:
diff --git a/src/gallium/drivers/radeon/radeon_vce.h 
b/src/gallium/drivers/radeon/radeon_vce.h
index da61285..8c0b073 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -405,6 +405,7 @@ struct rvce_encoder {
struct rvid_buffer  *fb;
struct rvid_buffer  cpb;
struct pipe_h264_enc_picture_desc pic;
+   struct rvce_h264_enc_picenc_pic;
 
unsignedtask_info_idx;
unsignedbs_idx;
@@ -442,4 +443,20 @@ void radeon_vce_50_init(struct rvce_encoder *enc);
 /* init vce fw 52 specific callbacks */
 void radeon_vce_52_init(struct rvce_encoder *enc);
 
+/* version specific function for getting parameters */
+void (*get_pic_param)(struct rvce_encoder *enc,
+   struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 40.2.2 */
+void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
+struct 
pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 50 */
+void radeon_vce_50_get_param(struct rvce_encoder *enc,
+struct 
pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 52 */
+void radeon_vce_52_get_param(struct rvce_encoder *enc,
+struct 
pipe_h264_enc_picture_desc *pic);
+
 #endif
diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c 
b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index 2906ad0..fe15ded 100644
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -431,6 +431,10 @@ static void destroy(struct rvce_encoder *enc)
RVCE_END();
 }
 
+void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
+{
+}
+
 void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
 {
enc->session = session;
diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c 
b/src/gallium/drivers/radeon/radeon_vce_50.c
index 82e7ad2..262e13b 100644
--- a/src/gallium/drivers/radeon/radeon_vce_50.c
+++ b/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -233,6 +233,10 @@ static void encode(struct rvce_encoder *enc)
RVCE_END();
 }
 
+void radeon_vce_50_get_param(struct rvce_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
+{
+}
+
 void radeon_vce_50_init(struct rvce_encoder *enc)
 {
radeon_vce_40_2_2_init(enc);
diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
b/src/gallium/drivers/radeon/radeon_vce_52.c
index 3894eea..56b6dd8 100644
--- a/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -40,27 +40,146 @@
 
 static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
 
+static void get_rate_control_param(struct rvce_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
+{
+   enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method;
+   enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate;
+   enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate;
+   enc->enc_pic.rc.quant_i_frames = 

[Mesa-dev] [PATCH 4/4] radeon/vce: handle newly added parameters

2016-06-22 Thread Boyuan Zhang
Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vce_52.c | 107 +
 1 file changed, 79 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce_52.c 
b/src/gallium/drivers/radeon/radeon_vce_52.c
index 56b6dd8..ac577e3 100644
--- a/src/gallium/drivers/radeon/radeon_vce_52.c
+++ b/src/gallium/drivers/radeon/radeon_vce_52.c
@@ -48,36 +48,65 @@ static void get_rate_control_param(struct rvce_encoder 
*enc, struct pipe_h264_en
enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames;
enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames;
enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames;
+   enc->enc_pic.rc.gop_size = pic->gop_size;
enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num;
enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den;
enc->enc_pic.rc.max_qp = 51;
-   enc->enc_pic.rc.vbv_buffer_size = pic->rate_ctrl.vbv_buffer_size;
-   enc->enc_pic.rc.vbv_buf_lv = 0;
-   enc->enc_pic.rc.fill_data_enable = 0;
-   enc->enc_pic.rc.enforce_hrd = 0;
-   enc->enc_pic.rc.target_bits_picture = 
pic->rate_ctrl.target_bits_picture;
-   enc->enc_pic.rc.peak_bits_picture_integer = 
pic->rate_ctrl.peak_bits_picture_integer;
-   enc->enc_pic.rc.peak_bits_picture_fraction = 
pic->rate_ctrl.peak_bits_picture_fraction;
 
+   if (pic->enable_low_level_control == true) {
+   enc->enc_pic.rc.vbv_buffer_size = 2000;
+   if (pic->rate_ctrl.frame_rate_num == 0)
+   enc->enc_pic.rc.frame_rate_num = 30;
+   if (pic->rate_ctrl.frame_rate_den == 0)
+   enc->enc_pic.rc.frame_rate_den = 1;
+   enc->enc_pic.rc.vbv_buf_lv = 48;
+   enc->enc_pic.rc.fill_data_enable = 1;
+   enc->enc_pic.rc.enforce_hrd = 1;
+   enc->enc_pic.rc.target_bits_picture = 
enc->enc_pic.rc.target_bitrate / enc->enc_pic.rc.frame_rate_num;
+   enc->enc_pic.rc.peak_bits_picture_integer = 
enc->enc_pic.rc.peak_bitrate / enc->enc_pic.rc.frame_rate_num;
+   enc->enc_pic.rc.peak_bits_picture_fraction = 0;
+   } else {
+   enc->enc_pic.rc.vbv_buffer_size = 
pic->rate_ctrl.vbv_buffer_size;
+   enc->enc_pic.rc.vbv_buf_lv = 0;
+   enc->enc_pic.rc.fill_data_enable = 0;
+   enc->enc_pic.rc.enforce_hrd = 0;
+   enc->enc_pic.rc.target_bits_picture = 
pic->rate_ctrl.target_bits_picture;
+   enc->enc_pic.rc.peak_bits_picture_integer = 
pic->rate_ctrl.peak_bits_picture_integer;
+   enc->enc_pic.rc.peak_bits_picture_fraction = 
pic->rate_ctrl.peak_bits_picture_fraction;
+   }
 }
 
 static void get_motion_estimation_param(struct rvce_encoder *enc, struct 
pipe_h264_enc_picture_desc *pic)
 {
-
-   enc->enc_pic.me.motion_est_quarter_pixel = 0x;
-   enc->enc_pic.me.enc_disable_sub_mode = 0x00fe;
-   enc->enc_pic.me.lsmvert = 0x;
-   enc->enc_pic.me.enc_en_ime_overw_dis_subm = 0x;
-   enc->enc_pic.me.enc_ime_overw_dis_subm_no = 0x;
-   enc->enc_pic.me.enc_ime2_search_range_x = 0x0001;
-   enc->enc_pic.me.enc_ime2_search_range_y = 0x0001;
-   enc->enc_pic.me.enc_ime_decimation_search = 0x0001;
-   enc->enc_pic.me.motion_est_half_pixel = 0x0001;
-   enc->enc_pic.me.enc_search_range_x = 0x0010;
-   enc->enc_pic.me.enc_search_range_y = 0x0010;
-   enc->enc_pic.me.enc_search1_range_x = 0x0010;
-   enc->enc_pic.me.enc_search1_range_y = 0x0010;
-
+   if (pic->enable_low_level_control == true) {
+   enc->enc_pic.me.motion_est_quarter_pixel = 0x0001;
+   enc->enc_pic.me.enc_disable_sub_mode = 0x0078;
+   enc->enc_pic.me.lsmvert = 0x0002;
+   enc->enc_pic.me.enc_en_ime_overw_dis_subm = 0x0001;
+   enc->enc_pic.me.enc_ime_overw_dis_subm_no = 0x0001;
+   enc->enc_pic.me.enc_ime2_search_range_x = 0x0004;
+   enc->enc_pic.me.enc_ime2_search_range_y = 0x0004;
+   enc->enc_pic.me.enc_ime_decimation_search = 0x0001;
+   enc->enc_pic.me.motion_est_half_pixel = 0x0001;
+   enc->enc_pic.me.enc_search_range_x = 0x0010;
+   enc->enc_pic.me.enc_search_range_y = 0x0010;
+   enc->enc_pic.me.enc_search1_range_x = 0x0010;
+   enc->enc_pic.me.enc_search1_range_y = 0x0010;
+   } else {
+   enc->enc_pic.me.motion_est_quarter_pixel = 0x;
+   enc->enc_pic.me.enc_disable_sub_mode = 0x00fe;
+   enc->enc_pic.me.lsmvert = 0x;
+   enc->enc_pic.me.enc_en_ime_overw_dis_subm = 0x;
+   enc->enc_pic.me.enc_ime_overw_dis_subm_no = 0x;
+   

Re: [Mesa-dev] [PATCH v2 05/14] swr: [rasterizer jitter] cleanup supporting different llvm versions

2016-06-22 Thread Emil Velikov
On 22 June 2016 at 21:01, Rowley, Timothy O  wrote:
>
>> On Jun 22, 2016, at 2:27 PM, Emil Velikov  wrote:
>>
>> On 22 June 2016 at 20:19, Rowley, Timothy O  
>> wrote:
>>>
 On Jun 22, 2016, at 1:52 PM, Emil Velikov  wrote:

 On 20 June 2016 at 22:36, Tim Rowley  wrote:
> ---
> .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  9 +--
> .../drivers/swr/rasterizer/jitter/JitManager.h |  7 -
> .../drivers/swr/rasterizer/jitter/blend_jit.cpp|  8 +-
> .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 31 
> +++---
> .../drivers/swr/rasterizer/jitter/builder_misc.h   |  6 +
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 15 ++-
> .../jitter/scripts/gen_llvm_ir_macros.py   | 24 -
> .../swr/rasterizer/jitter/streamout_jit.cpp|  7 +
> 8 files changed, 73 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> index 4bbd9ad..6e00a70 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> @@ -35,11 +35,13 @@
> #include "JitManager.h"
> #include "fetch_jit.h"
>
> +#pragma push_macro("DEBUG")
> +#undef DEBUG
> +
> #if defined(_WIN32)
> #include "llvm/ADT/Triple.h"
> #endif
> #include "llvm/IR/Function.h"
> -#include "llvm/Support/DynamicLibrary.h"
>
> #include "llvm/Support/MemoryBuffer.h"
> #include "llvm/Support/SourceMgr.h"
> @@ -53,6 +55,8 @@
> #include "llvm/ExecutionEngine/JITEventListener.h"
> #endif
>
> +#pragma pop_macro("DEBUG")
> +
 I'm afraid that these are still off - they should be wrapped in
 "if HAVE_LLVM >= 0x0307 ... endif". Plus the ones in JitManager.h
 really want a similar treatment.
>>>
>>> Any reason to avoid the push/pop on older LLVM?  Saves things from becoming 
>>> too messy with preprocessor directives.
>>>
>> Because those are used by gallium (and mesa). If you undefine it here,
>> then somewhere down the chain of includes you'll end up in headers
>> that use it and things will go meh.
>
> I think I’m missing something obvious - the push/undef/pop sequence surrounds 
> just the llvm includes in JitManager.{h,cpp}, and at the end of pop_macro the 
> DEBUG macro will be back to what it was originally defined as.  The reason 
> for adding them is to isolate the llvm usage.
>
It's a bit fragile, as there's nothing stopping others (yourself X
months down the line) from moving a swr/gallium header between the
push and pop. But at the end of the day I won't be debugging (if it
breaks) or keeping track, so don't mind me.

>>
>>>> Mildly related bugs/cleanups:
 - There's a few cases of _DEBUG which should (?) be replaced with ifndef 
 NDEBUG
>>>
>>> Ok, I can address this in another patch.
>>>
>> IMHO it's worth sorting both identical issues (and checking for other
>> offenders) in one patch. Be that here, or as follow on it's up-to you.
>
> _DEBUG usage spills into rasterizer/{common,core} which is why I was thinking 
> of addressing it in a different commit rather than this one which concerns 
> itself just with the jitter directory.
>
Apologies, got the line ordering wrong - my suggestion does not apply here.

 - swr uses both mesa and LLVM provided version macros. Please stick to one.
 If the latter is reliable (available all the way to min. supported
 LLVM version) and can be used in both C and C++ sources I'm inclined
 to just use it everywhere in mesa and drop out local macros…
>>>
>>> Are you referring to the HAVE_LLVM macro?  I can remove the conditional 
>>> definition of this from swr (since Mesa provides the definition).
>>>
>> Mesa provides HAVE_LLVM and MESA_LLVM_VERSION_PATCH while LLVM does
>> LLVM_VERSION_{MAJOR,MINOR,PATCH}. Personally I'm in favour of the
>> latter (considering the 'reliable' note above), but using one of them
>> (regardless which) is what you want imho.
>
> Conditional code for different llvm versions is much easier with the 
> HAVE_LLVM style combined major/minor rather than using LLVM_VERSION_* (which 
> if not done properly will break if they ever switch to llvm-4.x).
>
I hope they don't break that one ... there'll be dozens of projects
that'll be busted :-)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: expose EXT_vertex_array_bgra when supported by backend

2016-06-22 Thread Christian Gmeiner
Thanks for the reviews.. I only need someone to push this patch :)


2016-06-20 16:46 GMT+02:00 Marek Olšák :
> Reviewed-by: Marek Olšák 
>
> Marek
>
> On Mon, Jun 20, 2016 at 9:44 AM, Christian Gmeiner
>  wrote:
>> Signed-off-by: Christian Gmeiner 
>> ---
>>  src/mesa/state_tracker/st_extensions.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/state_tracker/st_extensions.c 
>> b/src/mesa/state_tracker/st_extensions.c
>> index 383983b..122bc88 100644
>> --- a/src/mesa/state_tracker/st_extensions.c
>> +++ b/src/mesa/state_tracker/st_extensions.c
>> @@ -773,6 +773,8 @@ void st_init_extensions(struct pipe_screen *screen,
>>
>> /* Required: vertex fetch support. */
>> static const struct st_extension_format_mapping vertex_mapping[] = {
>> +  { { o(EXT_vertex_array_bgra) },
>> +{ PIPE_FORMAT_B8G8R8A8_UNORM } },
>>{ { o(ARB_vertex_type_2_10_10_10_rev) },
>>  { PIPE_FORMAT_R10G10B10A2_UNORM,
>>PIPE_FORMAT_B10G10R10A2_UNORM,
>> @@ -825,7 +827,6 @@ void st_init_extensions(struct pipe_screen *screen,
>> extensions->EXT_provoking_vertex = GL_TRUE;
>>
>> extensions->EXT_texture_env_dot3 = GL_TRUE;
>> -   extensions->EXT_vertex_array_bgra = GL_TRUE;
>>
>> extensions->ATI_fragment_shader = GL_TRUE;
>> extensions->ATI_texture_env_combine3 = GL_TRUE;
>> --
>> 2.5.5
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

greets
--
Christian Gmeiner, MSc

https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Emil Velikov
On 22 June 2016 at 21:02, Jason Ekstrand  wrote:

>> @@ -131,7 +131,14 @@ anv_entrypoints.c : anv_entrypoints_gen.py
>> $(vulkan_include_HEADERS)
>> $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
>> $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>>
>> -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
>> +
>> +.PHONY: anv_timestamp.h
>> +
>> +anv_timestamp.h:
>> +   @echo "Updating anv_timestamp.h"
>> +   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
>> +
>> +BUILT_SOURCES = $(VULKAN_GENERATED_FILES) anv_timestamp.h
>
>
> Should this really go in anv?  Timothy is going to need it for the shader
> cache for GL so it might be good to put it some place more generic.  Or
> Timothy can just move it when he needs it.
>
One could even copy the 4 lines. No need to watch out for which
folder/Makefile get opened first, adding extra includes etc.

> I don't care much either way.
>
> Reviewed-by: Jason Ekstrand 
>

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] r600g: move PA_SU_POLY_OFFSET_DB_FMT_CNTL to poly offset states for evergreen

2016-06-22 Thread Axel Davy
Emit PA_SU_POLY_OFFSET_DB_FMT_CNTL with the other poly_offset states.
This will be useful to implement
PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED.

v2: Increase the num_dw field for the poly offset atom

Signed-off-by: Axel Davy 
---
 src/gallium/drivers/r600/evergreen_state.c | 38 ++
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 1ac8914..b572be7 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1223,27 +1223,6 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(levelinfo->nblk_x *
   levelinfo->nblk_y / 64 - 
1);
 
-   switch (surf->base.format) {
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24);
-   break;
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) |
-   S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
-   break;
-   case PIPE_FORMAT_Z16_UNORM:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16);
-   break;
-   default:;
-   }
-
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
uint64_t stencil_offset;
unsigned stile_split = rtex->surface.stencil_tile_split;
@@ -1628,8 +1607,6 @@ static void evergreen_emit_framebuffer_state(struct 
r600_context *rctx, struct r
   
RADEON_PRIO_DEPTH_BUFFER_MSAA :
   
RADEON_PRIO_DEPTH_BUFFER);
 
-   radeon_set_context_reg(cs, 
R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-  zb->pa_su_poly_offset_db_fmt_cntl);
radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, 
zb->db_depth_view);
 
radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 8);
@@ -1682,6 +1659,7 @@ static void evergreen_emit_polygon_offset(struct 
r600_context *rctx, struct r600
struct r600_poly_offset_state *state = (struct 
r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
+   uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
 
switch (state->zs_format) {
case PIPE_FORMAT_Z24X8_UNORM:
@@ -1689,11 +1667,18 @@ static void evergreen_emit_polygon_offset(struct 
r600_context *rctx, struct r600
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
offset_units *= 2.0f;
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24);
break;
case PIPE_FORMAT_Z16_UNORM:
offset_units *= 4.0f;
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16);
break;
-   default:;
+   default:
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) |
+   S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
}
 
radeon_set_context_reg_seq(cs, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 
4);
@@ -1701,6 +1686,9 @@ static void evergreen_emit_polygon_offset(struct 
r600_context *rctx, struct r600
radeon_emit(cs, fui(offset_units));
radeon_emit(cs, fui(offset_scale));
radeon_emit(cs, fui(offset_units));
+
+   radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+  pa_su_poly_offset_db_fmt_cntl);
 }
 
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct 
r600_atom *atom)
@@ -3641,7 +3629,7 @@ void evergreen_init_state_functions(struct r600_context 
*rctx)
r600_init_atom(rctx, >db_misc_state.atom, id++, 
evergreen_emit_db_misc_state, 10);
r600_init_atom(rctx, >db_state.atom, id++, 
evergreen_emit_db_state, 14);
r600_init_atom(rctx, >dsa_state.atom, id++, r600_emit_cso_state, 
0);
-   r600_init_atom(rctx, >poly_offset_state.atom, id++, 
evergreen_emit_polygon_offset, 6);
+   r600_init_atom(rctx, >poly_offset_state.atom, id++, 
evergreen_emit_polygon_offset, 9);
r600_init_atom(rctx, >rasterizer_state.atom, id++, 
r600_emit_cso_state, 0);
r600_add_atom(rctx, >b.scissors.atom, id++);

[Mesa-dev] [PATCH v2] r600g: move PA_SU_POLY_OFFSET_DB_FMT_CNTL to poly offset states for r600

2016-06-22 Thread Axel Davy
Emit PA_SU_POLY_OFFSET_DB_FMT_CNTL with the other poly_offset states.
This will be useful to implement
PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED.

v2: Increase the num_dw field for the poly offset atom

Signed-off-by: Axel Davy 
---
 src/gallium/drivers/r600/r600_state.c | 37 ---
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index cf7f0b3..4584067 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -254,16 +254,24 @@ static void r600_emit_polygon_offset(struct r600_context 
*rctx, struct r600_atom
struct r600_poly_offset_state *state = (struct 
r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
+   uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
 
switch (state->zs_format) {
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
offset_units *= 2.0f;
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24);
break;
case PIPE_FORMAT_Z16_UNORM:
offset_units *= 4.0f;
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16);
break;
-   default:;
+   default:
+   pa_su_poly_offset_db_fmt_cntl =
+   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) |
+   S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
}
 
radeon_set_context_reg_seq(cs, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE, 
4);
@@ -271,6 +279,9 @@ static void r600_emit_polygon_offset(struct r600_context 
*rctx, struct r600_atom
radeon_emit(cs, fui(offset_units));
radeon_emit(cs, fui(offset_scale));
radeon_emit(cs, fui(offset_units));
+
+   radeon_set_context_reg(cs, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+  pa_su_poly_offset_db_fmt_cntl);
 }
 
 static uint32_t r600_get_blend_control(const struct pipe_blend_state *state, 
unsigned i)
@@ -1059,25 +1070,6 @@ static void r600_init_depth_surface(struct r600_context 
*rctx,
surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | 
S_028000_SLICE_TILE_MAX(slice);
surf->db_prefetch_limit = (rtex->surface.level[level].nblk_y / 8) - 1;
 
-   switch (surf->base.format) {
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-24);
-   break;
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-23) |
-   S_028DF8_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
-   break;
-   case PIPE_FORMAT_Z16_UNORM:
-   surf->pa_su_poly_offset_db_fmt_cntl =
-   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS((char)-16);
-   break;
-   default:;
-   }
-
/* use htile only for first level */
if (rtex->htile_buffer && !level) {
surf->db_htile_data_base = 0;
@@ -1457,9 +1449,6 @@ static void r600_emit_framebuffer_state(struct 
r600_context *rctx, struct r600_a
   
RADEON_PRIO_DEPTH_BUFFER_MSAA :
   
RADEON_PRIO_DEPTH_BUFFER);
 
-   radeon_set_context_reg(cs, 
R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-  surf->pa_su_poly_offset_db_fmt_cntl);
-
radeon_set_context_reg_seq(cs, R_028000_DB_DEPTH_SIZE, 2);
radeon_emit(cs, surf->db_depth_size); /* R_028000_DB_DEPTH_SIZE 
*/
radeon_emit(cs, surf->db_depth_view); /* R_028004_DB_DEPTH_VIEW 
*/
@@ -3085,7 +3074,7 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, >db_misc_state.atom, id++, 
r600_emit_db_misc_state, 7);
r600_init_atom(rctx, >db_state.atom, id++, r600_emit_db_state, 
11);
r600_init_atom(rctx, >dsa_state.atom, id++, r600_emit_cso_state, 
0);
-   r600_init_atom(rctx, >poly_offset_state.atom, id++, 
r600_emit_polygon_offset, 6);
+   r600_init_atom(rctx, >poly_offset_state.atom, id++, 
r600_emit_polygon_offset, 9);
r600_init_atom(rctx, >rasterizer_state.atom, id++, 
r600_emit_cso_state, 0);
r600_add_atom(rctx, >b.scissors.atom, id++);
r600_add_atom(rctx, >b.viewports.atom, id++);
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 4/4] swr: automake: don't ship LLVM version specific generated sources

2016-06-22 Thread Emil Velikov
On 22 June 2016 at 20:44, Rowley, Timothy O  wrote:
> Couple of minor comments inlined below.
>
> Tested-by: Tim Rowley 
>
Great, thanks.

>> On Jun 22, 2016, at 7:04 AM, Emil Velikov  wrote:
>>
>> From: Emil Velikov 
>>
>> Otherwise things will fail to build, if the builder is using another
>> version of LLVM.
>>
>> v2: annotate all the dependencies of builder_gen.h
>> v3: clean the generated files as needed
>>
>> Cc: "12.0" 
>> Cc: Tim Rowley 
>> Cc: Chuck Atkins 
>> Tested-by: Chuck Atkins  (v2)
>> Reported-by: Chuck Atkins 
>> Signed-off-by: Emil Velikov 
>> ---
>> src/gallium/drivers/swr/Makefile.am | 48 
>> +++--
>> 1 file changed, 46 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/swr/Makefile.am 
>> b/src/gallium/drivers/swr/Makefile.am
>> index d896154..30087be 100644
>> --- a/src/gallium/drivers/swr/Makefile.am
>> +++ b/src/gallium/drivers/swr/Makefile.am
>> @@ -52,8 +52,6 @@ BUILT_SOURCES = \
>>   rasterizer/scripts/gen_knobs.cpp \
>>   rasterizer/scripts/gen_knobs.h \
>>   rasterizer/jitter/state_llvm.h \
>> - rasterizer/jitter/builder_gen.h \
>> - rasterizer/jitter/builder_gen.cpp \
>>   rasterizer/jitter/builder_x86.h \
>>   rasterizer/jitter/builder_x86.cpp
>>
>> @@ -122,6 +120,34 @@ COMMON_LDFLAGS = \
>>   $(NO_UNDEFINED) \
>>   $(LLVM_LDFLAGS)
>>
>> +
>> +# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
>> +# tarball) just annotate the dependency directly.
>> +# As the single direct user of builder_gen.h is a header (builder.h) trace 
>> all
>> +# the transitive users (one that use the latter header).
>> +#
>> +# Note: one should really clean the includes a bit, according to Tim there's
>> +# only 4 users of the builder_gen methods/API.
>
> This note can be removed; I was thinking of the final users of the builder 
> class, and forgot about the other files which make up the definition.
>
Ack - will fix/drop (same for the typo below). Any news from the LLVM
squad about reworking things from their end ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Jason Ekstrand
On Wed, Jun 22, 2016 at 11:25 AM, Emil Velikov 
wrote:

> From: Emil Velikov 
>
> Do not rely on the git sha1:
>  - its current truncated form makes it less unique
>  - it does not account for local (Vulkan or otherwise) changes
>
> Use a timestamp produced at the time of build. It's perfectly unique,
> unless someone explicitly thinkers with their system clock. Even then
> chances of producing the exact same one are very small, if not zero.
>
> v2: Remove .tmp rule. It's not needed since we want the header to be
> regenerated on each time we call make (Eric).
>
> Cc: Jason Ekstrand 
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Emil Velikov 
> ---
> Strictly speaking v2 _could_ make a difference if one has a machine
> powerful enough to have `make' and `make install' reach this Makefile
> within the same second. That sounds pretty much impossible though :-)
>
> If people prefer we could drop the header altogether and provide the
> define via the Makefile. It would be a bit unusual (in mesa at least)
> but it should work.
>

That would be bad because a change in #defines would cause lots of stuff to
get recompiled when it isn't needed.  Using the header means only
anv_device.c gets recompiled which is much nicer.


> ---
>  src/intel/vulkan/.gitignore   | 1 +
>  src/intel/vulkan/Makefile.am  | 9 -
>  src/intel/vulkan/anv_device.c | 4 ++--
>  3 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
> index a496146..ce6f23f 100644
> --- a/src/intel/vulkan/.gitignore
> +++ b/src/intel/vulkan/.gitignore
> @@ -2,3 +2,4 @@
>  /anv_entrypoints.c
>  /anv_entrypoints.h
>  /dev_icd.json
> +/anv_timestamp.h
> diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
> index 4d9ff90..5f68f29 100644
> --- a/src/intel/vulkan/Makefile.am
> +++ b/src/intel/vulkan/Makefile.am
> @@ -131,7 +131,14 @@ anv_entrypoints.c : anv_entrypoints_gen.py
> $(vulkan_include_HEADERS)
> $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
> $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>
> -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
> +
> +.PHONY: anv_timestamp.h
> +
> +anv_timestamp.h:
> +   @echo "Updating anv_timestamp.h"
> +   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
> +
> +BUILT_SOURCES = $(VULKAN_GENERATED_FILES) anv_timestamp.h
>

Should this really go in anv?  Timothy is going to need it for the shader
cache for GL so it might be good to put it some place more generic.  Or
Timothy can just move it when he needs it.

I don't care much either way.

Reviewed-by: Jason Ekstrand 


>  CLEANFILES = $(BUILT_SOURCES) dev_icd.json
>  EXTRA_DIST = \
> $(top_srcdir)/include/vulkan/vk_icd.h \
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 97300c3..e395b1c 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -28,7 +28,7 @@
>  #include 
>
>  #include "anv_private.h"
> -#include "git_sha1.h"
> +#include "anv_timestamp.h"
>  #include "util/strtod.h"
>  #include "util/debug.h"
>
> @@ -426,7 +426,7 @@ void
>  anv_device_get_cache_uuid(void *uuid)
>  {
> memset(uuid, 0, VK_UUID_SIZE);
> -   snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
> +   snprintf(uuid, VK_UUID_SIZE, "anv-%s", ANV_TIMESTAMP);
>

In a few hundred years or so, this might hit 16 characters... meh


>  }
>
>  void anv_GetPhysicalDeviceProperties(
> --
> 2.8.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 05/14] swr: [rasterizer jitter] cleanup supporting different llvm versions

2016-06-22 Thread Rowley, Timothy O

> On Jun 22, 2016, at 2:27 PM, Emil Velikov  wrote:
> 
> On 22 June 2016 at 20:19, Rowley, Timothy O  
> wrote:
>> 
>>> On Jun 22, 2016, at 1:52 PM, Emil Velikov  wrote:
>>> 
>>> On 20 June 2016 at 22:36, Tim Rowley  wrote:
 ---
 .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  9 +--
 .../drivers/swr/rasterizer/jitter/JitManager.h |  7 -
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp|  8 +-
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 31 
 +++---
 .../drivers/swr/rasterizer/jitter/builder_misc.h   |  6 +
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 15 ++-
 .../jitter/scripts/gen_llvm_ir_macros.py   | 24 -
 .../swr/rasterizer/jitter/streamout_jit.cpp|  7 +
 8 files changed, 73 insertions(+), 34 deletions(-)
 
 diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
 b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
 index 4bbd9ad..6e00a70 100644
 --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
 +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
 @@ -35,11 +35,13 @@
 #include "JitManager.h"
 #include "fetch_jit.h"
 
 +#pragma push_macro("DEBUG")
 +#undef DEBUG
 +
 #if defined(_WIN32)
 #include "llvm/ADT/Triple.h"
 #endif
 #include "llvm/IR/Function.h"
 -#include "llvm/Support/DynamicLibrary.h"
 
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 @@ -53,6 +55,8 @@
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #endif
 
 +#pragma pop_macro("DEBUG")
 +
>>> I'm afraid that these are still off - they should be wrapped in
>>> "if HAVE_LLVM >= 0x0307 ... endif". Plus the ones in JitManager.h
>>> really want a similar treatment.
>> 
>> Any reason to avoid the push/pop on older LLVM?  Saves things from becoming 
>> too messy with preprocessor directives.
>> 
> Because those are used by gallium (and mesa). If you undefine it here,
> then somewhere down the chain of includes you'll end up in headers
> that use it and things will go meh.

I think I’m missing something obvious - the push/undef/pop sequence surrounds 
just the llvm includes in JitManager.{h,cpp}, and at the end of pop_macro the 
DEBUG macro will be back to what it was originally defined as.  The reason for 
adding them is to isolate the llvm usage.

> 
>>> Mildly related bugs/cleanups:
>>> - There's a few cases of _DEBUG which should (?) be replaced with ifndef 
>>> NDEBUG
>> 
>> Ok, I can address this in another patch.
>> 
> IMHO it's worth sorting both identical issues (and checking for other
> offenders) in one patch. Be that here, or as follow on it's up-to you.

_DEBUG usage spills into rasterizer/{common,core} which is why I was thinking 
of addressing it in a different commit rather than this one which concerns 
itself just with the jitter directory.

>>> - swr uses both mesa and LLVM provided version macros. Please stick to one.
>>> If the latter is reliable (available all the way to min. supported
>>> LLVM version) and can be used in both C and C++ sources I'm inclined
>>> to just use it everywhere in mesa and drop out local macros…
>> 
>> Are you referring to the HAVE_LLVM macro?  I can remove the conditional 
>> definition of this from swr (since Mesa provides the definition).
>> 
> Mesa provides HAVE_LLVM and MESA_LLVM_VERSION_PATCH while LLVM does
> LLVM_VERSION_{MAJOR,MINOR,PATCH}. Personally I'm in favour of the
> latter (considering the 'reliable' note above), but using one of them
> (regardless which) is what you want imho.

Conditional code for different llvm versions is much easier with the HAVE_LLVM 
style combined major/minor rather than using LLVM_VERSION_* (which if not done 
properly will break if they ever switch to llvm-4.x).

> All of ^^ are just ideas, feel free to take or leave them.
> -Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 4/4] swr: automake: don't ship LLVM version specific generated sources

2016-06-22 Thread Rowley, Timothy O
Couple of minor comments inlined below.

Tested-by: Tim Rowley 

> On Jun 22, 2016, at 7:04 AM, Emil Velikov  wrote:
> 
> From: Emil Velikov 
> 
> Otherwise things will fail to build, if the builder is using another
> version of LLVM.
> 
> v2: annotate all the dependencies of builder_gen.h
> v3: clean the generated files as needed
> 
> Cc: "12.0" 
> Cc: Tim Rowley 
> Cc: Chuck Atkins 
> Tested-by: Chuck Atkins  (v2)
> Reported-by: Chuck Atkins 
> Signed-off-by: Emil Velikov 
> ---
> src/gallium/drivers/swr/Makefile.am | 48 +++--
> 1 file changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/Makefile.am 
> b/src/gallium/drivers/swr/Makefile.am
> index d896154..30087be 100644
> --- a/src/gallium/drivers/swr/Makefile.am
> +++ b/src/gallium/drivers/swr/Makefile.am
> @@ -52,8 +52,6 @@ BUILT_SOURCES = \
>   rasterizer/scripts/gen_knobs.cpp \
>   rasterizer/scripts/gen_knobs.h \
>   rasterizer/jitter/state_llvm.h \
> - rasterizer/jitter/builder_gen.h \
> - rasterizer/jitter/builder_gen.cpp \
>   rasterizer/jitter/builder_x86.h \
>   rasterizer/jitter/builder_x86.cpp
> 
> @@ -122,6 +120,34 @@ COMMON_LDFLAGS = \
>   $(NO_UNDEFINED) \
>   $(LLVM_LDFLAGS)
> 
> +
> +# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
> +# tarball) just annotate the dependency directly.
> +# As the single direct user of builder_gen.h is a header (builder.h) trace 
> all
> +# the transitive users (one that use the latter header).
> +#
> +# Note: one should really clean the includes a bit, according to Tim there's
> +# only 4 users of the builder_gen methods/API.

This note can be removed; I was thinking of the final users of the builder 
class, and forgot about the other files which make up the definition.

> +rasterizer/jitter/blend_jit.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/builder.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/builder_misc.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/fetch_jit.cpp: rasterizer/jitter/builder_gen.h
> +rasterizer/jitter/streamout_jit.cpp: rasterizer/jitter/builder_gen.h
> +swr_shader.cpp: rasterizer/jitter/builder_gen.h
> +
> +CLEANFILES = \
> + rasterizer/jitter/builder_gen.h \
> + rasterizer/jitter/builder_gen.cpp
> +
> +# XXX: Due to the funky dependencies above, the buildder_x86.cpp file gets

buildder -> builder

> +# generated (copied) into builddir when building from release tarball.
> +# Add a temporary workaround to remove it, until the above issue is resolved.
> +distclean-local:
> + ( test $(top_srcdir) != $(top_builddir) && \
> + rm $(builddir)/rasterizer/jitter/builder_x86.cpp ) || true
> +
> lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
> 
> libswrAVX_la_CXXFLAGS = \
> @@ -132,6 +158,15 @@ libswrAVX_la_CXXFLAGS = \
> libswrAVX_la_SOURCES = \
>   $(COMMON_SOURCES)
> 
> +# XXX: Don't ship these generated sources for now, since they are specific
> +# to the LLVM version they are generated from. Thus a release tarball
> +# containing the said files, generated against eg. LLVM 3.8 will fail to 
> build
> +# on systems with other versions of LLVM eg. 3.7 or 3.6.
> +# Move these back to BUILT_SOURCES once that is resolved.
> +nodist_libswrAVX_la_SOURCES = \
> + rasterizer/jitter/builder_gen.h \
> + rasterizer/jitter/builder_gen.cpp
> +
> libswrAVX_la_LIBADD = \
>   $(COMMON_LIBADD)
> 
> @@ -146,6 +181,15 @@ libswrAVX2_la_CXXFLAGS = \
> libswrAVX2_la_SOURCES = \
>   $(COMMON_SOURCES)
> 
> +# XXX: Don't ship these generated sources for now, since they are specific
> +# to the LLVM version they are generated from. Thus a release tarball
> +# containing the said files, generated against eg. LLVM 3.8 will fail to 
> build
> +# on systems with other versions of LLVM eg. 3.7 or 3.6.
> +# Move these back to BUILT_SOURCES once that is resolved.
> +nodist_libswrAVX2_la_SOURCES = \
> + rasterizer/jitter/builder_gen.h \
> + rasterizer/jitter/builder_gen.cpp
> +
> libswrAVX2_la_LIBADD = \
>   $(COMMON_LIBADD)
> 
> -- 
> 2.8.2
> 
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [BRANCH] Gallium Radeon cleanups

2016-06-22 Thread Alex Deucher
On Wed, Jun 22, 2016 at 2:49 PM, Marek Olšák  wrote:
> Hi,
>
> The top 6 patches in this branch are cleanups I'd like to push. I'm
> not sending them to the list because they are kinda boring. (and there
> is another series with 6 patches already)
>
> https://cgit.freedesktop.org/~mareko/mesa/log/?h=radeon-cleanups
>
> Commits:
>   gallium/radeon: use r600_resource_reference
>   gallium/radeon: add and use r600_texture_reference
>   gallium/radeon/winsyses: boolean -> bool, TRUE -> true, FALSE -> false
>   gallium/radeon: boolean -> bool, TRUE -> true, FALSE -> false
>   radeonsi: boolean -> bool, TRUE -> true, FALSE -> false
>   radeonsi: make si_is_format_supported static
>
> Please review.

For the series:
Reviewed-by: Alex Deucher 

>
> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 05/14] swr: [rasterizer jitter] cleanup supporting different llvm versions

2016-06-22 Thread Emil Velikov
On 22 June 2016 at 20:19, Rowley, Timothy O  wrote:
>
>> On Jun 22, 2016, at 1:52 PM, Emil Velikov  wrote:
>>
>> On 20 June 2016 at 22:36, Tim Rowley  wrote:
>>> ---
>>> .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  9 +--
>>> .../drivers/swr/rasterizer/jitter/JitManager.h |  7 -
>>> .../drivers/swr/rasterizer/jitter/blend_jit.cpp|  8 +-
>>> .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 31 
>>> +++---
>>> .../drivers/swr/rasterizer/jitter/builder_misc.h   |  6 +
>>> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 15 ++-
>>> .../jitter/scripts/gen_llvm_ir_macros.py   | 24 -
>>> .../swr/rasterizer/jitter/streamout_jit.cpp|  7 +
>>> 8 files changed, 73 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
>>> b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>>> index 4bbd9ad..6e00a70 100644
>>> --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>>> +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>>> @@ -35,11 +35,13 @@
>>> #include "JitManager.h"
>>> #include "fetch_jit.h"
>>>
>>> +#pragma push_macro("DEBUG")
>>> +#undef DEBUG
>>> +
>>> #if defined(_WIN32)
>>> #include "llvm/ADT/Triple.h"
>>> #endif
>>> #include "llvm/IR/Function.h"
>>> -#include "llvm/Support/DynamicLibrary.h"
>>>
>>> #include "llvm/Support/MemoryBuffer.h"
>>> #include "llvm/Support/SourceMgr.h"
>>> @@ -53,6 +55,8 @@
>>> #include "llvm/ExecutionEngine/JITEventListener.h"
>>> #endif
>>>
>>> +#pragma pop_macro("DEBUG")
>>> +
>> I'm afraid that these are still off - they should be wrapped in
>> "if HAVE_LLVM >= 0x0307 ... endif". Plus the ones in JitManager.h
>> really want a similar treatment.
>
> Any reason to avoid the push/pop on older LLVM?  Saves things from becoming 
> too messy with preprocessor directives.
>
Because those are used by gallium (and mesa). If you undefine it here,
then somewhere down the chain of includes you'll end up in headers
that use it and things will go meh.

>> Mildly related bugs/cleanups:
>> - There's a few cases of _DEBUG which should (?) be replaced with ifndef 
>> NDEBUG
>
> Ok, I can address this in another patch.
>
IMHO it's worth sorting both identical issues (and checking for other
offenders) in one patch. Be that here, or as follow on it's up-to you.

>> - swr uses both mesa and LLVM provided version macros. Please stick to one.
>> If the latter is reliable (available all the way to min. supported
>> LLVM version) and can be used in both C and C++ sources I'm inclined
>> to just use it everywhere in mesa and drop out local macros…
>
> Are you referring to the HAVE_LLVM macro?  I can remove the conditional 
> definition of this from swr (since Mesa provides the definition).
>
Mesa provides HAVE_LLVM and MESA_LLVM_VERSION_PATCH while LLVM does
LLVM_VERSION_{MAJOR,MINOR,PATCH}. Personally I'm in favour of the
latter (considering the 'reliable' note above), but using one of them
(regardless which) is what you want imho.

All of ^^ are just ideas, feel free to take or leave them.
-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gallium: detect avx512 cpu features

2016-06-22 Thread Cherniak, Bruce


On May 26, 2016, at 3:06 PM, Tim Rowley wrote:

v2: style code, add avx512 to cpu dump
---
src/gallium/auxiliary/util/u_cpu_detect.c | 26 ++
src/gallium/auxiliary/util/u_cpu_detect.h | 10 ++
2 files changed, 36 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c 
b/src/gallium/auxiliary/util/u_cpu_detect.c
index aa3c30a..03f45cf 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -387,6 +387,23 @@ util_cpu_detect(void)
 util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
  }

+  // check for avx512
+  if (((regs2[2] >> 27) & 1) && // OSXSAVE
+  (xgetbv() & (0x7 << 5)) && // OPMASK: upper-256 enabled by OS
+  (xgetbv() & (0x3 << 1))) { // XMM/YMM enabled by OS

In the general feature flags above this, has_avx uses ((xgetbv() & 6) == 6) to 
establish XMM/YMM support.
https://software.intel.com/en-us/articles/introduction-to-intel-advanced-vector-extensions


+ uint32_t regs3[4];
+ cpuid(0x0007, regs3);
+ util_cpu_caps.has_avx512f= (regs3[1] >> 16) & 1;
+ util_cpu_caps.has_avx512dq   = (regs3[1] >> 17) & 1;
+ util_cpu_caps.has_avx512ifma = (regs3[1] >> 21) & 1;
+ util_cpu_caps.has_avx512pf   = (regs3[1] >> 26) & 1;
+ util_cpu_caps.has_avx512er   = (regs3[1] >> 27) & 1;
+ util_cpu_caps.has_avx512cd   = (regs3[1] >> 28) & 1;
+ util_cpu_caps.has_avx512bw   = (regs3[1] >> 30) & 1;
+ util_cpu_caps.has_avx512vl   = (regs3[1] >> 31) & 1;
+ util_cpu_caps.has_avx512vbmi = (regs3[2] >>  1) & 1;
+  }
+
  if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 
0x49656e69) {
 /* GenuineIntel */
 util_cpu_caps.has_intel = 1;
@@ -454,6 +471,15 @@ util_cpu_detect(void)
  debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
  debug_printf("util_cpu_caps.has_altivec = %u\n", 
util_cpu_caps.has_altivec);
  debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
+  debug_printf("util_cpu_caps.has_avx512f = %u\n", 
util_cpu_caps.has_avx512f);
+  debug_printf("util_cpu_caps.has_avx512dq = %u\n", 
util_cpu_caps.has_avx512dq);
+  debug_printf("util_cpu_caps.has_avx512ifma = %u\n", 
util_cpu_caps.has_avx512ifma);
+  debug_printf("util_cpu_caps.has_avx512pf = %u\n", 
util_cpu_caps.has_avx512pf);
+  debug_printf("util_cpu_caps.has_avx512er = %u\n", 
util_cpu_caps.has_avx512er);
+  debug_printf("util_cpu_caps.has_avx512cd = %u\n", 
util_cpu_caps.has_avx512cd);
+  debug_printf("util_cpu_caps.has_avx512bw = %u\n", 
util_cpu_caps.has_avx512bw);
+  debug_printf("util_cpu_caps.has_avx512vl = %u\n", 
util_cpu_caps.has_avx512vl);
+  debug_printf("util_cpu_caps.has_avx512vbmi = %u\n", 
util_cpu_caps.has_avx512vbmi);
   }
#endif

diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h 
b/src/gallium/auxiliary/util/u_cpu_detect.h
index 5ccfc93..b612a2c 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -71,6 +71,16 @@ struct util_cpu_caps {
   unsigned has_xop:1;
   unsigned has_altivec:1;
   unsigned has_daz:1;
+
+   unsigned has_avx512f:1;
+   unsigned has_avx512dq:1;
+   unsigned has_avx512ifma:1;
+   unsigned has_avx512pf:1;
+   unsigned has_avx512er:1;
+   unsigned has_avx512cd:1;
+   unsigned has_avx512bw:1;
+   unsigned has_avx512vl:1;
+   unsigned has_avx512vbmi:1;
};

extern struct util_cpu_caps
--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 05/14] swr: [rasterizer jitter] cleanup supporting different llvm versions

2016-06-22 Thread Rowley, Timothy O

> On Jun 22, 2016, at 1:52 PM, Emil Velikov  wrote:
> 
> On 20 June 2016 at 22:36, Tim Rowley  wrote:
>> ---
>> .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  9 +--
>> .../drivers/swr/rasterizer/jitter/JitManager.h |  7 -
>> .../drivers/swr/rasterizer/jitter/blend_jit.cpp|  8 +-
>> .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 31 
>> +++---
>> .../drivers/swr/rasterizer/jitter/builder_misc.h   |  6 +
>> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 15 ++-
>> .../jitter/scripts/gen_llvm_ir_macros.py   | 24 -
>> .../swr/rasterizer/jitter/streamout_jit.cpp|  7 +
>> 8 files changed, 73 insertions(+), 34 deletions(-)
>> 
>> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
>> b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>> index 4bbd9ad..6e00a70 100644
>> --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>> +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
>> @@ -35,11 +35,13 @@
>> #include "JitManager.h"
>> #include "fetch_jit.h"
>> 
>> +#pragma push_macro("DEBUG")
>> +#undef DEBUG
>> +
>> #if defined(_WIN32)
>> #include "llvm/ADT/Triple.h"
>> #endif
>> #include "llvm/IR/Function.h"
>> -#include "llvm/Support/DynamicLibrary.h"
>> 
>> #include "llvm/Support/MemoryBuffer.h"
>> #include "llvm/Support/SourceMgr.h"
>> @@ -53,6 +55,8 @@
>> #include "llvm/ExecutionEngine/JITEventListener.h"
>> #endif
>> 
>> +#pragma pop_macro("DEBUG")
>> +
> I'm afraid that these are still off - they should be wrapped in
> "if HAVE_LLVM >= 0x0307 ... endif". Plus the ones in JitManager.h
> really want a similar treatment.

Any reason to avoid the push/pop on older LLVM?  Saves things from becoming too 
messy with preprocessor directives.

> Mildly related bugs/cleanups:
> - There's a few cases of _DEBUG which should (?) be replaced with ifndef 
> NDEBUG

Ok, I can address this in another patch.

> - swr uses both mesa and LLVM provided version macros. Please stick to one.
> If the latter is reliable (available all the way to min. supported
> LLVM version) and can be used in both C and C++ sources I'm inclined
> to just use it everywhere in mesa and drop out local macros…

Are you referring to the HAVE_LLVM macro?  I can remove the conditional 
definition of this from swr (since Mesa provides the definition).

-Tim


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/gen4: Pull texture formats from the texture object not the miptree

2016-06-22 Thread Kenneth Graunke
On Wednesday, June 22, 2016 12:07:05 PM PDT Jason Ekstrand wrote:
> This makes texture views sort-of work.  It doesn't add full texture view
> support for gen4-5 but it is enough to fix the GL_ARB_copy_image formats
> piglit test on Iron Lake.
> 
> Signed-off-by: Jason Ekstrand 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83036
> Cc: "11.1 11.2 12.0" 
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 133a944..b07bf19 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -335,7 +335,7 @@ brw_update_texture_surface(struct gl_context *ctx,
> surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
> 6 * 4, 32, surf_offset);
>  
> -   uint32_t tex_format = translate_tex_format(brw, mt->format,
> +   uint32_t tex_format = translate_tex_format(brw, intelObj->_Format,
>sampler->sRGBDecode);
>  
> if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
> 

Both of these are:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v2 01/64] i965: Drop the maximum 3D texture size to 512 on Sandy Bridge

2016-06-22 Thread Ian Romanick
This patch is

Reviewed-by: Ian Romanick 

On 06/21/2016 03:45 PM, Jason Ekstrand wrote:
> The RenderTargetViewExtent field of RENDER_SURFACE_STATE is supposed to be
> set to the depth of a 3-D texture when rendering.  Unfortunately, that
> field is only 9 bits on Sandy Bridge and prior so we can't actually bind
> a 3-D texture for rendering if it has depth > 512.  On Ivy Bridge, this
> field was bumped to 11 bits so we can go all the way up to 2048.  On Iron
> Lake and prior, we don't support layered rendering and we use OffsetX/Y
> hacks to render to particular layers so 2048 is ok there too.
> 
> Cc: "11.1 11.2 12.0" 
> Cc: Ian Romanick 
> 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index f7c7874..c7a66cb 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -467,7 +467,16 @@ brw_initialize_context_constants(struct brw_context *brw)
> ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
> ctx->Const.MaxRenderbufferSize = 8192;
> ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
> -   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
> +
> +   /* On Sandy Bridge and prior, the "Render Target View Extent" field of
> +* RENDER_SURFACE_STATE is only 9 bits so the largest 3-D texture we can 
> do
> +* a layered render into has a depth of 512.  On Iron Lake and earlier, we
> +* don't support layered rendering and we use manual offsetting to render
> +* into the different layers so this doesn't matter.  On Sandy Bridge,
> +* however, we do support layered rendering so this is a problem.
> +*/
> +   ctx->Const.Max3DTextureLevels = brw->gen == 6 ? 10 /* 512 */ : 12; /* 
> 2048 */
> +
> ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
> ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
> ctx->Const.MaxTextureMbytes = 1536;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/gen4-6: Handle gl_texture_object::BaseLevel and MinLayer correctly

2016-06-22 Thread Jason Ekstrand
This is basically a direct translation of what we do for gen7.

Signed-off-by: Jason Ekstrand 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83036
Cc: "11.1 11.2 12.0" 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index b07bf19..83c4c81 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -391,8 +391,10 @@ brw_update_texture_surface(struct gl_context *ctx,
  (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
  (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 
+   const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
-  SET_FIELD(tObj->BaseLevel - mt->first_level, 
BRW_SURFACE_MIN_LOD));
+  SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
+  SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
 
surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/gen4: Pull texture formats from the texture object not the miptree

2016-06-22 Thread Jason Ekstrand
This makes texture views sort-of work.  It doesn't add full texture view
support for gen4-5 but it is enough to fix the GL_ARB_copy_image formats
piglit test on Iron Lake.

Signed-off-by: Jason Ekstrand 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83036
Cc: "11.1 11.2 12.0" 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 133a944..b07bf19 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -335,7 +335,7 @@ brw_update_texture_surface(struct gl_context *ctx,
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
  6 * 4, 32, surf_offset);
 
-   uint32_t tex_format = translate_tex_format(brw, mt->format,
+   uint32_t tex_format = translate_tex_format(brw, intelObj->_Format,
   sampler->sRGBDecode);
 
if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 05/14] swr: [rasterizer jitter] cleanup supporting different llvm versions

2016-06-22 Thread Emil Velikov
On 20 June 2016 at 22:36, Tim Rowley  wrote:
> ---
>  .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  9 +--
>  .../drivers/swr/rasterizer/jitter/JitManager.h |  7 -
>  .../drivers/swr/rasterizer/jitter/blend_jit.cpp|  8 +-
>  .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 31 
> +++---
>  .../drivers/swr/rasterizer/jitter/builder_misc.h   |  6 +
>  .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 15 ++-
>  .../jitter/scripts/gen_llvm_ir_macros.py   | 24 -
>  .../swr/rasterizer/jitter/streamout_jit.cpp|  7 +
>  8 files changed, 73 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
> b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> index 4bbd9ad..6e00a70 100644
> --- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
> @@ -35,11 +35,13 @@
>  #include "JitManager.h"
>  #include "fetch_jit.h"
>
> +#pragma push_macro("DEBUG")
> +#undef DEBUG
> +
>  #if defined(_WIN32)
>  #include "llvm/ADT/Triple.h"
>  #endif
>  #include "llvm/IR/Function.h"
> -#include "llvm/Support/DynamicLibrary.h"
>
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/SourceMgr.h"
> @@ -53,6 +55,8 @@
>  #include "llvm/ExecutionEngine/JITEventListener.h"
>  #endif
>
> +#pragma pop_macro("DEBUG")
> +
I'm afraid that these are still off - they should be wrapped in
"if HAVE_LLVM >= 0x0307 ... endif". Plus the ones in JitManager.h
really want a similar treatment.

Mildly related bugs/cleanups:
 - There's a few cases of _DEBUG which should (?) be replaced with ifndef NDEBUG
 - swr uses both mesa and LLVM provided version macros. Please stick to one.
If the latter is reliable (available all the way to min. supported
LLVM version) and can be used in both C and C++ sources I'm inclined
to just use it everywhere in mesa and drop out local macros...

The python changes look great imho :-)
-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [BRANCH] Gallium Radeon cleanups

2016-06-22 Thread Marek Olšák
Hi,

The top 6 patches in this branch are cleanups I'd like to push. I'm
not sending them to the list because they are kinda boring. (and there
is another series with 6 patches already)

https://cgit.freedesktop.org/~mareko/mesa/log/?h=radeon-cleanups

Commits:
  gallium/radeon: use r600_resource_reference
  gallium/radeon: add and use r600_texture_reference
  gallium/radeon/winsyses: boolean -> bool, TRUE -> true, FALSE -> false
  gallium/radeon: boolean -> bool, TRUE -> true, FALSE -> false
  radeonsi: boolean -> bool, TRUE -> true, FALSE -> false
  radeonsi: make si_is_format_supported static

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] swr: push/pop DEBUG macro around llvm includes

2016-06-22 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 

> On Jun 17, 2016, at 11:01 AM, Tim Rowley  wrote:
> 
> llvm redefines DEBUG; adding push/pop prevents a undefined reference
> to debug_refcnt_state in llvm-3.7+.
> 
> v2: add undef DEBUG
> ---
> src/gallium/drivers/swr/swr_shader.cpp | 10 +++---
> src/gallium/drivers/swr/swr_state.cpp  |  7 ++-
> 2 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
> b/src/gallium/drivers/swr/swr_shader.cpp
> index 8af0700..4d1b604 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -21,14 +21,18 @@
>  * IN THE SOFTWARE.
>  ***/
> 
> +// llvm redefines DEBUG
> +#pragma push_macro("DEBUG")
> +#undef DEBUG
> #include "JitManager.h"
> +#include "llvm-c/Core.h"
> +#include "llvm/Support/CBindingWrapping.h"
> +#pragma pop_macro("DEBUG")
> +
> #include "state.h"
> #include "state_llvm.h"
> #include "builder.h"
> 
> -#include "llvm-c/Core.h"
> -#include "llvm/Support/CBindingWrapping.h"
> -
> #include "tgsi/tgsi_strings.h"
> #include "gallivm/lp_bld_init.h"
> #include "gallivm/lp_bld_flow.h"
> diff --git a/src/gallium/drivers/swr/swr_state.cpp 
> b/src/gallium/drivers/swr/swr_state.cpp
> index 3eeb98d..f4c3b0e 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -21,9 +21,14 @@
>  * IN THE SOFTWARE.
>  ***/
> 
> +// llvm redefines DEBUG
> +#pragma push_macro("DEBUG")
> +#undef DEBUG
> +#include "JitManager.h"
> +#pragma pop_macro("DEBUG")
> +
> #include "common/os.h"
> #include "jit_api.h"
> -#include "JitManager.h"
> #include "state_llvm.h"
> 
> #include "gallivm/lp_bld_tgsi.h"
> -- 
> 1.9.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/14] update swr rasterizer

2016-06-22 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 

> On Jun 20, 2016, at 4:36 PM, Tim Rowley  wrote:
> 
> v2:
>   add conservativeRast.h to Makefile.sources
>   minimize changes in llvm support cleanup
>   remove tabs that were added by the v1 patches
> 
> Tim Rowley (14):
>  swr: [rasterizer common] workaround clang for windows __cpuid() bug
>  swr: [rasterizer common] fix include for Intel compiler
>  swr: [rasterizer] add support for building avx512 version
>  swr: [rasterizer jitter] unitialized component fix in fetch jit
>  swr: [rasterizer jitter] cleanup supporting different llvm versions
>  swr: [rasterizer core] remove old comment
>  swr: [rasterizer jitter] small fetch jit cleanup
>  swr: [rasterizer core] stop single threaded crash exit crash
>  swr: [rasterizer core] conservative rasterization frontend support
>  swr: [rasterizer core] GS viewport array index attribute
>  swr: [rasterizer core] track whether GS outputs viewport array index
>  swr: [rasterizer jitter] add support for component packing for 'odd'
>formats
>  swr: [rasterizer core] use wrap-around safe compares for dependency
>checking
>  swr: [rasterizer core] fix dependency bug
> 
> src/gallium/drivers/swr/Makefile.sources   |   1 +
> src/gallium/drivers/swr/rasterizer/common/isa.hpp  |  14 +-
> src/gallium/drivers/swr/rasterizer/common/os.h |   2 +-
> .../drivers/swr/rasterizer/common/simdintrin.h |   4 +-
> src/gallium/drivers/swr/rasterizer/core/api.cpp|  28 ++-
> src/gallium/drivers/swr/rasterizer/core/clip.h |   4 +-
> .../drivers/swr/rasterizer/core/conservativeRast.h | 120 
> src/gallium/drivers/swr/rasterizer/core/context.h  |   6 +-
> .../drivers/swr/rasterizer/core/format_types.h |   8 +-
> .../drivers/swr/rasterizer/core/frontend.cpp   | 164 ++--
> src/gallium/drivers/swr/rasterizer/core/frontend.h |  43 +
> src/gallium/drivers/swr/rasterizer/core/knobs.h|  17 +-
> .../drivers/swr/rasterizer/core/rasterizer.h   |   8 +
> .../drivers/swr/rasterizer/core/ringbuffer.h   |  16 +-
> src/gallium/drivers/swr/rasterizer/core/state.h|   8 +-
> .../drivers/swr/rasterizer/core/threads.cpp|  54 +++---
> src/gallium/drivers/swr/rasterizer/core/threads.h  |   6 +-
> src/gallium/drivers/swr/rasterizer/core/utils.h|  30 +++
> .../drivers/swr/rasterizer/jitter/JitManager.cpp   |   9 +-
> .../drivers/swr/rasterizer/jitter/JitManager.h |   7 +-
> .../drivers/swr/rasterizer/jitter/blend_jit.cpp|   8 +-
> .../drivers/swr/rasterizer/jitter/builder_misc.cpp |  31 ++-
> .../drivers/swr/rasterizer/jitter/builder_misc.h   |   6 +
> .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 210 ++---
> .../jitter/scripts/gen_llvm_ir_macros.py   |  24 ++-
> .../swr/rasterizer/jitter/streamout_jit.cpp|   7 +-
> .../drivers/swr/rasterizer/memory/Convert.h|   4 +-
> .../drivers/swr/rasterizer/memory/StoreTile.cpp|   4 +-
> .../drivers/swr/rasterizer/scripts/knob_defs.py|   5 +-
> 29 files changed, 546 insertions(+), 302 deletions(-)
> create mode 100644 src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
> 
> -- 
> 1.9.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] gallium/radeon: add state setup for a separate DCC buffer

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  8 
 src/gallium/drivers/radeon/r600_texture.c | 18 +++---
 src/gallium/drivers/radeonsi/si_descriptors.c | 11 ++-
 src/gallium/drivers/radeonsi/si_state.c   |  9 -
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index c0e4282..92cba13 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -265,6 +265,14 @@ struct r600_texture {
 
boolnon_disp_tiling; /* R600-Cayman only */
 
+   /* Whether the texture is a displayable back buffer and needs DCC
+* decompression, which is expensive. Therefore, it's enabled only
+* if statistics suggest that it will pay off and it's allocated
+* separately. Limited to target == 2D and last_level == 0. If enabled,
+* dcc_offset contains the absolute GPUVM address, not the relative one.
+*/
+   struct r600_resource*dcc_separate_buffer;
+
/* Counter that should be non-zero if the texture is bound to a
 * framebuffer. Implemented in radeonsi only.
 */
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 4053a75..23be5ed 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -387,6 +387,8 @@ static bool r600_texture_discard_dcc(struct 
r600_common_screen *rscreen,
if (!r600_can_disable_dcc(rtex))
return false;
 
+   assert(rtex->dcc_separate_buffer == NULL);
+
/* Disable DCC. */
rtex->dcc_offset = 0;
 
@@ -564,6 +566,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
pipe_resource_reference((struct 
pipe_resource**)&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
+   r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
FREE(rtex);
 }
 
@@ -1800,12 +1803,21 @@ void vi_dcc_clear_level(struct r600_common_context 
*rctx,
struct r600_texture *rtex,
unsigned level, unsigned clear_value)
 {
-   struct pipe_resource *dcc_buffer = &rtex->resource.b.b;
-   uint64_t dcc_offset = rtex->dcc_offset +
- rtex->surface.level[level].dcc_offset;
+   struct pipe_resource *dcc_buffer;
+   uint64_t dcc_offset;
 
assert(rtex->dcc_offset && rtex->surface.level[level].dcc_enabled);
 
+   if (rtex->dcc_separate_buffer) {
+   dcc_buffer = &rtex->dcc_separate_buffer->b.b;
+   dcc_offset = 0;
+   } else {
+   dcc_buffer = &rtex->resource.b.b;
+   dcc_offset = rtex->dcc_offset;
+   }
+
+   dcc_offset += rtex->surface.level[level].dcc_offset;
+
rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset,
   rtex->surface.level[level].dcc_fast_clear_size,
   clear_value, R600_COHERENCY_CB_META);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index e95556b..0bdeb04 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -304,6 +304,15 @@ static void si_sampler_view_add_buffer(struct si_context 
*sctx,
 
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, rres, usage,
  r600_get_sampler_view_priority(rres));
+
+   if (resource->target != PIPE_BUFFER) {
+   struct r600_texture *rtex = (struct r600_texture*)resource;
+
+   if (rtex->dcc_separate_buffer)
+   radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ rtex->dcc_separate_buffer, 
usage,
+ RADEON_PRIO_DCC);
+   }
 }
 
 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
@@ -352,7 +361,7 @@ void si_set_mutable_tex_desc_fields(struct r600_texture 
*tex,
 
if (tex->dcc_offset && tex->surface.level[first_level].dcc_enabled) {
state[6] |= S_008F28_COMPRESSION_EN(1);
-   state[7] = (tex->resource.gpu_address +
+   state[7] = ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) +
tex->dcc_offset +
base_level_info->dcc_offset) >> 8;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ccd9f860..492a670 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2418,6 +2418,12 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom
RADEON_PRIO_CMASK);
}
 
+   if 

[Mesa-dev] [PATCH 1/6] gallium/radeon: add flag R600_QUERY_HW_FLAG_BEGIN_RESUMES

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_query.c | 3 ++-
 src/gallium/drivers/radeon/r600_query.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 2e06746..d83d505 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -763,7 +763,8 @@ boolean r600_query_hw_begin(struct r600_common_context 
*rctx,
return false;
}
 
-   r600_query_hw_reset_buffers(rctx, query);
+   if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
+   r600_query_hw_reset_buffers(rctx, query);
 
r600_query_hw_emit_start(rctx, query);
if (!query->buffer.buf)
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
index b573a17..7689bf9 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -92,6 +92,8 @@ struct r600_query {
 enum {
R600_QUERY_HW_FLAG_NO_START = (1 << 0),
R600_QUERY_HW_FLAG_PREDICATE = (1 << 1),
+   /* whether begin_query doesn't clear the result */
+   R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
 };
 
 struct r600_query_hw_ops {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] radeonsi: unreference framebuffer state with set_framebuffer_state

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c  | 7 +--
 src/gallium/drivers/radeonsi/si_state.c | 2 +-
 src/gallium/drivers/radeonsi/si_state.h | 1 -
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 65c0daa..def3e8c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -40,7 +40,11 @@ static void si_destroy_context(struct pipe_context *context)
struct si_context *sctx = (struct si_context *)context;
int i;
 
-   si_dec_framebuffer_counters(>framebuffer.state);
+   /* Unreference the framebuffer normally to disable related logic
+* properly.
+*/
+   struct pipe_framebuffer_state fb = {};
+   context->set_framebuffer_state(context, &fb);
 
si_release_all_descriptors(sctx);
 
@@ -76,7 +80,6 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.b.delete_blend_state(>b.b, 
sctx->custom_blend_fastclear);
if (sctx->custom_blend_dcc_decompress)
sctx->b.b.delete_blend_state(>b.b, 
sctx->custom_blend_dcc_decompress);
-   util_unreference_framebuffer_state(>framebuffer.state);
 
if (sctx->blitter)
util_blitter_destroy(sctx->blitter);
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 1cef1dc..ccd9f860 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2216,7 +2216,7 @@ static void si_init_depth_surface(struct si_context *sctx,
surf->depth_initialized = true;
 }
 
-void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
+static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state 
*state)
 {
for (int i = 0; i < state->nr_cbufs; ++i) {
struct r600_surface *surf = NULL;
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 8d538e1..5677bd7 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -339,7 +339,6 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
  const struct pipe_sampler_view *state,
  unsigned width0, unsigned height0,
  unsigned force_level);
-void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state);
 
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] gallium/radeon: add a heuristic dynamically enabling DCC for scanout surfaces

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

DCC for displayable surfaces is allocated in a separate buffer and is
enabled or disabled based on PS invocations from 2 frames ago (to let
queries go idle) and the number of slow clears from the current frame.

At least an equivalent of 5 fullscreen draws or slow clears must be done
to enable DCC. (PS invocations / (width * height) + num_slow_clears >= 5)

Pipeline statistic queries are always active if a color buffer that can
have separate DCC is bound, even if separate DCC is disabled. That means
the window color buffer is always monitored and DCC is enabled only when
the situation is right.

The tracking of per-texture queries in r600_common_context is quite ugly,
but I don't see a better way.

The first fast clear always enables DCC. DCC decompression can disable it.
A later fast clear can enable it again. Enable/disable typically happens
only once per frame.

The impact is expected to be negligible because games usually don't have
a high level of overdraw. DCC usually activates when too much blending
is happening (smoke rendering) or when testing glClear performance and
CMASK isn't supported (Stoney).
---
 src/gallium/drivers/radeon/r600_pipe_common.c |  15 ++
 src/gallium/drivers/radeon/r600_pipe_common.h |  40 +
 src/gallium/drivers/radeon/r600_texture.c | 239 ++
 src/gallium/drivers/radeonsi/si_blit.c|  14 +-
 src/gallium/drivers/radeonsi/si_state.c   |  15 ++
 src/gallium/drivers/radeonsi/si_state_draw.c  |   5 +-
 6 files changed, 326 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 5d4a679..66afcfa 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -397,6 +397,21 @@ bool r600_common_context_init(struct r600_common_context 
*rctx,
 
 void r600_common_context_cleanup(struct r600_common_context *rctx)
 {
+   unsigned i,j;
+
+   /* Release DCC stats. */
+   for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
+   assert(!rctx->dcc_stats[i].query_active);
+
+   for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
+   if (rctx->dcc_stats[i].ps_stats[j])
+   rctx->b.destroy_query(>b,
+ 
rctx->dcc_stats[i].ps_stats[j]);
+
+   pipe_resource_reference((struct pipe_resource**)
+   >dcc_stats[i].tex, NULL);
+   }
+
if (rctx->gfx.cs)
rctx->ws->cs_destroy(rctx->gfx.cs);
if (rctx->dma.cs)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 92cba13..cdec907 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -272,6 +272,25 @@ struct r600_texture {
 * dcc_offset contains the absolute GPUVM address, not the relative one.
 */
struct r600_resource*dcc_separate_buffer;
+   /* When DCC is temporarily disabled, the separate buffer is here. */
+   struct r600_resource*last_dcc_separate_buffer;
+   /* We need to track DCC dirtiness, because st/dri usually calls
+* flush_resource twice per frame (not a bug) and we don't wanna
+* decompress DCC twice. Also, the dirty tracking must be done even
+* if DCC isn't used, because it's required by the DCC usage analysis
+* for a possible future enablement.
+*/
+   boolseparate_dcc_dirty;
+   /* Statistics gathering for the DCC enablement heuristic. */
+   booldcc_gather_statistics;
+   /* Estimate of how much this color buffer is written to in units of
+* full-screen draws: ps_invocations / (width * height)
+* Shader kills, late Z, and blending with trivial discards make it
+* inaccurate (we need to count CB updates, not PS invocations).
+*/
+   unsignedps_draw_ratio;
+   /* The number of clears since the last DCC usage analysis. */
+   unsignednum_slow_clears;
 
/* Counter that should be non-zero if the texture is bound to a
 * framebuffer. Implemented in radeonsi only.
@@ -536,6 +555,21 @@ struct r600_common_context {
float   sample_locations_8x[8][2];
float   sample_locations_16x[16][2];
 
+   /* Statistics gathering for the DCC enablement heuristic. It can't be
+* in r600_texture because r600_texture can be shared by multiple
+* contexts. This is for back buffers only. We shouldn't get too many
+* of those.
+*/
+   struct {
+   struct r600_texture *tex;
+   /* Query queue: 0 = 

[Mesa-dev] [PATCH 6/6] gallium/radeon: add a HUD query for PS draw ratio stats from separate DCC

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
 src/gallium/drivers/radeon/r600_query.c   | 5 +
 src/gallium/drivers/radeon/r600_query.h   | 1 +
 src/gallium/drivers/radeon/r600_texture.c | 1 +
 4 files changed, 8 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index cdec907..b192a74 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -538,6 +538,7 @@ struct r600_common_context {
unsignednum_spill_compute_calls;
unsignednum_dma_calls;
uint64_tnum_alloc_tex_transfer_bytes;
+   unsignedlast_tex_ps_draw_ratio; /* for query */
 
/* Render condition. */
struct r600_atomrender_cond_atom;
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index d83d505..b886bab 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -94,6 +94,7 @@ static boolean r600_query_sw_begin(struct r600_common_context 
*rctx,
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
+   case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
query->begin_result = 0;
break;
case R600_QUERY_BUFFER_WAIT_TIME:
@@ -176,6 +177,9 @@ static bool r600_query_sw_end(struct r600_common_context 
*rctx,
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = 
p_atomic_read(>screen->num_shaders_created);
break;
+   case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
+   query->end_result = rctx->last_tex_ps_draw_ratio;
+   break;
case R600_QUERY_GPIN_ASIC_ID:
case R600_QUERY_GPIN_NUM_SIMD:
case R600_QUERY_GPIN_NUM_RB:
@@ -1176,6 +1180,7 @@ static struct pipe_driver_query_info 
r600_driver_query_list[] = {
X("num-bytes-moved",NUM_BYTES_MOVED,BYTES, 
CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage",  GTT_USAGE,  BYTES, AVERAGE),
+   X("back-buffer-ps-draw-ratio",  BACK_BUFFER_PS_DRAW_RATIO, UINT64, 
AVERAGE),
 
/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
 * which use it as a fallback path to detect the GPU type.
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
index 7689bf9..daf49a1 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -59,6 +59,7 @@ enum {
R600_QUERY_GPU_LOAD,
R600_QUERY_NUM_COMPILATIONS,
R600_QUERY_NUM_SHADERS_CREATED,
+   R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
R600_QUERY_GPIN_ASIC_ID,
R600_QUERY_GPIN_NUM_SIMD,
R600_QUERY_GPIN_NUM_RB,
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 7295ab6..a22fb36 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1894,6 +1894,7 @@ void vi_separate_dcc_analyze_stats(struct pipe_context 
*ctx,
tex->ps_draw_ratio =
result.pipeline_statistics.ps_invocations /
(tex->resource.b.b.width0 * tex->resource.b.b.height0);
+   rctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
 
disable = tex->dcc_separate_buffer &&
  !vi_can_enable_separate_dcc(tex);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radeonsi: always calculate DCC info even if it's not used immediately

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

for a later use
---
 src/gallium/drivers/radeon/r600_texture.c  | 3 ++-
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 32347f2..4053a75 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1040,7 +1040,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 * apply_opaque_metadata later.
 */
if (rtex->surface.dcc_size &&
-   (buf || !(rscreen->debug_flags & DBG_NO_DCC))) {
+   (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
+   !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
/* Reserve space for the DCC buffer. */
rtex->dcc_offset = align64(rtex->size, 
rtex->surface.dcc_alignment);
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index a45bcb0..081f0e1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -365,7 +365,6 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 *   driver team).
 */
AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & 
RADEON_SURF_Z_OR_SBUFFER) &&
-!(surf->flags & RADEON_SURF_SCANOUT) &&
 !(surf->flags & 
RADEON_SURF_DISABLE_DCC) &&
 !compressed && AddrDccIn.numSamples <= 
1 &&
 ((surf->array_size == 1 && 
surf->npix_z == 1) ||
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] RadeonSI: DCC for displayable surfaces

2016-06-22 Thread Marek Olšák
Hi,

This series allows enabling DCC for displayable (currently all window)
surfaces depending on the circumstances. There is a heuristic that
determines whether it pays off based on the pipeline statistics query,
because DCC decompression is expensive and can outweigh the benefit
of DCC.

Most of the time this optimization is disabled and it only turns on
in special situations. It has a slightly higher chance to be enabled
on Stoney, because Stoney can't do fast color clears without DCC.
Also on Stoney, consecutive slow color clears turn it on as well.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Emil Velikov
From: Emil Velikov 

Do not rely on the git sha1:
 - its current truncated form makes it less unique
 - it does not account for local (Vulkan or otherwise) changes

Use a timestamp produced at the time of build. It's perfectly unique,
unless someone explicitly tinkers with their system clock. Even then
chances of producing the exact same one are very small, if not zero.

v2: Remove .tmp rule. It's not needed since we want the header to be
regenerated each time we call make (Eric).

Cc: Jason Ekstrand 
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
Strictly speaking v2 _could_ make a difference if one has a machine
powerful enough to have `make' and `make install' reach this Makefile
within the same second. That sounds pretty much impossible though :-)

If people prefer we could drop the header altogether and provide the
define via the Makefile. It would be a bit unusual (in mesa at least)
but it should work.
---
 src/intel/vulkan/.gitignore   | 1 +
 src/intel/vulkan/Makefile.am  | 9 -
 src/intel/vulkan/anv_device.c | 4 ++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
index a496146..ce6f23f 100644
--- a/src/intel/vulkan/.gitignore
+++ b/src/intel/vulkan/.gitignore
@@ -2,3 +2,4 @@
 /anv_entrypoints.c
 /anv_entrypoints.h
 /dev_icd.json
+/anv_timestamp.h
diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 4d9ff90..5f68f29 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -131,7 +131,14 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
$(vulkan_include_HEADERS)
$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
 
-BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
+
+.PHONY: anv_timestamp.h
+
+anv_timestamp.h:
+   @echo "Updating anv_timestamp.h"
+   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
+
+BUILT_SOURCES = $(VULKAN_GENERATED_FILES) anv_timestamp.h
 CLEANFILES = $(BUILT_SOURCES) dev_icd.json
 EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 97300c3..e395b1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -28,7 +28,7 @@
 #include 
 
 #include "anv_private.h"
-#include "git_sha1.h"
+#include "anv_timestamp.h"
 #include "util/strtod.h"
 #include "util/debug.h"
 
@@ -426,7 +426,7 @@ void
 anv_device_get_cache_uuid(void *uuid)
 {
memset(uuid, 0, VK_UUID_SIZE);
-   snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
+   snprintf(uuid, VK_UUID_SIZE, "anv-%s", ANV_TIMESTAMP);
 }
 
 void anv_GetPhysicalDeviceProperties(
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Emil Velikov
On 22 June 2016 at 17:07, Eric Engestrom  wrote:
> On Wed, Jun 22, 2016 at 04:57:39PM +0100, Eric Engestrom wrote:
>> On Wed, Jun 22, 2016 at 01:04:41PM +0100, Emil Velikov wrote:
>> > From: Emil Velikov 
>
> [...]
>
>> > diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
>> > index 4d9ff90..8332ae5 100644
>> > --- a/src/intel/vulkan/Makefile.am
>> > +++ b/src/intel/vulkan/Makefile.am
>> > @@ -131,7 +131,20 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
>> > $(vulkan_include_HEADERS)
>> > $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
>> > $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>> >
>> > -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
>> > +
>> > +.PHONY: anv_timestamp.h.tmp
>> > +anv_timestamp.h.tmp:
>> > +   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
>> > +
>> > +anv_timestamp.h: anv_timestamp.h.tmp
>> > +   @echo "Updating anv_timestamp.h"
>> > +   @if ! cmp -s anv_timestamp.h.tmp anv_timestamp.h; then \
>> > +   mv anv_timestamp.h.tmp anv_timestamp.h ;\
>> > +   else \
>> > +   rm anv_timestamp.h.tmp ;\
>> > +   fi
>>
>> For the else branch to be taken, the target would have to be generated
>> twice within the same second, and I don't see why we would want to avoid
>> modifying anv_timestamp.h's timestamp in this case, since that would be
>> the only thing that would be changed, and said timestamp would be within
>> the same second anyway.
>> I suggest to drop the `if` and unconditionally do the move.
>>
>> Since this wouldn't change the behaviour anyway, with or without my
>> suggestion this patch is:
>> Reviewed-by: Eric Engestrom 
>
>
> Actually, the .tmp target isn't needed either, you just have to tag the
> .h target as PHONY:
>
>   .PHONY: anv_timestamp.h
>   anv_timestamp.h:
> @echo "Updating anv_timestamp.h"
> $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
>
> Unless I'm missing something, this has the exact same behaviour.
Hmm yes, we don't need the temporary here since we _want_ the .h to be
regenerated on each make invocation (unlike git_sha1.h where this came
from).
In practice this patch does the same, although it is overkill :-)

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96629] dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width >= 1' failed.

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96629

Ilia Mirkin  changed:

   What|Removed |Added

 CC||nhaeh...@gmail.com

--- Comment #3 from Ilia Mirkin  ---
42624ea837e8f422f1cd04403af915bd7f218b8d is the first bad commit
commit 42624ea837e8f422f1cd04403af915bd7f218b8d
Author: Nicolai Hähnle 
Date:   Mon Jun 6 23:15:10 2016 +0200

st/mesa: use base level size as "guess" when available

When an applications specifies mip levels _before_ setting a mipmap texture
filter, we will initially guess a single texture level. When the second
level
image is created, we try to allocate the full texture -- however, we get
the
base level size guess wrong if that size is odd. This leads to yet another
re-allocation of the texture later during st_finalize_texture.

Even worse, this re-allocation breaks a (reasonable) assumption made by
st_generate_mipmaps, because the re-allocation in the finalization call
will
again allocate a single-level pipe texture (based on the non-mipmap texture
filter!). As a result, mipmap generation fails in interesting ways.

All of this can be avoided by just using the fact that we already know the
size of the base level.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95529
Cc: 12.0 
Reviewed-by: Brian Paul 

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96634] Mesa or libGL very slow rendering with kabini vga card

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96634

Alex Deucher  changed:

   What|Removed |Added

  Component|Mesa core   |DRM/Radeon
 QA Contact|mesa-dev@lists.freedesktop. |
   |org |
   Assignee|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
Product|Mesa|DRI
Version|11.2|unspecified

--- Comment #1 from Alex Deucher  ---
The driver is not loaded in the logs attached.  Please attach a copy of your
xorg log and dmesg output with the firmware installed.  Note that if you are
using an initrd, you need to update that with the firmware as well.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96639] st/mesa: transfer_map with too-high level with dEQP-GLES2.functional.texture.completeness.cube.extra_level

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96639

Bug ID: 96639
   Summary: st/mesa: transfer_map with too-high level with
dEQP-GLES2.functional.texture.completeness.cube.extra_
level
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: imir...@alum.mit.edu
QA Contact: mesa-dev@lists.freedesktop.org

./deqp-gles2 --deqp-visibility=hidden
--deqp-case='dEQP-GLES2.functional.texture.completeness.cube.extra_level'
dEQP Core git-aa4099c48f58c93d951b64451ef87adb31fce406 (0xaa4099c4) starting..
  target implementation = 'X11 GLX'

Test case 'dEQP-GLES2.functional.texture.completeness.cube.extra_level'..
deqp-gles2: lp_texture.c:522: llvmpipe_transfer_map: Assertion `level <=
resource->last_level' failed.
Aborted

I think the issue may be in the state tracker - we shouldn't be calling
transfer_map on non-existent levels...

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96629] dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width >= 1' failed.

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96629

--- Comment #2 from Ilia Mirkin  ---
Be that as it may, we shouldn't be asserting for reachable situations. I think
another one that asserts is on that list, since I was running with the master
list when I hit it (admittedly on a NV34 with force-enabled GLES 2). However
with this test, the issue also happens with llvmpipe and nvc0.

This is also something that happened in the past week or so. I was running with
a (non-descript) older version, and it passed, but I just updated and it
crashes. I guess there's a bisect in my future...

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 96629] dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width >= 1' failed.

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96629

--- Comment #1 from Mark Janes  ---
FWIW, this test is not in dEQP's android "must-pass" list:

https://android.googlesource.com/platform/external/deqp/+/master/android/cts/master/gles2-master.txt

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Eric Engestrom
On Wed, Jun 22, 2016 at 04:57:39PM +0100, Eric Engestrom wrote:
> On Wed, Jun 22, 2016 at 01:04:41PM +0100, Emil Velikov wrote:
> > From: Emil Velikov 

[...]

> > diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
> > index 4d9ff90..8332ae5 100644
> > --- a/src/intel/vulkan/Makefile.am
> > +++ b/src/intel/vulkan/Makefile.am
> > @@ -131,7 +131,20 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
> > $(vulkan_include_HEADERS)
> > $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
> > $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
> >  
> > -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
> > +
> > +.PHONY: anv_timestamp.h.tmp
> > +anv_timestamp.h.tmp:
> > +   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
> > +
> > +anv_timestamp.h: anv_timestamp.h.tmp
> > +   @echo "Updating anv_timestamp.h"
> > +   @if ! cmp -s anv_timestamp.h.tmp anv_timestamp.h; then \
> > +   mv anv_timestamp.h.tmp anv_timestamp.h ;\
> > +   else \
> > +   rm anv_timestamp.h.tmp ;\
> > +   fi
> 
> For the else branch to be taken, the target would have to be generated
> twice within the same second, and I don't see why we would want to avoid
> modifying anv_timestamp.h's timestamp in this case, since that would be
> the only thing that would be changed, and said timestamp would be within
> the same second anyway.
> I suggest to drop the `if` and unconditionally do the move.
> 
> Since this wouldn't change the behaviour anyway, with or without my
> suggestion this patch is:
> Reviewed-by: Eric Engestrom 


Actually, the .tmp target isn't needed either, you just have to tag the
.h target as PHONY:

  .PHONY: anv_timestamp.h
  anv_timestamp.h:
@echo "Updating anv_timestamp.h"
$(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@

Unless I'm missing something, this has the exact same behaviour.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] automake: don't mandate git_sha1.h/MESA_GIT_SHA1

2016-06-22 Thread Eric Engestrom
On Wed, Jun 22, 2016 at 01:04:42PM +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> It has proven subtle to get it right both form the build side POV (see

s/form/from/

Reviewed-by: Eric Engestrom 

> commit list below) and builders due to their varying workflows.
> 
> Furthermore it does not fully fulfil the reason why it was enforced -
> to detect uniqueness between different builds, in order to distinguish
> and invalidate Vulkan/GL caches.
> 
> With that having a much better solution (previous commit) we can drop
> this solution.
> 
> This effectively reverts the following commits:
> 359d9dfec33 ("mesa: automake: add directory prefix for git_sha1.h")
> 2c424e00c39 ("mesa: automake: ensure that git_sha1.h.tmp has the right
> attributes")
> b7f7ec78435 ("mesa: automake: distclean git_sha1.h when building OOT")
> 8229fe68b5d ("automake: get in-tree `make distclean' working again.")
> 
> Cc: Timo Aaltonen 
> Cc: Haixia Shi 
> Cc: Jason Ekstrand 
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Emil Velikov 
> ---
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Eric Engestrom
On Wed, Jun 22, 2016 at 01:04:41PM +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Do not rely on the git sha1:
>  - its current truncated form makes it less unique
>  - it does not attribute for local (Vulkand or otherwise) changes
> 
> Use a timestamp produced at the time of build. It's perfectly unique,
> unless someone explicitly thinkers with their system clock. Even then
> chances of producing the exact same one are very small, if not zero.
> 
> Cc: Jason Ekstrand 
> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Emil Velikov 
> ---
> Current approach uses seconds since Epoch, but if people prefer we
> can use nano seconds, combination of the two and/or other.
> ---
>  src/intel/vulkan/.gitignore   |  2 ++
>  src/intel/vulkan/Makefile.am  | 15 ++-
>  src/intel/vulkan/anv_device.c |  4 ++--
>  3 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
> index a496146..7ef6a48 100644
> --- a/src/intel/vulkan/.gitignore
> +++ b/src/intel/vulkan/.gitignore
> @@ -2,3 +2,5 @@
>  /anv_entrypoints.c
>  /anv_entrypoints.h
>  /dev_icd.json
> +/anv_timestamp.h.tmp
> +/anv_timestamp.h
> diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
> index 4d9ff90..8332ae5 100644
> --- a/src/intel/vulkan/Makefile.am
> +++ b/src/intel/vulkan/Makefile.am
> @@ -131,7 +131,20 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
> $(vulkan_include_HEADERS)
>   $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
>   $(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
>  
> -BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
> +
> +.PHONY: anv_timestamp.h.tmp
> +anv_timestamp.h.tmp:
> + $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
> +
> +anv_timestamp.h: anv_timestamp.h.tmp
> + @echo "Updating anv_timestamp.h"
> + @if ! cmp -s anv_timestamp.h.tmp anv_timestamp.h; then \
> + mv anv_timestamp.h.tmp anv_timestamp.h ;\
> + else \
> + rm anv_timestamp.h.tmp ;\
> + fi

For the else branch to be taken, the target would have to be generated
twice within the same second, and I don't see why we would want to avoid
modifying anv_timestamp.h's timestamp in this case, since that would be
the only thing that would be changed, and said timestamp would be within
the same second anyway.
I suggest to drop the `if` and unconditionally do the move.

Since this wouldn't change the behaviour anyway, with or without my
suggestion this patch is:
Reviewed-by: Eric Engestrom 

> +
> +BUILT_SOURCES = $(VULKAN_GENERATED_FILES) anv_timestamp.h
>  CLEANFILES = $(BUILT_SOURCES) dev_icd.json
>  EXTRA_DIST = \
>   $(top_srcdir)/include/vulkan/vk_icd.h \
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 97300c3..e395b1c 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -28,7 +28,7 @@
>  #include 
>  
>  #include "anv_private.h"
> -#include "git_sha1.h"
> +#include "anv_timestamp.h"
>  #include "util/strtod.h"
>  #include "util/debug.h"
>  
> @@ -426,7 +426,7 @@ void
>  anv_device_get_cache_uuid(void *uuid)
>  {
> memset(uuid, 0, VK_UUID_SIZE);
> -   snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
> +   snprintf(uuid, VK_UUID_SIZE, "anv-%s", ANV_TIMESTAMP);
>  }
>  
>  void anv_GetPhysicalDeviceProperties(
> -- 
> 2.8.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

2016-06-22 Thread Christian König

Am 22.06.2016 um 17:43 schrieb Zhang, Boyuan:

We should write the encode structure directly without the use of the
RVCE_CS() macros.

Otherwise all of that doesn't make much sense and is just another layer of 
abstraction.

Unlike UVD, where the firmware takes the address of the IB structure, the VCE 
firmware directly takes the values of the IB, not its address. The encode 
structure here is used for storing those values. We need this layer because we 
want to assign different values in VAAPI to some of the IB fields that 
previously had hardcoded values for OMX. Therefore, we still want to keep the 
RVCE_CS() macros. By keeping them, all firmware versions can work: even if the 
structure changes between different firmware versions, it still works because 
we only take the values of the IB, not the structure itself.


And exactly that's what we don't want.

Each firmware version should have a completely separate implementation of 
mapping the values from the pipe description into the binary 
representation of the IB.


Otherwise we would need to test with all the older firmware versions as 
well when we make a change.


Adding different values to the IB is also possible completely without 
the structure by just using the values from the picture descriptor directly.


Regards,
Christian.



Regards,
Boyuan

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de]
Sent: June-22-16 3:34 AM
To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
Subject: Re: [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

Am 21.06.2016 um 16:50 schrieb Boyuan Zhang:

Signed-off-by: Boyuan Zhang 
---
   src/gallium/drivers/radeon/radeon_vce.c| 171 +++
   src/gallium/drivers/radeon/radeon_vce.h|   1 +
   src/gallium/drivers/radeon/radeon_vce_52.c | 447 
+++--
   3 files changed, 533 insertions(+), 86 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c
b/src/gallium/drivers/radeon/radeon_vce.c
index e16e0cf..0d96085 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -139,6 +139,176 @@ static void sort_cpb(struct rvce_encoder *enc)
}
   }
   
+static void get_rate_control_param(struct rvce_encoder *enc, struct

+pipe_h264_enc_picture_desc *pic) {

Move all of this into the firmware specific file. Don't add anything to the 
common file since we don't want to implement this for the older firmware 
versions.


+   enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method;
+   enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate;
+   enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate;
+   enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames;
+   enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames;
+   enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames;
+   enc->enc_pic.rc.gop_size = pic->gop_size;
+   enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num;
+   enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den;
+   enc->enc_pic.rc.max_qp = 51;
+
+   if (pic->enable_low_level_control == true) {
+   enc->enc_pic.rc.vbv_buffer_size = 2000;
+   if (pic->rate_ctrl.frame_rate_num == 0)
+   enc->enc_pic.rc.frame_rate_num = 30;
+   if (pic->rate_ctrl.frame_rate_den == 0)
+   enc->enc_pic.rc.frame_rate_den = 1;
+   enc->enc_pic.rc.vbv_buf_lv = 48;
+   enc->enc_pic.rc.fill_data_enable = 1;
+   enc->enc_pic.rc.enforce_hrd = 1;
+   enc->enc_pic.rc.target_bits_picture = 
enc->enc_pic.rc.target_bitrate / enc->enc_pic.rc.frame_rate_num;
+   enc->enc_pic.rc.peak_bits_picture_integer = 
enc->enc_pic.rc.peak_bitrate / enc->enc_pic.rc.frame_rate_num;
+   enc->enc_pic.rc.peak_bits_picture_fraction = 0;
+   } else {
+   enc->enc_pic.rc.vbv_buffer_size = 
pic->rate_ctrl.vbv_buffer_size;
+   enc->enc_pic.rc.vbv_buf_lv = 0;
+   enc->enc_pic.rc.fill_data_enable = 0;
+   enc->enc_pic.rc.enforce_hrd = 0;
+   enc->enc_pic.rc.target_bits_picture = 
pic->rate_ctrl.target_bits_picture;
+   enc->enc_pic.rc.peak_bits_picture_integer = 
pic->rate_ctrl.peak_bits_picture_integer;
+   enc->enc_pic.rc.peak_bits_picture_fraction = 
pic->rate_ctrl.peak_bits_picture_fraction;
+   }
+}
+
+static void get_motion_estimation_param(struct rvce_encoder *enc,
+struct pipe_h264_enc_picture_desc *pic) {
+   if (pic->enable_low_level_control == true) {
+   enc->enc_pic.me.motion_est_quarter_pixel = 0x0001;
+   enc->enc_pic.me.enc_disable_sub_mode = 0x0078;
+   enc->enc_pic.me.lsmvert = 0x0002;
+   enc->enc_pic.me.enc_en_ime_overw_dis_subm = 0x0001;
+   enc->enc_pic.me.enc_ime_overw_dis_subm_no = 0x0001;
+   

Re: [Mesa-dev] [PATCH] st/mesa: fix readpixels regression with MESA_pack_invert

2016-06-22 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 22, 2016 at 5:39 PM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Fixes an error introduced in commit 3948cd37973696dc319170877382676809659465.
>
> Reported-by: Marek Olšák 
> ---
>  src/mesa/state_tracker/st_cb_readpixels.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
> b/src/mesa/state_tracker/st_cb_readpixels.c
> index 77c6332..99d9cd5 100644
> --- a/src/mesa/state_tracker/st_cb_readpixels.c
> +++ b/src/mesa/state_tracker/st_cb_readpixels.c
> @@ -520,7 +520,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
> /* memcpy data into a user buffer */
> {
>const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
> -  const uint destStride = _mesa_image_row_stride(pack, width, format, 
> type);
> +  const int destStride = _mesa_image_row_stride(pack, width, format, 
> type);
>char *dest = _mesa_image_address2d(pack, pixels,
>   width, height, format,
>   type, 0, 0);
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: fix readpixels regression with MESA_pack_invert

2016-06-22 Thread Brian Paul

On 06/22/2016 09:39 AM, Nicolai Hähnle wrote:

From: Nicolai Hähnle 

Fixes an error introduced in commit 3948cd37973696dc319170877382676809659465.

Reported-by: Marek Olšák 
---
  src/mesa/state_tracker/st_cb_readpixels.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
b/src/mesa/state_tracker/st_cb_readpixels.c
index 77c6332..99d9cd5 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -520,7 +520,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
 /* memcpy data into a user buffer */
 {
const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
-  const uint destStride = _mesa_image_row_stride(pack, width, format, 
type);
+  const int destStride = _mesa_image_row_stride(pack, width, format, type);
char *dest = _mesa_image_address2d(pack, pixels,
   width, height, format,
   type, 0, 0);



Reviewed-by: Brian Paul 

Yeah, strides should usually be signed because of this situation.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] clover: conditionally use MESA_GIT_SHA1

2016-06-22 Thread Eric Engestrom
On Wed, Jun 22, 2016 at 01:04:40PM +0100, Emil Velikov wrote:
> From: Emil Velikov 
> 
> Considering how hard/annoying it was for many peoples' workflow to
> properly generate the macro, it will be demoted to conditionally
> available with follow-up commits.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Vedran Miletić 
> Cc: Francisco Jerez 
> Signed-off-by: Emil Velikov 
> ---

LGTM
Reviewed-by: Eric Engestrom 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

2016-06-22 Thread Zhang, Boyuan
> We should write the encode structure directly without the use of the
> RVCE_CS() macros.
> 
> Otherwise all of that doesn't make much sense and is just another layer of 
> abstraction.

Different from UVD, where the firmware takes the address of the IB structure, the VCE 
firmware directly takes the value of the IB, not the address. The encode structure 
here is used for storing values. We need this layer because we want to assign 
different values to some of the IB in VAAPI, which previously had hardcoded 
values for OMX. Therefore, we still want to keep the RVCE_CS() macros. By 
keeping them, all firmware versions can work: even if the structure changes between 
different versions of the firmware, it still works because we only take the value of 
the IB, not the structure itself.

Regards,
Boyuan

-Original Message-
From: Christian König [mailto:deathsim...@vodafone.de] 
Sent: June-22-16 3:34 AM
To: Zhang, Boyuan; mesa-dev@lists.freedesktop.org
Subject: Re: [PATCH 3/3] radeon/vce: use vce structures for vce_52 firmware

Am 21.06.2016 um 16:50 schrieb Boyuan Zhang:
> Signed-off-by: Boyuan Zhang 
> ---
>   src/gallium/drivers/radeon/radeon_vce.c| 171 +++
>   src/gallium/drivers/radeon/radeon_vce.h|   1 +
>   src/gallium/drivers/radeon/radeon_vce_52.c | 447 
> +++--
>   3 files changed, 533 insertions(+), 86 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
> b/src/gallium/drivers/radeon/radeon_vce.c
> index e16e0cf..0d96085 100644
> --- a/src/gallium/drivers/radeon/radeon_vce.c
> +++ b/src/gallium/drivers/radeon/radeon_vce.c
> @@ -139,6 +139,176 @@ static void sort_cpb(struct rvce_encoder *enc)
>   }
>   }
>   
> +static void get_rate_control_param(struct rvce_encoder *enc, struct 
> +pipe_h264_enc_picture_desc *pic) {

Move all of this into the firmware specific file. Don't add anything to the 
common file since we don't want to implement this for the older firmware 
versions.

> + enc->enc_pic.rc.rc_method = pic->rate_ctrl.rate_ctrl_method;
> + enc->enc_pic.rc.target_bitrate = pic->rate_ctrl.target_bitrate;
> + enc->enc_pic.rc.peak_bitrate = pic->rate_ctrl.peak_bitrate;
> + enc->enc_pic.rc.quant_i_frames = pic->quant_i_frames;
> + enc->enc_pic.rc.quant_p_frames = pic->quant_p_frames;
> + enc->enc_pic.rc.quant_b_frames = pic->quant_b_frames;
> + enc->enc_pic.rc.gop_size = pic->gop_size;
> + enc->enc_pic.rc.frame_rate_num = pic->rate_ctrl.frame_rate_num;
> + enc->enc_pic.rc.frame_rate_den = pic->rate_ctrl.frame_rate_den;
> + enc->enc_pic.rc.max_qp = 51;
> +
> + if (pic->enable_low_level_control == true) {
> + enc->enc_pic.rc.vbv_buffer_size = 2000;
> + if (pic->rate_ctrl.frame_rate_num == 0)
> + enc->enc_pic.rc.frame_rate_num = 30;
> + if (pic->rate_ctrl.frame_rate_den == 0)
> + enc->enc_pic.rc.frame_rate_den = 1;
> + enc->enc_pic.rc.vbv_buf_lv = 48;
> + enc->enc_pic.rc.fill_data_enable = 1;
> + enc->enc_pic.rc.enforce_hrd = 1;
> + enc->enc_pic.rc.target_bits_picture = 
> enc->enc_pic.rc.target_bitrate / enc->enc_pic.rc.frame_rate_num;
> + enc->enc_pic.rc.peak_bits_picture_integer = 
> enc->enc_pic.rc.peak_bitrate / enc->enc_pic.rc.frame_rate_num;
> + enc->enc_pic.rc.peak_bits_picture_fraction = 0;
> + } else {
> + enc->enc_pic.rc.vbv_buffer_size = 
> pic->rate_ctrl.vbv_buffer_size;
> + enc->enc_pic.rc.vbv_buf_lv = 0;
> + enc->enc_pic.rc.fill_data_enable = 0;
> + enc->enc_pic.rc.enforce_hrd = 0;
> + enc->enc_pic.rc.target_bits_picture = 
> pic->rate_ctrl.target_bits_picture;
> + enc->enc_pic.rc.peak_bits_picture_integer = 
> pic->rate_ctrl.peak_bits_picture_integer;
> + enc->enc_pic.rc.peak_bits_picture_fraction = 
> pic->rate_ctrl.peak_bits_picture_fraction;
> + }
> +}
> +
> +static void get_motion_estimation_param(struct rvce_encoder *enc, 
> +struct pipe_h264_enc_picture_desc *pic) {
> + if (pic->enable_low_level_control == true) {
> + enc->enc_pic.me.motion_est_quarter_pixel = 0x0001;
> + enc->enc_pic.me.enc_disable_sub_mode = 0x0078;
> + enc->enc_pic.me.lsmvert = 0x0002;
> + enc->enc_pic.me.enc_en_ime_overw_dis_subm = 0x0001;
> + enc->enc_pic.me.enc_ime_overw_dis_subm_no = 0x0001;
> + enc->enc_pic.me.enc_ime2_search_range_x = 0x0004;
> + enc->enc_pic.me.enc_ime2_search_range_y = 0x0004;
> + enc->enc_pic.me.enc_ime_decimation_search = 0x0001;
> + enc->enc_pic.me.motion_est_half_pixel = 0x0001;
> + enc->enc_pic.me.enc_search_range_x = 0x0010;
> + enc->enc_pic.me.enc_search_range_y = 0x0010;
> + enc->enc_pic.me.enc_search1_range_x = 0x0010;
> + 

[Mesa-dev] [PATCH] st/mesa: fix readpixels regression with MESA_pack_invert

2016-06-22 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Fixes an error introduced in commit 3948cd37973696dc319170877382676809659465.

Reported-by: Marek Olšák 
---
 src/mesa/state_tracker/st_cb_readpixels.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
b/src/mesa/state_tracker/st_cb_readpixels.c
index 77c6332..99d9cd5 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -520,7 +520,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
/* memcpy data into a user buffer */
{
   const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
-  const uint destStride = _mesa_image_row_stride(pack, width, format, 
type);
+  const int destStride = _mesa_image_row_stride(pack, width, format, type);
   char *dest = _mesa_image_address2d(pack, pixels,
  width, height, format,
  type, 0, 0);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: reuse main extension table to appropriate restrict extensions

2016-06-22 Thread Jakob Bornecrantz
So I have encountered the bug that should be fixed by this. In my case it's
a happy little accident that I can use GL_ARB_gpu_shader5 in a
compatibility context (since I want the textureGatherOffset
functions). So one part of me is happy that the bug exists.

Cheers, Jakob.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: reuse main extension table to appropriate restrict extensions

2016-06-22 Thread Ilia Mirkin
Urgh, just realized that I also need to teach glcpp about this stuff,
otherwise we can end up with e.g. GL_ARB_gpu_shader5 defined but not
enableable in a compat context. Also a discussion with Jakob on IRC
pointed out that we currently allow GL_ARB_gpu_shader5 to be enabled
in compat contexts, but with this patch, this will no longer be
allowed. IMHO this is correct, but just wanted to point it out. Will
send a v3 that accounts for glcpp later in the week probably.

On Tue, Jun 21, 2016 at 2:24 PM, Ilia Mirkin  wrote:
> Ping? I got a R-b from Eric Engestrom (thanks!) but I was hoping some
> more experienced Mesa contributors could give this a look and see if
> this makes sense or if I'm still missing some crucial bits.
>
> On Mon, Jun 13, 2016 at 11:43 PM, Ilia Mirkin  wrote:
>> Previously we were only restricting based on ES/non-ES-ness and whether
>> the overall enable bit had been flipped on. However we have been adding
>> more fine-grained restrictions, such as based on compat profiles, as
>> well as specific ES versions. Most of the time this doesn't matter, but
>> it can create awkward situations and duplication of logic.
>>
>> Here we separate the main extension table into a separate object file,
>> linked to the glsl compiler, which makes use of it with a custom
>> function which takes the ES-ness of the shader into account (thus
>> allowing desktop shaders to properly use ES extensions that would
>> otherwise have been disallowed.)
>>
>> The effect of this change should be nil in most cases.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> v1 -> v2:
>>  - use a final enum to obtain number of extensions
>>  - move calculation of the gl version to be once per shader, for better reuse
>>  - bake GL version into the "supported_versions" struct
>>  - while we're at it, fix supported_versions size, it was off by 1 since ES 
>> 3.20
>>"support" was added.
>>
>>  src/Makefile.am  |   1 +
>>  src/compiler/SConscript.glsl |   2 +
>>  src/compiler/glsl/glsl_parser_extras.cpp | 244 
>> +++
>>  src/compiler/glsl/glsl_parser_extras.h   |   4 +-
>>  src/mesa/Android.libmesa_glsl_utils.mk   |   2 +
>>  src/mesa/Makefile.sources|   1 +
>>  src/mesa/main/extensions.c   |  33 +
>>  src/mesa/main/extensions.h   |   1 +
>>  src/mesa/main/extensions_table.c |  51 +++
>>  9 files changed, 190 insertions(+), 149 deletions(-)
>>  create mode 100644 src/mesa/main/extensions_table.c
>>
>> diff --git a/src/Makefile.am b/src/Makefile.am
>> index 32372da..d38f7c4 100644
>> --- a/src/Makefile.am
>> +++ b/src/Makefile.am
>> @@ -114,6 +114,7 @@ AM_CPPFLAGS = \
>>  noinst_LTLIBRARIES = libglsl_util.la
>>
>>  libglsl_util_la_SOURCES = \
>> +   mesa/main/extensions_table.c \
>> mesa/main/imports.c \
>> mesa/program/prog_hash_table.c \
>> mesa/program/symbol_table.c \
>> diff --git a/src/compiler/SConscript.glsl b/src/compiler/SConscript.glsl
>> index 4252ce1..31d8f6d 100644
>> --- a/src/compiler/SConscript.glsl
>> +++ b/src/compiler/SConscript.glsl
>> @@ -70,6 +70,7 @@ if env['msvc']:
>>  # Copy these files to avoid generation object files into src/mesa/program
>>  env.Prepend(CPPPATH = ['#src/mesa/main'])
>>  env.Command('glsl/imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', 
>> '$SOURCE'))
>> +env.Command('glsl/extensions_table.c', '#src/mesa/main/extensions_table.c', 
>> Copy('$TARGET', '$SOURCE'))
>>  # Copy these files to avoid generation object files into src/mesa/program
>>  env.Prepend(CPPPATH = ['#src/mesa/program'])
>>  env.Command('glsl/prog_hash_table.c', 
>> '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE'))
>> @@ -79,6 +80,7 @@ env.Command('glsl/dummy_errors.c', 
>> '#src/mesa/program/dummy_errors.c', Copy('$TA
>>  compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES'])
>>
>>  mesa_objs = env.StaticObject([
>> +'glsl/extensions_table.c',
>>  'glsl/imports.c',
>>  'glsl/prog_hash_table.c',
>>  'glsl/symbol_table.c',
>> diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
>> b/src/compiler/glsl/glsl_parser_extras.cpp
>> index ce2c3e8..0e9f152 100644
>> --- a/src/compiler/glsl/glsl_parser_extras.cpp
>> +++ b/src/compiler/glsl/glsl_parser_extras.cpp
>> @@ -50,6 +50,8 @@ glsl_compute_version_string(void *mem_ctx, bool is_es, 
>> unsigned version)
>>
>>  static const unsigned known_desktop_glsl_versions[] =
>> { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 };
>> +static const unsigned known_desktop_gl_versions[] =
>> +   {  20,  21,  30,  31,  32,  33,  40,  41,  42,  43,  44,  45 };
>>
>>
>>  _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
>> @@ -74,6 +76,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
>> gl_context *_ctx,
>> /* Set default language version and extensions */
>> 

Re: [Mesa-dev] [PATCH 2/2] glsl/mesa: stop duplicating geom and tcs layout values

2016-06-22 Thread Iago Toral
On Wed, 2016-06-22 at 12:41 +1000, Timothy Arceri wrote:
> We already store these in gl_shader and gl_program; here we
> remove them from gl_shader_program and just use the values
> from gl_shader.
> 
> This will allow us to keep the shader cache restore code as
> simple as it can be while making it somewhat clearer where these
> values originate from.
> ---
>  src/compiler/glsl/glsl_to_nir.cpp   |  2 +-
>  src/compiler/glsl/linker.cpp| 18 ++--
>  src/mesa/drivers/dri/i965/brw_tcs.c |  6 --
>  src/mesa/main/api_validate.c|  6 --
>  src/mesa/main/mtypes.h  | 20 +
>  src/mesa/main/shaderapi.c   | 43 
> -
>  src/mesa/main/shaderobj.c   |  6 +++---
>  7 files changed, 48 insertions(+), 53 deletions(-)
> 
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
> b/src/compiler/glsl/glsl_to_nir.cpp
> index daf237e..16d0c1d 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -166,7 +166,7 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
>  
> switch (stage) {
> case MESA_SHADER_TESS_CTRL:
> -  shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
> +  shader->info.tcs.vertices_out = sh->TessCtrl.VerticesOut;
>break;
>  
> case MESA_SHADER_GEOMETRY:
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index ec71bfe..9a3c326 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -861,7 +861,7 @@ validate_geometry_shader_executable(struct 
> gl_shader_program *prog,
> if (shader == NULL)
>return;
>  
> -   unsigned num_vertices = vertices_per_prim(prog->Geom.InputType);
> +   unsigned num_vertices = vertices_per_prim(shader->Geom.InputType);
> prog->Geom.VerticesIn = num_vertices;
>  
> analyze_clip_cull_usage(prog, shader, ctx,
> @@ -877,9 +877,11 @@ static void
>  validate_geometry_shader_emissions(struct gl_context *ctx,
> struct gl_shader_program *prog)
>  {
> -   if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) {
> +   struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
> +
> +   if (sh != NULL) {
>find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - 1);
> -  emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir);
> +  emit_vertex.run(sh->ir);
>if (emit_vertex.error()) {
>   linker_error(prog, "Invalid call %s(%d). Accepted values for the "
>"stream parameter are in the range [0, %d].\n",
> @@ -914,7 +916,7 @@ validate_geometry_shader_emissions(struct gl_context *ctx,
> * EmitStreamVertex() or EmitEndPrimitive() are called with a non-zero
> * stream.
> */
> -  if (prog->Geom.UsesStreams && prog->Geom.OutputType != GL_POINTS) {
> +  if (prog->Geom.UsesStreams && sh->Geom.OutputType != GL_POINTS) {
>   linker_error(prog, "EmitStreamVertex(n) and EndStreamPrimitive(n) "
>"with n>0 requires point output\n");
>}
> @@ -1797,7 +1799,6 @@ link_tcs_out_layout_qualifiers(struct gl_shader_program 
> *prog,
>  "vertices out layout qualifier\n");
>return;
> }
> -   prog->TessCtrl.VerticesOut = linked_shader->TessCtrl.VerticesOut;
>  }
>  
> 
> @@ -2059,26 +2060,21 @@ link_gs_inout_layout_qualifiers(struct 
> gl_shader_program *prog,
>  "geometry shader didn't declare primitive input type\n");
>return;
> }
> -   prog->Geom.InputType = linked_shader->Geom.InputType;
>  
> if (linked_shader->Geom.OutputType == PRIM_UNKNOWN) {
>linker_error(prog,
>  "geometry shader didn't declare primitive output type\n");
>return;
> }
> -   prog->Geom.OutputType = linked_shader->Geom.OutputType;
>  
> if (linked_shader->Geom.VerticesOut == -1) {
>linker_error(prog,
>  "geometry shader didn't declare max_vertices\n");
>return;
> }
> -   prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut;
>  
> if (linked_shader->Geom.Invocations == 0)
>linked_shader->Geom.Invocations = 1;
> -
> -   prog->Geom.Invocations = linked_shader->Geom.Invocations;
>  }
>  
> 
> @@ -2353,7 +2349,7 @@ link_intrastage_shaders(void *mem_ctx,
>  
> /* Set the size of geometry shader input arrays */
> if (linked->Stage == MESA_SHADER_GEOMETRY) {
> -  unsigned num_vertices = vertices_per_prim(prog->Geom.InputType);
> +  unsigned num_vertices = vertices_per_prim(linked->Geom.InputType);
>geom_array_resize_visitor input_resize_visitor(num_vertices, prog);
>foreach_in_list(ir_instruction, ir, linked->ir) {
>   ir->accept(&input_resize_visitor);
> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c 
> b/src/mesa/drivers/dri/i965/brw_tcs.c
> index a133fef..ff08c0a 100644
> --- a/src/mesa/drivers/dri/i965/brw_tcs.c
> +++ 

Re: [Mesa-dev] [PATCH V2 1/2] glsl/mesa: stop duplicating tes layout values

2016-06-22 Thread Iago Toral
On Wed, 2016-06-22 at 12:41 +1000, Timothy Arceri wrote:
> We already store this in gl_shader and gl_program; here we
> remove it from gl_shader_program and just use the values
> from gl_shader.
> 
> This will allow us to keep the shader cache restore code as
> simple as it can be while making it somewhat clearer where these
> values originate from.
> 
> V2: remove unessisary NULL check

unnecessary

> 
> Reviewed-by: Marek Olšák 
> Reviewed-by: Iago Toral 
> ---
>  src/compiler/glsl/linker.cpp |  4 
>  src/mesa/main/api_validate.c | 11 ++-
>  src/mesa/main/mtypes.h   |  7 ---
>  src/mesa/main/shaderapi.c| 34 ++
>  4 files changed, 28 insertions(+), 28 deletions(-)
> 
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index 9c6147b..ec71bfe 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -1890,19 +1890,15 @@ link_tes_in_layout_qualifiers(struct 
> gl_shader_program *prog,
>  "primitive modes.\n");
>return;
> }
> -   prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode;
>  
> if (linked_shader->TessEval.Spacing == 0)
>linked_shader->TessEval.Spacing = GL_EQUAL;
> -   prog->TessEval.Spacing = linked_shader->TessEval.Spacing;
>  
> if (linked_shader->TessEval.VertexOrder == 0)
>linked_shader->TessEval.VertexOrder = GL_CCW;
> -   prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder;
>  
> if (linked_shader->TessEval.PointMode == -1)
>linked_shader->TessEval.PointMode = GL_FALSE;
> -   prog->TessEval.PointMode = linked_shader->TessEval.PointMode;
>  }
>  
> 
> diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
> index 4ef86b8..ab34d99 100644
> --- a/src/mesa/main/api_validate.c
> +++ b/src/mesa/main/api_validate.c
> @@ -206,9 +206,10 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum 
> mode, const char *name)
>GLenum mode_before_gs = mode;
>  
>if (tes) {
> - if (tes->TessEval.PointMode)
> + struct gl_shader *tes_sh = 
> tes->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> + if (tes_sh->TessEval.PointMode)
>  mode_before_gs = GL_POINTS;
> - else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
> + else if (tes_sh->TessEval.PrimitiveMode == GL_ISOLINES)
>  mode_before_gs = GL_LINES;
>   else
>  /* the GL_QUADS mode generates triangles too */
> @@ -321,10 +322,10 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum 
> mode, const char *name)
>else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]) {
>   struct gl_shader_program *tes =
>  ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
> -
> - if (tes->TessEval.PointMode)
> + struct gl_shader *tes_sh = 
> tes->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> + if (tes_sh->TessEval.PointMode)
>  pass = ctx->TransformFeedback.Mode == GL_POINTS;
> - else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
> + else if (tes_sh->TessEval.PrimitiveMode == GL_ISOLINES)
>  pass = ctx->TransformFeedback.Mode == GL_LINES;
>   else
>  pass = ctx->TransformFeedback.Mode == GL_TRIANGLES;
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 83ff236..168e2ae 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2732,13 +2732,6 @@ struct gl_shader_program
>  * Tessellation Evaluation shader state from layout qualifiers.
>  */
> struct {
> -  /** GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
> -  GLenum PrimitiveMode;
> -  /** GL_EQUAL, GL_FRACTIONAL_ODD or GL_FRACTIONAL_EVEN */
> -  GLenum Spacing;
> -  /** GL_CW or GL_CCW */
> -  GLenum VertexOrder;
> -  bool PointMode;
>/**
> * True if gl_ClipDistance is written to.  Copied into
> * gl_tess_eval_program by _mesa_copy_linked_program_data().
> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> index fed8b6d..7f8b296 100644
> --- a/src/mesa/main/shaderapi.c
> +++ b/src/mesa/main/shaderapi.c
> @@ -831,26 +831,34 @@ get_programiv(struct gl_context *ctx, GLuint program, 
> GLenum pname,
> case GL_TESS_GEN_MODE:
>if (!has_tess)
>   break;
> -  if (check_tes_query(ctx, shProg))
> - *params = shProg->TessEval.PrimitiveMode;
> +  if (check_tes_query(ctx, shProg)) {
> + *params = shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->
> +TessEval.PrimitiveMode;
> +  }
>return;
> case GL_TESS_GEN_SPACING:
>if (!has_tess)
>   break;
> -  if (check_tes_query(ctx, shProg))
> - *params = shProg->TessEval.Spacing;
> +  if (check_tes_query(ctx, shProg)) {
> + *params = shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->
> +TessEval.Spacing;
> +   

Re: [Mesa-dev] [PATCH 6/6] winsys/radeon: add guard pages when R600_DEBUG=check_vm is enabled

2016-06-22 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 22, 2016 at 11:40 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> This should help flush out GPU VM faults.
> ---
>  src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 4 +++-
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 +++-
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 1 +
>  3 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
> b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> index 2c10e2e..58b52a4 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
> @@ -555,8 +555,10 @@ static struct radeon_bo *radeon_create_bo(struct 
> radeon_drm_winsys *rws,
>
>  if (rws->info.has_virtual_memory) {
>  struct drm_radeon_gem_va va;
> +unsigned va_gap_size;
>
> -bo->va = radeon_bomgr_find_va(rws, size, alignment);
> +va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
> +bo->va = radeon_bomgr_find_va(rws, size + va_gap_size, alignment);
>
>  va.handle = bo->handle;
>  va.vm_id = 0;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
> b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 1f296f4..625ea8a 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -527,6 +527,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys 
> *ws)
>  (ws->info.family == CHIP_HAWAII &&
>   ws->accel_working2 < 3);
>
> +ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != 
> NULL;
> +
>  return TRUE;
>  }
>
> @@ -742,7 +744,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t 
> screen_create)
>  if (!do_winsys_init(ws))
>  goto fail1;
>
> -    pb_cache_init(&ws->bo_cache, 50, 2.0f, 0,
> +    pb_cache_init(&ws->bo_cache, 50, ws->check_vm ? 1.0f : 2.0f, 0,
>MIN2(ws->info.vram_size, ws->info.gart_size),
>radeon_bo_destroy,
>radeon_bo_can_reclaim);
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h 
> b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> index c429aba..fdbaebe 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
> @@ -91,6 +91,7 @@ struct radeon_drm_winsys {
>
>  uint64_t va_offset;
>  struct list_head va_holes;
> +bool check_vm;
>
>  struct radeon_surface_manager *surf_man;
>
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/vdpau: use bicubic filter for scaling

2016-06-22 Thread Andy Furniss

Andy Furniss wrote:


re-testing with an unpatched mesa and return false after PREFERS I see
the same.

s/w decode mplayer or mpv will segfault - bt below. No asserts with
debug build of mesa.

Sometimes, depending on the input file, it doesn't instantly segfault; the brief
output is corrupt and I get 100s of VM faults.


More testing and I do have files that play normally - can't work out
what the difference is so far.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi: set LLVM denormal flags

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

- make sure FP32 denormals will stay disabled in LLVM in the future
  (the current default is disabled)
- tell LLVM that FP64 denormals are enabled
---
 src/gallium/drivers/radeonsi/si_pipe.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 65c0daa..0de25b9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -32,6 +32,9 @@
 #include "util/u_suballoc.h"
 #include "vl/vl_decoder.h"
 
+#define SI_LLVM_DEFAULT_FEATURES \
+   "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals"
+
 /*
  * pipe_context
  */
@@ -262,9 +265,9 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
   
r600_get_llvm_processor_name(sscreen->b.family),
 #if HAVE_LLVM >= 0x0308
   sscreen->b.debug_flags & 
DBG_SI_SCHED ?
-   
"+DumpCode,+vgpr-spilling,+si-scheduler" :
+   SI_LLVM_DEFAULT_FEATURES 
",+si-scheduler" :
 #endif
-   "+DumpCode,+vgpr-spilling",
+   SI_LLVM_DEFAULT_FEATURES,
   LLVMCodeGenLevelDefault,
   LLVMRelocDefault,
   LLVMCodeModelDefault);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi: emit 1/sqrt for RSQ

2016-06-22 Thread Marek Olšák
From: Marek Olšák 

We don't need the clamped version and we don't have to use any intrinsic.

Stats on Tonga:

15382 shaders in 9128 tests
Totals:
SGPRS: 1230560 -> 1230560 (0.00 %)
VGPRS: 469577 -> 462504 (-1.51 %)
Code Size: 22089908 -> 21730052 (-1.63 %) bytes
LDS: 598 -> 598 (0.00 %) blocks
Scratch: 283648 -> 281600 (-0.72 %) bytes per wave
Max Waves: 125664 -> 126969 (1.04 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 547280 -> 547280 (0.00 %)
VGPRS: 269132 -> 262059 (-2.63 %)
Code Size: 15709604 -> 15349748 (-2.29 %) bytes
LDS: 198 -> 198 (0.00 %) blocks
Scratch: 74752 -> 72704 (-2.74 %) bytes per wave
Max Waves: 47840 -> 49145 (2.73 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 8084a20..d395208 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1523,6 +1523,21 @@ static void emit_up2h(const struct lp_build_tgsi_action 
*action,
}
 }
 
+/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
+ * the target machine. f64 needs global unsafe math flags to get rsq. */
+static void emit_rsq(const struct lp_build_tgsi_action *action,
+struct lp_build_tgsi_context *bld_base,
+struct lp_build_emit_data *emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef sqrt =
+   lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
+emit_data->args[0]);
+
+   emit_data->output[emit_data->chan] =
+   LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
+}
+
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char 
*triple)
 {
struct lp_type type;
@@ -1661,8 +1676,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context 
* ctx, const char *trip
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = 
build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
-   bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = 
"llvm.AMDGPU.rsq.clamped.f32";
-   bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
+   bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: use a single memcpy in st_ReadPixels when possible

2016-06-22 Thread Marek Olšák
Hi,

FYI, piglit "mesa_pack_invert-readpixels -auto -fbo" crashes with this commit.

Marek

On Tue, Jun 21, 2016 at 11:08 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> This avoids costly address recomputations, function overhead, and may trigger
> large copy optimizations.
> ---
>  src/mesa/state_tracker/st_cb_readpixels.c | 23 +++
>  1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_cb_readpixels.c 
> b/src/mesa/state_tracker/st_cb_readpixels.c
> index 39d2274..77c6332 100644
> --- a/src/mesa/state_tracker/st_cb_readpixels.c
> +++ b/src/mesa/state_tracker/st_cb_readpixels.c
> @@ -520,14 +520,21 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
> /* memcpy data into a user buffer */
> {
>const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
> -  GLuint row;
> -
> -  for (row = 0; row < (unsigned) height; row++) {
> - void *dest = _mesa_image_address2d(pack, pixels,
> -  width, height, format,
> -  type, row, 0);
> - memcpy(dest, map, bytesPerRow);
> - map += tex_xfer->stride;
> +  const uint destStride = _mesa_image_row_stride(pack, width, format, 
> type);
> +  char *dest = _mesa_image_address2d(pack, pixels,
> + width, height, format,
> + type, 0, 0);
> +
> +  if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) {
> + memcpy(dest, map, bytesPerRow * height);
> +  } else {
> + GLuint row;
> +
> + for (row = 0; row < (unsigned) height; row++) {
> +memcpy(dest, map, bytesPerRow);
> +map += tex_xfer->stride;
> +dest += destStride;
> + }
>}
> }
>
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] gallium/util: fix some 4-space indentation in blitter code

2016-06-22 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Jun 22, 2016 at 12:16 AM, Brian Paul  wrote:
> ---
>  src/gallium/auxiliary/util/u_blitter.c | 42 
> +-
>  1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_blitter.c 
> b/src/gallium/auxiliary/util/u_blitter.c
> index 1257bb6..d06b3a8 100644
> --- a/src/gallium/auxiliary/util/u_blitter.c
> +++ b/src/gallium/auxiliary/util/u_blitter.c
> @@ -1332,9 +1332,9 @@ void util_blitter_custom_clear_depth(struct 
> blitter_context *blitter,
>   unsigned width, unsigned height,
>   double depth, void *custom_dsa)
>  {
> -static const union pipe_color_union color;
> -util_blitter_clear_custom(blitter, width, height, 0, 0, &color, depth, 0,
> -  NULL, custom_dsa);
> +   static const union pipe_color_union color;
> +   util_blitter_clear_custom(blitter, width, height, 0, 0, &color, depth, 0,
> + NULL, custom_dsa);
>  }
>
>  void util_blitter_default_dst_texture(struct pipe_surface *dst_templ,
> @@ -1342,11 +1342,11 @@ void util_blitter_default_dst_texture(struct 
> pipe_surface *dst_templ,
>unsigned dstlevel,
>unsigned dstz)
>  {
> -memset(dst_templ, 0, sizeof(*dst_templ));
> -dst_templ->format = util_format_linear(dst->format);
> -dst_templ->u.tex.level = dstlevel;
> -dst_templ->u.tex.first_layer = dstz;
> -dst_templ->u.tex.last_layer = dstz;
> +   memset(dst_templ, 0, sizeof(*dst_templ));
> +   dst_templ->format = util_format_linear(dst->format);
> +   dst_templ->u.tex.level = dstlevel;
> +   dst_templ->u.tex.first_layer = dstz;
> +   dst_templ->u.tex.last_layer = dstz;
>  }
>
>  static struct pipe_surface *
> @@ -1368,19 +1368,19 @@ void util_blitter_default_src_texture(struct 
> pipe_sampler_view *src_templ,
>struct pipe_resource *src,
>unsigned srclevel)
>  {
> -memset(src_templ, 0, sizeof(*src_templ));
> -src_templ->target = src->target;
> -src_templ->format = util_format_linear(src->format);
> -src_templ->u.tex.first_level = srclevel;
> -src_templ->u.tex.last_level = srclevel;
> -src_templ->u.tex.first_layer = 0;
> -src_templ->u.tex.last_layer =
> -src->target == PIPE_TEXTURE_3D ? u_minify(src->depth0, srclevel) - 1
> -   : src->array_size - 1;
> -src_templ->swizzle_r = PIPE_SWIZZLE_X;
> -src_templ->swizzle_g = PIPE_SWIZZLE_Y;
> -src_templ->swizzle_b = PIPE_SWIZZLE_Z;
> -src_templ->swizzle_a = PIPE_SWIZZLE_W;
> +   memset(src_templ, 0, sizeof(*src_templ));
> +   src_templ->target = src->target;
> +   src_templ->format = util_format_linear(src->format);
> +   src_templ->u.tex.first_level = srclevel;
> +   src_templ->u.tex.last_level = srclevel;
> +   src_templ->u.tex.first_layer = 0;
> +   src_templ->u.tex.last_layer =
> +  src->target == PIPE_TEXTURE_3D ? u_minify(src->depth0, srclevel) - 1
> + : src->array_size - 1;
> +   src_templ->swizzle_r = PIPE_SWIZZLE_X;
> +   src_templ->swizzle_g = PIPE_SWIZZLE_Y;
> +   src_templ->swizzle_b = PIPE_SWIZZLE_Z;
> +   src_templ->swizzle_a = PIPE_SWIZZLE_W;
>  }
>
>  static boolean is_blit_generic_supported(struct blitter_context *blitter,
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] Coding style scripts (Was Re: [PATCH 1/2] gallium: replace [0-9]*.f with [0-9]*.0f)

2016-06-22 Thread Rob Clark
On Wed, Jun 22, 2016 at 8:25 AM, Emil Velikov  wrote:
> Hi All,
>
> Seems like we have a few people who are keen on the idea of having some
> form of at least semi-automated way to handle coding style issues.
>
> Some options/ideas:
>  - Combine the emacs .dir-local.el + emacs -batch to do the checking:
> Pros: rules aren't duplicated in multiple places (like the second
> option). Cons: not everyone has emacs
>  - or, check-in a few (as needed) xa-indent style scripts based on indent.
> Pros: indent seems (imho) more widely spread. Cons: the style rules
> are duplicated.
>
> IMHO we don't have to 'enforce' one or the other throughout the tree.
> Having either one would be beneficial, but definitely not a
> requirement.
>
> Once we're happy with that, we could have a simple toplevel
> "check-all-style" script, which can be used by both developers and git
> hooks. And with time patchwork/other solution will be able to
> pre-emptively run these and provide feedback, at which time we'll
> toggle the git hooks to reject 'non-compliant' pushes (or even before
> the pw stuff is in place) ?
>
> There's a couple small catches
>  - I cannot convince emacs to honour .dir-locals.el in batch mode. Any takers 
> ?
>  - We might have a couple of initial "x: unify coding style" commits.
>
> Rob, seems like most (all?) of your mode lines in freedreno are busted
> (missing : after tab-width, typo(s) in tab-width). Considering there's
> a fdno .dir-locals.el are you ok with just nuking them all together ?

no problem

BR,
-R

> Thanks
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC] Coding style scripts (Was Re: [PATCH 1/2] gallium: replace [0-9]*.f with [0-9]*.0f)

2016-06-22 Thread Emil Velikov
Hi All,

Seems like we have a few people who are keen on the idea of having some
form of at least semi-automated way to handle coding style issues.

Some options/ideas:
 - Combine the emacs .dir-local.el + emacs -batch to do the checking:
Pros: rules aren't duplicated in multiple places (like the second
option). Cons: not everyone has emacs
 - or, check-in a few (as needed) xa-indent style scripts based on indent.
Pros: indent seems (imho) more widely spread. Cons: the style rules
are duplicated.

IMHO we don't have to 'enforce' one or the other throughout the tree.
Having either one would be beneficial, but definitely not a
requirement.

Once we're happy with that, we could have a simple toplevel
"check-all-style" script, which can be used by both developers and git
hooks. And with time patchwork/other solution will be able to
pre-emptively run these and provide feedback, at which time we'll
toggle the git hooks to reject 'non-compliant' pushes (or even before
the pw stuff is in place) ?

There's a couple small catches
 - I cannot convince emacs to honour .dir-locals.el in batch mode. Any takers ?
 - We might have a couple of initial "x: unify coding style" commits.

Rob, seems like most (all?) of your mode lines in freedreno are busted
(missing : after tab-width, typo(s) in tab-width). Considering there's
a fdno .dir-locals.el are you ok with just nuking them all together ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] swr: automake: don't ship LLVM version specific generated sources

2016-06-22 Thread Emil Velikov
From: Emil Velikov 

Otherwise things will fail to build, if the builder is using another
version of LLVM.

v2: annotate all the dependencies of builder_gen.h
v3: clean the generated files as needed

Cc: "12.0" 
Cc: Tim Rowley 
Cc: Chuck Atkins 
Tested-by: Chuck Atkins  (v2)
Reported-by: Chuck Atkins 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/swr/Makefile.am | 48 +++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index d896154..30087be 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -52,8 +52,6 @@ BUILT_SOURCES = \
rasterizer/scripts/gen_knobs.cpp \
rasterizer/scripts/gen_knobs.h \
rasterizer/jitter/state_llvm.h \
-   rasterizer/jitter/builder_gen.h \
-   rasterizer/jitter/builder_gen.cpp \
rasterizer/jitter/builder_x86.h \
rasterizer/jitter/builder_x86.cpp
 
@@ -122,6 +120,34 @@ COMMON_LDFLAGS = \
$(NO_UNDEFINED) \
$(LLVM_LDFLAGS)
 
+
+# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
+# tarball) just annotate the dependency directly.
+# As the single direct user of builder_gen.h is a header (builder.h) trace all
+# the transitive users (ones that use the latter header).
+#
+# Note: one should really clean the includes a bit, according to Tim there's
+# only 4 users of the builder_gen methods/API.
+rasterizer/jitter/blend_jit.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/builder.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/builder_misc.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/fetch_jit.cpp: rasterizer/jitter/builder_gen.h
+rasterizer/jitter/streamout_jit.cpp: rasterizer/jitter/builder_gen.h
+swr_shader.cpp: rasterizer/jitter/builder_gen.h
+
+CLEANFILES = \
+   rasterizer/jitter/builder_gen.h \
+   rasterizer/jitter/builder_gen.cpp
+
+# XXX: Due to the funky dependencies above, the builder_x86.cpp file gets
+# generated (copied) into builddir when building from release tarball.
+# Add a temporary workaround to remove it, until the above issue is resolved.
+distclean-local:
+   ( test $(top_srcdir) != $(top_builddir) && \
+   rm $(builddir)/rasterizer/jitter/builder_x86.cpp ) || true
+
 lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
 
 libswrAVX_la_CXXFLAGS = \
@@ -132,6 +158,15 @@ libswrAVX_la_CXXFLAGS = \
 libswrAVX_la_SOURCES = \
$(COMMON_SOURCES)
 
+# XXX: Don't ship these generated sources for now, since they are specific
+# to the LLVM version they are generated from. Thus a release tarball
+# containing the said files, generated against eg. LLVM 3.8 will fail to build
+# on systems with other versions of LLVM eg. 3.7 or 3.6.
+# Move these back to BUILT_SOURCES once that is resolved.
+nodist_libswrAVX_la_SOURCES = \
+   rasterizer/jitter/builder_gen.h \
+   rasterizer/jitter/builder_gen.cpp
+
 libswrAVX_la_LIBADD = \
$(COMMON_LIBADD)
 
@@ -146,6 +181,15 @@ libswrAVX2_la_CXXFLAGS = \
 libswrAVX2_la_SOURCES = \
$(COMMON_SOURCES)
 
+# XXX: Don't ship these generated sources for now, since they are specific
+# to the LLVM version they are generated from. Thus a release tarball
+# containing the said files, generated against eg. LLVM 3.8 will fail to build
+# on systems with other versions of LLVM eg. 3.7 or 3.6.
+# Move these back to BUILT_SOURCES once that is resolved.
+nodist_libswrAVX2_la_SOURCES = \
+   rasterizer/jitter/builder_gen.h \
+   rasterizer/jitter/builder_gen.cpp
+
 libswrAVX2_la_LIBADD = \
$(COMMON_LIBADD)
 
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] clover: conditionally use MESA_GIT_SHA1

2016-06-22 Thread Emil Velikov
From: Emil Velikov 

Considering how hard/annoying it was for many peoples' workflow to
properly generate the macro, it will be demoted to conditionally
available with follow-up commits.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Vedran Miletić 
Cc: Francisco Jerez 
Signed-off-by: Emil Velikov 
---
 src/gallium/state_trackers/clover/api/device.cpp   | 6 +-
 src/gallium/state_trackers/clover/api/platform.cpp | 5 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index ed323e0..7ad01d9 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -302,7 +302,11 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
 
case CL_DEVICE_VERSION:
   buf.as_string() = "OpenCL 1.1 Mesa " PACKAGE_VERSION
-" (" MESA_GIT_SHA1 ")";
+#ifdef MESA_GIT_SHA1
+" (" MESA_GIT_SHA1 ")"
+
+#endif
+   ;
   break;
 
case CL_DEVICE_EXTENSIONS:
diff --git a/src/gallium/state_trackers/clover/api/platform.cpp 
b/src/gallium/state_trackers/clover/api/platform.cpp
index cdb8a99..b1b1fdf 100644
--- a/src/gallium/state_trackers/clover/api/platform.cpp
+++ b/src/gallium/state_trackers/clover/api/platform.cpp
@@ -59,7 +59,10 @@ clover::GetPlatformInfo(cl_platform_id d_platform, 
cl_platform_info param,
 
case CL_PLATFORM_VERSION:
   buf.as_string() = "OpenCL 1.1 Mesa " PACKAGE_VERSION
-" (" MESA_GIT_SHA1 ")";
+#ifdef MESA_GIT_SHA1
+" (" MESA_GIT_SHA1 ")"
+#endif
+;
   break;
 
case CL_PLATFORM_NAME:
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] automake: don't mandate git_sha1.h/MESA_GIT_SHA1

2016-06-22 Thread Emil Velikov
From: Emil Velikov 

It has proven subtle to get it right both from the build side POV (see
commit list below) and builders due to their varying workflows.

Furthermore it does not fully fulfil the reason why it was enforced -
to detect uniqueness between different builds, in order to distinguish
and invalidate Vulkan/GL caches.

With that having a much better solution (previous commit) we can drop
this solution.

This effectively reverts the following commits:
359d9dfec33 ("mesa: automake: add directory prefix for git_sha1.h")
2c424e00c39 ("mesa: automake: ensure that git_sha1.h.tmp has the right
attributes")
b7f7ec78435 ("mesa: automake: distclean git_sha1.h when building OOT")
8229fe68b5d ("automake: get in-tree `make distclean' working again.")

Cc: Timo Aaltonen 
Cc: Haixia Shi 
Cc: Jason Ekstrand 
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
Timo, with this in place you should be able to drop the following
incomplete hunk

https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git/commit/debian?h=debian-experimental&id=5daf16c05dfd9d5eed4b1084119619a066a5f5f6
---
 src/Makefile.am | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/Makefile.am b/src/Makefile.am
index 32372da..b130f5b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -25,15 +25,13 @@ git_sha1.h.tmp:
@# a gitlink file if $(top_srcdir) is a submodule checkout or a linked
@# worktree.
@# If we are building from a release tarball copy the bundled header.
+   @touch git_sha1.h.tmp
@if test -e $(top_srcdir)/.git; then \
if which git > /dev/null; then \
git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
> git_sha1.h.tmp ; \
fi \
-   else \
-   cp $(srcdir)/git_sha1.h git_sha1.h.tmp ;\
-   chmod u+w git_sha1.h.tmp; \
fi
 
 git_sha1.h: git_sha1.h.tmp
@@ -45,12 +43,7 @@ git_sha1.h: git_sha1.h.tmp
fi
 
 BUILT_SOURCES = git_sha1.h
-
-# We want to keep the srcdir file since we need it on rebuild from tarball.
-# At the same time `make distclean' gets angry at us if we don't cleanup the
-# builddir one.
-distclean-local:
-   ( test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h ) 
|| true
+CLEANFILES = $(BUILT_SOURCES)
 
 SUBDIRS = . gtest util mapi/glapi/gen mapi
 
@@ -100,7 +93,7 @@ SUBDIRS += gallium
 endif
 
 EXTRA_DIST = \
-   getopt hgl SConscript git_sha1.h
+   getopt hgl SConscript
 
 AM_CFLAGS = $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] anv: use cache uuid based on the build timestamp.

2016-06-22 Thread Emil Velikov
From: Emil Velikov 

Do not rely on the git sha1:
 - its current truncated form makes it less unique
 - it does not account for local (Vulkan or otherwise) changes

Use a timestamp produced at the time of build. It's perfectly unique,
unless someone explicitly tinkers with their system clock. Even then
chances of producing the exact same one are very small, if not zero.

Cc: Jason Ekstrand 
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov 
---
Current approach uses seconds since Epoch, but if people prefer we
can use nano seconds, combination of the two and/or other.
---
 src/intel/vulkan/.gitignore   |  2 ++
 src/intel/vulkan/Makefile.am  | 15 ++-
 src/intel/vulkan/anv_device.c |  4 ++--
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore
index a496146..7ef6a48 100644
--- a/src/intel/vulkan/.gitignore
+++ b/src/intel/vulkan/.gitignore
@@ -2,3 +2,5 @@
 /anv_entrypoints.c
 /anv_entrypoints.h
 /dev_icd.json
+/anv_timestamp.h.tmp
+/anv_timestamp.h
diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 4d9ff90..8332ae5 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -131,7 +131,20 @@ anv_entrypoints.c : anv_entrypoints_gen.py 
$(vulkan_include_HEADERS)
$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
$(PYTHON2) $(srcdir)/anv_entrypoints_gen.py code > $@
 
-BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
+
+.PHONY: anv_timestamp.h.tmp
+anv_timestamp.h.tmp:
+   $(AM_V_GEN) echo "#define ANV_TIMESTAMP `date +%s`" > $@
+
+anv_timestamp.h: anv_timestamp.h.tmp
+   @echo "Updating anv_timestamp.h"
+   @if ! cmp -s anv_timestamp.h.tmp anv_timestamp.h; then \
+   mv anv_timestamp.h.tmp anv_timestamp.h ;\
+   else \
+   rm anv_timestamp.h.tmp ;\
+   fi
+
+BUILT_SOURCES = $(VULKAN_GENERATED_FILES) anv_timestamp.h
 CLEANFILES = $(BUILT_SOURCES) dev_icd.json
 EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 97300c3..e395b1c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -28,7 +28,7 @@
 #include 
 
 #include "anv_private.h"
-#include "git_sha1.h"
+#include "anv_timestamp.h"
 #include "util/strtod.h"
 #include "util/debug.h"
 
@@ -426,7 +426,7 @@ void
 anv_device_get_cache_uuid(void *uuid)
 {
memset(uuid, 0, VK_UUID_SIZE);
-   snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
+   snprintf(uuid, VK_UUID_SIZE, "anv-%s", ANV_TIMESTAMP);
 }
 
 void anv_GetPhysicalDeviceProperties(
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [GSoC lib soft float] Blog update

2016-06-22 Thread tournier.elie
2016-06-16 1:32 GMT+02:00 Rhys Kidd :

> On Monday, June 13, 2016, tournier.elie  wrote:
>
>> Hi,
>>
>> After a few weeks of coding, I wrote a new post on my blog (
>> https://hopetech.github.io/). I speak about the progress of my project,
>> the difficulties encountered and the tools I use.
>>
>> Happy reading,
>> Elie
>>
>
> Hello Elie,
>
> Thanks for the short write up on progress so far.
>
> Whilst not going to the technical elements, I encourage you to continue
> with your project and look forward to reviewing patches.
>
> Perhaps you could also consider how the documentation might be improved in
> the areas you found lacking?
>

I don't think that doxygen documentation is necessary like in Mesa.
Adding a *.shader_test template in root/examples/ explaining how to use
shader_runner seems like a good idea to me.

>
> Rhys
>

Elie
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4] st/vdpau: use bicubic filter for scaling

2016-06-22 Thread Nayan Deshmukh
Thanks for the review.

On Wed, Jun 22, 2016 at 1:17 PM, Christian König 
wrote:

> Am 21.06.2016 um 21:21 schrieb Nayan Deshmukh:
>
>> use bicubic filtering as high quality scaling L1.
>>
>> v2: fix a typo and add a newline to code
>>
>> v3: -render the unscaled image on a temporary surface (Christian)
>>  -apply noise reduction and sharpness filter on
>>   unscaled surface
>>  -render the final scaled surface using bicubic
>>   interpolation
>>
>> v4: support high quality scaling
>>
>> Signed-off-by: Nayan Deshmukh 
>> ---
>>   src/gallium/state_trackers/vdpau/mixer.c | 109
>> ---
>>   src/gallium/state_trackers/vdpau/query.c |   1 +
>>   src/gallium/state_trackers/vdpau/vdpau_private.h |   6 ++
>>   3 files changed, 102 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/gallium/state_trackers/vdpau/mixer.c
>> b/src/gallium/state_trackers/vdpau/mixer.c
>> index 65c3ce2..2a67ac2 100644
>> --- a/src/gallium/state_trackers/vdpau/mixer.c
>> +++ b/src/gallium/state_trackers/vdpau/mixer.c
>> @@ -82,7 +82,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
>> switch (features[i]) {
>> /* they are valid, but we doesn't support them */
>> case VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL:
>> -  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1:
>> case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L2:
>> case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L3:
>> case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L4:
>> @@ -110,6 +109,9 @@ vlVdpVideoMixerCreate(VdpDevice device,
>>vmixer->luma_key.supported = true;
>>break;
>>   +  case VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1:
>> + vmixer->bicubic.supported = true;
>> + break;
>> default: goto no_params;
>> }
>>  }
>> @@ -202,6 +204,11 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
>> vl_matrix_filter_cleanup(vmixer->sharpness.filter);
>> FREE(vmixer->sharpness.filter);
>>  }
>> +
>> +   if (vmixer->bicubic.filter) {
>> +  vl_bicubic_filter_cleanup(vmixer->bicubic.filter);
>> +  FREE(vmixer->bicubic.filter);
>> +   }
>>  pipe_mutex_unlock(vmixer->device->mutex);
>>  DeviceReference(&vmixer->device, NULL);
>>   @@ -230,9 +237,11 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer
>> mixer,
>>   VdpLayer const *layers)
>>   {
>>  enum vl_compositor_deinterlace deinterlace;
>> -   struct u_rect rect, clip, *prect;
>> +   struct u_rect rect, clip, *prect, *rect_temp, dirty_area, temp;
>>  unsigned i, layer = 0;
>>  struct pipe_video_buffer *video_buffer;
>> +   struct pipe_sampler_view *sampler_view;
>> +   struct pipe_surface *surface;
>>vlVdpVideoMixer *vmixer;
>>  vlVdpSurface *surf;
>> @@ -324,8 +333,48 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
>> rect.y1 = surf->templat.height;
>> prect = &rect;
>>  }
>> +
>> +   if(vmixer->bicubic.filter){
>> +  struct pipe_context *pipe;
>> +  struct pipe_resource res_tmpl, *res;
>> +  struct pipe_sampler_view sv_templ;
>> +  struct pipe_surface surf_templ;
>> +
>> +  pipe = vmixer->device->context;
>> +  memset(&res_tmpl, 0, sizeof(res_tmpl));
>> +
>> +  res_tmpl.target = PIPE_TEXTURE_2D;
>> +  res_tmpl.format = surf->templat.chroma_format;
>>
>
> That is incorrect. The resource format isn't related in any way to the
> chroma format. Please use the format of the destination surface here.
>
> I should probably use res_tmpl = dst->sampler_view->texture->format; Right?


> +  res_tmpl.width0 = surf->templat.width;
>> +  res_tmpl.height0 = surf->templat.height;
>> +  res_tmpl.depth0 = 1;
>> +  res_tmpl.array_size = 1;
>> +  res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
>> + PIPE_BIND_LINEAR | PIPE_BIND_SHARED;
>>
>
> No need for PIPE_BIND_LINEAR or PIPE_BIND_SHARED here, we are not going to
> share the temporary texture with anybody.
>
> +  res_tmpl.usage = PIPE_USAGE_DEFAULT;
>> +
>> +  res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
>> +
>> +  vlVdpDefaultSamplerViewTemplate(&sv_templ, res);
>> +  sampler_view = pipe->create_sampler_view(pipe, res, &sv_templ);
>> +
>> +  memset(&surf_templ, 0, sizeof(surf_templ));
>> +  surf_templ.format = res->format;
>> +  surface = pipe->create_surface(pipe, res, &surf_templ);
>> +
>> +  vl_compositor_reset_dirty_area(&dirty_area);
>> +  rect_temp = prect;
>>
>
> You need to free the resource with pipe_resource_reference(&res, NULL);
> here, otherwise you will create quite a memory leak.
>
> Same thing is true for the surface and the sampler view, but you need
> those and can only free them later on. Easiest way to do this is probably
> to grab an extra reference in the else case as well.
>
> I can also check in the end if (surface != dst->surface) 

[Mesa-dev] [Bug 96634] Mesa or libGL very slow rendering with kabini vga card

2016-06-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=96634

Bug ID: 96634
   Summary: Mesa or libGL very slow rendering with kabini vga card
   Product: Mesa
   Version: 11.2
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: sounizan-n...@yahoo.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 124663
  --> https://bugs.freedesktop.org/attachment.cgi?id=124663&action=edit
dmesg and Xorg log

Hi

I have a Compulab FitPC4 computer with AMD G-Series Jaguar @2.0GHz CPU and
Radeon HD 8400E GPU and latest Void Linux distro. I have noticed that rendering
speed of Cairo is very much slower on this machine than on my laptop, of
similar specs but different hardware. I tried various Linux distros including
Debian, Arch and Mint, but all with the same problem. Then I tried a Puppy
Linux LiveCD and surprisingly Cairo rendering was a lot faster. 

Eventually I found that the reason is Puppy Linux does not include KABINI
firmware blobs so I tried removing these from my Void Linux installation and
running it without them. The result was that the linux console went blank but I
was able to log in blind and start my Xorg desktop. Sure enough, Cairo
rendering became a lot faster. Unfortunately since the console goes blank, I
had to reinstate the KABINI firmware and the slow down returned. The driver I
use is the xf86-video-amdgpu.

I felt I should persist and eventually found that if I install the
catalyst-libs-15.12 or nvidia-libs-364.19, which replace libGL, then Cairo
rendering again becomes much faster. I therefore suspect that the mesa/libGL
combination slows down Cairo rendering, if KABINI firmware blobs are loaded by
the system. And with the slow down comes a heavy increase in processor
workload, which probably means that hardware acceleration is disabled when
KABINI blobs are loaded. I attach dmesg and xorg log.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/mesa: stop duplicating tess layout values

2016-06-22 Thread Timothy Arceri
On Wed, 2016-06-22 at 09:30 +0200, Iago Toral wrote:
> On Wed, 2016-06-22 at 09:54 +1000, Timothy Arceri wrote:
> > 
> > On Tue, 2016-06-21 at 17:45 +0200, Iago Toral wrote:
> > > 
> > > On Tue, 2016-06-21 at 12:21 +1000, Timothy Arceri wrote:
> > > > 
> > > > 
> > > > > We already store this in gl_shader and gl_program; here we
> > > > remove it from gl_shader_program and just use the values
> > > > from gl_shader.
> > > > 
> > > > This will allow us to keep the shader cache restore code as
> > > > simple as it can be while making it somewhat clearer where these
> > > > values originate from.
> > > This sounds like a good idea, but the issue I see is that other
> > > stages
> > > seem to follow the same pattern and here you only change one stage...
> > > If
> > > we are not going to do this change for all stages to keep some
> > > consistency I am not so sure that this is actually such a good idea,
> > > it
> > > may actually bring more confusion.
> > > Fair enough point. I only did this stage because it's the only one which
> > > actually uses the values from gl_program after linking is done (at least
> > on i965).
> > 
> > I'll do some tidy ups for the other stages too.
> Aha, ok, then it is a bit special after all. In that case maybe it
> is fine as it is, dunno. If you think doing something similar for other
> stages helps something then go ahead, otherwise I guess we can keep it
> as is.

I already did it :) https://patchwork.freedesktop.org/patch/94481/

> 
> > 
> > > 
> > > 
> > > I have a few minor comments below, with those fixed you can add my Rb
> > > to
> > > this patch,
> > I've commented below I don't think any changes are needed :)
> I see, ok!
> 
> > 
> > > 
> > >  but I think you should at least try to get someone else to
> > > give an explicit ok for changing this only for Tess before pushing.
> > I'll send cleanups for the other stage too.
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > ---
> > > >  src/compiler/glsl/linker.cpp |  4 
> > > >  src/mesa/main/api_validate.c | 11 ++-
> > > >  src/mesa/main/mtypes.h   |  7 ---
> > > >  src/mesa/main/shaderapi.c| 35 +++-
> > > > ---
> > > >  4 files changed, 29 insertions(+), 28 deletions(-)
> > > > 
> > > > diff --git a/src/compiler/glsl/linker.cpp
> > > > b/src/compiler/glsl/linker.cpp
> > > > index 9c6147b..ec71bfe 100644
> > > > --- a/src/compiler/glsl/linker.cpp
> > > > +++ b/src/compiler/glsl/linker.cpp
> > > > @@ -1890,19 +1890,15 @@ link_tes_in_layout_qualifiers(struct
> > > > gl_shader_program *prog,
> > > >        "primitive modes.\n");
> > > >    return;
> > > > }
> > > > > -   prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode;
> > > >  
> > > > if (linked_shader->TessEval.Spacing == 0)
> > > >    linked_shader->TessEval.Spacing = GL_EQUAL;
> > > > -   prog->TessEval.Spacing = linked_shader->TessEval.Spacing;
> > > >  
> > > > if (linked_shader->TessEval.VertexOrder == 0)
> > > >    linked_shader->TessEval.VertexOrder = GL_CCW;
> > > > > -   prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder;
> > > >  
> > > > if (linked_shader->TessEval.PointMode == -1)
> > > >    linked_shader->TessEval.PointMode = GL_FALSE;
> > > > -   prog->TessEval.PointMode = linked_shader->TessEval.PointMode;
> > > >  }
> > > >  
> > > > 
> > > > diff --git a/src/mesa/main/api_validate.c
> > > > b/src/mesa/main/api_validate.c
> > > > index c7625c3..634040f 100644
> > > > --- a/src/mesa/main/api_validate.c
> > > > +++ b/src/mesa/main/api_validate.c
> > > > @@ -206,9 +206,10 @@ _mesa_valid_prim_mode(struct gl_context *ctx,
> > > > GLenum mode, const char *name)
> > > >    GLenum mode_before_gs = mode;
> > > >  
> > > >    if (tes) {
> > > Shouldn't we also do:
> > > 
> > > if (tes->_LinkedShaders[MESA_SHADER_TESS_EVAL]) instead of the line
> > > above
> > >  and remove the 'tes' variable? It looks like we only use it here
> > > after this change.
> > No I don't think so. We need to null check tes aka
> > ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] as the existing code
> > does. We then get the shader from the gl_shader_program struct.
> > 
> > ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]->_LinkedShaders[MESA_SHADER_TESS_EVAL]
> > _LinkedShaders[MESA_SHADER_TESS_EVAL] should never be null in this case
> > unless something has gone wrong elsewhere.
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > - if (tes->TessEval.PointMode)
> > > > > + struct gl_shader *tes_sh = tes->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> > > > + if (tes_sh->TessEval.PointMode)
> > > >  mode_before_gs = GL_POINTS;
> > > > - else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
> > > > + else if (tes_sh->TessEval.PrimitiveMode == GL_ISOLINES)
> > > >  mode_before_gs = GL_LINES;
> > > >   else
> > > >   

  1   2   >