date:20180501

[Mesa-dev] [PATCH 3/3] i965: Reuse batch decoder infrastructure rather than open coding it.

2018-05-01 Thread Kenneth Graunke

With the new callback, Jason's newer batch decoder infrastructure
should be able to do just as well as the old open coded INTEL_DEBUG=bat
handling, with much less code.  If there are any limitations, we'd like
to improve the common code rather than doing one-off hacks here.
---
 src/mesa/drivers/dri/i965/brw_context.h   |   3 +
 src/mesa/drivers/dri/i965/brw_state.h |   1 -
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 274 --
 3 files changed, 55 insertions(+), 223 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 1e6a45eee1f..4a01ca5d8ab 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -48,6 +48,7 @@
 #include 
 
 #include "common/gen_debug.h"
+#include "common/gen_decoder.h"
 #include "intel_screen.h"
 #include "intel_tex_obj.h"
 
@@ -524,6 +525,8 @@ struct intel_batchbuffer {
 
/** Map from batch offset to brw_state_batch data (with DEBUG_BATCH) */
struct hash_table *state_batch_sizes;
+
+   struct gen_batch_decode_ctx decoder;
 };
 
 #define BRW_MAX_XFB_STREAMS 4
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 9acb6257401..0417cc2aae0 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -188,7 +188,6 @@ void brw_print_program_cache(struct brw_context *brw);
 void brw_require_statebuffer_space(struct brw_context *brw, int size);
 void *brw_state_batch(struct brw_context *brw,
   int size, int alignment, uint32_t *out_offset);
-uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset);
 
 /* brw_wm_surface_state.c */
 uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index d745c2a3113..f966b05c01a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -77,6 +77,40 @@ dump_validation_list(struct intel_batchbuffer *batch)
}
 }
 
+static struct gen_batch_decode_bo
+decode_get_bo(void *v_brw, uint64_t address)
+{
+   struct brw_context *brw = v_brw;
+   struct intel_batchbuffer *batch = &brw->batch;
+
+   for (int i = 0; i < batch->exec_count; i++) {
+  struct brw_bo *bo = batch->exec_bos[i];
+  /* The decoder zeroes out the top 16 bits, so we need to as well */
+  uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);
+
+  if (address >= bo_address && address < bo_address + bo->size) {
+ return (struct gen_batch_decode_bo) {
+.addr = address,
+.size = bo->size,
+.map = brw_bo_map(brw, bo, MAP_READ) + (address - bo_address),
+ };
+  }
+   }
+
+   return (struct gen_batch_decode_bo) { };
+}
+
+static unsigned
+decode_get_state_size(void *v_brw, uint32_t offset_from_dsba)
+{
+   struct brw_context *brw = v_brw;
+   struct intel_batchbuffer *batch = &brw->batch;
+   struct hash_entry *entry =
+  _mesa_hash_table_search(batch->state_batch_sizes,
+  (void *) (uintptr_t) offset_from_dsba);
+   return entry ? (uintptr_t) entry->data : 0;
+}
+
 static bool
 uint_key_compare(const void *a, const void *b)
 {
@@ -126,6 +160,16 @@ intel_batchbuffer_init(struct brw_context *brw)
if (INTEL_DEBUG & DEBUG_BATCH) {
   batch->state_batch_sizes =
  _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
+
+  const unsigned decode_flags =
+ GEN_BATCH_DECODE_FULL |
+ ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
+ GEN_BATCH_DECODE_OFFSETS |
+ GEN_BATCH_DECODE_FLOATS;
+
+  gen_batch_decode_ctx_init(&batch->decoder, devinfo, stderr,
+decode_flags, NULL, decode_get_bo,
+decode_get_state_size, brw);
}
 
batch->use_batch_first =
@@ -287,8 +331,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
brw_bo_unreference(batch->last_bo);
brw_bo_unreference(batch->batch.bo);
brw_bo_unreference(batch->state.bo);
-   if (batch->state_batch_sizes)
+   if (batch->state_batch_sizes) {
   _mesa_hash_table_destroy(batch->state_batch_sizes, NULL);
+  gen_batch_decode_ctx_finish(&batch->decoder);
+   }
 }
 
 /**
@@ -487,215 +533,6 @@ intel_batchbuffer_require_space(struct brw_context *brw, 
GLuint sz,
brw->batch.ring = ring;
 }
 
-#ifdef DEBUG
-#define CSI "\e["
-#define BLUE_HEADER  CSI "0;44m"
-#define NORMAL   CSI "0m"
-
-
-static void
-decode_struct(struct brw_context *brw, struct gen_spec *spec,
-  const char *struct_name, uint32_t *data,
-  uint32_t gtt_offset, uint32_t offset, bool color)
-{
-   struct gen_group *group = gen_spec_find_struct(spec, struct_name);
-   if (!group)
-  return;
-
-   fprintf(stderr, "%s\n", struct_name);
-   gen_print_group(stderr, group, gtt_offset + offset,
-

[Mesa-dev] [PATCH 1/3] intel: Move batch decoder/disassembler from tools/ to common/

2018-05-01 Thread Kenneth Graunke

Making these part of libintel_common allows us to use them in the DRI
driver.  The standalone tool binaries already link against the common
library, too, so it's no harder for them.
---
 src/intel/Makefile.sources| 3 +++
 src/intel/Makefile.tools.am   | 8 +---
 src/intel/{tools => common}/gen_batch_decoder.c   | 0
 src/intel/{tools/disasm.c => common/gen_disasm.c} | 0
 src/intel/{tools => common}/gen_disasm.h  | 0
 src/intel/common/meson.build  | 3 +++
 src/intel/tools/meson.build   | 6 ++
 7 files changed, 9 insertions(+), 11 deletions(-)
 rename src/intel/{tools => common}/gen_batch_decoder.c (100%)
 rename src/intel/{tools/disasm.c => common/gen_disasm.c} (100%)
 rename src/intel/{tools => common}/gen_disasm.h (100%)

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 91c71a8dfaf..1adf6f990c6 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -9,10 +9,13 @@ BLORP_FILES = \
 
 COMMON_FILES = \
common/gen_clflush.h \
+   common/gen_batch_decoder.c \
common/gen_debug.c \
common/gen_debug.h \
common/gen_decoder.c \
common/gen_decoder.h \
+   common/gen_disasm.c \
+   common/gen_disasm.h \
common/gen_defines.h \
common/gen_l3_config.c \
common/gen_l3_config.h \
diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index a8685c24e1c..b00cc8cc2cb 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -25,9 +25,6 @@ noinst_PROGRAMS += \
 
 tools_aubinator_SOURCES = \
tools/aubinator.c \
-   tools/disasm.c \
-   tools/gen_batch_decoder.c \
-   tools/gen_disasm.h \
tools/intel_aub.h
 
 tools_aubinator_CFLAGS = \
@@ -48,10 +45,7 @@ tools_aubinator_LDADD = \
 
 
 tools_aubinator_error_decode_SOURCES = \
-   tools/aubinator_error_decode.c \
-   tools/disasm.c \
-   tools/gen_batch_decoder.c \
-   tools/gen_disasm.h
+   tools/aubinator_error_decode.c
 
 tools_aubinator_error_decode_LDADD = \
common/libintel_common.la \
diff --git a/src/intel/tools/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
similarity index 100%
rename from src/intel/tools/gen_batch_decoder.c
rename to src/intel/common/gen_batch_decoder.c
diff --git a/src/intel/tools/disasm.c b/src/intel/common/gen_disasm.c
similarity index 100%
rename from src/intel/tools/disasm.c
rename to src/intel/common/gen_disasm.c
diff --git a/src/intel/tools/gen_disasm.h b/src/intel/common/gen_disasm.h
similarity index 100%
rename from src/intel/tools/gen_disasm.h
rename to src/intel/common/gen_disasm.h
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build
index 5e0394a5b86..ebf69c05370 100644
--- a/src/intel/common/meson.build
+++ b/src/intel/common/meson.build
@@ -22,10 +22,13 @@
 
 files_libintel_common = files(
   'gen_clflush.h',
+  'gen_batch_decoder.c',
   'gen_debug.c',
   'gen_debug.h',
   'gen_decoder.c',
   'gen_decoder.h',
+  'gen_disasm.c',
+  'gen_disasm.h',
   'gen_l3_config.c',
   'gen_l3_config.h',
   'gen_urb_config.c',
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index e19de5af882..0be530546c3 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -20,8 +20,7 @@
 
 aubinator = executable(
   'aubinator',
-  files('aubinator.c', 'disasm.c', 'gen_batch_decoder.c',
-'gen_disasm.h', 'intel_aub.h'),
+  files('aubinator.c', 'intel_aub.h'),
   dependencies : [dep_expat, dep_zlib, dep_dl, dep_thread, dep_m],
   include_directories : [inc_common, inc_intel],
   link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util],
@@ -32,8 +31,7 @@ aubinator = executable(
 
 aubinator_error_decode = executable(
   'aubinator_error_decode',
-  files('aubinator_error_decode.c', 'disasm.c', 'gen_disasm.h',
-'gen_batch_decoder.c'),
+  files('aubinator_error_decode.c'),
   dependencies : [dep_zlib, dep_thread],
   include_directories : [inc_common, inc_intel],
   link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util],
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] intel: Give the batch decoder a callback to ask about state size.

2018-05-01 Thread Kenneth Graunke

Given an arbitrary batch, we don't always know what the size of certain
things are, such as how many entries are in a binding table.  But it's
easy for the driver to track that information, so with a simple callback
we can calculate this correctly for INTEL_DEBUG=bat.
---
 src/intel/common/gen_batch_decoder.c | 23 +++
 src/intel/common/gen_decoder.h   |  4 
 src/intel/tools/aubinator.c  |  2 +-
 src/intel/tools/aubinator_error_decode.c |  2 +-
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/intel/common/gen_batch_decoder.c 
b/src/intel/common/gen_batch_decoder.c
index c6b908758b2..37eac1ab2a1 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -33,11 +33,13 @@ gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx,
   const char *xml_path,
   struct gen_batch_decode_bo (*get_bo)(void *,
uint64_t),
+  unsigned (*get_state_size)(void *, uint32_t),
   void *user_data)
 {
memset(ctx, 0, sizeof(*ctx));
 
ctx->get_bo = get_bo;
+   ctx->get_state_size = get_state_size;
ctx->user_data = user_data;
ctx->fp = fp;
ctx->flags = flags;
@@ -103,6 +105,21 @@ ctx_get_bo(struct gen_batch_decode_ctx *ctx, uint64_t addr)
return bo;
 }
 
+static int
+update_count(struct gen_batch_decode_ctx *ctx,
+ uint32_t offset_from_dsba,
+ unsigned element_dwords,
+ unsigned guess)
+{
+   unsigned size = ctx->get_state_size(ctx->user_data, offset_from_dsba);
+
+   if (size > 0)
+  return size / (sizeof(uint32_t) * element_dwords);
+
+   /* In the absence of any information, just guess arbitrarily. */
+   return guess;
+}
+
 static void
 ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
 uint32_t ksp, const char *type)
@@ -196,9 +213,8 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, 
uint32_t offset, int count)
   return;
}
 
-   /* If we don't know the actual count, guess. */
if (count < 0)
-  count = 8;
+  count = update_count(ctx, offset, 1, 8);
 
if (ctx->surface_base.map == NULL) {
   fprintf(ctx->fp, "  binding table unavailable\n");
@@ -233,9 +249,8 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t 
offset, int count)
 {
struct gen_group *strct = gen_spec_find_struct(ctx->spec, "SAMPLER_STATE");
 
-   /* If we don't know the actual count, guess. */
if (count < 0)
-  count = 4;
+  count = update_count(ctx, offset, strct->dw_length, 4);
 
if (ctx->dynamic_base.map == NULL) {
   fprintf(ctx->fp, "  samplers unavailable\n");
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index f28ac7d27af..fc567456624 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -207,6 +207,8 @@ struct gen_disasm *disasm;
 struct gen_batch_decode_ctx {
struct gen_batch_decode_bo (*get_bo)(void *user_data,
 uint64_t base_address);
+   unsigned (*get_state_size)(void *user_data,
+  uint32_t offset_from_dynamic_state_base_addr);
void *user_data;
 
FILE *fp;
@@ -226,6 +228,8 @@ void gen_batch_decode_ctx_init(struct gen_batch_decode_ctx 
*ctx,
const char *xml_path,
struct gen_batch_decode_bo (*get_bo)(void *,
 uint64_t),
+
+   unsigned (*get_state_size)(void *, uint32_t),
void *user_data);
 void gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx);
 
diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 2a72efa8a2c..ab053c66b36 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -178,7 +178,7 @@ aubinator_init(uint16_t aub_pci_id, const char *app_name)
batch_flags |= GEN_BATCH_DECODE_FLOATS;
 
gen_batch_decode_ctx_init(&batch_ctx, &devinfo, outfile, batch_flags,
- xml_path, get_gen_batch_bo, NULL);
+ xml_path, get_gen_batch_bo, NULL, NULL);
 
char *color = GREEN_HEADER, *reset_color = NORMAL;
if (option_color == COLOR_NEVER)
diff --git a/src/intel/tools/aubinator_error_decode.c 
b/src/intel/tools/aubinator_error_decode.c
index 0234c59371d..2735bd72714 100644
--- a/src/intel/tools/aubinator_error_decode.c
+++ b/src/intel/tools/aubinator_error_decode.c
@@ -595,7 +595,7 @@ read_data_file(FILE *file)
 
struct gen_batch_decode_ctx batch_ctx;
gen_batch_decode_ctx_init(&batch_ctx, &devinfo, stdout, batch_flags,
- xml_path, get_gen_batch_bo, NULL);
+ xml_path, get_gen_batch_bo, NULL, NULL);
 
 
for (int s = 0; s < sect_num; s++) {
-- 
2.17.0

Re: [Mesa-dev] [PATCH v2 08/14] mesa: handle OES_texture_half_float formats in _mesa_base_tex_format()

2018-05-01 Thread Tapani Pälli


Hi;

On 05/01/2018 05:48 PM, Christian Gmeiner wrote:

Signed-off-by: Christian Gmeiner 
Reviewed-by: Wladimir J. van der Laan 
---
  src/mesa/main/glformats.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index cba5e670db..1d3d524875 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2424,6 +2424,25 @@ _mesa_base_tex_format(const struct gl_context *ctx, 
GLint internalFormat)
   return GL_YCBCR_MESA;
 }
  
+   if (ctx->Extensions.OES_texture_half_float) {


this could be

if (ctx->Extensions.ARB_texture_float ||
    ctx->Extensions.OES_texture_half_float)


+   switch (internalFormat) {
+   case GL_ALPHA16F_ARB:
+  return GL_ALPHA;
+   case GL_RGBA16F_ARB:
+  return GL_RGBA;
+   case GL_RGB16F_ARB:
+  return GL_RGB;
+   case GL_INTENSITY16F_ARB:
+  return GL_INTENSITY;
+   case GL_LUMINANCE16F_ARB:
+  return GL_LUMINANCE;
+   case GL_LUMINANCE_ALPHA16F_ARB:
+  return GL_LUMINANCE_ALPHA;
+   default:
+  ; /* fallthrough */
+   }
+   }
+


It seems like we miss OES_texture_float as well .. with the above change 
(separation of half float formats from full float ones) we could have a 
OES_texture_float || ARB_texture_float check for the rest?




 if (ctx->Extensions.ARB_texture_float) {
switch (internalFormat) {
case GL_ALPHA16F_ARB:



// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 29613] OSMesa and GL cannot be linked together

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=29613

Matt Turner  changed:

   What|Removed |Added

 CC|matts...@gmail.com  |

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106337] eglWaitClient() doesn't work as documented using DRI2 backend

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106337

--- Comment #1 from Tapani Pälli  ---
(In reply to mgorc...@qnx.com from comment #0)
> According to EGL 1.4 specification eglWaitClient() should be equivalent of
> glFinish() call, but according to the function code of dri2_wait_client() it
> does just flush() without waiting for any pending operations on drawable
> surface.

Do you see issues with some particular driver? The flush() implementation
should take care that pending operations are done.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/9] radeonsi: add EQAA SC, DB, CB register programming

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.h  |  5 +-
 src/gallium/drivers/radeonsi/si_state.c | 74 +++--
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 241385baed7..55a135f3870 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -584,20 +584,21 @@ struct si_images {
 
 struct si_framebuffer {
struct pipe_framebuffer_state   state;
unsignedcolorbuf_enabled_4bit;
unsignedspi_shader_col_format;
unsignedspi_shader_col_format_alpha;
unsignedspi_shader_col_format_blend;
unsignedspi_shader_col_format_blend_alpha;
ubyte   nr_samples:5; /* at most 16xAA */
ubyte   log_samples:3; /* at most 4 = 16xAA */
+   ubyte   nr_color_samples; /* at most 8xAA */
ubyte   compressed_cb_mask;
ubyte   uncompressed_cb_mask;
ubyte   color_is_int8;
ubyte   color_is_int10;
ubyte   dirty_cbufs;
booldirty_zsbuf;
boolany_dst_linear;
boolCB_has_shader_readable_metadata;
boolDB_has_shader_readable_metadata;
 };
@@ -1467,23 +1468,23 @@ si_htile_enabled(struct r600_texture *tex, unsigned 
level)
 static inline bool
 vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
 {
assert(!tex->tc_compatible_htile || tex->htile_offset);
return tex->tc_compatible_htile && level == 0;
 }
 
 static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)
 {
if (sctx->ps_uses_fbfetch)
-   return sctx->framebuffer.nr_samples;
+   return sctx->framebuffer.nr_color_samples;
 
-   return sctx->ps_iter_samples;
+   return MIN2(sctx->ps_iter_samples, sctx->framebuffer.nr_color_samples);
 }
 
 static inline unsigned si_get_total_colormask(struct si_context *sctx)
 {
if (sctx->queued.named.rasterizer->rasterizer_discard)
return 0;
 
struct si_shader_selector *ps = sctx->ps_shader.cso;
if (!ps)
return 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 3f9332081bf..fce796f7543 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2403,23 +2403,24 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype) |
S_028C70_ENDIAN(endian);
 
/* Intensity is implemented as Red, so treat it that way. */
color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == 
PIPE_SWIZZLE_1 ||
  
util_format_is_intensity(surf->base.format));
 
if (rtex->buffer.b.b.nr_samples > 1) {
unsigned log_samples = 
util_logbase2(rtex->buffer.b.b.nr_samples);
+   unsigned log_fragments = util_logbase2(rtex->num_color_samples);
 
color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
-   S_028C74_NUM_FRAGMENTS(log_samples);
+   S_028C74_NUM_FRAGMENTS(log_fragments);
 
if (rtex->surface.fmask_size) {
color_info |= S_028C70_COMPRESSION(1);
unsigned fmask_bankh = 
util_logbase2(rtex->surface.u.legacy.fmask.bankh);
 
if (sctx->chip_class == SI) {
/* due to a hw bug, FMASK_BANK_HEIGHT must be 
set on SI too */
color_attrib |= 
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
}
@@ -2429,21 +2430,21 @@ static void si_initialize_color_surface(struct 
si_context *sctx,
unsigned max_uncompressed_block_size = 
V_028C78_MAX_BLOCK_SIZE_256B;
unsigned min_compressed_block_size = 
V_028C78_MIN_BLOCK_SIZE_32B;
 
/* amdvlk: [min-compressed-block-size] should be set to 32 for 
dGPU and
   64 for APU because all of our APUs to date use DIMMs which 
have
   a request granularity size of 64B while all other chips have 
a
   32B request size */
if (!sctx->screen->info.has_dedicated_vram)
min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
 
-   if (rtex->buffer.b.b.nr_samples > 1) {
+

[Mesa-dev] [PATCH 4/9] radeonsi: use better sample locations for 8x EQAA

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

Verified with the piglit MSAA accuracy test.
---
 src/gallium/drivers/radeonsi/si_state_msaa.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 5066c31319e..afc98c1465a 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -85,63 +85,53 @@
 /* 1x MSAA */
 static const uint32_t sample_locs_1x =
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0); /* S1, S2, S3 fields are not 
used by 1x */
 static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
 
 /* 2x MSAA */
 static const uint32_t sample_locs_2x =
FILL_SREG(-4,-4,   4, 4,   0, 0,   0, 0); /* S2 & S3 fields are not 
used by 2x MSAA */
 static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
 
-/* 8x MSAA */
-static const uint32_t sample_locs_8x[] = {
-   FILL_SREG(-3,-5,   5, 1,  -5, 5,   7,-7),
-   FILL_SREG(-7,-1,   3, 7,  -1, 3,   1,-3),
-   FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0), /* S8, S9 etc. are not used 
by 8x */
-   FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0),
-};
-static const uint64_t centroid_priority_8x = 0x3542017635420176ull;
-
-/* 4x and 16x MSAA
- * (the first 4 locations happen to be optimal for 4x MSAA, better than
- *  the standard DX 4x locations)
+/* 4x, 8x, and 16x MSAA
+ * - The first 4 locations happen to be optimal for 4x MSAA, better than
+ *   the standard DX 4x locations.
+ * - The first 8 locations happen to be almost as good as 8x DX locations,
+ *   but the DX locations are horrible for worst-case EQAA 8s4f and 8s2f.
  */
-static const uint32_t sample_locs_4x_16x[] = {
+static const uint32_t sample_locs_4x_8x_16x[] = {
FILL_SREG(-5,-2,   5, 3,  -2, 6,   3,-5),
FILL_SREG(-6,-7,   1, 1,  -6, 4,   7,-3),
FILL_SREG(-1,-3,   6, 7,  -3, 2,   0,-7),
FILL_SREG(-4,-6,   2, 5,  -8, 0,   4,-1),
 };
 static const uint64_t centroid_priority_4x = 0x2310231023102310ull;
+static const uint64_t centroid_priority_8x = 0x4762310547623105ull;
 static const uint64_t centroid_priority_16x = 0x49e7c6b231d0fa85ull;
 
 static void si_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
   unsigned sample_index, float *out_value)
 {
const uint32_t *sample_locs;
 
switch (sample_count) {
case 1:
default:
sample_locs = &sample_locs_1x;
break;
case 2:
sample_locs = &sample_locs_2x;
break;
case 4:
-   sample_locs = sample_locs_4x_16x;
-   break;
case 8:
-   sample_locs = sample_locs_8x;
-   break;
case 16:
-   sample_locs = sample_locs_4x_16x;
+   sample_locs = sample_locs_4x_8x_16x;
break;
}
 
out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
 }
 
 static void si_emit_max_4_sample_locs(struct radeon_winsys_cs *cs,
  uint64_t centroid_priority,
  uint32_t sample_locs)
@@ -175,27 +165,27 @@ void si_emit_sample_locations(struct radeon_winsys_cs 
*cs, int nr_samples)
 {
switch (nr_samples) {
default:
case 1:
si_emit_max_4_sample_locs(cs, centroid_priority_1x, 
sample_locs_1x);
break;
case 2:
si_emit_max_4_sample_locs(cs, centroid_priority_2x, 
sample_locs_2x);
break;
case 4:
-   si_emit_max_4_sample_locs(cs, centroid_priority_4x, 
sample_locs_4x_16x[0]);
+   si_emit_max_4_sample_locs(cs, centroid_priority_4x, 
sample_locs_4x_8x_16x[0]);
break;
case 8:
-   si_emit_max_16_sample_locs(cs, centroid_priority_8x, 
sample_locs_8x, 8);
+   si_emit_max_16_sample_locs(cs, centroid_priority_8x, 
sample_locs_4x_8x_16x, 8);
break;
case 16:
-   si_emit_max_16_sample_locs(cs, centroid_priority_16x, 
sample_locs_4x_16x, 16);
+   si_emit_max_16_sample_locs(cs, centroid_priority_16x, 
sample_locs_4x_8x_16x, 16);
break;
}
 }
 
 void si_init_msaa_functions(struct si_context *sctx)
 {
int i;
 
sctx->b.get_sample_position = si_get_sample_position;
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/9] radeonsi: set up EQAA image descriptors properly

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 96 -
 1 file changed, 80 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index fce796f7543..e133bf28589 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3594,25 +3594,28 @@ si_make_texture_descriptor(struct si_screen *screen,
   unsigned first_level, unsigned last_level,
   unsigned first_layer, unsigned last_layer,
   unsigned width, unsigned height, unsigned depth,
   uint32_t *state,
   uint32_t *fmask_state)
 {
struct pipe_resource *res = &tex->buffer.b.b;
const struct util_format_description *desc;
unsigned char swizzle[4];
int first_non_void;
-   unsigned num_format, data_format, type;
+   unsigned num_format, data_format, type, num_samples;
uint64_t va;
 
desc = util_format_description(pipe_format);
 
+   num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ?
+   MAX2(1, res->nr_samples) : tex->num_color_samples;
+
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
 
switch (pipe_format) {
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
case PIPE_FORMAT_X32_S8X24_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
util_format_compose_swizzles(swizzle_yyyy, 
state_swizzle, swizzle);
@@ -3721,21 +3724,21 @@ si_make_texture_descriptor(struct si_screen *screen,
  res->target == PIPE_TEXTURE_3D))) {
/* For the purpose of shader images, treat cube maps and 3D
 * textures as 2D arrays. For 3D textures, the address
 * calculations for mipmaps are different, so we rely on the
 * caller to effectively disable mipmaps.
 */
type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
 
assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && 
last_level == 0));
} else {
-   type = si_tex_dim(screen, tex, target, res->nr_samples);
+   type = si_tex_dim(screen, tex, target, num_samples);
}
 
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
height = 1;
depth = res->array_size;
} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
if (sampler || res->target != PIPE_TEXTURE_3D)
depth = res->array_size;
} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
@@ -3744,45 +3747,44 @@ si_make_texture_descriptor(struct si_screen *screen,
state[0] = 0;
state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
S_008F14_NUM_FORMAT_GFX6(num_format));
state[2] = (S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1) |
S_008F18_PERF_MOD(4));
state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
-   S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
-   0 : first_level) |
-   S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
-   util_logbase2(res->nr_samples) :
+   S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) |
+   S_008F1C_LAST_LEVEL(num_samples > 1 ?
+   util_logbase2(num_samples) :
last_level) |
S_008F1C_TYPE(type));
state[4] = 0;
state[5] = S_008F24_BASE_ARRAY(first_layer);
state[6] = 0;
state[7] = 0;
 
if (screen->info.chip_class >= GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
 
/* Depth is the the last accessible layer on Gfx9.
 * The hw doesn't need to know the total number of layers.
 */
if (type == V_008F1C_SQ_RSRC_IMG_3D)
state[4] |= S_008F20_DEPTH(depth - 1);
else
state[4] |= S_008F20_DEPTH(last_layer);
 
state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
-   state[5] |= S_008F24_MAX_MIP(res->nr_samples > 1 ?
-

[Mesa-dev] [PATCH 9/9] radeonsi: add an environment variable that forces EQAA for MSAA allocations

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

This is for testing and experiments.
---
 src/gallium/drivers/radeonsi/si_pipe.c| 22 
 src/gallium/drivers/radeonsi/si_pipe.h|  3 +++
 src/gallium/drivers/radeonsi/si_state.c   |  5 
 src/gallium/drivers/radeonsi/si_texture.c | 31 +++
 4 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 1ca38ed55cb..35c2c200e57 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1065,20 +1065,42 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
if (sscreen->info.chip_class <= VI) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= 
SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
 
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->debug_flags |= DBG_ALL_SHADERS;
 
+   /* Syntax:
+* EQAA=s,z,c
+* Example:
+* EQAA=8,4,2
+
+* That means 8 coverage samples, 4 Z/S samples, and 2 color samples.
+* Constraints:
+* s >= z >= c (ignoring this only wastes memory)
+* s = [2..16]
+* z = [2..8]
+* c = [2..8]
+*
+* Only MSAA color and depth buffers are overriden.
+*/
+   const char *eqaa = debug_get_option("EQAA", NULL);
+   unsigned s,z,f;
+   if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) {
+   sscreen->eqaa_force_coverage_samples = s;
+   sscreen->eqaa_force_z_samples = z;
+   sscreen->eqaa_force_color_samples = f;
+   }
+
for (i = 0; i < num_comp_hi_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler[i]);
for (i = 0; i < num_comp_lo_threads; i++)
si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
 
/* Create the auxiliary context. This must be done last. */
sscreen->aux_context = si_create_context(&sscreen->b, 0);
 
if (sscreen->debug_flags & DBG(TEST_DMA))
si_test_dma(sscreen);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 55a135f3870..6917d5e6068 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -409,20 +409,23 @@ struct si_screen {
 
struct radeon_info  info;
uint64_tdebug_flags;
charrenderer_string[100];
 
unsignedgs_table_depth;
unsignedtess_offchip_block_dw_size;
unsignedtess_offchip_ring_size;
unsignedtess_factor_ring_size;
unsignedvgt_hs_offchip_param;
+   unsignedeqaa_force_coverage_samples;
+   unsignedeqaa_force_z_samples;
+   unsignedeqaa_force_color_samples;
boolhas_clear_state;
boolhas_distributed_tess;
boolhas_draw_indirect_multi;
boolhas_out_of_order_rast;
boolassume_no_z_fights;
boolcommutative_blend_add;
boolclear_db_cache_before_clear;
boolhas_msaa_sample_loc_bug;
boolhas_ls_vgpr_init_bug;
booldpbb_allowed;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index e133bf28589..c7585b285e9 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2112,20 +2112,21 @@ static bool si_is_zs_format_supported(enum pipe_format 
format)
 {
return si_translate_dbformat(format) != V_028040_Z_INVALID;
 }
 
 static boolean si_is_format_supported(struct pipe_screen *screen,
  enum pipe_format format,
  enum pipe_texture_target target,
  unsigned sample_count,
  unsigned usage)
 {
+   struct si_screen *sscreen = (struct si_screen *)screen;
unsigned retval = 0;
 
if (target >= PIPE_MAX_TEXTURE_TYPES) {
PRINT_ERR("r600: unsupported texture type %d\n", target);
return false;
}
 
if (!util_format_is_supported(format, usage))
return false;
 
@@ -2135,20 +2136,24 @@

[Mesa-dev] [PATCH 2/9] radeonsi: use better sample locations for 4x MSAA

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

Discovered by luck. Verified with the piglit MSAA accuracy test.
It also shows that the worst case EQAA 16s4f results in very good 4x MSAA
in the worst case.

Nine might not like these positions, but they are prettier to the eye and
GL doesn't care.
---
 src/gallium/drivers/radeonsi/si_state_msaa.c | 21 ++--
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 0f9e0fea1c7..b6504d53a2a 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -85,64 +85,63 @@
 /* 1x MSAA */
 static const uint32_t sample_locs_1x =
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0); /* S1, S2, S3 fields are not 
used by 1x */
 static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
 
 /* 2x MSAA */
 static const uint32_t sample_locs_2x =
FILL_SREG(-4,-4,   4, 4,   0, 0,   0, 0); /* S2 & S3 fields are not 
used by 2x MSAA */
 static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
 
-/* 4x MSAA */
-static const uint32_t sample_locs_4x =
-   FILL_SREG(-2,-6,   2, 6,  -6, 2,   6,-2);
-static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
-
 /* 8x MSAA */
 static const uint32_t sample_locs_8x[] = {
FILL_SREG(-3,-5,   5, 1,  -5, 5,   7,-7),
FILL_SREG(-7,-1,   3, 7,  -1, 3,   1,-3),
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0), /* S8, S9 etc. are not used 
by 8x */
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0),
 };
 static const uint64_t centroid_priority_8x = 0x3542017635420176ull;
 
-/* 16x MSAA */
-static const uint32_t sample_locs_16x[] = {
+/* 4x and 16x MSAA
+ * (the first 4 locations happen to be optimal for 4x MSAA, better than
+ *  the standard DX 4x locations)
+ */
+static const uint32_t sample_locs_4x_16x[] = {
FILL_SREG(-5,-2,   5, 3,  -2, 6,   3,-5),
FILL_SREG(-7,-8,   1, 1,  -6, 4,   7,-4),
FILL_SREG(-1,-3,   6, 7,  -3, 2,   0,-7),
FILL_SREG(-4,-6,   2, 5,  -8, 0,   4,-1),
 };
+static const uint64_t centroid_priority_4x = 0x2310231023102310ull;
 static const uint64_t centroid_priority_16x = 0x497ec6b231d0fa85ull;
 
 static void si_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
   unsigned sample_index, float *out_value)
 {
const uint32_t *sample_locs;
 
switch (sample_count) {
case 1:
default:
sample_locs = &sample_locs_1x;
break;
case 2:
sample_locs = &sample_locs_2x;
break;
case 4:
-   sample_locs = &sample_locs_4x;
+   sample_locs = sample_locs_4x_16x;
break;
case 8:
sample_locs = sample_locs_8x;
break;
case 16:
-   sample_locs = sample_locs_16x;
+   sample_locs = sample_locs_4x_16x;
break;
}
 
out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
 }
 
 static void si_emit_max_4_sample_locs(struct radeon_winsys_cs *cs,
  uint64_t centroid_priority,
  uint32_t sample_locs)
@@ -176,27 +175,27 @@ void si_emit_sample_locations(struct radeon_winsys_cs 
*cs, int nr_samples)
 {
switch (nr_samples) {
default:
case 1:
si_emit_max_4_sample_locs(cs, centroid_priority_1x, 
sample_locs_1x);
break;
case 2:
si_emit_max_4_sample_locs(cs, centroid_priority_2x, 
sample_locs_2x);
break;
case 4:
-   si_emit_max_4_sample_locs(cs, centroid_priority_4x, 
sample_locs_4x);
+   si_emit_max_4_sample_locs(cs, centroid_priority_4x, 
sample_locs_4x_16x[0]);
break;
case 8:
si_emit_max_16_sample_locs(cs, centroid_priority_8x, 
sample_locs_8x, 8);
break;
case 16:
-   si_emit_max_16_sample_locs(cs, centroid_priority_16x, 
sample_locs_16x, 16);
+   si_emit_max_16_sample_locs(cs, centroid_priority_16x, 
sample_locs_4x_16x, 16);
break;
}
 }
 
 void si_init_msaa_functions(struct si_context *sctx)
 {
int i;
 
sctx->b.get_sample_position = si_get_sample_position;
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/9] RadeonSI: Implement EQAA

2018-05-01 Thread Marek Olšák

Hi,

This series implements EQAA (enhanced quality anti-aliasing), though
some might argue that the proper name should be "flexible multi-sample
anti-aliasing" or flexible MSAA.

It's controlled via an environment variable.

This gives our users the ability to switch an application which is
using MSAA to use EQAA instead by changing the number of color, Z, and
coverage samples independently. Yes, the requirement is that an app
has to use MSAA first.

Please review.

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/9] radeonsi: reorder sample locations as required by EQAA

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c  |   3 -
 src/gallium/drivers/radeonsi/si_state_msaa.c | 154 ---
 2 files changed, 98 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 62d0ed99d94..3f9332081bf 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4688,23 +4688,20 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
}
 
si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (!has_clear_state)
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->chip_class < CIK)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, 
S_008A14_NUM_CLIP_SEQ(3) |
   S_008A14_CLIP_VTX_REORDER_ENA(1));
 
-   si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
-   si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
-
if (!has_clear_state)
si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
/* CLEAR_STATE doesn't clear these correctly on certain generations.
 * I don't know why. Deduced by trial and error.
 */
if (sctx->chip_class <= CIK) {
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, 
S_028204_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, 
S_028240_WINDOW_OFFSET_DISABLE(1));
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 7ee17a9f292..0f9e0fea1c7 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -30,45 +30,97 @@
 (((unsigned)(s1x) & 0xf) << 8)  | (((unsigned)(s1y) & 0xf) << 12) | \
 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
 
 /* For obtaining location coordinates from registers */
 #define SEXT4(x)   ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0)))
 #define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf)
 #define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 
2)
 #define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 
2 + 1)
 
+/* The following sample ordering is required by EQAA.
+ *
+ * Sample 0 is approx. in the top-left quadrant.
+ * Sample 1 is approx. in the bottom-right quadrant.
+ *
+ * Sample 2 is approx. in the bottom-left quadrant.
+ * Sample 3 is approx. in the top-right quadrant.
+ * (sample I={2,3} adds more detail to the vicinity of sample I-2)
+ *
+ * Sample 4 is approx. in the same quadrant as sample 0. (top-left)
+ * Sample 5 is approx. in the same quadrant as sample 1. (bottom-right)
+ * Sample 6 is approx. in the same quadrant as sample 2. (bottom-left)
+ * Sample 7 is approx. in the same quadrant as sample 3. (top-right)
+ * (sample I={4,5,6,7} adds more detail to the vicinity of sample I-4)
+ *
+ * The next 8 samples add more detail to the vicinity of the previous samples.
+ * (sample I (I >= 8) adds more detail to the vicinity of sample I-8)
+ *
+ * The ordering is specified such that:
+ *   If we take the first 2 samples, we should get good 2x MSAA.
+ *   If we add 2 more samples, we should get good 4x MSAA with the same sample 
locations.
+ *   If we add 4 more samples, we should get good 8x MSAA with the same sample 
locations.
+ *   If we add 8 more samples, we should get perfect 16x MSAA with the same 
sample locations.
+ *
+ * The ordering also allows finding samples in the same vicinity.
+ *
+ * Group N of 2 samples in the same vicinity in 16x MSAA: {N,N+8}
+ * Group N of 2 samples in the same vicinity in 8x MSAA: {N,N+4}
+ * Group N of 2 samples in the same vicinity in 4x MSAA: {N,N+2}
+ *
+ * Groups of 4 samples in the same vicinity in 16x MSAA:
+ *   Top left: {0,4,8,12}
+ *   Bottom right: {1,5,9,13}
+ *   Bottom left:  {2,6,10,14}
+ *   Top right:{3,7,11,15}
+ *
+ * Groups of 4 samples in the same vicinity in 8x MSAA:
+ *   Left half:  {0,2,4,6}
+ *   Right half: {1,3,5,7}
+ *
+ * Groups of 8 samples in the same vicinity in 16x MSAA:
+ *   Left half:  {0,2,4,6,8,10,12,14}
+ *   Right half: {1,3,5,7,9,11,13,15}
+ */
+
 /* 1x MSAA */
 static const uint32_t sample_locs_1x =
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0); /* S1, S2, S3 fields are not 
used by 1x */
+static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
 
 /* 2x MSAA */
 static const uint32_t sample_locs_2x =
-   FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 
2x MSAA */
+   FILL_SREG(-4,-4,   4, 4,   0, 0,   0, 0); /* S2 & S3 fields

[Mesa-dev] [PATCH 3/9] radeonsi: improve quality of 16 sample locations

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

This results in better 16x and 8x quality when using these locations.
Verified with the piglit MSAA accuracy test.
---
 src/gallium/drivers/radeonsi/si_state_msaa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index b6504d53a2a..5066c31319e 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -100,26 +100,26 @@ static const uint32_t sample_locs_8x[] = {
FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0),
 };
 static const uint64_t centroid_priority_8x = 0x3542017635420176ull;
 
 /* 4x and 16x MSAA
  * (the first 4 locations happen to be optimal for 4x MSAA, better than
  *  the standard DX 4x locations)
  */
 static const uint32_t sample_locs_4x_16x[] = {
FILL_SREG(-5,-2,   5, 3,  -2, 6,   3,-5),
-   FILL_SREG(-7,-8,   1, 1,  -6, 4,   7,-4),
+   FILL_SREG(-6,-7,   1, 1,  -6, 4,   7,-3),
FILL_SREG(-1,-3,   6, 7,  -3, 2,   0,-7),
FILL_SREG(-4,-6,   2, 5,  -8, 0,   4,-1),
 };
 static const uint64_t centroid_priority_4x = 0x2310231023102310ull;
-static const uint64_t centroid_priority_16x = 0x497ec6b231d0fa85ull;
+static const uint64_t centroid_priority_16x = 0x49e7c6b231d0fa85ull;
 
 static void si_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
   unsigned sample_index, float *out_value)
 {
const uint32_t *sample_locs;
 
switch (sample_count) {
case 1:
default:
sample_locs = &sample_locs_1x;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/9] ac/surface: add EQAA support

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_surface.c   | 29 +--
 src/amd/common/ac_surface.h   |  3 +-
 src/amd/vulkan/radv_image.c   |  1 +
 src/gallium/drivers/radeon/radeon_winsys.h|  1 +
 src/gallium/drivers/radeonsi/si_texture.c |  4 +--
 .../winsys/amdgpu/drm/amdgpu_surface.c|  2 ++
 .../winsys/radeon/drm/radeon_drm_surface.c|  6 ++--
 7 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 5030d10242e..16c4da706ca 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -242,24 +242,41 @@ static int surf_config_sanity(const struct ac_surf_config 
*config,
!config->info.array_size || !config->info.levels)
return -EINVAL;
 
switch (config->info.samples) {
case 0:
case 1:
case 2:
case 4:
case 8:
break;
+   case 16:
+   if (flags & RADEON_SURF_Z_OR_SBUFFER)
+   return -EINVAL;
+   break;
default:
return -EINVAL;
}
 
+   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
+   switch (config->info.color_samples) {
+   case 0:
+   case 1:
+   case 2:
+   case 4:
+   case 8:
+   break;
+   default:
+   return -EINVAL;
+   }
+   }
+
if (config->is_3d && config->info.array_size > 1)
return -EINVAL;
if (config->is_cube && config->info.depth > 1)
return -EINVAL;
 
return 0;
 }
 
 static int gfx6_compute_level(ADDR_HANDLE addrlib,
  const struct ac_surf_config *config,
@@ -600,23 +617,28 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
break;
default:
assert(0);
}
}
else {
AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
}
 
AddrDccIn.numSamples = AddrSurfInfoIn.numSamples =
-   config->info.samples ? config->info.samples : 1;
+   MAX2(1, config->info.samples);
AddrSurfInfoIn.tileIndex = -1;
 
+   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+   AddrDccIn.numSamples = AddrSurfInfoIn.numFrags =
+   MAX2(1, config->info.color_samples);
+   }
+
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
else
AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
 
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
@@ -1315,23 +1337,26 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
/* flags.texture currently refers to TC-compatible HTILE */
AddrSurfInfoIn.flags.texture = AddrSurfInfoIn.flags.color ||
   surf->flags & 
RADEON_SURF_TC_COMPATIBLE_HTILE;
AddrSurfInfoIn.flags.opt4space = 1;
 
AddrSurfInfoIn.numMipLevels = config->info.levels;
-   AddrSurfInfoIn.numSamples = config->info.samples ? config->info.samples 
: 1;
+   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
 
+   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
+   AddrSurfInfoIn.numFrags = MAX2(1, config->info.color_samples);
+
/* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
 * must sample 1D textures as 2D. */
if (config->is_3d)
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
else
AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
 
AddrSurfInfoIn.width = config->info.width;
AddrSurfInfoIn.height = config->info.height;
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 45fb8045e53..864b5bad529 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -217,21 +217,22 @@ struct radeon_surf {
 
 /* GFX9+ return values. */
 struct gfx9_surf_layout gfx9;
 } u;
 };
 
 struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
-   uint8_t samples;
+   uint8_t

[Mesa-dev] [PATCH 6/9] radeonsi: support creating EQAA color textures

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_clear.c   |  4 +-
 src/gallium/drivers/radeonsi/si_pipe.h|  1 +
 src/gallium/drivers/radeonsi/si_texture.c | 45 +++
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 0f3546b02da..23977186611 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -240,32 +240,32 @@ void vi_dcc_clear_level(struct si_context *sctx,
} else {
dcc_buffer = &rtex->buffer.b.b;
dcc_offset = rtex->dcc_offset;
}
 
if (sctx->chip_class >= GFX9) {
/* Mipmap level clears aren't implemented. */
assert(rtex->buffer.b.b.last_level == 0);
/* 4x and 8x MSAA needs a sophisticated compute shader for
 * the clear. See AMDVLK. */
-   assert(rtex->buffer.b.b.nr_samples <= 2);
+   assert(rtex->num_color_samples <= 2);
clear_size = rtex->surface.dcc_size;
} else {
unsigned num_layers = util_num_layers(&rtex->buffer.b.b, level);
 
/* If this is 0, fast clear isn't possible. (can occur with 
MSAA) */
assert(rtex->surface.u.legacy.level[level].dcc_fast_clear_size);
/* Layered 4x and 8x MSAA DCC fast clears need to clear
 * dcc_fast_clear_size bytes for each layer. A compute shader
 * would be more efficient than separate per-layer clear 
operations.
 */
-   assert(rtex->buffer.b.b.nr_samples <= 2 || num_layers == 1);
+   assert(rtex->num_color_samples <= 2 || num_layers == 1);
 
dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
clear_size = 
rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
 num_layers;
}
 
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
clear_value, SI_COHERENCY_CB_META);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b5caf944759..241385baed7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -247,20 +247,21 @@ struct r600_texture {
 
/* Colorbuffer compression and fast clear. */
uint64_tfmask_offset;
struct r600_cmask_info  cmask;
struct r600_resource*cmask_buffer;
uint64_tdcc_offset; /* 0 = disabled */
unsignedcb_color_info; /* fast clear enable bit 
*/
unsignedcolor_clear_value[2];
unsignedlast_msaa_resolve_target_micro_mode;
unsignednum_level0_transfers;
+   unsignednum_color_samples;
 
/* Depth buffer compression and fast clear. */
uint64_thtile_offset;
float   depth_clear_value;
uint16_tdirty_level_mask; /* each bit says if 
that mipmap is compressed */
uint16_tstencil_dirty_level_mask; /* each bit 
says if that mipmap is compressed */
enum pipe_formatdb_render_format:16;
uint8_t stencil_clear_value;
booltc_compatible_htile:1;
booldepth_cleared:1; /* if it was cleared 
at least once */
diff --git a/src/gallium/drivers/radeonsi/si_texture.c 
b/src/gallium/drivers/radeonsi/si_texture.c
index 1e328b90b62..52b8b87732f 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -213,20 +213,21 @@ static unsigned si_texture_get_offset(struct si_screen 
*sscreen,
   box->z * 
(uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 +
   (box->y / rtex->surface.blk_h *
rtex->surface.u.legacy.level[level].nblk_x +
box->x / rtex->surface.blk_w) * rtex->surface.bpe;
}
 }
 
 static int si_init_surface(struct si_screen *sscreen,
   struct radeon_surf *surface,
   const struct pipe_resource *ptex,
+  unsigned num_color_samples,
   enum radeon_surf_mode array_mode,
   unsigned pitch_in_bytes_override,
   unsigned offset,
   bool is_imported,
   bool is_scanout,
   bool is_flushed_depth,
   bool tc_compatible_htile)
 {
const struct util_format_description *desc =

[Mesa-dev] [Bug 89322] ../../libtool: line 7916: func_munge_path_list: command not found

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=89322

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |WORKSFORME
 Status|NEW |RESOLVED

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/16] radeonsi: set DB_EQAA the same as Vulkan

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

These never change, but they only affect EQAA, which isn't implemented.
---
 src/gallium/drivers/radeonsi/si_state.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index b50182582c6..62d0ed99d94 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3314,20 +3314,24 @@ static void si_emit_msaa_config(struct si_context *sctx)
S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
/* always 1: */
S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+   unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+  S_028804_INCOHERENT_EQAA_READS(1) |
+  S_028804_INTERPOLATE_COMP_Z(1) |
+  S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
 
int setup_samples = sctx->framebuffer.nr_samples > 1 ? 
sctx->framebuffer.nr_samples :
sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES 
: 0;
 
/* Required by OpenGL line rasterization.
 *
 * TODO: We should also enable perpendicular endcaps for AA lines,
 *   but that requires implementing line stippling in the pixel
 *   shader. SC can only do line stippling with axis-aligned
 *   endcaps.
@@ -3350,45 +3354,41 @@ static void si_emit_msaa_config(struct si_context *sctx)
 
radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* 
R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* 
R_028BE0_PA_SC_AA_CONFIG */
 
if (sctx->framebuffer.nr_samples > 1) {
radeon_set_context_reg(cs, R_028804_DB_EQAA,
+  db_eqaa |
   
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
   
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
   
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
-  
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
-  
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
-  
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+  
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
   
S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
   sc_mode_cntl_1);
} else if (sctx->smoothing_enabled) {
radeon_set_context_reg(cs, R_028804_DB_EQAA,
-  
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
-  
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
+  db_eqaa |
   
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
   sc_mode_cntl_1);
}
} else {
radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl); /* R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* R_028BE0_PA_SC_AA_CONFIG */
 
-   radeon_set_context_reg(cs, R_028804_DB_EQAA,
-  S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
-  S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+   radeon_set_context_reg(cs, R_028804_DB_EQAA, db_eqaa);
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
   sc_mode_cntl_1);
}
 
/* GFX9: Flush DFSM when the AA mode changes. */
if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs,

[Mesa-dev] [PATCH 08/16] ac/surface/gfx6: don't overallocate mipmapped HTILE

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_surface.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index b2af1f70b69..341a7854fe5 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -841,22 +841,27 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 *
 * "dcc_alignment * 4" was determined by trial and error.
 */
surf->dcc_size = align64(surf->surf_size >> 8,
 surf->dcc_alignment * 4);
}
 
/* Make sure HTILE covers the whole miptree, because the shader reads
 * TC-compatible HTILE even for levels where it's disabled by DB.
 */
-   if (surf->htile_size && config->info.levels > 1)
-   surf->htile_size *= 2;
+   if (surf->htile_size && config->info.levels > 1 &&
+   surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
+   surf->htile_size =
+   surf->surf_size * 4 / (8 * 8 * surf->bpe *
+  MAX2(1, config->info.samples));
+   surf->htile_size = align64(surf->htile_size, 
surf->htile_alignment);
+   }
 
surf->is_linear = surf->u.legacy.level[0].mode == 
RADEON_SURF_MODE_LINEAR_ALIGNED;
surf->is_displayable = surf->is_linear ||
   surf->micro_tile_mode == 
RADEON_MICRO_MODE_DISPLAY ||
   surf->micro_tile_mode == 
RADEON_MICRO_MODE_ROTATED;
return 0;
 }
 
 /* This is only called when expecting a tiled layout. */
 static int
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/16] radeonsi: simplify arrays of sample locations

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_msaa.c | 105 +++
 1 file changed, 40 insertions(+), 65 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 19bed09df4b..2ad093ad485 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -19,176 +19,151 @@
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
  * SOFTWARE.
  */
 
 #include "si_build_pm4.h"
 
 /* For MSAA sample positions. */
 #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
-   (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |  \
-   (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |  
   \
-   (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | 
   \
+   ((((unsigned)(s0x) & 0xf) << 0)  | (((unsigned)(s0y) & 0xf) << 4)  | \
+(((unsigned)(s1x) & 0xf) << 8)  | (((unsigned)(s1y) & 0xf) << 12) | \
+(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
 
 /* 2xMSAA
  * There are two locations (4, 4), (-4, -4). */
-static const uint32_t sample_locs_2x[4] = {
-   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
-   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
-   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
-   FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
-};
+static const uint32_t sample_locs_2x =
+   FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 
2x MSAA */
+
 /* 4xMSAA
  * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
-static const uint32_t sample_locs_4x[4] = {
-   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
-   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
-   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
-   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
-};
+static const uint32_t sample_locs_4x =
+   FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6);
 
 /* Cayman 8xMSAA */
 static const uint32_t sample_locs_8x[] = {
FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
-   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
-   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
-   FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
-   FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
-   FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
-   FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
 };
 /* Cayman 16xMSAA */
 static const uint32_t sample_locs_16x[] = {
FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
-   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
-   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
-   FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
-   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
-   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
-   FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
-   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
-   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
-   FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
-   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
-   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
-   FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
 };
 
 static void si_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
   unsigned sample_index, float *out_value)
 {
int offset, index;
struct {
int idx:4;
} val;
 
switch (sample_count) {
case 1:
default:
out_value[0] = out_value[1] = 0.5;
break;
case 2:
offset = 4 * (sample_index * 2);
-   val.idx = (sample_locs_2x[0] >> offset) & 0xf;
+   val.idx = (sample_locs_2x >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_2x[0] >> (offset + 4)) & 0xf;
+   val.idx = (sample_locs_2x >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 4:
offset = 4 * (sample_index * 2);
-   val.idx = (sample_locs_4x[0] >> offset) & 0xf;
+   val.idx = (sample_locs_4x >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_4x[0] >> (offset + 4)) & 0xf;
+   val.idx = (sample_locs_4x >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 8:
offset = 4 * (sample_index % 4 * 2);
-

[Mesa-dev] [PATCH 05/16] ac: sort raster configs

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c | 66 +++-
 1 file changed, 27 insertions(+), 39 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 3ba7b4b717a..fd49dbefd58 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -559,85 +559,73 @@ ac_get_gs_table_depth(enum chip_class chip_class, enum 
radeon_family family)
unreachable("Unknown GPU");
}
 }
 
 void
 ac_get_raster_config(struct radeon_info *info,
 uint32_t *raster_config_p,
 uint32_t *raster_config_1_p)
 {
unsigned raster_config, raster_config_1;
+
switch (info->family) {
-   case CHIP_TAHITI:
-   case CHIP_PITCAIRN:
-   raster_config = 0x2a00126a;
+   /* 1 SE / 1 RB */
+   case CHIP_HAINAN:
+   case CHIP_KABINI:
+   case CHIP_MULLINS:
+   case CHIP_STONEY:
+   raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
+   /* 1 SE / 4 RBs */
case CHIP_VERDE:
raster_config = 0x0000124a;
raster_config_1 = 0x00000000;
break;
+   /* 1 SE / 2 RBs (Oland is special) */
case CHIP_OLAND:
raster_config = 0x00000082;
raster_config_1 = 0x00000000;
break;
-   case CHIP_HAINAN:
-   raster_config = 0x00000000;
+   /* 1 SE / 2 RBs */
+   case CHIP_KAVERI:
+   case CHIP_ICELAND:
+   case CHIP_CARRIZO:
+   raster_config = 0x00000002;
raster_config_1 = 0x00000000;
break;
+   /* 2 SEs / 4 RBs */
case CHIP_BONAIRE:
+   case CHIP_POLARIS11:
+   case CHIP_POLARIS12:
raster_config = 0x16000012;
raster_config_1 = 0x00000000;
break;
-   case CHIP_HAWAII:
-   raster_config = 0x3a00161a;
-   raster_config_1 = 0x0000002e;
-   break;
-   case CHIP_FIJI:
-   raster_config = 0x3a00161a;
-   raster_config_1 = 0x0000002e;
+   /* 2 SEs / 8 RBs */
+   case CHIP_TAHITI:
+   case CHIP_PITCAIRN:
+   raster_config = 0x2a00126a;
+   raster_config_1 = 0x00000000;
break;
+   /* 4 SEs / 8 RBs */
+   case CHIP_TONGA:
case CHIP_POLARIS10:
raster_config = 0x16000012;
raster_config_1 = 0x0000002a;
break;
-   case CHIP_POLARIS11:
-   case CHIP_POLARIS12:
-   raster_config = 0x16000012;
-   raster_config_1 = 0x00000000;
-   break;
+   /* 4 SEs / 16 RBs */
+   case CHIP_HAWAII:
+   case CHIP_FIJI:
case CHIP_VEGAM:
raster_config = 0x3a00161a;
raster_config_1 = 0x0000002e;
break;
-   case CHIP_TONGA:
-   raster_config = 0x16000012;
-   raster_config_1 = 0x0000002a;
-   break;
-   case CHIP_ICELAND:
-   raster_config = 0x00000002;
-   raster_config_1 = 0x00000000;
-   break;
-   case CHIP_CARRIZO:
-   raster_config = 0x00000002;
-   raster_config_1 = 0x00000000;
-   break;
-   case CHIP_KAVERI:
-   raster_config = 0x00000002;
-   raster_config_1 = 0x00000000;
-   break;
-   case CHIP_KABINI:
-   case CHIP_MULLINS:
-   case CHIP_STONEY:
-   raster_config = 0x00000000;
-   raster_config_1 = 0x00000000;
-   break;
default:
fprintf(stderr,
"ac: Unknown GPU, using 0 for raster_config\n");
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
}
 
/* Fiji: Old kernels have incorrect tiling config. This decreases
 * RB performance by 25%. (it disables 1 RB in the second packer)
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/16] ac/surface: unify common legacy and gfx9 fmask fields

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_surface.c   | 20 +--
 src/amd/common/ac_surface.h   | 10 +++---
 src/amd/vulkan/radv_image.c   | 12 +--
 src/gallium/drivers/radeonsi/si_texture.c | 16 +++
 .../winsys/radeon/drm/radeon_drm_surface.c|  6 +++---
 5 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 69a0c8a0f2f..5030d10242e 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -846,23 +846,23 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
fin.numSlices = AddrSurfInfoIn.numSlices;
fin.numSamples = AddrSurfInfoIn.numSamples;
fin.numFrags = AddrSurfInfoIn.numFrags;
fin.tileIndex = AddrSurfInfoOut.tileIndex;
fout.pTileInfo = &fmask_tile_info;
 
r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
if (r)
return r;
 
-   surf->u.legacy.fmask.size = fout.fmaskBytes;
-   surf->u.legacy.fmask.alignment = fout.baseAlign;
-   surf->u.legacy.fmask.tile_swizzle = 0;
+   surf->fmask_size = fout.fmaskBytes;
+   surf->fmask_alignment = fout.baseAlign;
+   surf->fmask_tile_swizzle = 0;
 
surf->u.legacy.fmask.slice_tile_max =
(fout.pitch * fout.height) / 64;
if (surf->u.legacy.fmask.slice_tile_max)
surf->u.legacy.fmask.slice_tile_max -= 1;
 
surf->u.legacy.fmask.tiling_index = fout.tileIndex;
surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
 
@@ -881,21 +881,21 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
xin.macroModeIndex = fout.macroModeIndex;
xin.pTileInfo = fout.pTileInfo;
xin.tileMode = fin.tileMode;
 
int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
if (r != ADDR_OK)
return r;
 
assert(xout.tileSwizzle <=
   u_bit_consecutive(0, sizeof(surf->tile_swizzle) 
* 8));
-   surf->u.legacy.fmask.tile_swizzle = xout.tileSwizzle;
+   surf->fmask_tile_swizzle = xout.tileSwizzle;
}
}
 
/* Recalculate the whole DCC miptree size including disabled levels.
 * This is what addrlib does, but calling addrlib would be a lot more
 * complicated.
 */
if (surf->dcc_size && config->info.levels > 1) {
/* The smallest miplevels that are never compressed by DCC
 * still read the DCC buffer via TC if the base level uses DCC,
@@ -1176,22 +1176,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
fin.numSlices = in->numSlices;
fin.numSamples = in->numSamples;
fin.numFrags = in->numFrags;
 
ret = Addr2ComputeFmaskInfo(addrlib, &fin, &fout);
if (ret != ADDR_OK)
return ret;
 
surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
surf->u.gfx9.fmask.epitch = fout.pitch - 1;
-   surf->u.gfx9.fmask_size = fout.fmaskBytes;
-   surf->u.gfx9.fmask_alignment = fout.baseAlign;
+   surf->fmask_size = fout.fmaskBytes;
+   surf->fmask_alignment = fout.baseAlign;
 
/* Compute tile swizzle for the FMASK surface. */
if (config->info.fmask_surf_index &&
fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
!(surf->flags & RADEON_SURF_SHAREABLE)) {
ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
 
xin.size = 
sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
xout.size = 
sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
@@ -1203,22 +1203,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
xin.resourceType = in->resourceType;
xin.format = in->format;
xin.numSamples = in->numSamples;
xin.numFrags = in->numFrags;
 
ret = Addr2ComputePipeBankXor(addrlib, &xin, 
&xout);
if (ret != ADDR_OK)
return ret;
 
assert(xout.pipeBankXor <=
-

[Mesa-dev] [PATCH 11/16] radeonsi: remove r600_fmask_info

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

radeon_surf contains almost everything.
---
 src/gallium/drivers/radeonsi/si_blit.c|  6 +-
 src/gallium/drivers/radeonsi/si_clear.c   |  2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c | 10 ++--
 src/gallium/drivers/radeonsi/si_pipe.h| 17 +-
 src/gallium/drivers/radeonsi/si_state.c   | 38 ++--
 src/gallium/drivers/radeonsi/si_texture.c | 60 ++-
 6 files changed, 46 insertions(+), 87 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 1cbd26f46e0..6f4cd1f9044 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -463,21 +463,21 @@ static void si_blit_decompress_color(struct si_context 
*sctx,
if (need_dcc_decompress) {
custom_blend = sctx->custom_blend_dcc_decompress;
 
assert(rtex->dcc_offset);
 
/* disable levels without DCC */
for (int i = first_level; i <= last_level; i++) {
if (!vi_dcc_enabled(rtex, i))
level_mask &= ~(1 << i);
}
-   } else if (rtex->fmask.size) {
+   } else if (rtex->surface.fmask_size) {
custom_blend = sctx->custom_blend_fmask_decompress;
} else {
custom_blend = sctx->custom_blend_eliminate_fastclear;
}
 
sctx->decompression_enabled = true;
 
while (level_mask) {
unsigned level = u_bit_scan(&level_mask);
 
@@ -521,21 +521,21 @@ static void si_blit_decompress_color(struct si_context 
*sctx,
sctx->decompression_enabled = false;
si_make_CB_shader_coherent(sctx, rtex->buffer.b.b.nr_samples,
   vi_dcc_enabled(rtex, first_level));
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
unsigned first_level, unsigned last_level)
 {
/* CMASK or DCC can be discarded and we can still end up here. */
-   if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
+   if (!tex->cmask.size && !tex->surface.fmask_size && !tex->dcc_offset)
return;
 
si_blit_decompress_color(sctx, tex, first_level, last_level, 0,
 util_max_layer(&tex->buffer.b.b, first_level),
 false);
 }
 
 static void
 si_decompress_sampler_color_textures(struct si_context *sctx,
 struct si_samplers *textures)
@@ -842,21 +842,21 @@ static void si_decompress_subresource(struct pipe_context 
*ctx,
 * by dirtying the framebuffer.
 */
if (sctx->framebuffer.state.zsbuf &&
sctx->framebuffer.state.zsbuf->u.tex.level == level &&
sctx->framebuffer.state.zsbuf->texture == tex)
si_update_fb_dirtiness_after_rendering(sctx);
 
si_decompress_depth(sctx, rtex, planes,
level, level,
first_layer, last_layer);
-   } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
+   } else if (rtex->surface.fmask_size || rtex->cmask.size || 
rtex->dcc_offset) {
/* If we've rendered into the framebuffer and it's a blitting
 * source, make sure the decompression pass is invoked
 * by dirtying the framebuffer.
 */
for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) 
{
if (sctx->framebuffer.state.cbufs[i] &&
sctx->framebuffer.state.cbufs[i]->u.tex.level == 
level &&
sctx->framebuffer.state.cbufs[i]->texture == tex) {
si_update_fb_dirtiness_after_rendering(sctx);
break;
diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 0de51488f59..8ecd47fea9b 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -570,21 +570,21 @@ static void si_clear(struct pipe_context *ctx, unsigned 
buffers,
 
/* These buffers cannot use fast clear, make sure to disable 
expansion. */
for (unsigned i = 0; i < fb->nr_cbufs; i++) {
struct r600_texture *tex;
 
/* If not clearing this buffer, skip. */
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || 
!fb->cbufs[i])
continue;
 
tex = (struct r600_texture *)fb->cbufs[i]->texture;
-   if (tex->fmask.size == 0)
+   if (tex->surface.fmask_size == 0)
tex->dirty_level_mask &= ~(1 <<

[Mesa-dev] [PATCH 12/16] radeonsi: don't update clear color registers if they don't change

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_clear.c | 32 -
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index 8ecd47fea9b..0f3546b02da 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -58,21 +58,21 @@ static void si_alloc_separate_cmask(struct si_screen 
*sscreen,
}
 
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
 
rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
 
p_atomic_inc(&sscreen->compressed_colortex_counter);
 }
 
-static void si_set_clear_color(struct r600_texture *rtex,
+static bool si_set_clear_color(struct r600_texture *rtex,
   enum pipe_format surface_format,
   const union pipe_color_union *color)
 {
union util_color uc;
 
memset(&uc, 0, sizeof(uc));
 
if (rtex->surface.bpe == 16) {
/* DCC fast clear only:
 *   CLEAR_WORD0 = R = G = B
@@ -83,21 +83,25 @@ static void si_set_clear_color(struct r600_texture *rtex,
uc.ui[0] = color->ui[0];
uc.ui[1] = color->ui[3];
} else if (util_format_is_pure_uint(surface_format)) {
util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 
0, 1, 1);
} else if (util_format_is_pure_sint(surface_format)) {
util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 
1, 1);
} else {
util_pack_color(color->f, surface_format, &uc);
}
 
+   if (memcmp(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
+   return false;
+
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
+   return true;
 }
 
 /** Linearize and convert luminace/intensity to red. */
 enum pipe_format si_simplify_cb_format(enum pipe_format format)
 {
format = util_format_linear(format);
format = util_format_luminance_to_red(format);
return util_format_intensity_to_red(format);
 }
 
@@ -538,24 +542,24 @@ static void si_do_fast_color_clear(struct si_context 
*sctx,
 
if (need_decompress_pass &&
!(tex->dirty_level_mask & (1 << level))) {
tex->dirty_level_mask |= 1 << level;

p_atomic_inc(&sctx->screen->compressed_colortex_counter);
}
 
/* We can change the micro tile mode before a full clear. */
si_set_optimal_micro_tile_mode(sctx->screen, tex);
 
-   si_set_clear_color(tex, fb->cbufs[i]->format, color);
-
-   sctx->framebuffer.dirty_cbufs |= 1 << i;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
+   if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {
+   sctx->framebuffer.dirty_cbufs |= 1 << i;
+   si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
+   }
*buffers &= ~clear_bit;
}
 }
 
 static void si_clear(struct pipe_context *ctx, unsigned buffers,
 const union pipe_color_union *color,
 double depth, unsigned stencil)
 {
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
@@ -589,41 +593,47 @@ static void si_clear(struct pipe_context *ctx, unsigned 
buffers,
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
if (buffers & PIPE_CLEAR_DEPTH &&
(!zstex->tc_compatible_htile ||
 depth == 0 || depth == 1)) {
/* Need to disable EXPCLEAR temporarily if clearing
 * to a new value. */
if (!zstex->depth_cleared || zstex->depth_clear_value 
!= depth) {
sctx->db_depth_disable_expclear = true;
}
 
-   zstex->depth_clear_value = depth;
-   sctx->framebuffer.dirty_zsbuf = true;
-   si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 
/* updates DB_DEPTH_CLEAR */
+   if (zstex->depth_clear_value != (float)depth) {
+   /* Update DB_DEPTH_CLEAR. */
+   zstex->depth_clear_value = depth;
+   sctx->framebuffer.dirty_zsbuf = true;
+   si_mark_atom_dirty(sctx, 
&sctx->atoms.s.framebuffer);
+   }
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, 
&sctx->atoms.s.db_render_state);
}
 
/* TC-compatible HTILE only supports stencil clears to 0. */
if (buffers & PIPE_CLEAR_STENCIL &&

[Mesa-dev] [PATCH 06/16] ac: set correct LLVM processor names for Raven & Vega12

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_llvm_util.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index a06c83a2963..be2d92b4c08 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -108,23 +108,25 @@ const char *ac_get_llvm_processor_name(enum radeon_family 
family)
return "fiji";
case CHIP_STONEY:
return "stoney";
case CHIP_POLARIS10:
return "polaris10";
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
return "polaris11";
case CHIP_VEGA10:
-   case CHIP_VEGA12:
-   case CHIP_RAVEN:
return "gfx900";
+   case CHIP_RAVEN:
+   return "gfx902";
+   case CHIP_VEGA12:
+   return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
default:
return "";
}
 }
 
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
  enum ac_target_machine_options 
tm_options,
  const char **out_triple)
 {
assert(family >= CHIP_TAHITI);
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 13/16] radeonsi: remove CM_ prefixes

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index f7878ed2367..b50182582c6 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3343,24 +3343,24 @@ static void si_emit_msaa_config(struct si_context *sctx)
7, /* 8x MSAA */
8, /* 16x MSAA */
};
unsigned log_samples = util_logbase2(setup_samples);
unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
unsigned log_ps_iter_samples =
util_logbase2(util_next_power_of_two(ps_iter_samples));
 
radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl |
-   S_028BDC_EXPAND_LINE_WIDTH(1)); /* 
CM_R_028BDC_PA_SC_LINE_CNTL */
+   S_028BDC_EXPAND_LINE_WIDTH(1)); /* 
R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
-   S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* 
CM_R_028BE0_PA_SC_AA_CONFIG */
+   S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* 
R_028BE0_PA_SC_AA_CONFIG */
 
if (sctx->framebuffer.nr_samples > 1) {
radeon_set_context_reg(cs, R_028804_DB_EQAA,
   
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
   
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
   
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
   
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
   
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
   
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
@@ -3369,22 +3369,22 @@ static void si_emit_msaa_config(struct si_context *sctx)
} else if (sctx->smoothing_enabled) {
radeon_set_context_reg(cs, R_028804_DB_EQAA,
   
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
   
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
   
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
   sc_mode_cntl_1);
}
} else {
radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
-   radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
-   radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+   radeon_emit(cs, sc_line_cntl); /* R_028BDC_PA_SC_LINE_CNTL */
+   radeon_emit(cs, 0); /* R_028BE0_PA_SC_AA_CONFIG */
 
radeon_set_context_reg(cs, R_028804_DB_EQAA,
   S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
   S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
   sc_mode_cntl_1);
}
 
/* GFX9: Flush DFSM when the AA mode changes. */
if (sctx->screen->dfsm_allowed) {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/16] ac/surface/gfx6: compute FMASK together with the color surface

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

instead of invoking FMASK computation separately.
---
 src/amd/common/ac_surface.c   | 86 ---
 src/amd/common/ac_surface.h   | 13 ++-
 src/amd/vulkan/radv_image.c   | 54 ++--
 src/gallium/drivers/radeonsi/si_texture.c | 48 ++-
 .../winsys/radeon/drm/radeon_drm_surface.c| 48 +++
 5 files changed, 149 insertions(+), 100 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 341a7854fe5..69a0c8a0f2f 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -220,22 +220,30 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info 
*info,
 
if (max_alignment) {
addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, 
&addrGetMaxAlignmentsOutput);
if (addrRet == ADDR_OK){
*max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
}
}
return addrCreateOutput.hLib;
 }
 
-static int surf_config_sanity(const struct ac_surf_config *config)
+static int surf_config_sanity(const struct ac_surf_config *config,
+ unsigned flags)
 {
+   /* FMASK is allocated together with the color surface and can't be
+* allocated separately.
+*/
+   assert(!(flags & RADEON_SURF_FMASK));
+   if (flags & RADEON_SURF_FMASK)
+   return -EINVAL;
+
/* all dimension must be at least 1 ! */
if (!config->info.width || !config->info.height || !config->info.depth 
||
!config->info.array_size || !config->info.levels)
return -EINVAL;
 
switch (config->info.samples) {
case 0:
case 1:
case 2:
case 4:
@@ -438,21 +446,20 @@ static unsigned cik_get_macro_tile_index(struct 
radeon_surf *surf)
return index;
 }
 
 static bool get_display_flag(const struct ac_surf_config *config,
 const struct radeon_surf *surf)
 {
unsigned num_channels = config->info.num_channels;
unsigned bpe = surf->bpe;
 
if (surf->flags & RADEON_SURF_SCANOUT &&
-   !(surf->flags & RADEON_SURF_FMASK) &&
config->info.samples <= 1 &&
surf->blk_w <= 2 && surf->blk_h == 1) {
/* subsampled */
if (surf->blk_w == 2 && surf->blk_h == 1)
return true;
 
if  (/* RGBA8 or RGBA16F */
 (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
 /* R5G6B5 or R5G5B5A1 */
 (bpe == 2 && num_channels >= 3) ||
@@ -549,23 +556,22 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
 
compressed = surf->blk_w == 4 && surf->blk_h == 4;
 
-   /* MSAA and FMASK require 2D tiling. */
-   if (config->info.samples > 1 ||
-   (surf->flags & RADEON_SURF_FMASK))
+   /* MSAA requires 2D tiling. */
+   if (config->info.samples > 1)
mode = RADEON_SURF_MODE_2D;
 
/* DB doesn't support linear layouts. */
if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
mode < RADEON_SURF_MODE_1D)
mode = RADEON_SURF_MODE_1D;
 
/* Set the requested tiling mode. */
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
@@ -600,29 +606,28 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
}
 
AddrDccIn.numSamples = AddrSurfInfoIn.numSamples =
config->info.samples ? config->info.samples : 1;
AddrSurfInfoIn.tileIndex = -1;
 
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
-   else if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK))
+   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
else
AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
 
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.cube = config->is_cube;
-   AddrSurfInfoIn.flags.fmask = (surf->flags & RADEON_SURF_FMASK) != 0;
AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;

[Mesa-dev] [PATCH 16/16] radeonsi: simplify si_get_sample_position

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_msaa.c | 49 
 1 file changed, 20 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c 
b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 2ad093ad485..7ee17a9f292 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -24,22 +24,31 @@
 
 #include "si_build_pm4.h"
 
 /* For MSAA sample positions. */
 #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
((((unsigned)(s0x) & 0xf) << 0)  | (((unsigned)(s0y) & 0xf) << 4)  | \
 (((unsigned)(s1x) & 0xf) << 8)  | (((unsigned)(s1y) & 0xf) << 12) | \
 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
 
-/* 2xMSAA
- * There are two locations (4, 4), (-4, -4). */
+/* For obtaining location coordinates from registers */
+#define SEXT4(x)   ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0)))
+#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf)
+#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 
2)
+#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 
2 + 1)
+
+/* 1x MSAA */
+static const uint32_t sample_locs_1x =
+   FILL_SREG( 0, 0,   0, 0,   0, 0,   0, 0); /* S1, S2, S3 fields are not 
used by 1x */
+
+/* 2x MSAA */
 static const uint32_t sample_locs_2x =
FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0); /* S2 & S3 fields are not used by 
2x MSAA */
 
 /* 4xMSAA
  * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
 static const uint32_t sample_locs_4x =
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6);
 
 /* Cayman 8xMSAA */
 static const uint32_t sample_locs_8x[] = {
@@ -50,61 +59,43 @@ static const uint32_t sample_locs_8x[] = {
 static const uint32_t sample_locs_16x[] = {
FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
 };
 
 static void si_get_sample_position(struct pipe_context *ctx, unsigned 
sample_count,
   unsigned sample_index, float *out_value)
 {
-   int offset, index;
-   struct {
-   int idx:4;
-   } val;
+   const uint32_t *sample_locs;
 
switch (sample_count) {
case 1:
default:
-   out_value[0] = out_value[1] = 0.5;
+   sample_locs = &sample_locs_1x;
break;
case 2:
-   offset = 4 * (sample_index * 2);
-   val.idx = (sample_locs_2x >> offset) & 0xf;
-   out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_2x >> (offset + 4)) & 0xf;
-   out_value[1] = (float)(val.idx + 8) / 16.0f;
+   sample_locs = &sample_locs_2x;
break;
case 4:
-   offset = 4 * (sample_index * 2);
-   val.idx = (sample_locs_4x >> offset) & 0xf;
-   out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_4x >> (offset + 4)) & 0xf;
-   out_value[1] = (float)(val.idx + 8) / 16.0f;
+   sample_locs = &sample_locs_4x;
break;
case 8:
-   offset = 4 * (sample_index % 4 * 2);
-   index = sample_index / 4;
-   val.idx = (sample_locs_8x[index] >> offset) & 0xf;
-   out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_8x[index] >> (offset + 4)) & 0xf;
-   out_value[1] = (float)(val.idx + 8) / 16.0f;
+   sample_locs = sample_locs_8x;
break;
case 16:
-   offset = 4 * (sample_index % 4 * 2);
-   index = sample_index / 4;
-   val.idx = (sample_locs_16x[index] >> offset) & 0xf;
-   out_value[0] = (float)(val.idx + 8) / 16.0f;
-   val.idx = (sample_locs_16x[index] >> (offset + 4)) & 0xf;
-   out_value[1] = (float)(val.idx + 8) / 16.0f;
+   sample_locs = sample_locs_16x;
break;
}
+
+   out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
+   out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
 }
 
 void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples)
 {
switch (nr_samples) {
default:
case 1:
radeon_set_context_reg(cs, 
R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
radeon_set_context_reg(cs, 
R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
radeon_set_context_reg(cs, 
R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 04/16] ac: remove 1 RB raster config for Iceland

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

Iceland always reports 2 RBs.
---
 src/amd/common/ac_gpu_info.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 273ccb6cb1d..3ba7b4b717a 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -558,21 +558,20 @@ ac_get_gs_table_depth(enum chip_class chip_class, enum 
radeon_family family)
default:
unreachable("Unknown GPU");
}
 }
 
 void
 ac_get_raster_config(struct radeon_info *info,
 uint32_t *raster_config_p,
 uint32_t *raster_config_1_p)
 {
-   unsigned num_rb = MIN2(info->num_render_backends, 16);
unsigned raster_config, raster_config_1;
switch (info->family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
raster_config = 0x2a00126a;
raster_config_1 = 0x;
break;
case CHIP_VERDE:
raster_config = 0x124a;
raster_config_1 = 0x;
@@ -608,24 +607,21 @@ ac_get_raster_config(struct radeon_info *info,
break;
case CHIP_VEGAM:
raster_config = 0x3a00161a;
raster_config_1 = 0x002e;
break;
case CHIP_TONGA:
raster_config = 0x1612;
raster_config_1 = 0x002a;
break;
case CHIP_ICELAND:
-   if (num_rb == 1)
-   raster_config = 0x;
-   else
-   raster_config = 0x0002;
+   raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_CARRIZO:
raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_KAVERI:
raster_config = 0x0002;
raster_config_1 = 0x;
break;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/16] ac: enable both RBs on Kaveri

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

This can result in 2x increase in performance on non-harvested Kaveris.
---
 src/amd/common/ac_gpu_info.c  | 8 ++--
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 031fd183b6f..da54e5f8b4a 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -309,21 +309,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
info->uvd_enc_supported =
uvd_enc.available_rings ? true : false;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
info->has_syncobj_wait_for_submit = info->has_syncobj && 
info->drm_minor >= 20;
info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
info->has_ctx_priority = info->drm_minor >= 22;
/* TODO: Enable this once the kernel handles it efficiently. */
info->has_local_buffers = info->drm_minor >= 20 &&
  !info->has_dedicated_vram;
+
info->num_render_backends = amdinfo->rb_pipes;
+   /* The value returned by the kernel driver was wrong. */
+   if (info->family == CHIP_KAVERI)
+   info->num_render_backends = 2;
+
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) {
fprintf(stderr, "amdgpu: clock crystal frequency is 0, 
timestamps will be wrong\n");
info->clock_crystal_freq = 1;
}
info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */
info->gb_addr_config = amdinfo->gb_addr_cfg;
if (info->chip_class == GFX9) {
info->num_tile_pipes = 1 << 
G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
info->pipe_interleave_bytes =
@@ -620,22 +625,21 @@ ac_get_raster_config(struct radeon_info *info,
raster_config = 0x;
else
raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_CARRIZO:
raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_KAVERI:
-   /* KV should be 0x0002, but that causes problems with 
radeon */
-   raster_config = 0x; /* 0x0002 */
+   raster_config = 0x0002;
raster_config_1 = 0x;
break;
case CHIP_KABINI:
case CHIP_MULLINS:
case CHIP_STONEY:
raster_config = 0x;
raster_config_1 = 0x;
break;
default:
fprintf(stderr,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 3ee243adbcc..28811c959fe 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -386,20 +386,24 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
 return false;
 }
 else if (ws->gen >= DRV_R600) {
 uint32_t tiling_config = 0;
 
 if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
   "num backends",
   &ws->info.num_render_backends))
 return false;
 
+   /* The value returned by the kernel driver was wrong. */
+   if (ws->info.family == CHIP_KAVERI)
+   ws->info.num_render_backends = 2;
+
 /* get the GPU counter frequency, failure is not fatal */
 radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
  &ws->info.clock_crystal_freq);
 
 radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
  &tiling_config);
 
 ws->info.r600_num_banks =
 ws->info.chip_class >= EVERGREEN ?
 4 << ((tiling_config & 0xf0) >> 4) :
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/16] radeonsi: work around a GPU hang due to broken indirect indexing in LLVM

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

Fixes: 6d19120da85 "radeonsi/gfx9: workaround for INTERP with indirect indexing"
Cc: 18.1 
---
 src/gallium/drivers/radeonsi/si_get.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index d4e0eab187d..c31ab43cb42 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -487,20 +487,29 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen,
 
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
/* TODO: Indirect indexing of GS inputs is unimplemented. */
if (shader == PIPE_SHADER_GEOMETRY)
return 0;
 
if (shader == PIPE_SHADER_VERTEX &&
!sscreen->llvm_has_working_vgpr_indexing)
return 0;
 
+   /* Doing indirect indexing on GFX9 with LLVM 6.0 hangs.
+* This means we don't support INTERP instructions with
+* indirect indexing on inputs.
+*/
+   if (shader == PIPE_SHADER_FRAGMENT &&
+   !sscreen->llvm_has_working_vgpr_indexing &&
+   HAVE_LLVM < 0x0700)
+   return 0;
+
/* TCS and TES load inputs directly from LDS or offchip
 * memory, so indirect indexing is always supported.
 * PS has to support indirect indexing, because we can't
 * lower that to TEMPs for INTERP instructions.
 */
return 1;
 
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return sscreen->llvm_has_working_vgpr_indexing ||
   /* TCS stores outputs directly to memory. */
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/16] ac: move the Fiji kernel workaround for raster config out of the switch

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

---
 src/amd/common/ac_gpu_info.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index da54e5f8b4a..273ccb6cb1d 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -587,28 +587,22 @@ ac_get_raster_config(struct radeon_info *info,
break;
case CHIP_BONAIRE:
raster_config = 0x1612;
raster_config_1 = 0x;
break;
case CHIP_HAWAII:
raster_config = 0x3a00161a;
raster_config_1 = 0x002e;
break;
case CHIP_FIJI:
-   if (info->cik_macrotile_mode_array[0] == 0x00e8) {
-   /* old kernels with old tiling config */
-   raster_config = 0x1612;
-   raster_config_1 = 0x002a;
-   } else {
-   raster_config = 0x3a00161a;
-   raster_config_1 = 0x002e;
-   }
+   raster_config = 0x3a00161a;
+   raster_config_1 = 0x002e;
break;
case CHIP_POLARIS10:
raster_config = 0x1612;
raster_config_1 = 0x002a;
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
raster_config = 0x1612;
raster_config_1 = 0x;
break;
@@ -641,20 +635,30 @@ ac_get_raster_config(struct radeon_info *info,
raster_config = 0x;
raster_config_1 = 0x;
break;
default:
fprintf(stderr,
"ac: Unknown GPU, using 0 for raster_config\n");
raster_config = 0x;
raster_config_1 = 0x;
break;
}
+
+   /* Fiji: Old kernels have incorrect tiling config. This decreases
+* RB performance by 25%. (it disables 1 RB in the second packer)
+*/
+   if (info->family == CHIP_FIJI &&
+   info->cik_macrotile_mode_array[0] == 0x000000e8) {
+   raster_config = 0x16000012;
+   raster_config_1 = 0x0000002a;
+   }
+
*raster_config_p = raster_config;
*raster_config_1_p = raster_config_1;
 }
 
 void
 ac_get_harvested_configs(struct radeon_info *info,
 unsigned raster_config,
 unsigned *cik_raster_config_1_p,
 unsigned *raster_config_se)
 {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/16] ac/surface/gfx9: fix a typo in CMASK RB/pipe alignment

2018-05-01 Thread Marek Olšák

From: Marek Olšák 

No change in behavior because it's always aligned.
---
 src/amd/common/ac_surface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index f14fa1c8b5e..b2af1f70b69 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1147,21 +1147,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
}
 
/* CMASK */
if (in->swizzleMode != ADDR_SW_LINEAR) {
ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
 
cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
 
-   if (in->numSamples) {
+   if (in->numSamples > 1) {
/* FMASK is always aligned. */
cin.cMaskFlags.pipeAligned = 1;
cin.cMaskFlags.rbAligned = 1;
} else {
cin.cMaskFlags.pipeAligned = 
!in->flags.metaPipeUnaligned;
cin.cMaskFlags.rbAligned = 
!in->flags.metaRbUnaligned;
}
cin.colorFlags = in->flags;
cin.resourceType = in->resourceType;
cin.unalignedWidth = in->width;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 00/16] RadeonSI: A bunch of random changes

2018-05-01 Thread Marek Olšák

Hi,

These are pretty random. Please review.

Thanks,
Marek

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 71363] line rendering with --with-osmesa-bits=32

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=71363

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 63472] OSMesa Gallium Segfault in VTK Test

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=63472

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 18730] Memory corruption with OSMesaDestroyContext

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=18730

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |WORKSFORME
 Status|NEW |RESOLVED

--- Comment #2 from Timothy Arceri  ---
Closing as works for me as it worked for Brian in 2008.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 11161] OSMesaMakeCurrent not properly changing contexts

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=11161

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 97145] include/GL/*.h not installed when GLX is disabled (eg for OSMesa)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=97145

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 95035] Gallium OSMesa driver is far from being thread-safe

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=95035

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 94491] osmesa gallium build with scons on windows is missing a lot of GL* symbols

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=94491

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 94489] osmesa gallium build with scons on linux contains no OSMesa* and GL* symbols

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=94489

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 29613] OSMesa and GL cannot be linked together

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=29613

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/OSMesa

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 92467] Program for dumping images crashes at OSMesa library giving floating exception in Linux(OpenSuse 13.2 and Centos 6.6)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=92467

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/DRI/swrast

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 83785] Shader branches excluded by uniform values are not optimized out

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=83785

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/Gallium/llvmpipe

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 100227] gl_marshal.py: generating duplicate declaration specifiers

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=100227

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #5 from Timothy Arceri  ---
Fixed by:

commit 31c3c440b5361299fc0529bcf049e9b271d4fab9
Author: Chad Versace 
Date:   Thu Jun 22 15:12:29 2017 -0700

glapi: Fix -Wduplicate-decl-specifier due to double-const

Fix all lines in src/mesa/main/marshal_generated.c that declare
double-const variables. Below is all such lines, with duplicates
removed:

   $ grep 'const const' marshal_generated.c | sort -u
   const const GLboolean * pointer = cmd->pointer;
   const const GLvoid * indices = cmd->indices;
   const const GLvoid * pointer = cmd->pointer;

Reviewed-by: Kenneth Graunke 

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/2] i965/extensions: Enable ASTC HDR on CannonLake

2018-05-01 Thread Nanley Chery

---
 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 73a6c73f537..c451f6cf749 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -297,7 +297,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_post_depth_coverage = true;
}
 
-   if (gen_device_info_is_9lp(devinfo))
+   if (devinfo->gen == 10 || gen_device_info_is_9lp(devinfo))
   ctx->Extensions.KHR_texture_compression_astc_hdr = true;
 
if (devinfo->gen >= 6)
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/2] i965/miptree: Set the aux_usage to NONE when emulating ETC

2018-05-01 Thread Nanley Chery

Found when prototyping an alternative compressed texture upload
algorithm. I asserted that the aux_usage was NONE and was initially
surprised to find that it was triggered.

This change shouldn't affect driver behavior now, but it should prevent
unexpected behavior if we make aux-related changes or assertions later.

v2: Fix commit title (Kenneth Graunke)
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b9a564552df..1b46b41a64c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -209,11 +209,11 @@ intel_miptree_supports_ccs(struct brw_context *brw,
if (devinfo->gen < 8 && (mip_mapped || arrayed))
   return false;
 
-   /* There's no point in using an MCS buffer if the surface isn't in a
-* renderable format.
-*/
-   if (!brw->mesa_format_supports_render[mt->format])
+   /* There's no need for an MCS buffer if the surface isn't renderable. */
+   if (!brw->mesa_format_supports_render[mt->format] ||
+   (mt->etc_format != MESA_FORMAT_NONE)) {
   return false;
+   }
 
return true;
 }
-- 
2.16.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] opencl: autotools: Fix linking order for OpenCL target

2018-05-01 Thread Dieter Nützel


Tested-by: Dieter Nützel 

Dieter

Am 01.05.2018 14:14, schrieb Kai Wasserbäch:

Otherwise the build fails with an undefined reference to
clang::FrontendTimesIsEnabled.

Bugzilla: https://bugs.freedesktop.org/106209
Cc: mesa-sta...@lists.freedesktop.org
Cc: Jan Vesely 
Signed-off-by: Kai Wasserbäch 
---

Hey,
this patch fixes a FTBFS for me with recent LLVM/Clang 7 revisions from
upstream's SVN (I use the packages from apt.llvm.org).

If you accept it, please commit it for me, I do not have commit access.

The CC to stable can be dropped, if stable branches are not to be
expected to be buildable with LLVM/Clang from SVN.

Thank you in advance for considering this patch.

Cheers,
Kai


 src/gallium/targets/opencl/Makefile.am | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/targets/opencl/Makefile.am
b/src/gallium/targets/opencl/Makefile.am
index de68a93ad5..f0e1de7797 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -23,11 +23,10 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
$(LIBELF_LIBS) \
$(DLOPEN_LIBS) \
-lclangCodeGen \
-   -lclangFrontendTool \
-lclangFrontend \
+   -lclangFrontendTool \
-lclangDriver \
-lclangSerialization \
-   -lclangCodeGen \
-lclangParse \
-lclangSema \
-lclangAnalysis \

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/5] intel: decoder: identify groups with fixed length

2018-05-01 Thread Lionel Landwerlin

<register> & <struct> elements always have fixed length. The
get_length() method implies that we're dealing with an instruction in
which the length is encoded into the variable data but the field
iterator uses it without checking what kind of gen_group it is dealing
with.

Let's make get_length() report the correct length regardless of the
gen_group (register, struct or instruction).

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_decoder.c | 18 --
 src/intel/common/gen_decoder.h |  1 +
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 7c462a0be4e..93fa4864ee3 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -151,7 +151,8 @@ static struct gen_group *
 create_group(struct parser_context *ctx,
  const char *name,
  const char **atts,
- struct gen_group *parent)
+ struct gen_group *parent,
+ bool fixed_length)
 {
struct gen_group *group;
 
@@ -161,6 +162,7 @@ create_group(struct parser_context *ctx,
 
group->spec = ctx->spec;
group->variable = false;
+   group->fixed_length = fixed_length;
 
for (int i = 0; atts[i]; i += 2) {
   char *p;
@@ -370,18 +372,19 @@ start_element(void *data, const char *element_name, const 
char **atts)
  minor = 0;
 
   ctx->spec->gen = gen_make_gen(major, minor);
-   } else if (strcmp(element_name, "instruction") == 0 ||
-  strcmp(element_name, "struct") == 0) {
-  ctx->group = create_group(ctx, name, atts, NULL);
+   } else if (strcmp(element_name, "instruction") == 0) {
+  ctx->group = create_group(ctx, name, atts, NULL, false);
+   } else if (strcmp(element_name, "struct") == 0) {
+  ctx->group = create_group(ctx, name, atts, NULL, true);
} else if (strcmp(element_name, "register") == 0) {
-  ctx->group = create_group(ctx, name, atts, NULL);
+  ctx->group = create_group(ctx, name, atts, NULL, true);
   get_register_offset(atts, >group->register_offset);
} else if (strcmp(element_name, "group") == 0) {
   struct gen_group *previous_group = ctx->group;
   while (previous_group->next)
  previous_group = previous_group->next;
 
-  struct gen_group *group = create_group(ctx, "", atts, ctx->group);
+  struct gen_group *group = create_group(ctx, "", atts, ctx->group, false);
   previous_group->next = group;
   ctx->group = group;
} else if (strcmp(element_name, "field") == 0) {
@@ -713,6 +716,9 @@ gen_group_find_field(struct gen_group *group, const char 
*name)
 int
 gen_group_get_length(struct gen_group *group, const uint32_t *p)
 {
+   if (group && group->fixed_length)
+  return group->dw_length;
+
uint32_t h = p[0];
uint32_t type = field_value(h, 29, 31);
 
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index f28ac7d27af..7d3bedca5b5 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -104,6 +104,7 @@ struct gen_group {
uint32_t group_offset, group_count;
uint32_t group_size;
bool variable;
+   bool fixed_length; /* True for  &  */
 
struct gen_group *parent;
struct gen_group *next;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/5] intel: decoder fixes

2018-05-01 Thread Lionel Landwerlin

Hi all,

While investigating an error state I noticed some strange values. Here
are a few changes and fixes. You can give it a try on the error states
from https://bugs.freedesktop.org/show_bug.cgi?id=106243

Cheers,

Lionel Landwerlin (5):
  intel: decoder: make the field iterator use more natural
  intel: decoder: identify groups with fixed length
  intel: decoder: document when fields should be used
  intel: decoder: fix starting dword of struct fields
  intel: batch-decoder: iterate VERTEX_BUFFER_STATE fields

 src/intel/common/gen_decoder.c  |  46 +++-
 src/intel/common/gen_decoder.h  |   6 +-
 src/intel/tools/gen_batch_decoder.c | 110 +++-
 3 files changed, 91 insertions(+), 71 deletions(-)

--
2.17.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] intel: decoder: document when fields should be used

2018-05-01 Thread Lionel Landwerlin

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_decoder.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
index 7d3bedca5b5..8324ff95696 100644
--- a/src/intel/common/gen_decoder.h
+++ b/src/intel/common/gen_decoder.h
@@ -103,7 +103,7 @@ struct gen_group {
uint32_t dw_length;
uint32_t group_offset, group_count;
uint32_t group_size;
-   bool variable;
+   bool variable; /*  specific */
bool fixed_length; /* True for  &  */
 
struct gen_group *parent;
@@ -112,8 +112,7 @@ struct gen_group {
uint32_t opcode_mask;
uint32_t opcode;
 
-   /* Register specific */
-   uint32_t register_offset;
+   uint32_t register_offset; /* <register> specific */
 };
 
 struct gen_value {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] intel: decoder: fix starting dword of struct fields

2018-05-01 Thread Lionel Landwerlin

Struct fields might span several dwords, but iter_dword is incremented
up to the last dword of the current field before we print out the
struct's fields. We can't use iter_dword for computing the offset into
the pointer of data to decode.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 93fa4864ee3..a0a9634c5d9 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -1064,7 +1064,7 @@ gen_print_group(FILE *outfile, struct gen_group *group, 
uint64_t offset,
  if (iter.struct_desc) {
 uint64_t struct_offset = offset + 4 * iter_dword;
 gen_print_group(outfile, iter.struct_desc, struct_offset,
-[iter_dword], iter.start_bit % 32, color);
+[iter.start_bit / 32], iter.start_bit % 32, 
color);
  }
   }
}
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/5] intel: batch-decoder: iterate VERTEX_BUFFER_STATE fields

2018-05-01 Thread Lionel Landwerlin

The gen_field_iterator only iterates the fields of a given gen_group.
If we want to iterate the fields of another gen_group contained as
field, we need to do it manually.

Signed-off-by: Lionel Landwerlin 
---
 src/intel/tools/gen_batch_decoder.c | 70 -
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/src/intel/tools/gen_batch_decoder.c 
b/src/intel/tools/gen_batch_decoder.c
index e8d0e11682f..a0d6dbd3e58 100644
--- a/src/intel/tools/gen_batch_decoder.c
+++ b/src/intel/tools/gen_batch_decoder.c
@@ -322,6 +322,7 @@ handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx 
*ctx,
   const uint32_t *p)
 {
struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
+   struct gen_group *vbs = gen_spec_find_struct(ctx->spec, 
"VERTEX_BUFFER_STATE");
 
struct gen_batch_decode_bo vb = {};
uint32_t vb_size = 0;
@@ -332,43 +333,50 @@ handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx 
*ctx,
struct gen_field_iterator iter;
gen_field_iterator_init(, inst, p, 0, false);
while (gen_field_iterator_next()) {
-  if (strcmp(iter.name, "Vertex Buffer Index") == 0) {
- index = iter.raw_value;
-  } else if (strcmp(iter.name, "Buffer Pitch") == 0) {
- pitch = iter.raw_value;
-  } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
- vb = ctx_get_bo(ctx, iter.raw_value);
-  } else if (strcmp(iter.name, "Buffer Size") == 0) {
- vb_size = iter.raw_value;
- ready = true;
-  } else if (strcmp(iter.name, "End Address") == 0) {
- if (vb.map && iter.raw_value >= vb.addr)
-vb_size = iter.raw_value - vb.addr;
- else
-vb_size = 0;
- ready = true;
-  }
-
-  if (!ready)
+  if (iter.struct_desc != vbs)
  continue;
 
-  fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
+  struct gen_field_iterator vbs_iter;
+  gen_field_iterator_init(_iter, vbs, [iter.start_bit / 32], 0, 
false);
+  while (gen_field_iterator_next(_iter)) {
+ if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
+index = vbs_iter.raw_value;
+ } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
+pitch = vbs_iter.raw_value;
+ } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
+vb = ctx_get_bo(ctx, vbs_iter.raw_value);
+ } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
+vb_size = vbs_iter.raw_value;
+ready = true;
+ } else if (strcmp(vbs_iter.name, "End Address") == 0) {
+if (vb.map && vbs_iter.raw_value >= vb.addr)
+   vb_size = vbs_iter.raw_value - vb.addr;
+else
+   vb_size = 0;
+ready = true;
+ }
 
-  if (vb.map == NULL) {
- fprintf(ctx->fp, "  buffer contents unavailable\n");
- continue;
-  }
+ if (!ready)
+continue;
 
-  if (vb.map == 0 || vb_size == 0)
- continue;
+ fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
+
+ if (vb.map == NULL) {
+fprintf(ctx->fp, "  buffer contents unavailable\n");
+continue;
+ }
+
+ if (vb.map == 0 || vb_size == 0)
+continue;
 
-  ctx_print_buffer(ctx, vb, vb_size, pitch);
+ ctx_print_buffer(ctx, vb, vb_size, pitch);
 
-  vb.map = NULL;
-  vb_size = 0;
-  index = -1;
-  pitch = -1;
-  ready = false;
+ vb.map = NULL;
+ vb_size = 0;
+ index = -1;
+ pitch = -1;
+ ready = false;
+  }
}
 }
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] intel: decoder: make the field iterator use more natural

2018-05-01 Thread Lionel Landwerlin

while (iter_next()) { ... }

instead of

do { ... } while (iter_next());

Signed-off-by: Lionel Landwerlin 
---
 src/intel/common/gen_decoder.c  | 26 +++
 src/intel/tools/gen_batch_decoder.c | 40 ++---
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index 1b8123bf394..7c462a0be4e 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -981,25 +981,31 @@ gen_field_iterator_init(struct gen_field_iterator *iter,
memset(iter, 0, sizeof(*iter));
 
iter->group = group;
-   if (group->fields)
-  iter->field = group->fields;
-   else
-  iter->field = group->next->fields;
iter->p = p;
iter->p_bit = p_bit;
 
int length = gen_group_get_length(iter->group, iter->p);
iter->p_end = length > 0 ? [length] : NULL;
iter->print_colors = print_colors;
-
-   bool result = iter_decode_field(iter);
-   if (length >= 0)
-  assert(result);
 }
 
 bool
 gen_field_iterator_next(struct gen_field_iterator *iter)
 {
+   /* Initial condition */
+   if (!iter->field) {
+  if (iter->group->fields)
+ iter->field = iter->group->fields;
+  else
+ iter->field = iter->group->next->fields;
+
+  bool result = iter_decode_field(iter);
+  if (iter->p_end)
+ assert(result);
+
+  return true;
+   }
+
if (!iter_advance_field(iter))
   return false;
 
@@ -1040,7 +1046,7 @@ gen_print_group(FILE *outfile, struct gen_group *group, 
uint64_t offset,
int last_dword = -1;
 
gen_field_iterator_init(, group, p, p_bit, color);
-   do {
+   while (gen_field_iterator_next()) {
   int iter_dword = iter.end_bit / 32;
   if (last_dword != iter_dword) {
  for (int i = last_dword + 1; i <= iter_dword; i++)
@@ -1055,5 +1061,5 @@ gen_print_group(FILE *outfile, struct gen_group *group, 
uint64_t offset,
 [iter_dword], iter.start_bit % 32, color);
  }
   }
-   } while (gen_field_iterator_next());
+   }
 }
diff --git a/src/intel/tools/gen_batch_decoder.c 
b/src/intel/tools/gen_batch_decoder.c
index c6b908758b2..e8d0e11682f 100644
--- a/src/intel/tools/gen_batch_decoder.c
+++ b/src/intel/tools/gen_batch_decoder.c
@@ -175,7 +175,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
struct gen_field_iterator iter;
gen_field_iterator_init(, inst, p, 0, false);
 
-   do {
+   while (gen_field_iterator_next()) {
   if (strcmp(iter.name, "Surface State Base Address") == 0) {
  ctx->surface_base = ctx_get_bo(ctx, iter.raw_value);
   } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
@@ -183,7 +183,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, 
const uint32_t *p)
   } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
  ctx->instruction_base = ctx_get_bo(ctx, iter.raw_value);
   }
-   } while (gen_field_iterator_next());
+   }
 }
 
 static void
@@ -272,14 +272,14 @@ handle_media_interface_descriptor_load(struct 
gen_batch_decode_ctx *ctx,
gen_field_iterator_init(, inst, p, 0, false);
uint32_t descriptor_offset = 0;
int descriptor_count = 0;
-   do {
+   while (gen_field_iterator_next()) {
   if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
  descriptor_offset = strtol(iter.value, NULL, 16);
   } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
  descriptor_count =
 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
   }
-   } while (gen_field_iterator_next());
+   }
 
uint64_t desc_addr = ctx->dynamic_base.addr + descriptor_offset;
const uint32_t *desc_map = ctx->dynamic_base.map + descriptor_offset;
@@ -292,7 +292,7 @@ handle_media_interface_descriptor_load(struct 
gen_batch_decode_ctx *ctx,
   uint64_t ksp;
   uint32_t sampler_offset, sampler_count;
   uint32_t binding_table_offset, binding_entry_count;
-  do {
+  while (gen_field_iterator_next()) {
  if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
 ksp = strtoll(iter.value, NULL, 16);
  } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
@@ -304,7 +304,7 @@ handle_media_interface_descriptor_load(struct 
gen_batch_decode_ctx *ctx,
  } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
 binding_entry_count = strtol(iter.value, NULL, 10);
  }
-  } while (gen_field_iterator_next());
+  }
 
   ctx_disassemble_program(ctx, ksp, "compute shader");
   printf("\n");
@@ -331,7 +331,7 @@ handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx 
*ctx,
 
struct gen_field_iterator iter;
gen_field_iterator_init(, inst, p, 0, false);
-   do {
+   while (gen_field_iterator_next()) {
   if (strcmp(iter.name, "Vertex Buffer Index") == 0) {
  index =

[Mesa-dev] [Bug 104626] broadcom/vc5: double compare

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=104626

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |Drivers/DRI/v3d
 QA Contact|mesa-dev@lists.freedesktop. |
   |org |

-- 
You are receiving this mail because:
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: add disabled support for SDMA transfer on VI/Polaris

2018-05-01 Thread Dave Airlie

From: Dave Airlie 

This series adds the code to enable SDMA based transfer
queues on VI/Polaris, but disabled by default for now.

No CIK or GFX9 support yet.
(CIK needs hw workarounds, GFX9 just needs support added
for its SDMA packet)

It passes the CTS tests but hangs with the last version
of mpv I tried it with, but I'd like to push it and
add GFX9 support etc in tree while I work out the problems
with real world apps.

RADV_DEBUG=transfer will enable transfer queues.

Thanks to Fredrik Höglund for some compressed texture fixes.
Signed-off-by: Dave Airlie 
---
 src/amd/common/sid.h  |   1 +
 src/amd/vulkan/Makefile.sources   |   1 +
 src/amd/vulkan/radv_cik_sdma.c| 758 ++
 src/amd/vulkan/radv_cmd_buffer.c  |   9 +
 src/amd/vulkan/radv_debug.c   |   2 +
 src/amd/vulkan/radv_debug.h   |   1 +
 src/amd/vulkan/radv_device.c  |  32 ++
 src/amd/vulkan/radv_meta_buffer.c |  42 +-
 src/amd/vulkan/radv_meta_copy.c   |  19 +
 src/amd/vulkan/radv_private.h |  37 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  12 +-
 11 files changed, 905 insertions(+), 9 deletions(-)
 create mode 100644 src/amd/vulkan/radv_cik_sdma.c

diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 3588d39d62b..dd6d87f73f9 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9134,6 +9134,7 @@
 #defineCIK_SDMA_PACKET_TRAP0x6
 #defineCIK_SDMA_PACKET_SEMAPHORE   0x7
 #defineCIK_SDMA_PACKET_CONSTANT_FILL   0xb
+#defineSDMA_CONSTANT_FILL_DWORDS  (2 << 14) /* use in 
the n field */
 #defineCIK_SDMA_PACKET_SRBM_WRITE  0xe
 #defineCIK_SDMA_COPY_MAX_SIZE  0x3fffe0
 
diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index ccb956a2396..00434ac6e60 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -31,6 +31,7 @@ RADV_WS_AMDGPU_FILES := \
winsys/amdgpu/radv_amdgpu_winsys_public.h
 
 VULKAN_FILES := \
+   radv_cik_sdma.c \
radv_cmd_buffer.c \
radv_cs.h \
radv_debug.c \
diff --git a/src/amd/vulkan/radv_cik_sdma.c b/src/amd/vulkan/radv_cik_sdma.c
new file mode 100644
index 000..a83114fcd3c
--- /dev/null
+++ b/src/amd/vulkan/radv_cik_sdma.c
@@ -0,0 +1,758 @@
+/*
+ * Copyright © 2016 Red Hat.
+ *
+ * based on cik_sdma.c:
+ * Copyright 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "radv_private.h"
+#include "sid.h"
+#include "vk_format.h"
+#include "radv_cs.h"
+
+static VkFormat get_format_from_aspect_mask(VkImageAspectFlags aspectMask,
+   VkFormat format)
+{
+   if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+   format = vk_format_depth_only(format);
+   else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+   format = vk_format_stencil_only(format);
+   return format;
+}
+
+static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned 
blk_w)
+{
+   width = radv_minify(width, level);
+   return DIV_ROUND_UP(width, blk_w);
+}
+
+static const struct legacy_surf_level *get_base_level_info(const struct 
radv_image *img,
+  VkImageAspectFlags 
aspectMask, int base_mip_level)
+{
+   if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+   return >surface.u.legacy.stencil_level[base_mip_level];
+   return >surface.u.legacy.level[base_mip_level];
+}
+
+static void get_image_info(struct radv_cmd_buffer *cmd_buffer,
+  const struct radv_image *img,
+  const VkImageSubresourceLayers

[Mesa-dev] [PATCH 1/4] i965/drm: Reorganize code for the next patch

2018-05-01 Thread James Xiong

From: "Xiong, James" 

Split bo_alloc_internal and add a new function, cached_bo_for_size, that
searches for a suitable cached buffer for a given size.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 92 +-
 1 file changed, 45 insertions(+), 47 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 7cb1f03..e68da26 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -263,53 +263,29 @@ brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
}
 }
 
+/* search for a suitable cached bo for reuse */
 static struct brw_bo *
-bo_alloc_internal(struct brw_bufmgr *bufmgr,
-  const char *name,
-  uint64_t size,
-  unsigned flags,
-  uint32_t tiling_mode,
-  uint32_t stride)
+cached_bo_for_size(struct brw_bufmgr *bufmgr,
+   uint64_t size,
+   uint32_t tiling_mode,
+   uint32_t stride,
+   unsigned flags)
 {
-   struct brw_bo *bo;
-   unsigned int page_size = getpagesize();
-   int ret;
-   struct bo_cache_bucket *bucket;
-   bool alloc_from_cache;
-   uint64_t bo_size;
-   bool busy = false;
-   bool zeroed = false;
-
-   if (flags & BO_ALLOC_BUSY)
-  busy = true;
-
-   if (flags & BO_ALLOC_ZEROED)
-  zeroed = true;
+   bool busy = (flags & BO_ALLOC_BUSY) ? true : false;
+   bool zeroed = (flags & BO_ALLOC_ZEROED) ? true : false;
+   struct bo_cache_bucket *bucket =
+  (bufmgr->bo_reuse) ? bucket_for_size(bufmgr, size) : NULL;
 
/* BUSY does doesn't really jive with ZEROED as we have to wait for it to
 * be idle before we can memset.  Just disallow that combination.
 */
assert(!(busy && zeroed));
 
-   /* Round the allocated size up to a power of two number of pages. */
-   bucket = bucket_for_size(bufmgr, size);
-
-   /* If we don't have caching at this size, don't actually round the
-* allocation up.
-*/
-   if (bucket == NULL) {
-  bo_size = size;
-  if (bo_size < page_size)
- bo_size = page_size;
-   } else {
-  bo_size = bucket->size;
-   }
-
-   mtx_lock(>lock);
-   /* Get a buffer out of the cache if available */
+   if(bucket != NULL && !list_empty(>head)) {
+  struct brw_bo *bo;
 retry:
-   alloc_from_cache = false;
-   if (bucket != NULL && !list_empty(>head)) {
+  bo = NULL;
+
   if (busy && !zeroed) {
  /* Allocate new render-target BOs from the tail (MRU)
   * of the list, as it will likely be hot in the GPU
@@ -319,7 +295,6 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
  list_del(>head);
- alloc_from_cache = true;
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -330,16 +305,15 @@ retry:
   */
  bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
  if (!brw_bo_busy(bo)) {
-alloc_from_cache = true;
 list_del(>head);
  }
   }
 
-  if (alloc_from_cache) {
+  if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
 bo_free(bo);
 brw_bo_cache_purge_bucket(bufmgr, bucket);
-goto retry;
+return NULL;
  }
 
  if (bo_set_tiling_internal(bo, tiling_mode, stride)) {
@@ -353,20 +327,44 @@ retry:
bo_free(bo);
goto retry;
 }
-memset(map, 0, bo_size);
+memset(map, 0, bo->size);
  }
   }
+
+  return bo;
}
 
-   if (!alloc_from_cache) {
+   return NULL;
+}
+
+static struct brw_bo *
+bo_alloc_internal(struct brw_bufmgr *bufmgr,
+  const char *name,
+  uint64_t size,
+  unsigned flags,
+  uint32_t tiling_mode,
+  uint32_t stride)
+{
+   struct brw_bo *bo;
+   int ret;
+
+   /* align the request size to page size */
+   size = ALIGN(size, getpagesize());
+
+   mtx_lock(>lock);
+
+   /* Get a buffer out of the cache if available */
+   bo = cached_bo_for_size(bufmgr, size, tiling_mode, stride, flags);
+
+   if (bo == NULL) {
   bo = calloc(1, sizeof(*bo));
   if (!bo)
  goto err;
 
-  bo->size = bo_size;
+  bo->size = size;
   bo->idle = true;
 
-  struct drm_i915_gem_create create = { .size = bo_size };
+  struct drm_i915_gem_create create = { .size = size };
 
   /* All new BOs we get from the kernel are zeroed, so we don't need to
* worry about that here.
@@ -413,7 +411,7 @@ retry:
mtx_unlock(>lock);
 
DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
-   (unsigned long long) size);
+   (unsigned long long) bo->size);
 
return bo;
 
-- 
2.7.4

___
mesa-dev mailing list

[Mesa-dev] [PATCH 3/4] i965/drm: Searching for a cached buffer for reuse

2018-05-01 Thread James Xiong

From: "Xiong, James" 

Now that a bucket contains cached buffers of different sizes, go
through its list and search for a cached buffer that is large enough.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 21 +++--
 src/util/list.h|  5 +
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 6a9b005..5235aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -281,7 +281,7 @@ cached_bo_for_size(struct brw_bufmgr *bufmgr,
assert(!(busy && zeroed));
 
if(bucket != NULL && !list_empty(>head)) {
-  struct brw_bo *bo;
+  struct brw_bo *bo, *temp_bo;
 retry:
   bo = NULL;
 
@@ -292,8 +292,13 @@ retry:
   * asked us to zero the buffer, we don't want this
   * because we are going to mmap it.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.prev, head);
- list_del(>head);
+ LIST_FOR_EACH_ENTRY_REV(temp_bo, >head, head) {
+if (temp_bo->size >= size) {
+   bo = temp_bo;
+   list_del(>head);
+   break;
+}
+ }
   } else {
  /* For non-render-target BOs (where we're probably
   * going to map it first thing in order to fill it
@@ -302,9 +307,13 @@ retry:
   * allocating a new buffer is probably faster than
   * waiting for the GPU to finish.
   */
- bo = LIST_ENTRY(struct brw_bo, bucket->head.next, head);
- if (!brw_bo_busy(bo)) {
-list_del(>head);
+ LIST_FOR_EACH_ENTRY(temp_bo, >head, head) {
+if (temp_bo->size >= size &&
+!brw_bo_busy(temp_bo)) {
+   bo = temp_bo;
+   list_del(>head);
+   break;
+}
  }
   }
 
diff --git a/src/util/list.h b/src/util/list.h
index 6edb750..9362072 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -189,6 +189,11 @@ static inline void list_validate(struct list_head *list)
>member != (head); \
pos = container_of(pos->member.next, pos, member))
 
+#define LIST_FOR_EACH_ENTRY_REV(pos, head, member)  \
+   for (pos = NULL, pos = container_of((head)->prev, pos, member);  \
+>member != (head); \
+pos = container_of(pos->member.prev, pos, member))
+
 #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member)   \
for (pos = NULL, pos = container_of((head)->next, pos, member), \
storage = container_of(pos->member.next, pos, member);  \
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/4] improve buffer cache and reuse

2018-05-01 Thread James Xiong

From: "Xiong, James" 

With the current implementation, brw_bufmgr may round up a request
size to the next bucket size, resulting in up to 25% more memory being
allocated in the worst-case scenario. For example:
Request size    Actual size
32KB+1Byte  40KB
.
8MB+1Byte   10MB
.
96MB+1Byte  112MB
This series aligns the buffer size up to the page size instead of a bucket
size to improve memory allocation efficiency. Performance is almost the
same with Basemark ES3, GfxBench4 and 5:

Basemark ES3
   scorepeak memory allocation
  before  afterdiffbeforeafter  diff
21.537462  21.888784  1.61%419766272  408809472  -10956800
19.566198  19.763429  1.00% 

GfxBench 4.0
scorepeak memory
 before after diff before   after 
diff
gl_4 564.6052246094  565.2348632813  0.11%  578490368 550199296 
-28291072
gl_4_off 727.0440063477  703.5833129883  -3.33% 629501952 598216704 
-31285248
gl_manhattan 1053.4223632813 1057.3690185547 0.37%  449568768 421134336 
-28434432
gl_trex  2708.0656738281 2699.2646484375 -0.33% 130076672 125042688 
-5033984
gl_alu2  1207.1490478516 1212.2220458984 0.42%  55496704  55029760  
-466944
gl_driver2   103.0383071899  103.5478439331  0.49%  13107200  12980224  
-126976
gl_manhattan_off 1703.4780273438 1736.9074707031 1.92%  490016768 456548352 
-33468416
gl_trex_off  2951.6809082031 3058.5422363281 3.49%  157511680 152260608 
-5251072
gl_alu2_off  2604.0903320313 2626.2524414063 0.84%  86130688  85483520  
-647168
gl_driver2_off   204.0173187256  207.0510101318  1.47%  40869888  40615936  
-253952

GfxBench 5.0
score   peak memory 
 before after   before after   diff
gl_5   259   259  1137549312  1038286848 -99262464
gl_5_off   297   297  1170853888  1071357952 -99495936

Xiong, James (4):
  i965/drm: Reorganize code for the next patch
  i965/drm: Round down buffer size and calculate the bucket index
  i965/drm: Searching for a cached buffer for reuse
  i965/drm: Purge the bucket when its cached buffer is evicted

 src/mesa/drivers/dri/i965/brw_bufmgr.c | 139 ++---
 src/util/list.h|   5 ++
 2 files changed, 79 insertions(+), 65 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] i965/drm: Purge the bucket when its cached buffer is evicted

2018-05-01 Thread James Xiong

From: "Xiong, James" 

When one of the cached buffers is found to have been evicted by the kernel,
the buffers freed earlier than this buffer are most likely gone too, so
go through the cached list in the bucket and purge them.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 5235aa6..9f2e566 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -248,17 +248,20 @@ brw_bo_madvise(struct brw_bo *bo, int state)
return madv.retained;
 }
 
-/* drop the oldest entries that have been purged by the kernel */
+/* drop the entries that are older than the given time */
 static void
 brw_bo_cache_purge_bucket(struct brw_bufmgr *bufmgr,
-  struct bo_cache_bucket *bucket)
+  struct bo_cache_bucket *bucket,
+  time_t time)
 {
list_for_each_entry_safe(struct brw_bo, bo, >head, head) {
-  if (brw_bo_madvise(bo, I915_MADV_DONTNEED))
+  if (bo->free_time >= time) {
+ brw_bo_madvise(bo, I915_MADV_DONTNEED);
+ list_del(>head);
+ bo_free(bo);
+  } else {
  break;
-
-  list_del(>head);
-  bo_free(bo);
+  }
}
 }
 
@@ -319,8 +322,8 @@ retry:
 
   if (bo) {
  if (!brw_bo_madvise(bo, I915_MADV_WILLNEED)) {
+brw_bo_cache_purge_bucket(bufmgr, bucket, bo->free_time);
 bo_free(bo);
-brw_bo_cache_purge_bucket(bufmgr, bucket);
 return NULL;
  }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] i965/drm: Round down buffer size and calculate the bucket index

2018-05-01 Thread James Xiong

From: "Xiong, James" 

A buffer is now put in cached bucket #n when its size is between
bucket[n].size and bucket[n+1].size - 1, inclusive.

Signed-off-by: Xiong, James 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index e68da26..6a9b005 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -189,8 +189,8 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, 
uint32_t tiling)
 static struct bo_cache_bucket *
 bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
 {
-   /* Calculating the pages and rounding up to the page size. */
-   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+   /* Calculating the pages and rounding down to the  page size. */
+   const unsigned pages = (size < PAGE_SIZE) ? 1 : size / PAGE_SIZE;
 
/* Row  Bucket sizesclz((x-1) | 3)   RowColumn
 *in pages  stride   size
@@ -211,8 +211,7 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size)
int col_size_log2 = row - 1;
col_size_log2 += (col_size_log2 < 0);
 
-   const unsigned col = (pages - prev_row_max_pages +
-((1 << col_size_log2) - 1)) >> col_size_log2;
+   const unsigned col = (pages - prev_row_max_pages) >> col_size_log2;
 
/* Calculating the index based on the row and column. */
const unsigned index = (row * 4) + (col - 1);
@@ -1285,9 +1284,9 @@ add_bucket(struct brw_bufmgr *bufmgr, int size)
bufmgr->cache_bucket[i].size = size;
bufmgr->num_buckets++;
 
+   assert(bucket_for_size(bufmgr, size - 1) == 
>cache_bucket[i==0?0:i-1]);
assert(bucket_for_size(bufmgr, size) == >cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size - 2048) == >cache_bucket[i]);
-   assert(bucket_for_size(bufmgr, size + 1) != >cache_bucket[i]);
+   assert(bucket_for_size(bufmgr, size + 1) == >cache_bucket[i]);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] freedreno: Fix ir3_cmdline.c build.

2018-05-01 Thread Timothy Arceri


Thanks.

Reviewed-by: Timothy Arceri 

On 02/05/18 06:45, Eric Anholt wrote:

Fixes: 6487e7a30c9e ("nir: move GL specific passes to src/compiler/glsl")
---
  src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 5631216ebd9e..55809d527adb 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -46,6 +46,7 @@
  
  #include "compiler/glsl/standalone.h"

  #include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/glsl/gl_nir.h"
  #include "compiler/nir_types.h"
  #include "compiler/spirv/nir_spirv.h"
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] opencl: autotools: Fix linking order for OpenCL target

2018-05-01 Thread Dylan Baker

Quoting Aaron Watry (2018-05-01 10:59:43)
> Given the discussion that's ongoing, this patch might not land as-is,
> but if it does:
> Tested-By: Aaron Watry 
> 
> Note: The meson build currently works as-is and doesn't require an
> equivalent patch.

Just FYI, meson uses as-needed and link-groups by default.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

On Tue, May 1, 2018 at 5:53 PM, Roland Scheidegger 
wrote:

> Am 01.05.2018 um 22:49 schrieb Marek Olšák:
> > On Tue, May 1, 2018 at 10:48 AM, Roland Scheidegger  > > wrote:
> >
> > Am 01.05.2018 um 01:43 schrieb Marek Olšák:
> > > From: Marek Olšák  >>
> > >
> > > This is a hypothetical interface for EQAA (a superset of CSAA).
> CSAA could be
> > > exposed via GL_NV_framebuffer_multisample_coverage. EQAA
> additionally removes
> > > the restriction that the number of samples in all FBO attachments
> must match,
> > > which means it allows arbitrary sample counts in each FBO
> attachment.
> >
> > The idea sounds ok to me, as long as you don't expose the additional
> > modes with more samples as ordinary msaa modes.
> > (I'm quite sure at some point nvidia exposed 16x msaa modes through
> > ordinary means, which then gave you really 4 color samples and 12
> > coverage-only samples, in some not-very-compliant way. Though IIRC
> the
> > extension is actually older than the ability to do per-sample msaa fb
> > reads, so it wasn't as problematic back then.)
> >
> > > ---
> > >  src/gallium/docs/source/screen.rst   | 17 +++--
> > >  src/gallium/include/pipe/p_defines.h |  1 +
> > >  src/gallium/include/pipe/p_state.h   |  3 ++-
> > >  3 files changed, 18 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/src/gallium/docs/source/screen.rst
> b/src/gallium/docs/source/screen.rst
> > > index 3837360fb40..28934c2f7b9 100644
> > > --- a/src/gallium/docs/source/screen.rst
> > > +++ b/src/gallium/docs/source/screen.rst
> > > @@ -398,20 +398,22 @@ The integer capabilities:
> > >  * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports
> TGSI_OPCODE_LOAD use
> > >with constant buffers.
> > >  * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be
> used as
> > >an address for indirect register indexing.
> > >  * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
> > >GL_MESA_tile_raster_order, using the tile_raster_order_* fields
> in
> > >pipe_rasterizer_state.
> > >  * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on
> combined shader
> > >output resources (images + buffers + fragment outputs). If 0
> the state
> > >tracker works it out.
> > > +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit
> is set, EQAA
> > > +  supports (i+1) color samples.
> > I agree with Nicolai here, this looks strange, this should instead
> query
> > for total samples or something.
> >
> > >  * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
> > >Whether pipe_vertex_buffer::buffer_offset is treated as
> signed. The u_vbuf
> > >module needs this for optimal performance in workstation
> applications.
> > >  * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support
> per-context
> > >priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x
> for the
> > >supported priority levels.  A driver that does not support
> prioritized
> > >contexts can return 0.
> > >  * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports
> signaling semaphores
> > >using fence_server_signal().
> > >  * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags
> that must be
> > > @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader
> resources.
> > >  (1 for 1D or 1D array textures).
> > >
> > >  **depth0** the depth of the base mip level of the texture
> > >  (1 for everything else).
> > >
> > >  **array_size** the array size for 1D and 2D array textures.
> > >  For cube maps this must be 6, for other textures 1.
> > >
> > >  **last_level** the last mip map level present.
> > >
> > > -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a
> resource
> > > -which isn't multisampled.
> > > +**nr_samples**: For Z/S, this is the number of samples. For
> color, if EQAA
> > > +is unsupported, this is the number of both coverage samples and
> color samples.
> > > +If EQAA is supported, this is the number of coverage samples. 0
> and 1
> > > +specify a resource which isn't multisampled.
> > I think you should keep nr_samples alone, and re-change the meaning
> to
> > actually be "real" samples with all associated data (which is what
> > everybody expects with msaa).
> >
> > > +
> > > +**nr_color_samples**: This is the number of color samples for
> EQAA, while
> > > +``nr_samples`` is the number of coverage samples. If the format
> is Z/S,
> > > +``nr_color_samples`` is ignored. Constraints:
> > > +* ``nr_color_samples`` must not be greater than ``nr_samples``.
> > > +* If ``nr_color_samples`` is equal to ``nr_samples``, it is
>

Re: [Mesa-dev] [PATCH 17/29] i965/blorp: Remove a pile of blorp_blit restrictions

2018-05-01 Thread Jason Ekstrand

On Tue, Mar 6, 2018 at 12:15 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 26, 2018 at 05:59:46PM -0800, Jason Ekstrand wrote:
> > Previously, blorp could only blit into something that was renderable.
> > Thanks to recent additions to blorp, it can now blit into basically
> > anything so long as it isn't compressed.
> > ---
> >  src/mesa/drivers/dri/i965/brw_blorp.c | 67
> +--
> >  1 file changed, 33 insertions(+), 34 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> > index 82d9de1..b3b007f 100644
> > --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> > +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> > @@ -199,6 +199,26 @@ blorp_surf_for_miptree(struct brw_context *brw,
> > *level -= mt->first_level;
> >  }
> >
> > +static bool
> > +brw_blorp_supports_dst_format(struct brw_context *brw, mesa_format
> format)
> > +{
> > +   /* If it's renderable, it's definitely supported. */
> > +   if (brw->mesa_format_supports_render[format])
> > +  return true;
> > +
> > +   /* BLORP can't compress anything */
> > +   if (_mesa_is_format_compressed(format))
> > +  return false;
> > +
> > +   /* No exotic formats such as GL_LUMINANCE_ALPHA */
> > +   if (_mesa_get_format_bits(format, GL_RED_BITS) == 0 &&
> > +   _mesa_get_format_bits(format, GL_DEPTH_BITS) == 0 &&
> > +   _mesa_get_format_bits(format, GL_STENCIL_BITS) == 0)
> > +  return false;
> > +
> > +   return true;
> > +}
> > +
> >  static enum isl_format
> >  brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
> >  bool is_render_target)
> > @@ -216,15 +236,20 @@ brw_blorp_to_isl_format(struct brw_context *brw,
> mesa_format format,
> >return ISL_FORMAT_R32_FLOAT;
> > case MESA_FORMAT_Z_UNORM16:
> >return ISL_FORMAT_R16_UNORM;
> > -   default: {
> > +   default:
> >if (is_render_target) {
> > - assert(brw->mesa_format_supports_render[format]);
> > - return brw->mesa_to_isl_render_format[format];
> > + assert(brw_blorp_supports_dst_format(brw, format));
> > + if (brw->mesa_format_supports_render[format]) {
> > +return brw->mesa_to_isl_render_format[format];
> > + } else {
>

Maybe I should move the comment below up to here where it actually means
something. :-)


> > +return brw_isl_format_for_mesa_format(format);
> > + }
> >} else {
> > + /* Some destinations (is_render_target == true) are supported
> by
>
> I didn't really understand this comment, this is inside block where
> "is_render_target == false".
>
>
> +  * blorp even though we technically can't render to them.
> > +  */
> >   return brw_isl_format_for_mesa_format(format);
> >}
> > -  break;
> > -   }
> > }
> >  }
> >
> > @@ -553,14 +578,6 @@ try_blorp_blit(struct brw_context *brw,
> >src_mt = find_miptree(buffer_bit, src_irb);
> >dst_mt = find_miptree(buffer_bit, dst_irb);
> >
> > -  /* We can't handle format conversions between Z24 and other
> formats
> > -   * since we have to lie about the surface format. See the
> comments in
> > -   * brw_blorp_surface_info::set().
> > -   */
> > -  if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) !=
> > -  (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT))
> > - return false;
> > -
> >/* We also can't handle any combined depth-stencil formats
> because we
> > * have to reinterpret as a color format.
> > */
> > @@ -629,32 +646,14 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
> > struct intel_mipmap_tree *src_mt = src_irb->mt;
> > struct intel_mipmap_tree *dst_mt = intel_image->mt;
> >
> > -   /* There is support for only up to eight samples. */
> > -   if (src_mt->surf.samples > 8 || dst_mt->surf.samples > 8)
> > -  return false;
>
> Previous patches only dealt with formats and I fail to see how they enable
> samples > 8. We probably can, to me it just seems that this is because of
> something else than changes in this series. Or did I miss something?
>

We've been able to support 16x MSAA for a long time.  This series really
has nothing to do with it.  I'm happy to make that its own patch if you'd
like.


> > -
> > -   if (_mesa_get_format_base_format(src_rb->Format) !=
> > -   _mesa_get_format_base_format(dst_image->TexFormat)) {
> > -  return false;
> > -   }
> > -
> > -   /* We can't handle format conversions between Z24 and other formats
> since
> > -* we have to lie about the surface format.  See the comments in
> > -* brw_blorp_surface_info::set().
> > -*/
> > -   if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) !=
> > -   (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT)) {
> > -  return false;
> > -   }
> > -
> > -   /* We also can't handle any combined depth-stencil formats because we
> >

Re: [Mesa-dev] [PATCH] opencl: autotools: Fix linking order for OpenCL target

2018-05-01 Thread Jan Vesely

On Tue, 2018-05-01 at 18:23 +0200, Kai Wasserbäch wrote:
> Hey Jan,
> Jan Vesely wrote on 01.05.2018 17:19:
> > On Tue, 2018-05-01 at 14:14 +0200, Kai Wasserbäch wrote:
> > > Otherwise the build fails with an undefined reference to
> > > clang::FrontendTimesIsEnabled.
> > > 
> > > Bugzilla: https://bugs.freedesktop.org/106209
> > > Cc: mesa-sta...@lists.freedesktop.org
> > > Cc: Jan Vesely 
> > > Signed-off-by: Kai Wasserbäch 
> > > ---
> > > 
> > > Hey,
> > > this patch fixes a FTBFS for me with recent LLVM/Clang 7 revisions from
> > > upstream's SVN (I use the packages from apt.llvm.org).
> > > 
> > > If you accept it, please commit it for me, I do not have commit access.
> > > 
> > > The CC to stable can be dropped, if stable branches are not to be
> > > expected to be buildable with LLVM/Clang from SVN.
> > 
> > thanks for looking into this. TBH I don't understand how this patch
> > works, it's dropping and reordering linked libraries.
> > I've added Emil and Francisco to cc.
> 
> the removal is just the removal of a duplicate line (the first Clang library
> named is already "clangCodeGen"). And AFAICT there's no "-Wl,--as-needed" in
> use. At least I don't find anything in the build logs.
> 
> > >  src/gallium/targets/opencl/Makefile.am | 3 +--
> > >  1 file changed, 1 insertion(+), 2 deletions(-)
> > > 
> > > diff --git a/src/gallium/targets/opencl/Makefile.am 
> > > b/src/gallium/targets/opencl/Makefile.am
> > > index de68a93ad5..f0e1de7797 100644
> > > --- a/src/gallium/targets/opencl/Makefile.am
> > > +++ b/src/gallium/targets/opencl/Makefile.am
> > > @@ -23,11 +23,10 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
> > >   $(LIBELF_LIBS) \
> > >   $(DLOPEN_LIBS) \
> > >   -lclangCodeGen \
> > > - -lclangFrontendTool \
> > >   -lclangFrontend \
> > > + -lclangFrontendTool \
> > 
> > This is strange. Why does reordering help here? Do we use -Wl,--as-
> > needed anywhere?
> 
> No, not that I can see.
> 
> > Should we use -Wl,--start-group/-Wl,--end-group for all clang libraries
> > instead?
> 
> Maybe? This was the simplest fix I could come up with, but if there's a
> preference for a link group, I can give that a try as well.

So the fix is to change ordering?
Does using groups fix the issue as well? I think that would be
preferable, but I use split .so files, so I don't hit this issue.

> 
> > >   -lclangDriver \
> > >   -lclangSerialization \
> > > - -lclangCodeGen \
> > 
> > Is this change related?
> 
> Not really, just a minor clean-up while I was busy a few lines above.
> "clangCodeGen" is already named on the first Clang library line.

ah, all right, maybe mention it in the commit message?

Jan
> 
> Cheers,
> Kai
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Roland Scheidegger

Am 01.05.2018 um 22:49 schrieb Marek Olšák:
> On Tue, May 1, 2018 at 10:48 AM, Roland Scheidegger  > wrote:
> 
> Am 01.05.2018 um 01:43 schrieb Marek Olšák:
> > From: Marek Olšák >
> > 
> > This is a hypothetical interface for EQAA (a superset of CSAA). CSAA 
> could be
> > exposed via GL_NV_framebuffer_multisample_coverage. EQAA additionally 
> removes
> > the restriction that the number of samples in all FBO attachments must 
> match,
> > which means it allows arbitrary sample counts in each FBO attachment.
> 
> The idea sounds ok to me, as long as you don't expose the additional
> modes with more samples as ordinary msaa modes.
> (I'm quite sure at some point nvidia exposed 16x msaa modes through
> ordinary means, which then gave you really 4 color samples and 12
> coverage-only samples, in some not-very-compliant way. Though IIRC the
> extension is actually older than the ability to do per-sample msaa fb
> reads, so it wasn't as problematic back then.)
> 
> > ---
> >  src/gallium/docs/source/screen.rst   | 17 +++--
> >  src/gallium/include/pipe/p_defines.h |  1 +
> >  src/gallium/include/pipe/p_state.h   |  3 ++-
> >  3 files changed, 18 insertions(+), 3 deletions(-)
> > 
> > diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> > index 3837360fb40..28934c2f7b9 100644
> > --- a/src/gallium/docs/source/screen.rst
> > +++ b/src/gallium/docs/source/screen.rst
> > @@ -398,20 +398,22 @@ The integer capabilities:
> >  * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports 
> TGSI_OPCODE_LOAD use
> >    with constant buffers.
> >  * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used 
> as
> >    an address for indirect register indexing.
> >  * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
> >    GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
> >    pipe_rasterizer_state.
> >  * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined 
> shader
> >    output resources (images + buffers + fragment outputs). If 0 the 
> state
> >    tracker works it out.
> > +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit is set, 
> EQAA
> > +  supports (i+1) color samples.
> I agree with Nicolai here, this looks strange, this should instead query
> for total samples or something.
> 
> >  * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
> >    Whether pipe_vertex_buffer::buffer_offset is treated as signed. The 
> u_vbuf
> >    module needs this for optimal performance in workstation 
> applications.
> >  * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support 
> per-context
> >    priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
> >    supported priority levels.  A driver that does not support 
> prioritized
> >    contexts can return 0.
> >  * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling 
> semaphores
> >    using fence_server_signal().
> >  * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that 
> must be
> > @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader 
> resources.
> >  (1 for 1D or 1D array textures).
> >  
> >  **depth0** the depth of the base mip level of the texture
> >  (1 for everything else).
> >  
> >  **array_size** the array size for 1D and 2D array textures.
> >  For cube maps this must be 6, for other textures 1.
> >  
> >  **last_level** the last mip map level present.
> >  
> > -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
> > -which isn't multisampled.
> > +**nr_samples**: For Z/S, this is the number of samples. For color, if 
> EQAA
> > +is unsupported, this is the number of both coverage samples and color 
> samples.
> > +If EQAA is supported, this is the number of coverage samples. 0 and 1
> > +specify a resource which isn't multisampled.
> I think you should keep nr_samples alone, and re-change the meaning to
> actually be "real" samples with all associated data (which is what
> everybody expects with msaa).
> 
> > +
> > +**nr_color_samples**: This is the number of color samples for EQAA, 
> while
> > +``nr_samples`` is the number of coverage samples. If the format is Z/S,
> > +``nr_color_samples`` is ignored. Constraints:
> > +* ``nr_color_samples`` must not be greater than ``nr_samples``.
> > +* If ``nr_color_samples`` is equal to ``nr_samples``, it is called 
> MSAA.
> > +* If ``nr_color_samples`` is less than ``nr_samples``, it is called 
> EQAA.
> > +* If ``nr_color_samples`` is equal to 1, the behavior of the resolve 
> blit is
> > +driver-dependent.
> >  
>

Re: [Mesa-dev] [PATCH 26/29] intel/blorp: Add support for more format bitcasting

2018-05-01 Thread Jason Ekstrand

On Wed, Mar 7, 2018 at 5:08 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 26, 2018 at 05:59:55PM -0800, Jason Ekstrand wrote:
> > By making use of the NIR helper for uint vector casts, we should now be
> > able to bitcast between any two uint formats so long as their channels
> > are in RGBA order (possibly with channels missing).  In order to do this
> > we need to rework the key a bit to pass the actual formats instead of
> > just the number of bits in each.
> > ---
> >  src/intel/blorp/blorp_blit.c | 105 ++
> +
> >  src/intel/blorp/blorp_priv.h |  13 +++---
> >  2 files changed, 95 insertions(+), 23 deletions(-)
> >
> > diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> > index ea0687f..321b3e3 100644
> > --- a/src/intel/blorp/blorp_blit.c
> > +++ b/src/intel/blorp/blorp_blit.c
> > @@ -871,17 +871,71 @@ bit_cast_color(struct nir_builder *b, nir_ssa_def
> *color,
> >  {
> > assert(key->texture_data_type == nir_type_uint);
> >
> > -   /* We don't actually know how many source channels we have and NIR
> will
> > -* assert if the number of destination channels ends up being more
> than 4.
> > -* Choose the largest number of source channels that won't over-fill
> a
> > -* destination vec4.
> > -*/
> > -   const unsigned src_channels =
> > -  MIN2(4, (4 * key->dst_bpc) / key->src_bpc);
> > -   color = nir_channels(b, color, (1 << src_channels) - 1);
> > +   if (key->src_format == key->dst_format)
> > +  return color;
> >
> > -   color = nir_format_bitcast_uint_vec_unmasked(b, color, key->src_bpc,
> > -key->dst_bpc);
> > +   const struct isl_format_layout *src_fmtl =
> > +  isl_format_get_layout(key->src_format);
> > +   const struct isl_format_layout *dst_fmtl =
> > +  isl_format_get_layout(key->dst_format);
> > +
> > +   /* They must be uint formats with the same bit size */
> > +   assert(src_fmtl->bpb == dst_fmtl->bpb);
> > +   assert(src_fmtl->channels.r.type == ISL_UINT);
> > +   assert(dst_fmtl->channels.r.type == ISL_UINT);
> > +
> > +   /* They must be in regular color formats (no luminance or alpha) */
> > +   assert(src_fmtl->channels.r.bits > 0);
> > +   assert(dst_fmtl->channels.r.bits > 0);
> > +
> > +   /* They must be in RGBA order (possibly with channels missing) */
> > +   assert(src_fmtl->channels.r.start_bit == 0);
> > +   assert(dst_fmtl->channels.r.start_bit == 0);
> > +
> > +   if (src_fmtl->bpb <= 32) {
> > +  const unsigned src_channels =
> > + isl_format_get_num_channels(key->src_format);
> > +  const unsigned src_bits[4] = {
> > + src_fmtl->channels.r.bits,
> > + src_fmtl->channels.g.bits,
> > + src_fmtl->channels.b.bits,
> > + src_fmtl->channels.a.bits,
> > +  };
> > +  const unsigned dst_channels =
> > + isl_format_get_num_channels(key->dst_format);
> > +  const unsigned dst_bits[4] = {
> > + dst_fmtl->channels.r.bits,
> > + dst_fmtl->channels.g.bits,
> > + dst_fmtl->channels.b.bits,
> > + dst_fmtl->channels.a.bits,
> > +  };
> > +  nir_ssa_def *packed =
> > + nir_format_pack_uint_unmasked(b, color, src_bits,
> src_channels);
> > +  color = nir_format_unpack_uint(b, packed, dst_bits, dst_channels);
>
> I tried to think why nir_format_bitcast_uint_vec_unmasked() can't handle
> these cases (src_fmtl->bpb <= 32). At this point it still does, right?
> Is using nir_format_pack_uint_unmasked()/nir_format_unpack_uint() here
> already preparing for cases where src or dst is ISL_FORMAT_R11G11B10_FLOAT,
> which nir_format_bitcast_uint_vec_unmasked() can't handle anymore?
>

Correct.  nir_format_bitcast_uint_vec_unmasked can handle any cast from one
format with a uniform channel size to another so long as the channel sizes
are at least 8 bits.  However, for 1010102 and similar, it falls over.
Maybe I should say that in the commit message.


> > +   } else {
> > +  const unsigned src_bpc = src_fmtl->channels.r.bits;
> > +  const unsigned dst_bpc = dst_fmtl->channels.r.bits;
> > +
> > +  assert(src_fmtl->channels.g.bits == 0 ||
> > + src_fmtl->channels.g.bits == src_fmtl->channels.r.bits);
> > +  assert(src_fmtl->channels.b.bits == 0 ||
> > + src_fmtl->channels.b.bits == src_fmtl->channels.r.bits);
> > +  assert(src_fmtl->channels.a.bits == 0 ||
> > + src_fmtl->channels.a.bits == src_fmtl->channels.r.bits);
> > +  assert(dst_fmtl->channels.g.bits == 0 ||
> > + dst_fmtl->channels.g.bits == dst_fmtl->channels.r.bits);
> > +  assert(dst_fmtl->channels.b.bits == 0 ||
> > + dst_fmtl->channels.b.bits == dst_fmtl->channels.r.bits);
> > +  assert(dst_fmtl->channels.a.bits == 0 ||
> > + dst_fmtl->channels.a.bits == dst_fmtl->channels.r.bits);
> > +
> > +  /* Restrict to only the channels we actually

Re: [Mesa-dev] [PATCH 20/29] intel/isl/format: Add field locations informations to channel_layout

2018-05-01 Thread Jason Ekstrand

ping

On Tue, Mar 6, 2018 at 9:53 AM, Jason Ekstrand  wrote:

> On Tue, Mar 6, 2018 at 9:46 AM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
>
>> On Tue, Mar 06, 2018 at 09:11:18AM -0800, Jason Ekstrand wrote:
>> > On Tue, Mar 6, 2018 at 1:54 AM, Pohjolainen, Topi <
>> > topi.pohjolai...@gmail.com> wrote:
>> >
>> > > On Fri, Jan 26, 2018 at 05:59:49PM -0800, Jason Ekstrand wrote:
>> > > >  class Format(object):
>> > > > @@ -160,7 +163,14 @@ class Format(object):
>> > > >  self.l = Channel(line[9])
>> > > >  self.i = Channel(line[10])
>> > > >  self.p = Channel(line[11])
>> > > > -self.order = line[12]
>> > > > +
>> > > > +# Set the start bit value for each channel
>> > > > +order = line[12].strip()
>> > >
>> > > You aren't setting "self.order" anymore?
>> > >
>> >
>> > No.  Nothing was using it.  It doesn't really matter to me whether we
>> store
>> > it in the Format object or not.
>>
>> So it is not really needed in the previous patch then?
>>
>
> Correct.
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

We need a way to distinguish between AA and no AA even if color samples ==
1. nr_samples is currently used for that purpose, so it makes sense to use
nr_samples for coverage samples. color samples == 1 means that you can
store 1 color sample per pixel and other samples are flagged as undefined
and can be guessed by looking at neighbors. In special cases, it's possible
to get 16x AA quality when color samples == 1.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

The maximum config is:
- 16 coverage samples
- 8 Z/S samples
- 8 color samples

The minimum config is:
- 1-16 coverage samples
- 1 Z/S sample
- 1 color sample

The coverage buffer (each color buffer has one) remembers undefined samples
if color samples < coverage samples, and the resolve shader can guess
values of undefined samples by looking at neighbors.

Marek

On Mon, Apr 30, 2018 at 7:43 PM, Marek Olšák  wrote:

> From: Marek Olšák 
>
> This is a hypothetical interface for EQAA (a superset of CSAA). CSAA could
> be
> exposed via GL_NV_framebuffer_multisample_coverage. EQAA additionally
> removes
> the restriction that the number of samples in all FBO attachments must
> match,
> which means it allows arbitrary sample counts in each FBO attachment.
> ---
>  src/gallium/docs/source/screen.rst   | 17 +++--
>  src/gallium/include/pipe/p_defines.h |  1 +
>  src/gallium/include/pipe/p_state.h   |  3 ++-
>  3 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/docs/source/screen.rst
> b/src/gallium/docs/source/screen.rst
> index 3837360fb40..28934c2f7b9 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -398,20 +398,22 @@ The integer capabilities:
>  * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports
> TGSI_OPCODE_LOAD use
>with constant buffers.
>  * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used as
>an address for indirect register indexing.
>  * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
>GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
>pipe_rasterizer_state.
>  * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined
> shader
>output resources (images + buffers + fragment outputs). If 0 the state
>tracker works it out.
> +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit is set,
> EQAA
> +  supports (i+1) color samples.
>  * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
>Whether pipe_vertex_buffer::buffer_offset is treated as signed. The
> u_vbuf
>module needs this for optimal performance in workstation applications.
>  * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support
> per-context
>priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
>supported priority levels.  A driver that does not support prioritized
>contexts can return 0.
>  * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling
> semaphores
>using fence_server_signal().
>  * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that
> must be
> @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader resources.
>  (1 for 1D or 1D array textures).
>
>  **depth0** the depth of the base mip level of the texture
>  (1 for everything else).
>
>  **array_size** the array size for 1D and 2D array textures.
>  For cube maps this must be 6, for other textures 1.
>
>  **last_level** the last mip map level present.
>
> -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
> -which isn't multisampled.
> +**nr_samples**: For Z/S, this is the number of samples. For color, if EQAA
> +is unsupported, this is the number of both coverage samples and color
> samples.
> +If EQAA is supported, this is the number of coverage samples. 0 and 1
> +specify a resource which isn't multisampled.
> +
> +**nr_color_samples**: This is the number of color samples for EQAA, while
> +``nr_samples`` is the number of coverage samples. If the format is Z/S,
> +``nr_color_samples`` is ignored. Constraints:
> +* ``nr_color_samples`` must not be greater than ``nr_samples``.
> +* If ``nr_color_samples`` is equal to ``nr_samples``, it is called MSAA.
> +* If ``nr_color_samples`` is less than ``nr_samples``, it is called EQAA.
> +* If ``nr_color_samples`` is equal to 1, the behavior of the resolve blit
> is
> +driver-dependent.
>
>  **usage** one of the :ref:`PIPE_USAGE` flags.
>
>  **bind** bitmask of the :ref:`PIPE_BIND` flags.
>
>  **flags** bitmask of PIPE_RESOURCE_FLAG flags.
>
>
>
>  resource_changed
> diff --git a/src/gallium/include/pipe/p_defines.h
> b/src/gallium/include/pipe/p_defines.h
> index c4ae0532060..97e1a3a3d42 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -783,20 +783,21 @@ enum pipe_cap
> PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION,
> PIPE_CAP_POST_DEPTH_COVERAGE,
> PIPE_CAP_BINDLESS_TEXTURE,
> PIPE_CAP_NIR_SAMPLERS_AS_DEREF,
> PIPE_CAP_QUERY_SO_OVERFLOW,
> PIPE_CAP_MEMOBJ,
> PIPE_CAP_LOAD_CONSTBUF,
> PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS,
> PIPE_CAP_TILE_RASTER_ORDER,
> PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES,
> +   PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK,
> PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET,
> PIPE_CAP_CONTEXT_PRIORITY_MASK,
> PIPE_CAP_FENCE_SIGNAL,
> PIPE_CAP_CONSTBUF0_FLAGS,
> PIPE_CAP_PACKED_UNIFORMS,
>  };
>
>  /**

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

On Tue, May 1, 2018 at 10:51 AM, Axel Davy  wrote:

> Hi,
>
>
> On 01/05/2018 01:43, Marek Olšák wrote:
>
>> From: Marek Olšák 
>>
>> This is a hypothetical interface for EQAA (a superset of CSAA). CSAA
>> could be
>> exposed via GL_NV_framebuffer_multisample_coverage. EQAA additionally
>> removes
>> the restriction that the number of samples in all FBO attachments must
>> match,
>> which means it allows arbitrary sample counts in each FBO attachment.
>> ---
>>   src/gallium/docs/source/screen.rst   | 17 +++--
>>   src/gallium/include/pipe/p_defines.h |  1 +
>>   src/gallium/include/pipe/p_state.h   |  3 ++-
>>   3 files changed, 18 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/docs/source/screen.rst
>> b/src/gallium/docs/source/screen.rst
>> index 3837360fb40..28934c2f7b9 100644
>> --- a/src/gallium/docs/source/screen.rst
>> +++ b/src/gallium/docs/source/screen.rst
>> @@ -398,20 +398,22 @@ The integer capabilities:
>>   * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports
>> TGSI_OPCODE_LOAD use
>> with constant buffers.
>>   * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used
>> as
>> an address for indirect register indexing.
>>   * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
>> GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
>> pipe_rasterizer_state.
>>   * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined
>> shader
>> output resources (images + buffers + fragment outputs). If 0 the state
>> tracker works it out.
>> +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit is set,
>> EQAA
>> +  supports (i+1) color samples.
>>   * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
>> Whether pipe_vertex_buffer::buffer_offset is treated as signed. The
>> u_vbuf
>> module needs this for optimal performance in workstation applications.
>>   * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support
>> per-context
>> priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
>> supported priority levels.  A driver that does not support prioritized
>> contexts can return 0.
>>   * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling
>> semaphores
>> using fence_server_signal().
>>   * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that
>> must be
>> @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader resources.
>>   (1 for 1D or 1D array textures).
>> **depth0** the depth of the base mip level of the texture
>>   (1 for everything else).
>> **array_size** the array size for 1D and 2D array textures.
>>   For cube maps this must be 6, for other textures 1.
>> **last_level** the last mip map level present.
>>   -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
>> -which isn't multisampled.
>> +**nr_samples**: For Z/S, this is the number of samples. For color, if
>> EQAA
>> +is unsupported, this is the number of both coverage samples and color
>> samples.
>> +If EQAA is supported, this is the number of coverage samples. 0 and 1
>> +specify a resource which isn't multisampled.
>> +
>> +**nr_color_samples**: This is the number of color samples for EQAA, while
>> +``nr_samples`` is the number of coverage samples. If the format is Z/S,
>> +``nr_color_samples`` is ignored. Constraints:
>> +* ``nr_color_samples`` must not be greater than ``nr_samples``.
>> +* If ``nr_color_samples`` is equal to ``nr_samples``, it is called MSAA.
>> +* If ``nr_color_samples`` is less than ``nr_samples``, it is called EQAA.
>> +* If ``nr_color_samples`` is equal to 1, the behavior of the resolve
>> blit is
>> +driver-dependent.
>> **usage** one of the :ref:`PIPE_USAGE` flags.
>> **bind** bitmask of the :ref:`PIPE_BIND` flags.
>> **flags** bitmask of PIPE_RESOURCE_FLAG flags.
>>
>>
> To remove ambiguity, I'd suggest having both nr_coverage_samples and
> nr_zsc_samples (for Z/S + color),
> which would have to be equal if EQAA is not supported.
>
> I guess there are going to be constraints on the combination of the values
> of these two.
> I'm not sure the proposed cap is sufficient.
> If CSAA has more constraints than EQAA, are these constraints respected
> with the proposed system?


This is for EQAA, not CSAA, but the interface will support CSAA too.


>
>
>   resource_changed
>> diff --git a/src/gallium/include/pipe/p_defines.h
>> b/src/gallium/include/pipe/p_defines.h
>> index c4ae0532060..97e1a3a3d42 100644
>> --- a/src/gallium/include/pipe/p_defines.h
>> +++ b/src/gallium/include/pipe/p_defines.h
>> @@ -783,20 +783,21 @@ enum pipe_cap
>>  PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION,
>>  PIPE_CAP_POST_DEPTH_COVERAGE,
>>  PIPE_CAP_BINDLESS_TEXTURE,
>>  PIPE_CAP_NIR_SAMPLERS_AS_DEREF,
>>  PIPE_CAP_QUERY_SO_OVERFLOW,
>>  PIPE_CAP_MEMOBJ,
>>  PIPE_CAP_LOAD_CONSTBUF,
>>  PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS,
>>

[Mesa-dev] [PATCH 3/5] v3d: Rename the driver files from "vc5" to "v3d".

2018-05-01 Thread Eric Anholt

---
 configure.ac  |  4 +-
 src/broadcom/Makefile.am  |  2 +-
 .../{Makefile.vc5.am => Makefile.v3d.am}  |  0
 src/gallium/Makefile.am   |  2 +-
 .../auxiliary/target-helpers/drm_helper.h |  2 +-
 .../drivers/{vc5 => v3d}/.editorconfig|  0
 src/gallium/drivers/{vc5 => v3d}/Automake.inc |  4 +-
 src/gallium/drivers/{vc5 => v3d}/Makefile.am  |  0
 src/gallium/drivers/v3d/Makefile.sources  | 36 +
 src/gallium/drivers/{vc5 => v3d}/meson.build  | 50 +--
 .../{vc5/vc5_blit.c => v3d/v3d_blit.c}|  2 +-
 .../{vc5/vc5_bufmgr.c => v3d/v3d_bufmgr.c}|  4 +-
 .../{vc5/vc5_bufmgr.h => v3d/v3d_bufmgr.h}|  2 +-
 .../drivers/{vc5/vc5_cl.c => v3d/v3d_cl.c}|  2 +-
 .../drivers/{vc5/vc5_cl.h => v3d/v3d_cl.h}|  0
 .../{vc5/vc5_context.c => v3d/v3d_context.c}  |  6 +--
 .../{vc5/vc5_context.h => v3d/v3d_context.h}  |  8 +--
 .../{vc5/vc5_fence.c => v3d/v3d_fence.c}  |  4 +-
 .../v3d_format_table.h}   |  0
 .../{vc5/vc5_formats.c => v3d/v3d_formats.c}  |  4 +-
 .../drivers/{vc5/vc5_job.c => v3d/v3d_job.c}  |  2 +-
 .../{vc5/vc5_program.c => v3d/v3d_program.c}  |  2 +-
 .../{vc5/vc5_query.c => v3d/v3d_query.c}  |  2 +-
 .../vc5_resource.c => v3d/v3d_resource.c} |  8 +--
 .../vc5_resource.h => v3d/v3d_resource.h} |  2 +-
 .../{vc5/vc5_screen.c => v3d/v3d_screen.c}|  6 +--
 .../{vc5/vc5_screen.h => v3d/v3d_screen.h}|  0
 .../vc5_simulator.c => v3d/v3d_simulator.c}   |  6 +--
 .../v3d_simulator_wrapper.cpp}|  4 +-
 .../v3d_simulator_wrapper.h}  |  0
 .../{vc5/vc5_tiling.c => v3d/v3d_tiling.c}|  6 +--
 .../{vc5/vc5_tiling.h => v3d/v3d_tiling.h}|  0
 .../vc5_uniforms.c => v3d/v3d_uniforms.c} |  2 +-
 .../drivers/{vc5 => v3d}/v3dx_context.h   |  2 +-
 .../{vc5/vc5_draw.c => v3d/v3dx_draw.c}   |  6 +--
 .../{vc5/vc5_emit.c => v3d/v3dx_emit.c}   |  2 +-
 .../drivers/{vc5 => v3d}/v3dx_format_table.c  |  4 +-
 src/gallium/drivers/{vc5 => v3d}/v3dx_job.c   |  2 +-
 .../drivers/{vc5/vc5_rcl.c => v3d/v3dx_rcl.c} |  4 +-
 .../drivers/{vc5 => v3d}/v3dx_simulator.c |  6 +--
 .../{vc5/vc5_state.c => v3d/v3dx_state.c} |  4 +-
 src/gallium/drivers/vc5/Makefile.sources  | 36 -
 src/gallium/meson.build   |  4 +-
 src/gallium/targets/dri/Makefile.am   |  2 +-
 .../winsys/{vc5 => v3d}/drm/Android.mk|  0
 .../winsys/{vc5 => v3d}/drm/Makefile.am   |  0
 src/gallium/winsys/v3d/drm/Makefile.sources   |  3 ++
 .../winsys/{vc5 => v3d}/drm/meson.build   |  2 +-
 .../drm/v3d_drm_public.h} |  0
 .../drm/v3d_drm_winsys.c} |  4 +-
 src/gallium/winsys/vc5/drm/Makefile.sources   |  3 --
 51 files changed, 128 insertions(+), 128 deletions(-)
 rename src/broadcom/{Makefile.vc5.am => Makefile.v3d.am} (100%)
 rename src/gallium/drivers/{vc5 => v3d}/.editorconfig (100%)
 rename src/gallium/drivers/{vc5 => v3d}/Automake.inc (68%)
 rename src/gallium/drivers/{vc5 => v3d}/Makefile.am (100%)
 create mode 100644 src/gallium/drivers/v3d/Makefile.sources
 rename src/gallium/drivers/{vc5 => v3d}/meson.build (84%)
 rename src/gallium/drivers/{vc5/vc5_blit.c => v3d/v3d_blit.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_bufmgr.c => v3d/v3d_bufmgr.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_bufmgr.h => v3d/v3d_bufmgr.h} (99%)
 rename src/gallium/drivers/{vc5/vc5_cl.c => v3d/v3d_cl.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_cl.h => v3d/v3d_cl.h} (100%)
 rename src/gallium/drivers/{vc5/vc5_context.c => v3d/v3d_context.c} (98%)
 rename src/gallium/drivers/{vc5/vc5_context.h => v3d/v3d_context.h} (99%)
 rename src/gallium/drivers/{vc5/vc5_fence.c => v3d/v3d_fence.c} (98%)
 rename src/gallium/drivers/{vc5/vc5_format_table.h => v3d/v3d_format_table.h} 
(100%)
 rename src/gallium/drivers/{vc5/vc5_formats.c => v3d/v3d_formats.c} (98%)
 rename src/gallium/drivers/{vc5/vc5_job.c => v3d/v3d_job.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_program.c => v3d/v3d_program.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_query.c => v3d/v3d_query.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_resource.c => v3d/v3d_resource.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_resource.h => v3d/v3d_resource.h} (99%)
 rename src/gallium/drivers/{vc5/vc5_screen.c => v3d/v3d_screen.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_screen.h => v3d/v3d_screen.h} (100%)
 rename src/gallium/drivers/{vc5/vc5_simulator.c => v3d/v3d_simulator.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_simulator_wrapper.cpp => 
v3d/v3d_simulator_wrapper.cpp} (97%)
 rename src/gallium/drivers/{vc5/vc5_simulator_wrapper.h => 
v3d/v3d_simulator_wrapper.h} (100%)
 rename src/gallium/drivers/{vc5/vc5_tiling.c => v3d/v3d_tiling.c} (99%)
 rename src/gallium/drivers/{vc5/vc5_tiling.h => v3d/v3d_tiling.h} (100%)
 rename src/gallium/drivers/{vc5/vc5_uniforms.c => v3d/v3d_uniforms.c} (99%)
 rename

[Mesa-dev] [PATCH 1/5] v3d: Switch the vc5 driver to using the finalized V3D UABI.

2018-05-01 Thread Eric Anholt

In the process of merging to the kernel, I renamed the driver to the
general product line's name (since we have both vc5 and vc6 supported
already).  Since the ABI is finalized, move the header to include/drm-uapi.
---
 Makefile.am   |  1 +
 .../vc5_drm.h => include/drm-uapi/v3d_drm.h   | 75 ++-
 src/gallium/drivers/vc5/Makefile.sources  |  1 -
 src/gallium/drivers/vc5/v3dx_context.h|  4 +-
 src/gallium/drivers/vc5/v3dx_simulator.c  | 18 ++---
 src/gallium/drivers/vc5/vc5_bufmgr.c  | 16 ++--
 src/gallium/drivers/vc5/vc5_context.h |  8 +-
 src/gallium/drivers/vc5/vc5_job.c |  2 +-
 src/gallium/drivers/vc5/vc5_screen.c  | 13 ++--
 src/gallium/drivers/vc5/vc5_simulator.c   | 20 ++---
 10 files changed, 79 insertions(+), 79 deletions(-)
 rename src/gallium/drivers/vc5/vc5_drm.h => include/drm-uapi/v3d_drm.h (68%)

diff --git a/Makefile.am b/Makefile.am
index 86d7e7f9107b..9e27db046e52 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -77,6 +77,7 @@ noinst_HEADERS = \
include/drm-uapi/drm_mode.h \
include/drm-uapi/i915_drm.h \
include/drm-uapi/tegra_drm.h \
+   include/drm-uapi/v3d_drm.h \
include/drm-uapi/vc4_drm.h \
include/D3D9 \
include/GL/wglext.h \
diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/include/drm-uapi/v3d_drm.h
similarity index 68%
rename from src/gallium/drivers/vc5/vc5_drm.h
rename to include/drm-uapi/v3d_drm.h
index 184863d206a6..7b6627783608 100644
--- a/src/gallium/drivers/vc5/vc5_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2014-2017 Broadcom
+ * Copyright © 2014-2018 Broadcom
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -21,8 +21,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef _VC5_DRM_H_
-#define _VC5_DRM_H_
+#ifndef _V3D_DRM_H_
+#define _V3D_DRM_H_
 
 #include "drm.h"
 
@@ -30,28 +30,28 @@
 extern "C" {
 #endif
 
-#define DRM_VC5_SUBMIT_CL 0x00
-#define DRM_VC5_WAIT_BO   0x01
-#define DRM_VC5_CREATE_BO 0x02
-#define DRM_VC5_MMAP_BO   0x03
-#define DRM_VC5_GET_PARAM 0x04
-#define DRM_VC5_GET_BO_OFFSET 0x05
+#define DRM_V3D_SUBMIT_CL 0x00
+#define DRM_V3D_WAIT_BO   0x01
+#define DRM_V3D_CREATE_BO 0x02
+#define DRM_V3D_MMAP_BO   0x03
+#define DRM_V3D_GET_PARAM 0x04
+#define DRM_V3D_GET_BO_OFFSET 0x05
 
-#define DRM_IOCTL_VC5_SUBMIT_CL   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl)
-#define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo)
-#define DRM_IOCTL_VC5_CREATE_BO   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_CREATE_BO, struct drm_vc5_create_bo)
-#define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo)
-#define DRM_IOCTL_VC5_GET_PARAM   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_GET_PARAM, struct drm_vc5_get_param)
-#define DRM_IOCTL_VC5_GET_BO_OFFSET   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_VC5_GET_BO_OFFSET, struct drm_vc5_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_CL   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
+#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
+#define DRM_IOCTL_V3D_CREATE_BO   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
+#define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
+#define DRM_IOCTL_V3D_GET_PARAM   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
+#define DRM_IOCTL_V3D_GET_BO_OFFSET   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
 
 /**
- * struct drm_vc5_submit_cl - ioctl argument for submitting commands to the 3D
+ * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
  * engine.
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
  */
-struct drm_vc5_submit_cl {
+struct drm_v3d_submit_cl {
/* Pointer to the binner command list.
 *
 * This is the first set of commands executed, which runs the
@@ -101,29 +101,32 @@ struct drm_vc5_submit_cl {
 
/* Number of BO handles passed in (size is that times 4). */
__u32 bo_handle_count;
+
+   /* Pad, must be zero-filled. */
+   __u32 pad;
 };
 
 /**
- * struct drm_vc5_wait_bo - ioctl argument for waiting for
- * completion of the last DRM_VC5_SUBMIT_CL on a BO.
+ * struct drm_v3d_wait_bo - ioctl

[Mesa-dev] [PATCH 2/5] v3d: Rename the vc5_dri.so driver to v3d_dri.so.

2018-05-01 Thread Eric Anholt

This allows the driver to load against the merged kernel DRM driver.  In
the process, rename most of the build system variables and gallium
plumbing functions.
---
 configure.ac  | 18 ++--
 meson.build   |  6 ++--
 meson_options.txt |  2 +-
 src/broadcom/Makefile.vc5.am  |  6 ++--
 src/broadcom/meson.build  | 10 +++
 src/gallium/Makefile.am   |  4 +--
 .../auxiliary/pipe-loader/pipe_loader_drm.c   |  8 +++---
 .../auxiliary/target-helpers/drm_helper.h | 10 +++
 .../target-helpers/drm_helper_public.h|  4 +--
 src/gallium/drivers/vc5/Automake.inc  | 10 +++
 src/gallium/drivers/vc5/Makefile.am   | 28 +--
 src/gallium/drivers/vc5/Makefile.sources  |  2 +-
 src/gallium/drivers/vc5/meson.build   | 22 +++
 src/gallium/drivers/vc5/v3dx_simulator.c  |  4 +--
 src/gallium/drivers/vc5/vc5_bufmgr.c  |  2 +-
 src/gallium/drivers/vc5/vc5_context.h |  2 +-
 src/gallium/drivers/vc5/vc5_job.c |  2 +-
 src/gallium/drivers/vc5/vc5_screen.c  |  4 +--
 src/gallium/drivers/vc5/vc5_screen.h  |  2 +-
 src/gallium/drivers/vc5/vc5_simulator.c   |  4 +--
 .../drivers/vc5/vc5_simulator_wrapper.cpp |  4 +--
 src/gallium/meson.build   |  4 +--
 src/gallium/targets/dri/meson.build   |  4 +--
 src/gallium/targets/dri/target.c  |  8 +++---
 src/gallium/winsys/vc5/drm/Android.mk |  2 +-
 src/gallium/winsys/vc5/drm/Makefile.am|  4 +--
 src/gallium/winsys/vc5/drm/meson.build|  4 +--
 src/gallium/winsys/vc5/drm/vc5_drm_public.h   |  2 +-
 src/gallium/winsys/vc5/drm/vc5_drm_winsys.c   |  4 +--
 29 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/configure.ac b/configure.ac
index f1fbdcc6c7c0..476cf7974b89 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1359,7 +1359,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
 [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
 [comma delimited Gallium drivers list, e.g.
-
"i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,tegra,vc4,vc5,virgl,etnaviv,imx"
+
"i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,tegra,v3d,vc4,virgl,etnaviv,imx"
 @<:@default=r300,r600,svga,swrast@:>@])],
 [with_gallium_drivers="$withval"],
 [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -2721,12 +2721,12 @@ if test -n "$with_gallium_drivers"; then
DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
   [USE_VC4_SIMULATOR=no])
 ;;
-xvc5)
-HAVE_GALLIUM_VC5=yes
+xv3d)
+HAVE_GALLIUM_V3D=yes
 
-PKG_CHECK_MODULES([VC5_SIMULATOR], [v3dv3],
-  [USE_VC5_SIMULATOR=yes;
-   DEFINES="$DEFINES -DUSE_VC5_SIMULATOR"],
+PKG_CHECK_MODULES([V3D_SIMULATOR], [v3dv3],
+  [USE_V3D_SIMULATOR=yes;
+   DEFINES="$DEFINES -DUSE_V3D_SIMULATOR"],
   [AC_MSG_ERROR([vc5 requires the simulator])])
 ;;
 xpl111)
@@ -2879,8 +2879,8 @@ AM_CONDITIONAL(HAVE_GALLIUM_SWR, test 
"x$HAVE_GALLIUM_SWR" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SWRAST, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes -o \
  "x$HAVE_GALLIUM_LLVMPIPE" = xyes -o \
  "x$HAVE_GALLIUM_SWR" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_V3D, test "x$HAVE_GALLIUM_V3D" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
-AM_CONDITIONAL(HAVE_GALLIUM_VC5, test "x$HAVE_GALLIUM_VC5" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
 
 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test 
"x$enable_shared_pipe_drivers" = xno)
@@ -2908,7 +2908,7 @@ AM_CONDITIONAL(HAVE_AMD_DRIVERS, test 
"x$HAVE_GALLIUM_RADEONSI" = xyes -o \
   "x$HAVE_RADEON_VULKAN" = xyes)
 
 AM_CONDITIONAL(HAVE_BROADCOM_DRIVERS, test "x$HAVE_GALLIUM_VC4" = xyes -o \
-  "x$HAVE_GALLIUM_VC5" = xyes)
+  "x$HAVE_GALLIUM_V3D" = xyes)
 
 AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
 "x$HAVE_I965_DRI" = xyes)
@@ -2919,8 +2919,8 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test 
"x$HAVE_GALLIUM_R300" = xyes -o \
 AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$enable_glx" = xgallium-xlib)
 AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_LLVM, test "x$enable_llvm" = xyes)
+AM_CONDITIONAL(USE_V3D_SIMULATOR, test x$USE_V3D_SIMULATOR = xyes)

[Mesa-dev] [PATCH 5/5] v3d: Enable the driver by default.

2018-05-01 Thread Eric Anholt

Now that we have a stabilized ABI and a fairly conformant driver, turn it
on.
---
 configure.ac| 2 +-
 src/gallium/drivers/v3d/meson.build | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index d5228032bb7b..626b238511e0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2727,7 +2727,7 @@ if test -n "$with_gallium_drivers"; then
 PKG_CHECK_MODULES([V3D_SIMULATOR], [v3dv3],
   [USE_V3D_SIMULATOR=yes;
DEFINES="$DEFINES -DUSE_V3D_SIMULATOR"],
-  [AC_MSG_ERROR([vc5 requires the simulator])])
+  [USE_V3D_SIMULATOR=no])
 ;;
 xpl111)
 HAVE_GALLIUM_PL111=yes
diff --git a/src/gallium/drivers/v3d/meson.build 
b/src/gallium/drivers/v3d/meson.build
index 38021515eda6..18e68a626968 100644
--- a/src/gallium/drivers/v3d/meson.build
+++ b/src/gallium/drivers/v3d/meson.build
@@ -53,7 +53,7 @@ files_per_version = files(
 )
 
 v3dv3_c_args = []
-dep_v3dv3 = dependency('v3dv3')
+dep_v3dv3 = dependency('v3dv3', required: false)
 if dep_v3dv3.found()
   v3dv3_c_args = '-DUSE_V3D_SIMULATOR'
 endif
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/5] vc5 rename to v3d and enable by default.

2018-05-01 Thread Eric Anholt

Unless there's some last-minute catch, I'll be landing the kernel
driver this week, so I'd like to enable the Mesa side of the vc5
driver once that hits drm-next.  I've renamed the kernel side to "v3d"
since we've got both V3D 3.x ("vc5") and V3D 4.x ("vc6") supported
already, and I'd like to avoid the confusion we always had with the
i915 kernel driver vs i965 in mesa.  This series renames Mesa to
follow the kernel's name.

Eric Anholt (5):
  v3d: Switch the vc5 driver to using the finalized V3D UABI.
  v3d: Rename the vc5_dri.so driver to v3d_dri.so.
  v3d: Rename the driver files from "vc5" to "v3d".
  v3d: Rename driver functions from vc5 to v3d.
  v3d: Enable the driver by default.

 Makefile.am   |   1 +
 configure.ac  |  24 +-
 .../vc5_drm.h => include/drm-uapi/v3d_drm.h   |  75 ++--
 meson.build   |   6 +-
 meson_options.txt |   2 +-
 src/broadcom/Makefile.am  |   2 +-
 .../{Makefile.vc5.am => Makefile.v3d.am}  |   6 +-
 src/broadcom/meson.build  |  10 +-
 src/gallium/Makefile.am   |   6 +-
 .../auxiliary/pipe-loader/pipe_loader_drm.c   |   8 +-
 .../auxiliary/target-helpers/drm_helper.h |  12 +-
 .../target-helpers/drm_helper_public.h|   4 +-
 .../drivers/{vc5 => v3d}/.editorconfig|   0
 src/gallium/drivers/v3d/Automake.inc  |  14 +
 src/gallium/drivers/{vc5 => v3d}/Makefile.am  |  28 +-
 src/gallium/drivers/v3d/Makefile.sources  |  36 ++
 src/gallium/drivers/{vc5 => v3d}/meson.build  |  74 ++--
 .../{vc5/vc5_blit.c => v3d/v3d_blit.c}|  98 +++---
 .../{vc5/vc5_bufmgr.c => v3d/v3d_bufmgr.c}| 172 +-
 .../{vc5/vc5_bufmgr.h => v3d/v3d_bufmgr.h}|  52 +--
 .../drivers/{vc5/vc5_cl.c => v3d/v3d_cl.c}|  26 +-
 .../drivers/{vc5/vc5_cl.h => v3d/v3d_cl.h}|  98 +++---
 .../{vc5/vc5_context.c => v3d/v3d_context.c}  | 118 +++
 .../{vc5/vc5_context.h => v3d/v3d_context.h}  | 230 +++--
 .../{vc5/vc5_fence.c => v3d/v3d_fence.c}  |  42 +--
 .../v3d_format_table.h}   |   2 +-
 .../{vc5/vc5_formats.c => v3d/v3d_formats.c}  |  38 +--
 .../drivers/{vc5/vc5_job.c => v3d/v3d_job.c}  | 192 +--
 .../{vc5/vc5_program.c => v3d/v3d_program.c}  | 266 +++
 .../{vc5/vc5_query.c => v3d/v3d_query.c}  |  86 ++---
 .../vc5_resource.c => v3d/v3d_resource.c} | 228 ++---
 .../vc5_resource.h => v3d/v3d_resource.h} |  50 +--
 .../{vc5/vc5_screen.c => v3d/v3d_screen.c}|  91 +++--
 .../{vc5/vc5_screen.h => v3d/v3d_screen.h}|  24 +-
 .../vc5_simulator.c => v3d/v3d_simulator.c}   | 184 +-
 .../v3d_simulator_wrapper.cpp}|   8 +-
 .../v3d_simulator_wrapper.h}  |   0
 .../{vc5/vc5_tiling.c => v3d/v3d_tiling.c}| 108 +++---
 .../{vc5/vc5_tiling.h => v3d/v3d_tiling.h}|  18 +-
 .../vc5_uniforms.c => v3d/v3d_uniforms.c} | 116 +++
 .../drivers/{vc5 => v3d}/v3dx_context.h   |  14 +-
 .../{vc5/vc5_draw.c => v3d/v3dx_draw.c}   | 270 +++
 .../{vc5/vc5_emit.c => v3d/v3dx_emit.c}   | 260 +++---
 .../drivers/{vc5 => v3d}/v3dx_format_table.c  |   8 +-
 src/gallium/drivers/{vc5 => v3d}/v3dx_job.c   |   6 +-
 .../drivers/{vc5/vc5_rcl.c => v3d/v3dx_rcl.c} |  78 ++---
 .../drivers/{vc5 => v3d}/v3dx_simulator.c |  50 +--
 .../{vc5/vc5_state.c => v3d/v3dx_state.c} | 322 +-
 src/gallium/drivers/vc5/Automake.inc  |  14 -
 src/gallium/drivers/vc5/Makefile.sources  |  37 --
 src/gallium/meson.build   |   8 +-
 src/gallium/targets/dri/Makefile.am   |   2 +-
 src/gallium/targets/dri/meson.build   |   4 +-
 src/gallium/targets/dri/target.c  |   8 +-
 .../winsys/{vc5 => v3d}/drm/Android.mk|   2 +-
 .../winsys/{vc5 => v3d}/drm/Makefile.am   |   4 +-
 src/gallium/winsys/v3d/drm/Makefile.sources   |   3 +
 .../winsys/{vc5 => v3d}/drm/meson.build   |   6 +-
 .../drm/v3d_drm_public.h} |   2 +-
 .../drm/v3d_drm_winsys.c} |   8 +-
 src/gallium/winsys/vc5/drm/Makefile.sources   |   3 -
 61 files changed, 1832 insertions(+), 1832 deletions(-)
 rename src/gallium/drivers/vc5/vc5_drm.h => include/drm-uapi/v3d_drm.h (68%)
 rename src/broadcom/{Makefile.vc5.am => Makefile.v3d.am} (87%)
 rename src/gallium/drivers/{vc5 => v3d}/.editorconfig (100%)
 create mode 100644 src/gallium/drivers/v3d/Automake.inc
 rename src/gallium/drivers/{vc5 => v3d}/Makefile.am (77%)
 create mode 100644 src/gallium/drivers/v3d/Makefile.sources
 rename src/gallium/drivers/{vc5 => v3d}/meson.build (70%)
 rename src/gallium/drivers/{vc5/vc5_blit.c => v3d/v3d_blit.c} (76%)
 rename src/gallium/drivers/{vc5/vc5_bufmgr.c => v3d/v3d_bufmgr.c} (75%)
 rename src/gallium/drivers/{vc5/vc5_bufmgr.h => v3d/v3d_bufmgr.h} (72%)
 rename src/gallium/drivers/{vc5/vc5_cl.c

[Mesa-dev] [PATCH 4/5] v3d: Rename driver functions from vc5 to v3d.

2018-05-01 Thread Eric Anholt

This is the final step of the driver rename.
---

The 300k of contents of this patch are just s/vc5/v3d/ over the files.
See my vc5-publish branch for full series.

 src/gallium/drivers/v3d/v3d_blit.c  |  96 +++---
 src/gallium/drivers/v3d/v3d_bufmgr.c| 158 +-
 src/gallium/drivers/v3d/v3d_bufmgr.h|  50 +--
 src/gallium/drivers/v3d/v3d_cl.c|  24 +-
 src/gallium/drivers/v3d/v3d_cl.h|  98 +++---
 src/gallium/drivers/v3d/v3d_context.c   | 112 +++
 src/gallium/drivers/v3d/v3d_context.h   | 210 ++---
 src/gallium/drivers/v3d/v3d_fence.c |  38 +--
 src/gallium/drivers/v3d/v3d_format_table.h  |   2 +-
 src/gallium/drivers/v3d/v3d_formats.c   |  34 +--
 src/gallium/drivers/v3d/v3d_job.c   | 188 ++--
 src/gallium/drivers/v3d/v3d_program.c   | 264 
 src/gallium/drivers/v3d/v3d_query.c |  84 +++---
 src/gallium/drivers/v3d/v3d_resource.c  | 220 +++---
 src/gallium/drivers/v3d/v3d_resource.h  |  48 +--
 src/gallium/drivers/v3d/v3d_screen.c|  72 ++---
 src/gallium/drivers/v3d/v3d_screen.h|  22 +-
 src/gallium/drivers/v3d/v3d_simulator.c | 162 +-
 src/gallium/drivers/v3d/v3d_tiling.c| 102 +++
 src/gallium/drivers/v3d/v3d_tiling.h|  18 +-
 src/gallium/drivers/v3d/v3d_uniforms.c  | 114 +++
 src/gallium/drivers/v3d/v3dx_context.h  |   8 +-
 src/gallium/drivers/v3d/v3dx_draw.c | 264 
 src/gallium/drivers/v3d/v3dx_emit.c | 258 
 src/gallium/drivers/v3d/v3dx_format_table.c |   4 +-
 src/gallium/drivers/v3d/v3dx_job.c  |   4 +-
 src/gallium/drivers/v3d/v3dx_rcl.c  |  74 ++---
 src/gallium/drivers/v3d/v3dx_simulator.c|  22 +-
 src/gallium/drivers/v3d/v3dx_state.c| 318 ++--
 29 files changed, 1534 insertions(+), 1534 deletions(-)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

On Tue, May 1, 2018 at 10:48 AM, Roland Scheidegger 
wrote:

> Am 01.05.2018 um 01:43 schrieb Marek Olšák:
> > From: Marek Olšák 
> >
> > This is a hypothetical interface for EQAA (a superset of CSAA). CSAA
> could be
> > exposed via GL_NV_framebuffer_multisample_coverage. EQAA additionally
> removes
> > the restriction that the number of samples in all FBO attachments must
> match,
> > which means it allows arbitrary sample counts in each FBO attachment.
>
> The idea sounds ok to me, as long as you don't expose the additional
> modes with more samples as ordinary msaa modes.
> (I'm quite sure at some point nvidia exposed 16x msaa modes through
> ordinary means, which then gave you really 4 color samples and 12
> coverage-only samples, in some not-very-compliant way. Though IIRC the
> extension is actually older than the ability to do per-sample msaa fb
> reads, so it wasn't as problematic back then.)
>
> > ---
> >  src/gallium/docs/source/screen.rst   | 17 +++--
> >  src/gallium/include/pipe/p_defines.h |  1 +
> >  src/gallium/include/pipe/p_state.h   |  3 ++-
> >  3 files changed, 18 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/gallium/docs/source/screen.rst
> b/src/gallium/docs/source/screen.rst
> > index 3837360fb40..28934c2f7b9 100644
> > --- a/src/gallium/docs/source/screen.rst
> > +++ b/src/gallium/docs/source/screen.rst
> > @@ -398,20 +398,22 @@ The integer capabilities:
> >  * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports
> TGSI_OPCODE_LOAD use
> >with constant buffers.
> >  * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used
> as
> >an address for indirect register indexing.
> >  * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
> >GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
> >pipe_rasterizer_state.
> >  * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined
> shader
> >output resources (images + buffers + fragment outputs). If 0 the state
> >tracker works it out.
> > +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit is set,
> EQAA
> > +  supports (i+1) color samples.
> I agree with Nicolai here, this looks strange, this should instead query
> for total samples or something.
>
> >  * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
> >Whether pipe_vertex_buffer::buffer_offset is treated as signed. The
> u_vbuf
> >module needs this for optimal performance in workstation applications.
> >  * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support
> per-context
> >priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
> >supported priority levels.  A driver that does not support prioritized
> >contexts can return 0.
> >  * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling
> semaphores
> >using fence_server_signal().
> >  * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that
> must be
> > @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader
> resources.
> >  (1 for 1D or 1D array textures).
> >
> >  **depth0** the depth of the base mip level of the texture
> >  (1 for everything else).
> >
> >  **array_size** the array size for 1D and 2D array textures.
> >  For cube maps this must be 6, for other textures 1.
> >
> >  **last_level** the last mip map level present.
> >
> > -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
> > -which isn't multisampled.
> > +**nr_samples**: For Z/S, this is the number of samples. For color, if
> EQAA
> > +is unsupported, this is the number of both coverage samples and color
> samples.
> > +If EQAA is supported, this is the number of coverage samples. 0 and 1
> > +specify a resource which isn't multisampled.
> I think you should keep nr_samples alone, and re-change the meaning to
> actually be "real" samples with all associated data (which is what
> everybody expects with msaa).
>
> > +
> > +**nr_color_samples**: This is the number of color samples for EQAA,
> while
> > +``nr_samples`` is the number of coverage samples. If the format is Z/S,
> > +``nr_color_samples`` is ignored. Constraints:
> > +* ``nr_color_samples`` must not be greater than ``nr_samples``.
> > +* If ``nr_color_samples`` is equal to ``nr_samples``, it is called MSAA.
> > +* If ``nr_color_samples`` is less than ``nr_samples``, it is called
> EQAA.
> > +* If ``nr_color_samples`` is equal to 1, the behavior of the resolve
> blit is
> > +driver-dependent.
> >
> Hence instead use something like "total_samples" or "coverage samples"
> here (albeit the latter could be a bit confusing, unclear if it means
> all samples or just the extra ones).
> If you'd use something like "number of additional coverage samples"
> instead, then you could even leave all state trackers alone, since 0
> would automatically mean ordinary msaa without crazy stuff.
> And fwiw I'd stick to the CSAA name - coverage at least has some
> meaning, whereas

Re: [Mesa-dev] [RFC PATCH] gallium: add interface for EQAA

2018-05-01 Thread Marek Olšák

On Tue, May 1, 2018 at 5:35 AM, Nicolai Hähnle  wrote:

> On 01.05.2018 01:43, Marek Olšák wrote:
>
>> From: Marek Olšák 
>>
>> This is a hypothetical interface for EQAA (a superset of CSAA). CSAA
>> could be
>> exposed via GL_NV_framebuffer_multisample_coverage. EQAA additionally
>> removes
>> the restriction that the number of samples in all FBO attachments must
>> match,
>> which means it allows arbitrary sample counts in each FBO attachment.
>> ---
>>   src/gallium/docs/source/screen.rst   | 17 +++--
>>   src/gallium/include/pipe/p_defines.h |  1 +
>>   src/gallium/include/pipe/p_state.h   |  3 ++-
>>   3 files changed, 18 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/docs/source/screen.rst
>> b/src/gallium/docs/source/screen.rst
>> index 3837360fb40..28934c2f7b9 100644
>> --- a/src/gallium/docs/source/screen.rst
>> +++ b/src/gallium/docs/source/screen.rst
>> @@ -398,20 +398,22 @@ The integer capabilities:
>>   * ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports
>> TGSI_OPCODE_LOAD use
>> with constant buffers.
>>   * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used
>> as
>> an address for indirect register indexing.
>>   * ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
>> GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
>> pipe_rasterizer_state.
>>   * ``PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES``: Limit on combined
>> shader
>> output resources (images + buffers + fragment outputs). If 0 the state
>> tracker works it out.
>> +* ``PIPE_CAP_EQAA_COLOR_SAMPLE_SUPPORT_MASK``: If the i-th bit is set,
>> EQAA
>> +  supports (i+1) color samples.
>>
>
> The number of supported samples is currently only exposed via
> is_format_supported.
>
> If there is hardware that supports more coverage samples than color
> samples, this interface wouldn't allow us to expose this fact, so it seems
> like perhaps the cap should be for the number of supported coverage samples
> instead?


Yes, this needs more thought.


>
>
>
>   * ``PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET``:
>> Whether pipe_vertex_buffer::buffer_offset is treated as signed. The
>> u_vbuf
>> module needs this for optimal performance in workstation applications.
>>   * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support
>> per-context
>> priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the
>> supported priority levels.  A driver that does not support prioritized
>> contexts can return 0.
>>   * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling
>> semaphores
>> using fence_server_signal().
>>   * ``PIPE_CAP_CONSTBUF0_FLAGS``: The bits of pipe_resource::flags that
>> must be
>> @@ -743,22 +745,33 @@ Modern APIs allow using buffers as shader resources.
>>   (1 for 1D or 1D array textures).
>> **depth0** the depth of the base mip level of the texture
>>   (1 for everything else).
>> **array_size** the array size for 1D and 2D array textures.
>>   For cube maps this must be 6, for other textures 1.
>> **last_level** the last mip map level present.
>>   -**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
>> -which isn't multisampled.
>> +**nr_samples**: For Z/S, this is the number of samples. For color, if
>> EQAA
>> +is unsupported, this is the number of both coverage samples and color
>> samples.
>> +If EQAA is supported, this is the number of coverage samples. 0 and 1
>> +specify a resource which isn't multisampled.
>> +
>> +**nr_color_samples**: This is the number of color samples for EQAA, while
>> +``nr_samples`` is the number of coverage samples. If the format is Z/S,
>> +``nr_color_samples`` is ignored. Constraints:
>> +* ``nr_color_samples`` must not be greater than ``nr_samples``.
>> +* If ``nr_color_samples`` is equal to ``nr_samples``, it is called MSAA.
>> +* If ``nr_color_samples`` is less than ``nr_samples``, it is called EQAA.
>> +* If ``nr_color_samples`` is equal to 1, the behavior of the resolve
>> blit is
>> +driver-dependent.
>>
>
> Why the last bullet?
>

Because we can support N >= 2 coverage samples and 1 color sample.


>
> Also, are all state trackers expected to set nr_color_samples correctly?
> This probably only affects nine, but still, it needs to be kept in mind.
>

Yes.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radv: Don't check the incoming apiVersion on CreateInstance.

2018-05-01 Thread Bas Nieuwenhuizen

This fixes

dEQP-VK.api.device_init.create_instance_invalid_api_version

CC: 18.1 
---
 src/amd/vulkan/radv_device.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 629957afec2..26f2e3b1aea 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -463,15 +463,6 @@ VkResult radv_CreateInstance(
client_version = VK_MAKE_VERSION(1, 0, 0);
}
 
-   if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
-   client_version > VK_MAKE_VERSION(1, 1, 0xfff)) {
-   return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
-"Client requested version %d.%d.%d",
-VK_VERSION_MAJOR(client_version),
-VK_VERSION_MINOR(client_version),
-VK_VERSION_PATCH(client_version));
-   }
-
instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!instance)
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radv: Use EnumerateInstanceVersion for the default version.

2018-05-01 Thread Bas Nieuwenhuizen

---
 src/amd/vulkan/radv_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 26f2e3b1aea..7e7d784ca43 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -460,7 +460,7 @@ VkResult radv_CreateInstance(
pCreateInfo->pApplicationInfo->apiVersion != 0) {
client_version = pCreateInfo->pApplicationInfo->apiVersion;
} else {
-   client_version = VK_MAKE_VERSION(1, 0, 0);
+   radv_EnumerateInstanceVersion(&client_version);
}
 
instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] freedreno: Fix ir3_cmdline.c build.

2018-05-01 Thread Eric Anholt

Fixes: 6487e7a30c9e ("nir: move GL specific passes to src/compiler/glsl")
---
 src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 5631216ebd9e..55809d527adb 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -46,6 +46,7 @@
 
 #include "compiler/glsl/standalone.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/glsl/gl_nir.h"
 #include "compiler/nir_types.h"
 #include "compiler/spirv/nir_spirv.h"
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106180] [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106180

--- Comment #12 from mercuriete  ---
I've tested my patch, and now I can play Dota 2 again and run the Vulkan
smoke test.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106180] [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106180

mercuriete  changed:

   What|Removed |Added

 CC||b...@basnieuwenhuizen.nl

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106180] [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106180

--- Comment #11 from mercuriete  ---
The previous patch fixes this issue.

I've never contributed to Mesa before.
I want to send this patch to the mailing list, but I don't know how.

If you want to commit that patch, I would like to be credited as:

Abel Garcia Dorta 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106180] [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106180

--- Comment #10 from mercuriete  ---
Created attachment 139255
  --> https://bugs.freedesktop.org/attachment.cgi?id=139255&action=edit
good patch

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 97516] GLX_OML_swap_method not fully supported

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=97516

Timothy Arceri  changed:

   What|Removed |Added

  Component|Other   |GLX

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 32678] egl_gallium driver doesn't support EGL_KHR_image_pixmap extension

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=32678

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |INVALID

--- Comment #4 from Timothy Arceri  ---
The gallium EGL state tracker was removed with the following commit. Closing.

commit 5564c361b5cc1f5ec4be3622d7f9be601e3c268a
Author: Jose Fonseca 
Date:   Tue Mar 3 16:01:22 2015 +

st/egl: Remove.

Largely superseded by src/egl, and
WGL/GLX_EXT_create_context_es_profile extensions.

Note this will break Android.mk with gallium drivers -- somebody
familiar with that build infrastructure will need to update it to use
gallium drivers through egl_dri2.

v2: Remove the _EGL_BUILT_IN_DRIVER_GALLIUM define from
src/egl/main/Android.mk; and update the src/egl/main/Sconscript to
create a SharedLibrary, add versioning, create symlink - copy the bits
from egl-static, per Emil Velikov.

Reviewed-by: Emil Velikov 

v3: Disallow undefined symbols in libEGL.so.  Update release notes

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 65427] Gallium EGL on Wayland blocks in eglInitialize (no display thread bound)

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=65427

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |INVALID
 Status|NEW |RESOLVED

--- Comment #2 from Timothy Arceri  ---
The EGL state tracker was dropped from Mesa long ago. Closing.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 55021] Regression: eglInitialize crashes with 9.0

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=55021

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |INVALID
 Status|NEW |RESOLVED

--- Comment #3 from Timothy Arceri  ---
The reporter never provided follow-up information. There also seem to be no other
reports of this in the years since, so it was likely fixed. Closing.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 34874] --enable-shared-glapi breaks apps

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=34874

Timothy Arceri  changed:

   What|Removed |Added

 Status|NEEDINFO|RESOLVED
 Resolution|--- |WORKSFORME

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 93561] ninja: error: '$(PRIVATE_SCRIPT)', needed by 'out/target/product/rpi2/gen/STATIC_LIBRARIES/libmesa_dri_common_intermediates/xmlpool/options.h', missing and no known rule to make

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=93561

Timothy Arceri  changed:

   What|Removed |Added

 Resolution|--- |NOTOURBUG
 Status|NEW |RESOLVED

--- Comment #1 from Timothy Arceri  ---
Looks like this was an issue with Android's build scripts or your setup rather
than a Mesa bug.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 104457] Resetting rcs0 after gpu hang

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=104457

Timothy Arceri  changed:

   What|Removed |Added

  Component|Mesa core   |Drivers/DRI/i965
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
   Assignee|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] opencl: autotools: Fix linking order for OpenCL target

2018-05-01 Thread Aaron Watry

Given the discussion that's ongoing, this patch might not land as-is,
but if it does:
Tested-By: Aaron Watry 

Note: The meson build currently works as-is and doesn't require an
equivalent patch.

--Aaron

On Tue, May 1, 2018 at 7:14 AM, Kai Wasserbäch
 wrote:
> Otherwise the build fails with an undefined reference to
> clang::FrontendTimesIsEnabled.
>
> Bugzilla: https://bugs.freedesktop.org/106209
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Jan Vesely 
> Signed-off-by: Kai Wasserbäch 
> ---
>
> Hey,
> this patch fixes a FTBFS for me with recent LLVM/Clang 7 revisions from
> upstream's SVN (I use the packages from apt.llvm.org).
>
> If you accept it, please commit it for me, I do not have commit access.
>
> The CC to stable can be dropped, if stable branches are not to be
> expected to be buildable with LLVM/Clang from SVN.
>
> Thank you in advance for considering this patch.
>
> Cheers,
> Kai
>
>
>  src/gallium/targets/opencl/Makefile.am | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/gallium/targets/opencl/Makefile.am 
> b/src/gallium/targets/opencl/Makefile.am
> index de68a93ad5..f0e1de7797 100644
> --- a/src/gallium/targets/opencl/Makefile.am
> +++ b/src/gallium/targets/opencl/Makefile.am
> @@ -23,11 +23,10 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
> $(LIBELF_LIBS) \
> $(DLOPEN_LIBS) \
> -lclangCodeGen \
> -   -lclangFrontendTool \
> -lclangFrontend \
> +   -lclangFrontendTool \
> -lclangDriver \
> -lclangSerialization \
> -   -lclangCodeGen \
> -lclangParse \
> -lclangSema \
> -lclangAnalysis \
> --
> 2.17.0
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Don't advertise Float64 or Int64 on HW withou 64-bit types

2018-05-01 Thread Anuj Phogat

On Mon, Apr 30, 2018 at 3:15 PM, Jason Ekstrand 
wrote:

> ---
>  src/intel/vulkan/anv_device.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index adcd506..e82f294 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -757,8 +757,10 @@ void anv_GetPhysicalDeviceFeatures(
>.shaderStorageImageArrayDynamicIndexing   = true,
>.shaderClipDistance   = true,
>.shaderCullDistance   = true,
> -  .shaderFloat64= pdevice->info.gen >= 8,
> -  .shaderInt64  = pdevice->info.gen >= 8,
> +  .shaderFloat64= pdevice->info.gen >= 8
> &&
> +
> pdevice->info.has_64bit_types,
> +  .shaderInt64  = pdevice->info.gen >= 8
> &&
> +
> pdevice->info.has_64bit_types,
>.shaderInt16  = false,
>.shaderResourceMinLod = false,
>.variableMultisampleRate  = true,
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


s/withou/without in subject. With typo fixed:
Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 106337] eglWaitClient() doesn't work as documented using DRI2 backend

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=106337

Bug ID: 106337
   Summary: eglWaitClient() doesn't work as documented using DRI2
backend
   Product: Mesa
   Version: 18.0
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: EGL
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: mgorc...@qnx.com
QA Contact: mesa-dev@lists.freedesktop.org

According to the EGL 1.4 specification, eglWaitClient() should be equivalent to a
glFinish() call, but according to the code of dri2_wait_client(), it only does a
flush() without waiting for any pending operations on the drawable surface to
complete.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 104302] Wolfenstein 2 (2017) under wine graphical artifacting on RADV

2018-05-01 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=104302

--- Comment #17 from Samuel Pitoiset  ---
Thanks for confirming. I'm aware of the faces issue but I haven't been able to
fix it yet.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv: Allow lookup of vkEnumerateInstanceVersion without an instance

2018-05-01 Thread Bas Nieuwenhuizen

well that works too.

Reviewed-by: Bas Nieuwenhuizen 

On Tue, May 1, 2018 at 7:01 PM, Jason Ekstrand  wrote:
> Fixes: cbab2d1da5edfe9df27a010adf8b1aa9dbee473b
> ---
>  src/intel/vulkan/anv_device.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index e82f294..6aff4e9 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1181,6 +1181,7 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(
>
> LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
> LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceLayerProperties);
> +   LOOKUP_ANV_ENTRYPOINT(EnumerateInstanceVersion);
> LOOKUP_ANV_ENTRYPOINT(CreateInstance);
>
>  #undef LOOKUP_ANV_ENTRYPOINT
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 >

1 - 100 of 170 matches

Mail list logo