[Mesa-dev] [PATCH v2 5/5] panfrost: Print errors from kernel

2019-08-07 Thread Tomeu Vizoso
Signed-off-by: Tomeu Vizoso 
Reviewed-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_drm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index 71eda2d1e328..36a6b975680a 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -49,14 +49,14 @@ panfrost_drm_mmap_bo(struct panfrost_screen *screen, struct 
panfrost_bo *bo)
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
 if (ret) {
-fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", 
ret);
+fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
 assert(0);
 }
 
 bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
   screen->fd, mmap_bo.offset);
 if (bo->cpu == MAP_FAILED) {
-fprintf(stderr, "mmap failed: %p\n", bo->cpu);
+fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
 assert(0);
 }
 
@@ -122,7 +122,7 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, 
size_t size,
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, 
&create_bo);
 if (ret) {
-fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: 
%d\n", ret);
+fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: 
%m\n");
 assert(0);
 }
 
@@ -176,7 +176,7 @@ panfrost_drm_release_bo(struct panfrost_screen *screen, 
struct panfrost_bo *bo,
 
 ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
 if (ret) {
-fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
+fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
 assert(0);
 }
 
@@ -332,7 +332,7 @@ panfrost_fence_create(struct panfrost_context *ctx)
  */
 drmSyncobjExportSyncFile(screen->fd, ctx->out_sync, &f->fd);
 if (f->fd == -1) {
-fprintf(stderr, "export failed\n");
+fprintf(stderr, "export failed: %m\n");
 free(f);
 return NULL;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

2019-08-07 Thread Tomeu Vizoso
Instead of all shaders being stored in a single BO, have each shader in
its own.

This removes the need for a 16MB allocation per context, and allows us
to place transient blend shaders in BOs marked as executable (before
they were allocated in the transient pool, which shouldn't be
executable).

v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
  reading from GPU-accessible memory when patching (Alyssa).
- Free struct panfrost_blend_shader (Alyssa).
- Give the job a reference to regular shaders when emitting
  (Alyssa).

Signed-off-by: Tomeu Vizoso 
---
 src/gallium/drivers/panfrost/pan_allocate.h   |  2 +
 src/gallium/drivers/panfrost/pan_assemble.c   |  5 ++-
 src/gallium/drivers/panfrost/pan_blend.h  | 13 --
 src/gallium/drivers/panfrost/pan_blend_cso.c  | 41 +--
 .../drivers/panfrost/pan_blend_shaders.c  | 13 ++
 src/gallium/drivers/panfrost/pan_bo_cache.c   |  5 +--
 src/gallium/drivers/panfrost/pan_context.c| 14 +--
 src/gallium/drivers/panfrost/pan_context.h|  3 +-
 src/gallium/drivers/panfrost/pan_drm.c|  5 ++-
 9 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.h 
b/src/gallium/drivers/panfrost/pan_allocate.h
index 8d925ee38a4c..0e06567d2069 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.h
+++ b/src/gallium/drivers/panfrost/pan_allocate.h
@@ -59,6 +59,8 @@ struct panfrost_bo {
 size_t size;
 
 int gem_handle;
+
+uint32_t flags;
 };
 
 struct panfrost_memory {
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
b/src/gallium/drivers/panfrost/pan_assemble.c
index 15f77585a25c..dd877056d3b5 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -43,6 +43,7 @@ panfrost_shader_compile(
 gl_shader_stage stage,
 struct panfrost_shader_state *state)
 {
+struct panfrost_screen *screen = pan_screen(ctx->base.screen);
 uint8_t *dst;
 
 nir_shader *s;
@@ -80,7 +81,9 @@ panfrost_shader_compile(
  * I bet someone just thought that would be a cute pun. At least,
  * that's how I'd do it. */
 
-meta->shader = panfrost_upload(&ctx->shaders, dst, size) | 
program.first_tag;
+state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
+memcpy(state->bo->cpu, dst, size);
+meta->shader = state->bo->gpu | program.first_tag;
 
 util_dynarray_fini(&program.compiled);
 
diff --git a/src/gallium/drivers/panfrost/pan_blend.h 
b/src/gallium/drivers/panfrost/pan_blend.h
index a881e0945f48..a29353ff0014 100644
--- a/src/gallium/drivers/panfrost/pan_blend.h
+++ b/src/gallium/drivers/panfrost/pan_blend.h
@@ -33,8 +33,10 @@
 /* An internal blend shader descriptor, from the compiler */
 
 struct panfrost_blend_shader {
-/* The compiled shader in GPU memory */
-struct panfrost_transfer shader;
+struct panfrost_context *ctx;
+
+/* The compiled shader */
+void *buffer;
 
 /* Byte count of the shader */
 unsigned size;
@@ -53,8 +55,11 @@ struct panfrost_blend_shader {
 /* A blend shader descriptor ready for actual use */
 
 struct panfrost_blend_shader_final {
-/* The upload, possibly to transient memory */
-mali_ptr gpu;
+/* The compiled shader in GPU memory, possibly patched */
+struct panfrost_bo *bo;
+
+/* First instruction tag (for tagging the pointer) */
+unsigned first_tag;
 
 /* Same meaning as panfrost_blend_shader */
 unsigned work_count;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
b/src/gallium/drivers/panfrost/pan_blend_cso.c
index f685b25b41b3..08a86980ca88 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -151,11 +151,24 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
 ctx->dirty |= PAN_DIRTY_FS;
 }
 
+static void
+panfrost_delete_blend_shader(struct hash_entry *entry)
+{
+struct panfrost_blend_shader *shader = (struct panfrost_blend_shader 
*)entry->data;
+free(shader->buffer);
+free(shader);
+}
+
 static void
 panfrost_delete_blend_state(struct pipe_context *pipe,
-void *blend)
+void *cso)
 {
-/* TODO: Free shader binary? */
+struct panfrost_blend_state *blend = (struct panfrost_blend_state *) 
cso;
+
+for (unsigned c = 0; c < 4; ++c) {
+struct panfrost_blend_rt *rt = &blend->rt[c];
+_mesa_hash_table_u64_clear(rt->shaders, 
panfrost_delete_blend_shader);
+}
 ralloc_free(blend);
 }
 
@@ -208,6 +221,9 @@ panfrost_blend_constant(float *out, float *in, unsigned 
mask)
 struct panfrost_blend_final
 panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
 {
+struct panfrost_screen *screen

[Mesa-dev] [PATCH v2 4/5] panfrost: Mark buffers as PANFROST_BO_HEAP

2019-08-07 Thread Tomeu Vizoso
What we call GROWABLE in Mesa corresponds to the HEAP BO flag in the
kernel. These buffers cannot be memory mapped in the CPU side at the
moment, so make sure they are also marked INVISIBLE.

This allows us to allocate a big heap upfront (16MB) without actually
reserving space unless it's needed.

Signed-off-by: Tomeu Vizoso 
Reviewed-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_drm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index 6d1e0c08d33d..71eda2d1e328 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -91,10 +91,16 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, 
size_t size,
 /* To maximize BO cache usage, don't allocate tiny BOs */
 size = MAX2(size, 4096);
 
+/* GROWABLE BOs cannot be mmapped */
+if (flags & PAN_ALLOCATE_GROWABLE)
+assert(flags & PAN_ALLOCATE_INVISIBLE);
+
 unsigned translated_flags = 0;
 
 if (screen->kernel_version->version_major > 1 ||
 screen->kernel_version->version_minor >= 1) {
+if (flags & PAN_ALLOCATE_GROWABLE)
+translated_flags |= PANFROST_BO_HEAP;
 if (!(flags & PAN_ALLOCATE_EXECUTE))
 translated_flags |= PANFROST_BO_NOEXEC;
 }
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 1/5] util/hash_table: Fix hashing in clears on 32-bit

2019-08-07 Thread Tomeu Vizoso
Some hash functions (eg. key_u64_hash) will attempt to dereference the
key, causing an invalid access when passed DELETED_KEY_VALUE (0x1) or
FREED_KEY_VALUE (0x0).

To avoid this problem, stuff the fake keys into a hash_key_u64 struct
and pass the pointer to it instead.

Signed-off-by: Tomeu Vizoso 
Suggested-by: Caio Marcelo de Oliveira Filho 
Cc: Samuel Pitoiset 
Cc: Nicolai Hähnle 
---
 src/util/hash_table.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index f58575de558f..58e6fc2d9169 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -667,7 +667,12 @@ _mesa_hash_table_u64_clear(struct hash_table_u64 *ht,
  struct hash_entry entry;
 
  /* Create a fake entry for the delete function. */
- entry.hash = table->key_hash_function(table->deleted_key);
+ if (sizeof(void *) == 8) {
+entry.hash = table->key_hash_function(table->deleted_key);
+ } else {
+struct hash_key_u64 _key = { .value = 
(uintptr_t)table->deleted_key };
+entry.hash = table->key_hash_function(&_key);
+ }
  entry.key = table->deleted_key;
  entry.data = ht->deleted_key_data;
 
@@ -682,7 +687,12 @@ _mesa_hash_table_u64_clear(struct hash_table_u64 *ht,
  struct hash_entry entry;
 
  /* Create a fake entry for the delete function. */
- entry.hash = table->key_hash_function(uint_key(FREED_KEY_VALUE));
+ if (sizeof(void *) == 8) {
+entry.hash = table->key_hash_function(uint_key(FREED_KEY_VALUE));
+ } else {
+struct hash_key_u64 _key = { .value = (uintptr_t)FREED_KEY_VALUE };
+entry.hash = table->key_hash_function(&_key);
+ }
  entry.key = uint_key(FREED_KEY_VALUE);
  entry.data = ht->freed_key_data;
 
-- 
2.20.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 3/5] panfrost: Mark BOs as NOEXEC

2019-08-07 Thread Tomeu Vizoso
Unless a BO has the EXECUTABLE flag, mark it as NOEXEC.

v2: - Rework version detection (Alyssa).

Signed-off-by: Tomeu Vizoso 
---
 include/drm-uapi/panfrost_drm.h   | 27 +++
 src/gallium/drivers/panfrost/pan_drm.c|  6 -
 src/gallium/drivers/panfrost/pan_screen.c |  3 ++-
 src/gallium/drivers/panfrost/pan_screen.h |  3 +++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/include/drm-uapi/panfrost_drm.h b/include/drm-uapi/panfrost_drm.h
index a52e0283b90d..9150dd75aad8 100644
--- a/include/drm-uapi/panfrost_drm.h
+++ b/include/drm-uapi/panfrost_drm.h
@@ -18,6 +18,8 @@ extern "C" {
 #define DRM_PANFROST_MMAP_BO   0x03
 #define DRM_PANFROST_GET_PARAM 0x04
 #define DRM_PANFROST_GET_BO_OFFSET 0x05
+#define DRM_PANFROST_PERFCNT_ENABLE0x06
+#define DRM_PANFROST_PERFCNT_DUMP  0x07
 
 #define DRM_IOCTL_PANFROST_SUBMIT  DRM_IOW(DRM_COMMAND_BASE + 
DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
 #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + 
DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -26,6 +28,15 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_GET_PARAM   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param)
 #define DRM_IOCTL_PANFROST_GET_BO_OFFSET   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset)
 
+/*
+ * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
+ * param is set to true.
+ * All these ioctl(s) are subject to deprecation, so please don't rely on
+ * them for anything but debugging purpose.
+ */
+#define DRM_IOCTL_PANFROST_PERFCNT_ENABLE  DRM_IOW(DRM_COMMAND_BASE + 
DRM_PANFROST_PERFCNT_ENABLE, struct drm_panfrost_perfcnt_enable)
+#define DRM_IOCTL_PANFROST_PERFCNT_DUMP
DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct 
drm_panfrost_perfcnt_dump)
+
 #define PANFROST_JD_REQ_FS (1 << 0)
 /**
  * struct drm_panfrost_submit - ioctl argument for submitting commands to the 
3D
@@ -71,6 +82,9 @@ struct drm_panfrost_wait_bo {
__s64 timeout_ns;   /* absolute */
 };
 
+#define PANFROST_BO_NOEXEC 1
+#define PANFROST_BO_HEAP   2
+
 /**
  * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
  *
@@ -135,6 +149,19 @@ struct drm_panfrost_get_bo_offset {
__u64 offset;
 };
 
+struct drm_panfrost_perfcnt_enable {
+   __u32 enable;
+   /*
+* On bifrost we have 2 sets of counters, this parameter defines the
+* one to track.
+*/
+   __u32 counterset;
+};
+
+struct drm_panfrost_perfcnt_dump {
+   __u64 buf_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
b/src/gallium/drivers/panfrost/pan_drm.c
index 8ae541ae11b6..6d1e0c08d33d 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -93,7 +93,11 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, 
size_t size,
 
 unsigned translated_flags = 0;
 
-/* TODO: translate flags to kernel flags, if the kernel supports */
+if (screen->kernel_version->version_major > 1 ||
+screen->kernel_version->version_minor >= 1) {
+if (!(flags & PAN_ALLOCATE_EXECUTE))
+translated_flags |= PANFROST_BO_NOEXEC;
+}
 
 struct drm_panfrost_create_bo create_bo = {
 .size = size,
diff --git a/src/gallium/drivers/panfrost/pan_screen.c 
b/src/gallium/drivers/panfrost/pan_screen.c
index b2fac88e956e..5f527d0d3b3d 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -37,7 +37,6 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
-#include 
 
 #include 
 
@@ -537,6 +536,7 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
 {
 struct panfrost_screen *screen = pan_screen(pscreen);
 panfrost_bo_cache_evict_all(screen);
+drmFreeVersion(screen->kernel_version);
 ralloc_free(screen);
 }
 
@@ -617,6 +617,7 @@ panfrost_create_screen(int fd, struct renderonly *ro)
 
 screen->gpu_id = panfrost_drm_query_gpu_version(screen);
 screen->require_sfbd = screen->gpu_id < 0x0750; /* T760 is the first 
to support MFBD */
+screen->kernel_version = drmGetVersion(fd);
 
 /* Check if we're loading against a supported GPU model. */
 
diff --git a/src/gallium/drivers/panfrost/pan_screen.h 
b/src/gallium/drivers/panfrost/pan_screen.h
index 22cd7dcc9fea..35fb8de26282 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -29,6 +29,7 @@
 #ifndef PAN_SCREEN_H
 #define PAN_SCREEN_H
 
+#include 
 #include "pipe/p_screen.h"
 #include "pipe/p_defines.h"
 #include "renderonly/renderonly.h"
@@ -99,6 +100,8 @@ struct panfrost_screen {
 u

Re: [Mesa-dev] EXT_shader_image_load_store requires format-less loads?

2019-08-07 Thread Pelloux-prayer, Pierre-eric
Hi Ilia,

Actually I wasn't sure that the PIPE_CAP_IMAGE_LOAD_FORMATTED was needed.
Your reasonning and your patch series make sense, thanks for fixing this.

Best,
Pierre-Eric

On 07/08/2019 02:10, Ilia Mirkin wrote:
> Hi Pierre-Eric,
> 
> I see you recently added EXT_shader_image_load_store - nice. It seems
> like you're relying on the format-less read access feature to make it
> happen:
> 
> +   extensions->EXT_shader_image_load_store &=
> +  screen->get_param(screen, PIPE_CAP_IMAGE_LOAD_FORMATTED);
> 
> Can you elaborate why this is necessary? From a quick read of the
> spec, it seems like the format is required, and maps to a format same
> way that ARB_image_load_store does (well, *slightly* different, but in
> the end, we get one of those same enum values out). The LOAD_FORMATTED
> thing enables you to read from an image with PIPE_FORMAT_NONE (which
> normally can only be written to).
> 
> I don't see anything in the spec which requires it, but perhaps I
> missed something. I'd be a bit surprised though, since this is rather
> non-trivial on NVIDIA GPUs.
> 
> Cheers,
> 
>   -ilia
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Mesa 19.2.0 release plan

2019-08-07 Thread Emil Velikov
Hi all,

On Wed, 31 Jul 2019 at 09:37, Emil Velikov  wrote:
>
> Hi all,
>
> Here is the tentative release plan for 19.2.0.
>
> As many of you are well aware, it's time to the next branch point.
> The calendar is already updated, so these are the tentative dates:
>
>  Aug 06 2019 - Feature freeze/Release candidate 1
>  Aug 13 2019 - Release candidate 2
>  Aug 20 2019 - Release candidate 3
>  Aug 27 2019 - Release candidate 4/final release
>
With multiple teams finalising the final features for their drivers,
I've decided to push the branch point by one week.

I would like to remind teams that they are welcome to merge
functionality early and keep it disabled by default.
This way they can address outstanding issues and enable, during the
stabilisation period.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111316] Imported GBM BO released with DESTROY_DUMB

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111316

Bug ID: 111316
   Summary: Imported GBM BO released with DESTROY_DUMB
   Product: Mesa
   Version: 18.3
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: ppaala...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Because grepping for GEM_CLOSE in Mesa GBM did not yield the results I would
have expected, I wrote a small test program:
https://gitlab.freedesktop.org/pq/gbm-test/blob/master/gbm-test.c

I was hypothesizing that a display-only kernel driver (with no driver at all in
Mesa) doing dmabuf imports from GBM might be leaking GEM handles in Mesa. The
program shows that it is not leaking, but there is another issue: the ioctl to
close the handle does not seem right.

The program uses two DRM devices: one device to allocate a GBM BO with
gbm_bo_create_with_modifiers() and export the buffer as dmabuf, and another
display-only device to import the dmabuf and get the GEM handle. (This is a
similar pattern to what a display server supporting display-only secondary DRM
devices would do for zero-copy, except it would use a gbm_surface with EGL
instead of gbm_bo_create.)

Doing an 'strace -e ioctl' of the test program, one allocate-export-import
cycle looks like this:

ioctl(3, DRM_IOCTL_I915_GEM_CREATE, 0x7ffe128116b0) = 0
ioctl(3, DRM_IOCTL_I915_GEM_SET_TILING, 0x7ffe12811600) = 0
ioctl(3, DRM_IOCTL_I915_GEM_SET_DOMAIN, 0x7ffe128116a4) = 0
ioctl(3, DRM_IOCTL_PRIME_HANDLE_TO_FD, 0x7ffe1281190c) = 0
ioctl(5, DRM_IOCTL_PRIME_FD_TO_HANDLE, 0x7ffe1281163c) = 0
GEM handle of imported buffer: 1
ioctl(5, DRM_IOCTL_MODE_DESTROY_DUMB, 0x7ffe12811904) = 0
ioctl(3, DRM_IOCTL_GEM_CLOSE, 0x7ffe128118a0) = 0

Is it ok to use DESTROY_DUMB here?

This is all purely by inspection, I have not hit actual problems so far.


< ickle> pq: cut-and-paste drivers/gpu/drm/drm_dumb_buffers.c
drm_mode_destroy_dumb() into the report

int drm_mode_destroy_dumb(struct drm_device *dev, u32 handle,
  struct drm_file *file_priv)
{
if (!dev->driver->dumb_create)
return -ENOSYS;

if (dev->driver->dumb_destroy)
return dev->driver->dumb_destroy(file_priv, dev, handle);
else
return drm_gem_dumb_destroy(file_priv, dev, handle);
}

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111316] Imported GBM BO released with DESTROY_DUMB

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111316

Pekka Paalanen  changed:

   What|Removed |Added

 CC||ch...@chris-wilson.co.uk,
   ||dan...@ffwll.ch
 OS|All |Linux (All)
   Hardware|Other   |x86-64 (AMD64)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111308] [Regression, NIR, bisected] Black squares in Unigine Heaven via DXVK

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111308

Danylo  changed:

   What|Removed |Added

 CC||danylo.pilia...@gmail.com

--- Comment #3 from Danylo  ---
Created attachment 144968
  --> https://bugs.freedesktop.org/attachment.cgi?id=144968&action=edit
good_vs_bad_shaders_nir

> does changing 'flt' to '~flt' make any difference?
No

> Do you think you could send me dumps with and without that optimization with 
> the environment variable NIR_PRINT=true?
I've attached the relevant parts of optimizations and the final NIR, the whole
dump is 300 mb so I didn't attach it.

Vulkan renderdoc of frame with issue:
https://mega.nz/#!sR8DGKwD!IHSQv6dWjk-YfyOnWZy36v-STBctmJbqGod19RPVDfg .
Captured on Mesa master and HD 620. It is a capture of dx11 capture because the
app is 32 bit so I was unable to directly capture Vulkan trace of it. Don't
mind green artifacts in trace - they appear only in Vulkan trace.

The corruption starts from the call EID 2821, select "NaN/INF" overlay and look
at the only output of this draw. NaNs would be highlighted as red.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

2019-08-07 Thread Alyssa Rosenzweig
Patches 2-5 have my R-b, looks good! :)

On Wed, Aug 07, 2019 at 10:36:54AM +0200, Tomeu Vizoso wrote:
> Instead of all shaders being stored in a single BO, have each shader in
> its own.
> 
> This removes the need for a 16MB allocation per context, and allows us
> to place transient blend shaders in BOs marked as executable (before
> they were allocated in the transient pool, which shouldn't be
> executable).
> 
> v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
>   reading from GPU-accessible memory when patching (Alyssa).
> - Free struct panfrost_blend_shader (Alyssa).
> - Give the job a reference to regular shaders when emitting
>   (Alyssa).
> 
> Signed-off-by: Tomeu Vizoso 
> ---
>  src/gallium/drivers/panfrost/pan_allocate.h   |  2 +
>  src/gallium/drivers/panfrost/pan_assemble.c   |  5 ++-
>  src/gallium/drivers/panfrost/pan_blend.h  | 13 --
>  src/gallium/drivers/panfrost/pan_blend_cso.c  | 41 +--
>  .../drivers/panfrost/pan_blend_shaders.c  | 13 ++
>  src/gallium/drivers/panfrost/pan_bo_cache.c   |  5 +--
>  src/gallium/drivers/panfrost/pan_context.c| 14 +--
>  src/gallium/drivers/panfrost/pan_context.h|  3 +-
>  src/gallium/drivers/panfrost/pan_drm.c|  5 ++-
>  9 files changed, 66 insertions(+), 35 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_allocate.h 
> b/src/gallium/drivers/panfrost/pan_allocate.h
> index 8d925ee38a4c..0e06567d2069 100644
> --- a/src/gallium/drivers/panfrost/pan_allocate.h
> +++ b/src/gallium/drivers/panfrost/pan_allocate.h
> @@ -59,6 +59,8 @@ struct panfrost_bo {
>  size_t size;
>  
>  int gem_handle;
> +
> +uint32_t flags;
>  };
>  
>  struct panfrost_memory {
> diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
> b/src/gallium/drivers/panfrost/pan_assemble.c
> index 15f77585a25c..dd877056d3b5 100644
> --- a/src/gallium/drivers/panfrost/pan_assemble.c
> +++ b/src/gallium/drivers/panfrost/pan_assemble.c
> @@ -43,6 +43,7 @@ panfrost_shader_compile(
>  gl_shader_stage stage,
>  struct panfrost_shader_state *state)
>  {
> +struct panfrost_screen *screen = pan_screen(ctx->base.screen);
>  uint8_t *dst;
>  
>  nir_shader *s;
> @@ -80,7 +81,9 @@ panfrost_shader_compile(
>   * I bet someone just thought that would be a cute pun. At least,
>   * that's how I'd do it. */
>  
> -meta->shader = panfrost_upload(&ctx->shaders, dst, size) | 
> program.first_tag;
> +state->bo = panfrost_drm_create_bo(screen, size, 
> PAN_ALLOCATE_EXECUTE);
> +memcpy(state->bo->cpu, dst, size);
> +meta->shader = state->bo->gpu | program.first_tag;
>  
>  util_dynarray_fini(&program.compiled);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_blend.h 
> b/src/gallium/drivers/panfrost/pan_blend.h
> index a881e0945f48..a29353ff0014 100644
> --- a/src/gallium/drivers/panfrost/pan_blend.h
> +++ b/src/gallium/drivers/panfrost/pan_blend.h
> @@ -33,8 +33,10 @@
>  /* An internal blend shader descriptor, from the compiler */
>  
>  struct panfrost_blend_shader {
> -/* The compiled shader in GPU memory */
> -struct panfrost_transfer shader;
> +struct panfrost_context *ctx;
> +
> +/* The compiled shader */
> +void *buffer;
>  
>  /* Byte count of the shader */
>  unsigned size;
> @@ -53,8 +55,11 @@ struct panfrost_blend_shader {
>  /* A blend shader descriptor ready for actual use */
>  
>  struct panfrost_blend_shader_final {
> -/* The upload, possibly to transient memory */
> -mali_ptr gpu;
> +/* The compiled shader in GPU memory, possibly patched */
> +struct panfrost_bo *bo;
> +
> +/* First instruction tag (for tagging the pointer) */
> +unsigned first_tag;
>  
>  /* Same meaning as panfrost_blend_shader */
>  unsigned work_count;
> diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c 
> b/src/gallium/drivers/panfrost/pan_blend_cso.c
> index f685b25b41b3..08a86980ca88 100644
> --- a/src/gallium/drivers/panfrost/pan_blend_cso.c
> +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
> @@ -151,11 +151,24 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
>  ctx->dirty |= PAN_DIRTY_FS;
>  }
>  
> +static void
> +panfrost_delete_blend_shader(struct hash_entry *entry)
> +{
> +struct panfrost_blend_shader *shader = (struct panfrost_blend_shader 
> *)entry->data;
> +free(shader->buffer);
> +free(shader);
> +}
> +
>  static void
>  panfrost_delete_blend_state(struct pipe_context *pipe,
> -void *blend)
> +void *cso)
>  {
> -/* TODO: Free shader binary? */
> +struct panfrost_blend_state *blend = (struct panfrost_blend_state *) 
> cso;
> +
> +for (unsigned c = 0; c < 4; ++c) {
> +struct panfrost_blend_rt *rt = &blend->rt[c];
> + 

[Mesa-dev] [Bug 111288] Memory leak in minecraft (supposedly related to rendering)

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111288

--- Comment #6 from Denis  ---
Thanks for the trace and explanations. I installed minecraft (demo version).
First observation - is that the app versions available for me 1.14.4 and 1.14

I ran it thru the valgrind. Result below:

==9226== LEAK SUMMARY:
==9226==definitely lost: 672 bytes in 6 blocks
==9226==indirectly lost: 9,878,823 bytes in 1,984 blocks
==9226==  possibly lost: 6,480,869 bytes in 26,861 blocks
==9226==still reachable: 7,899,321 bytes in 62,276 blocks
==9226==   of which reachable via heuristic:
==9226== length64   : 1,384 bytes in 28 blocks
==9226== newarray   : 1,808 bytes in 33 blocks
==9226== multipleinheritance: 1,960 bytes in 10 blocks
==9226== suppressed: 0 bytes in 0 blocks


According to it, during my game session there were lost about 10 MB of memory.

I made tests on:
Manjaro OS
kernel 5.1.16-1-MANJARO
mesa 19.1.2 (system)
gpu - Intel HD620

Also I am not sure how memory allocation should work during app launching (I
mean, possibly that could be find that app requested memory in renderD128 and
freed it only after app closing...). It would be useful for me to know, why did
you decide that those records relate to memory leaks (my observation - after
normal game launching there were about 30 records for "renderD128" were
created. And they were not increased then)

upd - as I found out, these memleaks are for the Launcher, not the exact game.
Trying to connect to java process with valgrind now

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111306] gbm creates BO with wrong pitch when dri3_get_modifiers returns modifiers, causing drmModeAddFB2WithModifiers to fail

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111306

Michel Dänzer  changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
 QA Contact|mesa-dev@lists.freedesktop. |intel-3d-bugs@lists.freedes
   |org |ktop.org
  Component|Mesa core   |Drivers/DRI/i965

--- Comment #1 from Michel Dänzer  ---
The pitch is determined by the driver.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111308] [Regression, NIR, bisected] Black squares in Unigine Heaven via DXVK

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111308

--- Comment #4 from Ian Romanick  ---
I think I have an idea what could be happening.  There are a lot of occurrences
of a pattern like

y = exp2(-(x*x)) * small_constant + y;

At the end, y is compared 0 < y, and that comparison is eliminated.  If x*x is
sufficiently large, exp2(-(x*x)) will flush to zero.

Does changing

   case nir_op_fexp2:
  r = (struct ssa_result_range){gt_zero, analyze_expression(alu, 0,
ht).is_integral};
  break;

to

   case nir_op_fexp2:
  r = (struct ssa_result_range){ge_zero, analyze_expression(alu, 0,
ht).is_integral};
  break;

help?  I don't have the renderdoc set up on this system.  I can try that later
today of you don't beat me to it.

If that fixes the problem, then fmul and ffma (and possibly others) will need
fixes to account for flush-to-zero behavior.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] panfrost: Assign varying buffers dynamically

2019-08-07 Thread Alyssa Rosenzweig
Rather than hardcoding certain varying buffer indices "by convention",
work it out at draw time. This added flexibility is needed for
futureproofing and will be enable streamout.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_assemble.c |  6 ---
 src/gallium/drivers/panfrost/pan_varyings.c | 53 +
 2 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_assemble.c 
b/src/gallium/drivers/panfrost/pan_assemble.c
index 15f77585a25..c56881c0a69 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -138,28 +138,22 @@ panfrost_shader_compile(
 /* Check for special cases, otherwise assume general varying */
 
 if (location == VARYING_SLOT_POS) {
-v.index = 1;
 v.format = MALI_VARYING_POS;
 } else if (location == VARYING_SLOT_PSIZ) {
-v.index = 2;
 v.format = MALI_R16F;
 v.swizzle = default_vec1_swizzle;
 
 state->writes_point_size = true;
 } else if (location == VARYING_SLOT_PNTC) {
-v.index = 3;
 v.format = MALI_RG16F;
 v.swizzle = default_vec2_swizzle;
 
 state->reads_point_coord = true;
 } else if (location == VARYING_SLOT_FACE) {
-v.index = 4;
 v.format = MALI_R32I;
 v.swizzle = default_vec1_swizzle;
 
 state->reads_face = true;
-} else {
-v.index = 0;
 }
 
 state->varyings[i] = v;
diff --git a/src/gallium/drivers/panfrost/pan_varyings.c 
b/src/gallium/drivers/panfrost/pan_varyings.c
index b918d1426c4..b4ed512917a 100644
--- a/src/gallium/drivers/panfrost/pan_varyings.c
+++ b/src/gallium/drivers/panfrost/pan_varyings.c
@@ -60,6 +60,20 @@ panfrost_emit_front_face(union mali_attr *slot)
 
 /* Given a shader and buffer indices, link varying metadata together */
 
+static bool
+is_special_varying(gl_varying_slot loc)
+{
+switch (loc) {
+case VARYING_SLOT_POS:
+case VARYING_SLOT_PSIZ:
+case VARYING_SLOT_PNTC:
+case VARYING_SLOT_FACE:
+return true;
+default:
+return false;
+}
+}
+
 static void
 panfrost_emit_varying_meta(
 void *outptr, struct panfrost_shader_state *ss,
@@ -115,19 +129,9 @@ panfrost_emit_varying_descriptor(
 struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
  vs_size + fs_size);
 
-/*
- * Assign ->src_offset now that we know about all the general purpose
- * varyings that will be used by the fragment and vertex shaders.
- */
 for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
-/*
- * General purpose varyings have ->index set to 0, skip other
- * entries.
- */
-if (vs->varyings[i].index)
-continue;
-
-vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
+if (!is_special_varying(vs->varyings_loc[i]))
+vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
 }
 
 for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
@@ -177,15 +181,14 @@ panfrost_emit_varying_descriptor(
 memcpy(trans.cpu, vs->varyings, vs_size);
 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
 
-/* Buffer indices must be in this order per our convention */
 union mali_attr varyings[PIPE_MAX_ATTRIBS];
-unsigned idx = 0;
 
+unsigned idx = 0;
 signed general = idx++;
 signed gl_Position = idx++;
-signed gl_PointSize = (vs->writes_point_size || fs->reads_point_coord 
|| fs->reads_face) ? (idx++) : -1;
-signed gl_PointCoord = (fs->reads_point_coord || fs->reads_face) ? 
(idx++) : -1;
-signed gl_FrontFacing = (fs->reads_face) ? (idx++) : -1;
+signed gl_PointSize = vs->writes_point_size ? (idx++) : -1;
+signed gl_PointCoord = fs->reads_point_coord ? (idx++) : -1;
+signed gl_FrontFacing = fs->reads_face ? (idx++) : -1;
 
 panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
vertex_count);
@@ -196,26 +199,16 @@ panfrost_emit_varying_descriptor(
sizeof(float) * 4, vertex_count);
 
 
-if (vs->writes_point_size || fs->reads_point_coord) {
-/* fp16 vec1 gl_PointSize */
+if (vs->writes_point_size)
 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
  

[Mesa-dev] [PATCH 2/3] panfrost: Assign indices at draw-time

2019-08-07 Thread Alyssa Rosenzweig
This will allow us to shuffle buffers.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/pan_varyings.c | 71 ++---
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_varyings.c 
b/src/gallium/drivers/panfrost/pan_varyings.c
index 53c05bab61e..b918d1426c4 100644
--- a/src/gallium/drivers/panfrost/pan_varyings.c
+++ b/src/gallium/drivers/panfrost/pan_varyings.c
@@ -58,6 +58,44 @@ panfrost_emit_front_face(union mali_attr *slot)
 slot->elements = MALI_VARYING_FRONT_FACING | MALI_ATTR_INTERNAL;
 }
 
+/* Given a shader and buffer indices, link varying metadata together */
+
+static void
+panfrost_emit_varying_meta(
+void *outptr, struct panfrost_shader_state *ss,
+signed general, signed gl_Position,
+signed gl_PointSize, signed gl_PointCoord,
+signed gl_FrontFacing)
+{
+struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
+
+for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
+gl_varying_slot location = ss->varyings_loc[i];
+int index = -1;
+
+switch (location) {
+case VARYING_SLOT_POS:
+index = gl_Position;
+break;
+case VARYING_SLOT_PSIZ:
+index = gl_PointSize;
+break;
+case VARYING_SLOT_PNTC:
+index = gl_PointCoord;
+break;
+case VARYING_SLOT_FACE:
+index = gl_FrontFacing;
+break;
+default:
+index = general;
+break;
+}
+
+assert(index >= 0);
+out[i].index = index;
+}
+}
+
 void
 panfrost_emit_varying_descriptor(
 struct panfrost_context *ctx,
@@ -139,26 +177,29 @@ panfrost_emit_varying_descriptor(
 memcpy(trans.cpu, vs->varyings, vs_size);
 memcpy(trans.cpu + vs_size, fs->varyings, fs_size);
 
-ctx->payloads[PIPE_SHADER_VERTEX].postfix.varying_meta = trans.gpu;
-ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varying_meta = trans.gpu + 
vs_size;
-
 /* Buffer indices must be in this order per our convention */
 union mali_attr varyings[PIPE_MAX_ATTRIBS];
 unsigned idx = 0;
 
-panfrost_emit_varyings(ctx, &varyings[idx++], num_gen_varyings * 16,
+signed general = idx++;
+signed gl_Position = idx++;
+signed gl_PointSize = (vs->writes_point_size || fs->reads_point_coord 
|| fs->reads_face) ? (idx++) : -1;
+signed gl_PointCoord = (fs->reads_point_coord || fs->reads_face) ? 
(idx++) : -1;
+signed gl_FrontFacing = (fs->reads_face) ? (idx++) : -1;
+
+panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16,
vertex_count);
 
 /* fp32 vec4 gl_Position */
 ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.position_varying =
-panfrost_emit_varyings(ctx, &varyings[idx++],
+panfrost_emit_varyings(ctx, &varyings[gl_Position],
sizeof(float) * 4, vertex_count);
 
 
 if (vs->writes_point_size || fs->reads_point_coord) {
 /* fp16 vec1 gl_PointSize */
 ctx->payloads[PIPE_SHADER_FRAGMENT].primitive_size.pointer =
-panfrost_emit_varyings(ctx, &varyings[idx++],
+panfrost_emit_varyings(ctx, &varyings[gl_PointSize],
2, vertex_count);
 } else if (fs->reads_face) {
 /* Dummy to advance index */
@@ -167,16 +208,30 @@ panfrost_emit_varying_descriptor(
 
 if (fs->reads_point_coord) {
 /* Special descriptor */
-panfrost_emit_point_coord(&varyings[idx++]);
+panfrost_emit_point_coord(&varyings[gl_PointCoord]);
 } else if (fs->reads_face) {
 ++idx;
 }
 
 if (fs->reads_face) {
-panfrost_emit_front_face(&varyings[idx++]);
+panfrost_emit_front_face(&varyings[gl_FrontFacing]);
 }
 
+/* Let's go ahead and link varying meta to the buffer in question, now
+ * that that information is available */
+
+panfrost_emit_varying_meta(trans.cpu, vs,
+general, gl_Position, gl_PointSize,
+gl_PointCoord, gl_FrontFacing);
+
+panfrost_emit_varying_meta(trans.cpu + vs_size, fs,
+general, gl_Position, gl_PointSize,
+gl_PointCoord, gl_FrontFacing);
+
 mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * 
sizeof(union mali_attr));
 ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p;
 ctx->payl

[Mesa-dev] [PATCH 1/3] panfrost: Break out pan_varyings.c

2019-08-07 Thread Alyssa Rosenzweig
This code is fairly self-contained, so let's factor it out of the giant
pan_context.c monster.

Signed-off-by: Alyssa Rosenzweig 
---
 src/gallium/drivers/panfrost/meson.build|   1 +
 src/gallium/drivers/panfrost/pan_context.c  | 156 -
 src/gallium/drivers/panfrost/pan_context.h  |   7 +
 src/gallium/drivers/panfrost/pan_varyings.c | 182 
 4 files changed, 190 insertions(+), 156 deletions(-)
 create mode 100644 src/gallium/drivers/panfrost/pan_varyings.c

diff --git a/src/gallium/drivers/panfrost/meson.build 
b/src/gallium/drivers/panfrost/meson.build
index 1efe11f4962..76ec702ee4f 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -51,6 +51,7 @@ files_panfrost = files(
   'pan_sfbd.c',
   'pan_mfbd.c',
   'pan_tiler.c',
+  'pan_varyings.c',
 )
 
 panfrost_includes = [
diff --git a/src/gallium/drivers/panfrost/pan_context.c 
b/src/gallium/drivers/panfrost/pan_context.c
index d4f0c5a3393..4e618d0e52a 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -462,162 +462,6 @@ panfrost_vertex_tiler_job(struct panfrost_context *ctx, 
bool is_tiler)
 return transfer;
 }
 
-static mali_ptr
-panfrost_emit_varyings(
-struct panfrost_context *ctx,
-union mali_attr *slot,
-unsigned stride,
-unsigned count)
-{
-/* Fill out the descriptor */
-slot->stride = stride;
-slot->size = stride * count;
-slot->shift = slot->extra_flags = 0;
-
-struct panfrost_transfer transfer =
-panfrost_allocate_transient(ctx, slot->size);
-
-slot->elements = transfer.gpu | MALI_ATTR_LINEAR;
-
-return transfer.gpu;
-}
-
-static void
-panfrost_emit_point_coord(union mali_attr *slot)
-{
-slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR;
-slot->stride = slot->size = slot->shift = slot->extra_flags = 0;
-}
-
-static void
-panfrost_emit_front_face(union mali_attr *slot)
-{
-slot->elements = MALI_VARYING_FRONT_FACING | MALI_ATTR_INTERNAL;
-}
-
-static void
-panfrost_emit_varying_descriptor(
-struct panfrost_context *ctx,
-unsigned vertex_count)
-{
-/* Load the shaders */
-
-struct panfrost_shader_state *vs = 
&ctx->shader[PIPE_SHADER_VERTEX]->variants[ctx->shader[PIPE_SHADER_VERTEX]->active_variant];
-struct panfrost_shader_state *fs = 
&ctx->shader[PIPE_SHADER_FRAGMENT]->variants[ctx->shader[PIPE_SHADER_FRAGMENT]->active_variant];
-unsigned int num_gen_varyings = 0;
-
-/* Allocate the varying descriptor */
-
-size_t vs_size = sizeof(struct mali_attr_meta) * 
vs->tripipe->varying_count;
-size_t fs_size = sizeof(struct mali_attr_meta) * 
fs->tripipe->varying_count;
-
-struct panfrost_transfer trans = panfrost_allocate_transient(ctx,
- vs_size + fs_size);
-
-/*
- * Assign ->src_offset now that we know about all the general purpose
- * varyings that will be used by the fragment and vertex shaders.
- */
-for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
-/*
- * General purpose varyings have ->index set to 0, skip other
- * entries.
- */
-if (vs->varyings[i].index)
-continue;
-
-vs->varyings[i].src_offset = 16 * (num_gen_varyings++);
-}
-
-for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
-unsigned j;
-
-/* If we have a point sprite replacement, handle that here. We
- * have to translate location first.  TODO: Flip y in shader.
- * We're already keying ... just time crunch .. */
-
-unsigned loc = fs->varyings_loc[i];
-unsigned pnt_loc =
-(loc >= VARYING_SLOT_VAR0) ? (loc - VARYING_SLOT_VAR0) 
:
-(loc == VARYING_SLOT_PNTC) ? 8 :
-~0;
-
-if (~pnt_loc && fs->point_sprite_mask & (1 << pnt_loc)) {
-/* gl_PointCoord index by convention */
-fs->varyings[i].index = 3;
-fs->reads_point_coord = true;
-
-/* Swizzle out the z/w to 0/1 */
-fs->varyings[i].format = MALI_RG16F;
-fs->varyings[i].swizzle =
-panfrost_get_default_swizzle(2);
-
-continue;
-}
-
-if (fs->varyings[i].index)
-continue;
-
-/*
- * Re-use the VS general purpose varying pos if it exists,
- * create a new one otherwise.
- */
-for (j = 0; j < vs->tripipe->varying_count; j++) {
-  

[Mesa-dev] [Bug 111288] Memory leak in minecraft (supposedly related to rendering)

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111288

--- Comment #7 from Greg  ---
>why did you decide that those records relate to memory leaks
I've decided that there's memory leak since memory used by minecraft grows
indefinitely and it's surely not how it should be (I played minecraft on other
PCs and there were no such problems), and absurdly high amount of these records
in process memory map seemed to be quite rational explanation of where that
memory leak is.
As mentioned, short timespan captured on trace was enough to accumulate >100 of
these records (and as I said these records don't get deleted until program
termination (exiting to main menu doesn't help), so amount of these can easily
grow until there's no memory left).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [ANNOUNCE] Mesa 19.1.4

2019-08-07 Thread Juan A. Suarez Romero
Mesa 19.1.4 is now available.

In this release we have:

Mostly, as usual, in fixes for different drivers (anv, radv, radeon, nv50,
nvc0) as well as in backend parts (egl, spirv, nir, ...).

Of those fixes, we could highlight several ones:

- Vulkan 24/48 bit formats are now not supported on Ivybridge.

- R8G8B8_UNORM_SRGB is not supported on Haswell.

- A fix for hair artifacts in Max Payne 3 on AMD/RADV.

- Vulkan transform feedback extension is disabled on Intel gen7.


Andres Rodriguez (1):
  radv: fix queries with WAIT_BIT returning VK_NOT_READY

Andrii Simiklit (2):
  intel/compiler: don't use a keyword struct for a class fs_reg
  meson: add a warning for meson < 0.46.0

Arcady Goldmints-Orlov (1):
  anv: report HOST_ALLOCATION as supported for images

Bas Nieuwenhuizen (3):
  radv: Set correct metadata size for GFX9+.
  radv: Take variable descriptor counts into account for buffer entries.
  radv: Fix descriptor set allocation failure.

Boyuan Zhang (4):
  radeon/uvd: fix poc for hevc encode
  radeon/vcn: fix poc for hevc encode
  radeon/uvd: enable rate control for hevc encoding
  radeon/vcn: enable rate control for hevc encoding

Caio Marcelo de Oliveira Filho (1):
  anv: Remove special allocation for anv_push_constants

Connor Abbott (1):
  nir: Allow qualifiers on copy_deref and image instructions

Daniel Schürmann (1):
  spirv: Fix order of barriers in SpvOpControlBarrier

Dave Airlie (1):
  st/nir: fix arb fragment stage conversion

Dylan Baker (1):
  meson: allow building all glx without any drivers

Emil Velikov (1):
  egl/drm: ensure the backing gbm is set before using it

Eric Anholt (1):
  freedreno: Fix data races with allocating/freeing struct ir3.

Eric Engestrom (5):
  nir: don't return void
  util: fix no-op macro (bad number of arguments)
  gallium+mesa: fix tgsi_semantic array type
  scons+meson: suppress spammy build warning on MacOS
  nir: remove explicit nir_intrinsic_index_flag values

Francisco Jerez (1):
  intel/ir: Fix CFG corruption in opt_predicated_break().

Ilia Mirkin (4):
  gallium/vl: fix compute tgsi shaders to not process undefined components
  nv50,nvc0: update sampler/view bind functions to accept NULL array
  nvc0: allow a non-user buffer to be bound at position 0
  nv50/ir: handle insn not being there for definition of CVT arg

Jason Ekstrand (6):
  intel/fs: Stop stack allocating large arrays
  anv: Disable transform feedback on gen7
  isl/formats: R8G8B8_UNORM_SRGB isn't supported on HSW
  anv: Don't claim support for 24 and 48-bit formats on IVB
  intel/fs: Use ALIGN16 instructions for all derivatives on gen <= 7
  intel/fs: Implement quad_swap_horizontal with a swizzle on gen7

Juan A. Suarez Romero (3):
  docs: add sha256 checksums for 19.1.3
  Update version to 19.1.4
  docs: add release notes for 19.1.4

Kenneth Graunke (4):
  mesa: Fix ReadBuffers with pbuffers
  egl: Quiet warning about front buffer rendering for pixmaps/pbuffers
  egl: Make the 565 pbuffer-only config single buffered.
  egl: Only expose 565 pbuffer configs if X can export them as DRI3 images

Lionel Landwerlin (5):
  anv: fix use of comma operator
  nir: add access to image_deref intrinsics
  spirv: wrap push ssa/pointer values
  spirv: propagate access qualifiers through ssa & pointer
  spirv: don't discard access set by vtn_pointer_dereference

Mark Menzynski (1):
  nvc0/ir: Fix assert accessing null pointer

Nataraj Deshpande (1):
  egl/android: Update color_buffers querying for buffer age

Nicolas Dufresne (1):
  egl: Also query modifiers when exporting DMABuf

Rhys Perry (1):
  ac/nir: fix txf_ms with an offset

Samuel Pitoiset (1):
  radv: fix crash in vkCmdClearAttachments with unused attachment

Tapani Pälli (1):
  mesa: add glsl_type ref to one_time_init and decref to atexit

Yevhenii Kolesnikov (1):
  main: Fix memleaks in mesa_use_program

git tag: mesa-19.1.4

https://mesa.freedesktop.org/archive/mesa-19.1.4.tar.xz
MD5:  90eed05e3239c96ad9e92eb11eb67ada  mesa-19.1.4.tar.xz
SHA1: 393053bfa41b7fc65add756713004f034c39c3ce  mesa-19.1.4.tar.xz
SHA256: a6d268a7d9edcfd92b6da80f2e34e6e0a7baaa442efbeba2fc66c404943c6bfb  
mesa-19.1.4.tar.xz
SHA512: 
234032d917c9b378c3f6ceb921677b64e549344c3957331810b50fd73e0dccd2f4f62e2bd39e619590f389bc58fdab10fab4b88f7c117557cbeb1dda049b9fc5
  mesa-19.1.4.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-19.1.4.tar.xz.sig



signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111288] Memory leak in minecraft (supposedly related to rendering)

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111288

--- Comment #8 from Greg  ---
>First observation - is that the app versions available for me 1.14.4 and 1.14

I use older version (to play with specific mods), but issue also persists on
newest (1.14.4) and oldest versions (1.0 release) available to me

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/5] util/hash_table: Fix hashing in clears on 32-bit

2019-08-07 Thread Caio Marcelo de Oliveira Filho
On Wed, Aug 07, 2019 at 10:36:53AM +0200, Tomeu Vizoso wrote:
> Some hash functions (eg. key_u64_hash) will attempt to dereference the
> key, causing an invalid access when passed DELETED_KEY_VALUE (0x1) or
> FREED_KEY_VALUE (0x0).
> 
> To avoid this problem, stuff the fake keys into a hash_key_u64 struct
> and pass the pointer to it instead.

A more precise description would be something along the lines

   When in 32-bit arch a 64-bit key value doesn't fit into a pointer,
   so hash_table_u64 internally use a pointer to a struct containing
   the 64-bit key value.

   Fix _mesa_hash_table_u64_clear() to handle the 32-bit case by
   creating a temporary hash_key_u64 to pass to the hash function.


This patch is

   Reviewed-by: Caio Marcelo de Oliveira Filho 


Caio
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 111308] [Regression, NIR, bisected] Black squares in Unigine Heaven via DXVK

2019-08-07 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=111308

--- Comment #5 from Danylo  ---
Interesting observation. I would be able to try tomorrow.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Mesa 19.2.0 release plan

2019-08-07 Thread Mark Janes
Eric Engestrom  writes:

> On 2019-07-31 at 09:38, Emil Velikov  wrote:
>> Hi all,
>> 
>> Here is the tentative release plan for 19.2.0.
>> 
>> As many of you are well aware, it's time to the next branch point.
>> The calendar is already updated, so these are the tentative dates:
>> 
>>  Aug 06 2019 - Feature freeze/Release candidate 1
>>  Aug 13 2019 - Release candidate 2
>>  Aug 20 2019 - Release candidate 3
>>  Aug 27 2019 - Release candidate 4/final release
>> 
>> This gives us around 1 week until the branch point.
>> 
>> Note: In the spirit of keeping things clearer and more transparent, we
>> will be keeping track of any features planned for the release in
>> Bugzilla [1].
>> 
>> Do add a separate "Depends on" for each work you have planned.
>> Alternatively you can reply to this email and I'll add them for you.
>> 
>> [1] https://bugs.freedesktop.org/show_bug.cgi?id=111265
>
> Thanks!
>
> As per previous discussions (I don't remember where, sorry) as well as
> internal discussions, I think we should add all currently open
> regressions since 19.1 as blockers for this release.

My understanding is that the "feature tracker" blocks the creation of
the release branchpoint.  A separate "release tracker" blocks the
release of 19.2.0.  Unfixed regressions go on the "release tracker", not
the "feature tracker".  We backport bug fixes to release branches, but
we don't backport features.

> We should also add that to the procedure in our docs; I'll do that
> later today if nobody beats me to it.

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] util/slab: add slab_free_fast()

2019-08-07 Thread Rob Clark
From: Rob Clark 

I noticed slab_free() showing up at the top of perf results in
gl_driver2, due to "streaming" GPU state objects, which are allocated
and destroyed within a single draw call.

In this case, it is guaranteed that we free on the same child pool,
from the same (only) thread accessing the child pool.  So add a faster,
but more restricted version of slab_free() to cut the overhead.

Signed-off-by: Rob Clark 
---
 src/util/slab.c | 20 
 src/util/slab.h |  1 +
 2 files changed, 21 insertions(+)

diff --git a/src/util/slab.c b/src/util/slab.c
index 62634034fdc..21076a80238 100644
--- a/src/util/slab.c
+++ b/src/util/slab.c
@@ -276,6 +276,26 @@ void slab_free(struct slab_child_pool *pool, void *ptr)
}
 }
 
+/**
+ * Similar to slab_free(), except that freeing an object in a different pool
+ * from the one it was allocated from is *not* allowed.
+ */
+void slab_free_fast(struct slab_child_pool *pool, void *ptr)
+{
+   struct slab_element_header *elt = ((struct slab_element_header*)ptr - 1);
+
+   CHECK_MAGIC(elt, SLAB_MAGIC_ALLOCATED);
+   SET_MAGIC(elt, SLAB_MAGIC_FREE);
+
+   assert(p_atomic_read(&elt->owner) == (intptr_t)pool);
+
+   /* This is the simple case: The caller guarantees that we can safely
+* access the free list.
+*/
+   elt->next = pool->free;
+   pool->free = elt;
+}
+
 /**
  * Allocate an object from the slab. Single-threaded (no mutex).
  */
diff --git a/src/util/slab.h b/src/util/slab.h
index 5a25adaf7f4..9748cd95858 100644
--- a/src/util/slab.h
+++ b/src/util/slab.h
@@ -78,6 +78,7 @@ void slab_create_child(struct slab_child_pool *pool,
 void slab_destroy_child(struct slab_child_pool *pool);
 void *slab_alloc(struct slab_child_pool *pool);
 void slab_free(struct slab_child_pool *pool, void *ptr);
+void slab_free_fast(struct slab_child_pool *pool, void *ptr);
 
 struct slab_mempool {
struct slab_parent_pool parent;
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/slab: add slab_free_fast()

2019-08-07 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, Aug 7, 2019 at 6:02 PM Rob Clark  wrote:

> From: Rob Clark 
>
> I noticed slab_free() showing up at the top of perf results in
> gl_driver2, due to "streaming" GPU state objects, which are allocated
> and destroyed within a single draw call.
>
> In this case, it is guaranteed that we free on the same child pool,
> from the same (only) thread accessing the child pool.  So add a faster,
> but more restricted version of slab_free() to cut the overhead.
>
> Signed-off-by: Rob Clark 
> ---
>  src/util/slab.c | 20 
>  src/util/slab.h |  1 +
>  2 files changed, 21 insertions(+)
>
> diff --git a/src/util/slab.c b/src/util/slab.c
> index 62634034fdc..21076a80238 100644
> --- a/src/util/slab.c
> +++ b/src/util/slab.c
> @@ -276,6 +276,26 @@ void slab_free(struct slab_child_pool *pool, void
> *ptr)
> }
>  }
>
> +/**
> + * Similar to slab_free(), except that freeing an object in a different
> pool
> + * from the one it was allocated from is *not* allowed.
> + */
> +void slab_free_fast(struct slab_child_pool *pool, void *ptr)
> +{
> +   struct slab_element_header *elt = ((struct slab_element_header*)ptr -
> 1);
> +
> +   CHECK_MAGIC(elt, SLAB_MAGIC_ALLOCATED);
> +   SET_MAGIC(elt, SLAB_MAGIC_FREE);
> +
> +   assert(p_atomic_read(&elt->owner) == (intptr_t)pool);
> +
> +   /* This is the simple case: The caller guarantees that we can safely
> +* access the free list.
> +*/
> +   elt->next = pool->free;
> +   pool->free = elt;
> +}
> +
>  /**
>   * Allocate an object from the slab. Single-threaded (no mutex).
>   */
> diff --git a/src/util/slab.h b/src/util/slab.h
> index 5a25adaf7f4..9748cd95858 100644
> --- a/src/util/slab.h
> +++ b/src/util/slab.h
> @@ -78,6 +78,7 @@ void slab_create_child(struct slab_child_pool *pool,
>  void slab_destroy_child(struct slab_child_pool *pool);
>  void *slab_alloc(struct slab_child_pool *pool);
>  void slab_free(struct slab_child_pool *pool, void *ptr);
> +void slab_free_fast(struct slab_child_pool *pool, void *ptr);
>
>  struct slab_mempool {
> struct slab_parent_pool parent;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

2019-08-07 Thread Rob Herring
On Wed, Aug 7, 2019 at 2:37 AM Tomeu Vizoso  wrote:
>
> Instead of all shaders being stored in a single BO, have each shader in
> its own.
>
> This removes the need for a 16MB allocation per context, and allows us
> to place transient blend shaders in BOs marked as executable (before
> they were allocated in the transient pool, which shouldn't be
> executable).
>
> v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
>   reading from GPU-accessible memory when patching (Alyssa).
> - Free struct panfrost_blend_shader (Alyssa).
> - Give the job a reference to regular shaders when emitting
>   (Alyssa).
>
> Signed-off-by: Tomeu Vizoso 


> diff --git a/src/gallium/drivers/panfrost/pan_bo_cache.c 
> b/src/gallium/drivers/panfrost/pan_bo_cache.c
> index fba495c1dd69..7378d0a8abea 100644
> --- a/src/gallium/drivers/panfrost/pan_bo_cache.c
> +++ b/src/gallium/drivers/panfrost/pan_bo_cache.c
> @@ -84,11 +84,10 @@ panfrost_bo_cache_fetch(
>  {
>  struct list_head *bucket = pan_bucket(screen, size);
>
> -/* TODO: Honour flags? */
> -
>  /* Iterate the bucket looking for something suitable */
>  list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
> -if (entry->size >= size) {
> +if (entry->size >= size &&
> +entry->flags == flags) {

This change probably warrants its own patch IMO. This is using the
untranslated flags, but I think it should be the 'translated_flags' as
those are the ones changing the allocation. For example, I don't think
there's any reason for DELAYED_MMAP to be used as a match criteria
(BTW, I'm also not sure if we can reclaim BOs if they remain mmap'ed).

Another problem I see, if we have a 100MB buffer in the cache, would
we really want to hit on a 4KB allocation? Perhaps a 'entry->size * 2
< size' check.

>  /* This one works, splice it out of the cache */
>  list_del(&entry->link);
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 5/5] panfrost: Print errors from kernel

2019-08-07 Thread Rob Herring
On Wed, Aug 7, 2019 at 2:37 AM Tomeu Vizoso  wrote:
>
> Signed-off-by: Tomeu Vizoso 
> Reviewed-by: Alyssa Rosenzweig 
> ---
>  src/gallium/drivers/panfrost/pan_drm.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
> b/src/gallium/drivers/panfrost/pan_drm.c
> index 71eda2d1e328..36a6b975680a 100644
> --- a/src/gallium/drivers/panfrost/pan_drm.c
> +++ b/src/gallium/drivers/panfrost/pan_drm.c
> @@ -49,14 +49,14 @@ panfrost_drm_mmap_bo(struct panfrost_screen *screen, 
> struct panfrost_bo *bo)
>
>  ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
>  if (ret) {
> -fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", 
> ret);
> +fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");

Is this going to work on Android and bionic? stderr goes to /dev/null
anyways on Android, so probably not a big deal.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

2019-08-07 Thread Alyssa Rosenzweig
> This is using the
> untranslated flags, but I think it should be the 'translated_flags' as
> those are the ones changing the allocation.

It's a little more complex than that. There some hypothetical
untranslated flags that I would want to match on. For instance, future
CPU read-only/write-only modifiers -- those affect the mmap (and need to
be accounted for in the BO cache) but aren't specified as
translated_flags to the kernel.

{ I'd like to experiment with read-only/write-only flags on certain
buffers as a sanity check for security/robustness reasons }


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 5/5] panfrost: Print errors from kernel

2019-08-07 Thread Alyssa Rosenzweig
> Is this going to work on Android and bionic? stderr goes to /dev/null
> anyways on Android, so probably not a big deal.

Tomeu said yes.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/slab: add slab_free_fast()

2019-08-07 Thread Jason Ekstrand

Do you have any actual numbers for this?

On August 7, 2019 17:02:22 Rob Clark  wrote:


From: Rob Clark 

I noticed slab_free() showing up at the top of perf results in
gl_driver2, due to "streaming" GPU state objects, which are allocated
and destroyed within a single draw call.

In this case, it is guaranteed that we free on the same child pool,
from the same (only) thread accessing the child pool.  So add a faster,
but more restricted version of slab_free() to cut the overhead.

Signed-off-by: Rob Clark 
---
src/util/slab.c | 20 
src/util/slab.h |  1 +
2 files changed, 21 insertions(+)

diff --git a/src/util/slab.c b/src/util/slab.c
index 62634034fdc..21076a80238 100644
--- a/src/util/slab.c
+++ b/src/util/slab.c
@@ -276,6 +276,26 @@ void slab_free(struct slab_child_pool *pool, void *ptr)
   }
}

+/**
+ * Similar to slab_free(), except that freeing an object in a different pool
+ * from the one it was allocated from is *not* allowed.
+ */
+void slab_free_fast(struct slab_child_pool *pool, void *ptr)
+{
+   struct slab_element_header *elt = ((struct slab_element_header*)ptr - 1);
+
+   CHECK_MAGIC(elt, SLAB_MAGIC_ALLOCATED);
+   SET_MAGIC(elt, SLAB_MAGIC_FREE);
+
+   assert(p_atomic_read(&elt->owner) == (intptr_t)pool);
+
+   /* This is the simple case: The caller guarantees that we can safely
+* access the free list.
+*/
+   elt->next = pool->free;
+   pool->free = elt;
+}
+
/**
 * Allocate an object from the slab. Single-threaded (no mutex).
 */
diff --git a/src/util/slab.h b/src/util/slab.h
index 5a25adaf7f4..9748cd95858 100644
--- a/src/util/slab.h
+++ b/src/util/slab.h
@@ -78,6 +78,7 @@ void slab_create_child(struct slab_child_pool *pool,
void slab_destroy_child(struct slab_child_pool *pool);
void *slab_alloc(struct slab_child_pool *pool);
void slab_free(struct slab_child_pool *pool, void *ptr);
+void slab_free_fast(struct slab_child_pool *pool, void *ptr);

struct slab_mempool {
   struct slab_parent_pool parent;
--
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/slab: add slab_free_fast()

2019-08-07 Thread Jason Ekstrand
It'd be nice to throw some numbers in the donut message before you push. 
"Real" ones of it's convenient but what you have below is probably fine if 
you've already thrown the data away.


On August 7, 2019 19:13:21 Rob Clark  wrote:


Yes, although not in front of me.. overall for gl_driver2 it was
something along the lines of 2-3%.  Looking at perf-report, slab_free
(+ slab_free_fast) went from ~20% to nearly nothing(ish) iirc..

BR,
-R

On Wed, Aug 7, 2019 at 4:58 PM Jason Ekstrand  wrote:


Do you have any actual numbers for this?

On August 7, 2019 17:02:22 Rob Clark  wrote:

> From: Rob Clark 
>
> I noticed slab_free() showing up at the top of perf results in
> gl_driver2, due to "streaming" GPU state objects, which are allocated
> and destroyed within a single draw call.
>
> In this case, it is guaranteed that we free on the same child pool,
> from the same (only) thread accessing the child pool.  So add a faster,
> but more restricted version of slab_free() to cut the overhead.
>
> Signed-off-by: Rob Clark 
> ---
> src/util/slab.c | 20 
> src/util/slab.h |  1 +
> 2 files changed, 21 insertions(+)
>
> diff --git a/src/util/slab.c b/src/util/slab.c
> index 62634034fdc..21076a80238 100644
> --- a/src/util/slab.c
> +++ b/src/util/slab.c
> @@ -276,6 +276,26 @@ void slab_free(struct slab_child_pool *pool, void *ptr)
>}
> }
>
> +/**
> + * Similar to slab_free(), except that freeing an object in a different pool
> + * from the one it was allocated from is *not* allowed.
> + */
> +void slab_free_fast(struct slab_child_pool *pool, void *ptr)
> +{
> +   struct slab_element_header *elt = ((struct slab_element_header*)ptr - 1);
> +
> +   CHECK_MAGIC(elt, SLAB_MAGIC_ALLOCATED);
> +   SET_MAGIC(elt, SLAB_MAGIC_FREE);
> +
> +   assert(p_atomic_read(&elt->owner) == (intptr_t)pool);
> +
> +   /* This is the simple case: The caller guarantees that we can safely
> +* access the free list.
> +*/
> +   elt->next = pool->free;
> +   pool->free = elt;
> +}
> +
> /**
>  * Allocate an object from the slab. Single-threaded (no mutex).
>  */
> diff --git a/src/util/slab.h b/src/util/slab.h
> index 5a25adaf7f4..9748cd95858 100644
> --- a/src/util/slab.h
> +++ b/src/util/slab.h
> @@ -78,6 +78,7 @@ void slab_create_child(struct slab_child_pool *pool,
> void slab_destroy_child(struct slab_child_pool *pool);
> void *slab_alloc(struct slab_child_pool *pool);
> void slab_free(struct slab_child_pool *pool, void *ptr);
> +void slab_free_fast(struct slab_child_pool *pool, void *ptr);
>
> struct slab_mempool {
>struct slab_parent_pool parent;
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev







___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util/slab: add slab_free_fast()

2019-08-07 Thread Rob Clark
should be easy enough to re-create..  I will note that the perf
results differ a *lot* if I pin things to small (in-order) calls, so
I'm guestimating that what perf is showing me w/ slab_free() is where
reality catches up to mis-predicted branches.

BR,
-R


On Wed, Aug 7, 2019 at 9:44 PM Jason Ekstrand  wrote:
>
> It'd be nice to throw some numbers in the donut message before you push.
> "Real" ones of it's convenient but what you have below is probably fine if
> you've already thrown the data away.
>
> On August 7, 2019 19:13:21 Rob Clark  wrote:
>
> > Yes, although not in front of me.. overall for gl_driver2 it was
> > something along the lines of 2-3%.  Looking at perf-report, slab_free
> > (+ slab_free_fast) went from ~20% to nearly nothing(ish) iirc..
> >
> > BR,
> > -R
> >
> > On Wed, Aug 7, 2019 at 4:58 PM Jason Ekstrand  wrote:
> >>
> >> Do you have any actual numbers for this?
> >>
> >> On August 7, 2019 17:02:22 Rob Clark  wrote:
> >>
> >> > From: Rob Clark 
> >> >
> >> > I noticed slab_free() showing up at the top of perf results in
> >> > gl_driver2, due to "streaming" GPU state objects, which are allocated
> >> > and destroyed within a single draw call.
> >> >
> >> > In this case, it is guaranteed that we free on the same child pool,
> >> > from the same (only) thread accessing the child pool.  So add a faster,
> >> > but more restricted version of slab_free() to cut the overhead.
> >> >
> >> > Signed-off-by: Rob Clark 
> >> > ---
> >> > src/util/slab.c | 20 
> >> > src/util/slab.h |  1 +
> >> > 2 files changed, 21 insertions(+)
> >> >
> >> > diff --git a/src/util/slab.c b/src/util/slab.c
> >> > index 62634034fdc..21076a80238 100644
> >> > --- a/src/util/slab.c
> >> > +++ b/src/util/slab.c
> >> > @@ -276,6 +276,26 @@ void slab_free(struct slab_child_pool *pool, void 
> >> > *ptr)
> >> >}
> >> > }
> >> >
> >> > +/**
> >> > + * Similar to slab_free(), except that freeing an object in a different 
> >> > pool
> >> > + * from the one it was allocated from is *not* allowed.
> >> > + */
> >> > +void slab_free_fast(struct slab_child_pool *pool, void *ptr)
> >> > +{
> >> > +   struct slab_element_header *elt = ((struct slab_element_header*)ptr 
> >> > - 1);
> >> > +
> >> > +   CHECK_MAGIC(elt, SLAB_MAGIC_ALLOCATED);
> >> > +   SET_MAGIC(elt, SLAB_MAGIC_FREE);
> >> > +
> >> > +   assert(p_atomic_read(&elt->owner) == (intptr_t)pool);
> >> > +
> >> > +   /* This is the simple case: The caller guarantees that we can safely
> >> > +* access the free list.
> >> > +*/
> >> > +   elt->next = pool->free;
> >> > +   pool->free = elt;
> >> > +}
> >> > +
> >> > /**
> >> >  * Allocate an object from the slab. Single-threaded (no mutex).
> >> >  */
> >> > diff --git a/src/util/slab.h b/src/util/slab.h
> >> > index 5a25adaf7f4..9748cd95858 100644
> >> > --- a/src/util/slab.h
> >> > +++ b/src/util/slab.h
> >> > @@ -78,6 +78,7 @@ void slab_create_child(struct slab_child_pool *pool,
> >> > void slab_destroy_child(struct slab_child_pool *pool);
> >> > void *slab_alloc(struct slab_child_pool *pool);
> >> > void slab_free(struct slab_child_pool *pool, void *ptr);
> >> > +void slab_free_fast(struct slab_child_pool *pool, void *ptr);
> >> >
> >> > struct slab_mempool {
> >> >struct slab_parent_pool parent;
> >> > --
> >> > 2.21.0
> >> >
> >> > ___
> >> > mesa-dev mailing list
> >> > mesa-dev@lists.freedesktop.org
> >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >>
> >>
> >>
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

2019-08-07 Thread Tomeu Vizoso
On Thu, 8 Aug 2019 at 00:47, Rob Herring  wrote:
>
> On Wed, Aug 7, 2019 at 2:37 AM Tomeu Vizoso  
> wrote:
> >
> > Instead of all shaders being stored in a single BO, have each shader in
> > its own.
> >
> > This removes the need for a 16MB allocation per context, and allows us
> > to place transient blend shaders in BOs marked as executable (before
> > they were allocated in the transient pool, which shouldn't be
> > executable).
> >
> > v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
> >   reading from GPU-accessible memory when patching (Alyssa).
> > - Free struct panfrost_blend_shader (Alyssa).
> > - Give the job a reference to regular shaders when emitting
> >   (Alyssa).
> >
> > Signed-off-by: Tomeu Vizoso 
>
>
> > diff --git a/src/gallium/drivers/panfrost/pan_bo_cache.c 
> > b/src/gallium/drivers/panfrost/pan_bo_cache.c
> > index fba495c1dd69..7378d0a8abea 100644
> > --- a/src/gallium/drivers/panfrost/pan_bo_cache.c
> > +++ b/src/gallium/drivers/panfrost/pan_bo_cache.c
> > @@ -84,11 +84,10 @@ panfrost_bo_cache_fetch(
> >  {
> >  struct list_head *bucket = pan_bucket(screen, size);
> >
> > -/* TODO: Honour flags? */
> > -
> >  /* Iterate the bucket looking for something suitable */
> >  list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
> > -if (entry->size >= size) {
> > +if (entry->size >= size &&
> > +entry->flags == flags) {
>
> This change probably warrants its own patch IMO.

Agreed.

> This is using the
> untranslated flags, but I think it should be the 'translated_flags' as
> those are the ones changing the allocation. For example, I don't think
> there's any reason for DELAYED_MMAP to be used as a match criteria
> (BTW, I'm also not sure if we can reclaim BOs if they remain mmap'ed).
>
> Another problem I see, if we have a 100MB buffer in the cache, would
> we really want to hit on a 4KB allocation? Perhaps a 'entry->size * 2
> < size' check.

Yeah, as mentioned in the v1 discussion, we have plenty of room for
improvements here, but the goal now is just to stop doing memory
allocation so greedily that we reach OOM after launching a few GL
clients.

For 19.3 we could start tracking performance and other metrics such as
peak memory usage, etc.

Cheers,

Tomeu

>
> >  /* This one works, splice it out of the cache */
> >  list_del(&entry->link);
> >
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 5/5] panfrost: Print errors from kernel

2019-08-07 Thread Tomeu Vizoso
On Thu, 8 Aug 2019 at 00:52, Rob Herring  wrote:
>
> On Wed, Aug 7, 2019 at 2:37 AM Tomeu Vizoso  
> wrote:
> >
> > Signed-off-by: Tomeu Vizoso 
> > Reviewed-by: Alyssa Rosenzweig 
> > ---
> >  src/gallium/drivers/panfrost/pan_drm.c | 10 +-
> >  1 file changed, 5 insertions(+), 5 deletions(-)
> >
> > diff --git a/src/gallium/drivers/panfrost/pan_drm.c 
> > b/src/gallium/drivers/panfrost/pan_drm.c
> > index 71eda2d1e328..36a6b975680a 100644
> > --- a/src/gallium/drivers/panfrost/pan_drm.c
> > +++ b/src/gallium/drivers/panfrost/pan_drm.c
> > @@ -49,14 +49,14 @@ panfrost_drm_mmap_bo(struct panfrost_screen *screen, 
> > struct panfrost_bo *bo)
> >
> >  ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
> >  if (ret) {
> > -fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", 
> > ret);
> > +fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
>
> Is this going to work on Android and bionic?

Yep: 
https://android.googlesource.com/platform/bionic/+/refs/heads/master/libc/stdio/vfprintf.cpp#455

Musl and uClibc also support it.

Cheers,

Tomeu

> stderr goes to /dev/null
> anyways on Android, so probably not a big deal.
>
> Rob
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev