Re: [PATCH xserver 1/2] Revert "glamor: Remove the FBO cache."

2017-03-06 Thread Marek Olšák
On Fri, Mar 3, 2017 at 9:46 AM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> This reverts commit 950ffb8d6fd1480f305e38c571bda44f247f1de2 (and parts
> of commit 4b5326aeba539249fcded91bf7806a708eeca651).
>
> It halved (or worse) the performance with some x11perf tests on
> radeonsi, even though radeonsi has a BO cache.
>
> Signed-off-by: Michel Dänzer 
> ---
>
> I suspect the reason why radeonsi's BO cache can't make up for the lack of
> the glamor FBO cache is that the former can only return idle BOs. So
> while the glamor FBO cache allows re-using the same FBO over and over
> for a series of operations using temporary pixmaps of the same size, the
> radeonsi BO cache has to keep using different BOs until a command stream
> is flushed to the GPU and processed by it.

I don't think the radeonsi cache is sufficient here. It only hides the
kernel overhead. It doesn't hide any overhead of Mesa itself.

Marek
___
xorg-devel@lists.x.org: X.Org development
Archives: http://lists.x.org/archives/xorg-devel
Info: https://lists.x.org/mailman/listinfo/xorg-devel

Re: [PATCH xserver 1/2] Revert "glamor: Remove the FBO cache."

2017-03-03 Thread Max Staudt
On 03/03/2017 09:46 AM, Michel Dänzer wrote:
> This reverts commit 950ffb8d6fd1480f305e38c571bda44f247f1de2 (and parts
> of commit 4b5326aeba539249fcded91bf7806a708eeca651).

Oh no.

Glamor's BO cache was horribly broken, and I tried to fix it before:

  https://lists.x.org/archives/xorg-devel/2016-July/050403.html

It was then decided to drop it altogether.


The thing is, at least on the Raspberry Pi (vc4) we get 3 BO caches:
 - kernel
 - Mesa
 - GLAMOR

That's quite a lot of memory kept around "just in case".


If you insist on bringing it back, please have a thorough look at my old
series.
There was a lot of broken, as well as dead (!) code that I had to fix
to make this workable on VC4. If we reintroduce a FBO cache, at least
let's do it right this time.



Max

___
xorg-devel@lists.x.org: X.Org development
Archives: http://lists.x.org/archives/xorg-devel
Info: https://lists.x.org/mailman/listinfo/xorg-devel

[PATCH xserver 1/2] Revert "glamor: Remove the FBO cache."

2017-03-03 Thread Michel Dänzer
From: Michel Dänzer 

This reverts commit 950ffb8d6fd1480f305e38c571bda44f247f1de2 (and parts
of commit 4b5326aeba539249fcded91bf7806a708eeca651).

It halved (or worse) the performance with some x11perf tests on
radeonsi, even though radeonsi has a BO cache.

Signed-off-by: Michel Dänzer 
---

I suspect the reason why radeonsi's BO cache can't make up for the lack of
the glamor FBO cache is that the former can only return idle BOs. So
while the glamor FBO cache allows re-using the same FBO over and over
for a series of operations using temporary pixmaps of the same size, the
radeonsi BO cache has to keep using different BOs until a command stream
is flushed to the GPU and processed by it.

 glamor/glamor.c   |   5 +
 glamor/glamor_fbo.c   | 249 --
 glamor/glamor_priv.h  |  27 ++
 glamor/glamor_utils.h |  40 
 4 files changed, 315 insertions(+), 6 deletions(-)

diff --git a/glamor/glamor.c b/glamor/glamor.c
index c54cf3b43..feae7a21f 100644
--- a/glamor/glamor.c
+++ b/glamor/glamor.c
@@ -110,6 +110,7 @@ glamor_set_pixmap_texture(PixmapPtr pixmap, unsigned int 
tex)
 ErrorF("XXX fail to create fbo.\n");
 return;
 }
+fbo->external = TRUE;
 
 glamor_pixmap_attach_fbo(pixmap, fbo);
 }
@@ -253,7 +254,9 @@ glamor_block_handler(ScreenPtr screen)
 glamor_screen_private *glamor_priv = glamor_get_screen_private(screen);
 
 glamor_make_current(glamor_priv);
+glamor_priv->tick++;
 glFlush();
+glamor_fbo_expire(glamor_priv);
 }
 
 static void
@@ -719,6 +722,7 @@ glamor_init(ScreenPtr screen, unsigned int flags)
 ps->Glyphs = glamor_composite_glyphs;
 
 glamor_init_vbo(screen);
+glamor_init_pixmap_fbo(screen);
 
 #ifdef GLAMOR_GRADIENT_SHADER
 glamor_init_gradient_shader(screen);
@@ -748,6 +752,7 @@ glamor_release_screen_priv(ScreenPtr screen)
 
 glamor_priv = glamor_get_screen_private(screen);
 glamor_fini_vbo(screen);
+glamor_fini_pixmap_fbo(screen);
 glamor_pixmap_fini(screen);
 free(glamor_priv);
 
diff --git a/glamor/glamor_fbo.c b/glamor/glamor_fbo.c
index 988bb585b..a531f6052 100644
--- a/glamor/glamor_fbo.c
+++ b/glamor/glamor_fbo.c
@@ -30,9 +30,107 @@
 
 #include "glamor_priv.h"
 
-void
-glamor_destroy_fbo(glamor_screen_private *glamor_priv,
-   glamor_pixmap_fbo *fbo)
+#define GLAMOR_CACHE_EXPIRE_MAX 100
+
+#define GLAMOR_CACHE_DEFAULT0
+#define GLAMOR_CACHE_EXACT_SIZE 1
+
+//#define NO_FBO_CACHE 1
+#define FBO_CACHE_THRESHOLD  (256*1024*1024)
+
+/* Loop from the tail to the head. */
+#define xorg_list_for_each_entry_reverse(pos, head, member) \
+for (pos = __container_of((head)->prev, pos, member);   \
+ >member != (head);\
+ pos = __container_of(pos->member.prev, pos, member))
+
+#define xorg_list_for_each_entry_safe_reverse(pos, tmp, head, member)   \
+for (pos = __container_of((head)->prev, pos, member),   \
+ tmp = __container_of(pos->member.prev, pos, member);   \
+ >member != (head);\
+ pos = tmp, tmp = __container_of(pos->member.prev, tmp, member))
+
+inline static int
+cache_wbucket(int size)
+{
+int order = __fls(size / 32);
+
+if (order >= CACHE_BUCKET_WCOUNT)
+order = CACHE_BUCKET_WCOUNT - 1;
+return order;
+}
+
+inline static int
+cache_hbucket(int size)
+{
+int order = __fls(size / 32);
+
+if (order >= CACHE_BUCKET_HCOUNT)
+order = CACHE_BUCKET_HCOUNT - 1;
+return order;
+}
+
+static int
+cache_format(GLenum format)
+{
+switch (format) {
+case GL_ALPHA:
+case GL_LUMINANCE:
+case GL_RED:
+return 2;
+case GL_RGB:
+return 1;
+case GL_RGBA:
+return 0;
+default:
+return -1;
+}
+}
+
+static glamor_pixmap_fbo *
+glamor_pixmap_fbo_cache_get(glamor_screen_private *glamor_priv,
+int w, int h, GLenum format)
+{
+struct xorg_list *cache;
+glamor_pixmap_fbo *fbo_entry, *ret_fbo = NULL;
+int n_format;
+
+#ifdef NO_FBO_CACHE
+return NULL;
+#else
+n_format = cache_format(format);
+if (n_format == -1)
+return NULL;
+cache = _priv->fbo_cache[n_format]
+[cache_wbucket(w)]
+[cache_hbucket(h)];
+
+xorg_list_for_each_entry(fbo_entry, cache, list) {
+if (fbo_entry->width == w && fbo_entry->height == h) {
+
+DEBUGF("Request w %d h %d format %x \n", w, h, format);
+DEBUGF("got cache entry %p w %d h %d fbo %d tex %d format %x\n",
+   fbo_entry, fbo_entry->width, fbo_entry->height,
+   fbo_entry->fb, fbo_entry->tex, fbo_entry->format);
+assert(format == fbo_entry->format);
+xorg_list_del(_entry->list);
+ret_fbo = fbo_entry;
+break;
+}
+