Re: [Nouveau] [PATCH] exa: add GM10x acceleration support

2016-10-17 Thread Samuel Pitoiset



On 10/17/2016 02:27 PM, Ilia Mirkin wrote:

On Mon, Oct 17, 2016 at 5:28 AM, Samuel Pitoiset
 wrote:

Looks reasonable, some minor comments below.


On 10/16/2016 02:06 AM, Ilia Mirkin wrote:

diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
index 6add60b..a53dfe6 100644
--- a/src/nvc0_exa.c
+++ b/src/nvc0_exa.c
@@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix,
if (!PUSH_SPACE(push, 64))
return;

+   if (pNv->dev->chipset >= 0x110) {
+   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+   PUSH_DATA (push, 256);
+   PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);



No PUSH_DATAh in the DDX?


Nope. Didn't feel the burning need to add a helper either.


Fine by me.




 $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp
-   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100
-o $@
 $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp
-   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100
-o $@

 $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp
-   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o
$@
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104
-o $@
 $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp
-   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o
$@
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104
-o $@



This is unrelated to your main change, but, well, it should be *exactly* the
same thing. :)


You mean the bit about me adding -V gf100? Figured I'd fix it up while
I was at it. The machine/variant names changed though.


Yeah, I won't ask for a separate patch anyways. :-)








 $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp
cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
 $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp
cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
+
+$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
+$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp
+   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
new file mode 100644
index 000..ce78036
--- /dev/null
+++ b/src/shader/exac8nv110.fp
@@ -0,0 +1,47 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_Composite_A8[] = {
+   0x1462,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x8000,
+   0x0a0a,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x,
+   0x000f,
+   0x,
+#include "exac8nv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)



Those sched codes are definitely bad, but let's keep them as is for now. I
might have a look at some point to improve them.


Yeah, way wrong. However it's what our compiler would produce. You can
use this as a proving ground for your various theories. All simple
shaders though, no control flow. Only complex thing is textures.

  -ilia



--
-Samuel
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] exa: add GM10x acceleration support

2016-10-17 Thread Ilia Mirkin
On Mon, Oct 17, 2016 at 5:28 AM, Samuel Pitoiset
 wrote:
> Looks reasonable, some minor comments below.
>
>
> On 10/16/2016 02:06 AM, Ilia Mirkin wrote:
>> diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
>> index 6add60b..a53dfe6 100644
>> --- a/src/nvc0_exa.c
>> +++ b/src/nvc0_exa.c
>> @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix,
>> if (!PUSH_SPACE(push, 64))
>> return;
>>
>> +   if (pNv->dev->chipset >= 0x110) {
>> +   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>> +   PUSH_DATA (push, 256);
>> +   PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);
>
>
> No PUSH_DATAh in the DDX?

Nope. Didn't feel the burning need to add a helper either.

>>  $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp
>> -   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100
>> -o $@
>>  $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp
>> -   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100
>> -o $@
>>
>>  $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp
>> -   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o
>> $@
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104
>> -o $@
>>  $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp
>> -   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o
>> $@
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104
>> -o $@
>
>
> This is unrelated to your main change, but, well, it should be *exactly* the
> same thing. :)

You mean the bit about me adding -V gf100? Figured I'd fix it up while
I was at it. The machine/variant names changed though.

>
>
>>
>>  $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp
>> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
>>  $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp
>> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
>> +
>> +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
>> +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp
>> +   cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
>> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
>> new file mode 100644
>> index 000..ce78036
>> --- /dev/null
>> +++ b/src/shader/exac8nv110.fp
>> @@ -0,0 +1,47 @@
>> +#ifndef ENVYAS
>> +static uint32_t
>> +NV110FP_Composite_A8[] = {
>> +   0x1462,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x8000,
>> +   0x0a0a,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x,
>> +   0x000f,
>> +   0x,
>> +#include "exac8nv110.fpc"
>> +};
>> +#else
>> +
>> +sched (st 0x0) (st 0x0) (st 0x0)
>
>
> Those sched codes are definitely bad, but let's keep them as is for now. I
> might have a look at some point to improve them.

Yeah, way wrong. However it's what our compiler would produce. You can
use this as a proving ground for your various theories. All simple
shaders though, no control flow. Only complex thing is textures.

  -ilia
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH] exa: add GM10x acceleration support

2016-10-17 Thread Samuel Pitoiset

Looks reasonable, some minor comments below.

On 10/16/2016 02:06 AM, Ilia Mirkin wrote:

rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to
work. Very lightly tested.

Instead of sticking coordinates into pushbufs, the vertex shader is
modified to read them from a constbuf, indexed by vertex id. This
approach could be used for all nvc0 generations, but I didn't want to
rock the boat.

Signed-off-by: Ilia Mirkin 
---

Note: this won't work for GM20x - we need to allow TIC format to be updated
for that to work. But this is a step in that direction.

 src/Makefile.am   |  16 
 src/nouveau_copy.c|   1 +
 src/nouveau_exa.c |   2 +-
 src/nouveau_xv.c  |   2 +-
 src/nv_accel_common.c |   1 +
 src/nv_driver.c   |   1 +
 src/nvc0_accel.c  |  37 ++---
 src/nvc0_exa.c|  48 --
 src/nvc0_xv.c |  48 --
 src/shader/Makefile   |  23 ---
 src/shader/exac8nv110.fp  |  47 +
 src/shader/exac8nv110.fpc |  38 +
 src/shader/exacanv110.fp  |  47 +
 src/shader/exacanv110.fpc |  38 +
 src/shader/exacmnv110.fp  |  47 +
 src/shader/exacmnv110.fpc |  38 +
 src/shader/exas8nv110.fp  |  42 +++
 src/shader/exas8nv110.fpc |  28 +
 src/shader/exasanv110.fp  |  47 +
 src/shader/exasanv110.fpc |  38 +
 src/shader/exascnv110.fp  |  38 +
 src/shader/exascnv110.fpc |  20 +
 src/shader/videonv110.fp  |  54 
 src/shader/videonv110.fpc |  52 +++
 src/shader/xfrm2nv110.vp  |  82 +
 src/shader/xfrm2nv110.vpc | 102 ++
 26 files changed, 918 insertions(+), 19 deletions(-)
 create mode 100644 src/shader/exac8nv110.fp
 create mode 100644 src/shader/exac8nv110.fpc
 create mode 100644 src/shader/exacanv110.fp
 create mode 100644 src/shader/exacanv110.fpc
 create mode 100644 src/shader/exacmnv110.fp
 create mode 100644 src/shader/exacmnv110.fpc
 create mode 100644 src/shader/exas8nv110.fp
 create mode 100644 src/shader/exas8nv110.fpc
 create mode 100644 src/shader/exasanv110.fp
 create mode 100644 src/shader/exasanv110.fpc
 create mode 100644 src/shader/exascnv110.fp
 create mode 100644 src/shader/exascnv110.fpc
 create mode 100644 src/shader/videonv110.fp
 create mode 100644 src/shader/videonv110.fpc
 create mode 100644 src/shader/xfrm2nv110.vp
 create mode 100644 src/shader/xfrm2nv110.vpc

diff --git a/src/Makefile.am b/src/Makefile.am
index 1e04ddf..6ba8d87 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \
 shader/exac8nve0.fpc \
 shader/exac8nvf0.fp \
 shader/exac8nvf0.fpc \
+shader/exac8nv110.fp \
+shader/exac8nv110.fpc \
 shader/exacanvc0.fp \
 shader/exacanvc0.fpc \
 shader/exacanve0.fp \
 shader/exacanve0.fpc \
 shader/exacanvf0.fp \
 shader/exacanvf0.fpc \
+shader/exacanv110.fp \
+shader/exacanv110.fpc \
 shader/exacmnvc0.fp \
 shader/exacmnvc0.fpc \
 shader/exacmnve0.fp \
 shader/exacmnve0.fpc \
 shader/exacmnvf0.fp \
 shader/exacmnvf0.fpc \
+shader/exacmnv110.fp \
+shader/exacmnv110.fpc \
 shader/exas8nvc0.fp \
 shader/exas8nvc0.fpc \
 shader/exas8nve0.fp \
 shader/exas8nve0.fpc \
 shader/exas8nvf0.fp \
 shader/exas8nvf0.fpc \
+shader/exas8nv110.fp \
+shader/exas8nv110.fpc \
 shader/exasanvc0.fp \
 shader/exasanvc0.fpc \
 shader/exasanve0.fp \
 shader/exasanve0.fpc \
 shader/exasanvf0.fp \
 shader/exasanvf0.fpc \
+shader/exasanv110.fp \
+shader/exasanv110.fpc \
 shader/exascnvc0.fp \
 shader/exascnvc0.fpc \
 shader/exascnve0.fp \
 shader/exascnve0.fpc \
 shader/exascnvf0.fp \
 shader/exascnvf0.fpc \
+shader/exascnv110.fp \
+shader/exascnv110.fpc \
 shader/videonvc0.fp \
 shader/videonvc0.fpc \
 shader/videonve0.fp \
 shader/videonve0.fpc \
 shader/videonvf0.fp \
 shader/videonvf0.fpc \
+shader/videonv110.fp \
+shader/videonv110.fpc \
 shader/xfrm2nvc0.vp \
 shader/xfrm2nvc0.vpc \
 shader/xfrm2nve0.vp \
 shader/xfrm2nve0.vpc \
 shader/xfrm2nvf0.vp \
 shader/xfrm2nvf0.vpc \
+shader/xfrm2nv110.vp \
+   

[Nouveau] [PATCH] exa: add GM10x acceleration support

2016-10-15 Thread Ilia Mirkin
rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to
work. Very lightly tested.

Instead of sticking coordinates into pushbufs, the vertex shader is
modified to read them from a constbuf, indexed by vertex id. This
approach could be used for all nvc0 generations, but I didn't want to
rock the boat.

Signed-off-by: Ilia Mirkin 
---

Note: this won't work for GM20x - we need to allow TIC format to be updated
for that to work. But this is a step in that direction.

 src/Makefile.am   |  16 
 src/nouveau_copy.c|   1 +
 src/nouveau_exa.c |   2 +-
 src/nouveau_xv.c  |   2 +-
 src/nv_accel_common.c |   1 +
 src/nv_driver.c   |   1 +
 src/nvc0_accel.c  |  37 ++---
 src/nvc0_exa.c|  48 --
 src/nvc0_xv.c |  48 --
 src/shader/Makefile   |  23 ---
 src/shader/exac8nv110.fp  |  47 +
 src/shader/exac8nv110.fpc |  38 +
 src/shader/exacanv110.fp  |  47 +
 src/shader/exacanv110.fpc |  38 +
 src/shader/exacmnv110.fp  |  47 +
 src/shader/exacmnv110.fpc |  38 +
 src/shader/exas8nv110.fp  |  42 +++
 src/shader/exas8nv110.fpc |  28 +
 src/shader/exasanv110.fp  |  47 +
 src/shader/exasanv110.fpc |  38 +
 src/shader/exascnv110.fp  |  38 +
 src/shader/exascnv110.fpc |  20 +
 src/shader/videonv110.fp  |  54 
 src/shader/videonv110.fpc |  52 +++
 src/shader/xfrm2nv110.vp  |  82 +
 src/shader/xfrm2nv110.vpc | 102 ++
 26 files changed, 918 insertions(+), 19 deletions(-)
 create mode 100644 src/shader/exac8nv110.fp
 create mode 100644 src/shader/exac8nv110.fpc
 create mode 100644 src/shader/exacanv110.fp
 create mode 100644 src/shader/exacanv110.fpc
 create mode 100644 src/shader/exacmnv110.fp
 create mode 100644 src/shader/exacmnv110.fpc
 create mode 100644 src/shader/exas8nv110.fp
 create mode 100644 src/shader/exas8nv110.fpc
 create mode 100644 src/shader/exasanv110.fp
 create mode 100644 src/shader/exasanv110.fpc
 create mode 100644 src/shader/exascnv110.fp
 create mode 100644 src/shader/exascnv110.fpc
 create mode 100644 src/shader/videonv110.fp
 create mode 100644 src/shader/videonv110.fpc
 create mode 100644 src/shader/xfrm2nv110.vp
 create mode 100644 src/shader/xfrm2nv110.vpc

diff --git a/src/Makefile.am b/src/Makefile.am
index 1e04ddf..6ba8d87 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \
 shader/exac8nve0.fpc \
 shader/exac8nvf0.fp \
 shader/exac8nvf0.fpc \
+shader/exac8nv110.fp \
+shader/exac8nv110.fpc \
 shader/exacanvc0.fp \
 shader/exacanvc0.fpc \
 shader/exacanve0.fp \
 shader/exacanve0.fpc \
 shader/exacanvf0.fp \
 shader/exacanvf0.fpc \
+shader/exacanv110.fp \
+shader/exacanv110.fpc \
 shader/exacmnvc0.fp \
 shader/exacmnvc0.fpc \
 shader/exacmnve0.fp \
 shader/exacmnve0.fpc \
 shader/exacmnvf0.fp \
 shader/exacmnvf0.fpc \
+shader/exacmnv110.fp \
+shader/exacmnv110.fpc \
 shader/exas8nvc0.fp \
 shader/exas8nvc0.fpc \
 shader/exas8nve0.fp \
 shader/exas8nve0.fpc \
 shader/exas8nvf0.fp \
 shader/exas8nvf0.fpc \
+shader/exas8nv110.fp \
+shader/exas8nv110.fpc \
 shader/exasanvc0.fp \
 shader/exasanvc0.fpc \
 shader/exasanve0.fp \
 shader/exasanve0.fpc \
 shader/exasanvf0.fp \
 shader/exasanvf0.fpc \
+shader/exasanv110.fp \
+shader/exasanv110.fpc \
 shader/exascnvc0.fp \
 shader/exascnvc0.fpc \
 shader/exascnve0.fp \
 shader/exascnve0.fpc \
 shader/exascnvf0.fp \
 shader/exascnvf0.fpc \
+shader/exascnv110.fp \
+shader/exascnv110.fpc \
 shader/videonvc0.fp \
 shader/videonvc0.fpc \
 shader/videonve0.fp \
 shader/videonve0.fpc \
 shader/videonvf0.fp \
 shader/videonvf0.fpc \
+shader/videonv110.fp \
+shader/videonv110.fpc \
 shader/xfrm2nvc0.vp \
 shader/xfrm2nvc0.vpc \
 shader/xfrm2nve0.vp \
 shader/xfrm2nve0.vpc \
 shader/xfrm2nvf0.vp \
 shader/xfrm2nvf0.vpc \
+shader/xfrm2nv110.vp \
+shader/xfrm2nv110.vpc \
 shader/Makefile \
 nouveau_local.