Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-27 Thread Richard Henderson
On 2/27/19 8:01 PM, David Gibson wrote:
> On Wed, Feb 27, 2019 at 04:47:30PM -0800, Richard Henderson wrote:
>> On 2/24/19 3:31 PM, David Gibson wrote:
>>> I have access to POWER8 and POWER9 machines, but I haven't worked with
>>> RISU before.  If you can give me a straightforward recipe I can try
>>> running the tests.
>>
>> From
>>
>>   https://git.linaro.org/people/peter.maydell/risu.git
> 
> And I assume build it, at which point I hit this:
> 
> $ make
> gcc  -Wall -D_GNU_SOURCE -DARCH=ppc64 -I /home/dwg/risu/risu -g  -o 
> risu_ppc64.o -c risu_ppc64.c
> risu_ppc64.c: In function ‘advance_pc’:
> risu_ppc64.c:19:25: error: dereferencing pointer to incomplete type ‘struct 
> pt_regs’
>  uc->uc_mcontext.regs->nip += 4;
>  ^~
> make: *** [Makefile:44: risu_ppc64.o] Error 1

Dunno.  Works for me.

$ uname -a
Linux gcc135.osuosl.org 4.14.0-115.2.2.el7a.ppc64le #1 SMP Wed Nov 28 21:39:33
GMT 2018 ppc64le ppc64le ppc64le GNU/Linux
$ head -2 /etc/os-release
NAME="CentOS Linux"
VERSION="7 (AltArch)"

I guess you'll need to debug this in your environment to find out what's 
different?


r~



Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-27 Thread David Gibson
On Wed, Feb 27, 2019 at 04:47:30PM -0800, Richard Henderson wrote:
> On 2/24/19 3:31 PM, David Gibson wrote:
> > I have access to POWER8 and POWER9 machines, but I haven't worked with
> > RISU before.  If you can give me a straightforward recipe I can try
> > running the tests.
> 
> From
> 
>   https://git.linaro.org/people/peter.maydell/risu.git

And I assume build it, at which point I hit this:

$ make
gcc  -Wall -D_GNU_SOURCE -DARCH=ppc64 -I /home/dwg/risu/risu -g  -o 
risu_ppc64.o -c risu_ppc64.c
risu_ppc64.c: In function ‘advance_pc’:
risu_ppc64.c:19:25: error: dereferencing pointer to incomplete type ‘struct 
pt_regs’
 uc->uc_mcontext.regs->nip += 4;
 ^~
make: *** [Makefile:44: risu_ppc64.o] Error 1


I'm running on ppc64le (as I usually do nowadays), so I'm not sure if
this is due to a difference between the ppc64 and ppc64le ABIs (and
mcontext structures) or something else.

> First you need to generate the test cases.  It looks like the current
> ppc64.risu file is fairly complete, at least to some level.  So:
> 
> mkdir ../risu-testcases-ppc64
> ./scripts/generate_all.sh ppc64.risu ../risu-testcases-ppc64
> 
> Then record traces from real hardware to compare against:
> 
> RISU=`pwd`/risu ./scripts/record_traces.sh ../risu-testcases-ppc64/*.bin
> 
> Now compare QEMU against hardware:
> 
> QEMU=qemu-ppc64 RISU=`pwd`/risu \
> ./scripts/run_risu ../risu-testcases-ppc64/*.bin
> 
> Which is all hunky dory until something fails.  At which point it's a matter 
> of
> using qemu's -d and -dfilter options judiciously, and "objdump -b binary -m
> ppc64 -D foo.bin".
> 
> 
> r~
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-27 Thread Richard Henderson
On 2/24/19 3:31 PM, David Gibson wrote:
> I have access to POWER8 and POWER9 machines, but I haven't worked with
> RISU before.  If you can give me a straightforward recipe I can try
> running the tests.

From

  https://git.linaro.org/people/peter.maydell/risu.git

First you need to generate the test cases.  It looks like the current
ppc64.risu file is fairly complete, at least to some level.  So:

mkdir ../risu-testcases-ppc64
./scripts/generate_all.sh ppc64.risu ../risu-testcases-ppc64

Then record traces from real hardware to compare against:

RISU=`pwd`/risu ./scripts/record_traces.sh ../risu-testcases-ppc64/*.bin

Now compare QEMU against hardware:

QEMU=qemu-ppc64 RISU=`pwd`/risu \
./scripts/run_risu ../risu-testcases-ppc64/*.bin

Which is all hunky dory until something fails.  At which point it's a matter of
using qemu's -d and -dfilter options judiciously, and "objdump -b binary -m
ppc64 -D foo.bin".


r~



Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-27 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20190222055950.17403-1-richard.hender...@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 20190222055950.17403-1-richard.hender...@linaro.org
Subject: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/1551095970-14645-1-git-send-email-th...@redhat.com -> 
patchew/1551095970-14645-1-git-send-email-th...@redhat.com
 * [new tag]   patchew/20190221081054.13853-1-kra...@redhat.com -> 
patchew/20190221081054.13853-1-kra...@redhat.com
 * [new tag]   patchew/20190221173758.24672-1-...@redhat.com -> 
patchew/20190221173758.24672-1-...@redhat.com
 * [new tag]   
patchew/20190222055950.17403-1-richard.hender...@linaro.org -> 
patchew/20190222055950.17403-1-richard.hender...@linaro.org
 * [new tag]   
patchew/20190222170936.13268-1-peter.mayd...@linaro.org -> 
patchew/20190222170936.13268-1-peter.mayd...@linaro.org
Switched to a new branch 'test'
56177b9cb7 tcg/ppc: Add vector opcodes

=== OUTPUT BEGIN ===
ERROR: spaces required around that '|' (ctx:VxV)
#187: FILE: tcg/ppc/tcg-target.inc.c:327:
+#define VX4(opc)  (OPCD(4)|(opc))
   ^

ERROR: trailing whitespace
#552: FILE: tcg/ppc/tcg-target.inc.c:2876:
+umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, $

ERROR: trailing whitespace
#553: FILE: tcg/ppc/tcg-target.inc.c:2877:
+smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, $

ERROR: trailing whitespace
#554: FILE: tcg/ppc/tcg-target.inc.c:2878:
+umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, $

WARNING: Block comments use a leading /* on a separate line
#623: FILE: tcg/ppc/tcg-target.inc.c:2947:
+/* Recall we use VSX integer loads, so the integer is right

WARNING: Block comments use * on subsequent lines
#624: FILE: tcg/ppc/tcg-target.inc.c:2948:
+/* Recall we use VSX integer loads, so the integer is right
+   justified within the left (zero-index) double-word.  */

WARNING: Block comments use a trailing */ on a separate line
#624: FILE: tcg/ppc/tcg-target.inc.c:2948:
+   justified within the left (zero-index) double-word.  */

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#822: 
new file mode 100644

WARNING: Block comments use a leading /* on a separate line
#827: FILE: tcg/ppc/tcg-target.opc.h:1:
+/* Target-specific opcodes for host vector expansion.  These will be

WARNING: Block comments use * on subsequent lines
#828: FILE: tcg/ppc/tcg-target.opc.h:2:
+/* Target-specific opcodes for host vector expansion.  These will be
+   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,

WARNING: Block comments use a trailing */ on a separate line
#829: FILE: tcg/ppc/tcg-target.opc.h:3:
+   consider these to be UNSPEC with names.  */

total: 4 errors, 7 warnings, 783 lines checked

Commit 56177b9cb743 (tcg/ppc: Add vector opcodes) has style problems, please 
review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20190222055950.17403-1-richard.hender...@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-24 Thread David Gibson
On Fri, Feb 22, 2019 at 05:13:37PM +, Mark Cave-Ayland wrote:
> On 22/02/2019 05:59, Richard Henderson wrote:
> 
> > This requires VSX, not just Altivec, so Power7 or later.
> > 
> > Signed-off-by: Richard Henderson 
> > ---
> > 
> > At present there are no tunables that can avoid the 64-bit element
> > load/store requirement.  As with requiring AVX1 for x86 hosts, I'm
> > not sure it's worth inventing such a tunable for pre-power7 hosts.
> > 
> > Tested vs aarch64 risu test cases.  It's probably worth testing
> > this vs Mark's target/ppc conversion.
> 
> Oooh this looks really exciting! However... I only have a G4 Mac Mini around 
> that I
> use for testing which is Altivec-only :(  Is it much work to support
> non-VSX hosts?

I have access to POWER8 and POWER9 machines, but I haven't worked with
RISU before.  If you can give me a straightforward recipe I can try
running the tests.

> 
> This leads me to a related point that came up when Howard and I were testing 
> the PPC
> vector patches - how do we know at runtime which optimisations were being 
> used, e.g.
> what is the value of have_avx2 on a particular CPU running QEMU?
> 
> Under Linux this isn't too bad since you can just do "cat /proc/cpuinfo | 
> grep avx2"
> but it becomes more tricky when getting bug reports from Windows users who 
> aren't
> particularly technical...
> 
> 
> ATB,
> 
> Mark.
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-22 Thread Richard Henderson
On 2/22/19 9:13 AM, Mark Cave-Ayland wrote:
> On 22/02/2019 05:59, Richard Henderson wrote:
> 
>> This requires VSX, not just Altivec, so Power7 or later.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>>
>> At present there are no tunables that can avoid the 64-bit element
>> load/store requirement.  As with requiring AVX1 for x86 hosts, I'm
>> not sure it's worth inventing such a tunable for pre-power7 hosts.
>>
>> Tested vs aarch64 risu test cases.  It's probably worth testing
>> this vs Mark's target/ppc conversion.
> 
> Oooh this looks really exciting! However... I only have a G4 Mac Mini around 
> that I
> use for testing which is Altivec-only :(  Is it much work to support non-VSX 
> hosts?

Hmm, dunno.  I'll think about it.

> This leads me to a related point that came up when Howard and I were testing 
> the PPC
> vector patches - how do we know at runtime which optimisations were being 
> used, e.g.
> what is the value of have_avx2 on a particular CPU running QEMU?

There is no way from within qemu itself.

> Under Linux this isn't too bad since you can just do "cat /proc/cpuinfo | 
> grep avx2"
> but it becomes more tricky when getting bug reports from Windows users who 
> aren't
> particularly technical...

Something like:
https://www.intel.com/content/www/us/en/support/articles/05651/processors.html

or any one of several other utilities that can display cpuid information.


r~



Re: [Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-22 Thread Mark Cave-Ayland
On 22/02/2019 05:59, Richard Henderson wrote:

> This requires VSX, not just Altivec, so Power7 or later.
> 
> Signed-off-by: Richard Henderson 
> ---
> 
> At present there are no tunables that can avoid the 64-bit element
> load/store requirement.  As with requiring AVX1 for x86 hosts, I'm
> not sure it's worth inventing such a tunable for pre-power7 hosts.
> 
> Tested vs aarch64 risu test cases.  It's probably worth testing
> this vs Mark's target/ppc conversion.

Oooh this looks really exciting! However... I only have a G4 Mac Mini around 
that I
use for testing which is Altivec-only :(  Is it much work to support non-VSX 
hosts?

This leads me to a related point that came up when Howard and I were testing 
the PPC
vector patches - how do we know at runtime which optimisations were being used, 
e.g.
what is the value of have_avx2 on a particular CPU running QEMU?

Under Linux this isn't too bad since you can just do "cat /proc/cpuinfo | grep 
avx2"
but it becomes more tricky when getting bug reports from Windows users who 
aren't
particularly technical...


ATB,

Mark.



[Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-21 Thread Richard Henderson
This requires VSX, not just Altivec, so Power7 or later.

Signed-off-by: Richard Henderson 
---

At present there are no tunables that can avoid the 64-bit element
load/store requirement.  As with requiring AVX1 for x86 hosts, I'm
not sure it's worth inventing such a tunable for pre-power7 hosts.

Tested vs aarch64 risu test cases.  It's probably worth testing
this vs Mark's target/ppc conversion.


r~

---
 tcg/ppc/tcg-target.h |  31 +-
 tcg/ppc/tcg-target.opc.h |   3 +
 tcg/ppc/tcg-target.inc.c | 604 +++
 3 files changed, 577 insertions(+), 61 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 52c1bb04b1..0b9943e7e6 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -31,7 +31,7 @@
 # define TCG_TARGET_REG_BITS  32
 #endif
 
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
@@ -45,12 +45,22 @@ typedef enum {
 TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
 TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
 
+TCG_REG_V0,  TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,
+TCG_REG_V4,  TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,
+TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
+TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
 TCG_REG_CALL_STACK = TCG_REG_R1,
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;
 
 extern bool have_isa_2_06;
 extern bool have_isa_3_00;
+extern bool have_isa_2_07_vsx;
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i320 /* andi */
@@ -124,6 +134,25 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_mulsh_i641
 #endif
 
+/* VSX required over ALTIVEC to perform 64-bit loads.  */
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_v64  have_isa_2_07_vsx
+#define TCG_TARGET_HAS_v128 have_isa_2_07_vsx
+#define TCG_TARGET_HAS_v256 0
+#endif
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec  1
+#define TCG_TARGET_HAS_not_vec  1
+#define TCG_TARGET_HAS_neg_vec  0
+#define TCG_TARGET_HAS_shi_vec  0
+#define TCG_TARGET_HAS_shs_vec  0
+#define TCG_TARGET_HAS_shv_vec  0
+#define TCG_TARGET_HAS_cmp_vec  1
+#define TCG_TARGET_HAS_mul_vec  1
+#define TCG_TARGET_HAS_sat_vec  1
+#define TCG_TARGET_HAS_minmax_vec   1
+
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
new file mode 100644
index 00..4816a6c3d4
--- /dev/null
+++ b/tcg/ppc/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion.  These will be
+   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+   consider these to be UNSPEC with names.  */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 773690f1d9..d0782e2eb2 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -42,6 +42,9 @@
 # define TCG_REG_TMP1   TCG_REG_R12
 #endif
 
+#define TCG_VEC_TMP1TCG_REG_V0
+#define TCG_VEC_TMP2TCG_REG_V1
+
 #define TCG_REG_TB TCG_REG_R31
 #define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
 
@@ -63,6 +66,7 @@ static tcg_insn_unit *tb_ret_addr;
 
 bool have_isa_2_06;
 bool have_isa_3_00;
+bool have_isa_2_07_vsx;
 
 #define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL  have_isa_2_06
@@ -72,39 +76,15 @@ bool have_isa_3_00;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-"r0",
-"r1",
-"r2",
-"r3",
-"r4",
-"r5",
-"r6",
-"r7",
-"r8",
-"r9",
-"r10",
-"r11",
-"r12",
-"r13",
-"r14",
-"r15",
-"r16",
-"r17",
-"r18",
-"r19",
-"r20",
-"r21",
-"r22",
-"r23",
-"r24",
-"r25",
-"r26",
-"r27",
-"r28",
-"r29",
-"r30",
-"r31"
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
+"r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+"v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+"v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
 };
 #endif
 
@@ -139,6 +119,26 @@ static const int tcg_target_reg_alloc_order[] = {
 TCG_REG_R5,