Re: [RFC PATCH v3 2/3] powerpc: Only set numa node information for present cpus at boottime

2014-09-03 Thread Cyril Bur



On 03/09/14 13:02, Nishanth Aravamudan wrote:

On 27.08.2014 [17:34:00 +0800], Li Zhong wrote:

As Nish suggested, it makes more sense to init the numa node information
for present cpus at boottime, which could also avoid WARN_ON(1) in
numa_setup_cpu().
Hit this on a Power8 LPAR. With the patchset applied the warnings are no 
longer present.


With this change, we also need to change the smp_prepare_cpus() to set up
numa information only on present cpus.

For those possible, but not present cpus, their numa information
will be set up after they are started, as the original code did before commit
2fabf084b6ad.

Cc: Nishanth Aravamudan n...@linux.vnet.ibm.com
Cc: Nathan Fontenot nf...@linux.vnet.ibm.com
Signed-off-by: Li Zhong zh...@linux.vnet.ibm.com


Acked-by: Nishanth Aravamudan n...@linux.vnet.ibm.com


Tested-by: Cyril Bur cyril@au1.ibm.com

---
  arch/powerpc/kernel/smp.c | 10 --
  arch/powerpc/mm/numa.c|  2 +-
  2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a0738af..dc0e774 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -379,8 +379,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
/*
 * numa_node_id() works after this.
 */
-   set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
-   set_cpu_numa_mem(cpu, 
local_memory_node(numa_cpu_lookup_table[cpu]));
+   if (cpu_present(cpu)) {
+   set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+   set_cpu_numa_mem(cpu,
+   local_memory_node(numa_cpu_lookup_table[cpu]));
+   }
}

cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -728,6 +731,9 @@ void start_secondary(void *unused)
}
traverse_core_siblings(cpu, true);

+   set_numa_node(numa_cpu_lookup_table[cpu]);
+   set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9918c02..3a9061e 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1127,7 +1127,7 @@ void __init do_init_bootmem(void)
 * even before we online them, so that we can use cpu_to_{node,mem}
 * early in boot, cf. smp_prepare_cpus().
 */
-   for_each_possible_cpu(cpu) {
+   for_each_present_cpu(cpu) {
numa_setup_cpu((unsigned long)cpu);
}
  }
--
1.9.1



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] Make CONFIG_FHANDLE=y for all 64 bit powerpc defconfigs

2014-09-04 Thread Cyril Bur
CONFIG_FHANDLE is a requirement for systemd and with the increasing
uptake of systemd within distros it makes sense for 64 bit defconfigs
to include it.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 arch/powerpc/configs/cell_defconfig  | 1 +
 arch/powerpc/configs/celleb_defconfig| 1 +
 arch/powerpc/configs/corenet64_smp_defconfig | 1 +
 arch/powerpc/configs/g5_defconfig| 1 +
 arch/powerpc/configs/maple_defconfig | 1 +
 arch/powerpc/configs/pasemi_defconfig| 1 +
 arch/powerpc/configs/ppc64_defconfig | 1 +
 arch/powerpc/configs/ppc64e_defconfig| 1 +
 arch/powerpc/configs/ps3_defconfig   | 1 +
 arch/powerpc/configs/pseries_defconfig   | 1 +
 arch/powerpc/configs/pseries_le_defconfig| 1 +
 11 files changed, 11 insertions(+)

diff --git a/arch/powerpc/configs/cell_defconfig 
b/arch/powerpc/configs/cell_defconfig
index 4bee1a6..45fd06c 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -5,6 +5,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
+CONFIG_FHANDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
diff --git a/arch/powerpc/configs/celleb_defconfig 
b/arch/powerpc/configs/celleb_defconfig
index 6d7b22f..77d7bf3 100644
--- a/arch/powerpc/configs/celleb_defconfig
+++ b/arch/powerpc/configs/celleb_defconfig
@@ -5,6 +5,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=4
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
+CONFIG_FHANDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig 
b/arch/powerpc/configs/corenet64_smp_defconfig
index 4b07bad..269d6e4 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -4,6 +4,7 @@ CONFIG_ALTIVEC=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=24
 CONFIG_SYSVIPC=y
+CONFIG_FHANDLE=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
diff --git a/arch/powerpc/configs/g5_defconfig 
b/arch/powerpc/configs/g5_defconfig
index 3c72fa6..7594c5a 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -5,6 +5,7 @@ CONFIG_NR_CPUS=4
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_BLK_DEV_INITRD=y
diff --git a/arch/powerpc/configs/maple_defconfig 
b/arch/powerpc/configs/maple_defconfig
index 95e545d..c8b6a9d 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -4,6 +4,7 @@ CONFIG_NR_CPUS=4
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 # CONFIG_COMPAT_BRK is not set
diff --git a/arch/powerpc/configs/pasemi_defconfig 
b/arch/powerpc/configs/pasemi_defconfig
index cec044a..e5e7838 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -3,6 +3,7 @@ CONFIG_ALTIVEC=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_SYSVIPC=y
+CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
diff --git a/arch/powerpc/configs/ppc64_defconfig 
b/arch/powerpc/configs/ppc64_defconfig
index f26b267..f6c02f8 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -4,6 +4,7 @@ CONFIG_VSX=y
 CONFIG_SMP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig 
b/arch/powerpc/configs/ppc64e_defconfig
index 438e813..587f551 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -3,6 +3,7 @@ CONFIG_PPC_BOOK3E_64=y
 CONFIG_SMP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_TASKSTATS=y
diff --git a/arch/powerpc/configs/ps3_defconfig 
b/arch/powerpc/configs/ps3_defconfig
index fdee37f..2e637c8 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -5,6 +5,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_RD_LZMA=y
diff --git a/arch/powerpc/configs/pseries_defconfig 
b/arch/powerpc/configs/pseries_defconfig
index a905063..50375f1 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -5,6 +5,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=2048
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
 CONFIG_AUDITSYSCALL=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
diff --git a/arch/powerpc/configs/pseries_le_defconfig 
b/arch/powerpc/configs/pseries_le_defconfig
index 58e3dbf..4428ee4 100644
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -6,6 +6,7 @@ CONFIG_NR_CPUS=2048
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y

[PATCH 0/2] powerpc/pseries: RTAS mobility fixes

2014-09-15 Thread Cyril Bur
This patchset addresses endian issues and bugs in device tree update for
ibm,update-nodes and ibm,update-properties RTAS calls.

A subsequent patchset will deal with issues in device tree node addition
(ibm,configure-connector RTAS call) as well as more robust handling of
deleting critical device tree nodes.

Cyril Bur (2):
  powerpc/pseries: fix endian bugs in mobility RTAS calls
  powerpc/pseries: fix bugs in RTAS mobility code

 arch/powerpc/platforms/pseries/mobility.c | 143 +++---
 1 file changed, 89 insertions(+), 54 deletions(-)

-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] powerpc/pseries: fix endian bugs in mobility RTAS calls

2014-09-15 Thread Cyril Bur
These calls use a shared memory buffer to communicate device tree
updates.

PAPR specifies that RTAS buffers are to be written in big endian.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 arch/powerpc/platforms/pseries/mobility.c | 50 ---
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index e7cb6d4..09bef23 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -40,7 +40,7 @@ struct update_props_workarea {
 
 #define MIGRATION_SCOPE(1)
 
-static int mobility_rtas_call(int token, char *buf, s32 scope)
+static int mobility_rtas_call(int token, __be32 *buf, s32 scope)
 {
int rc;
 
@@ -129,14 +129,14 @@ static int update_dt_property(struct device_node *dn, 
struct property **prop,
 
 static int update_dt_node(u32 phandle, s32 scope)
 {
-   struct update_props_workarea *upwa;
+   struct update_props_workarea upwa;
struct device_node *dn;
struct property *prop = NULL;
int i, rc, rtas_rc;
-   char *prop_data;
-   char *rtas_buf;
int update_properties_token;
+   char *prop_data;
u32 vd;
+   __be32 *rtas_buf;
 
update_properties_token = rtas_token(ibm,update-properties);
if (update_properties_token == RTAS_UNKNOWN_SERVICE)
@@ -152,16 +152,17 @@ static int update_dt_node(u32 phandle, s32 scope)
return -ENOENT;
}
 
-   upwa = (struct update_props_workarea *)rtas_buf[0];
-   upwa-phandle = phandle;
-
+   *rtas_buf = cpu_to_be32(phandle);
do {
rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
scope);
if (rtas_rc  0)
break;
-
-   prop_data = rtas_buf + sizeof(*upwa);
+   upwa.phandle = be32_to_cpu(*rtas_buf);
+   upwa.state = be32_to_cpu(*(rtas_buf + 1));
+   upwa.reserved = be64_to_cpu(*((__be64 *)(rtas_buf + 2)));
+   upwa.nprops = be32_to_cpu(*(rtas_buf + 4));
+   prop_data = ((char *)rtas_buf) + sizeof(upwa);
 
/* On the first call to ibm,update-properties for a node the
 * the first property value descriptor contains an empty
@@ -169,18 +170,18 @@ static int update_dt_node(u32 phandle, s32 scope)
 * and the property value is the node path being updated.
 */
if (*prop_data == 0) {
-   prop_data++;
-   vd = *(u32 *)prop_data;
+   prop_data += sizeof(u32);
+   vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += vd + sizeof(vd);
-   upwa-nprops--;
+   upwa.nprops--;
}
 
-   for (i = 0; i  upwa-nprops; i++) {
+   for (i = 0; i  upwa.nprops; i++) {
char *prop_name;
 
prop_name = prop_data;
prop_data += strlen(prop_name) + 1;
-   vd = *(u32 *)prop_data;
+   vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += sizeof(vd);
 
switch (vd) {
@@ -236,10 +237,11 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
 
 int pseries_devicetree_update(s32 scope)
 {
-   char *rtas_buf;
-   u32 *data;
+   __be32 *rtas_buf;
int update_nodes_token;
int rc;
+   __be32 *data;
+   u32 node;
 
update_nodes_token = rtas_token(ibm,update-nodes);
if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
@@ -253,17 +255,16 @@ int pseries_devicetree_update(s32 scope)
rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
if (rc  rc != 1)
break;
+   data = rtas_buf + 4;
+   node = be32_to_cpu(*data++);
 
-   data = (u32 *)rtas_buf + 4;
-   while (*data  NODE_ACTION_MASK) {
+   while (node  NODE_ACTION_MASK) {
int i;
-   u32 action = *data  NODE_ACTION_MASK;
-   int node_count = *data  NODE_COUNT_MASK;
-
-   data++;
+   u32 action = node  NODE_ACTION_MASK;
+   int node_count = node  NODE_COUNT_MASK;
 
for (i = 0; i  node_count; i++) {
-   u32 phandle = *data++;
+   u32 phandle = be32_to_cpu(*data++);
u32 drc_index;
 
switch (action) {
@@ -274,11 +275,12 @@ int pseries_devicetree_update(s32 scope)
update_dt_node(phandle, scope

[PATCH 2/2] powerpc/pseries: fix bugs in RTAS mobility code

2014-09-15 Thread Cyril Bur
Running this code on a little endian machine has exposed some very unlikely
corner cases. Most of these oversights will lead to a buffer overflow.

Reworked some of the error paths. It seems more sane to stop trying to parse
a buffer on errors. Attempting to continue opens up the possibility of
overflows and/or garbage reads.

Don't warn about failed allocations when the amount was taken from the buffer,
assume the value was incorrect, don't needlessly concern the user.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 arch/powerpc/platforms/pseries/mobility.c | 95 +--
 1 file changed, 64 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 09bef23..00bd939 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -68,62 +68,45 @@ static int delete_dt_node(u32 phandle)
 }
 
 static int update_dt_property(struct device_node *dn, struct property **prop,
- const char *name, u32 vd, char *value)
+ const char *name, int length, char *value)
 {
struct property *new_prop = *prop;
-   int more = 0;
-
-   /* A negative 'vd' value indicates that only part of the new property
-* value is contained in the buffer and we need to call
-* ibm,update-properties again to get the rest of the value.
-*
-* A negative value is also the two's compliment of the actual value.
-*/
-   if (vd  0x8000) {
-   vd = ~vd + 1;
-   more = 1;
-   }
 
if (new_prop) {
/* partial property fixup */
-   char *new_data = kzalloc(new_prop-length + vd, GFP_KERNEL);
+   char *new_data = kzalloc(new_prop-length + length, GFP_KERNEL 
| __GFP_NOWARN);
if (!new_data)
return -ENOMEM;
 
memcpy(new_data, new_prop-value, new_prop-length);
-   memcpy(new_data + new_prop-length, value, vd);
+   memcpy(new_data + new_prop-length, value, length);
 
kfree(new_prop-value);
new_prop-value = new_data;
-   new_prop-length += vd;
+   new_prop-length += length;
} else {
new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
if (!new_prop)
return -ENOMEM;
 
-   new_prop-name = kstrdup(name, GFP_KERNEL);
+   new_prop-name = kstrdup(name, GFP_KERNEL | __GFP_NOWARN);
if (!new_prop-name) {
kfree(new_prop);
return -ENOMEM;
}
 
-   new_prop-length = vd;
-   new_prop-value = kzalloc(new_prop-length, GFP_KERNEL);
+   new_prop-length = length;
+   new_prop-value = kzalloc(new_prop-length, GFP_KERNEL | 
__GFP_NOWARN);
if (!new_prop-value) {
kfree(new_prop-name);
kfree(new_prop);
return -ENOMEM;
}
 
-   memcpy(new_prop-value, value, vd);
+   memcpy(new_prop-value, value, length);
*prop = new_prop;
}
 
-   if (!more) {
-   of_update_property(dn, new_prop);
-   *prop = NULL;
-   }
-
return 0;
 }
 
@@ -196,21 +179,52 @@ static int update_dt_node(u32 phandle, s32 scope)
break;
 
default:
+   /* A negative 'vd' value indicates that only 
part of the new property
+* value is contained in the buffer and we need 
to call
+* ibm,update-properties again to get the rest 
of the value.
+*
+* A negative value is also the two's 
compliment of the actual value.
+*/
+
rc = update_dt_property(dn, prop, prop_name,
-   vd, prop_data);
+   vd  0x8000 ? ~vd + 
1 : vd, prop_data);
if (rc) {
-   printk(KERN_ERR Could not update %s
-   property\n, prop_name);
+   printk(KERN_ERR Could not update %s 
property\n,
+  prop_name);
+   /* Could try to continue but if the 
failure was for a section
+* of a node it gets too easy to mess 
up the device tree.
+* Plus, ENOMEM likely means we have 
bigger problems than

[PATCH v2 0/3] fix bugs in mobility RTAS calls

2014-09-25 Thread Cyril Bur
This patchset addresses endian issues and bugs in device tree update for
ibm,update-nodes and ibm,update-properties RTAS calls.

A subsequent patchset will deal with issues in device tree node addition
(ibm,configure-connector RTAS call) as well as more robust handling of
deleting critical device tree nodes.

v1 attempted to keep the structure of the existing code.
v2 rewrites the relevant sections of mobility.c.

Cyril Bur (3):
  drivers/of: add of_changeset_apply_locked
  powerpc/pseries: create rtas buffer accessor
  powerpc/pseries: fix bugs in mobility RTAS calls

 arch/powerpc/platforms/pseries/Makefile  |   4 +-
 arch/powerpc/platforms/pseries/mobility.c| 406 ---
 arch/powerpc/platforms/pseries/pseries.h |  41 +++
 arch/powerpc/platforms/pseries/rtas_buffer.c | 126 +
 drivers/of/dynamic.c |  11 +
 include/linux/of.h   |   1 +
 6 files changed, 425 insertions(+), 164 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/rtas_buffer.c

-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/3] drivers/of: add of_changeset_apply_locked

2014-09-25 Thread Cyril Bur
Because of_changesets require that of_changeset_apply be called while
holding the of_mutex, and the of_mutex cannot be accessed nicely outside
the of code, add a wrapper which grabs the lock and calls
of_changeset_apply.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 drivers/of/dynamic.c | 11 +++
 include/linux/of.h   |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 54fecc4..cbff2a2 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -542,6 +542,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
__of_changeset_entry_destroy(ce);
 }
 
+int of_changeset_apply_locked(struct of_changeset *ocs)
+{
+   int ret;
+
+   mutex_lock(of_mutex);
+   ret = of_changeset_apply(ocs);
+   mutex_unlock(of_mutex);
+
+   return ret;
+}
+
 /**
  * of_changeset_apply - Applies a changeset
  *
diff --git a/include/linux/of.h b/include/linux/of.h
index 6c4363b..f5c48fa 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -827,6 +827,7 @@ struct of_changeset {
 extern void of_changeset_init(struct of_changeset *ocs);
 extern void of_changeset_destroy(struct of_changeset *ocs);
 extern int of_changeset_apply(struct of_changeset *ocs);
+extern int of_changeset_apply_locked(struct of_changeset *ocs);
 extern int of_changeset_revert(struct of_changeset *ocs);
 extern int of_changeset_action(struct of_changeset *ocs,
unsigned long action, struct device_node *np,
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 3/3] powerpc/pseries: fix bugs in mobility RTAS calls

2014-09-25 Thread Cyril Bur
These calls use a shared memory buffer to communicate device tree updates and
PAPR specifies that RTAS buffers are to be written in big endian.  Used the
rtas buffer accessor to help solve both endian problems and standard buffer
access problems.

It seems more sane to stop trying to parse a buffer on errors. Attempting to
continue opens up the possibility of overflows and/or garbage reads. Used
of_changesets throughout to avoid leaving the device tree in an inconsistent
state on error.

Don't warn about failed allocations when the amount was taken from the buffer,
assume the value was incorrect, don't needlessly concern the user.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 arch/powerpc/platforms/pseries/mobility.c | 406 ++
 1 file changed, 243 insertions(+), 163 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index e7cb6d4..37d16da 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -20,16 +20,11 @@
 
 #include asm/machdep.h
 #include asm/rtas.h
+
 #include pseries.h
 
-static struct kobject *mobility_kobj;
 
-struct update_props_workarea {
-   u32 phandle;
-   u32 state;
-   u64 reserved;
-   u32 nprops;
-} __packed;
+static struct kobject *mobility_kobj;
 
 #define NODE_ACTION_MASK   0xff00
 #define NODE_COUNT_MASK0x00ff
@@ -40,7 +35,7 @@ struct update_props_workarea {
 
 #define MIGRATION_SCOPE(1)
 
-static int mobility_rtas_call(int token, char *buf, s32 scope)
+static int mobility_rtas_call(int token, void *buf, s32 scope)
 {
int rc;
 
@@ -54,171 +49,193 @@ static int mobility_rtas_call(int token, char *buf, s32 
scope)
return rc;
 }
 
-static int delete_dt_node(u32 phandle)
+static int create_property(struct rtas_buffer *rtas_buf, struct property 
**prop,
+   const char *name, int length)
 {
-   struct device_node *dn;
-
-   dn = of_find_node_by_phandle(phandle);
-   if (!dn)
-   return -ENOENT;
+   void *prop_value;
+   void *new_value;
 
-   dlpar_detach_node(dn);
-   of_node_put(dn);
-   return 0;
-}
+   struct property *working_prop = *prop;
 
-static int update_dt_property(struct device_node *dn, struct property **prop,
- const char *name, u32 vd, char *value)
-{
-   struct property *new_prop = *prop;
-   int more = 0;
+   if (!get_rtas_buf_mem(rtas_buf, prop_value, length))
+   return -EOVERFLOW;
 
-   /* A negative 'vd' value indicates that only part of the new property
-* value is contained in the buffer and we need to call
-* ibm,update-properties again to get the rest of the value.
-*
-* A negative value is also the two's compliment of the actual value.
-*/
-   if (vd  0x8000) {
-   vd = ~vd + 1;
-   more = 1;
-   }
-
-   if (new_prop) {
-   /* partial property fixup */
-   char *new_data = kzalloc(new_prop-length + vd, GFP_KERNEL);
-   if (!new_data)
-   return -ENOMEM;
-
-   memcpy(new_data, new_prop-value, new_prop-length);
-   memcpy(new_data + new_prop-length, value, vd);
-
-   kfree(new_prop-value);
-   new_prop-value = new_data;
-   new_prop-length += vd;
-   } else {
-   new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
-   if (!new_prop)
-   return -ENOMEM;
-
-   new_prop-name = kstrdup(name, GFP_KERNEL);
-   if (!new_prop-name) {
-   kfree(new_prop);
+   if (!working_prop) {
+   working_prop = kzalloc(sizeof(*working_prop), GFP_KERNEL);
+   if (!working_prop) {
return -ENOMEM;
}
 
-   new_prop-length = vd;
-   new_prop-value = kzalloc(new_prop-length, GFP_KERNEL);
-   if (!new_prop-value) {
-   kfree(new_prop-name);
-   kfree(new_prop);
+   working_prop-name = kstrdup(name, GFP_KERNEL | __GFP_NOWARN);
+   if (!working_prop-name) {
+   kfree(working_prop);
return -ENOMEM;
}
-
-   memcpy(new_prop-value, value, vd);
-   *prop = new_prop;
+   *prop = working_prop;
}
 
-   if (!more) {
-   of_update_property(dn, new_prop);
+   new_value = krealloc(working_prop-value, working_prop-length + 
length, GFP_KERNEL | __GFP_NOWARN);
+   if (!new_value) {
+   kfree(working_prop-value);
+   kfree(working_prop-name);
+   kfree(working_prop);
*prop = NULL;
+   return -ENOMEM;
}
 
+   working_prop-value

[PATCH v2 2/3] powerpc/pseries: create rtas buffer accessor

2014-09-25 Thread Cyril Bur
Added simple accessor functions for RTAS in-memory buffers which perform
accesses of the appropriate type and perform endian conversions.

Signed-off-by: Cyril Bur cyril@au1.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile  |   4 +-
 arch/powerpc/platforms/pseries/pseries.h |  41 +
 arch/powerpc/platforms/pseries/rtas_buffer.c | 126 +++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/pseries/rtas_buffer.c

diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 0348079..7eb7c46 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,9 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   setup.o iommu.o event_sources.o ras.o \
-  firmware.o power.o dlpar.o mobility.o rng.o
+  firmware.o power.o dlpar.o mobility.o \
+  rng.o rtas_buffer.o
+
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_SCANLOG)  += scanlog.o
 obj-$(CONFIG_EEH)  += eeh_pseries.o
diff --git a/arch/powerpc/platforms/pseries/pseries.h 
b/arch/powerpc/platforms/pseries/pseries.h
index 361add6..f24e352 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -66,4 +66,45 @@ int pseries_root_bridge_prepare(struct pci_host_bridge 
*bridge);
 
 unsigned long pseries_memory_block_size(void);
 
+/* Manipulation of the in memory data returned from an RTAS call */
+
+/* Data pointed to by ptr is in big endian */
+struct rtas_buffer {
+   void *ptr;
+   int len;
+   int pos;
+};
+
+/* Buffer is already zeroed */
+int make_rtas_buf(struct rtas_buffer *b, size_t size);
+void free_rtas_buf(struct rtas_buffer *b);
+
+/* Return pointer to the buffer being used */
+void *get_rtas_buf(struct rtas_buffer *b);
+
+/* Checks if the buffer exists and the read position is less than the length*/
+bool check_rtas_buf(struct rtas_buffer *b);
+size_t get_rtas_buf_size(struct rtas_buffer *b);
+
+/* Advance the internal position of the buffer by size bytes */
+bool advance_rtas_buf(struct rtas_buffer *b, size_t size);
+
+/* Put a value val into the buffer at position pos. Function expect val in cpu
+ * endian. Returns true if the write to the buffer was successful.
+ */
+bool put_rtas_buf_32(struct rtas_buffer *b, u32 val, int pos);
+
+/* Grab the byte at the current position of the buffer without incrementing
+ * the internal position of the buffer */
+bool peek_rtas_buf(struct rtas_buffer *b, u8 *c);
+
+/* Accessor functions return true if access succeeded and value is written to
+ * val in cpu endian. Automatically advances its reference into the buffer by
+ * the requested amount.
+ */
+bool get_rtas_buf_32(struct rtas_buffer *b, u32 *val);
+bool get_rtas_buf_64(struct rtas_buffer *b, u64 *val);
+bool get_rtas_buf_mem(struct rtas_buffer *b, void **p, size_t len);
+bool get_rtas_buf_str(struct rtas_buffer *b, char **s);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/rtas_buffer.c 
b/arch/powerpc/platforms/pseries/rtas_buffer.c
new file mode 100644
index 000..f06b73c
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas_buffer.c
@@ -0,0 +1,126 @@
+#include linux/kernel.h
+#include linux/slab.h
+
+#include pseries.h
+
+
+int make_rtas_buf(struct rtas_buffer *b, size_t sz)
+{
+   b-ptr = kzalloc(sz, GFP_KERNEL);
+   b-len = sz;
+   b-pos = 0;
+   return (!b-ptr) ? -ENOMEM : 0;
+}
+
+void free_rtas_buf(struct rtas_buffer *b)
+{
+   kfree(b-ptr);
+}
+
+void *get_rtas_buf(struct rtas_buffer *b)
+{
+   return (b) ? b-ptr : NULL;
+}
+
+size_t get_rtas_buf_size(struct rtas_buffer *b)
+{
+   return (b) ? b-len : 0;
+}
+
+bool check_rtas_buf(struct rtas_buffer *b)
+{
+   return (b  b-ptr  b-pos  b-len);
+}
+
+static inline void *buf_pos(struct rtas_buffer *b)
+{
+   return (b  b-ptr) ? b-ptr + b-pos : NULL;
+}
+
+bool peek_rtas_buf(struct rtas_buffer *b, u8 *c)
+{
+   if (!b || !c || b-pos = b-len)
+   return false;
+
+   *c = *(u8 *)buf_pos(b);
+
+   return true;
+}
+
+bool put_rtas_buf_32(struct rtas_buffer *b, u32 val, int pos)
+{
+   if (!b || b-pos = b-len)
+   return false;
+
+   *((__be32 *)buf_pos(b)) = cpu_to_be32(val);
+
+   return true;
+}
+
+bool get_rtas_buf_32(struct rtas_buffer *b, u32 *val)
+{
+   if (!b || !val || b-len - b-pos  sizeof(u32))
+   return false;
+
+   *val = be32_to_cpu(*((__be32 *)buf_pos(b)));
+   b-pos += sizeof(u32);
+   return true;
+}
+
+bool get_rtas_buf_64(struct rtas_buffer *b, u64 *val)
+{
+   if (!b || !val || b-len - b-pos  sizeof(u64))
+   return false;
+
+   *val = be64_to_cpu(*((__be64 *)buf_pos(b)));
+   b-pos += sizeof(u64);
+   return true

Re: [PATCH v2 2/3] powerpc/pseries: create rtas buffer accessor

2014-09-25 Thread Cyril Bur
On Thu, 2014-09-25 at 15:47 -0700, Tyrel Datwyler wrote:
 On 09/24/2014 11:41 PM, Cyril Bur wrote:
  Added simple accessor functions for rtas in memory buffers which performs
  accesses of appropriate type and performs endian conversions.
  
  Signed-off-by: Cyril Bur cyril@au1.ibm.com
  ---
   arch/powerpc/platforms/pseries/Makefile  |   4 +-
   arch/powerpc/platforms/pseries/pseries.h |  41 +
   arch/powerpc/platforms/pseries/rtas_buffer.c | 126 
  +++
   3 files changed, 170 insertions(+), 1 deletion(-)
   create mode 100644 arch/powerpc/platforms/pseries/rtas_buffer.c
 
 Maybe Michael has an opinion here, but seems to me since this is all
 RTAS related it would make sense for this code to belong in
 kernel/rtas.c and include/asm/rtas.h.
 
Could quite possibly go there, I wanted to get the code looked at first
but I'll take a look if it could be used more generally and if so move.

Thanks

 -Tyrel
 
  
  diff --git a/arch/powerpc/platforms/pseries/Makefile 
  b/arch/powerpc/platforms/pseries/Makefile
  index 0348079..7eb7c46 100644
  --- a/arch/powerpc/platforms/pseries/Makefile
  +++ b/arch/powerpc/platforms/pseries/Makefile
  @@ -3,7 +3,9 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
   
   obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
 setup.o iommu.o event_sources.o ras.o \
  -  firmware.o power.o dlpar.o mobility.o rng.o
  +  firmware.o power.o dlpar.o mobility.o \
  +  rng.o rtas_buffer.o
  +
   obj-$(CONFIG_SMP)  += smp.o
   obj-$(CONFIG_SCANLOG)  += scanlog.o
   obj-$(CONFIG_EEH)  += eeh_pseries.o
  diff --git a/arch/powerpc/platforms/pseries/pseries.h 
  b/arch/powerpc/platforms/pseries/pseries.h
  index 361add6..f24e352 100644
  --- a/arch/powerpc/platforms/pseries/pseries.h
  +++ b/arch/powerpc/platforms/pseries/pseries.h
  @@ -66,4 +66,45 @@ int pseries_root_bridge_prepare(struct pci_host_bridge 
  *bridge);
   
   unsigned long pseries_memory_block_size(void);
   
  +/* Manipulation of the in memory data returned from an RTAS call */
  +
  +/* Data pointed to by ptr is in big endian */
  +struct rtas_buffer {
  +   void *ptr;
  +   int len;
  +   int pos;
  +};
  +
  +/* Buffer is already zeroed */
  +int make_rtas_buf(struct rtas_buffer *b, size_t size);
  +void free_rtas_buf(struct rtas_buffer *b);
  +
  +/* Return pointer to the buffer being used */
  +void *get_rtas_buf(struct rtas_buffer *b);
  +
  +/* Checks if the buffer exists and the read position is less than the 
  length*/
  +bool check_rtas_buf(struct rtas_buffer *b);
  +size_t get_rtas_buf_size(struct rtas_buffer *b);
  +
  +/* Advance the internal position of the buffer by size bytes */
  +bool advance_rtas_buf(struct rtas_buffer *b, size_t size);
  +
  +/* Put a value val into the buffer at position pos. Function expect val in 
  cpu
  + * endian. Returns true if the write to the buffer was successful.
  + */
  +bool put_rtas_buf_32(struct rtas_buffer *b, u32 val, int pos);
  +
  +/* Grab the byte at the current position of the buffer without incrementing
  + * the internal position of the buffer */
  +bool peek_rtas_buf(struct rtas_buffer *b, u8 *c);
  +
  +/* Accessor functions return true if access succeeded and value is written 
  to
  + * val in cpu endian. Automatically advances its reference into the buffer 
  by
  + * the requested amount.
  + */
  +bool get_rtas_buf_32(struct rtas_buffer *b, u32 *val);
  +bool get_rtas_buf_64(struct rtas_buffer *b, u64 *val);
  +bool get_rtas_buf_mem(struct rtas_buffer *b, void **p, size_t len);
  +bool get_rtas_buf_str(struct rtas_buffer *b, char **s);
  +
   #endif /* _PSERIES_PSERIES_H */
  diff --git a/arch/powerpc/platforms/pseries/rtas_buffer.c 
  b/arch/powerpc/platforms/pseries/rtas_buffer.c
  new file mode 100644
  index 000..f06b73c
  --- /dev/null
  +++ b/arch/powerpc/platforms/pseries/rtas_buffer.c
  @@ -0,0 +1,126 @@
  +#include linux/kernel.h
  +#include linux/slab.h
  +
  +#include pseries.h
  +
  +
  +int make_rtas_buf(struct rtas_buffer *b, size_t sz)
  +{
  +   b-ptr = kzalloc(sz, GFP_KERNEL);
  +   b-len = sz;
  +   b-pos = 0;
  +   return (!b-ptr) ? -ENOMEM : 0;
  +}
  +
  +void free_rtas_buf(struct rtas_buffer *b)
  +{
  +   kfree(b-ptr);
  +}
  +
  +void *get_rtas_buf(struct rtas_buffer *b)
  +{
  +   return (b) ? b-ptr : NULL;
  +}
  +
  +size_t get_rtas_buf_size(struct rtas_buffer *b)
  +{
  +   return (b) ? b-len : 0;
  +}
  +
  +bool check_rtas_buf(struct rtas_buffer *b)
  +{
  +   return (b  b-ptr  b-pos  b-len);
  +}
  +
  +static inline void *buf_pos(struct rtas_buffer *b)
  +{
  +   return (b  b-ptr) ? b-ptr + b-pos : NULL;
  +}
  +
  +bool peek_rtas_buf(struct rtas_buffer *b, u8 *c)
  +{
  +   if (!b || !c || b-pos = b-len)
  +   return false;
  +
  +   *c = *(u8 *)buf_pos(b);
  +
  +   return true;
  +}
  +
  +bool put_rtas_buf_32(struct rtas_buffer *b, u32 val, int pos

Re: [PATCH v2 0/6] pseries: Move memory hotplug to the kernel

2014-11-17 Thread Cyril Bur
Hi Nathan,

I tried to apply these to Linus' tree and Mpes tree and to stable and
got several problems, I got stuck at the third hunk in patch 5.

Could you point out where I'm going wrong?

Thanks,

Cyril

On Mon, 2014-11-17 at 15:44 -0600, Nathan Fontenot wrote:
 In order to better support device hotplug (cpu, memory, and pci) in the
 PowerVM and PowerKVM environments, the handling of device hotplug
 could be updated so that the act of hotplugging a device occurs entirely
 in the kernel. This patch set begins to address this by moving
 memory hotplug to the kernel. Patches to follow will do the same
 for cpu and pci devices.
 
 To provide background, the current handling of memory hotplug is
 handled by the drmgr command. This command is invoked when memory
 add/remove requests are made at the HMC and conveyed to a partition
 through the RSCT framework. The drmgr command then performs parts
 of the hotplug in user-space and makes requests to the kernel to perform
 other pieces. This is not really ideal, we can do everything in the
 kernel and do it faster.
 
 In this patchset, hotplug events will now be communicated to the kernel
 in the form of rtas hotplug events. For PowerKVM systems this is done
 by qemu using the ras epow interrupt. For PowerVM systems the drmgr
 command will be updated to create a rtas hotplug event and send it to
 the kernel via a new /sys/kernel/dlpar interface. Both of these
 entry points for hotplug rtas events then call a common routine
 for handling rtas hotplug events.
 
 -Nathan
 
 Patch 1/6
 - Add definition of hotplug rtas event sections.
 
 Patch 2/6
 - Update struct of_drconf_cell to use __be64/__be32
  
 Patch 3/6
 - Export the dlpar_[acquire|release]drc() routines.
 
 Patch 4/6
 - Create the new /sys/kernel/dlpar interface
 
 Patch 5/6
 - Implement memory hotplug add in the kernel.
 
 Patch 6/6
 - Implement memory hotplug remove in the kernel.
 
  include/asm/prom.h |   10 
  include/asm/rtas.h |   26 ++
  platforms/pseries/dlpar.c  |   72 +
  platforms/pseries/hotplug-memory.c |  469 
 -
  platforms/pseries/pseries.h|   12 
  5 files changed, 576 insertions(+), 13 deletions(-)
 
 ___
 Linuxppc-dev mailing list
 Linuxppc-dev@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 0/6] pseries: Move memory hotplug to the kernel

2014-11-18 Thread Cyril Bur
On Tue, 2014-11-18 at 12:34 -0600, Nathan Fontenot wrote:
 On 11/17/2014 08:00 PM, Cyril Bur wrote:
  Hi Nathan,
  
  I tried to apply these to Linus' tree and Mpes tree and to stable and
  got several problems, I got stuck at the third hunk in patch 5.
 
 I based these patches off of mpe's -next tree. I did a fresh pull of
 mpe's tree and found that they do apply with some fuzz to the master branch.
 
Got them onto mpe's -next thanks.

 Which tree were you having issue with patch 5?

Looks like 16d0f5c4af76b0c3424290937bf1ac22adf439b1 was the cause of my
problems. 
 
 -Nathan
 
  
  Could you point out where I'm going wrong?
  
  Thanks,
  
  Cyril
  
  On Mon, 2014-11-17 at 15:44 -0600, Nathan Fontenot wrote:
  In order to better support device hotplug (cpu, memory, and pci) in the
  PowerVM and PowerKVM environments, the handling of device hotplug
  could be updated so that the act of hotplugging a device occurs entirely
  in the kernel. This patch set begins to address this by moving
  memory hotplug to the kernel. Patches to follow will do the same
  for cpu and pci devices.
 
  To provide background, the current handling of memory hotplug is
  handled by the drmgr command. This command is invoked when memory
  add/remove requests are made at the HMC and conveyed to a partition
  through the RSCT framework. The drmgr command then performs parts
  of the hotplug in user-space and makes requests to the kernel to perform
  other pieces. This is not really ideal, we can do everything in the
  kernel and do it faster.
 
  In this patchset, hotplug events will now be communicated to the kernel
  in the form of rtas hotplug events. For PowerKVM systems this is done
  by qemu using the ras epow interrupt. For PowerVM systems the drmgr
  command will be updated to create a rtas hotplug event and send it to
  the kernel via a new /sys/kernel/dlpar interface. Both of these
  entry points for hotplug rtas events then call a common routine
  for handling rtas hotplug events.
 
  -Nathan
 
  Patch 1/6
  - Add definition of hotplug rtas event sections.
 
  Patch 2/6
  - Update struct of_drconf_cell to use __be64/__be32
   
  Patch 3/6
  - Export the dlpar_[acquire|release]drc() routines.
 
  Patch 4/6
  - Create the new /sys/kernel/dlpar interface
 
  Patch 5/6
  - Implement memory hotplug add in the kernel.
 
  Patch 6/6
  - Implement memory hotplug remove in the kernel.
 
   include/asm/prom.h |   10 
   include/asm/rtas.h |   26 ++
   platforms/pseries/dlpar.c  |   72 +
   platforms/pseries/hotplug-memory.c |  469 
  -
   platforms/pseries/pseries.h|   12 
   5 files changed, 576 insertions(+), 13 deletions(-)
 
  ___
  Linuxppc-dev mailing list
  Linuxppc-dev@lists.ozlabs.org
  https://lists.ozlabs.org/listinfo/linuxppc-dev
  
  
  ___
  Linuxppc-dev mailing list
  Linuxppc-dev@lists.ozlabs.org
  https://lists.ozlabs.org/listinfo/linuxppc-dev
  
 
 ___
 Linuxppc-dev mailing list
 Linuxppc-dev@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 6/6] pseries: Implement memory hotplug remove in the kernel

2014-11-20 Thread Cyril Bur

On Mon, 2014-11-17 at 15:56 -0600, Nathan Fontenot wrote:
 Move handling of memory hotplug remove on pseries completely into the kernel.
 
 The current memory hotplug remove path involves the drmgr command doing part
 of this work in userspace and requesting the kernel to do additional pieces.
 This patch allows us to handle the act completely in the kernel via rtas
 hotplug events. This allows us to perform the operation faster and provide
 a common memory hotplug remove path for PowerVM and PowerKVM systems.
 
 Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com
 ---
  arch/powerpc/platforms/pseries/hotplug-memory.c |  206 
 ++-
  1 file changed, 201 insertions(+), 5 deletions(-)
 
 diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
 b/arch/powerpc/platforms/pseries/hotplug-memory.c
 index b57d42b..c8189e8 100644
 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
 +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
 @@ -173,6 +173,179 @@ static int pseries_remove_mem_node(struct device_node 
 *np)
   pseries_remove_memblock(base, lmb_size);
   return 0;
  }
 +
 +static int lmb_is_removable(struct of_drconf_cell *lmb)
 +{
 + int i, scns_per_block;
 + int rc = 1;
 + unsigned long pfn, block_sz;
 + u64 phys_addr;
 +
 + if (!(be32_to_cpu(lmb-flags)  DRCONF_MEM_ASSIGNED))
 + return -1;
This makes me kind of nervous. You're using the return value of
lmb_is_removable as a boolean but it returns three possible values -1,0
and 1. Functionally it looks correct to me so it's not a massive issue
 +
 + phys_addr = be64_to_cpu(lmb-base_addr);
 + block_sz = memory_block_size_bytes();
 + scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
 +
 + for (i = 0; i  scns_per_block; i++) {
 + pfn = PFN_DOWN(phys_addr);
 + if (!pfn_present(pfn))
 + continue;
 +
 + rc = is_mem_section_removable(pfn, PAGES_PER_SECTION);
 + phys_addr += MIN_MEMORY_BLOCK_SIZE;
 + }
 +
 + return rc;
 +}
 +
 +static int dlpar_add_lmb(struct of_drconf_cell *);
 +
 +static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 +{
 + struct memory_block *mem_block;
 + unsigned long block_sz;
 + u64 phys_addr;
 + uint32_t drc_index;
 + int nid, rc;
 +
 + if (!lmb_is_removable(lmb))
 + return -EINVAL;
 +
 + phys_addr = be64_to_cpu(lmb-base_addr);
 + drc_index = be32_to_cpu(lmb-drc_index);
 +
 + mem_block = lmb_to_memblock(lmb);
 + if (!mem_block)
 + return -EINVAL;
 +
 + rc = device_offline(mem_block-dev);
 + put_device(mem_block-dev);
 + if (rc)
 + return rc;
 +
 + block_sz = pseries_memory_block_size();
 + nid = memory_add_physaddr_to_nid(phys_addr);
 +
 + remove_memory(nid, phys_addr, block_sz);
 +
 + /* Update memory regions for memory remove */
 + memblock_remove(phys_addr, block_sz);
 +
 + dlpar_release_drc(drc_index);
 +
 + lmb-flags = cpu_to_be32(~DRCONF_MEM_ASSIGNED);
 + pr_info(Memory at %llx (drc index %x) has been hot-removed\n,
 + be64_to_cpu(lmb-base_addr), drc_index);
dlpar_add_lmb doesn't print anything but dlpar_remove_lmb does? Related
to my comment about printing a 'hot-add' messages prematurely, perhaps
move this to the callers
 +
 + return 0;
 +}
 +
 +static int dlpar_memory_remove_by_count(struct pseries_hp_errorlog *hp_elog,
 + struct property *prop)
 +{
 + struct of_drconf_cell *lmbs;
 + int lmbs_to_remove, lmbs_removed = 0;
 + int lmbs_available = 0;
 + uint32_t num_lmbs;
 + __be32 *p;
 + int i, rc;
 +
 + lmbs_to_remove = be32_to_cpu(hp_elog-_drc_u.drc_count);
Didn't you already do the endian conversion back in
handle_dlpar_errorlog?
 + pr_info(Attempting to hot-remove %d LMB(s)\n, lmbs_to_remove);
 +
 + if (lmbs_to_remove == 0)
 + return -EINVAL;
 +
 + p = prop-value;
 + num_lmbs = be32_to_cpu(*p++);
 + lmbs = (struct of_drconf_cell *)p;
 +
 + /* Validate that there are enough LMBs to satisfy the request */
 + for (i = 0; i  num_lmbs; i++) {
 + if (be32_to_cpu(lmbs[i].flags)  DRCONF_MEM_ASSIGNED)
 + lmbs_available++;
 + }
 +
 + if (lmbs_available  lmbs_to_remove)
 + return -EINVAL;
 +
 + for (i = 0; i  num_lmbs; i++) {
 + if (lmbs_to_remove == lmbs_removed)
 + break;
 +
 + rc = dlpar_remove_lmb(lmbs[i]);
 + if (rc)
 + continue;
 +
 + lmbs_removed++;
 +
 + /* Mark this lmb so we can add it later if all of the
 +  * requested LMBs cannot be removed.
 +  */
 + lmbs[i].reserved = 1;
 + }
 +
 + if (lmbs_removed != lmbs_to_remove) {
 + pr_err(Memory hot-remove failed, adding LMB's back\n);
 +
 + for (i = 0; i  

Re: [PATCH v2 5/6] pseries: Implement memory hotplug add in the kernel

2014-11-20 Thread Cyril Bur

On Mon, 2014-11-17 at 15:54 -0600, Nathan Fontenot wrote:
 Move handling of memory hotplug add on pseries completely into the kernel.
 
 The current memory hotplug add path involves the drmgr command doing part
 of this work in userspace and requesting the kernel to do additional pieces.
 This patch allows us to handle the act completely in the kernel via rtas
 hotplug events. This allows us to perform the operation faster and provide
 a common memory hotplug add path for PowerVM and PowerKVM systems.
 
 The patch does introduce a static rtas_hp_event variable that is set to
 true when updating the device tree during memory hotplug initiated from
 a rtas hotplug event. This is needed because we do not need to do the
 work in the of notifier, this work is already performed in handling the
 hotplug request. At a later time we can remove this when we deprecate the
 previous method of memory hotplug.
 
 Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com
 ---
  arch/powerpc/platforms/pseries/hotplug-memory.c |  244 
 +++
  1 file changed, 243 insertions(+), 1 deletion(-)
 
 diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
 b/arch/powerpc/platforms/pseries/hotplug-memory.c
 index 69d178b..b57d42b 100644
 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
 +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
 @@ -16,6 +16,7 @@
  #include linux/memblock.h
  #include linux/memory.h
  #include linux/memory_hotplug.h
 +#include linux/slab.h
  
  #include asm/firmware.h
  #include asm/machdep.h
 @@ -23,6 +24,8 @@
  #include asm/sparsemem.h
  #include pseries.h
  
 +static bool rtas_hp_event;
 +
  unsigned long pseries_memory_block_size(void)
  {
   struct device_node *np;
 @@ -66,6 +69,52 @@ unsigned long pseries_memory_block_size(void)
   return memblock_size;
  }
  
 +static void dlpar_free_drconf_property(struct property *prop)
 +{
 + kfree(prop-name);
 + kfree(prop-value);
 + kfree(prop);
 +}
 +
 +static struct property *dlpar_clone_drconf_property(struct device_node *dn)
 +{
 + struct property *prop, *new_prop;
 +
 + prop = of_find_property(dn, ibm,dynamic-memory, NULL);
 + if (!prop)
 + return NULL;
 +
 + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
 + if (!new_prop)
 + return NULL;
 +
 + new_prop-name = kstrdup(prop-name, GFP_KERNEL);
 + new_prop-value = kmalloc(prop-length, GFP_KERNEL);
 + if (!new_prop-name || !new_prop-value) {
 + dlpar_free_drconf_property(new_prop);
 + return NULL;
 + }
 +
 + memcpy(new_prop-value, prop-value, prop-length);
 + new_prop-length = prop-length;
 +
 + return new_prop;
 +}
 +
 +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
 +{
 + unsigned long section_nr;
 + struct mem_section *mem_sect;
 + struct memory_block *mem_block;
 + u64 phys_addr = be64_to_cpu(lmb-base_addr);
 +
 + section_nr = pfn_to_section_nr(PFN_DOWN(phys_addr));
 + mem_sect = __nr_to_section(section_nr);
 +
 + mem_block = find_memory_block(mem_sect);
 + return mem_block;
 +}
 +
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static int pseries_remove_memblock(unsigned long base, unsigned int 
 memblock_size)
  {
 @@ -136,19 +185,209 @@ static inline int pseries_remove_mem_node(struct 
 device_node *np)
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  
 +static int dlpar_add_lmb(struct of_drconf_cell *lmb)
 +{
 + struct memory_block *mem_block;
 + u64 phys_addr;
 + uint32_t drc_index;
I started commenting this and it turns out you've used uint32_t almost
everywhere for values you've pulled from the device tree or the elog.
These should be u32.
 + unsigned long pages_per_block;
 + unsigned long block_sz;
 + int nid, sections_per_block;
 + int rc;
 +
 + if (be32_to_cpu(lmb-flags)  DRCONF_MEM_ASSIGNED)
 + return -EINVAL;
 +
 + phys_addr = be64_to_cpu(lmb-base_addr);
 + drc_index = be32_to_cpu(lmb-drc_index);
 + block_sz = memory_block_size_bytes();
 + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
 + pages_per_block = PAGES_PER_SECTION * sections_per_block;
 +
Perhaps I'm being a bit slow here but it isn't exactly clear what you're
getting with all those variables and could you explain what that
statement below is checking for?

 + if (phys_addr  ((pages_per_block  PAGE_SHIFT) - 1))
 + return -EINVAL;
 +
 + rc = dlpar_acquire_drc(drc_index);
 + if (rc)
 + return rc;
 +
 + /* Find the node id for this address */
 + nid = memory_add_physaddr_to_nid(phys_addr);
 +
 + /* Add the memory */
 + rc = add_memory(nid, phys_addr, block_sz);
 + if (rc) {
 + dlpar_release_drc(drc_index);
 + return rc;
 + }
 +
 + /* Register this block of memory */
 + rc = memblock_add(phys_addr, block_sz);
 + if (rc) {
 + remove_memory(nid, phys_addr, block_sz);
 + 

Re: [PATCH v2 3/6] pseries: Create new device hotplug entry point

2014-11-20 Thread Cyril Bur

On Mon, 2014-11-17 at 15:51 -0600, Nathan Fontenot wrote:
 Create a new entry point for device hotplug on pseries that will
 work for both PowerVM and PowerKVM systems.
 
 The current process to hotplug (or dlpar) devices (generally the same
 process for memory, cpu, and pci devices) on PowerVM systems is initiated
 from the HMC, which communicates the request to the partitions through
 the RSCT framework. The RSCT framework then invokes the drmgr command.
 The drmgr command performs the hotplug operation by doing some pieces,
 such as most of the rtas calls and device tree parsing, in userspace
 and make requests to the kernel to online/offline the device, update the
 device tree and add/remove the device.
 
 For PowerKVM the approach for device hotplug is to follow what is currently
 being done for pci hotplug. A hotplug request is initiated from the host,
 QEMU then generates an EPOW interrupt to the guest which causes the guest
 to make the rtas,check-exception call. In QEMU, the rtas,check-exception call
 returns a rtas hotplug event to the guest.

Please forgive my ignorance of the exact details of how this all works.
I've been trying to wrap my head around how it works in a little more
detail than your high level overview. Correct me where I go wrong.

So the EPOW interrupt comes in and QEMU receives the
rtas,check-exception call and returns the rtas hotplug event. As you
state below, the connection of the arrival of the rtas hotplug event to
the hotplug code will be made in a subsequent patch.

Here is my understanding of what happens when a hotplug event gets
processed.
An LMB is selected
from /ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory in the
device tree which is populated at boot time (although it's possible that
it can updated once the guest is running?) either because it matches a
specific drc-index or simply because it is in the list and we are to
hotplug 'count' LMBs. So there's a maximum amount of memory that can be
hotplugged (without device tree updates...)?

Once selected the kernel informs the hypervisor that it is going to use
that LMB with two RTAS calls, 'get-sensor-state' and 'set-indicator'
which first checks the actual state of that LMB with the hypervisor and
then marks it as 'in use' (in dlpar_acquire_drc).

After that, find the (NUMA?) node id of the memory and inform the
generic kernel about this new memory.

For some reason memblock needs to be informed separately (does it need
to be informed exactly?), which as you pointed out in the previous
version of this patchset you're not sure why it isn't done in
add_memory, and you still do it because it's what currently happens? I'd
very much like to know why this sequence of events but I suspect the
explanation might get quite involved.

Finally mark the LMB as assigned in its device tree node. This is
bookkeeping right?

This process is repeated for each LMB that should be added.

In the event a failure a best effort rollback is done, as you mentioned
in the previous version, it may not always be possible but at least it's
attempted.

Once all this succeeds update the device tree.

Basically the exact reverse of this process happens for unplug.

I do have questions about this process: What is the most likely part to
fail? I have no idea how feasible it would be but perhaps trying to do
the likely failure on all the LBMs might help unwinding and perhaps
provide a guarantee that it can be completely rolled back. I'm really
not sure, but by the looks of it, things are going to be pretty reversible up
until device_online.

I can't help but notice the duplication with memory_probe_store
(although that doesn't do any rollback). Probably unavoidable and its
really not much code.

Thanks in advance for the clarifications,

Cyril
 
 Please note that the current pci hotplug path for PowerKVM involves the
 kernel receiving the rtas hotplug event, passing it to rtas_errd in
 userspace, and having rtas_errd invoke drmgr. The drmgr command then
 handles the request as described above for PowerVM systems. This is to
 be updated to perform pci completely in the kernel in a later patch set.
 
 There is no need for this circuitous route, we should handle the entire
 hotplug of devices in the kernel. What I am planning is to enable this
 by moving the code to handle device hotplug from drmgr into the kernel to
 provide a single path for both PowerVM and PowerKVM systems. This patch
 provides the common entry point. For PowerKVM a future update to the kernel
 rtas code will recognize rtas hotplug events returned from
 rtas,check-exception calls and use the common entry point to handle device
 hotplug entirely in the kernel.
 
 For PowerVM systems, this patch creates the /sys/kernel/dlpar file that rtas
 hotplug events can be written to by drmgr and passed to the common entry 
 point.
 There is no chance of updating how we receive hotplug requests on PowerVM
 systems.
 
 Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com
 ---
  

Re: [PATCH v2 6/6] pseries: Implement memory hotplug remove in the kernel

2014-11-24 Thread Cyril Bur
On Mon, 2014-11-24 at 09:03 -0600, Nathan Fontenot wrote:
 
 On 11/21/2014 01:49 AM, Cyril Bur wrote:
  
  On Mon, 2014-11-17 at 15:56 -0600, Nathan Fontenot wrote:
  Move handling of memory hotplug remove on pseries completely into the 
  kernel.
 
  The current memory hotplug remove path involves the drmgr command doing 
  part
  of this work in userspace and requesting the kernel to do additional 
  pieces.
  This patch allows us to handle the act completely in the kernel via rtas
  hotplug events. This allows us to perform the operation faster and provide
  a common memory hotplug remove path for PowerVM and PowerKVM systems.
 
  Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com
  ---
   arch/powerpc/platforms/pseries/hotplug-memory.c |  206 
  ++-
   1 file changed, 201 insertions(+), 5 deletions(-)
 
  diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
  b/arch/powerpc/platforms/pseries/hotplug-memory.c
  index b57d42b..c8189e8 100644
  --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
  +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
  @@ -173,6 +173,179 @@ static int pseries_remove_mem_node(struct 
  device_node *np)
 pseries_remove_memblock(base, lmb_size);
 return 0;
   }
  +
  +static int lmb_is_removable(struct of_drconf_cell *lmb)
  +{
  +  int i, scns_per_block;
  +  int rc = 1;
  +  unsigned long pfn, block_sz;
  +  u64 phys_addr;
  +
  +  if (!(be32_to_cpu(lmb-flags)  DRCONF_MEM_ASSIGNED))
  +  return -1;
  This makes me kind of nervous. You're using the return value of
  lmb_is_removable as a boolean but it returns three possible values -1,0
  and 1. Functionally it looks correct to me so its not a massive issue
 
 Oh yuck. this should be a boolean return. I'll fix that.
 
  +
  +  phys_addr = be64_to_cpu(lmb-base_addr);
  +  block_sz = memory_block_size_bytes();
  +  scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
  +
  +  for (i = 0; i  scns_per_block; i++) {
  +  pfn = PFN_DOWN(phys_addr);
  +  if (!pfn_present(pfn))
  +  continue;
  +
  +  rc = is_mem_section_removable(pfn, PAGES_PER_SECTION);
  +  phys_addr += MIN_MEMORY_BLOCK_SIZE;
  +  }
  +
  +  return rc;
  +}
  +
  +static int dlpar_add_lmb(struct of_drconf_cell *);
  +
  +static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
  +{
  +  struct memory_block *mem_block;
  +  unsigned long block_sz;
  +  u64 phys_addr;
  +  uint32_t drc_index;
  +  int nid, rc;
  +
  +  if (!lmb_is_removable(lmb))
  +  return -EINVAL;
  +
  +  phys_addr = be64_to_cpu(lmb-base_addr);
  +  drc_index = be32_to_cpu(lmb-drc_index);
  +
  +  mem_block = lmb_to_memblock(lmb);
  +  if (!mem_block)
  +  return -EINVAL;
  +
  +  rc = device_offline(mem_block-dev);
  +  put_device(mem_block-dev);
  +  if (rc)
  +  return rc;
  +
  +  block_sz = pseries_memory_block_size();
  +  nid = memory_add_physaddr_to_nid(phys_addr);
  +
  +  remove_memory(nid, phys_addr, block_sz);
  +
  +  /* Update memory regions for memory remove */
  +  memblock_remove(phys_addr, block_sz);
  +
  +  dlpar_release_drc(drc_index);
  +
  +  lmb-flags = cpu_to_be32(~DRCONF_MEM_ASSIGNED);
  +  pr_info(Memory at %llx (drc index %x) has been hot-removed\n,
  +  be64_to_cpu(lmb-base_addr), drc_index);
  dlpar_add_lmb doesn't print anything but dlpar_remove_lmb does? Related
  to my comment about printing a 'hot-add' messages prematurely, perhaps
  move this to the callers
 
 Yes, I'll update the remove mem messages also.
 
  +
  +  return 0;
  +}
  +
  +static int dlpar_memory_remove_by_count(struct pseries_hp_errorlog 
  *hp_elog,
  +  struct property *prop)
  +{
  +  struct of_drconf_cell *lmbs;
  +  int lmbs_to_remove, lmbs_removed = 0;
  +  int lmbs_available = 0;
  +  uint32_t num_lmbs;
  +  __be32 *p;
  +  int i, rc;
  +
  +  lmbs_to_remove = be32_to_cpu(hp_elog-_drc_u.drc_count);
  Didn't you already do the endian conversion back in
  handle_dlpar_errorlog?
  +  pr_info(Attempting to hot-remove %d LMB(s)\n, lmbs_to_remove);
  +
  +  if (lmbs_to_remove == 0)
  +  return -EINVAL;
  +
  +  p = prop-value;
  +  num_lmbs = be32_to_cpu(*p++);
  +  lmbs = (struct of_drconf_cell *)p;
  +
  +  /* Validate that there are enough LMBs to satisfy the request */
  +  for (i = 0; i  num_lmbs; i++) {
  +  if (be32_to_cpu(lmbs[i].flags)  DRCONF_MEM_ASSIGNED)
  +  lmbs_available++;
  +  }
  +
  +  if (lmbs_available  lmbs_to_remove)
  +  return -EINVAL;
  +
  +  for (i = 0; i  num_lmbs; i++) {
  +  if (lmbs_to_remove == lmbs_removed)
  +  break;
  +
  +  rc = dlpar_remove_lmb(lmbs[i]);
  +  if (rc)
  +  continue;
  +
  +  lmbs_removed++;
  +
  +  /* Mark this lmb so we can add it later if all of the
  +   * requested LMBs cannot be removed.
  +   */
  +  lmbs[i].reserved = 1

Re: [PATCH] powerpc/pseries: fix endian problems with LE migration

2015-01-21 Thread Cyril Bur
On Wed, 2015-01-21 at 14:33 +1100, Michael Ellerman wrote:
 On Wed, 2015-01-21 at 13:32 +1100, Cyril Bur wrote:
  The need to handle ibm,suspend_me specially from within ppc_rtas has left an
  endian bug exposed as rtas_ibm_suspend_me actually performs HCALLs and 
  should
  have its params in CPU endian.
 
 That needs a much better explanation.
 
Agreed

 Key points:
  - ppc_rtas() is a syscall, which takes arguments in BE
  - ibm,suspend-me is not a real RTAS call and is handled specially in there
  - ibm,suspend-me is actually implemented by an hcall
  - there is currently a bug on LE, because rtas_ibm_suspend_me() takes the
ppc_rtas() args and feeds them directly to the hcall
 
I've tried to write that out neatly and orderly. Here's how that went:


RTAS events require arguments be passed in big endian while hypercalls
have their arguments passed in registers and the values should therefore
be in CPU endian.

The ibm,suspend_me 'RTAS' call makes a sequence of hypercalls to setup
one true RTAS call. This means that ibm,suspend_me is handled specially
in the ppc_rtas syscall.

The ppc_rtas syscall has its arguments in big endian and can therefore
pass these arguments directly to the rtas call. ibm,suspend_me is
handled specially from within ppc_rtas (by calling rtas_ibm_suspend_me)
which has left an endian bug on little endian systems due to the
requirement of hypercalls. The return value from rtas_ibm_suspend me
gets returned in cpu endian, and is left unconverted, also a bug on
little endian systems.

rtas_ibm_suspend_me does not actually make use of the rtas_args that it
is passed. This patch removes the convoluted use of the rtas_args struct
to pass params to rtas_ibm_suspend_me in favour of passing what it needs
as actual arguments. This patch also ensures the two callers of
rtas_ibm_suspend_me pass function parameters in cpu endian and in the
case of ppc_rtas, converts the return value.

migrate_store (the other caller of rtas_ibm_suspend_me) is from a sysfs
file which deals with everything in cpu endian so this function only
underwent cleanup.

  Have ppc_rtas send the params correctly and also interpret the result
  correctly.
 
 That's a second bug which you should also mention above.
 
  Removed the convoluted use of the rtas_args struct to pass params to
  rtas_ibm_suspend_me in favour of passing what it needs directly.
  
  Signed-off-by: Cyril Bur cyril...@gmail.com
  ---
  This patch has been tested with KVM both LE and BE and on PowerVM both LE 
  and
BE. Under QEMU/KVM the migration happens without touching these code
paths.
  For PowerVM there is no obvious regression on BE and the LE code path now
  provides the correct parameters to the hypervisor
 
 Fold that into the changelog, it's worth remembering.
 
 cheers
 
 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/pseries: fix endian problems with LE migration

2015-01-20 Thread Cyril Bur
The need to handle ibm,suspend_me specially from within ppc_rtas has left an
endian bug exposed as rtas_ibm_suspend_me actually performs HCALLs and should
have its params in CPU endian.

Have ppc_rtas send the params correctly and also interpret the result
correctly.

Removed the convoluted use of the rtas_args struct to pass params to
rtas_ibm_suspend_me in favour of passing what it needs directly.

Signed-off-by: Cyril Bur cyril...@gmail.com
---
This patch has been tested with KVM both LE and BE and on PowerVM both LE and
BE. Under QEMU/KVM the migration happens without touching these code
paths.
For PowerVM there is no obvious regression on BE and the LE code path now
provides the correct parameters to the hypervisor

---
 arch/powerpc/include/asm/rtas.h   |  2 +-
 arch/powerpc/kernel/rtas.c| 22 +++---
 arch/powerpc/platforms/pseries/mobility.c | 22 ++
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55..2e23e92 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -327,7 +327,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data 
*data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_online_cpus_mask(cpumask_var_t cpus);
 extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
-extern int rtas_ibm_suspend_me(struct rtas_args *);
+extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return);
 
 struct rtc_time;
 extern unsigned long rtas_get_boot_time(void);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4af905e..21c45a2 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus)
 }
 EXPORT_SYMBOL(rtas_offline_cpus_mask);
 
-int rtas_ibm_suspend_me(struct rtas_args *args)
+int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 {
long state;
long rc;
@@ -911,8 +911,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
return -ENOSYS;
 
/* Make sure the state is valid */
-   rc = plpar_hcall(H_VASI_STATE, retbuf,
-((u64)args-args[0]  32) | args-args[1]);
+   rc = plpar_hcall(H_VASI_STATE, retbuf, handle);
 
state = retbuf[0];
 
@@ -920,12 +919,12 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned 
%ld\n,rc);
return rc;
} else if (state == H_VASI_ENABLED) {
-   args-args[args-nargs] = RTAS_NOT_SUSPENDABLE;
+   *vasi_return = RTAS_NOT_SUSPENDABLE;
return 0;
} else if (state != H_VASI_SUSPENDING) {
printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned state 
%ld\n,
   state);
-   args-args[args-nargs] = -1;
+   *vasi_return = -1;
return 0;
}
 
@@ -973,7 +972,7 @@ out:
return atomic_read(data.error);
 }
 #else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me(struct rtas_args *args)
+int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 {
return -ENOSYS;
 }
@@ -1053,7 +1052,16 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
 
/* Need to handle ibm,suspend_me call specially */
if (token == ibm_suspend_me_token) {
-   rc = rtas_ibm_suspend_me(args);
+
+   /*
+* rtas_ibm_suspend_me assumes args are in cpu endian, or at 
least the
+* hcall within it requires it.
+*/
+   int vasi_rc = 0;
+   u64 handle = ((u64)be32_to_cpu(args.args[0])  32)
+ | be32_to_cpu(args.args[1]);
+   rc = rtas_ibm_suspend_me(handle, vasi_rc);
+   args.rets[0] = cpu_to_be32(vasi_rc);
if (rc)
return rc;
goto copy_return;
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index e7cb6d4..90cf3dc 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -316,34 +316,24 @@ void post_mobility_fixup(void)
 static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
 const char *buf, size_t count)
 {
-   struct rtas_args args;
u64 streamid;
int rc;
+   int vasi_rc = 0;
 
rc = kstrtou64(buf, 0, streamid);
if (rc)
return rc;
 
-   memset(args, 0, sizeof(args));
-   args.token = rtas_token(ibm,suspend-me);
-   args.nargs = 2;
-   args.nret = 1;
-
-   args.args[0] = streamid  32 ;
-   args.args[1] = streamid  0x;
-   args.rets = args.args[args.nargs];
-
do {
-   args.rets[0] = 0;
-   rc

Re: [PATCH 2/3] powerpc/pseries: Little endian fixes for post mobility device tree update

2015-03-03 Thread Cyril Bur
On Tue, 2015-03-03 at 15:15 -0800, Tyrel Datwyler wrote:
 On 03/02/2015 01:49 PM, Tyrel Datwyler wrote:
  On 03/01/2015 09:20 PM, Cyril Bur wrote:
  On Fri, 2015-02-27 at 18:24 -0800, Tyrel Datwyler wrote:
  We currently use the device tree update code in the kernel after resuming
  from a suspend operation to re-sync the kernels view of the device tree 
  with
  that of the hypervisor. The code as it stands is not endian safe as it 
  relies
  on parsing buffers returned by RTAS calls that thusly contains data in big
  endian format.
 
  This patch annotates variables and structure members with __be types as 
  well
  as performing necessary byte swaps to cpu endian for data that needs to be
  parsed.
 
  Signed-off-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com
  ---
   arch/powerpc/platforms/pseries/mobility.c | 36 
  ---
   1 file changed, 19 insertions(+), 17 deletions(-)
 
  diff --git a/arch/powerpc/platforms/pseries/mobility.c 
  b/arch/powerpc/platforms/pseries/mobility.c
  index 29e4f04..0b1f70e 100644
  --- a/arch/powerpc/platforms/pseries/mobility.c
  +++ b/arch/powerpc/platforms/pseries/mobility.c
  @@ -25,10 +25,10 @@
   static struct kobject *mobility_kobj;
   
   struct update_props_workarea {
  - u32 phandle;
  - u32 state;
  - u64 reserved;
  - u32 nprops;
  + __be32 phandle;
  + __be32 state;
  + __be64 reserved;
  + __be32 nprops;
   } __packed;
   
   #define NODE_ACTION_MASK 0xff00
  @@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, 
  struct property **prop,
return 0;
   }
   
  -static int update_dt_node(u32 phandle, s32 scope)
  +static int update_dt_node(__be32 phandle, s32 scope)
   {
 
  On line 153 of this function:
 dn = of_find_node_by_phandle(phandle);
 
  You're passing a __be32 to device tree code, if we can treat the phandle
  as a opaque value returned to us from the rtas call and pass it around
  like that then all good.
 
 After digging deeper the device_node-phandle is stored in cpu endian
 under the covers. So, for the of_find_node_by_phandle() we do need to
 convert the phandle to cpu endian first. It appears I got lucky with the
 update fixing the observed RMC issue because the phandle for the root
 node seems to always be 0x.
 
I think we've both switched opinions here, initially I thought an endian
conversion was necessary but turns out that all of_find_node_by_phandle
really does is:
   for_each_of_allnodes(np)
  if (np-phandle == handle)
 break;
   of_node_get(np);

The == is safe either way and I think the of code might be trying to
imply that it doesn't matter by having a typedefed type 'phandle'.

I'm still digging around, we want to get this right!


Cyril
 -Tyrel
 
  
  Yes, of_find_node_by_phandle directly compares phandle passed in against
  the handle stored in each device_node when searching for a matching
  node. Since, the device tree is big endian it follows that the big
  endian phandle received in the rtas buffer needs no conversion.
  
  Further, we need to pass the phandle to ibm,update-properties in the
  work area which is also required to be big endian. So, again it seemed
  that converting to cpu endian was a waste of effort just to convert it
  back to big endian.
  
   It's also hard to be sure if these need to be BE and have always been
  that way because we've always run BE so they've never actually wanted
  CPU endian its just that CPU endian has always been BE (I think I
  started rambling...)
 
  Just want to check that *not* converting them is done on purpose.
  
  Yes, I explicitly did not convert them on purpose. As mentioned above we
  need phandle in BE for the ibm,update-properties rtas work area.
  Similarly, drc_index needs to be in BE for the ibm,configure-connector
  rtas work area. Outside, of that we do no other manipulation of those
  values.
  
 
  And having read on, I'm assuming the answer is yes since this
  observation is true for your changes which affect:
 delete_dt_node()
 update_dt_node()
  add_dt_node()
  Worth noting that you didn't change the definition of delete_dt_node()
  
  You are correct. Oversight. I will fix that as it should generate a
  sparse complaint.
  
  -Tyrel
  
 
  I'll have a look once you address the non compiling in patch 1/3 (I'm
  getting blocked the unused var because somehow Werror is on, odd it
  didn't trip you up) but I also suspect this will have sparse go a bit
  nuts. 
  I wonder if there is a nice way of shutting sparse up.
 
struct update_props_workarea *upwa;
struct device_node *dn;
  @@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
char *prop_data;
char *rtas_buf;
int update_properties_token;
  + u32 nprops;
u32 vd;
   
update_properties_token = rtas_token(ibm,update-properties);
  @@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
break;
   
prop_data = rtas_buf + sizeof(*upwa

Re: [PATCH 2/3] powerpc/pseries: Little endian fixes for post mobility device tree update

2015-03-01 Thread Cyril Bur
On Fri, 2015-02-27 at 18:24 -0800, Tyrel Datwyler wrote:
 We currently use the device tree update code in the kernel after resuming
 from a suspend operation to re-sync the kernels view of the device tree with
 that of the hypervisor. The code as it stands is not endian safe as it relies
 on parsing buffers returned by RTAS calls that thusly contains data in big
 endian format.
 
 This patch annotates variables and structure members with __be types as well
 as performing necessary byte swaps to cpu endian for data that needs to be
 parsed.
 
 Signed-off-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com
 ---
  arch/powerpc/platforms/pseries/mobility.c | 36 
 ---
  1 file changed, 19 insertions(+), 17 deletions(-)
 
 diff --git a/arch/powerpc/platforms/pseries/mobility.c 
 b/arch/powerpc/platforms/pseries/mobility.c
 index 29e4f04..0b1f70e 100644
 --- a/arch/powerpc/platforms/pseries/mobility.c
 +++ b/arch/powerpc/platforms/pseries/mobility.c
 @@ -25,10 +25,10 @@
  static struct kobject *mobility_kobj;
  
  struct update_props_workarea {
 - u32 phandle;
 - u32 state;
 - u64 reserved;
 - u32 nprops;
 + __be32 phandle;
 + __be32 state;
 + __be64 reserved;
 + __be32 nprops;
  } __packed;
  
  #define NODE_ACTION_MASK 0xff00
 @@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, 
 struct property **prop,
   return 0;
  }
  
 -static int update_dt_node(u32 phandle, s32 scope)
 +static int update_dt_node(__be32 phandle, s32 scope)
  {

On line 153 of this function:
   dn = of_find_node_by_phandle(phandle);

You're passing a __be32 to device tree code, if we can treat the phandle
as a opaque value returned to us from the rtas call and pass it around
like that then all good.
It's also hard to be sure if these need to be BE and have always been
that way because we've always run BE so they've never actually wanted
CPU endian its just that CPU endian has always been BE (I think I
started rambling...)

Just want to check that *not* converting them is done on purpose.

And having read on, I'm assuming the answer is yes since this
observation is true for your changes which affect:
delete_dt_node()
update_dt_node()
add_dt_node()
Worth noting that you didn't change the definition of delete_dt_node()

I'll have a look once you address the non compiling in patch 1/3 (I'm
getting blocked the unused var because somehow Werror is on, odd it
didn't trip you up) but I also suspect this will have sparse go a bit
nuts. 
I wonder if there is a nice way of shutting sparse up.

   struct update_props_workarea *upwa;
   struct device_node *dn;
 @@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
   char *prop_data;
   char *rtas_buf;
   int update_properties_token;
 + u32 nprops;
   u32 vd;
  
   update_properties_token = rtas_token(ibm,update-properties);
 @@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
   break;
  
   prop_data = rtas_buf + sizeof(*upwa);
 + nprops = be32_to_cpu(upwa-nprops);
  
   /* On the first call to ibm,update-properties for a node the
* the first property value descriptor contains an empty
 @@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
*/
   if (*prop_data == 0) {
   prop_data++;
 - vd = *(u32 *)prop_data;
 + vd = be32_to_cpu(*(__be32 *)prop_data);
   prop_data += vd + sizeof(vd);
 - upwa-nprops--;
 + nprops--;
   }
  
 - for (i = 0; i  upwa-nprops; i++) {
 + for (i = 0; i  nprops; i++) {
   char *prop_name;
  
   prop_name = prop_data;
   prop_data += strlen(prop_name) + 1;
 - vd = *(u32 *)prop_data;
 + vd = be32_to_cpu(*(__be32 *)prop_data);
   prop_data += sizeof(vd);
  
   switch (vd) {
 @@ -212,7 +214,7 @@ static int update_dt_node(u32 phandle, s32 scope)
   return 0;
  }
  
 -static int add_dt_node(u32 parent_phandle, u32 drc_index)
 +static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
  {
   struct device_node *dn;
   struct device_node *parent_dn;
 @@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
  int pseries_devicetree_update(s32 scope)
  {
   char *rtas_buf;
 - u32 *data;
 + __be32 *data;
   int update_nodes_token;
   int rc;
  
 @@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
   if (rc  rc != 1)
   break;
  
 - data = (u32 *)rtas_buf + 4;
 - while (*data  NODE_ACTION_MASK) {
 + data = (__be32 *)rtas_buf + 4;
 + while (be32_to_cpu(*data)  

Re: [PATCH 1/3] powerpc/pseries: Simplify check for suspendability during suspend/migration

2015-03-01 Thread Cyril Bur
On Fri, 2015-02-27 at 18:24 -0800, Tyrel Datwyler wrote:
 During suspend/migration operation we must wait for the VASI state reported
 by the hypervisor to become Suspending prior to making the ibm,suspend-me
 RTAS call. Routines calling rtas_ibm_suspend_me() pass a vasi_state variable
 that exposes the VASI state to the caller. This is unnecessary as the caller
 only really cares about the following three conditions; if there is an error
 we should bailout, success indicating we have suspended and woken back up so
 proceed to device tree update, or we are not suspendable yet so try calling
 rtas_ibm_suspend_me again shortly.
 
 This patch removes the extraneous vasi_state variable and simply uses the
 return code to communicate how to proceed. We either succeed, fail, or get
 -EAGAIN in which case we sleep for a second before trying to call
 rtas_ibm_suspend_me again.
 
 Signed-off-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com
 ---
  arch/powerpc/include/asm/rtas.h   |  2 +-
  arch/powerpc/kernel/rtas.c| 15 +++
  arch/powerpc/platforms/pseries/mobility.c |  8 +++-
  3 files changed, 11 insertions(+), 14 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
 index 2e23e92..fc85eb0 100644
 --- a/arch/powerpc/include/asm/rtas.h
 +++ b/arch/powerpc/include/asm/rtas.h
 @@ -327,7 +327,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data 
 *data);
  extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
  extern int rtas_online_cpus_mask(cpumask_var_t cpus);
  extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
 -extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return);
 +extern int rtas_ibm_suspend_me(u64 handle);
  
I like ditching vasi_return, I was never happy with myself for doing
that!

  struct rtc_time;
  extern unsigned long rtas_get_boot_time(void);
 diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
 index 21c45a2..603b928 100644
 --- a/arch/powerpc/kernel/rtas.c
 +++ b/arch/powerpc/kernel/rtas.c
 @@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus)
  }
  EXPORT_SYMBOL(rtas_offline_cpus_mask);
  
 -int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 +int rtas_ibm_suspend_me(u64 handle)

That definition is actually in an #ifdef CONFIG_PPC_PSERIES, you'll need
to change the definition for !CONFIG_PPC_PSERIES
  {
   long state;
   long rc;
 @@ -919,13 +919,11 @@ int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
   printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned 
 %ld\n,rc);
   return rc;
   } else if (state == H_VASI_ENABLED) {
 - *vasi_return = RTAS_NOT_SUSPENDABLE;
 - return 0;
 + return -EAGAIN;
   } else if (state != H_VASI_SUSPENDING) {
   printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned state 
 %ld\n,
  state);
 - *vasi_return = -1;
 - return 0;
 + return -EIO;

I've had a look as to how these return values get passed back up the
stack and admittedly we're dealing with a confusing mess, I've compared
back to before my patch (which wasn't perfect either it seems).
Both the state == H_VASI_ENABLED and state == H_VASI_SUSPENDING cause
ppc_rtas to go to the copy_return and return 0 (albeit with an error
code in args.rets[0]), because rtas_ppc goes back to out userland, I
hesitate to change any of that.
   }
  
   if (!alloc_cpumask_var(offline_mask, GFP_TEMPORARY))
 @@ -1060,9 +1058,10 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
   int vasi_rc = 0;

This generates unused variable warning.

   u64 handle = ((u64)be32_to_cpu(args.args[0])  32)
 | be32_to_cpu(args.args[1]);
 - rc = rtas_ibm_suspend_me(handle, vasi_rc);
 - args.rets[0] = cpu_to_be32(vasi_rc);
 - if (rc)
 + rc = rtas_ibm_suspend_me(handle);
 + if (rc == -EAGAIN)
 + args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);

(continuing on...) so perhaps here have
rc = 0;
else if (rc == -EIO)
args.rets[0] = cpu_to_be32(-1);
rc = 0;
Which should keep the original behaviour, the last thing we want to do
is break BE.

Might be worth checking that rc from rtas_ibm_suspend_me will only be
-EAGAIN and -EIO when they are explicitly set in rtas_ibm_suspend_me and
can't come back out from the hcall.
From reading PAPR we're ok there but just as a thought it might be worth
returning errno as positive because hcall errors are going to be
negative, to make life easier at some point... but then we'll have to
remember to make them negative when going back to userland (and there
are two places...) so there's no perfect win here.

 + else if (rc)
   return rc;
   goto copy_return;
   }
 diff --git 

Re: [PATCH v2] powerpc/pseries: Simplify check for suspendability during suspend/migration

2015-03-23 Thread Cyril Bur
On Wed, 2015-03-04 at 12:22 -0800, Tyrel Datwyler wrote:
 During suspend/migration operation we must wait for the VASI state reported
 by the hypervisor to become Suspending prior to making the ibm,suspend-me
 RTAS call. Routines calling rtas_ibm_suspend_me() pass a vasi_state variable
 that exposes the VASI state to the caller. This is unnecessary as the caller
 only really cares about the following three conditions; if there is an error
 we should bailout, success indicating we have suspended and woken back up so
 proceed to device tree update, or we are not suspendable yet so try calling
 rtas_ibm_suspend_me again shortly.
 
 This patch removes the extraneous vasi_state variable and simply uses the
 return code to communicate how to proceed. We either succeed, fail, or get
 -EAGAIN in which case we sleep for a second before trying to call
 rtas_ibm_suspend_me again.
 
Hi Tyrel, sorry this fell off my radar. Thanks for addressing all those
issues.

 Signed-off-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com
 Cc: Nathan Fontenot nf...@linux.vnet.ibm.com
 Cc: Cyril Bur cyril...@gmail.com
 ---
 
 Changes in v2:
 - Addressed Cyril's comments as follow:
 - Removed unused vasi_rc variable
 - Kept return behavior of ppc_rtas the same in the case of VASI error

Looks good for ppc_rtas(). Still changing a return value (pointed out
below) in migrate_store(), we might be ok with that since it's the sysfs
file but mentioning it in the commit message would be a good idea.

 - Updated rtas_ibm_suspend_me function definition for !CONFIG_PPC_PSERIES
 

Apart from that potential non problem, looks good to me.

Reviewed-by: Cyril Bur cyril...@gmail.com

  arch/powerpc/include/asm/rtas.h   |  2 +-
  arch/powerpc/kernel/rtas.c| 26 +-
  arch/powerpc/platforms/pseries/mobility.c |  9 +++--
  3 files changed, 17 insertions(+), 20 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
 index 2e23e92..fc85eb0 100644
 --- a/arch/powerpc/include/asm/rtas.h
 +++ b/arch/powerpc/include/asm/rtas.h
 @@ -327,7 +327,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data 
 *data);
  extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
  extern int rtas_online_cpus_mask(cpumask_var_t cpus);
  extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
 -extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return);
 +extern int rtas_ibm_suspend_me(u64 handle);
  
  struct rtc_time;
  extern unsigned long rtas_get_boot_time(void);
 diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
 index 21c45a2..b9a7b89 100644
 --- a/arch/powerpc/kernel/rtas.c
 +++ b/arch/powerpc/kernel/rtas.c
 @@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus)
  }
  EXPORT_SYMBOL(rtas_offline_cpus_mask);
  
 -int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 +int rtas_ibm_suspend_me(u64 handle)
  {
   long state;
   long rc;
 @@ -919,13 +919,11 @@ int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
   printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned 
 %ld\n,rc);
   return rc;
   } else if (state == H_VASI_ENABLED) {
 - *vasi_return = RTAS_NOT_SUSPENDABLE;
 - return 0;
 + return -EAGAIN;
   } else if (state != H_VASI_SUSPENDING) {
   printk(KERN_ERR rtas_ibm_suspend_me: vasi_state returned state 
 %ld\n,
  state);
 - *vasi_return = -1;
 - return 0;
 + return -EIO;
   }
  
   if (!alloc_cpumask_var(offline_mask, GFP_TEMPORARY))
 @@ -972,7 +970,7 @@ out:
   return atomic_read(data.error);
  }
  #else /* CONFIG_PPC_PSERIES */
 -int rtas_ibm_suspend_me(u64 handle, int *vasi_return)
 +int rtas_ibm_suspend_me(u64 handle)
  {
   return -ENOSYS;
  }
 @@ -1022,7 +1020,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
   unsigned long flags;
   char *buff_copy, *errbuf = NULL;
   int nargs, nret, token;
 - int rc;
  
   if (!capable(CAP_SYS_ADMIN))
   return -EPERM;
 @@ -1054,15 +1051,18 @@ asmlinkage int ppc_rtas(struct rtas_args __user 
 *uargs)
   if (token == ibm_suspend_me_token) {
  
   /*
 -  * rtas_ibm_suspend_me assumes args are in cpu endian, or at 
 least the
 -  * hcall within it requires it.
 +  * rtas_ibm_suspend_me assumes the streamid handle is in cpu
 +  * endian, or at least the hcall within it requires it.
*/
 - int vasi_rc = 0;
 + int rc = 0;
   u64 handle = ((u64)be32_to_cpu(args.args[0])  32)
 | be32_to_cpu(args.args[1]);
 - rc = rtas_ibm_suspend_me(handle, vasi_rc);
 - args.rets[0] = cpu_to_be32(vasi_rc);
 - if (rc)
 + rc = rtas_ibm_suspend_me(handle);
 + if (rc == -EAGAIN

[PATCH] Add a MTD driver for OpenPower PNOR flash

2015-05-04 Thread Cyril Bur
Hi,

I'm resending the patch that Jeremy Kerr sent a while back.

This patch implements a simple mtd device to allow access to the PNOR
flash on OpenPower machines. The flash is accessed through firmware
calls.

The firmware calls are already merged in:
commit ed59190e41b725e1cfd79541f5fc66c20adb0671
Author: Cyril Bur cyril...@gmail.com
Date:   Wed Apr 1 14:05:30 2015 +0800

powerpc/powernv: Add interfaces for flash device access

Cheers,

Cyril



Cyril Bur (1):
  drivers/mtd: add powernv flash MTD abstraction driver

 drivers/mtd/devices/Kconfig |   6 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 288 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-04 Thread Cyril Bur
Powerpc powernv platforms allow access to certain system flash devices
through a firmware interface. This change adds an MTD driver for these
flash devices.

Minor updates from Jeremy Kerr and Joel Stanley.

Signed-off-by: Cyril Bur cyril...@gmail.com
Signed-off-by: Joel Stanley j...@jms.id.au
Signed-off-by: Jeremy Kerr j...@ozlabs.org
---
 drivers/mtd/devices/Kconfig |   6 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 288 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index c49d0b1..5065e7c 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -195,6 +195,12 @@ config MTD_BLOCK2MTD
  Testing MTD users (eg JFFS2) on large media and media that might
  be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+   tristate powernv flash MTD driver
+   depends on PPC_POWERNV
+   help
+ This provides an MTD device for flash on powernv OPAL platforms
+
 comment Disk-On-Chip Device Drivers
 
 config MTD_DOCG3
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index f0b0e61..7912d3a 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)   += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)+= powernv_flash.o
 
 
 CFLAGS_docg3.o += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
new file mode 100644
index 000..18f8a19
--- /dev/null
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -0,0 +1,288 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/errno.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/platform_device.h
+#include linux/string.h
+#include linux/slab.h
+#include linux/mtd/mtd.h
+#include linux/mtd/partitions.h
+
+#include linux/debugfs.h
+#include linux/seq_file.h
+
+#include asm/opal.h
+
+
+/*
+ * This driver creates a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+   struct mtd_info mtd;
+   uint64_tid;
+};
+
+enum flash_op {
+   FLASH_OP_READ,
+   FLASH_OP_WRITE,
+   FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+   loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+   struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
+   struct device *dev = mtd-dev;
+   int token;
+   struct opal_msg msg;
+   int rc;
+
+   dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
+   __func__, op, offset, len);
+
+   token = opal_async_get_token_interruptible();
+   if (token  0) {
+   dev_err(dev, Failed to get an async token\n);
+   return -ENOMEM;
+   }
+
+   switch (op) {
+   case FLASH_OP_READ:
+   rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_WRITE:
+   rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_ERASE:
+   rc = opal_flash_erase(info-id, offset, len, token);
+   break;
+   default:
+   BUG_ON(1);
+   }
+
+   if (rc != OPAL_ASYNC_COMPLETION) {
+   dev_err(dev, opal_flash_async_op(op=%d) failed (rc %d)\n,
+   op, rc);
+   return -EIO;
+   }
+
+   rc = opal_async_wait_response(token, msg);
+   opal_async_release_token(token);
+   if (rc) {
+   dev_err(dev, opal async wait failed (rc %d)\n, rc);
+   return -EIO;
+   }
+
+   rc = be64_to_cpu(msg.params[1]);
+   if (rc == OPAL_SUCCESS) {
+   rc = 0;
+   if (retlen)
+   *retlen

[PATCH 1/2] powerpc/configs: merge pseries_defconfig and pseries_le_defconfig

2015-05-14 Thread Cyril Bur
These two configs should be identical with the exception of big or little
endian.

The big endian version has XMON_DEFAULT turned on while the little endian has
XMON_DEFAULT not set. Enable XMON_DEFAULT for little endian.

Signed-off-by: Cyril Bur cyril...@gmail.com
---
 arch/powerpc/configs/pseries_le_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/configs/pseries_le_defconfig 
b/arch/powerpc/configs/pseries_le_defconfig
index 09bc96e..2497c7d 100644
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -298,6 +298,7 @@ CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc: replace pseries_le_defconfig with a Makefile target using merge_config

2015-05-14 Thread Cyril Bur
Rather than continuing to maintain a copy of pseries_defconfig with
CONFIG_CPU_LITTLE_ENDIAN enabled, use the generic merge_config script and use an
le.config to enable little endian on top of pseries_defconfig without the need
for a duplicated _defconfig file.

This method will require less maintenance in the future and will ensure that
both 'defconfigs' are always in sync.

Signed-off-by: Cyril Bur cyril...@gmail.com
---
 arch/powerpc/Makefile |   3 +
 arch/powerpc/configs/le.config|   1 +
 arch/powerpc/configs/pseries_le_defconfig | 320 --
 3 files changed, 4 insertions(+), 320 deletions(-)
 create mode 100644 arch/powerpc/configs/le.config
 delete mode 100644 arch/powerpc/configs/pseries_le_defconfig

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 07a4808..e302c2c 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -269,6 +269,9 @@ bootwrapper_install:
 %.dtb: scripts
$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
+pseries_le_defconfig: pseries_defconfig
+   $(Q)$(MAKE) le.config
+
 define archhelp
   @echo '* zImage  - Build default images selected by kernel config'
   @echo '  zImage.*- Compressed kernel image 
(arch/$(ARCH)/boot/zImage.*)'
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644
index 000..ee43fdb
--- /dev/null
+++ b/arch/powerpc/configs/le.config
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/pseries_le_defconfig 
b/arch/powerpc/configs/pseries_le_defconfig
deleted file mode 100644
index 2497c7d..000
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ /dev/null
@@ -1,320 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_USER_NS=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
-CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
-CONFIG_SCHED_SMT=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_NET_IPIP=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_ADVANCED is not set
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_UEVENT_HELPER_PATH=/sbin/hotplug
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_CXGB3_ISCSI=m
-CONFIG_SCSI_CXGB4_ISCSI=m
-CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
-CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_IBMVSCSI=y
-CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
-CONFIG_SCSI_IPR=y
-CONFIG_SCSI_QLA_FC=m
-CONFIG_SCSI_QLA_ISCSI=m
-CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-# CONFIG_ATA_SFF is not set
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_UEVENT=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_MACVLAN

Re: [PATCH 0/2] Add a MTD driver for OpenPower PNOR flash

2015-04-12 Thread Cyril Bur
On Wed, 2015-04-01 at 14:05 +0800, Jeremy Kerr wrote:
 Hi all,
 
 This series implements a simple mtd device to allow access to the PNOR
 flash on OpenPower machines. The flash is accessed through firmware
 calls.
 
 Patch 1/2 adds the Linux interface to these calls. Patch 2/2 adds a mtd
 driver that uses these calls.
 
 Because there's two subsystems involved here, there are a couple of
 methods to merge this:
 
  1) The powerpc folks take 1/2, and the mtd folks take 2/2, to be
 applied once 1/2 is available (Michael has created a topic branch for
 this in the past, which can make things a little easier), or
 
  2) One of the maintainers takes both patches, once the other has acked
 their patch. I'd suggest that it goes through the powerpc tree in
 this case, as that will be less likely to conflict.
 
Ping about this patchset. What do the MTD maintainers think?

Hi David,

Jeremy mentioned you might be interested in seeing these.


Thanks,

Cyril

 Either way, I'm happy.
 
 Cheers,
 
   
 Jeremy
 
 ---
 Cyril Bur (2):
   powerpc/powernv: Add interfaces for flash device access
   drivers/mtd: add powernv flash MTD abstraction driver
 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-21 Thread Cyril Bur

On Wed, 2015-05-20 at 14:17 -0700, Brian Norris wrote:
 You might run this through checkpatch, as it caught several small
 things.
 
Hi Brian,

Oops, sorry absolutely should have done checkpatch!

Thanks for the review, everything you've said is great, I've addressed
all that - I'll post a v2.

One question though,

 On Mon, May 04, 2015 at 04:42:19PM +1000, Cyril Bur wrote:
  Powerpc powernv platforms allow access to certain system flash devices
  through a firmware interface. This change adds an mtd driver for these
  flash devices.
  
  Minor updates from Jeremy Kerr and Joel Stanley.
  
  Signed-off-by: Cyril Bur cyril...@gmail.com
  Signed-off-by: Joel Stanley j...@jms.id.au
  Signed-off-by: Jeremy Kerr j...@ozlabs.org
 
 While I have Jeremy's attention, let me plug a friendly reminder for
 this unrelated comment:
 
 http://patchwork.ozlabs.org/patch/413355/
 
 Jeremy, you still haven't updated patchwork.git for your last round of
 supposed merges.
 
  ---
   drivers/mtd/devices/Kconfig |   6 +
   drivers/mtd/devices/Makefile|   1 +
   drivers/mtd/devices/powernv_flash.c | 288 
  
   3 files changed, 295 insertions(+)
   create mode 100644 drivers/mtd/devices/powernv_flash.c
  

[snip]

  +
  +/**
  + * powernv_flash_set_driver_info - Fill the mtd_info structure and docg3
  + * structure @pdev: The platform device
  + * @mtd: The structure to fill
  + */
  +static int __init powernv_flash_set_driver_info(struct device *dev,
  +   struct mtd_info *mtd)
  +{
  +   const __be32 *reg, *erase_size;
  +   int count;
  +
  +   erase_size = of_get_property(dev-of_node,
  +   ibm,flash-block-size, NULL);
  +   if (!erase_size) {
  +   dev_err(dev, no device property 'ibm,flash-block-size'\n);
  +   return 1;
  +   }
  +
  +   reg = of_get_property(dev-of_node, reg, count);
  +   if (count / sizeof(__be32) != 2) {
  +   dev_err(dev, couldn't get resource information count=%d\n,
  +   count);
  +   return 1;
  +   }
  +
  +   /* Going to have to check what details I need to set and how to
  +* get them */
  +   mtd-name = of_get_property(dev-of_node, name, NULL);
  +   mtd-type = MTD_NANDFLASH;
  +   mtd-flags = MTD_CAP_NANDFLASH;
 
 Is this really NAND flash? It doesn't look like it; I see no bad block
 implementation, and writesize==1.
 

Correct, but the type here is a bit misleading, we have a firmware
interface for the low level read/write/erase functions, all this driver
does is pass the calls through to firmware, there isn't much that linux
or userspace can do since it doesn't actually do the hardware accesses. 

I've checked with Jeremy, turns out the hardware is actually NOR, no
idea how I ever thought it was NAND.

Perhaps just:
mtd-type = MTD_RAM;
mtd-flags = MTD_WRITEABLE;

I would have used MTD_NOR but Jeremy confirms that the backing flash may
not always be NOR on other platforms.

I would appreciate your thoughts here.

  +   mtd-size = of_read_number(reg, 2);
 
 of_property_read_u64()?
 
  +   mtd-erasesize = of_read_number(erase_size, 1);
 
 Looking for of_property_read_u32()?
 
  +   mtd-writebufsize = mtd-writesize = 1;
  +   mtd-owner = THIS_MODULE;
  +   mtd-_erase = powernv_flash_erase;
  +   mtd-_read = powernv_flash_read;
  +   mtd-_write = powernv_flash_write;
  +   mtd-dev.parent = dev;
  +   return 0;
  +}
  +

[snip]

Thanks very much for the review,

Cyril
 
 Brian



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-06-04 Thread Cyril Bur
On Tue, 2015-06-02 at 14:26 +1000, Cyril Bur wrote:
 Powerpc powernv platforms allow access to certain system flash devices
 through a firmware interface. This change adds an mtd driver for these
 flash devices.
 
 Minor updates from Jeremy Kerr and Joel Stanley.
 
 Signed-off-by: Cyril Bur cyril...@gmail.com
 Signed-off-by: Joel Stanley j...@jms.id.au
 Signed-off-by: Jeremy Kerr j...@ozlabs.org
 ---
 Hello Brian and MTD folk,
 Could I please get an ACK for Michael to take this through the powerpc
 tree.
 Thanks.
 

Hello Brian,

As we have some deadlines approaching, I am getting pressure to ensure
this gets merged upstream as quickly as possible, please let me know if
there is anything more which can be done.

Thanks very much,

Cyril
 
 V2: Address Brian Norris' review
 Fix typos
 Change from NAND flash type to NOR flash type
 Correctness tweaks
 V3: Address Neelesh Gupta's review
 Minor corrections
 Release the opal token on error
 Unregister mtd device on module remove
 ---
  drivers/mtd/devices/Kconfig |   8 +
  drivers/mtd/devices/Makefile|   1 +
  drivers/mtd/devices/powernv_flash.c | 286 
 
  3 files changed, 295 insertions(+)
  create mode 100644 drivers/mtd/devices/powernv_flash.c
 
 diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
 index c49d0b1..f73c416 100644
 --- a/drivers/mtd/devices/Kconfig
 +++ b/drivers/mtd/devices/Kconfig
 @@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
 Testing MTD users (eg JFFS2) on large media and media that might
 be removed during a write (using the floppy drive).
  
 +config MTD_POWERNV_FLASH
 + tristate powernv flash MTD driver
 + depends on PPC_POWERNV
 + help
 +   This provides an MTD device to access flash on powernv OPAL
 +   platforms from Linux. This device abstracts away the
 +   firmware interface for flash access.
 +
  comment Disk-On-Chip Device Drivers
  
  config MTD_DOCG3
 diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
 index f0b0e61..7912d3a 100644
 --- a/drivers/mtd/devices/Makefile
 +++ b/drivers/mtd/devices/Makefile
 @@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o
  obj-$(CONFIG_MTD_SST25L) += sst25l.o
  obj-$(CONFIG_MTD_BCM47XXSFLASH)  += bcm47xxsflash.o
  obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
 +obj-$(CONFIG_MTD_POWERNV_FLASH)  += powernv_flash.o
  
 
  CFLAGS_docg3.o   += -I$(src)
 diff --git a/drivers/mtd/devices/powernv_flash.c 
 b/drivers/mtd/devices/powernv_flash.c
 new file mode 100644
 index 000..777e09f
 --- /dev/null
 +++ b/drivers/mtd/devices/powernv_flash.c
 @@ -0,0 +1,286 @@
 +/*
 + * OPAL PNOR flash MTD abstraction
 + *
 + * IBM 2015
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + */
 +
 +#include linux/kernel.h
 +#include linux/module.h
 +#include linux/errno.h
 +#include linux/of.h
 +#include linux/of_address.h
 +#include linux/platform_device.h
 +#include linux/string.h
 +#include linux/slab.h
 +#include linux/mtd/mtd.h
 +#include linux/mtd/partitions.h
 +
 +#include linux/debugfs.h
 +#include linux/seq_file.h
 +
 +#include asm/opal.h
 +
 +
 +/*
 + * This driver creates a Linux MTD abstraction for platform PNOR flash
 + * backed by OPAL calls
 + */
 +
 +struct powernv_flash {
 + struct mtd_info mtd;
 + u32 id;
 +};
 +
 +enum flash_op {
 + FLASH_OP_READ,
 + FLASH_OP_WRITE,
 + FLASH_OP_ERASE,
 +};
 +
 +static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
 + loff_t offset, size_t len, size_t *retlen, u_char *buf)
 +{
 + struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
 + struct device *dev = mtd-dev;
 + int token;
 + struct opal_msg msg;
 + int rc;
 +
 + dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
 + __func__, op, offset, len);
 +
 + token = opal_async_get_token_interruptible();
 + if (token  0) {
 + if (token != -ERESTARTSYS)
 + dev_err(dev, Failed to get an async token\n);
 +
 + return token;
 + }
 +
 + switch (op) {
 + case FLASH_OP_READ:
 + rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
 + break;
 + case FLASH_OP_WRITE:
 + rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
 + break;
 + case FLASH_OP_ERASE:
 + rc = opal_flash_erase(info-id, offset, len, token

[PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-29 Thread Cyril Bur
Powerpc powernv platforms allow access to certain system flash devices
through a firmware interface. This change adds an mtd driver for these
flash devices.

Minor updates from Jeremy Kerr and Joel Stanley.

Signed-off-by: Cyril Bur cyril...@gmail.com
Signed-off-by: Joel Stanley j...@jms.id.au
Signed-off-by: Jeremy Kerr j...@ozlabs.org
---
V2: Address Brian Norris' review
Fix typos
Change from NAND flash type to NOR flash type
Correctness tweaks
V3: Address Neelesh Gupta's review
Minor corrections
Release the opal token on error
Unregister mtd device on module remove
---
 drivers/mtd/devices/Kconfig |   8 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 286 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index c49d0b1..f73c416 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
  Testing MTD users (eg JFFS2) on large media and media that might
  be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+   tristate powernv flash MTD driver
+   depends on PPC_POWERNV
+   help
+ This provides an MTD device to access flash on powernv OPAL
+ platforms from Linux. This device abstracts away the
+ firmware interface for flash access.
+
 comment Disk-On-Chip Device Drivers
 
 config MTD_DOCG3
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index f0b0e61..7912d3a 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)   += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)+= powernv_flash.o
 
 
 CFLAGS_docg3.o += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
new file mode 100644
index 000..036d5b7
--- /dev/null
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -0,0 +1,286 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/errno.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/platform_device.h
+#include linux/string.h
+#include linux/slab.h
+#include linux/mtd/mtd.h
+#include linux/mtd/partitions.h
+
+#include linux/debugfs.h
+#include linux/seq_file.h
+
+#include asm/opal.h
+
+
+/*
+ * This driver creates a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+   struct mtd_info mtd;
+   u32 id;
+};
+
+enum flash_op {
+   FLASH_OP_READ,
+   FLASH_OP_WRITE,
+   FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+   loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+   struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
+   struct device *dev = mtd-dev;
+   int token;
+   struct opal_msg msg;
+   int rc;
+
+   dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
+   __func__, op, offset, len);
+
+   token = opal_async_get_token_interruptible();
+   if (token  0) {
+   if (token != -ERESTARTSYS)
+   dev_err(dev, Failed to get an async token\n);
+
+   return token;
+   }
+
+   switch (op) {
+   case FLASH_OP_READ:
+   rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_WRITE:
+   rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_ERASE:
+   rc = opal_flash_erase(info-id, offset, len, token);
+   break;
+   default:
+   BUG_ON(1);
+   }
+
+   if (rc != OPAL_ASYNC_COMPLETION) {
+   dev_err(dev, opal_flash_async_op(op=%d) failed (rc %d)\n,
+   op, rc);
+   opal_async_release_token(token);
+   return -EIO;
+   }
+
+   rc = opal_async_wait_response(token, msg);
+   opal_async_release_token(token);
+   if (rc) {
+   dev_err(dev

Re: [PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-31 Thread Cyril Bur
On Fri, 2015-05-29 at 14:52 +0530, Neelesh Gupta wrote:
 
 [...]
 
  +/**
  + * @mtd: the device
  + * @erase: the erase info
  + * Returns 0 if erase successful or -ERRNO if an error occurred
  + */
  +static int powernv_flash_erase(struct mtd_info *mtd, struct erase_info 
  *erase)
  +{
  +   int rc;
  +
  +   erase-state = MTD_ERASING;
  +
  +   /* todo: register our own notifier to do a true async implementation */
  +   rc =  powernv_flash_async_op(mtd, FLASH_OP_ERASE, erase-addr,
  +   erase-len, NULL, NULL);
  +
  +   if (rc) {
  +   erase-fail_addr = erase-addr;
  +   erase-state = MTD_ERASE_FAILED;
  +   } else {
  +   erase-state = MTD_ERASE_DONE;
  +   }
  +   mtd_erase_callback(erase);
 
 return rc ? You also document the same  ' or -ERRNO if an error
 occurred'
 
Good catch, I'll amend.

  +   return 0;
  +}
  +
  +/**
  + * powernv_flash_set_driver_info - Fill the mtd_info structure and docg3
  + * structure @pdev: The platform device
  + * @mtd: The structure to fill
  + */
  +static int powernv_flash_set_driver_info(struct device *dev,
  +   struct mtd_info *mtd)
  +{
  +   u64 size;
  +   u32 erase_size;
  +   int rc;
  +
  +   rc = of_property_read_u32(dev-of_node, ibm,flash-block-size,
  +   erase_size);
  +   if (rc) {
  +   dev_err(dev, couldn't get resource block size information\n);
  +   return rc;
  +   }
  +
  +   rc = of_property_read_u64(dev-of_node, reg, size);
  +   if (rc) {
  +   dev_err(dev, couldn't get resource size information\n);
  +   return rc;
  +   }
  +
  +   /*
  +* Going to have to check what details I need to set and how to
  +* get them
  +*/
  +   mtd-name = of_get_property(dev-of_node, name, NULL);
  +   mtd-type = MTD_NORFLASH;
  +   mtd-flags = MTD_WRITEABLE;
  +   mtd-size = size;
  +   mtd-erasesize = erase_size;
  +   mtd-writebufsize = mtd-writesize = 1;
  +   mtd-owner = THIS_MODULE;
  +   mtd-_erase = powernv_flash_erase;
  +   mtd-_read = powernv_flash_read;
  +   mtd-_write = powernv_flash_write;
  +   mtd-dev.parent = dev;
  +   return 0;
  +}
  +
  +/**
  + * powernv_flash_probe
  + * @pdev: platform device
  + *
  + * Returns 0 on success, -ENOMEM, -ENXIO on error
  + */
  +static int powernv_flash_probe(struct platform_device *pdev)
  +{
  +   struct device *dev = pdev-dev;
  +   struct powernv_flash *data;
  +   int ret;
  +
  +   data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
  +   if (!data) {
  +   ret = -ENOMEM;
  +   goto out;
  +   }
  +   data-mtd.priv = data;
 
 'mtd' is contained within the 'data' so you can cast 'mtd' to get the
 'data'
 anywhere you want using container_of() macro.. 'priv' can be used to
 pass
 an unrelated structure   just a thought, you may ignore it.. :)

Yeah, I think I couldn't agree with myself when I wrote and I figured
there might be something I'd want to use priv for. There never was, that
stayed. I realised it got quite circular and there are now many ways of
getting back to data, I can't see any harm in leaving it like that,
except the strangeness of it.

Thanks,

Cyril
 Rest looks ok.
 
 Neelesh.
 
 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-31 Thread Cyril Bur
Powerpc powernv platforms allow access to certain system flash devices
through a firmware interface. This change adds an mtd driver for these
flash devices.

Minor updates from Jeremy Kerr and Joel Stanley.

Signed-off-by: Cyril Bur cyril...@gmail.com
Signed-off-by: Joel Stanley j...@jms.id.au
Signed-off-by: Jeremy Kerr j...@ozlabs.org
---
V2: Address Brian Norris' review
Fix typos
Change from NAND flash type to NOR flash type
Correctness tweaks
V3: Address Neelesh Gupta's review
Minor corrections
Release the opal token on error
Unregister mtd device on module remove
---
 drivers/mtd/devices/Kconfig |   8 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 286 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index c49d0b1..f73c416 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
  Testing MTD users (eg JFFS2) on large media and media that might
  be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+   tristate powernv flash MTD driver
+   depends on PPC_POWERNV
+   help
+ This provides an MTD device to access flash on powernv OPAL
+ platforms from Linux. This device abstracts away the
+ firmware interface for flash access.
+
 comment Disk-On-Chip Device Drivers
 
 config MTD_DOCG3
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index f0b0e61..7912d3a 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)   += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)+= powernv_flash.o
 
 
 CFLAGS_docg3.o += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
new file mode 100644
index 000..777e09f
--- /dev/null
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -0,0 +1,286 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/errno.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/platform_device.h
+#include linux/string.h
+#include linux/slab.h
+#include linux/mtd/mtd.h
+#include linux/mtd/partitions.h
+
+#include linux/debugfs.h
+#include linux/seq_file.h
+
+#include asm/opal.h
+
+
+/*
+ * This driver creates a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+   struct mtd_info mtd;
+   u32 id;
+};
+
+enum flash_op {
+   FLASH_OP_READ,
+   FLASH_OP_WRITE,
+   FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+   loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+   struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
+   struct device *dev = mtd-dev;
+   int token;
+   struct opal_msg msg;
+   int rc;
+
+   dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
+   __func__, op, offset, len);
+
+   token = opal_async_get_token_interruptible();
+   if (token  0) {
+   if (token != -ERESTARTSYS)
+   dev_err(dev, Failed to get an async token\n);
+
+   return token;
+   }
+
+   switch (op) {
+   case FLASH_OP_READ:
+   rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_WRITE:
+   rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_ERASE:
+   rc = opal_flash_erase(info-id, offset, len, token);
+   break;
+   default:
+   BUG_ON(1);
+   }
+
+   if (rc != OPAL_ASYNC_COMPLETION) {
+   dev_err(dev, opal_flash_async_op(op=%d) failed (rc %d)\n,
+   op, rc);
+   opal_async_release_token(token);
+   return -EIO;
+   }
+
+   rc = opal_async_wait_response(token, msg);
+   opal_async_release_token(token);
+   if (rc) {
+   dev_err(dev

[PATCH V2] drivers/mtd: add powernv flash MTD abstraction driver

2015-05-28 Thread Cyril Bur
Powerpc powernv platforms allow access to certain system flash devices
through a firmware interface. This change adds an mtd driver for these
flash devices.

Minor updates from Jeremy Kerr and Joel Stanley.

Signed-off-by: Cyril Bur cyril...@gmail.com
Signed-off-by: Joel Stanley j...@jms.id.au
Signed-off-by: Jeremy Kerr j...@ozlabs.org
---
V2: Address Brian Norris' review
Fix typos
Change from NAND flash type to NOR flash type
Correctness tweaks
---
 drivers/mtd/devices/Kconfig |   8 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 286 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index c49d0b1..a8cc237 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
  Testing MTD users (eg JFFS2) on large media and media that might
  be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+   tristate powernv flash MTD driver
+   depends on PPC_POWERNV
+   help
+ This provides an MTD device to access NVRAM on powernv OPAL
+ platforms from Linux. This device abstracts away the
+ firmware interface for NVRAM access.
+
 comment Disk-On-Chip Device Drivers
 
 config MTD_DOCG3
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index f0b0e61..7912d3a 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)   += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)+= powernv_flash.o
 
 
 CFLAGS_docg3.o += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
new file mode 100644
index 000..f619e4a
--- /dev/null
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -0,0 +1,286 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/errno.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/platform_device.h
+#include linux/string.h
+#include linux/slab.h
+#include linux/mtd/mtd.h
+#include linux/mtd/partitions.h
+
+#include linux/debugfs.h
+#include linux/seq_file.h
+
+#include asm/opal.h
+
+
+/*
+ * This driver creates a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+   struct mtd_info mtd;
+   uint64_tid;
+};
+
+enum flash_op {
+   FLASH_OP_READ,
+   FLASH_OP_WRITE,
+   FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+   loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+   struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
+   struct device *dev = mtd-dev;
+   int token;
+   struct opal_msg msg;
+   int rc;
+
+   dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
+   __func__, op, offset, len);
+
+   token = opal_async_get_token_interruptible();
+   if (token  0) {
+   dev_err(dev, Failed to get an async token\n);
+   return -ENOMEM;
+   }
+
+   switch (op) {
+   case FLASH_OP_READ:
+   rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_WRITE:
+   rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_ERASE:
+   rc = opal_flash_erase(info-id, offset, len, token);
+   break;
+   default:
+   BUG_ON(1);
+   }
+
+   if (rc != OPAL_ASYNC_COMPLETION) {
+   dev_err(dev, opal_flash_async_op(op=%d) failed (rc %d)\n,
+   op, rc);
+   return -EIO;
+   }
+
+   rc = opal_async_wait_response(token, msg);
+   opal_async_release_token(token);
+   if (rc) {
+   dev_err(dev, opal async wait failed (rc %d)\n, rc);
+   return -EIO;
+   }
+
+   rc = be64_to_cpu(msg.params[1]);
+   if (rc == OPAL_SUCCESS) {
+   rc = 0;
+   if (retlen

[PATCH V3] drivers/mtd: add powernv flash MTD abstraction driver

2015-06-01 Thread Cyril Bur
Powerpc powernv platforms allow access to certain system flash devices
through a firmware interface. This change adds an mtd driver for these
flash devices.

Minor updates from Jeremy Kerr and Joel Stanley.

Signed-off-by: Cyril Bur cyril...@gmail.com
Signed-off-by: Joel Stanley j...@jms.id.au
Signed-off-by: Jeremy Kerr j...@ozlabs.org
---
Hello Brian and MTD folk,
Could I please get an ACK for Michael to take this through the powerpc
tree.
Thanks.


V2: Address Brian Norris' review
Fix typos
Change from NAND flash type to NOR flash type
Correctness tweaks
V3: Address Neelesh Gupta's review
Minor corrections
Release the opal token on error
Unregister mtd device on module remove
---
 drivers/mtd/devices/Kconfig |   8 +
 drivers/mtd/devices/Makefile|   1 +
 drivers/mtd/devices/powernv_flash.c | 286 
 3 files changed, 295 insertions(+)
 create mode 100644 drivers/mtd/devices/powernv_flash.c

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index c49d0b1..f73c416 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -195,6 +195,14 @@ config MTD_BLOCK2MTD
  Testing MTD users (eg JFFS2) on large media and media that might
  be removed during a write (using the floppy drive).
 
+config MTD_POWERNV_FLASH
+   tristate powernv flash MTD driver
+   depends on PPC_POWERNV
+   help
+ This provides an MTD device to access flash on powernv OPAL
+ platforms from Linux. This device abstracts away the
+ firmware interface for flash access.
+
 comment Disk-On-Chip Device Drivers
 
 config MTD_DOCG3
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index f0b0e61..7912d3a 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI)   += spear_smi.o
 obj-$(CONFIG_MTD_SST25L)   += sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)+= bcm47xxsflash.o
 obj-$(CONFIG_MTD_ST_SPI_FSM)+= st_spi_fsm.o
+obj-$(CONFIG_MTD_POWERNV_FLASH)+= powernv_flash.o
 
 
 CFLAGS_docg3.o += -I$(src)
diff --git a/drivers/mtd/devices/powernv_flash.c 
b/drivers/mtd/devices/powernv_flash.c
new file mode 100644
index 000..777e09f
--- /dev/null
+++ b/drivers/mtd/devices/powernv_flash.c
@@ -0,0 +1,286 @@
+/*
+ * OPAL PNOR flash MTD abstraction
+ *
+ * IBM 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include linux/kernel.h
+#include linux/module.h
+#include linux/errno.h
+#include linux/of.h
+#include linux/of_address.h
+#include linux/platform_device.h
+#include linux/string.h
+#include linux/slab.h
+#include linux/mtd/mtd.h
+#include linux/mtd/partitions.h
+
+#include linux/debugfs.h
+#include linux/seq_file.h
+
+#include asm/opal.h
+
+
+/*
+ * This driver creates the a Linux MTD abstraction for platform PNOR flash
+ * backed by OPAL calls
+ */
+
+struct powernv_flash {
+   struct mtd_info mtd;
+   u32 id;
+};
+
+enum flash_op {
+   FLASH_OP_READ,
+   FLASH_OP_WRITE,
+   FLASH_OP_ERASE,
+};
+
+static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op,
+   loff_t offset, size_t len, size_t *retlen, u_char *buf)
+{
+   struct powernv_flash *info = (struct powernv_flash *)mtd-priv;
+   struct device *dev = mtd-dev;
+   int token;
+   struct opal_msg msg;
+   int rc;
+
+   dev_dbg(dev, %s(op=%d, offset=0x%llx, len=%zu)\n,
+   __func__, op, offset, len);
+
+   token = opal_async_get_token_interruptible();
+   if (token  0) {
+   if (token != -ERESTARTSYS)
+   dev_err(dev, Failed to get an async token\n);
+
+   return token;
+   }
+
+   switch (op) {
+   case FLASH_OP_READ:
+   rc = opal_flash_read(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_WRITE:
+   rc = opal_flash_write(info-id, offset, __pa(buf), len, token);
+   break;
+   case FLASH_OP_ERASE:
+   rc = opal_flash_erase(info-id, offset, len, token);
+   break;
+   default:
+   BUG_ON(1);
+   }
+
+   if (rc != OPAL_ASYNC_COMPLETION) {
+   dev_err(dev, opal_flash_async_op(op=%d) failed (rc %d)\n,
+   op, rc);
+   opal_async_release_token(token);
+   return -EIO;
+   }
+
+   rc

Re: [PATCH 1/2] powerpc/configs: merge pseries_defconfig and pseries_le_defconfig

2015-05-24 Thread Cyril Bur
On Sun, 2015-05-24 at 12:19 +0300, Denis Kirjanov wrote:
 On 5/24/15, Anton Blanchard an...@samba.org wrote:
  Hi Cyril,
 
  These two configs should be identical with the exception of big or
  little endian
 
  The big endian version has XMON_DEFAULT turned on while the little
  endian has XMON_DEFAULT not set. Enable XMON_DEFAULT for little
  endian.
 
  I disabled it on the LE defconfig on purpose. In most cases we want to
  get the kernel back around and running again. I got sick of building a
  mainline kernel for a non development box and coming back to it stuck in
  xmon.
 
  Should we disable it by default and require developers who want xmon to
  set the config option, or use the xmon=on boot?
 
 I think that it's a good idea to keep it disabled by default

Sure, that behaviour makes sense, and the goal here is to have them
be the same. I'll resend the patch to go the other way and turn
XMON_DEFAULT off in big endian.

Thanks,

Cyril
 
  Anton
  ___
  Linuxppc-dev mailing list
  Linuxppc-dev@lists.ozlabs.org
  https://lists.ozlabs.org/listinfo/linuxppc-dev


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] crypto: nx - tweak Makefile dependencies

2015-05-24 Thread Cyril Bur
Selecting CRYPTO_DEV_NX causes a conditional include of nx/Kconfig but
options within nx/Kconfig do not depend on it. The included options should
depend on CRYPTO_DEV_NX since currently CRYPTO_DEV_NX cannot be built for
little endian. While Kconfig appears to understand this convoluted
dependency situation, it isn't explicitly stated.

This patch addresses the missing dependencies for CRYPTO_DEV_NX_ENCRYPT and
CRYPTO_DEV_NX_COMPRESS which should depend on CRYPTO_DEV_NX. It also makes
more sense to put all three options into the nx/Kconfig file and have the
file included unconditionally.

CC: Marcelo Henrique Cerri mhce...@linux.vnet.ibm.com
CC: Fionnuala Gunter f...@linux.vnet.ibm.com
CC: linux-cry...@vger.kernel.org
CC: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Cyril Bur cyril...@gmail.com
---
 drivers/crypto/Kconfig| 11 +--
 drivers/crypto/nx/Kconfig | 11 +--
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 033c0c8..cc96a7d 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -311,16 +311,7 @@ config CRYPTO_DEV_S5P
  Select this to offload Samsung S5PV210 or S5PC110, Exynos from AES
  algorithms execution.
 
-config CRYPTO_DEV_NX
-   bool Support for IBM Power7+ in-Nest cryptographic acceleration
-   depends on PPC64  IBMVIO  !CPU_LITTLE_ENDIAN
-   default n
-   help
- Support for Power7+ in-Nest cryptographic acceleration.
-
-if CRYPTO_DEV_NX
-   source drivers/crypto/nx/Kconfig
-endif
+source drivers/crypto/nx/Kconfig
 
 config CRYPTO_DEV_UX500
tristate Driver for ST-Ericsson UX500 crypto hardware acceleration
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index f826166..0726f12 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -1,6 +1,13 @@
+config CRYPTO_DEV_NX
+   bool Support for IBM Power7+ in-Nest cryptographic acceleration
+   depends on PPC64  IBMVIO  !CPU_LITTLE_ENDIAN
+   default n
+   help
+ Support for Power7+ in-Nest cryptographic acceleration.
+
 config CRYPTO_DEV_NX_ENCRYPT
tristate Encryption acceleration support
-   depends on PPC64  IBMVIO
+   depends on CRYPTO_DEV_NX
default y
select CRYPTO_AES
select CRYPTO_CBC
@@ -18,7 +25,7 @@ config CRYPTO_DEV_NX_ENCRYPT
 
 config CRYPTO_DEV_NX_COMPRESS
tristate Compression acceleration support
-   depends on PPC64  IBMVIO
+   depends on CRYPTO_DEV_NX
default y
help
  Support for Power7+ in-Nest compression acceleration. This
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 2/2] powerpc/configs: Replace pseries_le_defconfig with a Makefile target using merge_config

2015-05-25 Thread Cyril Bur
Rather than continuing to maintain a copy of pseries_defconfig with enabled
CONFIG_CPU_LITTLE_ENDIAN, use the generic merge_config script and use an
le.config to enable little endian on top of pseries_defconfig without the
need for a duplicated _defconfig file.

This method will require less maintenance in the future and will ensure
that both 'defconfigs' are always in sync.

It is worth noting that the seemingly more simple approach of:
pseries_le_defconfig: pseries_defconfig
$(Q)$(MAKE) le.config
Will not work when building using O=builddir.
The obvious fix to that
pseries_le_defconfig:
$(Q)$(MAKE) -f $(srctree)/Makefile pseries_defconfig le.config
Will result in options that get selected by other options having 'select
CONFIG_FOO' in the defconfig file possibly remaining selected after the
merge with le.config, when they would not have been set by using an actual
pseries_le_defconfig file. As a result this has caused differences in the
generated .config files from when there were actual pseries_le_defconfig
and pseries_defconfg files.

The solution is to ensure to only invoke a config target once so that it
has all the information it needs to correctly set all the parameters. This
is done through the explicit call to make olddefconfig

Signed-off-by: Cyril Bur cyril...@gmail.com
---
V2: Rework to have olddefconfig (or equivalent) only called once.
Improved to make writing *_defconfig targets easier and have the
  targets look cleaner.

 arch/powerpc/Makefile |  15 ++
 arch/powerpc/configs/le.config|   1 +
 arch/powerpc/configs/pseries_le_defconfig | 319 --
 3 files changed, 16 insertions(+), 319 deletions(-)
 create mode 100644 arch/powerpc/configs/le.config
 delete mode 100644 arch/powerpc/configs/pseries_le_defconfig

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 07a4808..2cafce6 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -269,6 +269,21 @@ bootwrapper_install:
 %.dtb: scripts
$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
+#Used to create 'merged defconfigs'
+#Should be $(call)'ed with the first argument as the defconfig on which to
+#base and with a space separated list of .config files to merge, without
+#the .config suffix.
+define merge_into_defconfig
+$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
+   -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \
+   $(foreach 
config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config)
+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
+endef
+
+PHONY += pseries_le_defconfig
+pseries_le_defconfig:
+   $(call merge_into_defconfig,pseries_defconfig,le)
+
 define archhelp
   @echo '* zImage  - Build default images selected by kernel config'
   @echo '  zImage.*- Compressed kernel image 
(arch/$(ARCH)/boot/zImage.*)'
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644
index 000..ee43fdb
--- /dev/null
+++ b/arch/powerpc/configs/le.config
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/pseries_le_defconfig 
b/arch/powerpc/configs/pseries_le_defconfig
deleted file mode 100644
index 09bc96e..000
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ /dev/null
@@ -1,319 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_USER_NS=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
-CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
-CONFIG_SCHED_SMT=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_NET_IPIP=y
-CONFIG_SYN_COOKIES=y

[PATCH V2 1/2] powerpc/configs: Merge pseries_defconfig and pseries_le_defconfig

2015-05-25 Thread Cyril Bur
These two configs should be identical with the exception of big or little
endian.

The big endian version has XMON_DEFAULT turned on while the little endian
has XMON_DEFAULT not set. It makes the most sense for defconfigs not to use
xmon by default, production systems should get back up as quickly as
possible, not sit in xmon.

In the event debugging is required, the option can be enabled or xmon=on
can be specified on commandline.

Signed-off-by: Cyril Bur cyril...@gmail.com
---
V2: Removes XMON_DEFAULT from big endian after Antons suggestion that it
should be a default for production systems. 

 arch/powerpc/configs/pseries_defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/configs/pseries_defconfig 
b/arch/powerpc/configs/pseries_defconfig
index c2e39f6..4da8260 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -297,7 +297,6 @@ CONFIG_CODE_PATCHING_SELFTEST=y
 CONFIG_FTR_FIXUP_SELFTEST=y
 CONFIG_MSI_BITMAP_SELFTEST=y
 CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 08/10] cxl: Allow the kernel to trust that an image won't change on PERST.

2015-08-11 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:41 +1000
Daniel Axtens d...@axtens.net wrote:

 Provide a kernel API and a sysfs entry which allow a user to specify
 that when a card is PERSTed, it's image will stay the same, allowing
 it to participate in EEH.
 
 cxl_reset is used to reflash the card. In that case, we cannot safely
 assert that the image will not change. Therefore, disallow cxl_reset
 if the flag is set.
 

So I'm not super all over the putting all sorts of code inside CONFIG_CXL_EEH,
I understand that there is another driver being merged and they'll use
CONFIG_CXL_EEH so that both this driver and the other driver can go in the same
merge window but does this mean you need to put it around everything here?

I may have misunderstood what you've told me but if the other driver depends on
work done in this one (and not the other way around), if they depend on
CONFIG_CXL_EEH which you create in the last patch, then they cannot be built
until this series exists, so they can't have issues.

The one catch is that this series, as is, waits until the last patch to actually
create the symbol, and therefore compile everything, so let's be sure you don't
break bisecting. You might need to rethink the order of things in 8/10 and 9/10;
I can't see anything obvious, if that helps...

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  Documentation/ABI/testing/sysfs-class-cxl | 10 ++
  drivers/misc/cxl/api.c|  9 +
  drivers/misc/cxl/cxl.h|  3 +++
  drivers/misc/cxl/pci.c| 11 +++
  drivers/misc/cxl/sysfs.c  | 30 ++
  include/misc/cxl.h| 12 
  6 files changed, 75 insertions(+)
 
 diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
 b/Documentation/ABI/testing/sysfs-class-cxl
 index acfe9df83139..b07e86d4597f 100644
 --- a/Documentation/ABI/testing/sysfs-class-cxl
 +++ b/Documentation/ABI/testing/sysfs-class-cxl
 @@ -223,3 +223,13 @@ Description:write only
  Writing 1 will issue a PERST to card which may cause the card
  to reload the FPGA depending on load_image_on_perst.
  Users:   https://github.com/ibm-capi/libcxl
 +
 +What:/sys/class/cxl/card/perst_reloads_same_image
 +Date:July 2015
 +Contact: linuxppc-dev@lists.ozlabs.org
 +Description: read/write
 + Trust that when an image is reloaded via PERST, it will not
 + have changed.
 + 0 = don't trust, the image may be different (default)
 + 1 = trust that the image will not change.
 +Users:   https://github.com/ibm-capi/libcxl
 diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
 index 729e0851167d..c1012ced0323 100644
 --- a/drivers/misc/cxl/api.c
 +++ b/drivers/misc/cxl/api.c
 @@ -327,3 +327,12 @@ int cxl_afu_reset(struct cxl_context *ctx)
   return cxl_afu_check_and_enable(afu);
  }
  EXPORT_SYMBOL_GPL(cxl_afu_reset);
 +
 +#ifdef CONFIG_CXL_EEH
 +void cxl_perst_reloads_same_image(struct cxl_afu *afu,
 +   bool perst_reloads_same_image)
 +{
 + afu-adapter-perst_same_image = perst_reloads_same_image;
 +}
 +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
 +#endif
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 88a88c445e2a..6dd4158f76ac 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -493,6 +493,9 @@ struct cxl {
   bool user_image_loaded;
   bool perst_loads_image;
   bool perst_select_user;
 +#ifdef CONFIG_CXL_EEH
 + bool perst_same_image;
 +#endif
  };
  
  int cxl_alloc_one_irq(struct cxl *adapter);
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 0acf9e62733e..b6a189b35323 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -875,6 +875,14 @@ int cxl_reset(struct cxl *adapter)
   int i;
   u32 val;
  
 +#ifdef CONFIG_CXL_EEH
 + if (adapter-perst_same_image) {
 + dev_warn(dev-dev,
 +  cxl: refusing to reset/reflash when 
 perst_reloads_same_image is set.\n);
 + return -EINVAL;
 + }
 +#endif
 +
   dev_info(dev-dev, CXL reset\n);
  
   /* pcie_warm_reset requests a fundamental pci reset which includes a
 @@ -1148,6 +1156,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
* configure/reconfigure
*/
   adapter-perst_loads_image = true;
 +#ifdef CONFIG_CXL_EEH
 + adapter-perst_same_image = false;
 +#endif
  
   if ((rc = cxl_configure_adapter(adapter, dev))) {
   pci_disable_device(dev);
 diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
 index 31f38bc71a3d..4bcb63258e3e 100644
 --- a/drivers/misc/cxl/sysfs.c
 +++ b/drivers/misc/cxl/sysfs.c
 @@ -112,12 +112,42 @@ static ssize_t load_image_on_perst_store(struct device 
 *device,
   return count;
  }
  
 +#ifdef CONFIG_CXL_EEH
 +static ssize_t 

Re: [PATCH v2 09/10] cxl: EEH support

2015-08-11 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:42 +1000
Daniel Axtens d...@axtens.net wrote:

 EEH (Enhanced Error Handling) allows a driver to recover from the
 temporary failure of an attached PCI card. Enable basic CXL support
 for EEH.
 

Same thoughts about the config option as in 8/10.

As I've mentioned to you, my knowledge of PCI, EEH and CAPI is limited; after
talking to you about it, and apart from the CONFIG problems, it looks like it
works as advertised.

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/cxl.h  |   1 +
  drivers/misc/cxl/pci.c  | 253 
 
  drivers/misc/cxl/vphb.c |   8 ++
  3 files changed, 262 insertions(+)
 
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 6dd4158f76ac..2065e894e46d 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -699,6 +699,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
  
  void cxl_stop_trace(struct cxl *cxl);
  int cxl_pci_vphb_add(struct cxl_afu *afu);
 +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
  void cxl_pci_vphb_remove(struct cxl_afu *afu);
  
  extern struct pci_driver cxl_pci_driver;
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index b6a189b35323..60ae863b6f0a 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -24,6 +24,7 @@
  #include asm/io.h
  
  #include cxl.h
 +#include misc/cxl.h
 

Re our discussion: you do need it :)
 
  
  #define CXL_PCI_VSEC_ID  0x1280
 @@ -1249,10 +1250,262 @@ static void cxl_remove(struct pci_dev *dev)
   cxl_remove_adapter(adapter);
  }
  
 +#ifdef CONFIG_CXL_EEH
 +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
 + pci_channel_state_t state)
 +{
 + struct pci_dev *afu_dev;
 + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
 + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
 +
 + /* There should only be one entry, but go through the list
 +  * anyway
 +  */
 + list_for_each_entry(afu_dev, afu-phb-bus-devices, bus_list) {
 + if (!afu_dev-driver)
 + continue;
 +
 + if (afu_dev-driver-err_handler)
 + afu_result = 
 afu_dev-driver-err_handler-error_detected(afu_dev,
 + 
   state);
 + /* Disconnect trumps all, NONE trumps NEED_RESET */
 + if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 + result = PCI_ERS_RESULT_DISCONNECT;
 + else if ((afu_result == PCI_ERS_RESULT_NONE) 
 +  (result == PCI_ERS_RESULT_NEED_RESET))
 + result = PCI_ERS_RESULT_NONE;
 + }
 + return result;
 +}
 +
 +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
 +pci_channel_state_t state)
 +{
 + struct cxl *adapter = pci_get_drvdata(pdev);
 + struct cxl_afu *afu;
 + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
 + int i;
 +
 + /* At this point, we could still have an interrupt pending.
 +  * Let's try to get them out of the way before they do
 +  * anything we don't like.
 +  */
 + schedule();
 +
 + /* If we're permanently dead, give up. */
 + if (state == pci_channel_io_perm_failure) {
 + /* Tell the AFU drivers; but we don't care what they
 +  * say, we're going away.
 +  */
 + for (i = 0; i  adapter-slices; i++) {
 + afu = adapter-afu[i];
 + cxl_vphb_error_detected(afu, state);
 + }
 + return PCI_ERS_RESULT_DISCONNECT;
 + }
 +
 + /* Are we reflashing?
 +  *
 +  * If we reflash, we could come back as something entirely
 +  * different, including a non-CAPI card. As such, by default
 +  * we don't participate in the process. We'll be unbound and
 +  * the slot re-probed. (TODO: check EEH doesn't blindly rebind
 +  * us!)
 +  *
 +  * However, this isn't the entire story: for reliablity
 +  * reasons, we usually want to reflash the FPGA on PERST in
 +  * order to get back to a more reliable known-good state.
 +  *
 +  * This causes us a bit of a problem: if we reflash we can't
 +  * trust that we'll come back the same - we could have a new
 +  * image and been PERSTed in order to load that
 +  * image. However, most of the time we actually *will* come
 +  * back the same - for example a regular EEH event.
 +  *
 +  * Therefore, we allow the user to assert that the image is
 +  * indeed the same and that we should continue on into EEH
 +  * anyway.
 +  */
 + if (adapter-perst_loads_image  !adapter-perst_same_image) {
 + /* TODO take the PHB out of CXL mode */
 + dev_info(pdev-dev, reflashing, so opting out of EEH!\n);
 + return 

Re: [PATCH v2 07/10] cxl: Don't remove AFUs/vPHBs in cxl_reset

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:40 +1000
Daniel Axtens d...@axtens.net wrote:

 If the driver doesn't participate in EEH, the AFUs will be removed
 by cxl_remove, which will be invoked by EEH.
 
 If the driver does participate in EEH, the vPHB needs to stick around
 so that it can participate.
 
 In both cases, we shouldn't remove the AFU/vPHB.
 

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/pci.c | 5 -
  1 file changed, 5 deletions(-)
 
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 98a8207da88d..0acf9e62733e 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -877,11 +877,6 @@ int cxl_reset(struct cxl *adapter)
  
   dev_info(dev-dev, CXL reset\n);
  
 - for (i = 0; i  adapter-slices; i++) {
 - cxl_pci_vphb_remove(adapter-afu[i]);
 - cxl_remove_afu(adapter-afu[i]);
 - }
 -
   /* pcie_warm_reset requests a fundamental pci reset which includes a
* PERST assert/deassert.  PERST triggers a loading of the image
* if user or factory is selected in sysfs */

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 05/10] cxl: Refactor adaptor init/teardown

2015-08-11 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:38 +1000
Daniel Axtens d...@axtens.net wrote:

 Some aspects of initialisation are done only once in the lifetime of
 an adapter: for example, allocating memory for the adapter,
 allocating the adapter number, or setting up sysfs/debugfs files.
 
 However, we may want to be able to do some parts of the
 initialisation multiple times: for example, in error recovery we
 want to be able to tear down and then re-map IO memory and IRQs.
 
 Therefore, refactor CXL init/teardown as follows.
 
  - Keep the overarching functions 'cxl_init_adapter' and its pair,
'cxl_remove_adapter'.
 
  - Move all 'once only' allocation/freeing steps to the existing
'cxl_alloc_adapter' function, and its pair 'cxl_release_adapter'
(This involves moving allocation of the adapter number out of
cxl_init_adapter.)
 
  - Create two new functions: 'cxl_configure_adapter', and its pair
'cxl_deconfigure_adapter'. These two functions 'wire up' the
hardware --- they (de)configure resources that do not need to
last the entire lifetime of the adapter
 

You have a dilemma with the use of the ugly if (rc = foo()) pattern. I don't like
it, but the file is littered with it.

In the majority of uses in this file the conditional block is only one line, so
it makes sense (or at least in terms of numbers of lines... fair enough);
however, when a conditional block spans multiple lines, I don't like it.

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/pci.c | 138 
 ++---
  1 file changed, 85 insertions(+), 53 deletions(-)
 
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index adcf938f2fdb..7f47e2221524 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -966,7 +966,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct 
 pci_dev *dev)
   CXL_READ_VSEC_BASE_IMAGE(dev, vsec, adapter-base_image);
   CXL_READ_VSEC_IMAGE_STATE(dev, vsec, image_state);
   adapter-user_image_loaded = !!(image_state  
 CXL_VSEC_USER_IMAGE_LOADED);
 - adapter-perst_loads_image = true;
   adapter-perst_select_user = !!(image_state  
 CXL_VSEC_USER_IMAGE_LOADED);
  
   CXL_READ_VSEC_NAFUS(dev, vsec, adapter-slices);
 @@ -1026,22 +1025,34 @@ static void cxl_release_adapter(struct device *dev)
  
   pr_devel(cxl_release_adapter\n);
  
 + cxl_remove_adapter_nr(adapter);
 +
   kfree(adapter);
  }
  
 -static struct cxl *cxl_alloc_adapter(struct pci_dev *dev)
 +static struct cxl *cxl_alloc_adapter(void)
  {
   struct cxl *adapter;
 + int rc;
  
   if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
   return NULL;
  
 - adapter-dev.parent = dev-dev;
 - adapter-dev.release = cxl_release_adapter;
 - pci_set_drvdata(dev, adapter);
   spin_lock_init(adapter-afu_list_lock);
  
 + if ((rc = cxl_alloc_adapter_nr(adapter)))

Humf

 + goto err1;
 +
 + if ((rc = dev_set_name(adapter-dev, card%i, adapter-adapter_num)))

Humf
 + goto err2;
 +
   return adapter;
 +
 +err2:
 + cxl_remove_adapter_nr(adapter);
 +err1:
 + kfree(adapter);
 + return NULL;
  }
  
  static int sanitise_adapter_regs(struct cxl *adapter)
 @@ -1050,57 +1061,94 @@ static int sanitise_adapter_regs(struct cxl *adapter)
   return cxl_tlb_slb_invalidate(adapter);
  }
  
 -static struct cxl *cxl_init_adapter(struct pci_dev *dev)
 +/* This should contain *only* operations that can safely be done in
 + * both creation and recovery.
 + */
 +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
  {
 - struct cxl *adapter;
 - bool free = true;
   int rc;
  
 + adapter-dev.parent = dev-dev;
 + adapter-dev.release = cxl_release_adapter;
 + pci_set_drvdata(dev, adapter);
  
 - if (!(adapter = cxl_alloc_adapter(dev)))
 - return ERR_PTR(-ENOMEM);
 + if ((rc = pci_enable_device(dev))) {

Backets...

 + dev_err(dev-dev, pci_enable_device failed: %i\n, rc);
 + return rc;
 + }
  
   if ((rc = cxl_read_vsec(adapter, dev)))
 - goto err1;
 + return rc;
  
   if ((rc = cxl_vsec_looks_ok(adapter, dev)))
 - goto err1;
 + return rc;
  
   if ((rc = setup_cxl_bars(dev)))
 - goto err1;
 + return rc;
  
   if ((rc = switch_card_to_cxl(dev)))
 - goto err1;
 -
 - if ((rc = cxl_alloc_adapter_nr(adapter)))
 - goto err1;
 -
 - if ((rc = dev_set_name(adapter-dev, card%i, adapter-adapter_num)))
 - goto err2;
 + return rc;
  
   if ((rc = cxl_update_image_control(adapter)))
 - goto err2;
 + return rc;
  
   if ((rc = cxl_map_adapter_regs(adapter, dev)))
 - goto err2;
 + return rc;
  
   if ((rc = sanitise_adapter_regs(adapter)))
 - goto err2;
 + goto err;
  
 

Re: [PATCH v2 01/10] cxl: Drop commands if the PCI channel is not in normal state

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:34 +1000
Daniel Axtens d...@axtens.net wrote:

 If the PCI channel has gone down, don't attempt to poke the hardware.
 
 We need to guard every time cxl_whatever_(read|write) is called. This
 is because a call to those functions will dereference an offset into an
 mmio register, and the mmio mappings get invalidated in the EEH
 teardown.
 

Hey Daniel, keeping in mind I can't exactly test it, and that I don't know the
ins and outs of CAPI, the code looks nice and it makes sense to me.

Some very minor points,

Otherwise

Acked-by: Cyril Bur cyril...@gmail.com

 Check in the read/write functions in the header.
 We give them the same semantics as usual PCI operations:
  - a write to a channel that is down is ignored.
  - a read from a channel that is down returns all fs.
 
 Also, we try to access the MMIO space of a vPHB device as part of the
 PCI disable path. Because that's a read that bypasses most of our usual
 checks, we handle it explicitly.
 
 As far as user visible warnings go:
  - Check link state in file ops, return -EIO if down.
  - Be reasonably quiet if there's an error in a teardown path,
or when we already know the hardware is going down.
  - Throw a big WARN if someone tries to start a CXL operation
while the card is down. This gives a useful stacktrace for
debugging whatever is doing that.
 
 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/context.c |  6 +++-
  drivers/misc/cxl/cxl.h | 34 --
  drivers/misc/cxl/file.c| 19 +
  drivers/misc/cxl/native.c  | 71 
 --
  drivers/misc/cxl/vphb.c| 26 +
  5 files changed, 144 insertions(+), 12 deletions(-)
 
 diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
 index 1287148629c0..615842115848 100644
 --- a/drivers/misc/cxl/context.c
 +++ b/drivers/misc/cxl/context.c
 @@ -193,7 +193,11 @@ int __detach_context(struct cxl_context *ctx)
   if (status != STARTED)
   return -EBUSY;
  
 - WARN_ON(cxl_detach_process(ctx));
 + /* Only warn if we detached while the link was OK.
 +  * If detach fails when hw is down, we don't care.
 +  */
 + WARN_ON(cxl_detach_process(ctx) 
 + cxl_adapter_link_ok(ctx-afu-adapter));
   flush_work(ctx-fault_work); /* Only needed for dedicated process */
   put_pid(ctx-pid);
   cxl_ctx_put();
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 4fd66cabde1e..47eadbcfd379 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -531,6 +531,14 @@ struct cxl_process_element {
   __be32 software_state;
  } __packed;
  
 +static inline bool cxl_adapter_link_ok(struct cxl *cxl)
 +{
 + struct pci_dev *pdev;
 +
 + pdev = to_pci_dev(cxl-dev.parent);
 + return (pdev-error_state == pci_channel_io_normal);
 +}
 +

In the process of reviewing these patches I read the style guide in further
detail and (it doesn't 100% commit one way or the other, but) it suggests it may
be wise to let GCC choose whether it should inline or not, unless you have a reason
(the macro replacement below being a good example)... Just a thought.

  static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
  {
   WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
 @@ -538,9 +546,11 @@ static inline void __iomem *_cxl_p1_addr(struct cxl 
 *cxl, cxl_p1_reg_t reg)
  }
  
  #define cxl_p1_write(cxl, reg, val) \
 - out_be64(_cxl_p1_addr(cxl, reg), val)
 + if (cxl_adapter_link_ok(cxl)) \
 + out_be64(_cxl_p1_addr(cxl, reg), val)
  #define cxl_p1_read(cxl, reg) \
 - in_be64(_cxl_p1_addr(cxl, reg))
 + (cxl_adapter_link_ok(cxl) ? in_be64(_cxl_p1_addr(cxl, reg)) \
 +  : (~0ULL))
  
  static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t 
 reg)
  {
 @@ -549,9 +559,11 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu 
 *afu, cxl_p1n_reg_t reg
  }
  
  #define cxl_p1n_write(afu, reg, val) \
 - out_be64(_cxl_p1n_addr(afu, reg), val)
 + if (cxl_adapter_link_ok(afu-adapter)) \
 + out_be64(_cxl_p1n_addr(afu, reg), val)
  #define cxl_p1n_read(afu, reg) \
 - in_be64(_cxl_p1n_addr(afu, reg))
 + (cxl_adapter_link_ok(afu-adapter) ? in_be64(_cxl_p1n_addr(afu, reg)) \
 +  : (~0ULL))
  

In the interest of safety and consistency, you might want braces around afu
when you dereference it. ie (afu)-adapter.

  static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t 
 reg)
  {
 @@ -559,15 +571,21 @@ static inline void __iomem *_cxl_p2n_addr(struct 
 cxl_afu *afu, cxl_p2n_reg_t reg
  }
  
  #define cxl_p2n_write(afu, reg, val) \
 - out_be64(_cxl_p2n_addr(afu, reg), val)
 + if (cxl_adapter_link_ok(afu-adapter)) \
 + out_be64(_cxl_p2n_addr(afu, reg), val)
  #define cxl_p2n_read(afu, reg) \
 - in_be64(_cxl_p2n_addr(afu, reg))
 + (cxl_adapter_link_ok(afu-adapter

Re: [PATCH v2 02/10] cxl: Allocate and release the SPA with the AFU

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:35 +1000
Daniel Axtens d...@axtens.net wrote:

 Previously the SPA was allocated and freed upon entering and leaving
 AFU-directed mode. This causes some issues for error recovery - contexts
 hold a pointer inside the SPA, and they may persist after the AFU has
 been detached.
 
 We would ideally like to allocate the SPA when the AFU is allocated, and
 release it until the AFU is released. However, we don't know how big the
 SPA needs to be until we read the AFU descriptor.
 
 Therefore, restructure the code:
 
  - Allocate the SPA only once, on the first attach.
 
  - Release the SPA only when the entire AFU is being released (not
detached). Guard the release with a NULL check, so we don't free
if it was never allocated (e.g. dedicated mode)
 

I'm sure you tested this :), looks fine to me; from an outsider's perspective
the code appears to do what the commit message says.

Just one super minor question, you do the NULL check in the caller. How obvious
is the error if/when a caller forgets?

Acked-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/cxl.h|  3 +++
  drivers/misc/cxl/native.c | 28 ++--
  drivers/misc/cxl/pci.c|  3 +++
  3 files changed, 24 insertions(+), 10 deletions(-)
 
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 47eadbcfd379..88a88c445e2a 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -603,6 +603,9 @@ void unregister_cxl_calls(struct cxl_calls *calls);
  int cxl_alloc_adapter_nr(struct cxl *adapter);
  void cxl_remove_adapter_nr(struct cxl *adapter);
  
 +int cxl_alloc_spa(struct cxl_afu *afu);
 +void cxl_release_spa(struct cxl_afu *afu);
 +
  int cxl_file_init(void);
  void cxl_file_exit(void);
  int cxl_register_adapter(struct cxl *adapter);
 diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
 index 16948915eb0d..debd97147b58 100644
 --- a/drivers/misc/cxl/native.c
 +++ b/drivers/misc/cxl/native.c
 @@ -182,10 +182,8 @@ static int spa_max_procs(int spa_size)
   return ((spa_size / 8) - 96) / 17;
  }
  
 -static int alloc_spa(struct cxl_afu *afu)
 +int cxl_alloc_spa(struct cxl_afu *afu)
  {
 - u64 spap;
 -
   /* Work out how many pages to allocate */
   afu-spa_order = 0;
   do {
 @@ -204,6 +202,13 @@ static int alloc_spa(struct cxl_afu *afu)
   pr_devel(spa pages: %i afu-spa_max_procs: %i   afu-num_procs: %i\n,
1afu-spa_order, afu-spa_max_procs, afu-num_procs);
  
 + return 0;
 +}
 +
 +static void attach_spa(struct cxl_afu *afu)
 +{
 + u64 spap;
 +
   afu-sw_command_status = (__be64 *)((char *)afu-spa +
   ((afu-spa_max_procs + 3) * 128));
  
 @@ -212,13 +217,15 @@ static int alloc_spa(struct cxl_afu *afu)
   spap |= CXL_PSL_SPAP_V;
   pr_devel(cxl: SPA allocated at 0x%p. Max processes: %i, 
 sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n, afu-spa, 
 afu-spa_max_procs, afu-sw_command_status, spap);
   cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
 -
 - return 0;
  }
  
 -static void release_spa(struct cxl_afu *afu)
 +static inline void detach_spa(struct cxl_afu *afu)
  {
   cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
 +}
 +
 +void cxl_release_spa(struct cxl_afu *afu)
 +{
   free_pages((unsigned long) afu-spa, afu-spa_order);
  }
  
 @@ -446,8 +453,11 @@ static int activate_afu_directed(struct cxl_afu *afu)
  
   dev_info(afu-dev, Activating AFU directed mode\n);
  
 - if (alloc_spa(afu))
 - return -ENOMEM;
 + if (afu-spa == NULL) {
 + if (cxl_alloc_spa(afu))
 + return -ENOMEM;
 + }
 + attach_spa(afu);
  
   cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
   cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xULL);
 @@ -560,8 +570,6 @@ static int deactivate_afu_directed(struct cxl_afu *afu)
   cxl_afu_disable(afu);
   cxl_psl_purge(afu);
  
 - release_spa(afu);
 -
   return 0;
  }
  
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 32ad09705949..1849c1785b49 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -551,6 +551,9 @@ static void cxl_release_afu(struct device *dev)
  
   pr_devel(cxl_release_afu\n);
  
 + if (afu-spa)
 + cxl_release_spa(afu);
 +
   kfree(afu);
  }
  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 03/10] cxl: Make IRQ release idempotent

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:36 +1000
Daniel Axtens d...@axtens.net wrote:

 Check if an IRQ is mapped before releasing it.
 
 This will simplify future EEH code by allowing unconditional unmapping
 of IRQs.
 

Acked-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/irq.c | 9 +
  1 file changed, 9 insertions(+)
 
 diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
 index 680cd263436d..121ec48f3ab4 100644
 --- a/drivers/misc/cxl/irq.c
 +++ b/drivers/misc/cxl/irq.c
 @@ -341,6 +341,9 @@ int cxl_register_psl_err_irq(struct cxl *adapter)
  
  void cxl_release_psl_err_irq(struct cxl *adapter)
  {
 + if (adapter-err_virq != irq_find_mapping(NULL, adapter-err_hwirq))
 + return;
 +
   cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x);
   cxl_unmap_irq(adapter-err_virq, adapter);
   cxl_release_one_irq(adapter, adapter-err_hwirq);
 @@ -374,6 +377,9 @@ int cxl_register_serr_irq(struct cxl_afu *afu)
  
  void cxl_release_serr_irq(struct cxl_afu *afu)
  {
 + if (afu-serr_virq != irq_find_mapping(NULL, afu-serr_hwirq))
 + return;
 +
   cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x);
   cxl_unmap_irq(afu-serr_virq, afu);
   cxl_release_one_irq(afu-adapter, afu-serr_hwirq);
 @@ -400,6 +406,9 @@ int cxl_register_psl_irq(struct cxl_afu *afu)
  
  void cxl_release_psl_irq(struct cxl_afu *afu)
  {
 + if (afu-psl_virq != irq_find_mapping(NULL, afu-psl_hwirq))
 + return;
 +
   cxl_unmap_irq(afu-psl_virq, afu);
   cxl_release_one_irq(afu-adapter, afu-psl_hwirq);
   kfree(afu-psl_irq_name);

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 04/10] cxl: Clean up adapter MMIO unmap path.

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:37 +1000
Daniel Axtens d...@axtens.net wrote:

  - MMIO pointer unmapping is guarded by a null pointer check.
However, iounmap doesn't null the pointer, just invalidate it.
Therefore, explicitly null the pointer after unmapping.
 
  - afu_desc_mmio also needs to be unmapped.
 
  - PCI regions are allocated in cxl_map_adapter_regs.
Therefore they should be released in unmap, not elsewhere.
 

You've changed the order in which cxl_remove_adapter() does its work — I'm sure
you've considered it and it's fine, but best to double-check.

Acked-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/pci.c | 24 ++--
  1 file changed, 18 insertions(+), 6 deletions(-)
 
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 1849c1785b49..adcf938f2fdb 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -539,10 +539,18 @@ err:
  
  static void cxl_unmap_slice_regs(struct cxl_afu *afu)
  {
 - if (afu-p2n_mmio)
 + if (afu-p2n_mmio) {
   iounmap(afu-p2n_mmio);
 - if (afu-p1n_mmio)
 + afu-p2n_mmio = NULL;
 + }
 + if (afu-p1n_mmio) {
   iounmap(afu-p1n_mmio);
 + afu-p1n_mmio = NULL;
 + }
 + if (afu-afu_desc_mmio) {
 + iounmap(afu-afu_desc_mmio);
 + afu-afu_desc_mmio = NULL;
 + }
  }
  
  static void cxl_release_afu(struct device *dev)
 @@ -920,10 +928,16 @@ err1:
  
  static void cxl_unmap_adapter_regs(struct cxl *adapter)
  {
 - if (adapter-p1_mmio)
 + if (adapter-p1_mmio) {
   iounmap(adapter-p1_mmio);
 - if (adapter-p2_mmio)
 + adapter-p1_mmio = NULL;
 + pci_release_region(to_pci_dev(adapter-dev.parent), 2);
 + }
 + if (adapter-p2_mmio) {
   iounmap(adapter-p2_mmio);
 + adapter-p2_mmio = NULL;
 + pci_release_region(to_pci_dev(adapter-dev.parent), 0);
 + }
  }
  
  static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
 @@ -1132,8 +1146,6 @@ static void cxl_remove_adapter(struct cxl *adapter)
  
   device_unregister(adapter-dev);
  
 - pci_release_region(pdev, 0);
 - pci_release_region(pdev, 2);
   pci_disable_device(pdev);
  }
  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 10/10] cxl: Add CONFIG_CXL_EEH symbol

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:43 +1000
Daniel Axtens d...@axtens.net wrote:

 CONFIG_CXL_EEH is for CXL's EEH related code.
 
 As well as the EEH callbacks, it should guard sysfs and
 kernel API changes that are only required for CXL EEH.
 
 We now have all the pieces in place, so add it now.
 

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/Kconfig | 6 ++
  1 file changed, 6 insertions(+)
 
 diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
 index b6db9ebd52c2..c151fc1fe14c 100644
 --- a/drivers/misc/cxl/Kconfig
 +++ b/drivers/misc/cxl/Kconfig
 @@ -11,11 +11,17 @@ config CXL_KERNEL_API
   bool
   default n
  
 +config CXL_EEH
 + bool
 + default n
 + select EEH
 +
  config CXL
   tristate Support for IBM Coherent Accelerators (CXL)
   depends on PPC_POWERNV  PCI_MSI
   select CXL_BASE
   select CXL_KERNEL_API
 + select CXL_EEH
   default m
   help
 Select this option to enable driver support for IBM Coherent

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 06/10] cxl: Refactor AFU init/teardown

2015-08-10 Thread Cyril Bur
On Tue, 28 Jul 2015 15:28:39 +1000
Daniel Axtens d...@axtens.net wrote:

 As with an adapter, some aspects of initialisation are done only once
 in the lifetime of an AFU: for example, allocating memory, or setting
 up sysfs/debugfs files.
 
 However, we may want to be able to do some parts of the initialisation
 multiple times: for example, in error recovery we want to be able to
 tear down and then re-map IO memory and IRQs.
 
 Therefore, refactor AFU init/teardown as follows.
 
  - Create two new functions: 'cxl_configure_afu', and its pair
'cxl_deconfigure_afu'. As with the adapter functions,
these (de)configure resources that do not need to last the entire
lifetime of the AFU.
 
  - Allocating and releasing memory remain the task of 'cxl_alloc_afu'
and 'cxl_release_afu'.
 
  - Once-only functions that do not involve allocating/releasing memory
stay in the overarching 'cxl_init_afu'/'cxl_remove_afu' pair.
However, the task of picking an AFU mode and activating it has been
broken out.
 

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/pci.c | 87 
 +-
  1 file changed, 50 insertions(+), 37 deletions(-)
 
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 7f47e2221524..98a8207da88d 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -753,45 +753,67 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, 
 char *buf,
   return count;
  }
  
 -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 +static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, 
 struct pci_dev *dev)
  {
 - struct cxl_afu *afu;
 - bool free = true;
   int rc;
  
 - if (!(afu = cxl_alloc_afu(adapter, slice)))
 - return -ENOMEM;
 -
 - if ((rc = dev_set_name(afu-dev, afu%i.%i, adapter-adapter_num, 
 slice)))
 - goto err1;
 -
   if ((rc = cxl_map_slice_regs(afu, adapter, dev)))
 - goto err1;
 + return rc;
  
   if ((rc = sanitise_afu_regs(afu)))
 - goto err2;
 + goto err1;
  
   /* We need to reset the AFU before we can read the AFU descriptor */
   if ((rc = __cxl_afu_reset(afu)))
 - goto err2;
 + goto err1;
  
   if (cxl_verbose)
   dump_afu_descriptor(afu);
  
   if ((rc = cxl_read_afu_descriptor(afu)))
 - goto err2;
 + goto err1;
  
   if ((rc = cxl_afu_descriptor_looks_ok(afu)))
 - goto err2;
 + goto err1;
  
   if ((rc = init_implementation_afu_regs(afu)))
 - goto err2;
 + goto err1;
  
   if ((rc = cxl_register_serr_irq(afu)))
 - goto err2;
 + goto err1;
  
   if ((rc = cxl_register_psl_irq(afu)))
 - goto err3;
 + goto err2;
 +
 + return 0;
 +
 +err2:
 + cxl_release_serr_irq(afu);
 +err1:
 + cxl_unmap_slice_regs(afu);
 + return rc;
 +}
 +
 +static void cxl_deconfigure_afu(struct cxl_afu *afu)
 +{
 + cxl_release_psl_irq(afu);
 + cxl_release_serr_irq(afu);
 + cxl_unmap_slice_regs(afu);
 +}
 +
 +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 +{
 + struct cxl_afu *afu;
 + int rc;
 +
 + if (!(afu = cxl_alloc_afu(adapter, slice)))
 + return -ENOMEM;
 +
 + if ((rc = dev_set_name(afu-dev, afu%i.%i, adapter-adapter_num, 
 slice)))
 + goto err_free;
 +
 + if ((rc = cxl_configure_afu(afu, adapter, dev)))
 + goto err_free;
  
   /* Don't care if this fails */
   cxl_debugfs_afu_add(afu);
 @@ -806,10 +828,6 @@ static int cxl_init_afu(struct cxl *adapter, int slice, 
 struct pci_dev *dev)
   if ((rc = cxl_sysfs_afu_add(afu)))
   goto err_put1;
  
 -
 - if ((rc = cxl_afu_select_best_mode(afu)))
 - goto err_put2;
 -
   adapter-afu[afu-slice] = afu;
  
   if ((rc = cxl_pci_vphb_add(afu)))
 @@ -817,21 +835,16 @@ static int cxl_init_afu(struct cxl *adapter, int slice, 
 struct pci_dev *dev)
  
   return 0;
  
 -err_put2:
 - cxl_sysfs_afu_remove(afu);
  err_put1:
 - device_unregister(afu-dev);
 - free = false;
 + cxl_deconfigure_afu(afu);
   cxl_debugfs_afu_remove(afu);
 - cxl_release_psl_irq(afu);
 -err3:
 - cxl_release_serr_irq(afu);
 -err2:
 - cxl_unmap_slice_regs(afu);
 -err1:
 - if (free)
 - kfree(afu);
 + device_unregister(afu-dev);
   return rc;
 +
 +err_free:
 + kfree(afu);
 + return rc;
 +
  }
  
  static void cxl_remove_afu(struct cxl_afu *afu)
 @@ -851,10 +864,7 @@ static void cxl_remove_afu(struct cxl_afu *afu)
   cxl_context_detach_all(afu);
   cxl_afu_deactivate_mode(afu);
  
 - cxl_release_psl_irq(afu);
 - cxl_release_serr_irq(afu);
 - cxl_unmap_slice_regs(afu

Re: [PATCH v3 09/11] cxl: Allow the kernel to trust that an image won't change on PERST.

2015-08-12 Thread Cyril Bur
On Wed, 12 Aug 2015 10:48:18 +1000
Daniel Axtens d...@axtens.net wrote:

 Provide a kernel API and a sysfs entry which allow a user to specify
 that when a card is PERSTed, it's image will stay the same, allowing
 it to participate in EEH.
 
 cxl_reset is used to reflash the card. In that case, we cannot safely
 assert that the image will not change. Therefore, disallow cxl_reset
 if the flag is set.
 

Looks much better without all the #ifdefs!!

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  Documentation/ABI/testing/sysfs-class-cxl | 10 ++
  drivers/misc/cxl/api.c|  7 +++
  drivers/misc/cxl/cxl.h|  1 +
  drivers/misc/cxl/pci.c|  7 +++
  drivers/misc/cxl/sysfs.c  | 26 ++
  include/misc/cxl.h| 10 ++
  6 files changed, 61 insertions(+)
 
 diff --git a/Documentation/ABI/testing/sysfs-class-cxl 
 b/Documentation/ABI/testing/sysfs-class-cxl
 index acfe9df83139..b07e86d4597f 100644
 --- a/Documentation/ABI/testing/sysfs-class-cxl
 +++ b/Documentation/ABI/testing/sysfs-class-cxl
 @@ -223,3 +223,13 @@ Description:write only
  Writing 1 will issue a PERST to card which may cause the card
  to reload the FPGA depending on load_image_on_perst.
  Users:   https://github.com/ibm-capi/libcxl
 +
 +What:/sys/class/cxl/card/perst_reloads_same_image
 +Date:July 2015
 +Contact: linuxppc-dev@lists.ozlabs.org
 +Description: read/write
 + Trust that when an image is reloaded via PERST, it will not
 + have changed.
 + 0 = don't trust, the image may be different (default)
 + 1 = trust that the image will not change.
 +Users:   https://github.com/ibm-capi/libcxl
 diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
 index 729e0851167d..6a768a9ad22f 100644
 --- a/drivers/misc/cxl/api.c
 +++ b/drivers/misc/cxl/api.c
 @@ -327,3 +327,10 @@ int cxl_afu_reset(struct cxl_context *ctx)
   return cxl_afu_check_and_enable(afu);
  }
  EXPORT_SYMBOL_GPL(cxl_afu_reset);
 +
 +void cxl_perst_reloads_same_image(struct cxl_afu *afu,
 +   bool perst_reloads_same_image)
 +{
 + afu-adapter-perst_same_image = perst_reloads_same_image;
 +}
 +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index d540542f9931..cda02412b01e 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -493,6 +493,7 @@ struct cxl {
   bool user_image_loaded;
   bool perst_loads_image;
   bool perst_select_user;
 + bool perst_same_image;
  };
  
  int cxl_alloc_one_irq(struct cxl *adapter);
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 023a2086830b..b4a68a896a33 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -874,6 +874,12 @@ int cxl_reset(struct cxl *adapter)
   int i;
   u32 val;
  
 + if (adapter-perst_same_image) {
 + dev_warn(dev-dev,
 +  cxl: refusing to reset/reflash when 
 perst_reloads_same_image is set.\n);
 + return -EINVAL;
 + }
 +
   dev_info(dev-dev, CXL reset\n);
  
   /* pcie_warm_reset requests a fundamental pci reset which includes a
 @@ -1148,6 +1154,7 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
* configure/reconfigure
*/
   adapter-perst_loads_image = true;
 + adapter-perst_same_image = false;
  
   rc = cxl_configure_adapter(adapter, dev);
   if (rc) {
 diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
 index 31f38bc71a3d..6619cf1f6e1f 100644
 --- a/drivers/misc/cxl/sysfs.c
 +++ b/drivers/misc/cxl/sysfs.c
 @@ -112,12 +112,38 @@ static ssize_t load_image_on_perst_store(struct device 
 *device,
   return count;
  }
  
 +static ssize_t perst_reloads_same_image_show(struct device *device,
 +  struct device_attribute *attr,
 +  char *buf)
 +{
 + struct cxl *adapter = to_cxl_adapter(device);
 +
 + return scnprintf(buf, PAGE_SIZE, %i\n, adapter-perst_same_image);
 +}
 +
 +static ssize_t perst_reloads_same_image_store(struct device *device,
 +  struct device_attribute *attr,
 +  const char *buf, size_t count)
 +{
 + struct cxl *adapter = to_cxl_adapter(device);
 + int rc;
 + int val;
 +
 + rc = sscanf(buf, %i, val);
 + if ((rc != 1) || !(val == 1 || val == 0))
 + return -EINVAL;
 +
 + adapter-perst_same_image = (val == 1 ? true : false);
 + return count;
 +}
 +
  static struct device_attribute adapter_attrs[] = {
   __ATTR_RO(caia_version),
   __ATTR_RO(psl_revision),
   __ATTR_RO(base_image),
   __ATTR_RO(image_loaded),
   __ATTR_RW

Re: [PATCH v3 02/11] cxl: Drop commands if the PCI channel is not in normal state

2015-08-12 Thread Cyril Bur
On Wed, 12 Aug 2015 10:48:11 +1000
Daniel Axtens d...@axtens.net wrote:

 If the PCI channel has gone down, don't attempt to poke the hardware.
 
 We need to guard every time cxl_whatever_(read|write) is called. This
 is because a call to those functions will dereference an offset into an
 mmio register, and the mmio mappings get invalidated in the EEH
 teardown.
 
 Check in the read/write functions in the header.
 We give them the same semantics as usual PCI operations:
  - a write to a channel that is down is ignored.
  - a read from a channel that is down returns all fs.
 
 Also, we try to access the MMIO space of a vPHB device as part of the
 PCI disable path. Because that's a read that bypasses most of our usual
 checks, we handle it explicitly.
 
 As far as user visible warnings go:
  - Check link state in file ops, return -EIO if down.
  - Be reasonably quiet if there's an error in a teardown path,
or when we already know the hardware is going down.
  - Throw a big WARN if someone tries to start a CXL operation
while the card is down. This gives a useful stacktrace for
debugging whatever is doing that.
 

My previous comments appear to have been addressed; making functions from those
macros was a good move. I can't speak too much for the exact function of the
patch but the code looks good.

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/context.c |  6 +++-
  drivers/misc/cxl/cxl.h | 44 ++--
  drivers/misc/cxl/file.c| 19 +
  drivers/misc/cxl/native.c  | 71 
 --
  drivers/misc/cxl/vphb.c| 26 +
  5 files changed, 154 insertions(+), 12 deletions(-)
 
 diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
 index 1287148629c0..615842115848 100644
 --- a/drivers/misc/cxl/context.c
 +++ b/drivers/misc/cxl/context.c
 @@ -193,7 +193,11 @@ int __detach_context(struct cxl_context *ctx)
   if (status != STARTED)
   return -EBUSY;
  
 - WARN_ON(cxl_detach_process(ctx));
 + /* Only warn if we detached while the link was OK.
 +  * If detach fails when hw is down, we don't care.
 +  */
 + WARN_ON(cxl_detach_process(ctx) 
 + cxl_adapter_link_ok(ctx-afu-adapter));
   flush_work(ctx-fault_work); /* Only needed for dedicated process */
   put_pid(ctx-pid);
   cxl_ctx_put();
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 6a93bfbcd826..9b9e89fd02cc 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -531,6 +531,14 @@ struct cxl_process_element {
   __be32 software_state;
  } __packed;
  
 +static inline bool cxl_adapter_link_ok(struct cxl *cxl)
 +{
 + struct pci_dev *pdev;
 +
 + pdev = to_pci_dev(cxl-dev.parent);
 + return !pci_channel_offline(pdev);
 +}
 +
  static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
  {
   WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
 @@ -539,12 +547,16 @@ static inline void __iomem *_cxl_p1_addr(struct cxl 
 *cxl, cxl_p1_reg_t reg)
  
  static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
  {
 - out_be64(_cxl_p1_addr(cxl, reg), val);
 + if (likely(cxl_adapter_link_ok(cxl)))
 + out_be64(_cxl_p1_addr(cxl, reg), val);
  }
  
  static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
  {
 - return in_be64(_cxl_p1_addr(cxl, reg));
 + if (likely(cxl_adapter_link_ok(cxl)))
 + return in_be64(_cxl_p1_addr(cxl, reg));
 + else
 + return ~0ULL;
  }
  
  static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t 
 reg)
 @@ -555,12 +567,16 @@ static inline void __iomem *_cxl_p1n_addr(struct 
 cxl_afu *afu, cxl_p1n_reg_t reg
  
  static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 
 val)
  {
 - out_be64(_cxl_p1n_addr(afu, reg), val);
 + if (likely(cxl_adapter_link_ok(afu-adapter)))
 + out_be64(_cxl_p1n_addr(afu, reg), val);
  }
  
  static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
  {
 - return in_be64(_cxl_p1n_addr(afu, reg));
 + if (likely(cxl_adapter_link_ok(afu-adapter)))
 + return in_be64(_cxl_p1n_addr(afu, reg));
 + else
 + return ~0ULL;
  }
  
  static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t 
 reg)
 @@ -570,22 +586,34 @@ static inline void __iomem *_cxl_p2n_addr(struct 
 cxl_afu *afu, cxl_p2n_reg_t reg
  
  static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 
 val)
  {
 - out_be64(_cxl_p2n_addr(afu, reg), val);
 + if (likely(cxl_adapter_link_ok(afu-adapter)))
 + out_be64(_cxl_p2n_addr(afu, reg), val);
  }
  
  static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
  {
 - return in_be64(_cxl_p2n_addr(afu, reg));
 + if (likely(cxl_adapter_link_ok(afu-adapter

Re: [PATCH v3 01/11] cxl: Convert MMIO read/write macros to inline functions

2015-08-12 Thread Cyril Bur
On Wed, 12 Aug 2015 10:48:10 +1000
Daniel Axtens d...@axtens.net wrote:

 We're about to make these more complex, so make them functions
 first.
 

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/cxl.h | 51 
 ++
  1 file changed, 35 insertions(+), 16 deletions(-)
 
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index 4fd66cabde1e..6a93bfbcd826 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -537,10 +537,15 @@ static inline void __iomem *_cxl_p1_addr(struct cxl 
 *cxl, cxl_p1_reg_t reg)
   return cxl-p1_mmio + cxl_reg_off(reg);
  }
  
 -#define cxl_p1_write(cxl, reg, val) \
 - out_be64(_cxl_p1_addr(cxl, reg), val)
 -#define cxl_p1_read(cxl, reg) \
 - in_be64(_cxl_p1_addr(cxl, reg))
 +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
 +{
 + out_be64(_cxl_p1_addr(cxl, reg), val);
 +}
 +
 +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
 +{
 + return in_be64(_cxl_p1_addr(cxl, reg));
 +}
  
  static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t 
 reg)
  {
 @@ -548,26 +553,40 @@ static inline void __iomem *_cxl_p1n_addr(struct 
 cxl_afu *afu, cxl_p1n_reg_t reg
   return afu-p1n_mmio + cxl_reg_off(reg);
  }
  
 -#define cxl_p1n_write(afu, reg, val) \
 - out_be64(_cxl_p1n_addr(afu, reg), val)
 -#define cxl_p1n_read(afu, reg) \
 - in_be64(_cxl_p1n_addr(afu, reg))
 +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 
 val)
 +{
 + out_be64(_cxl_p1n_addr(afu, reg), val);
 +}
 +
 +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
 +{
 + return in_be64(_cxl_p1n_addr(afu, reg));
 +}
  
  static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t 
 reg)
  {
   return afu-p2n_mmio + cxl_reg_off(reg);
  }
  
 -#define cxl_p2n_write(afu, reg, val) \
 - out_be64(_cxl_p2n_addr(afu, reg), val)
 -#define cxl_p2n_read(afu, reg) \
 - in_be64(_cxl_p2n_addr(afu, reg))
 +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 
 val)
 +{
 + out_be64(_cxl_p2n_addr(afu, reg), val);
 +}
  
 +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
 +{
 + return in_be64(_cxl_p2n_addr(afu, reg));
 +}
  
 -#define cxl_afu_cr_read64(afu, cr, off) \
 - in_le64((afu)-afu_desc_mmio + (afu)-crs_offset + ((cr) * 
 (afu)-crs_len) + (off))
 -#define cxl_afu_cr_read32(afu, cr, off) \
 - in_le32((afu)-afu_desc_mmio + (afu)-crs_offset + ((cr) * 
 (afu)-crs_len) + (off))
 +static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off)
 +{
 + return in_le64((afu)-afu_desc_mmio + (afu)-crs_offset + ((cr) * 
 (afu)-crs_len) + (off));
 +}
 +
 +static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off)
 +{
 + return in_le32((afu)-afu_desc_mmio + (afu)-crs_offset + ((cr) * 
 (afu)-crs_len) + (off));
 +}
  u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
  u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 10/11] cxl: EEH support

2015-08-12 Thread Cyril Bur
On Wed, 12 Aug 2015 10:48:19 +1000
Daniel Axtens d...@axtens.net wrote:

 EEH (Enhanced Error Handling) allows a driver to recover from the
 temporary failure of an attached PCI card. Enable basic CXL support
 for EEH.
 

Looks like the only change since the previous version was the removal of the
#ifdef — assuming that is correct.

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/cxl.h  |   1 +
  drivers/misc/cxl/pci.c  | 252 
 
  drivers/misc/cxl/vphb.c |   8 ++
  3 files changed, 261 insertions(+)
 
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index cda02412b01e..6f5386653dae 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -726,6 +726,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
  
  void cxl_stop_trace(struct cxl *cxl);
  int cxl_pci_vphb_add(struct cxl_afu *afu);
 +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
  void cxl_pci_vphb_remove(struct cxl_afu *afu);
  
  extern struct pci_driver cxl_pci_driver;
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index b4a68a896a33..1eb26a357ce0 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -24,6 +24,7 @@
  #include asm/io.h
  
  #include cxl.h
 +#include misc/cxl.h
  
  
  #define CXL_PCI_VSEC_ID  0x1280
 @@ -1246,10 +1247,261 @@ static void cxl_remove(struct pci_dev *dev)
   cxl_remove_adapter(adapter);
  }
  
 +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
 + pci_channel_state_t state)
 +{
 + struct pci_dev *afu_dev;
 + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
 + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
 +
 + /* There should only be one entry, but go through the list
 +  * anyway
 +  */
 + list_for_each_entry(afu_dev, afu-phb-bus-devices, bus_list) {
 + if (!afu_dev-driver)
 + continue;
 +
 + afu_dev-error_state = state;
 +
 + if (afu_dev-driver-err_handler)
 + afu_result = 
 afu_dev-driver-err_handler-error_detected(afu_dev,
 + 
   state);
 + /* Disconnect trumps all, NONE trumps NEED_RESET */
 + if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 + result = PCI_ERS_RESULT_DISCONNECT;
 + else if ((afu_result == PCI_ERS_RESULT_NONE) 
 +  (result == PCI_ERS_RESULT_NEED_RESET))
 + result = PCI_ERS_RESULT_NONE;
 + }
 + return result;
 +}
 +
 +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
 +pci_channel_state_t state)
 +{
 + struct cxl *adapter = pci_get_drvdata(pdev);
 + struct cxl_afu *afu;
 + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
 + int i;
 +
 + /* At this point, we could still have an interrupt pending.
 +  * Let's try to get them out of the way before they do
 +  * anything we don't like.
 +  */
 + schedule();
 +
 + /* If we're permanently dead, give up. */
 + if (state == pci_channel_io_perm_failure) {
 + /* Tell the AFU drivers; but we don't care what they
 +  * say, we're going away.
 +  */
 + for (i = 0; i  adapter-slices; i++) {
 + afu = adapter-afu[i];
 + cxl_vphb_error_detected(afu, state);
 + }
 + return PCI_ERS_RESULT_DISCONNECT;
 + }
 +
 + /* Are we reflashing?
 +  *
 +  * If we reflash, we could come back as something entirely
 +  * different, including a non-CAPI card. As such, by default
 +  * we don't participate in the process. We'll be unbound and
 +  * the slot re-probed. (TODO: check EEH doesn't blindly rebind
 +  * us!)
 +  *
 +  * However, this isn't the entire story: for reliablity
 +  * reasons, we usually want to reflash the FPGA on PERST in
 +  * order to get back to a more reliable known-good state.
 +  *
 +  * This causes us a bit of a problem: if we reflash we can't
 +  * trust that we'll come back the same - we could have a new
 +  * image and been PERSTed in order to load that
 +  * image. However, most of the time we actually *will* come
 +  * back the same - for example a regular EEH event.
 +  *
 +  * Therefore, we allow the user to assert that the image is
 +  * indeed the same and that we should continue on into EEH
 +  * anyway.
 +  */
 + if (adapter-perst_loads_image  !adapter-perst_same_image) {
 + /* TODO take the PHB out of CXL mode */
 + dev_info(pdev-dev, reflashing, so opting out of EEH!\n);
 + return PCI_ERS_RESULT_NONE;
 + }
 +
 + /*
 +  * At this point, we want to try to recover.  We'll always

Re: [PATCH v3 06/11] cxl: Refactor adaptor init/teardown

2015-08-12 Thread Cyril Bur
On Wed, 12 Aug 2015 10:48:15 +1000
Daniel Axtens d...@axtens.net wrote:

 Some aspects of initialisation are done only once in the lifetime of
 an adapter: for example, allocating memory for the adapter,
 allocating the adapter number, or setting up sysfs/debugfs files.
 
 However, we may want to be able to do some parts of the
 initialisation multiple times: for example, in error recovery we
 want to be able to tear down and then re-map IO memory and IRQs.
 
 Therefore, refactor CXL init/teardown as follows.
 
  - Keep the overarching functions 'cxl_init_adapter' and its pair,
'cxl_remove_adapter'.
 
  - Move all 'once only' allocation/freeing steps to the existing
'cxl_alloc_adapter' function, and its pair 'cxl_release_adapter'
(This involves moving allocation of the adapter number out of
cxl_init_adapter.)
 
  - Create two new functions: 'cxl_configure_adapter', and its pair
'cxl_deconfigure_adapter'. These two functions 'wire up' the
hardware --- they (de)configure resources that do not need to
last the entire lifetime of the adapter
 

Reviewed-by: Cyril Bur cyril...@gmail.com

 Signed-off-by: Daniel Axtens d...@axtens.net
 ---
  drivers/misc/cxl/pci.c | 140 
 ++---
  1 file changed, 87 insertions(+), 53 deletions(-)
 
 diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
 index 484d35a5aead..f6cb089ff981 100644
 --- a/drivers/misc/cxl/pci.c
 +++ b/drivers/misc/cxl/pci.c
 @@ -965,7 +965,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct 
 pci_dev *dev)
   CXL_READ_VSEC_BASE_IMAGE(dev, vsec, adapter-base_image);
   CXL_READ_VSEC_IMAGE_STATE(dev, vsec, image_state);
   adapter-user_image_loaded = !!(image_state  
 CXL_VSEC_USER_IMAGE_LOADED);
 - adapter-perst_loads_image = true;
   adapter-perst_select_user = !!(image_state  
 CXL_VSEC_USER_IMAGE_LOADED);
  
   CXL_READ_VSEC_NAFUS(dev, vsec, adapter-slices);
 @@ -1025,22 +1024,34 @@ static void cxl_release_adapter(struct device *dev)
  
   pr_devel(cxl_release_adapter\n);
  
 + cxl_remove_adapter_nr(adapter);
 +
   kfree(adapter);
  }
  
 -static struct cxl *cxl_alloc_adapter(struct pci_dev *dev)
 +static struct cxl *cxl_alloc_adapter(void)
  {
   struct cxl *adapter;
 + int rc;
  
   if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
   return NULL;
  
 - adapter-dev.parent = dev-dev;
 - adapter-dev.release = cxl_release_adapter;
 - pci_set_drvdata(dev, adapter);
   spin_lock_init(adapter-afu_list_lock);
  
 + if ((rc = cxl_alloc_adapter_nr(adapter)))
 + goto err1;
 +
 + if ((rc = dev_set_name(adapter-dev, card%i, adapter-adapter_num)))
 + goto err2;
 +
   return adapter;
 +
 +err2:
 + cxl_remove_adapter_nr(adapter);
 +err1:
 + kfree(adapter);
 + return NULL;
  }
  
  static int sanitise_adapter_regs(struct cxl *adapter)
 @@ -1049,57 +1060,96 @@ static int sanitise_adapter_regs(struct cxl *adapter)
   return cxl_tlb_slb_invalidate(adapter);
  }
  
 -static struct cxl *cxl_init_adapter(struct pci_dev *dev)
 +/* This should contain *only* operations that can safely be done in
 + * both creation and recovery.
 + */
 +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
  {
 - struct cxl *adapter;
 - bool free = true;
   int rc;
  
 + adapter-dev.parent = dev-dev;
 + adapter-dev.release = cxl_release_adapter;
 + pci_set_drvdata(dev, adapter);
  
 - if (!(adapter = cxl_alloc_adapter(dev)))
 - return ERR_PTR(-ENOMEM);
 + rc = pci_enable_device(dev);
 + if (rc) {
 + dev_err(dev-dev, pci_enable_device failed: %i\n, rc);
 + return rc;
 + }
  
   if ((rc = cxl_read_vsec(adapter, dev)))
 - goto err1;
 + return rc;
  
   if ((rc = cxl_vsec_looks_ok(adapter, dev)))
 - goto err1;
 + return rc;
  
   if ((rc = setup_cxl_bars(dev)))
 - goto err1;
 + return rc;
  
   if ((rc = switch_card_to_cxl(dev)))
 - goto err1;
 -
 - if ((rc = cxl_alloc_adapter_nr(adapter)))
 - goto err1;
 -
 - if ((rc = dev_set_name(adapter-dev, card%i, adapter-adapter_num)))
 - goto err2;
 + return rc;
  
   if ((rc = cxl_update_image_control(adapter)))
 - goto err2;
 + return rc;
  
   if ((rc = cxl_map_adapter_regs(adapter, dev)))
 - goto err2;
 + return rc;
  
   if ((rc = sanitise_adapter_regs(adapter)))
 - goto err2;
 + goto err;
  
   if ((rc = init_implementation_adapter_regs(adapter, dev)))
 - goto err3;
 + goto err;
  
   if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI)))
 - goto err3;
 + goto err;
  
   /* If recovery happened, the last step is to turn on snooping

[PATCH v2] powerpc: Fix checkstop in native_hpte_clear() with lockdep

2015-10-07 Thread Cyril Bur
native_hpte_clear() is called in real mode from two places:
- Early in boot during htab initialisation if firmware assisted dump is
  active.
- Late in the kexec path.

In both contexts there is no need to disable interrupts as they are
already disabled. Furthermore, locking around the tlbie() is only required
for pre POWER5 hardware.

On POWER5 or newer hardware concurrent tlbie()s work as expected and on pre
POWER5 hardware concurrent tlbie()s could result in deadlock. This code
would only be executed at crashdump time, during which all bets are off,
concurrent tlbie()s are unlikely and taking locks is unsafe therefore the
best course of action is to simply do nothing. Concurrent tlbie()s are not
possible in the first case as secondary CPUs have not come up yet.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
Tested on POWER8 system by applying this patch to the petitboot kernel,
kexecing into Linus' tree with this patch applied and from there kexecing
into Ubuntu 3.19.0-26.

v2: No code change.
Addition of comment in machdep.h and phrasing tweaks elsewhere.

 arch/powerpc/include/asm/machdep.h |  9 +++--
 arch/powerpc/mm/hash_native_64.c   | 23 +++
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h 
b/arch/powerpc/include/asm/machdep.h
index cab6753..3f191f5 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -61,8 +61,13 @@ struct machdep_calls {
   unsigned long addr,
   unsigned char *hpte_slot_array,
   int psize, int ssize, int local);
-   /* special for kexec, to be called in real mode, linear mapping is
-* destroyed as well */
+   /*
+* Special for kexec.
+* To be called in real mode with interrupts disabled. No locks are
+* taken as such, concurrent access on pre POWER5 hardware could result
+* in a deadlock.
+* The linear mapping is destroyed as well.
+*/
void(*hpte_clear_all)(void);
 
void __iomem *  (*ioremap)(phys_addr_t addr, unsigned long size,
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 13befa35..c8822af 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -582,13 +582,21 @@ static void hpte_decode(struct hash_pte *hpte, unsigned 
long slot,
  * be when they isi), and we are the only one left.  We rely on our kernel
  * mapping being 0xC0's and the hardware ignoring those two real bits.
  *
+ * This must be called with interrupts disabled.
+ *
+ * Taking the native_tlbie_lock is unsafe here due to the possibility of
+ * lockdep being on. On pre POWER5 hardware, not taking the lock could
+ * cause deadlock. On POWER5 and newer, not taking the lock is fine. This
+ * only gets called during boot before secondary CPUs have come up, and
+ * during crashdump, when all bets are off anyway.
+ *
  * TODO: add batching support when enabled.  remember, no dynamic memory here,
  * athough there is the control page available...
  */
 static void native_hpte_clear(void)
 {
unsigned long vpn = 0;
-   unsigned long slot, slots, flags;
+   unsigned long slot, slots;
struct hash_pte *hptep = htab_address;
unsigned long hpte_v;
unsigned long pteg_count;
@@ -596,13 +604,6 @@ static void native_hpte_clear(void)
 
pteg_count = htab_hash_mask + 1;
 
-   local_irq_save(flags);
-
-   /* we take the tlbie lock and hold it.  Some hardware will
-* deadlock if we try to tlbie from two processors at once.
-*/
-   raw_spin_lock(_tlbie_lock);
-
slots = pteg_count * HPTES_PER_GROUP;
 
for (slot = 0; slot < slots; slot++, hptep++) {
@@ -614,8 +615,8 @@ static void native_hpte_clear(void)
hpte_v = be64_to_cpu(hptep->v);
 
/*
-* Call __tlbie() here rather than tlbie() since we
-* already hold the native_tlbie_lock.
+* Call __tlbie() here rather than tlbie() since we can't take 
the
+* native_tlbie_lock.
 */
if (hpte_v & HPTE_V_VALID) {
hpte_decode(hptep, slot, , , , );
@@ -625,8 +626,6 @@ static void native_hpte_clear(void)
}
 
asm volatile("eieio; tlbsync; ptesync":::"memory");
-   raw_spin_unlock(_tlbie_lock);
-   local_irq_restore(flags);
 }
 
 /*
-- 
2.6.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 6/8] powerpc: Add the ability to save FPU without giving it up

2015-11-17 Thread Cyril Bur
This patch adds the ability to be able to save the FPU registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch optimises the thread copy path (as a result of a fork() or
clone()) so that the parent thread can return to userspace with hot
registers avoiding a possibly pointless reload of FPU register state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  2 +-
 arch/powerpc/kernel/fpu.S| 21 
 arch/powerpc/kernel/process.c| 46 +++-
 3 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 171ce13..8cf7fd6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -28,7 +28,7 @@ extern void giveup_all(struct task_struct *);
 extern void enable_kernel_fp(void);
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
-extern void __giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
 static inline void disable_kernel_fp(void)
 {
msr_check_and_clear(MSR_FP);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index b063524..15da2b5 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -143,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
blr
 
 /*
- * __giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
  * Enables the FPU for use in the kernel on return.
  */
-_GLOBAL(__giveup_fpu)
+_GLOBAL(save_fpu)
addir3,r3,THREAD/* want THREAD of task */
PPC_LL  r6,THREAD_FPSAVEAREA(r3)
PPC_LL  r5,PT_REGS(r3)
PPC_LCMPI   0,r6,0
bne 2f
addir6,r3,THREAD_FPSTATE
-2: PPC_LCMPI   0,r5,0
-   SAVE_32FPVSRS(0, R4, R6)
+2: SAVE_32FPVSRS(0, R4, R6)
mffsfr0
stfdfr0,FPSTATE_FPSCR(r6)
-   beq 1f
-   PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-   li  r3,MSR_FP|MSR_FE0|MSR_FE1
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   orisr3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
-   andcr4,r4,r3/* disable FP for previous task */
-   PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
 
 /*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index c602b67..51e246a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits)
 EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
+void __giveup_fpu(struct task_struct *tsk)
+{
+   save_fpu(tsk);
+   tsk->thread.regs->msr &= ~MSR_FP;
+#ifdef CONFIG_VSX
+   if (cpu_has_feature(CPU_FTR_VSX))
+   tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
 void giveup_fpu(struct task_struct *tsk)
 {
check_if_tm_restore_required(tsk);
@@ -413,12 +423,46 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
 }
 
+void save_all(struct task_struct *tsk)
+{
+   unsigned long usermsr;
+
+   if (!tsk->thread.regs)
+   return;
+
+   usermsr = tsk->thread.regs->msr;
+
+   if ((usermsr & msr_all_available) == 0)
+   return;
+
+   msr_check_and_set(msr_all_available);
+
+#ifdef CONFIG_PPC_FPU
+   if (usermsr & MSR_FP)
+   save_fpu(tsk);
+#endif
+#ifdef CONFIG_ALTIVEC
+   if (usermsr & MSR_VEC)
+   __giveup_altivec(tsk);
+#endif
+#ifdef CONFIG_VSX
+   if (usermsr & MSR_VSX)
+   __giveup_vsx(tsk);
+#endif
+#ifdef CONFIG_SPE
+   if (usermsr & MSR_SPE)
+   __giveup_spe(tsk);
+#endif
+
+   msr_check_and_clear(msr_all_available);
+}
+
 void flush_all_to_thread(struct task_struct *tsk)
 {
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
-   giveup_all(tsk);
+   save_all(tsk);
 
 #ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
-- 
2.6.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/8] powerpc: Explicitly disable math features when copying thread

2015-11-17 Thread Cyril Bur
With threads leaving the math bits enabled in their saved MSR to indicate
that the hardware is hot and a restore is not needed, children need to turn
it off as when they do get scheduled, there's no way their registers could
have been hot.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 398f7bf..441d9e5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1243,6 +1243,7 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
 
f = ret_from_fork;
}
+   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
 
/*
-- 
2.6.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/8] selftests/powerpc: Test FPU and VMX regs in signal ucontext

2015-11-17 Thread Cyril Bur
Load up the non volatile FPU and VMX regs and ensure that they are the
expected value in a signal handler

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/math/Makefile |   4 +-
 tools/testing/selftests/powerpc/math/fpu_signal.c | 119 +
 tools/testing/selftests/powerpc/math/vmx_signal.c | 124 ++
 3 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_signal.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_signal.c

diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
index 9fa690f..5ce000bf 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,4 +1,4 @@
-TEST_PROGS := fpu_syscall fpu_preempt vmx_syscall vmx_preempt
+TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt 
vmx_signal
 
 all: $(TEST_PROGS)
 
@@ -7,9 +7,11 @@ $(TEST_PROGS): CFLAGS += -O2 -g -pthread
 
 fpu_syscall: fpu_asm.S
 fpu_preempt: fpu_asm.S
+fpu_signal:  fpu_asm.S
 
 vmx_syscall: vmx_asm.S
 vmx_preempt: vmx_asm.S
+vmx_signal: vmx_asm.S
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c 
b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000..ca61d1f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,119 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+2.1};
+
+bool bad_context;
+int running;
+volatile int not_ready;
+extern long preempt_fpu(double *darray, volatile int *not_ready, int 
*sentinal);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+   int i;
+   ucontext_t *uc = context;
+   mcontext_t *mc = >uc_mcontext;
+
+   /* Only the non volatiles were loaded up */
+   for (i = 14; i < 32; i++) {
+   if (mc->fp_regs[i] != darray[i - 14]) {
+   bad_context = true;
+   break;
+   }
+   }
+}
+
+void *signal_fpu_c(void *p)
+{
+   int i;
+   long rc;
+   struct sigaction act;
+   act.sa_sigaction = signal_fpu_sig;
+   act.sa_flags = SA_SIGINFO;
+   rc = sigaction(SIGUSR1, , NULL);
+   if (rc)
+   return p;
+
+   srand(pthread_self());
+   for (i = 0; i < 21; i++)
+   darray[i] = rand();
+
+   rc = preempt_fpu(darray, _ready, );
+
+   return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+   int i, j, rc, threads;
+   void *rc_p;
+   pthread_t *tids;
+
+   threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+   tids = malloc(threads * sizeof(pthread_t));
+   FAIL_IF(!tids);
+
+   running = true;
+   not_ready = threads;
+   for (i = 0; i < threads; i++) {
+   rc = pthread_create([i], NULL, signal_fpu_c, NULL);
+   FAIL_IF(rc);
+   }
+
+   setbuf(stdout, NULL);
+   printf("\tWaiting for all workers to start...");
+   while (not_ready);
+   printf("done\n");
+
+   printf("\tSending signals to all threads %d times...", ITERATIONS);
+   for (i = 0; i < ITERATIONS; i++) {
+   for (j = 0; j < threads; j++) {
+   pthread_kill(tids[j], SIGUSR1);
+   }
+   sleep(1);
+   }
+   printf("done\n");
+
+   printf("\tKilling workers...");
+   running = 0;
+   for (i = 0; i < threads; i++) {
+   pthread_join(tids[i], _p);
+
+   /*
+* Harness will say the fail was here, look at why signal_fpu
+* returned
+*/
+   if ((long) rc_p || bad_context)
+   printf("oops\n");
+   if (bad_context)
+   fprintf(stderr, "\t!! bad_context is true\n");
+   FAIL_IF((long) rc_p || bad_context);
+   }
+   printf("done\n");
+
+   free(tids);
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c 
b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000..007ac9e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,124 @@
+#include 
+#include 
+#

[PATCH 0/8] FP/VEC/VSX switching optimisations

2015-11-17 Thread Cyril Bur
Hi,

These patches are an extension of the work done by Anton
https://patchwork.ozlabs.org/patch/537621/, they'll need to be applied on
top of them.

The goal of these patches is to rework how the 'math' registers (FP, VEC
and VSX) are context switched. Currently the kernel adopts a lazy approach,
always switching userspace tasks with all three facilities disabled and
loads in each set of registers upon receiving each unavailable exception.
The kernel does try to avoid disabling the features in the syscall quick
path, but during testing it appears that even what should be a simple
syscall still causes the kernel to use some facilities (vectorised memcpy
for example) for itself and therefore disable them for the user task.

The lazy approach makes for a small amount of time spent restoring
userspace state and if tasks don't use any of these facilities it is the
correct thing to do. In recent years, new workloads and new features such
as auto vectorisation in GCC have meant that the use of these facilities by
userspace has increased, so much so that some workloads can have a task
take an FP unavailable exception and a VEC unavailable exception almost
every time slice.

This series removes the general laziness in favour of a more selective
approach. If a task uses any of the 'math' facilities the kernel will load
the registers and enable the facilities for future time slices as the
assumption is that the use is likely to continue for some time. This
removes the cost of having to take an exception.

These patches also adds logic to detect if a task had been using a facility
and optimises in the case where the registers are still hot, this provides
another speedup as not only is the cost of the exception saved but the cost
of copying up to 64 x 128 bit registers is also removed.

With these patches applied on top of Antons patches I observe a significant
improvement with Antons context switch microbenchmark using yield():

http://ozlabs.org/~anton/junkcode/context_switch2.c

Using an LE kernel compiled with pseries_le_defconfig

Running:
./context_switch2 --test=yield 8 8
and adding one of --fp, --altivec or --vector
Gives a 5% improvement on a POWER8 CPU.

./context_switch2 --test=yield --fp --altivec --vector 8 8
Gives a 15% improvement on a POWER8 CPU.

I'll take this opportunity to note that 15% can be somewhat misleading. It
may be reasonable to assume that each of the optimisations has had a
compounding effect, this isn't incorrect and the reason behind the apparent
compounding reveals a lot about where the current bottleneck is.

The tests always touch FP first, then VEC then VSX which is the guaranteed
worst case for the way the kernel currently operates. This behaviour will
trigger three subsequent unavailable exceptions. Since the kernel currently
enables all three facilities after taking a VSX unavailable the tests can
be modified to touch VSX->VEC->FP in this order the difference in
performance when touching all three only 5%. There is a compounding effect
in so far as the cost of taking multiple unavailable exception is removed.
This testing also demonstrates that the cost of the exception is by far the
most expensive part of the current lazy approach.

Cyril Bur (8):
  selftests/powerpc: Test the preservation of FPU and VMX regs across
syscall
  selftests/powerpc: Test preservation of FPU and VMX regs across
preemption
  selftests/powerpc: Test FPU and VMX regs in signal ucontext
  powerpc: Explicitly disable math features when copying thread
  powerpc: Restore FPU/VEC/VSX if previously used
  powerpc: Add the ability to save FPU without giving it up
  powerpc: Add the ability to save Altivec without giving it up
  powerpc: Add the ability to save VSX without giving it up

 arch/powerpc/include/asm/processor.h   |   2 +
 arch/powerpc/include/asm/switch_to.h   |   5 +-
 arch/powerpc/kernel/asm-offsets.c  |   2 +
 arch/powerpc/kernel/entry_64.S |  55 +-
 arch/powerpc/kernel/fpu.S  |  25 +--
 arch/powerpc/kernel/ppc_ksyms.c|   4 -
 arch/powerpc/kernel/process.c  | 144 --
 arch/powerpc/kernel/vector.S   |  45 +
 tools/testing/selftests/powerpc/Makefile   |   3 +-
 tools/testing/selftests/powerpc/math/Makefile  |  19 ++
 tools/testing/selftests/powerpc/math/basic_asm.h   |  26 +++
 tools/testing/selftests/powerpc/math/fpu_asm.S | 185 +
 tools/testing/selftests/powerpc/math/fpu_preempt.c |  92 +
 tools/testing/selftests/powerpc/math/fpu_signal.c  | 119 +++
 tools/testing/selftests/powerpc/math/fpu_syscall.c |  79 
 tools/testing/selftests/powerpc/math/vmx_asm.S | 219 +
 tools/testing/selftests/powerpc/math/vmx_preempt.c |  92 +
 tools/testing/selftests/powerpc/math/vmx_signal.c  | 124 
 tools/testing/selftests/powerp

[PATCH 2/8] selftests/powerpc: Test preservation of FPU and VMX regs across preemption

2015-11-17 Thread Cyril Bur
Loop in assembly checking the registers with many threads.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/math/Makefile  |  7 +-
 tools/testing/selftests/powerpc/math/fpu_asm.S | 34 
 tools/testing/selftests/powerpc/math/fpu_preempt.c | 92 ++
 tools/testing/selftests/powerpc/math/vmx_asm.S | 44 ++-
 tools/testing/selftests/powerpc/math/vmx_preempt.c | 92 ++
 5 files changed, 263 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_preempt.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_preempt.c

diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
index 896d9e2..9fa690f 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,12 +1,15 @@
-TEST_PROGS := fpu_syscall vmx_syscall
+TEST_PROGS := fpu_syscall fpu_preempt vmx_syscall vmx_preempt
 
 all: $(TEST_PROGS)
 
 $(TEST_PROGS): ../harness.c
-$(TEST_PROGS): CFLAGS += -O2 -g
+$(TEST_PROGS): CFLAGS += -O2 -g -pthread
 
 fpu_syscall: fpu_asm.S
+fpu_preempt: fpu_asm.S
+
 vmx_syscall: vmx_asm.S
+vmx_preempt: vmx_asm.S
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S 
b/tools/testing/selftests/powerpc/math/fpu_asm.S
index d5412c1..5ff0adc 100644
--- a/tools/testing/selftests/powerpc/math/fpu_asm.S
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -149,3 +149,37 @@ FUNC_START(test_fpu)
POP_BASIC_STACK(256)
blr
 FUNC_END(test_fpu)
+
+#int preempt_fpu(double *darray, volatile int *not_ready, int *sentinal)
+#On starting will (atomically) decrement not_ready as a signal that the FPU
+#has been loaded with darray. Will proceed to check the validity of the FPU
+#registers while sentinal is not zero.
+FUNC_START(preempt_fpu)
+   PUSH_BASIC_STACK(256)
+   std r3,32(sp) #double *darray
+   std r4,40(sp) #volatile int *not_ready
+   std r5,48(sp) #int *sentinal
+   PUSH_FPU(56)
+
+   bl load_fpu
+
+   #Atomic DEC
+   ld r3,40(sp)
+1: lwarx r4,0,r3
+   addi r4,r4,-1
+   stwcx. r4,0,r3
+   bne- 1b
+
+2: ld r3, 32(sp)
+   bl check_fpu
+   cmpdi r3,0
+   bne 3f
+   ld r4, 48(sp)
+   ld r5, 0(r4)
+   cmpwi r5,0
+   bne 2b
+
+3: POP_FPU(56)
+   POP_BASIC_STACK(256)
+   blr
+FUNC_END(preempt_fpu)
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c 
b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000..e24cf9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,92 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+2.1};
+
+volatile int not_ready;
+int running;
+
+extern void preempt_fpu(double *darray, volatile int *not_ready, int 
*sentinal);
+
+void *preempt_fpu_c(void *p)
+{
+   int i;
+   srand(pthread_self());
+   for (i = 0; i < 21; i++)
+   darray[i] = rand();
+
+   /* Test failed if it ever returns */
+   preempt_fpu(darray, _ready, );
+
+   return p;
+}
+
+int test_preempt_fpu(void)
+{
+   int i, rc, threads;
+   pthread_t *tids;
+
+   threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+   tids = malloc((threads) * sizeof(pthread_t));
+   FAIL_IF(!tids);
+
+   running = true;
+   not_ready = threads;
+   for (i = 0; i < threads; i++) {
+   rc = pthread_create([i], NULL, preempt_fpu_c, NULL);
+   FAIL_IF(rc);
+   }
+
+   setbuf(stdout, NULL);
+   /* Not really necessary but nice to wait for every thread to start */
+   printf("\tWaiting for all workers to start...");
+   while(not_ready);
+   printf("done\n");
+
+   printf("\tWaiting for %d seconds to let some workers get preempted...", 
PREEMPT_TIME);
+   sleep(PREEMPT_TIME);
+   printf("done\n");
+
+   printf("\tKilling workers...");
+   running = 0;
+   for (i = 0; i < threads; i++) {
+   void *rc_p;
+   pthread_join(tids[i], _p);
+
+   /*
+* Harness will say the fail was here, look at why preempt_fpu
+* returned
+*/
+   if ((long) rc_p)
+   printf("oops\n");
+   FAIL_IF((long) rc_p);
+   }
+   printf("done\n");
+

[PATCH 1/8] selftests/powerpc: Test the preservation of FPU and VMX regs across syscall

2015-11-17 Thread Cyril Bur
Test that the non volatile floating point and Altivec registers get
correctly preserved across the fork() syscall.

fork() works nicely for this purpose, the registers should be the same for
both parent and child

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/Makefile   |   3 +-
 tools/testing/selftests/powerpc/math/Makefile  |  14 ++
 tools/testing/selftests/powerpc/math/basic_asm.h   |  26 +++
 tools/testing/selftests/powerpc/math/fpu_asm.S | 151 +
 tools/testing/selftests/powerpc/math/fpu_syscall.c |  79 +
 tools/testing/selftests/powerpc/math/vmx_asm.S | 183 +
 tools/testing/selftests/powerpc/math/vmx_syscall.c |  81 +
 7 files changed, 536 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/math/Makefile
 create mode 100644 tools/testing/selftests/powerpc/math/basic_asm.h
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_syscall.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_syscall.c

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 0c2706b..19e8191 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
   switch_endian\
   syscalls \
   tm   \
-  vphn
+  vphn \
+  math
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000..896d9e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,14 @@
+TEST_PROGS := fpu_syscall vmx_syscall
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+$(TEST_PROGS): CFLAGS += -O2 -g
+
+fpu_syscall: fpu_asm.S
+vmx_syscall: vmx_asm.S
+
+include ../../lib.mk
+
+clean:
+   rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/math/basic_asm.h 
b/tools/testing/selftests/powerpc/math/basic_asm.h
new file mode 100644
index 000..27aca79
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/basic_asm.h
@@ -0,0 +1,26 @@
+#include 
+#include 
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+   lis reg,(expr)@highest; \
+   ori reg,reg,(expr)@higher;  \
+   rldicr  reg,reg,32,31;  \
+   orisreg,reg,(expr)@high;\
+   ori reg,reg,(expr)@l;
+
+#define PUSH_BASIC_STACK(size) \
+   std 2,24(sp); \
+   mflrr0; \
+   std r0,16(sp); \
+   mfcrr0; \
+   stw r0,8(sp); \
+   stdusp,-size(sp);
+
+#define POP_BASIC_STACK(size) \
+   addisp,sp,size; \
+   ld  2,24(sp); \
+   ld  r0,16(sp); \
+   mtlrr0; \
+   lwz r0,8(sp); \
+   mtcrr0; \
+
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S 
b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000..d5412c1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,151 @@
+#include "basic_asm.h"
+
+#define PUSH_FPU(pos) \
+   stfdf14,pos(sp); \
+   stfdf15,pos+8(sp); \
+   stfdf16,pos+16(sp); \
+   stfdf17,pos+24(sp); \
+   stfdf18,pos+32(sp); \
+   stfdf19,pos+40(sp); \
+   stfdf20,pos+48(sp); \
+   stfdf21,pos+56(sp); \
+   stfdf22,pos+64(sp); \
+   stfdf23,pos+72(sp); \
+   stfdf24,pos+80(sp); \
+   stfdf25,pos+88(sp); \
+   stfdf26,pos+96(sp); \
+   stfdf27,pos+104(sp); \
+   stfdf28,pos+112(sp); \
+   stfdf29,pos+120(sp); \
+   stfdf30,pos+128(sp); \
+   stfdf31,pos+136(sp);
+
+#define POP_FPU(pos) \
+   lfd f14,pos(sp); \
+   lfd f15,pos+8(sp); \
+   lfd f16,pos+16(sp); \
+   lfd f17,pos+24(sp); \
+   lfd f18,pos+32(sp); \
+   lfd f19,pos+40(sp); \
+   lfd f20,pos+48(sp); \
+   lfd f21,pos+56(sp); \
+   lfd f22,pos+64(sp); \
+   lfd f23,pos+72(sp); \
+   lfd f24,pos+80(sp); \
+   lfd f25,pos+88(sp); \
+   lfd f26,pos+96(sp); \
+   lfd f27,pos+104(sp); \
+   lfd f28,pos+112(sp); \
+   lfd f29,pos+120(sp); \
+   lfd f30,pos+128(sp); \
+   lfd f31,pos+136(sp);
+
+#Careful calling this, it will 'clobber' fpu (by design)
+#Don't call this from C
+FUNC_START(load_fpu)
+   lfd f14,0(r3)
+   lfd f15,8(r3)
+   lfd f16,16(r3)
+   lfd f17,24(r3)
+   lfd f18,32(r3)
+   lfd f19,40(r3)
+   lfd f20,48(r3)
+   lfd f21,56(r3)
+   lfd f22,64(r3)
+   lfd f23,72(r3)
+   lfd f24,80(r3)
+   l

[PATCH 8/8] powerpc: Add the ability to save VSX without giving it up

2015-11-17 Thread Cyril Bur
This patch adds the ability to be able to save the VSX registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU and VEC registers
in the thread copy path to avoid a possibly pointless reload of VSX state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  1 -
 arch/powerpc/kernel/ppc_ksyms.c  |  4 
 arch/powerpc/kernel/process.c| 23 ++-
 arch/powerpc/kernel/vector.S | 17 -
 4 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 372f297..15843d3 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -50,7 +50,6 @@ static inline void disable_kernel_altivec(void)
 extern void enable_kernel_vsx(void);
 extern void flush_vsx_to_thread(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
-extern void __giveup_vsx(struct task_struct *);
 static inline void disable_kernel_vsx(void)
 {
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 41e1607..ef7024da 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
 EXPORT_SYMBOL(store_vr_state);
 #endif
 
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif
-
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 19e803a..e0bb3d7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -248,20 +248,33 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-void giveup_vsx(struct task_struct *tsk)
+void __giveup_vsx(struct task_struct *tsk)
 {
-   check_if_tm_restore_required(tsk);
-
-   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (tsk->thread.regs->msr & MSR_FP)
__giveup_fpu(tsk);
if (tsk->thread.regs->msr & MSR_VEC)
__giveup_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VSX;
+}
+
+void giveup_vsx(struct task_struct *tsk)
+{
+   check_if_tm_restore_required(tsk);
+
+   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
 EXPORT_SYMBOL(giveup_vsx);
 
+void save_vsx(struct task_struct *tsk)
+{
+   if (tsk->thread.regs->msr & MSR_FP)
+   save_fpu(tsk);
+   if (tsk->thread.regs->msr & MSR_VEC)
+   save_altivec(tsk);
+}
+
 void enable_kernel_vsx(void)
 {
WARN_ON(preemptible());
@@ -457,7 +470,7 @@ void save_all(struct task_struct *tsk)
 #endif
 #ifdef CONFIG_VSX
if (usermsr & MSR_VSX)
-   __giveup_vsx(tsk);
+   save_vsx(tsk);
 #endif
 #ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 51b0c17..1c2e7a3 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -151,23 +151,6 @@ _GLOBAL(load_up_vsx)
std r12,_MSR(r1)
b   fast_exception_return
 
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- */
-_GLOBAL(__giveup_vsx)
-   addir3,r3,THREAD/* want THREAD of task */
-   ld  r5,PT_REGS(r3)
-   cmpdi   0,r5,0
-   beq 1f
-   ld  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-   lis r3,MSR_VSX@h
-   andcr4,r4,r3/* disable VSX for previous task */
-   std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-   blr
-
 #endif /* CONFIG_VSX */
 
 
-- 
2.6.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/8] powerpc: Restore FPU/VEC/VSX if previously used

2015-11-17 Thread Cyril Bur
Currently the FPU, VEC and VSX facilities are lazily loaded. This is not a
problem unless a process is using these facilities.

Modern versions of GCC are very good at automatically vectorising code, new
and modernised workloads make use of floating point and vector facilities,
even the kernel makes use of vectorised memcpy.

All this combined greatly increases the cost of a syscall since the kernel
uses the facilities sometimes even in syscall fast-path making it
increasingly common for a thread to take an *_unavailable exception soon
after a syscall, not to mention potentially taking the trifecta.

The obvious overcompensation to this problem is to simply always load all
the facilities on every exit to userspace. Loading up all FPU, VEC and VSX
registers every time can be expensive and if a workload does avoid using
them, it should not be forced to incur this penalty.

An 8bit counter is used to detect if the registers have been used in the
past and the registers are always loaded until the value wraps to back to
zero.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/processor.h |  2 ++
 arch/powerpc/kernel/asm-offsets.c|  2 ++
 arch/powerpc/kernel/entry_64.S   | 55 --
 arch/powerpc/kernel/fpu.S|  4 +++
 arch/powerpc/kernel/process.c| 66 ++--
 arch/powerpc/kernel/vector.S |  4 +++
 6 files changed, 119 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index ac23308..dcab21f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -236,11 +236,13 @@ struct thread_struct {
 #endif
struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
unsigned long   trap_nr;/* last trap # on this thread */
+   u8 load_fp;
 #ifdef CONFIG_ALTIVEC
struct thread_vr_state vr_state;
struct thread_vr_state *vr_save_area;
unsigned long   vrsave;
int used_vr;/* set if process has used altivec */
+   u8 load_vec;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
/* VSR status */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 221d584..0f593d7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -95,12 +95,14 @@ int main(void)
DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
+   DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp));
 #ifdef CONFIG_ALTIVEC
DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
+   DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec));
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c8b4225..46e9869 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -210,7 +210,54 @@ system_call:   /* label this so stack 
traces look sane */
li  r11,-MAX_ERRNO
andi.   
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne-syscall_exit_work
-   cmpld   r3,r11
+
+   /*
+* This is an assembly version of checks performed in restore_math()
+* to avoid calling C unless absolutely necessary.
+* Note: In order to simplify the assembly, if the FP or VEC registers
+* are hot (and therefore restore_math() isn't called) the
+* LOAD_{FP,VEC} thread counter doesn't get incremented.
+* This is likely the best thing to do anyway because hot regs indicate
+* that the workload is doing a lot of syscalls that can be handled
+* quickly and without the need to touch FP or VEC regs (by the kernel).
+* a) If this workload is long running then this is exactly what the
+* kernel should be doing.
+* b) If this workload isn't long running then we'll soon fall back to
+* calling into C and the counter will be incremented regularly again
+* anyway.
+*/
+   ld  r9,PACACURRENT(r13)
+   andi.   r0,r8,MSR_FP
+   addir9,r9,THREAD
+   lbz r5,THREAD_LOAD_FP(r9)
+   /*
+* Goto 2 if !r0 && r5
+* The cmpb works because r5 can only have bits set in the lowest byte
+* and

[PATCH 7/8] powerpc: Add the ability to save Altivec without giving it up

2015-11-17 Thread Cyril Bur
This patch adds the ability to be able to save the VEC registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU registers in the
thread copy path to avoid a possibly pointless reload of VEC state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  2 +-
 arch/powerpc/kernel/process.c| 12 +++-
 arch/powerpc/kernel/vector.S | 24 
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 8cf7fd6..372f297 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -39,7 +39,7 @@ static inline void disable_kernel_fp(void)
 extern void enable_kernel_altivec(void);
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
-extern void __giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
 static inline void disable_kernel_altivec(void)
 {
msr_check_and_clear(MSR_VEC);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51e246a..19e803a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -198,6 +198,16 @@ void enable_kernel_fp(void)
 EXPORT_SYMBOL(enable_kernel_fp);
 
 #ifdef CONFIG_ALTIVEC
+void __giveup_altivec(struct task_struct *tsk)
+{
+   save_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VEC;
+#ifdef CONFIG_VSX
+   if (cpu_has_feature(CPU_FTR_VSX))
+   tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
 void giveup_altivec(struct task_struct *tsk)
 {
check_if_tm_restore_required(tsk);
@@ -443,7 +453,7 @@ void save_all(struct task_struct *tsk)
 #endif
 #ifdef CONFIG_ALTIVEC
if (usermsr & MSR_VEC)
-   __giveup_altivec(tsk);
+   save_altivec(tsk);
 #endif
 #ifdef CONFIG_VSX
if (usermsr & MSR_VSX)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 038cff8..51b0c17 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec)
blr
 
 /*
- * __giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
  */
-_GLOBAL(__giveup_altivec)
+_GLOBAL(save_altivec)
addir3,r3,THREAD/* want THREAD of task */
PPC_LL  r7,THREAD_VRSAVEAREA(r3)
PPC_LL  r5,PT_REGS(r3)
PPC_LCMPI   0,r7,0
bne 2f
addir7,r3,THREAD_VRSTATE
-2: PPC_LCMPI   0,r5,0
-   SAVE_32VRS(0,r4,r7)
+2: SAVE_32VRS(0,r4,r7)
mfvscr  v0
li  r4,VRSTATE_VSCR
stvxv0,r4,r7
-   beq 1f
-   PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
-   lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
-   lis r3,MSR_VEC@h
-#endif
-   andcr4,r4,r3/* disable FP for previous task */
-   PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
 
 #ifdef CONFIG_VSX
-- 
2.6.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc: Fix checkstop in native_hpte_clear() with lockdep

2015-09-30 Thread Cyril Bur
native_hpte_clear() is called in real mode from two places:
- Early in boot during htab initialisation if firmware assisted dump is
  active.
- Late in the kexec path.

In both contexts there is no need to disable interrupts as they are
already disabled. Furthermore, locking around the tlbie() is only required
for pre POWER5 hardware.

On POWER5 or newer hardware concurrent tlbie()s work as expected and on pre
POWER5 hardware concurrent tlbie()s could result in deadlock. This code
would only be executed during a crashdump during which all bets are off,
concurrent tlbie()s are unlikely and taking locks is unsafe therefore the
best course of action is to simply do nothing. Concurrent tlbie()s are not
possible in the first case as secondary CPUs have not come up yet.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/mm/hash_native_64.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 13befa35..c8822af 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -582,13 +582,21 @@ static void hpte_decode(struct hash_pte *hpte, unsigned 
long slot,
  * be when they isi), and we are the only one left.  We rely on our kernel
  * mapping being 0xC0's and the hardware ignoring those two real bits.
  *
+ * This must be called with interrupts disabled.
+ *
+ * Taking the native_tlbie_lock is unsafe here due to the possibility of
+ * lockdep being on. On pre POWER5 hardware, not taking the lock could
+ * cause deadlock. POWER5 and newer not taking the lock is fine. This only
+ * gets called during boot before secondary CPUs have come up and during
+ * crashdump and all bets are off anyway.
+ *
  * TODO: add batching support when enabled.  remember, no dynamic memory here,
  * athough there is the control page available...
  */
 static void native_hpte_clear(void)
 {
unsigned long vpn = 0;
-   unsigned long slot, slots, flags;
+   unsigned long slot, slots;
struct hash_pte *hptep = htab_address;
unsigned long hpte_v;
unsigned long pteg_count;
@@ -596,13 +604,6 @@ static void native_hpte_clear(void)
 
pteg_count = htab_hash_mask + 1;
 
-   local_irq_save(flags);
-
-   /* we take the tlbie lock and hold it.  Some hardware will
-* deadlock if we try to tlbie from two processors at once.
-*/
-   raw_spin_lock(_tlbie_lock);
-
slots = pteg_count * HPTES_PER_GROUP;
 
for (slot = 0; slot < slots; slot++, hptep++) {
@@ -614,8 +615,8 @@ static void native_hpte_clear(void)
hpte_v = be64_to_cpu(hptep->v);
 
/*
-* Call __tlbie() here rather than tlbie() since we
-* already hold the native_tlbie_lock.
+* Call __tlbie() here rather than tlbie() since we can't take 
the
+* native_tlbie_lock.
 */
if (hpte_v & HPTE_V_VALID) {
hpte_decode(hptep, slot, , , , );
@@ -625,8 +626,6 @@ static void native_hpte_clear(void)
}
 
asm volatile("eieio; tlbsync; ptesync":::"memory");
-   raw_spin_unlock(_tlbie_lock);
-   local_irq_restore(flags);
 }
 
 /*
-- 
2.6.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 0/8] FP/VEC/VSX switching optimisations

2015-11-18 Thread Cyril Bur
On Wed, 18 Nov 2015 14:51:25 +
David Laight <david.lai...@aculab.com> wrote:

> From: Cyril Bur
> > Sent: 18 November 2015 03:27  
> ...
> > The goal of these patches is to rework how the 'math' registers (FP, VEC
> > and VSX) are context switched. Currently the kernel adopts a lazy approach,
> > always switching userspace tasks with all three facilities disabled and
> > loads in each set of registers upon receiving each unavailable exception.
> > The kernel does try to avoid disabling the features in the syscall quick
> > path but it during testing it appears that even what should be a simple
> > syscall still causes the kernel to use some facilities (vectorised memcpy
> > for example) for its self and therefore disable it for the user task.  
> 

Hi David,

> Perhaps the kernel should be avoiding using these registers?
> I wonder if the gain from using vectorised memcpy is typically
> enough to warrant the cost of the save and restore?
> 

Yeah, on smaller copies that might be the way to go.

> There may even be scope for kernel code doing a save/restore
> of a small number of registers onto an in-stack save area.

This has been thrown up in the air, there's also the volatile/non-volatiles to
consider and the caveat that glibc doesn't quite respect the ABI here.

As it turns out (and no one is more surprised than me), despite the other
attempts at optimising, this series really has boiled down to removing the need
for processes to take the facility unavailable interrupts.

I do plan to carry on with optimising in this area and will have a look to see
what I can do.

Cyril

> It would need to be linked to the data of the thread
> that owns the fpu registers so that a save request could
> be honoured.
> Pre-emption would probably need to be disabled, but nested
> use, and use from ISR should be ok.
> 
>   David
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/8] selftests/powerpc: Test the preservation of FPU and VMX regs across syscall

2015-11-22 Thread Cyril Bur
On Mon, 23 Nov 2015 11:23:13 +1100
Michael Neuling <mi...@neuling.org> wrote:

> On Wed, 2015-11-18 at 14:26 +1100, Cyril Bur wrote:
> > Test that the non volatile floating point and Altivec registers get
> > correctly preserved across the fork() syscall.  
> 
> Can we add a test for VSX too?  I realise it's the same registers, but
> the enable bits in the MSR are different so it's easy to get them wrong
> in the kernel.

Yeah, I'm sure I could get that wrong haha.

Hmm, this got me thinking. Today we always enable FP and Altivec when we
enable VSX but isn't there a world where we could actually run with FP and
Altivec disabled and VSX on? In which case, is the whole thing volatile or
does the kernel still need to save the subset of the matrix which corresponds 
to non-volatile FPs and non-volatile Altivec?
> 
> Additional comments below.
> 
> > fork() works nicely for this purpose, the registers should be the same for
> > both parent and child
> > 
> > Signed-off-by: Cyril Bur <cyril...@gmail.com>
> > ---
> >  tools/testing/selftests/powerpc/Makefile   |   3 +-
> >  tools/testing/selftests/powerpc/math/Makefile  |  14 ++
> >  tools/testing/selftests/powerpc/math/basic_asm.h   |  26 +++
> >  tools/testing/selftests/powerpc/math/fpu_asm.S | 151 +
> >  tools/testing/selftests/powerpc/math/fpu_syscall.c |  79 +
> >  tools/testing/selftests/powerpc/math/vmx_asm.S | 183 
> > +
> >  tools/testing/selftests/powerpc/math/vmx_syscall.c |  81 +
> >  7 files changed, 536 insertions(+), 1 deletion(-)
> >  create mode 100644 tools/testing/selftests/powerpc/math/Makefile
> >  create mode 100644 tools/testing/selftests/powerpc/math/basic_asm.h
> >  create mode 100644 tools/testing/selftests/powerpc/math/fpu_asm.S
> >  create mode 100644 tools/testing/selftests/powerpc/math/fpu_syscall.c
> >  create mode 100644 tools/testing/selftests/powerpc/math/vmx_asm.S
> >  create mode 100644 tools/testing/selftests/powerpc/math/vmx_syscall.c
> > 
> > diff --git a/tools/testing/selftests/powerpc/Makefile 
> > b/tools/testing/selftests/powerpc/Makefile
> > index 0c2706b..19e8191 100644
> > --- a/tools/testing/selftests/powerpc/Makefile
> > +++ b/tools/testing/selftests/powerpc/Makefile
> > @@ -22,7 +22,8 @@ SUB_DIRS = benchmarks >   >   > \  
> >  >  >switch_endian> > \  
> >  >  >syscalls>  >   > \  
> >  >  >tm>>   >   > \  
> > ->  >vphn
> > +>  >vphn \
> > +>  >math  
> >  
> >  endif
> >  
> > diff --git a/tools/testing/selftests/powerpc/math/Makefile 
> > b/tools/testing/selftests/powerpc/math/Makefile
> > new file mode 100644
> > index 000..896d9e2
> > --- /dev/null
> > +++ b/tools/testing/selftests/powerpc/math/Makefile
> > @@ -0,0 +1,14 @@
> > +TEST_PROGS := fpu_syscall vmx_syscall  
> 
> 
> Add a new .gitignore in this dirfor these new build objects.
> 

Yep

> > +
> > +all: $(TEST_PROGS)
> > +
> > +$(TEST_PROGS): ../harness.c
> > +$(TEST_PROGS): CFLAGS += -O2 -g
> > +
> > +fpu_syscall: fpu_asm.S
> > +vmx_syscall: vmx_asm.S
> > +
> > +include ../../lib.mk
> > +
> > +clean:  
> > +>  > rm -f $(TEST_PROGS) *.o  
> > diff --git a/tools/testing/selftests/powerpc/math/basic_asm.h 
> > b/tools/testing/selftests/powerpc/math/basic_asm.h
> > new file mode 100644
> > index 000..27aca79
> > --- /dev/null
> > +++ b/tools/testing/selftests/powerpc/math/basic_asm.h  
> 
> Can you put this up a directory since it's generically useful for
> powerpc?
> 

Sure why not.

> > @@ -0,0 +1,26 @@
> > +#include 
> > +#include 
> > +
> > +#define LOAD_REG_IMMEDIATE(reg,expr) \  
> > +>  > lis>  > reg,(expr)@highest;>  > \
> > +>  > ori>  > reg,reg,(expr)@higher;>   > \
> > +>  > rldicr>   > reg,reg,32,31;>   > \
> > +>  > oris> > reg,reg,(expr)@high;> > \  
> > +>  > ori>  > reg,reg,(expr)@l;  
> > +
> > +#define PUSH_BASIC_STACK(size) \  
> > +>  > std>  > 2,24(sp); \
> > +>  > mflr> > r0; \
> > +>  > std>  > r0,16(sp); \
> > +>  > mfcr> > r0; \
> > +>  > stw>  > r0,8(sp); \
> > +>  > stdu> > sp,-size(sp);  
> > +
> > +#define POP_BASIC_STACK(size) \  
> 

Re: [PATCH 4/8] powerpc: Explicitly disable math features when copying thread

2015-11-22 Thread Cyril Bur
On Mon, 23 Nov 2015 12:08:38 +1100
Michael Neuling <mi...@neuling.org> wrote:

> On Wed, 2015-11-18 at 14:26 +1100, Cyril Bur wrote:
> > With threads leaving the math bits enabled in their saved MSR to
> > indicate
> > that the hardware is hot and a restore is not needed, children need
> > to turn
> > it off as when they do get scheduled, there's no way their registers
> > could
> > have been hot.
> > 
> > Signed-off-by: Cyril Bur <cyril...@gmail.com>
> > ---
> >  arch/powerpc/kernel/process.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/arch/powerpc/kernel/process.c
> > b/arch/powerpc/kernel/process.c
> > index 398f7bf..441d9e5 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -1243,6 +1243,7 @@ int copy_thread(unsigned long clone_flags,
> > unsigned long usp,
> >  
> > f = ret_from_fork;
> > }
> > +   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);  
> 
> Is this a current bug? 
> 

It is impossible currently because saving the registers (of the parent, before
creating the child) also forces a giveup of the facilities.

The next patch in the series decouples the saving and the giving up which
makes this situation possible.

> Mikey
> 
> > sp -= STACK_FRAME_OVERHEAD;
> >  
> > /*  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 5/8] powerpc: Restore FPU/VEC/VSX if previously used

2015-11-22 Thread Cyril Bur
On Fri, 20 Nov 2015 22:01:04 +1100
Michael Ellerman <m...@ellerman.id.au> wrote:

> On Wed, 2015-11-18 at 14:26 +1100, Cyril Bur wrote:
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index c8b4225..46e9869 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -210,7 +210,54 @@ system_call:   /* label this so stack 
> > traces look sane */
> > li  r11,-MAX_ERRNO
> > andi.   
> > r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
> > bne-syscall_exit_work
> > -   cmpld   r3,r11
> > +
> > +   /*
> > +* This is an assembly version of checks performed in restore_math()
> > +* to avoid calling C unless absolutely necessary.
> > +* Note: In order to simplify the assembly, if the FP or VEC registers
> > +* are hot (and therefore restore_math() isn't called) the
> > +* LOAD_{FP,VEC} thread counter doesn't get incremented.
> > +* This is likely the best thing to do anyway because hot regs indicate
> > +* that the workload is doing a lot of syscalls that can be handled
> > +* quickly and without the need to touch FP or VEC regs (by the kernel).
> > +* a) If this workload is long running then this is exactly what the
> > +* kernel should be doing.
> > +* b) If this workload isn't long running then we'll soon fall back to
> > +* calling into C and the counter will be incremented regularly again
> > +* anyway.
> > +*/
> > +   ld  r9,PACACURRENT(r13)
> > +   andi.   r0,r8,MSR_FP
> > +   addir9,r9,THREAD
> > +   lbz r5,THREAD_LOAD_FP(r9)
> > +   /*
> > +* Goto 2 if !r0 && r5
> > +* The cmpb works because r5 can only have bits set in the lowest byte
> > +* and r0 may or may not have bit 13 set (different byte) but will have
> > +* a zero low byte therefore the low bytes must differ if r5 == true
> > +* and the bit 13 byte must be the same if !r0
> > +*/
> > +   cmpbr7,r0,r5  
> 
> cmpb is new since Power6, which means it doesn't exist on Cell -> Program 
> Check :)
> 
Oops, sorry.

> I'm testing a patch using crandc, but I don't like it.
> 
> I'm not a big fan of the logic here, it's unpleasantly complicated. Did you
> benchmark going to C to do the checks? Or I wonder if we could just check
> THREAD_LOAD_FP || THREAD_LOAD_VEC and if either is set we go to 
> restore_math().
> 

I didn't benchmark going to C mostly because you wanted to avoid calling C
unless necessary in that path. Based off the results I got benchmarking the
this series I expect calling C will also be in the noise of removing the
exception.

> Or on the other hand we check !MSR_FP && !MSR_VEC and if so we go to
> restore_math()?
> 

That seems like the best check to leave in the assembly if you want to avoid
complicated assembly in there.

> > +   cmpldi  r7,0xff0
> > +#ifdef CONFIG_ALTIVEC
> > +   beq 2f
> > +
> > +   lbz r9,THREAD_LOAD_VEC(r9)
> > +   andis.  r0,r8,MSR_VEC@h
> > +   /* Skip (goto 3) if r0 || !r9 */
> > +   bne 3f
> > +   cmpldi  r9,0
> > +   beq 3f
> > +#else
> > +   bne 3f
> > +#endif
> > +2: addir3,r1,STACK_FRAME_OVERHEAD
> > +   bl  restore_math
> > +   ld  r8,_MSR(r1)
> > +   ld  r3,RESULT(r1)
> > +   li  r11,-MAX_ERRNO
> > +
> > +3: cmpld   r3,r11
> > ld  r5,_CCR(r1)
> > bge-syscall_error
> >  .Lsyscall_error_cont:  
> 
> 
> cheers
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V10 00/28] Add new powerpc specific ELF core notes

2016-06-02 Thread Cyril Bur
On 1 June 2016 at 18:26, Anshuman Khandual 
wrote:

> On 05/31/2016 04:42 AM, Michael Ellerman wrote:
> > Hi Laurent,
> >
> > Sorry no. My next branch closed for 4.7 about 3 weeks ago.
> >
> > This series has been blocked for a long time on the gdb support, but
> that is
> > now working. However it still doesn't pass its own selftests, and I had
> some
>
> This series was clearing all of the selftests at the time it was posted.
> But yes, it has some assumptions from timing and sync perspective which
> gets broken some times as the kernel changes. Its been bit difficult to
> perfect the sync requirements as we can do only some much inside the
> transaction once it gets started. There are scopes here to improve these
> selftests but not clearing them today does not really mean the patches are
> now functionally broken.
>
> > disagreements with the implementation - it duplicates a lot of code
> rather
> > than refactoring things.
>
> hmm, sorry, I dont remember the context here. Can you please point to the
> discussion in this regard ?
>
> >
> > I'm waiting on a patch from Cyril which will rework how the TM FP state
> is
> > handled, and that should make this series easier to implement.
>
> Can you please elaborate on this ? Has this patch been posted in the
> mailing
> list ? How does this make it easier for us to implement these ELF notes ?


Hi Anshuman,

I'm doing a bit of a rewrite of the TM handling of the FP/VMX/VSX state.

At the moment it is rather confusing since pt_regs is always the 'live'
state
and there's a ckpt_regs that is the pt_regs for the checkpointed state.
FPU/VMX/VSX
is done differently which is really only creating confusion so I'm changing
it to do the
same at for pt_regs/ckpt_regs. Ultimately this is part of more work from me
but
Michael has told me that at least this bit is useful now so I'm splitting
it off from
the bigger picture and sending asap. At the very least it will make it
easier to know
what and where the transactional state it and where the checkpointed state
is.

It isn't on the list but I hope I'll get it out today.

Cyril


> >
> > The plan is that both should go into 4.8.
>
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>



-- 
Cyril
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/5] selftests/powerpc: Add test to check TM ucontext creation

2016-06-07 Thread Cyril Bur
Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/basic_asm.h|   4 +
 tools/testing/selftests/powerpc/fpu_asm.h  |  72 
 tools/testing/selftests/powerpc/gpr_asm.h  |  96 
 tools/testing/selftests/powerpc/math/fpu_asm.S |  73 +---
 tools/testing/selftests/powerpc/math/vmx_asm.S |  85 +-
 tools/testing/selftests/powerpc/tm/Makefile|   9 +-
 .../powerpc/tm/tm-signal-context-chk-fpu.c |  94 +++
 .../powerpc/tm/tm-signal-context-chk-gpr.c |  96 
 .../powerpc/tm/tm-signal-context-chk-vmx.c | 112 ++
 .../powerpc/tm/tm-signal-context-chk-vsx.c | 127 +
 .../selftests/powerpc/tm/tm-signal-context-chk.c   | 102 +
 tools/testing/selftests/powerpc/tm/tm-signal.S | 105 +
 tools/testing/selftests/powerpc/vmx_asm.h  |  98 
 13 files changed, 920 insertions(+), 153 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/fpu_asm.h
 create mode 100644 tools/testing/selftests/powerpc/gpr_asm.h
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal-context-chk.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal.S
 create mode 100644 tools/testing/selftests/powerpc/vmx_asm.h

diff --git a/tools/testing/selftests/powerpc/basic_asm.h 
b/tools/testing/selftests/powerpc/basic_asm.h
index 3349a07..5131059 100644
--- a/tools/testing/selftests/powerpc/basic_asm.h
+++ b/tools/testing/selftests/powerpc/basic_asm.h
@@ -4,6 +4,10 @@
 #include 
 #include 
 
+#define TBEGIN .long 0x7C00051D
+#define TSUSPEND .long 0x7C0005DD
+#define TRESUME .long 0x7C2005DD
+
 #define LOAD_REG_IMMEDIATE(reg,expr) \
lis reg,(expr)@highest; \
ori reg,reg,(expr)@higher;  \
diff --git a/tools/testing/selftests/powerpc/fpu_asm.h 
b/tools/testing/selftests/powerpc/fpu_asm.h
new file mode 100644
index 000..a73a7a9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/fpu_asm.h
@@ -0,0 +1,72 @@
+#ifndef _SELFTESTS_POWERPC_FPU_ASM_H
+#define _SELFTESTS_POWERPC_FPU_ASM_H
+#include "basic_asm.h"
+
+#define PUSH_FPU(stack_size) \
+   stfdf31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+   stfdf30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+   stfdf29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+   stfdf28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+   stfdf27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+   stfdf26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+   stfdf25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+   stfdf24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+   stfdf23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+   stfdf22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+   stfdf21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+   stfdf20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+   stfdf19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+   stfdf18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+   stfdf17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+   stfdf16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+   stfdf15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+   stfdf14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+#define POP_FPU(stack_size) \
+   lfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+   lfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+   lfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+   lfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+   lfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+   lfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+   lfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+   lfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+   lfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+   lfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+   lfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+   lfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+   lfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+   lfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+   lfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+   lfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+   lfd

[PATCH 3/5] powerpc: tm: Always use fp_state and vr_state to store live registers

2016-06-07 Thread Cyril Bur
There is currently an inconsistency as to how the entire CPU register
state is saved and restored when a thread uses transactional memory
(TM).

Using transactional memory results in the CPU having duplicated
(almost all) of its register state. This duplication results in a set
of registers which can be considered 'live', those being currently
modified by the instructions being executed and another set that is
frozen at a point in time.

On context switch, both sets of state have to be saved and (later)
restored. These two states are often called a variety of different
things. Common terms for the state which only exists after the CPU has
entered a transaction (performed a TBEGIN instruction) in hardware are
'transactional' or 'speculative'.

Between a TBEGIN and a TEND or TABORT (or an event that causes the
hardware to abort), regardless of the use of TSUSPEND the
transactional state can be referred to as the live state.

The second state is often referred to as the 'checkpointed' state
and is a duplication of the live state when the TBEGIN instruction is
executed. This state is kept in the hardware and will be rolled back
to on transaction failure.

Currently all the registers stored in pt_regs are ALWAYS the live
registers, that is, when a thread has transactional registers their
values are stored in pt_regs and the checkpointed state is in
ckpt_regs. A strange opposite is true for fp_state. When a thread is
non transactional fp_state holds the live registers. When a thread has
initiated a transaction fp_state holds the checkpointed state and
transact_fp becomes the structure which holds the live state (at this
point it is a transactional state). The same is true for vr_state

This method creates confusion as to where the live state is, in some
circumstances it requires extra work to determine where to put the
live state and prevents the use of common functions designed (probably
before TM) to save the live state.

With this patch pt_regs, fp_state and vr_state all represent the same
thing and the other structures [pending rename] are for checkpointed
state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/kernel/process.c   | 44 +--
 arch/powerpc/kernel/signal_32.c | 50 ++
 arch/powerpc/kernel/signal_64.c | 53 +++
 arch/powerpc/kernel/tm.S| 95 ++---
 arch/powerpc/kernel/traps.c | 12 --
 5 files changed, 116 insertions(+), 138 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ea8a28f..696e0236 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -763,24 +763,12 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 {
unsigned long msr_diff = 0;
 
-   /*
-* If FP/VSX registers have been already saved to the
-* thread_struct, move them to the transact_fp array.
-* We clear the TIF_RESTORE_TM bit since after the reclaim
-* the thread will no longer be transactional.
-*/
if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
-   msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr;
-   if (msr_diff & MSR_FP)
-   memcpy(>transact_fp, >fp_state,
-  sizeof(struct thread_fp_state));
-   if (msr_diff & MSR_VEC)
-   memcpy(>transact_vr, >vr_state,
-  sizeof(struct thread_vr_state));
+   msr_diff = (thr->ckpt_regs.msr & ~thr->regs->msr)
+   & (MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1);
+
clear_ti_thread_flag(ti, TIF_RESTORE_TM);
-   msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
}
-
/*
 * Use the current MSR TM suspended bit to track if we have
 * checkpointed state outstanding.
@@ -799,6 +787,8 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if (!MSR_TM_SUSPENDED(mfmsr()))
return;
 
+   save_all(container_of(thr, struct task_struct, thread));
+
tm_reclaim(thr, thr->regs->msr, cause);
 
/* Having done the reclaim, we now have the checkpointed
@@ -901,7 +891,7 @@ static inline void tm_recheckpoint_new_task(struct 
task_struct *new)
 * If the task was using FP, we non-lazily reload both the original and
 * the speculative FP register states.  This is because the kernel
 * doesn't see if/when a TM rollback occurs, so if we take an FP
-* unavoidable later, we are unable to determine which set of FP regs
+* unavailable later, we are unable to determine which set of FP regs
 * need to be restored.
 */
if (!new->thread.regs)
@@ -917,24 +907,10 @@ static inline void tm_recheckpoint_new_task(struct 
task_struct *new)
 "(new->m

[PATCH 1/5] selftests/powerpc: Check for VSX preservation across userspace preemption

2016-06-07 Thread Cyril Bur
Ensure the kernel switches VSX registers correctly. VSX
registers are all volatile, and despite the kernel preserving VSX
across syscalls, it doesn't have to. Test that during interrupts and
timeslices ending the VSX regs remain the same.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/math/Makefile  |   4 +-
 tools/testing/selftests/powerpc/math/vsx_asm.S |  57 +
 tools/testing/selftests/powerpc/math/vsx_preempt.c | 140 +
 tools/testing/selftests/powerpc/vsx_asm.h  |  71 +++
 4 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/math/vsx_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/vsx_preempt.c
 create mode 100644 tools/testing/selftests/powerpc/vsx_asm.h

diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
index 5b88875..aa6598b 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,4 +1,4 @@
-TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt 
vmx_signal
+TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt 
vmx_signal vsx_preempt
 
 all: $(TEST_PROGS)
 
@@ -13,6 +13,8 @@ vmx_syscall: vmx_asm.S
 vmx_preempt: vmx_asm.S
 vmx_signal: vmx_asm.S
 
+vsx_preempt: vsx_asm.S
+
 include ../../lib.mk
 
 clean:
diff --git a/tools/testing/selftests/powerpc/math/vsx_asm.S 
b/tools/testing/selftests/powerpc/math/vsx_asm.S
new file mode 100644
index 000..4ceaf37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_asm.S
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+#include "../vsx_asm.h"
+
+FUNC_START(check_vsx)
+   PUSH_BASIC_STACK(32)
+   std r3,STACK_FRAME_PARAM(0)(sp)
+   addi r3, r3, 16 * 12 #Second half of array
+   bl store_vsx
+   ld r3,STACK_FRAME_PARAM(0)(sp)
+   bl vsx_memcmp
+   POP_BASIC_STACK(32)
+   blr
+FUNC_END(check_vsx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX have been loaded with varray. Will proceed to check the validity of
+# the VMX registers while running is not zero.
+FUNC_START(preempt_vsx)
+   PUSH_BASIC_STACK(512)
+   std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+   std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+   std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+   bl load_vsx
+   nop
+
+   sync
+   # Atomic DEC
+   ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+   addi r4,r4,-1
+   stwcx. r4,0,r3
+   bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+   bl check_vsx
+   nop
+   cmpdi r3,0
+   bne 3f
+   ld r4,STACK_FRAME_PARAM(2)(sp)
+   ld r5,0(r4)
+   cmpwi r5,0
+   bne 2b
+
+3: POP_BASIC_STACK(512)
+   blr
+FUNC_END(preempt_vsx)
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c 
b/tools/testing/selftests/powerpc/math/vsx_preempt.c
new file mode 100644
index 000..706dbaa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VSX registers change across preemption.
+ * There is no way to be sure preemption happened so this test just
+ * uses many threads and a long wait. As such, a successful test
+ * doesn't mean much but a failure is bad.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[24] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+   {13,14,15,16},{17,18,19,20},{21,22,23,24},
+   {25,26,27,28},{29,30,31,32},{33,34,35,36},
+   {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern long preempt_vsx(vector int *varray, int *threads_starting, int 
*running);
+
+long vsx_memcmp(vector int *a) {
+   vector int zero = {0,0,0,0};
+   int i;
+
+   FAIL_IF(a != var

[PATCH 4/5] powerpc: tm: Rename transct_(*) to ck(\1)_state

2016-06-07 Thread Cyril Bur
Make the structures being used for checkpointed state named
consistently with the pt_regs/ckpt_regs.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/processor.h | 20 +++-
 arch/powerpc/kernel/asm-offsets.c| 12 
 arch/powerpc/kernel/fpu.S|  2 +-
 arch/powerpc/kernel/process.c|  4 +--
 arch/powerpc/kernel/signal.h |  8 ++---
 arch/powerpc/kernel/signal_32.c  | 60 ++--
 arch/powerpc/kernel/signal_64.c  | 32 +--
 arch/powerpc/kernel/tm.S | 12 
 arch/powerpc/kernel/vector.S |  4 +--
 9 files changed, 71 insertions(+), 83 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 009fab1..6fd0f00 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -147,7 +147,7 @@ typedef struct {
 } mm_segment_t;
 
 #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
+#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
 
 /* FP and VSX 0-31 register set */
 struct thread_fp_state {
@@ -266,21 +266,9 @@ struct thread_struct {
unsigned long   tm_ppr;
unsigned long   tm_dscr;
 
-   /*
-* Transactional FP and VSX 0-31 register set.
-* NOTE: the sense of these is the opposite of the integer ckpt_regs!
-*
-* When a transaction is active/signalled/scheduled etc., *regs is the
-* most recent set of/speculated GPRs with ckpt_regs being the older
-* checkpointed regs to which we roll back if transaction aborts.
-*
-* However, fpr[] is the checkpointed 'base state' of FP regs, and
-* transact_fpr[] is the new set of transactional values.
-* VRs work the same way.
-*/
-   struct thread_fp_state transact_fp;
-   struct thread_vr_state transact_vr;
-   unsigned long   transact_vrsave;
+   struct thread_fp_state ckfp_state; /* Checkpointed FP state */
+   struct thread_vr_state ckvr_state; /* Checkpointed VR state */
+   unsigned long   ckvrsave; /* Checkpointed VRSAVE */
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void*   kvm_shadow_vcpu; /* KVM internal data */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 9ea0955..e67741f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -152,12 +152,12 @@ int main(void)
DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
-   DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
-transact_vr));
-   DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
-   transact_vrsave));
-   DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
-transact_fp));
+   DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct,
+ckvr_state));
+   DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct,
+   ckvrsave));
+   DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct,
+ckfp_state));
/* Local pt_regs on stack for Transactional Memory funcs. */
DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
   sizeof(struct pt_regs) + 16);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 15da2b5..181c187 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -68,7 +68,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
SYNC
MTMSRD(r5)
 
-   addir7,r3,THREAD_TRANSACT_FPSTATE
+   addir7,r3,THREAD_CKFPSTATE
lfd fr0,FPSTATE_FPSCR(r7)
MTFSF_L(fr0)
REST_32FPVSRS(0, R4, R7)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 696e0236..15462c9 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -813,8 +813,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
 *
 * In switching we need to maintain a 2nd register state as
 * oldtask->thread.ckpt_regs.  We tm_reclaim(oldproc); this saves the
-* checkpointed (tbegin) state in ckpt_regs and saves the transactional
-* (current) FPRs into oldtask->thread.transact_fpr[].
+* checkpointed (tbegin) state in ckpt_regs, ckfp_state and
+* ckvr_state
 *
 * We also context switch (save) TFHAR/TEXASR/TFIAR in here.
 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
i

[PATCH 0/5] Consistent TM structures

2016-06-07 Thread Cyril Bur
Hi,

The reason for this series is outlined in 3/5. I'll reexplain here
quickly.

If userspace doesn't use TM at all then pt_regs, fp_state and vr_state
hold (almost) all the register state of the CPU.

If userspace uses TM then pt_regs is ALWAYS the live state. This may
be a transactional speculative state or if the thread is between
transactions it is just the regular live state. The checkpointed state
(if needed) always exists in ckpt_regs.
This is not true of fp_state and vr_state which MAY hold a live state
when the thread has not entered a transaction but will then contain
checkpointed values once a thread enters a transaction.
transact_fp and transact_vr are used only when a thread is in a
transaction (active or suspended) to keep the live (but speculative)
state.

Here I aim to remove this disconnect and have everything behave like
pt_regs.

For ease of review I've left patches 3, 4 and 5 separate. It probably
makes sense for them to be squashed into one, the naming inconsistency
between 3 and 4 can't be a good idea.

A few apologies for this series:
 - I had to write tests to have an idea what I've done is correct,
they're still a bit rough around the edges.
 - In the process I made more of the asm helpers shared, as the powerpc/math
selftests had quite a few things I found useful.
 - This pretty much means the 2/5 monster should be a few patches. I'll
split them up.

I didn't want this series held up from initial review while I cleaned
up tests.

Thanks,

Cyril

Cyril Bur (5):
  selftests/powerpc: Check for VSX preservation across userspace
preemption
  selftests/powerpc: Add test to check TM ucontext creation
  powerpc: tm: Always use fp_state and vr_state to store live registers
  powerpc: tm: Rename transct_(*) to ck(\1)_state
  powerpc: Remove do_load_up_transact_{fpu,altivec}

 arch/powerpc/include/asm/processor.h   |  20 +--
 arch/powerpc/include/asm/tm.h  |   5 -
 arch/powerpc/kernel/asm-offsets.c  |  12 +-
 arch/powerpc/kernel/fpu.S  |  26 
 arch/powerpc/kernel/process.c  |  48 ++-
 arch/powerpc/kernel/signal.h   |   8 +-
 arch/powerpc/kernel/signal_32.c|  84 ++---
 arch/powerpc/kernel/signal_64.c|  59 -
 arch/powerpc/kernel/tm.S   |  95 +++---
 arch/powerpc/kernel/traps.c|  12 +-
 arch/powerpc/kernel/vector.S   |  25 
 tools/testing/selftests/powerpc/basic_asm.h|   4 +
 tools/testing/selftests/powerpc/fpu_asm.h  |  72 +++
 tools/testing/selftests/powerpc/gpr_asm.h  |  96 ++
 tools/testing/selftests/powerpc/math/Makefile  |   4 +-
 tools/testing/selftests/powerpc/math/fpu_asm.S |  73 +--
 tools/testing/selftests/powerpc/math/vmx_asm.S |  85 +
 tools/testing/selftests/powerpc/math/vsx_asm.S |  57 +
 tools/testing/selftests/powerpc/math/vsx_preempt.c | 140 +
 tools/testing/selftests/powerpc/tm/Makefile|   9 +-
 .../powerpc/tm/tm-signal-context-chk-fpu.c |  94 ++
 .../powerpc/tm/tm-signal-context-chk-gpr.c |  96 ++
 .../powerpc/tm/tm-signal-context-chk-vmx.c | 112 +
 .../powerpc/tm/tm-signal-context-chk-vsx.c | 127 +++
 .../selftests/powerpc/tm/tm-signal-context-chk.c   | 102 +++
 tools/testing/selftests/powerpc/tm/tm-signal.S | 105 
 tools/testing/selftests/powerpc/vmx_asm.h  |  98 +++
 tools/testing/selftests/powerpc/vsx_asm.h  |  71 +++
 28 files changed, 1343 insertions(+), 396 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/fpu_asm.h
 create mode 100644 tools/testing/selftests/powerpc/gpr_asm.h
 create mode 100644 tools/testing/selftests/powerpc/math/vsx_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/vsx_preempt.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
 create mode 100644 
tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal-context-chk.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal.S
 create mode 100644 tools/testing/selftests/powerpc/vmx_asm.h
 create mode 100644 tools/testing/selftests/powerpc/vsx_asm.h

-- 
2.8.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/5] powerpc: Remove do_load_up_transact_{fpu,altivec}

2016-06-07 Thread Cyril Bur
Previous rework of TM code leaves these functions unused

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/tm.h |  5 -
 arch/powerpc/kernel/fpu.S | 26 --
 arch/powerpc/kernel/vector.S  | 25 -
 3 files changed, 56 deletions(-)

diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index c22d704..82e06ca 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -9,11 +9,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void do_load_up_transact_fpu(struct thread_struct *thread);
-extern void do_load_up_transact_altivec(struct thread_struct *thread);
-#endif
-
 extern void tm_enable(void);
 extern void tm_reclaim(struct thread_struct *thread,
   unsigned long orig_msr, uint8_t cause);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 181c187..08d14b0 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -50,32 +50,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);  
\
 #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
 #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_fpu(struct thread_struct *thread)
- *
- * This is similar to load_up_fpu but for the transactional version of the FP
- * register set.  It doesn't mess with the task MSR or valid flags.
- * Furthermore, we don't do lazy FP with TM currently.
- */
-_GLOBAL(do_load_up_transact_fpu)
-   mfmsr   r6
-   ori r5,r6,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   orisr5,r5,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
-   SYNC
-   MTMSRD(r5)
-
-   addir7,r3,THREAD_CKFPSTATE
-   lfd fr0,FPSTATE_FPSCR(r7)
-   MTFSF_L(fr0)
-   REST_32FPVSRS(0, R4, R7)
-
-   blr
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
 /*
  * Load state from memory into FP registers including FPSCR.
  * Assumes the caller has enabled FP in the MSR.
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index b5d5025..84b19ab 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -7,31 +7,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_altivec(struct thread_struct *thread)
- *
- * This is similar to load_up_altivec but for the transactional version of the
- * vector regs.  It doesn't mess with the task MSR or valid flags.
- * Furthermore, VEC laziness is not supported with TM currently.
- */
-_GLOBAL(do_load_up_transact_altivec)
-   mfmsr   r6
-   orisr5,r6,MSR_VEC@h
-   MTMSRD(r5)
-   isync
-
-   li  r4,1
-   stw r4,THREAD_USED_VR(r3)
-
-   li  r10,THREAD_CKVRSTATE+VRSTATE_VSCR
-   lvx v0,r10,r3
-   mtvscr  v0
-   addir10,r3,THREAD_CKVRSTATE
-   REST_32VRS(0,r4,r10)
-
-   blr
-#endif
-
 /*
  * Load state from memory into VMX registers including VSCR.
  * Assumes the caller has enabled VMX in the MSR.
-- 
2.8.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/5] selftests/powerpc: Check for VSX preservation across userspace preemption

2016-06-10 Thread Cyril Bur
On Thu, 09 Jun 2016 11:35:55 +1000
Daniel Axtens <d...@axtens.net> wrote:

> Yay for tests!
> 
> I have a few minor nits, and one more major one (rc == 2 below).
> 
> > +/*
> > + * Copyright 2015, Cyril Bur, IBM Corp.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version
> > + * 2 of the License, or (at your option) any later version.
> > + */  
> I realise this is well past a lost cause by now, but isn't the idea to
> be version 2, not version 2 or later?
> 
> > +
> > +#include "../basic_asm.h"
> > +#include "../vsx_asm.h"
> > +  
> 
> Some of your other functions start with a comment. That would be super
> helpful here - I'm still not super comfortable I understand the calling
> convention. 
> > +FUNC_START(check_vsx)
> > +   PUSH_BASIC_STACK(32)
> > +   std r3,STACK_FRAME_PARAM(0)(sp)
> > +   addi r3, r3, 16 * 12 #Second half of array
> > +   bl store_vsx
> > +   ld r3,STACK_FRAME_PARAM(0)(sp)
> > +   bl vsx_memcmp
> > +   POP_BASIC_STACK(32)
> > +   blr
> > +FUNC_END(check_vsx)
> > +  
> 
> 
> 
> > +long vsx_memcmp(vector int *a) {
> > +   vector int zero = {0,0,0,0};
> > +   int i;
> > +
> > +   FAIL_IF(a != varray);
> > +
> > +   for(i = 0; i < 12; i++) {
> > +   if (memcmp([i + 12], , 16) == 0) {
> > +   fprintf(stderr, "Detected zero from the VSX reg %d\n", 
> > i + 12);
> > +   return 1;
> > +   }
> > +   }
> > +
> > +   if (memcmp(a, [12], 12 * 16)) {  
> I'm somewhat confused as to how this comparison works. You're comparing
> the new saved ones to the old saved ones, yes?

check_vsx() has put the live registers on the end of the array... so the first
12 in 'a' are the known values and the next 12 are the live values... they
should match.

> > +   long *p = (long *)a;
> > +   fprintf(stderr, "VSX mismatch\n");
> > +   for (i = 0; i < 24; i=i+2)
> > +   fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n",
> > +   i/2 + i%2 + 20, p[i], p[i + 1], p[i + 
> > 24], p[i + 25]);
> > +   return 1;
> > +   }
> > +   return 0;
> > +}
> > +
> > +void *preempt_vsx_c(void *p)
> > +{
> > +   int i, j;
> > +   long rc;
> > +   srand(pthread_self());
> > +   for (i = 0; i < 12; i++)
> > +   for (j = 0; j < 4; j++) {
> > +   varray[i][j] = rand();
> > +   /* Don't want zero because it hides kernel problems */
> > +   if (varray[i][j] == 0)
> > +   j--;
> > +   }
> > +   rc = preempt_vsx(varray, _starting, );
> > +   if (rc == 2)  
> How would rc == 2? AIUI, preempt_vsx returns the value of check_vsx,
> which in turn returns the value of vsx_memcmp, which returns 1 or 0.
> 
> > +   fprintf(stderr, "Caught zeros in VSX compares\n");  
> Isn't it zeros or a mismatched value?

I think that patch went through too many iterations and no enough cleanups.
Fixed

> > +   return (void *)rc;
> > +}
> > +
> > +int test_preempt_vsx(void)
> > +{
> > +   int i, rc, threads;
> > +   pthread_t *tids;
> > +
> > +   threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
> > +   tids = malloc(threads * sizeof(pthread_t));
> > +   FAIL_IF(!tids);
> > +
> > +   running = true;
> > +   threads_starting = threads;
> > +   for (i = 0; i < threads; i++) {
> > +   rc = pthread_create([i], NULL, preempt_vsx_c, NULL);
> > +   FAIL_IF(rc);
> > +   }
> > +
> > +   setbuf(stdout, NULL);
> > +   /* Not really nessesary but nice to wait for every thread to start */
> > +   printf("\tWaiting for %d workers to start...", threads_starting);
> > +   while(threads_starting)
> > +   asm volatile("": : :"memory");  
> I think __sync_synchronise() might be ... more idiomatic or something?
> Not super fussy.
> 

Best to be consistent with how all the other powerpc/math tests do it, which
was initially an MPE recommendation.

> > +   printf("done\n");
> > +
> > +   printf("\tWaiting for %d seconds to let some workers get preempted...", 
> > PREEMPT_TIME);
> > +   s

Re: [PATCH 2/5] selftests/powerpc: Add test to check TM ucontext creation

2016-06-09 Thread Cyril Bur
On Thu, 09 Jun 2016 15:12:51 +1000
Daniel Axtens <d...@axtens.net> wrote:

As stated in the cover-letter, this patch needs the most work and quite a lot
too.

Turns out the comment you took is wrong (the code actually does the opposite),
once again, just sent the series to get eyes on the actual patch.

> I'm trying not to be too nit picky or difficult on tests, so here's a
> pre-written commit message for you:
> 
> "The kernel sets up two sets of ucontexts if the signal was to be
> delivered while the thread was in a transaction. Expected behaviour is
> that the currently executing code is in the first and the checkpointed
> state (the state that will be rolled back to) is in the uc_link
> ucontext.
> 
> The reason for this is that:
> 
>  - code which is not TM aware and installs a signal handler will expect
>to see/modify its currently running state in the uc.
> 
>  - but, that TM-unaware code may have dynamicially linked against code
>which is TM aware and is doing HTM under the hood, so the
>checkpointed state needs to be made available somewhere
> 
> Test if the live and checkpointed state is made stored correctly.
> Test:
>  - GPRs
>  - FP registers
>  - VMX
>  - VSX
> "
> 
> > +#define TBEGIN .long 0x7C00051D
> > +#define TSUSPEND .long 0x7C0005DD
> > +#define TRESUME .long 0x7C2005DD  
> You define these 3 opcodes in a number of files. I assume you're going
> to consolidate them in v2?
> 
> > + * The kernel sets up two sets of ucontexts if the signal was to be 
> > delivered
> > + * while the thread was in a transaction. Expected behaviour is that the
> > + * currently executing code is in the first and the checkpointed state (the
> > + * state that will be rolled back to) is in the uc_link ucontext.
> > + *
> > + * The reason for this is that code which is not TM aware and installs a 
> > signal
> > + * handler will expect to see/modify its currently running state in the uc,
> > + * this code may have dynamicially linked against code which is TM aware 
> > and is
> > + * doing HTM under the hood.  
> 
> I had real trouble parsing this sentence the first few times. I think
> it's missing a while:
> 
> The reason for this is that _while_ code which is not TM aware...
> 
> (Although it would be better in several sentences :P)
> 
> > +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
> > @@ -0,0 +1,96 @@
> > +/*
> > + * Copyright 2016, Cyril Bur, IBM Corp.
> > + * Licensed under GPLv2.  
> Ironically, it seems this now needs to be GPLv2+, probably with the
> regular license grant paragraph.
> 
> > +   /* Always be 64bit, don't really care about 32bit */  
> Forgive my ignorance of the test suite: are we guaranteed this by the
> build system, or should we add a SKIP_IF() for it?
> > +   for (i = 0; i < NV_GPR_REGS && !fail; i++) {
> > +   fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]);
> > +   fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + 
> > NV_GPR_REGS]);
> > +   }
> > +   if (fail)
> > +   printf("Failed on %d GPR %lu or %lu\n", i - 1,
> > +   ucp->uc_mcontext.gp_regs[i + 13], 
> > tm_ucp->uc_mcontext.gp_regs[i + 13]);
> > +}
> > +  
> 
> Looking good otherwise!
> 
> Regards,
> Daniel

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc: tm: Always reclaim in start_thread() for exec() class syscalls

2016-06-15 Thread Cyril Bur
Userspace can quite legitimately perform an exec() syscall with a
suspended transaction. exec() does not return to the old process,
rather it loads a new one and starts that; the expectation therefore is
that the new process starts not in a transaction. Currently exec() is
not treated any differently to any other syscall which creates
problems.

Firstly it could allow a new process to start with a suspended
transaction for a binary that no longer exists. This means that the
checkpointed state won't be valid and if the suspended transaction
were ever to be resumed and subsequently aborted (a possibility which
is exceedingly likely as exec()ing will likely doom the transaction)
the new process will jump to invalid state.

Secondly the incorrect attempt to keep the transactional state while
still zeroing state for the new process creates at least two TM Bad
Things. The first triggers on the rfid to return to userspace as
start_thread() has given the new process a 'clean' MSR but the suspend
will still be set in the hardware MSR. The second TM Bad Thing
triggers in __switch_to() as the processor is still transactionally
suspended but __switch_to() wants to zero the TM sprs for the new
process.

This is an example of the outcome of calling exec() with a suspended
transaction. Note the first 700 is likely the first TM bad thing
described earlier, only the kernel can't report it as we've loaded
userspace registers. c0009980 is the rfid in
fast_exception_return()

Bad kernel stack pointer 3fffcfa1a370 at c0009980
Oops: Bad kernel stack pointer, sig: 6 [#1]
SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 0 PID: 2006 Comm: tm-execed Not tainted
4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515 #1
task: c000fbea6d80 ti: c0003ffec000 task.ti: c000fb7ec000
NIP: c0009980 LR:  CTR: 
REGS: c0003ffefd40 TRAP: 0700   Not tainted
(4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515)
MSR: 800300201031 <SF,ME,IR,DR,LE,TM[SE]>  CR:   XER: 
CFAR: c00098b4 SOFTE: 0
PACATMSCRATCH: b001d033
GPR00:  3fffcfa1a370  
GPR04:    
GPR08:    
GPR12: 3fff966611c0   
GPR16:    
GPR20:    
GPR24:    
GPR28:    
NIP [c0009980] fast_exception_return+0xb0/0xb8
LR []   (null)
Call Trace:
Instruction dump:
f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070
e8410080 e8610088 e8810090 e8210078 <4c24> 4800 e8610178 88ed023b
---[ end trace 4d79afb454bb5313 ]---

[ cut here ]
Kernel BUG at c0043e80 [verbose debug info unavailable]
Unexpected TM Bad Thing exception at c0043e80 (msr 0x201033)
Oops: Unrecoverable exception, sig: 6 [#2]
SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 0 PID: 2006 Comm: tm-execed Tainted: G  D
4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515 #1
task: c000fbea6d80 ti: c0003ffec000 task.ti: c000fb7ec000
NIP: c0043e80 LR: c0015a24 CTR: 
REGS: c0003ffef7e0 TRAP: 0700   Tainted: G  D
(4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515)
MSR: 800300201033 <SF,ME,IR,DR,RI,LE,TM[SE]>  CR: 28002828  XER: 
CFAR: c0015a20 SOFTE: 0
PACATMSCRATCH: b001d033
GPR00:  c0003ffefa60 c0db5500 c000fbead000
GPR04: 80031033   ff16
GPR08:  8001d033 c000fb7e3ea0 cfe4
GPR12: 2200 cfe0  
GPR16:    
GPR20:   c000fbea7410 ff16
GPR24: c000ffe1f600 c000fbea8700 c000fbea8700 c000fbead000
GPR28: c0e20198 c000fbea6d80 c000fbeab680 c000fbea6d80
NIP [c0043e80] tm_restore_sprs+0xc/0x1c
LR [c0015a24] __switch_to+0x1f4/0x420
Call Trace:
Instruction dump:
7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8
4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020
---[ end trace 4d79afb454bb5314 ]---

Fixes: bc2a940 ("powerpc: Hook in new transactional memory code")
Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/kernel/process.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel

[PATCH 1/2] selftests/powerpc: exec() with suspended transaction

2016-06-15 Thread Cyril Bur
Perform an exec() class syscall with a suspended transaction.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/tm/Makefile |  3 +-
 tools/testing/selftests/powerpc/tm/tm-exec.c| 55 +
 tools/testing/selftests/powerpc/tm/tm-execed.c  | 47 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c | 15 ---
 tools/testing/selftests/powerpc/tm/tm.h | 23 ++-
 5 files changed, 126 insertions(+), 17 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-exec.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-execed.c

diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
index d0505db..6967ce2 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,4 +1,5 @@
-TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack 
tm-vmxcopy tm-fork tm-tar tm-tmspr
+TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack 
tm-vmxcopy tm-fork tm-tar tm-tmspr \
+   tm-exec tm-execed
 
 all: $(TEST_PROGS)
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c 
b/tools/testing/selftests/powerpc/tm/tm-exec.c
new file mode 100644
index 000..2d1c60f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be performed provided the transactions are suspended.
+ * The exec() class of syscall is unique as a new process is loaded.
+ *
+ * It makes little sense for after an exec() call for the previously
+ * suspended transaction to still exist.
+ */
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm.h"
+
+static char *path;
+
+int test_exec(void)
+{
+   char *file;
+
+   SKIP_IF(!have_htm());
+
+   FAIL_IF(asprintf(, "%s/%s", path, "tm-execed") == -1);
+
+   asm __volatile__(
+   "tbegin.;"
+   "blt1f; "
+   "tsuspend.;"
+   "1: ;"
+   : : : "memory");
+
+   execl(file, "tm-execed", NULL);
+   /* Shouldn't get here */
+   perror("execl() failed");
+   return 1;
+}
+
+int main(int argc, char *argv[])
+{
+   path = dirname(argv[0]);
+   return test_harness(test_exec, "tm_exec");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-execed.c 
b/tools/testing/selftests/powerpc/tm/tm-execed.c
new file mode 100644
index 000..e6119e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-execed.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be done provided the transactions are suspended. The
+ * exec() class of syscall is unique as a new program is loaded.
+ *
+ * It makes little sence for after an exec() call for the previously
+ * suspended transaction to still exist.
+ *
+ * This program also as by product confirms that a process exiting
+ * with a suspended transaction doesn't do anything strange.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm.h"
+
+int test_execed(void)
+{
+   SKIP_IF(!have_htm());
+
+   asm __volatile__(
+   "tbegin.;"
+   "blt1f;"
+   "tsuspend.;"
+   "1: ;"
+   : : : "memory");
+
+   FAIL_IF(failure_is_nesting());
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(test_execed, "tm_execed");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c 
b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index 60560cb..454b965 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -27,21 +27,6 @@ unsigned retries = 0;
 #define TEST_DURATION 10 /* seconds */
 #define TM_RETRIES 100
 
-long failure_code(void)
-{
-   return __builtin_get_texasru() >> 24;
-}
-
-bool failure_is_persistent(void)
-{
-   return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
-}
-
-bool failure_is_syscall(void)
-{
-   return (failure_code() & TM_CAUSE_SYSCALL) =

[PATCH v2 1/2] selftests/powerpc: exec() with suspended transaction

2016-06-16 Thread Cyril Bur
Perform an exec() class syscall with a suspended transaction.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
V2: No change

 tools/testing/selftests/powerpc/tm/Makefile |  3 +-
 tools/testing/selftests/powerpc/tm/tm-exec.c| 55 +
 tools/testing/selftests/powerpc/tm/tm-execed.c  | 47 +
 tools/testing/selftests/powerpc/tm/tm-syscall.c | 15 ---
 tools/testing/selftests/powerpc/tm/tm.h | 23 ++-
 5 files changed, 126 insertions(+), 17 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-exec.c
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-execed.c

diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
b/tools/testing/selftests/powerpc/tm/Makefile
index d0505db..129e9ef 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,4 +1,5 @@
-TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack 
tm-vmxcopy tm-fork tm-tar tm-tmspr
+TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+   tm-vmxcopy tm-fork tm-tar tm-tmspr tm-exec tm-execed
 
 all: $(TEST_PROGS)
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c 
b/tools/testing/selftests/powerpc/tm/tm-exec.c
new file mode 100644
index 000..2d1c60f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be performed provided the transactions are suspended.
+ * The exec() class of syscall is unique as a new process is loaded.
+ *
+ * It makes little sense for after an exec() call for the previously
+ * suspended transaction to still exist.
+ */
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm.h"
+
+static char *path;
+
+int test_exec(void)
+{
+   char *file;
+
+   SKIP_IF(!have_htm());
+
+   FAIL_IF(asprintf(, "%s/%s", path, "tm-execed") == -1);
+
+   asm __volatile__(
+   "tbegin.;"
+   "blt1f; "
+   "tsuspend.;"
+   "1: ;"
+   : : : "memory");
+
+   execl(file, "tm-execed", NULL);
+   /* Shouldn't get here */
+   perror("execl() failed");
+   return 1;
+}
+
+int main(int argc, char *argv[])
+{
+   path = dirname(argv[0]);
+   return test_harness(test_exec, "tm_exec");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-execed.c 
b/tools/testing/selftests/powerpc/tm/tm-execed.c
new file mode 100644
index 000..e6119e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-execed.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be done provided the transactions are suspended. The
+ * exec() class of syscall is unique as a new program is loaded.
+ *
+ * It makes little sence for after an exec() call for the previously
+ * suspended transaction to still exist.
+ *
+ * This program also as by product confirms that a process exiting
+ * with a suspended transaction doesn't do anything strange.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tm.h"
+
+int test_execed(void)
+{
+   SKIP_IF(!have_htm());
+
+   asm __volatile__(
+   "tbegin.;"
+   "blt1f;"
+   "tsuspend.;"
+   "1: ;"
+   : : : "memory");
+
+   FAIL_IF(failure_is_nesting());
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(test_execed, "tm_execed");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c 
b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index 60560cb..454b965 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -27,21 +27,6 @@ unsigned retries = 0;
 #define TEST_DURATION 10 /* seconds */
 #define TM_RETRIES 100
 
-long failure_code(void)
-{
-   return __builtin_get_texasru() >> 24;
-}
-
-bool failure_is_persistent(void)
-{
-   return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
-}
-
-bool failure_is_syscall(void)
-{
-   return (failure_code() &

[PATCH v2 2/2] powerpc: tm: Always reclaim in start_thread() for exec() class syscalls

2016-06-16 Thread Cyril Bur
Userspace can quite legitimately perform an exec() syscall with a
suspended transaction. exec() does not return to the old process,
rather it loads a new one and starts it; the expectation therefore is
that the new process starts not in a transaction. Currently exec() is
not treated any differently to any other syscall which creates
problems.

Firstly it could allow a new process to start with a suspended
transaction for a binary that no longer exists. This means that the
checkpointed state won't be valid and if the suspended transaction
were ever to be resumed and subsequently aborted (a possibility which
is exceedingly likely as exec()ing will likely doom the transaction)
the new process will jump to invalid state.

Secondly the incorrect attempt to keep the transactional state while
still zeroing state for the new process creates at least two TM Bad
Things. The first triggers on the rfid to return to userspace as
start_thread() has given the new process a 'clean' MSR but the suspend
will still be set in the hardware MSR. The second TM Bad Thing
triggers in __switch_to() as the processor is still transactionally
suspended but __switch_to() wants to zero the TM sprs for the new
process.

This is an example of the outcome of calling exec() with a suspended
transaction. Note the first 700 is likely the first TM bad thing
described earlier, only the kernel can't report it as we've loaded
userspace registers. c0009980 is the rfid in
fast_exception_return()

Bad kernel stack pointer 3fffcfa1a370 at c0009980
Oops: Bad kernel stack pointer, sig: 6 [#1]
SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 0 PID: 2006 Comm: tm-execed Not tainted
4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515 #1
task: c000fbea6d80 ti: c0003ffec000 task.ti: c000fb7ec000
NIP: c0009980 LR:  CTR: 
REGS: c0003ffefd40 TRAP: 0700   Not tainted
(4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515)
MSR: 800300201031 <SF,ME,IR,DR,LE,TM[SE]>  CR:   XER: 
CFAR: c00098b4 SOFTE: 0
PACATMSCRATCH: b001d033
GPR00:  3fffcfa1a370  
GPR04:    
GPR08:    
GPR12: 3fff966611c0   
GPR16:    
GPR20:    
GPR24:    
GPR28:    
NIP [c0009980] fast_exception_return+0xb0/0xb8
LR []   (null)
Call Trace:
Instruction dump:
f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070
e8410080 e8610088 e8810090 e8210078 <4c24> 4800 e8610178 88ed023b
---[ end trace 4d79afb454bb5313 ]---

[ cut here ]
Kernel BUG at c0043e80 [verbose debug info unavailable]
Unexpected TM Bad Thing exception at c0043e80 (msr 0x201033)
Oops: Unrecoverable exception, sig: 6 [#2]
SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 0 PID: 2006 Comm: tm-execed Tainted: G  D
4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515 #1
task: c000fbea6d80 ti: c0003ffec000 task.ti: c000fb7ec000
NIP: c0043e80 LR: c0015a24 CTR: 
REGS: c0003ffef7e0 TRAP: 0700   Tainted: G  D
(4.6.0-rc3cyrilb769744c1efb74735f687b36ba6f97b5668e0f515)
MSR: 800300201033 <SF,ME,IR,DR,RI,LE,TM[SE]>  CR: 28002828  XER: 
CFAR: c0015a20 SOFTE: 0
PACATMSCRATCH: b001d033
GPR00:  c0003ffefa60 c0db5500 c000fbead000
GPR04: 80031033   ff16
GPR08:  8001d033 c000fb7e3ea0 cfe4
GPR12: 2200 cfe0  
GPR16:    
GPR20:   c000fbea7410 ff16
GPR24: c000ffe1f600 c000fbea8700 c000fbea8700 c000fbead000
GPR28: c0e20198 c000fbea6d80 c000fbeab680 c000fbea6d80
NIP [c0043e80] tm_restore_sprs+0xc/0x1c
LR [c0015a24] __switch_to+0x1f4/0x420
Call Trace:
Instruction dump:
7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8
4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020
---[ end trace 4d79afb454bb5314 ]---

Fixes: bc2a940 ("powerpc: Hook in new transactional memory code")
Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
V2: Wrap the entire thing in #ifdef to avoid breaking 32bit builds.

 arch/powerpc/kernel/process.c | 10 ++
 1 file changed, 10 inserti

Re: [PATCH V2 5/8] powerpc: Restore FPU/VEC/VSX if previously used

2016-01-17 Thread Cyril Bur
On Fri, 15 Jan 2016 17:02:41 +1100
Michael Neuling <mi...@neuling.org> wrote:

Hey Mikey,

Thanks for the review, as always you're correct :).

> 
> Can you make the inline code easier to read?  Something like
> 
> #ifdef CONFIG_ALTIVEC
> #define loadvec(thr) ((thr).load_vec)
> #else
> #define loadvec(thr) 0
> #endif
> 
> void restore_math(struct pt_regs *regs)
> {
>unsigned long msr;
> 
>if (!current->thread.load_fp && !loadvec(current->thread)
>   return;
> 
> > +
> > +   msr = regs->msr;
> > +   msr_check_and_set(msr_all_available);
> > +
> > +   /*
> > +* Only reload if the bit is not set in the user MSR, the bit BEING 
> > set
> > +* indicates that the registers are hot
> > +*/
> > +#ifdef CONFIG_PPC_FPU
> > +   if (current->thread.load_fp && !(msr & MSR_FP)) {
> > +   load_fp_state(>thread.fp_state);
> > +   msr |= MSR_FP | current->thread.fpexc_mode;
> > +   current->thread.load_fp++;
> > +   }
> > +#endif
> > +#ifdef CONFIG_ALTIVEC
> > +   if (current->thread.load_vec && !(msr & MSR_VEC) &&
> > +   cpu_has_feature(CPU_FTR_ALTIVEC)) {
> > +   load_vr_state(>thread.vr_state);
> > +   current->thread.used_vr = 1;
> > +   msr |= MSR_VEC;
> > +   current->thread.load_vec++;
> > +   }
> > +#endif
> > +#ifdef CONFIG_VSX
> > +   if (!(msr & MSR_VSX) && (msr & (MSR_FP | MSR_VEC)) == (MSR_FP | 
> > MSR_VEC)) {  
> 
> What are you trying to hit with this if statement?
> 
> Seems you are turning on VSX if VSX is not already on but FP and VEC
> is.  Why do you need the check MSR_VSX is not used?  That seems redundant.
> 
> > +   current->thread.used_vsr = 1;
> > +   msr |= MSR_VSX;
> > +   }
> > +#endif
> > +
> > +   msr_check_and_clear(msr_all_available);  
> 
> Why are you doing this?  Why all, and not just the ones you've enabled above?
> 

This is part of the batching of MSR reads and writes. We turned everything on
at the start of restore_math() because it means only one write, the MSR
reads/writes are where the performance hit, not the number of bits changed.
Obviously we subsequently we turn everything off again because it also means
only one write (and we had unconditionally turned everything on).

The check at the start of restore_math() and in entry_64.S should mean that we
don't enter the msr_check_and_set()/msr_check_and_clear() block with nothing to do.

> > +
> > +   regs->msr = msr;
> > +}
> > +
> >  void flush_all_to_thread(struct task_struct *tsk)
> >  {
> > if (tsk->thread.regs) {
> > @@ -832,17 +879,9 @@ void restore_tm_state(struct pt_regs *regs)  
>  
> > msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
> > msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
> > -   if (msr_diff & MSR_FP) {
> > -   msr_check_and_set(MSR_FP);
> > -   load_fp_state(>thread.fp_state);
> > -   msr_check_and_clear(MSR_FP);
> > -   regs->msr |= current->thread.fpexc_mode;
> > -   }
> > -   if (msr_diff & MSR_VEC) {
> > -   msr_check_and_set(MSR_VEC);
> > -   load_vr_state(>thread.vr_state);
> > -   msr_check_and_clear(MSR_VEC);
> > -   }
> > +
> > +   restore_math(regs);
> > +
> > regs->msr |= msr_diff;
> >  }  
>  
> > @@ -1006,6 +1045,11 @@ struct task_struct *__switch_to(struct task_struct 
> > *prev,
> > batch = this_cpu_ptr(_tlb_batch);
> > batch->active = 1;
> > }
> > +
> > +   /* Don't do this on a kernel thread */  
> 
> Why not?
> 
> > +   if (current_thread_info()->task->thread.regs)
> > +   restore_math(current_thread_info()->task->thread.regs);
> > +
> >  #endif /* CONFIG_PPC_BOOK3S_64 */  
>  
> > return last;
> > diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> > index 162d0f7..038cff8 100644
> > --- a/arch/powerpc/kernel/vector.S
> > +++ b/arch/powerpc/kernel/vector.S
> > @@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec)
> > orisr12,r12,MSR_VEC@h
> > std r12,_MSR(r1)
> >  #endif
> > +   /* Don't care if r4 overflows, this is desired behaviour */
> > +   lbz r4,THREAD_LOAD_VEC(r5)
> > +   addir4,r4,1
> > +   stb r4,THREAD_LOAD_VEC(r5)
> > addir6,r5,THREAD_VRSTATE
> > li  r4,1
> > li  r10,VRSTATE_VSCR
> > -- 
> > 2.7.0  
> 
> > ___
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-devOn Fri, 2016-01-15 at 16:04 
> > +1100, Cyril Bur wrote:  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 8/8] powerpc: Add the ability to save VSX without giving it up

2016-01-17 Thread Cyril Bur
On Fri, 15 Jan 2016 17:25:26 +1100
Michael Neuling <mi...@neuling.org> wrote:

> On Fri, 2016-01-15 at 16:04 +1100, Cyril Bur wrote:
> > This patch adds the ability to be able to save the VSX registers to
> > the
> > thread struct without giving up (disabling the facility) next time
> > the
> > process returns to userspace.
> > 
> > This patch builds on a previous optimisation for the FPU and VEC
> > registers
> > in the thread copy path to avoid a possibly pointless reload of VSX
> > state.
> > 
> > Signed-off-by: Cyril Bur <cyril...@gmail.com>
> > ---
> >  arch/powerpc/include/asm/switch_to.h |  1 -
> >  arch/powerpc/kernel/ppc_ksyms.c  |  4 
> >  arch/powerpc/kernel/process.c| 23 ++-
> >  arch/powerpc/kernel/vector.S | 17 -
> >  4 files changed, 18 insertions(+), 27 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/switch_to.h
> > b/arch/powerpc/include/asm/switch_to.h
> > index 29dda9d..4dfcd3e 100644
> > --- a/arch/powerpc/include/asm/switch_to.h
> > +++ b/arch/powerpc/include/asm/switch_to.h
> > @@ -52,7 +52,6 @@ static inline void disable_kernel_altivec(void)
> >  extern void enable_kernel_vsx(void);
> >  extern void flush_vsx_to_thread(struct task_struct *);
> >  extern void giveup_vsx(struct task_struct *);
> > -extern void __giveup_vsx(struct task_struct *);
> >  static inline void disable_kernel_vsx(void)
> >  {
> > msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
> > diff --git a/arch/powerpc/kernel/ppc_ksyms.c
> > b/arch/powerpc/kernel/ppc_ksyms.c
> > index 41e1607..ef7024da 100644
> > --- a/arch/powerpc/kernel/ppc_ksyms.c
> > +++ b/arch/powerpc/kernel/ppc_ksyms.c
> > @@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
> >  EXPORT_SYMBOL(store_vr_state);
> >  #endif
> >  
> > -#ifdef CONFIG_VSX
> > -EXPORT_SYMBOL_GPL(__giveup_vsx);
> > -#endif
> > -
> >  #ifdef CONFIG_EPAPR_PARAVIRT
> >  EXPORT_SYMBOL(epapr_hypercall_start);
> >  #endif
> > diff --git a/arch/powerpc/kernel/process.c
> > b/arch/powerpc/kernel/process.c
> > index 5566c32..3d907b8 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -252,20 +252,33 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
> >  #endif /* CONFIG_ALTIVEC */
> >  
> >  #ifdef CONFIG_VSX
> > -void giveup_vsx(struct task_struct *tsk)
> > +void __giveup_vsx(struct task_struct *tsk)
> >  {
> > -   check_if_tm_restore_required(tsk);
> > -
> > -   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
> > if (tsk->thread.regs->msr & MSR_FP)
> > __giveup_fpu(tsk);
> > if (tsk->thread.regs->msr & MSR_VEC)
> > __giveup_altivec(tsk);
> > +   tsk->thread.regs->msr &= ~MSR_VSX;
> > +}
> > +
> > +void giveup_vsx(struct task_struct *tsk)
> > +{
> > +   check_if_tm_restore_required(tsk);
> > +
> > +   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
> > __giveup_vsx(tsk);
> > msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
> >  }
> >  EXPORT_SYMBOL(giveup_vsx);
> >  
> > +void save_vsx(struct task_struct *tsk)
> > +{
> > +   if (tsk->thread.regs->msr & MSR_FP)
> > +   save_fpu(tsk);
> > +   if (tsk->thread.regs->msr & MSR_VEC)
> > +   save_altivec(tsk);
> > +}
> > +
> >  void enable_kernel_vsx(void)
> >  {
> > WARN_ON(preemptible());
> > @@ -465,7 +478,7 @@ void save_all(struct task_struct *tsk)
> >  #endif
> >  #ifdef CONFIG_VSX
> > if (usermsr & MSR_VSX)
> > -   __giveup_vsx(tsk);
> > +   save_vsx(tsk);  
> 
> This seems suboptimal.  save_vsx() will call save_fpu() and
> save_altivec() again, which you just called earlier in save_all().
> 

Ah yes, will fix

> save_vsx() is only used here, so could be static. 
> 

Thanks.

> Also, put the #ifdef junk as part of the function so that the caller
> doesn't have to deal with it. 
> 

Can do absolutely, however this means that in save_all I can't check if the
function needs to be called or not. For example, without CONFIG_VSX, MSR_VSX
won't exist which means we might end up calling save_vsx THEN checking MSR_VSX
and returning early.

I'm happy to defer to you and mpe on what's nicer, I would side with avoiding
the function call at the cost of ugly #ifdefs but I can always see the merits
of clean code.

Thanks for the review,

Cyril

> Mikey
> 
> >  #endif
> >  #ifdef CONFIG_SPE

[PATCH V2 1/8] selftests/powerpc: Test the preservation of FPU and VMX regs across syscall

2016-01-14 Thread Cyril Bur
Test that the non volatile floating point and Altivec registers get
correctly preserved across the fork() syscall.

fork() works nicely for this purpose, the registers should be the same for
both parent and child

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/Makefile   |   3 +-
 tools/testing/selftests/powerpc/basic_asm.h|  26 +++
 tools/testing/selftests/powerpc/math/.gitignore|   2 +
 tools/testing/selftests/powerpc/math/Makefile  |  14 ++
 tools/testing/selftests/powerpc/math/fpu_asm.S | 161 +
 tools/testing/selftests/powerpc/math/fpu_syscall.c |  90 ++
 tools/testing/selftests/powerpc/math/vmx_asm.S | 193 +
 tools/testing/selftests/powerpc/math/vmx_syscall.c |  92 ++
 8 files changed, 580 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/basic_asm.h
 create mode 100644 tools/testing/selftests/powerpc/math/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/math/Makefile
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_syscall.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_syscall.c

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 0c2706b..19e8191 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
   switch_endian\
   syscalls \
   tm   \
-  vphn
+  vphn \
+  math
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/basic_asm.h 
b/tools/testing/selftests/powerpc/basic_asm.h
new file mode 100644
index 000..27aca79
--- /dev/null
+++ b/tools/testing/selftests/powerpc/basic_asm.h
@@ -0,0 +1,26 @@
+#include 
+#include 
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+   lis reg,(expr)@highest; \
+   ori reg,reg,(expr)@higher;  \
+   rldicr  reg,reg,32,31;  \
+   orisreg,reg,(expr)@high;\
+   ori reg,reg,(expr)@l;
+
+#define PUSH_BASIC_STACK(size) \
+   std 2,24(sp); \
+   mflrr0; \
+   std r0,16(sp); \
+   mfcrr0; \
+   stw r0,8(sp); \
+   stdusp,-size(sp);
+
+#define POP_BASIC_STACK(size) \
+   addisp,sp,size; \
+   ld  2,24(sp); \
+   ld  r0,16(sp); \
+   mtlrr0; \
+   lwz r0,8(sp); \
+   mtcrr0; \
+
diff --git a/tools/testing/selftests/powerpc/math/.gitignore 
b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000..b19b269
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,2 @@
+fpu_syscall
+vmx_syscall
diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000..418bef1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,14 @@
+TEST_PROGS := fpu_syscall vmx_syscall
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+fpu_syscall: fpu_asm.S
+vmx_syscall: vmx_asm.S
+
+include ../../lib.mk
+
+clean:
+   rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S 
b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000..8733874
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+
+#define PUSH_FPU(pos) \
+   stfdf14,pos(sp); \
+   stfdf15,pos+8(sp); \
+   stfdf16,pos+16(sp); \
+   stfdf17,pos+24(sp); \
+   stfdf18,pos+32(sp); \
+   stfdf19,pos+40(sp); \
+   stfdf20,pos+48(sp); \
+   stfdf21,pos+56(sp); \
+   stfdf22,pos+64(sp); \
+   stfdf23,pos+72(sp); \
+   stfdf24,pos+80(sp); \
+   stfdf25,pos+88(sp); \
+   stfdf26,pos+96(sp); \
+   stfdf27,pos+104(sp); \
+   stfdf28,pos+112(sp); \
+   stfdf29,pos+120(sp); \
+   stfdf30,pos+128(sp); \
+   stfdf31,pos+136(sp);
+
+#define POP_FPU(pos) \
+   lfd f14,pos(sp); \
+   lfd f15,pos+8(sp); \
+   lfd f16,pos+16(sp); \
+   lfd f17,pos+24(sp); \
+   lfd f18,pos+32(sp); \
+   lfd f19,pos+40(sp); \
+   lfd f20,pos+48(sp); \
+   lfd f21,pos+56(sp); \
+   lfd f22,pos+64(sp); \
+   lfd  

[PATCH V2 5/8] powerpc: Restore FPU/VEC/VSX if previously used

2016-01-14 Thread Cyril Bur
Currently the FPU, VEC and VSX facilities are lazily loaded. This is not a
problem unless a process is using these facilities.

Modern versions of GCC are very good at automatically vectorising code, new
and modernised workloads make use of floating point and vector facilities,
even the kernel makes use of vectorised memcpy.

All this combined greatly increases the cost of a syscall since the kernel
uses the facilities sometimes even in syscall fast-path making it
increasingly common for a thread to take an *_unavailable exception soon
after a syscall, not to mention potentially taking all three.

The obvious overcompensation to this problem is to simply always load all
the facilities on every exit to userspace. Loading up all FPU, VEC and VSX
registers every time can be expensive and if a workload does avoid using
them, it should not be forced to incur this penalty.

An 8bit counter is used to detect if the registers have been used in the
past and the registers are always loaded until the value wraps to back to
zero.

Several versions of the assembly in entry_64.S. 1. Always calling C, 2.
Performing a common case check and then calling C and 3. A complex check in
asm. After some benchmarking it was determined that avoiding C in the
common case is a performance benefit. The full check in asm greatly
complicated that codepath for a negligible performance gain and the
trade-off was deemed not worth it.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/processor.h |  2 ++
 arch/powerpc/kernel/asm-offsets.c|  2 ++
 arch/powerpc/kernel/entry_64.S   | 21 ++--
 arch/powerpc/kernel/fpu.S|  4 +++
 arch/powerpc/kernel/process.c| 66 ++--
 arch/powerpc/kernel/vector.S |  4 +++
 6 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index ac23308..dcab21f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -236,11 +236,13 @@ struct thread_struct {
 #endif
struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
unsigned long   trap_nr;/* last trap # on this thread */
+   u8 load_fp;
 #ifdef CONFIG_ALTIVEC
struct thread_vr_state vr_state;
struct thread_vr_state *vr_save_area;
unsigned long   vrsave;
int used_vr;/* set if process has used altivec */
+   u8 load_vec;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
/* VSR status */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 07cebc3..10d5eab 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -95,12 +95,14 @@ int main(void)
DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
+   DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp));
 #ifdef CONFIG_ALTIVEC
DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
+   DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec));
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0d525ce..038e0a1 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -210,7 +210,20 @@ system_call:   /* label this so stack 
traces look sane */
li  r11,-MAX_ERRNO
andi.   
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne-syscall_exit_work
-   cmpld   r3,r11
+
+   andi.   r0,r8,MSR_FP
+   beq 2f
+#ifdef CONFIG_ALTIVEC
+   andis.  r0,r8,MSR_VEC@h
+   bne 3f
+#endif
+2: addir3,r1,STACK_FRAME_OVERHEAD
+   bl  restore_math
+   ld  r8,_MSR(r1)
+   ld  r3,RESULT(r1)
+   li  r11,-MAX_ERRNO
+
+3: cmpld   r3,r11
ld  r5,_CCR(r1)
bge-syscall_error
 .Lsyscall_error_cont:
@@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite)
 
/* Check current_thread_info()->flags */
andi.   r0,r4,_TIF_USER_WORK_MASK
-#ifdef CONFIG_PPC_BOOK3E
bne 1f
+#ifdef CONFIG_PPC_BOOK3E
/*
 * Check to see if the dbcr0 register is set up to debug.
 * Use the internal debug mode bit to do this.
@@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite)

[PATCH V2 6/8] powerpc: Add the ability to save FPU without giving it up

2016-01-14 Thread Cyril Bur
This patch adds the ability to be able to save the FPU registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch optimises the thread copy path (as a result of a fork() or
clone()) so that the parent thread can return to userspace with hot
registers avoiding a possibly pointless reload of FPU register state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  2 +-
 arch/powerpc/kernel/fpu.S| 21 
 arch/powerpc/kernel/process.c| 46 +++-
 3 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 5b268b6..c4d50e9 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -28,7 +28,7 @@ extern void giveup_all(struct task_struct *);
 extern void enable_kernel_fp(void);
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
-extern void __giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
 static inline void disable_kernel_fp(void)
 {
msr_check_and_clear(MSR_FP);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index b063524..15da2b5 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -143,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
blr
 
 /*
- * __giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
  * Enables the FPU for use in the kernel on return.
  */
-_GLOBAL(__giveup_fpu)
+_GLOBAL(save_fpu)
addir3,r3,THREAD/* want THREAD of task */
PPC_LL  r6,THREAD_FPSAVEAREA(r3)
PPC_LL  r5,PT_REGS(r3)
PPC_LCMPI   0,r6,0
bne 2f
addir6,r3,THREAD_FPSTATE
-2: PPC_LCMPI   0,r5,0
-   SAVE_32FPVSRS(0, R4, R6)
+2: SAVE_32FPVSRS(0, R4, R6)
mffsfr0
stfdfr0,FPSTATE_FPSCR(r6)
-   beq 1f
-   PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-   li  r3,MSR_FP|MSR_FE0|MSR_FE1
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   orisr3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
-   andcr4,r4,r3/* disable FP for previous task */
-   PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
 
 /*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ec53468..8a96e4f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits)
 EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
+void __giveup_fpu(struct task_struct *tsk)
+{
+   save_fpu(tsk);
+   tsk->thread.regs->msr &= ~MSR_FP;
+#ifdef CONFIG_VSX
+   if (cpu_has_feature(CPU_FTR_VSX))
+   tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
 void giveup_fpu(struct task_struct *tsk)
 {
check_if_tm_restore_required(tsk);
@@ -421,12 +431,46 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
 }
 
+void save_all(struct task_struct *tsk)
+{
+   unsigned long usermsr;
+
+   if (!tsk->thread.regs)
+   return;
+
+   usermsr = tsk->thread.regs->msr;
+
+   if ((usermsr & msr_all_available) == 0)
+   return;
+
+   msr_check_and_set(msr_all_available);
+
+#ifdef CONFIG_PPC_FPU
+   if (usermsr & MSR_FP)
+   save_fpu(tsk);
+#endif
+#ifdef CONFIG_ALTIVEC
+   if (usermsr & MSR_VEC)
+   __giveup_altivec(tsk);
+#endif
+#ifdef CONFIG_VSX
+   if (usermsr & MSR_VSX)
+   __giveup_vsx(tsk);
+#endif
+#ifdef CONFIG_SPE
+   if (usermsr & MSR_SPE)
+   __giveup_spe(tsk);
+#endif
+
+   msr_check_and_clear(msr_all_available);
+}
+
 void flush_all_to_thread(struct task_struct *tsk)
 {
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
-   giveup_all(tsk);
+   save_all(tsk);
 
 #ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
-- 
2.7.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 0/8] FP/VEC/VSX switching optimisations

2016-01-14 Thread Cyril Bur
Cover-letter for V1 of the series is at
https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-November/136350.html

Version one of this series used a cmpb instruction in handcrafted assembly
which it turns out is not supported on older power machines. Michael
suggested replacing it with crandc, which instruction works fine. Testing
also showed no difference in performance between using cmpb and crandc.

The primary objective improving the syscall hot path. While gut feelings
may be that avoiding C is quicker it may also be the case that the C is not
significantly slower. If C is not slower using C would provide a distinct
readability and maintainability advantage.
I have benchmarked a few possible scenarios:
1. Always calling into C.
2. Testing for the common case in assembly and calling into C
3. Using crandc in the full assembly check

All benchmarks are the average of 50 runs of Anton's context switch
benchmark http://www.ozlabs.org/~anton/junkcode/context_switch2.c with
the kernel and ramdisk run under QEMU/KVM on a POWER8.
To test for all cases a variety of flags were passed to the benchmark to
see the effect of only touching a subset of the 'math' register space.

The absolute numbers are in context switches per second can vary greatly
depending on the how the kernel is run (virt/powernv/ramdisk/disk) and as
such units aren't very relevant here as we're interested in a speedup.
The most interesting number here is the %speedup over the previous
scenario. In this case 100% means there was no difference, therefore <100%
indicates a decrease in performance and >100% an increase.

For 1 - Always calling into C
 Flags |  Average   |  Stddev  |

  none | 2059785.00 | 14217.64 |
fp | 1766297.65 | 10576.64 |
fp altivec | 1636125.04 | 5693.84  |
 fp vector | 1640951.76 | 13141.93 |
   altivec | 1815133.80 | 10450.46 |
altivec vector | 1636438.60 | 5475.12  |
vector | 1639628.16 | 11456.06 |
   all | 1629516.32 | 7785.36  |



For 2 - Common case checking in asm before calling into C
 Flags |  Average   |  Stddev  | %speedup vs 1 |

  none | 2058003.64 | 20464.22 | 99.91 |
fp | 1757245.80 | 14455.45 | 99.49 |
fp altivec | 1658240.12 | 6318.41  | 101.35|
 fp vector | 1668912.96 | 9451.47  | 101.70|
   altivec | 1815223.96 | 4819.82  | 100.00|
altivec vector | 1648805.32 | 15100.50 | 100.76|
vector | 1663654.68 | 13814.79 | 101.47|
   all | 1644884.04 | 11315.74 | 100.94|



For 3 - Full checking in ASM using crandc instead of cmpb
 Flags |  Average   |  Stddev  | %speedup vs 2 |

  none | 2066930.52 | 19426.46 | 100.43|
fp | 1781653.24 | 7744.55  | 101.39|
fp altivec | 1653125.84 | 6727.36  | 99.69 |
 fp vector | 1656011.04 | 11678.56 | 99.23 |
   altivec | 1824934.72 | 16842.19 | 100.53|
altivec vector | 1649486.92 | 3219.14  | 100.04|
vector | 1662420.20 | 9609.34  | 99.93 |
   all | 1647933.64 | 11121.22 | 100.19|

From these numbers it appears that reducing the call to C in the common
case is beneficial, possibly up to 1.5% speedup over always calling C. The
benefit of the more complicated asm checking does appear to be very slight,
fractions of a percent at best. In balance it may prove wise to use the
option 2, there are much bigger fish to fry in terms of performance, the
complexity of the assembly for a small fraction of one percent improvement
is not worth it at this stage.

Version 2 of this series also addresses some comments from Mikey Neuling in
the tests such as adding .gitignore and forcing 64 bit compiles of the
tests as they use 64 bit only instructions.


Cyril Bur (8):
  selftests/powerpc: Test the preservation of FPU and VMX regs across
syscall
  selftests/powerpc: Test preservation of FPU and VMX regs across
preemption
  selftests/powerpc: Test FPU and VMX regs in signal ucontext
  powerpc: Explicitly disable math features when copying thread
  powerpc: Restore FPU/VEC/VSX if previously used
  powerpc: Add the ability to save FPU without giving it up
  powerpc: Add the ability to save Altivec without giving it up
  powerpc: Add the ability to save VSX without giving it up

 arch/powerpc/include/asm/processor.h   |   2 +
 arch/powerpc/include/asm/switch_to.h   |   5 +-
 arch/powerpc/kernel/asm-offsets.c  |   2 +
 arch/powerpc/kernel/entry_64.S |  21 +-
 arch/powerpc/kernel/fpu.S  |  25 +--
 arch/powerpc/kernel/ppc_ksyms.c|   4 -
 arch/powerpc/kernel/process.c  | 144 +++--
 arch/powerpc/kernel/ve

[PATCH V2 4/8] powerpc: Explicitly disable math features when copying thread

2016-01-14 Thread Cyril Bur
With threads leaving the math bits enabled in their saved MSR to indicate
that the hardware is hot and a restore is not needed, children need to turn
it off as when they do get scheduled, there's no way their registers could
have been hot.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e..e0c3d2d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
 
f = ret_from_fork;
}
+   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
 
/*
-- 
2.7.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 8/8] powerpc: Add the ability to save VSX without giving it up

2016-01-14 Thread Cyril Bur
This patch adds the ability to be able to save the VSX registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU and VEC registers
in the thread copy path to avoid a possibly pointless reload of VSX state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  1 -
 arch/powerpc/kernel/ppc_ksyms.c  |  4 
 arch/powerpc/kernel/process.c| 23 ++-
 arch/powerpc/kernel/vector.S | 17 -
 4 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 29dda9d..4dfcd3e 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -52,7 +52,6 @@ static inline void disable_kernel_altivec(void)
 extern void enable_kernel_vsx(void);
 extern void flush_vsx_to_thread(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
-extern void __giveup_vsx(struct task_struct *);
 static inline void disable_kernel_vsx(void)
 {
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 41e1607..ef7024da 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
 EXPORT_SYMBOL(store_vr_state);
 #endif
 
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif
-
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5566c32..3d907b8 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -252,20 +252,33 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-void giveup_vsx(struct task_struct *tsk)
+void __giveup_vsx(struct task_struct *tsk)
 {
-   check_if_tm_restore_required(tsk);
-
-   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (tsk->thread.regs->msr & MSR_FP)
__giveup_fpu(tsk);
if (tsk->thread.regs->msr & MSR_VEC)
__giveup_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VSX;
+}
+
+void giveup_vsx(struct task_struct *tsk)
+{
+   check_if_tm_restore_required(tsk);
+
+   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
 EXPORT_SYMBOL(giveup_vsx);
 
+void save_vsx(struct task_struct *tsk)
+{
+   if (tsk->thread.regs->msr & MSR_FP)
+   save_fpu(tsk);
+   if (tsk->thread.regs->msr & MSR_VEC)
+   save_altivec(tsk);
+}
+
 void enable_kernel_vsx(void)
 {
WARN_ON(preemptible());
@@ -465,7 +478,7 @@ void save_all(struct task_struct *tsk)
 #endif
 #ifdef CONFIG_VSX
if (usermsr & MSR_VSX)
-   __giveup_vsx(tsk);
+   save_vsx(tsk);
 #endif
 #ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 51b0c17..1c2e7a3 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -151,23 +151,6 @@ _GLOBAL(load_up_vsx)
std r12,_MSR(r1)
b   fast_exception_return
 
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- */
-_GLOBAL(__giveup_vsx)
-   addir3,r3,THREAD/* want THREAD of task */
-   ld  r5,PT_REGS(r3)
-   cmpdi   0,r5,0
-   beq 1f
-   ld  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-   lis r3,MSR_VSX@h
-   andcr4,r4,r3/* disable VSX for previous task */
-   std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-   blr
-
 #endif /* CONFIG_VSX */
 
 
-- 
2.7.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 7/8] powerpc: Add the ability to save Altivec without giving it up

2016-01-14 Thread Cyril Bur
This patch adds the ability to be able to save the VEC registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU registers in the
thread copy path to avoid a possibly pointless reload of VEC state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  2 +-
 arch/powerpc/kernel/process.c| 12 +++-
 arch/powerpc/kernel/vector.S | 24 
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index c4d50e9..29dda9d 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -41,7 +41,7 @@ static inline void flush_fp_to_thread(struct task_struct *t) 
{ }
 extern void enable_kernel_altivec(void);
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
-extern void __giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
 static inline void disable_kernel_altivec(void)
 {
msr_check_and_clear(MSR_VEC);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8a96e4f..5566c32 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -200,6 +200,16 @@ EXPORT_SYMBOL(enable_kernel_fp);
 #endif /* CONFIG_PPC_FPU */
 
 #ifdef CONFIG_ALTIVEC
+void __giveup_altivec(struct task_struct *tsk)
+{
+   save_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VEC;
+#ifdef CONFIG_VSX
+   if (cpu_has_feature(CPU_FTR_VSX))
+   tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
 void giveup_altivec(struct task_struct *tsk)
 {
check_if_tm_restore_required(tsk);
@@ -451,7 +461,7 @@ void save_all(struct task_struct *tsk)
 #endif
 #ifdef CONFIG_ALTIVEC
if (usermsr & MSR_VEC)
-   __giveup_altivec(tsk);
+   save_altivec(tsk);
 #endif
 #ifdef CONFIG_VSX
if (usermsr & MSR_VSX)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 038cff8..51b0c17 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec)
blr
 
 /*
- * __giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
  */
-_GLOBAL(__giveup_altivec)
+_GLOBAL(save_altivec)
addir3,r3,THREAD/* want THREAD of task */
PPC_LL  r7,THREAD_VRSAVEAREA(r3)
PPC_LL  r5,PT_REGS(r3)
PPC_LCMPI   0,r7,0
bne 2f
addir7,r3,THREAD_VRSTATE
-2: PPC_LCMPI   0,r5,0
-   SAVE_32VRS(0,r4,r7)
+2: SAVE_32VRS(0,r4,r7)
mfvscr  v0
li  r4,VRSTATE_VSCR
stvxv0,r4,r7
-   beq 1f
-   PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
-   lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
-   lis r3,MSR_VEC@h
-#endif
-   andcr4,r4,r3/* disable FP for previous task */
-   PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
 
 #ifdef CONFIG_VSX
-- 
2.7.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 3/8] selftests/powerpc: Test FPU and VMX regs in signal ucontext

2016-01-14 Thread Cyril Bur
Load up the non volatile FPU and VMX regs and ensure that they are the
expected value in a signal handler

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/math/.gitignore   |   2 +
 tools/testing/selftests/powerpc/math/Makefile |   4 +-
 tools/testing/selftests/powerpc/math/fpu_signal.c | 135 +
 tools/testing/selftests/powerpc/math/vmx_signal.c | 138 ++
 4 files changed, 278 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_signal.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_signal.c

diff --git a/tools/testing/selftests/powerpc/math/.gitignore 
b/tools/testing/selftests/powerpc/math/.gitignore
index 1a6f09e..4fe13a4 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -2,3 +2,5 @@ fpu_syscall
 vmx_syscall
 fpu_preempt
 vmx_preempt
+fpu_signal
+vmx_signal
diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
index b6f4158..5b88875 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,4 +1,4 @@
-TEST_PROGS := fpu_syscall fpu_preempt vmx_syscall vmx_preempt
+TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt 
vmx_signal
 
 all: $(TEST_PROGS)
 
@@ -7,9 +7,11 @@ $(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
 fpu_syscall: fpu_asm.S
 fpu_preempt: fpu_asm.S
+fpu_signal:  fpu_asm.S
 
 vmx_syscall: vmx_asm.S
 vmx_preempt: vmx_asm.S
+vmx_signal: vmx_asm.S
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c 
b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000..888aa51
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+   int i;
+   ucontext_t *uc = context;
+   mcontext_t *mc = &uc->uc_mcontext;
+
+   /* Only the non volatiles were loaded up */
+   for (i = 14; i < 32; i++) {
+   if (mc->fp_regs[i] != darray[i - 14]) {
+   bad_context = true;
+   break;
+   }
+   }
+}
+
+void *signal_fpu_c(void *p)
+{
+   int i;
+   long rc;
+   struct sigaction act;
+   act.sa_sigaction = signal_fpu_sig;
+   act.sa_flags = SA_SIGINFO;
+   rc = sigaction(SIGUSR1, &act, NULL);
+   if (rc)
+   return p;
+
+   srand(pthread_self());
+   for (i = 0; i < 21; i++)
+   darray[i] = rand();
+
+   rc = preempt_fpu(darray, &threads_starting, &running);
+
+   return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+   int i, j, rc, threads;
+   void *rc_p;
+   pthread_t *tids;
+
+   threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+   tids = malloc(threads * sizeof(pthread_t));
+   FAIL_IF(!tids);
+
+   running = true;
+   threads_starting = threads;
+   for (i = 0; i < threads; i++) {
+   rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+   FAIL_IF(rc);
+   }
+
+   setbuf(stdout, NULL);
+   printf("\tWaiting for all workers to start...");
+   while (threads_starting)
+   asm volatile("": : :"memory");
+   printf("done\n");
+
+   printf("\tSending signals to all threads %d times...", ITERATIONS);
+   for (i = 0; i < ITERATIONS; i++) {
+   for (j = 0; j < threads; j++) {
+   pthread_kill(tids[j], SIGUSR1);
+   }
+   sleep(1);
+   }
+

[PATCH V2 2/8] selftests/powerpc: Test preservation of FPU and VMX regs across preemption

2016-01-14 Thread Cyril Bur
Loop in assembly checking the registers with many threads.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/math/.gitignore|   2 +
 tools/testing/selftests/powerpc/math/Makefile  |   5 +-
 tools/testing/selftests/powerpc/math/fpu_asm.S |  34 +++
 tools/testing/selftests/powerpc/math/fpu_preempt.c | 113 +
 tools/testing/selftests/powerpc/math/vmx_asm.S |  44 +++-
 tools/testing/selftests/powerpc/math/vmx_preempt.c | 113 +
 6 files changed, 306 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_preempt.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_preempt.c

diff --git a/tools/testing/selftests/powerpc/math/.gitignore 
b/tools/testing/selftests/powerpc/math/.gitignore
index b19b269..1a6f09e 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -1,2 +1,4 @@
 fpu_syscall
 vmx_syscall
+fpu_preempt
+vmx_preempt
diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
index 418bef1..b6f4158 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,4 +1,4 @@
-TEST_PROGS := fpu_syscall vmx_syscall
+TEST_PROGS := fpu_syscall fpu_preempt vmx_syscall vmx_preempt
 
 all: $(TEST_PROGS)
 
@@ -6,7 +6,10 @@ $(TEST_PROGS): ../harness.c
 $(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
 fpu_syscall: fpu_asm.S
+fpu_preempt: fpu_asm.S
+
 vmx_syscall: vmx_asm.S
+vmx_preempt: vmx_asm.S
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S 
b/tools/testing/selftests/powerpc/math/fpu_asm.S
index 8733874..46bbe99 100644
--- a/tools/testing/selftests/powerpc/math/fpu_asm.S
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -159,3 +159,37 @@ FUNC_START(test_fpu)
POP_BASIC_STACK(256)
blr
 FUNC_END(test_fpu)
+
+#int preempt_fpu(double *darray, int *threads_running, int *running)
+#On starting will (atomically) decrement not_ready as a signal that the FPU
+#has been loaded with darray. Will proceed to check the validity of the FPU
+#registers while running is not zero.
+FUNC_START(preempt_fpu)
+   PUSH_BASIC_STACK(256)
+   std r3,32(sp) #double *darray
+   std r4,40(sp) #volatile int *not_ready
+   std r5,48(sp) #int *running
+   PUSH_FPU(56)
+
+   bl load_fpu
+
+   #Atomic DEC
+   ld r3,40(sp)
+1: lwarx r4,0,r3
+   addi r4,r4,-1
+   stwcx. r4,0,r3
+   bne- 1b
+
+2: ld r3, 32(sp)
+   bl check_fpu
+   cmpdi r3,0
+   bne 3f
+   ld r4, 48(sp)
+   ld r5, 0(r4)
+   cmpwi r5,0
+   bne 2b
+
+3: POP_FPU(56)
+   POP_BASIC_STACK(256)
+   blr
+FUNC_END(preempt_fpu)
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c 
b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000..0f85b79
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here a) The check_fpu function in asm only checks
+ * the non volatile registers as it is reused from the syscall test b) There is
+ * no way to be sure preemption happened so this test just uses many threads
+ * and a long wait. As such, a successful test doesn't mean much but a failure
+ * is bad.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+   int i;
+   srand(pthread_self());
+   for (i = 0; i < 21; i++)
+   darray[i] = rand();
+
+   /* Test failed if it ever returns */
+   preempt_fpu(darray, &threads_starting, &running);
+
+   return p;
+}
+
+int test_preempt_fpu(void)
+{
+   int i, rc, threads;
+   pthread_t *tids;
+
+   threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+   tids = malloc((threads) * sizeof(pthread_t));
+   FAIL_IF(!tids);
+
+   running = true;
+

Re: [PATCH V2 4/8] powerpc: Explicitly disable math features when copying thread

2016-01-14 Thread Cyril Bur
On Fri, 15 Jan 2016 16:42:22 +1100
Michael Neuling <mi...@neuling.org> wrote:

> On Fri, 2016-01-15 at 16:04 +1100, Cyril Bur wrote:
> > With threads leaving the math bits enabled in their saved MSR to indicate
> > that the hardware is hot and a restore is not needed, children need to turn
> > it off as when they do get scheduled, there's no way their registers could
> > have been hot.  
> 
> Is this a bug in the current code?
> 

You're very consistent:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-November/136469.html

;)

> Mikey
> 
> > Signed-off-by: Cyril Bur <cyril...@gmail.com>
> > ---
> >  arch/powerpc/kernel/process.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/arch/powerpc/kernel/process.c
> > b/arch/powerpc/kernel/process.c
> > index dccc87e..e0c3d2d 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags,
> > unsigned long usp,
> >  
> > f = ret_from_fork;
> > }
> > +   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
> > sp -= STACK_FRAME_OVERHEAD;
> >  
> > /*  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC 0/3] Enable MSR_TM lazily

2016-06-29 Thread Cyril Bur
Currently the kernel checks to see if the hardware is transactional
memory capable and always enables the MSR_TM bit. The problem with
this is that the TM related SPRs become available to userspace,
requiring them to be switched between processes. It turns out these
SPRs are expensive to read and write and if a thread doesn't use TM
(or worse yet isn't even TM aware) then context switching incurs this
penalty for nothing.

The solution here is to leave the MSR_TM bit disabled and enable it
more 'on demand'. Leaving MSR_TM disabled cause a thread to take a
facility unavailable fault if and when it does decide to use TM. As
with recent updates to the FPU, VMX and VSX units the MSR_TM bit will
be enabled upon taking the fault and left on for some time afterwards
as the assumption is that if a thread used TM once it may well use it
again. The kernel will turn the MSR_TM bit off after some number of
context switches of that thread.

Performance numbers haven't been completely gathered as yet but early
runs of tools/testing/selftests/powerpc/benchmarks/context_switch
(which doesn't use TM) yields a jump from ~16 switches per second
to ~18 switches per second with patch 3/3 applied.

These patches will need to be applied on top of my recent rework of
TM: http://patchwork.ozlabs.org/patch/631959/
I have pushed a branch to github to help with reviews:
https://github.com/cyrilbur-ibm/linux/tree/tm_lazy

Cyril Bur (3):
  selftests/powerpc: Add test to check TM ucontext creation
  powerpc: tm: Add TM Unavailable Exception
  powerpc: tm: Enable transactional memory (TM) lazily for userspace

 arch/powerpc/include/asm/processor.h   |   1 +
 arch/powerpc/kernel/process.c  |  30 --
 arch/powerpc/kernel/traps.c|  33 +++
 .../selftests/powerpc/tm/tm-signal-context-chk.c   | 102 +
 4 files changed, 158 insertions(+), 8 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/tm/tm-signal-context-chk.c

-- 
2.9.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 4/9] powerpc: Explicitly disable math features when copying thread

2016-02-08 Thread Cyril Bur
On Wed, 27 Jan 2016 23:01:59 +1100
Balbir Singh <bsinghar...@gmail.com> wrote:

> On Wed, Jan 27, 2016 at 10:50 AM, Cyril Bur <cyril...@gmail.com> wrote:
> > On Mon, 25 Jan 2016 11:04:23 +1100
> > Balbir Singh <bsinghar...@gmail.com> wrote:
> >  
> >> On Thu, 21 Jan 2016 11:55:44 +1100
> >> Cyril Bur <cyril...@gmail.com> wrote:
> >>  
> >> > Currently when threads get scheduled off they always giveup the FPU,
> >> > Altivec (VMX) and Vector (VSX) units if they were using them. When they 
> >> > are
> >> > scheduled back on a fault is then taken to enable each facility and load
> >> > registers. As a result explicitly disabling FPU/VMX/VSX has not been
> >> > necessary.
> >> >
> >> > Future changes and optimisations remove this mandatory giveup and fault
> >> > which could cause calls such as clone() and fork() to copy threads and 
> >> > run
> >> > them later with FPU/VMX/VSX enabled but no registers loaded.
> >> >
> >> > This patch starts the process of having MSR_{FP,VEC,VSX} mean that a
> >> > threads registers are hot while not having MSR_{FP,VEC,VSX} means that 
> >> > the
> >> > registers must be loaded. This allows for a smarter return to userspace.
> >> >
> >> > Signed-off-by: Cyril Bur <cyril...@gmail.com>
> >> > ---
> >> >  arch/powerpc/kernel/process.c | 1 +
> >> >  1 file changed, 1 insertion(+)
> >> >
> >> > diff --git a/arch/powerpc/kernel/process.c 
> >> > b/arch/powerpc/kernel/process.c
> >> > index dccc87e..e0c3d2d 100644
> >> > --- a/arch/powerpc/kernel/process.c
> >> > +++ b/arch/powerpc/kernel/process.c
> >> > @@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags, 
> >> > unsigned long usp,
> >> >
> >> > f = ret_from_fork;
> >> > }
> >> > +   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);  
> >>  
> >
> > Hi Balbir,
> >
> > Perhaps I'm missing something, are you saying
> >  
> >> Ideally you want to use __msr_check_and_clear()
> >>  
> >
> > instead of childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); ? I don't see how 
> > that
> > can work...
> >
> > __msr_check_and_clear() operates on the currently active MSR, that is, the 
> > msr
> > for the current kernel context. childregs->msr is the value that will be 
> > used
> > for that userspace context when the kernel returns. Here we must ensure that
> > that children are created with the bit disabled.
> >  
> 
> Yes, my bad! I thought the routine took generic bits, hoping to reuse
> the CONFIG_VSX bits. I don't think it helps much, what you have is
> correct.
> 
> >> Basically we start with these bits off and then take an exception on use?
> >>  
> >
> > Currently yes, this is what I'm trying to change. This patch hasn't been
> > necessary until now as any thread which saves its FPU/VMX/VSX data ALSO
> > disables those bits in regs->msr and so theres no way a clone() or fork() 
> > can
> > create a child with MSR_FP or MSR_VEC or MSR_VSX set. I add a meaning to
> > 'having a regs->msr FP,VEC,VSX bit set' to mean that 'the regs are hot' in a
> > subsequent patch which means this assumption no longer holds so now we must
> > explicitly disable (so as to signal that the FPU/VMX/VSX regs are not hot) 
> > for
> > children thread.
> >
> > Sounds like I still haven't got that commit message quite right yet.  
> 
> I think the older series had more data to help understand the patch.
> It would help to move some of them to the current series
> 

The previous commit message to this patch was:

With threads leaving the math bits enabled in their saved MSR to indicate
that the hardware is hot and a restore is not needed, children need to turn
it off as when they do get scheduled, there's no way their registers could
have been hot.

Mikey pointed out, that was misleading as it wasn't clear that I was preparing
for future work and that no bug currently exists.

Are you talking about another patch?

I have tried to incorporate more from the old commit message into the new one,
see below:

Currently when threads get scheduled off they always giveup the FPU,
Altivec (VMX) and Vector (VSX) units if they were using them. When they are
scheduled back on a fault is then taken to enable each facility and load
registers. As a result explicitly disabling FPU/VMX/VSX has not been
necessary.

Future

[PATCH 6/9] powerpc: Prepare for splitting giveup_{fpu, altivec, vsx} in two

2016-02-28 Thread Cyril Bur
This prepares for the decoupling of saving {fpu,altivec,vsx} registers and
marking {fpu,altivec,vsx} as being unused by a thread.

Currently giveup_{fpu,altivec,vsx}() does both however optimisations to
task switching can be made if these two operations are decoupled.
save_all() will permit the saving of registers to thread structs and leave
threads MSR with bits enabled.

This patch introduces no functional change.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/reg.h   |  8 
 arch/powerpc/include/asm/switch_to.h |  7 +++
 arch/powerpc/kernel/process.c| 31 ++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c4cb2ff..d07b110 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -75,6 +75,14 @@
 #define MSR_HV 0
 #endif
 
+/*
+ * To be used in shared book E/book S, this avoids needing to worry about
+ * book S/book E in shared code
+ */
+#ifndef MSR_SPE
+#define MSR_SPE		0
+#endif
+
 #define MSR_VEC		__MASK(MSR_VEC_LG)	/* Enable AltiVec */
 #define MSR_VSX		__MASK(MSR_VSX_LG)	/* Enable VSX */
 #define MSR_POW		__MASK(MSR_POW_LG)	/* Enable Power Management */
diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 5b268b6..3690041 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -34,6 +34,7 @@ static inline void disable_kernel_fp(void)
msr_check_and_clear(MSR_FP);
 }
 #else
+static inline void __giveup_fpu(struct task_struct *t) { }
 static inline void flush_fp_to_thread(struct task_struct *t) { }
 #endif
 
@@ -46,6 +47,8 @@ static inline void disable_kernel_altivec(void)
 {
msr_check_and_clear(MSR_VEC);
 }
+#else
+static inline void __giveup_altivec(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_VSX
@@ -57,6 +60,8 @@ static inline void disable_kernel_vsx(void)
 {
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
+#else
+static inline void __giveup_vsx(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_SPE
@@ -68,6 +73,8 @@ static inline void disable_kernel_spe(void)
 {
msr_check_and_clear(MSR_SPE);
 }
+#else
+static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
 static inline void clear_task_ebb(struct task_struct *t)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 55c1eb0..29da07f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -444,12 +444,41 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
 }
 
+void save_all(struct task_struct *tsk)
+{
+   unsigned long usermsr;
+
+   if (!tsk->thread.regs)
+   return;
+
+   usermsr = tsk->thread.regs->msr;
+
+   if ((usermsr & msr_all_available) == 0)
+   return;
+
+   msr_check_and_set(msr_all_available);
+
+   if (usermsr & MSR_FP)
+   __giveup_fpu(tsk);
+
+   if (usermsr & MSR_VEC)
+   __giveup_altivec(tsk);
+
+   if (usermsr & MSR_VSX)
+   __giveup_vsx(tsk);
+
+   if (usermsr & MSR_SPE)
+   __giveup_spe(tsk);
+
+   msr_check_and_clear(msr_all_available);
+}
+
 void flush_all_to_thread(struct task_struct *tsk)
 {
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
-   giveup_all(tsk);
+   save_all(tsk);
 
 #ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
-- 
2.7.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 9/9] powerpc: Add the ability to save VSX without giving it up

2016-02-28 Thread Cyril Bur
This patch adds the ability to be able to save the VSX registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU and VEC registers
in the thread copy path to avoid a possibly pointless reload of VSX state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  4 
 arch/powerpc/kernel/ppc_ksyms.c  |  4 
 arch/powerpc/kernel/process.c| 42 +---
 arch/powerpc/kernel/vector.S | 17 ---
 4 files changed, 30 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 9028822..17c8380 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -56,14 +56,10 @@ static inline void __giveup_altivec(struct task_struct *t) 
{ }
 #ifdef CONFIG_VSX
 extern void enable_kernel_vsx(void);
 extern void flush_vsx_to_thread(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void __giveup_vsx(struct task_struct *);
 static inline void disable_kernel_vsx(void)
 {
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
-#else
-static inline void __giveup_vsx(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_SPE
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 41e1607..ef7024da 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
 EXPORT_SYMBOL(store_vr_state);
 #endif
 
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif
-
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 14c09d2..d7a9df5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -280,19 +280,31 @@ static inline int restore_altivec(struct task_struct 
*tsk) { return 0; }
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-void giveup_vsx(struct task_struct *tsk)
+static void __giveup_vsx(struct task_struct *tsk)
 {
-   check_if_tm_restore_required(tsk);
-
-   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (tsk->thread.regs->msr & MSR_FP)
__giveup_fpu(tsk);
if (tsk->thread.regs->msr & MSR_VEC)
__giveup_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VSX;
+}
+
+static void giveup_vsx(struct task_struct *tsk)
+{
+   check_if_tm_restore_required(tsk);
+
+   msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
-EXPORT_SYMBOL(giveup_vsx);
+
+static void save_vsx(struct task_struct *tsk)
+{
+   if (tsk->thread.regs->msr & MSR_FP)
+   save_fpu(tsk);
+   if (tsk->thread.regs->msr & MSR_VEC)
+   save_altivec(tsk);
+}
 
 void enable_kernel_vsx(void)
 {
@@ -335,6 +347,7 @@ static int restore_vsx(struct task_struct *tsk)
 }
 #else
 static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+static inline void save_vsx(struct task_struct *tsk) { }
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_SPE
@@ -478,14 +491,19 @@ void save_all(struct task_struct *tsk)
 
msr_check_and_set(msr_all_available);
 
-   if (usermsr & MSR_FP)
-   save_fpu(tsk);
-
-   if (usermsr & MSR_VEC)
-   save_altivec(tsk);
+   /*
+* Saving the way the register space is in hardware, save_vsx boils
+* down to a save_fpu() and save_altivec()
+*/
+   if (usermsr & MSR_VSX) {
+   save_vsx(tsk);
+   } else {
+   if (usermsr & MSR_FP)
+   save_fpu(tsk);
 
-   if (usermsr & MSR_VSX)
-   __giveup_vsx(tsk);
+   if (usermsr & MSR_VEC)
+   save_altivec(tsk);
+   }
 
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 51b0c17..1c2e7a3 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -151,23 +151,6 @@ _GLOBAL(load_up_vsx)
std r12,_MSR(r1)
b   fast_exception_return
 
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- */
-_GLOBAL(__giveup_vsx)
-   addir3,r3,THREAD/* want THREAD of task */
-   ld  r5,PT_REGS(r3)
-   cmpdi   0,r5,0
-   beq 1f
-   ld  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-   lis r3,MSR_VSX@h
-   andcr4,r4,r3/* disable VSX for previous task */
-   std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-   blr
-
 #endif /* CONFIG_VSX */
 
 
-- 
2.7.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/9] selftests/powerpc: Test the preservation of FPU and VMX regs across syscall

2016-02-28 Thread Cyril Bur
Test that the non volatile floating point and Altivec registers get
correctly preserved across the fork() syscall.

fork() works nicely for this purpose, the registers should be the same for
both parent and child

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 tools/testing/selftests/powerpc/Makefile   |   3 +-
 tools/testing/selftests/powerpc/basic_asm.h|  63 +++
 tools/testing/selftests/powerpc/math/.gitignore|   2 +
 tools/testing/selftests/powerpc/math/Makefile  |  16 ++
 tools/testing/selftests/powerpc/math/fpu_asm.S | 161 +
 tools/testing/selftests/powerpc/math/fpu_syscall.c |  90 ++
 tools/testing/selftests/powerpc/math/vmx_asm.S | 195 +
 tools/testing/selftests/powerpc/math/vmx_syscall.c |  91 ++
 8 files changed, 620 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/basic_asm.h
 create mode 100644 tools/testing/selftests/powerpc/math/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/math/Makefile
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/fpu_syscall.c
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_asm.S
 create mode 100644 tools/testing/selftests/powerpc/math/vmx_syscall.c

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 0c2706b..19e8191 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
   switch_endian\
   syscalls \
   tm   \
-  vphn
+  vphn \
+  math
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/basic_asm.h 
b/tools/testing/selftests/powerpc/basic_asm.h
new file mode 100644
index 000..0d5dccb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/basic_asm.h
@@ -0,0 +1,63 @@
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+   lis reg,(expr)@highest; \
+   ori reg,reg,(expr)@higher;  \
+   rldicr  reg,reg,32,31;  \
+   orisreg,reg,(expr)@high;\
+   ori reg,reg,(expr)@l;
+
+/*
+ * Note: These macros assume that variables being stored on the stack are
+ * doublewords, while this is usually the case it may not always be the
+ * case for each use case.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS  24
+#define __STACK_FRAME_PARAM(_param)  (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params,_var_num)  ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS  40
+#define __STACK_FRAME_PARAM(i)  (48 + ((i)*8))
+/*
+ * Caveat: if a function passed more than 8 doublewords, the caller will have
+ * made more space... which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params,_var_num)  (112 + ((_var_num)*8))
+#endif
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var)__STACK_FRAME_PARAM(var)
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params,var)  __STACK_FRAME_LOCAL(num_params,var)
+#define STACK_FRAME_LR_POS   16
+#define STACK_FRAME_CR_POS   8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+   mflrr0; \
+   std r0,STACK_FRAME_LR_POS(%r1); \
+   stdu%r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+   mfcrr0; \
+   stw r0,STACK_FRAME_CR_POS(%r1); \
+   std %r2,STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+   ld  %r2,STACK_FRAME_TOC_POS(%r1); \
+   lwz r0,STACK_FRAME_CR_POS(%r1); \
+   mtcrr0; \
+   addi%r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \
+   ld  r0,STACK_FRAME_LR_POS(%r1); \
+   mtlrr0;
+
diff --git a/tools/testing/selftests/powerpc/math/.gitignore 
b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000..b19b269
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,2 @@
+fpu_syscall
+vmx_syscall
diff --git a/tools/testing/selftests/powerpc/math/Makefile 
b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000..41c3aca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,16 @@
+TEST_PROGS := fpu_syscall vmx_syscall
+
+all: $(TEST_PROGS)
+
+#The g

[PATCH 4/9] powerpc: Explicitly disable math features when copying thread

2016-02-28 Thread Cyril Bur
Currently when threads get scheduled off they always giveup the FPU,
Altivec (VMX) and Vector (VSX) units if they were using them. When they are
scheduled back on a fault is then taken to enable each facility and load
registers. As a result explicitly disabling FPU/VMX/VSX has not been
necessary.

Future changes and optimisations remove this mandatory giveup and fault
which could cause calls such as clone() and fork() to copy threads and run
them later with FPU/VMX/VSX enabled but no registers loaded.

This patch starts the process of having MSR_{FP,VEC,VSX} mean that a
threads registers are hot while not having MSR_{FP,VEC,VSX} means that the
registers must be loaded. This allows for a smarter return to userspace.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e..e0c3d2d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
 
f = ret_from_fork;
}
+   childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
 
/*
-- 
2.7.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 8/9] powerpc: Add the ability to save Altivec without giving it up

2016-02-28 Thread Cyril Bur
This patch adds the ability to be able to save the VEC registers to the
thread struct without giving up (disabling the facility) next time the
process returns to userspace.

This patch builds on a previous optimisation for the FPU registers in the
thread copy path to avoid a possibly pointless reload of VEC state.

Signed-off-by: Cyril Bur <cyril...@gmail.com>
---
 arch/powerpc/include/asm/switch_to.h |  3 ++-
 arch/powerpc/kernel/process.c| 12 +++-
 arch/powerpc/kernel/vector.S | 24 
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 6a201e8..9028822 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -43,12 +43,13 @@ static inline void flush_fp_to_thread(struct task_struct 
*t) { }
 extern void enable_kernel_altivec(void);
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
-extern void __giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
 static inline void disable_kernel_altivec(void)
 {
msr_check_and_clear(MSR_VEC);
 }
 #else
+static inline void save_altivec(struct task_struct *t) { }
 static inline void __giveup_altivec(struct task_struct *t) { }
 #endif
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a7e5061..14c09d2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -213,6 +213,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; 
}
 #ifdef CONFIG_ALTIVEC
 #define loadvec(thr) ((thr).load_vec)
 
+static void __giveup_altivec(struct task_struct *tsk)
+{
+   save_altivec(tsk);
+   tsk->thread.regs->msr &= ~MSR_VEC;
+#ifdef CONFIG_VSX
+   if (cpu_has_feature(CPU_FTR_VSX))
+   tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
 void giveup_altivec(struct task_struct *tsk)
 {
check_if_tm_restore_required(tsk);
@@ -472,7 +482,7 @@ void save_all(struct task_struct *tsk)
save_fpu(tsk);
 
if (usermsr & MSR_VEC)
-   __giveup_altivec(tsk);
+   save_altivec(tsk);
 
if (usermsr & MSR_VSX)
__giveup_vsx(tsk);
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 038cff8..51b0c17 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec)
blr
 
 /*
- * __giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
  */
-_GLOBAL(__giveup_altivec)
+_GLOBAL(save_altivec)
addir3,r3,THREAD/* want THREAD of task */
PPC_LL  r7,THREAD_VRSAVEAREA(r3)
PPC_LL  r5,PT_REGS(r3)
PPC_LCMPI   0,r7,0
bne 2f
addir7,r3,THREAD_VRSTATE
-2: PPC_LCMPI   0,r5,0
-   SAVE_32VRS(0,r4,r7)
+2: SAVE_32VRS(0,r4,r7)
mfvscr  v0
li  r4,VRSTATE_VSCR
stvxv0,r4,r7
-   beq 1f
-   PPC_LL  r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-   lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
-   lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
-   lis r3,MSR_VEC@h
-#endif
-   andcr4,r4,r3/* disable FP for previous task */
-   PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
 
 #ifdef CONFIG_VSX
-- 
2.7.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

  1   2   3   4   5   >