[PATCH] perf annotate: cross arch annotate support fixes for ARM

2016-08-26 Thread Kim Phillips
For ARM we remove the list that contains non-arm insns, and
instead add more maintainable branch instruction regex logic.

Signed-off-by: Kim Phillips 
Acked-by: Ravi Bangoria 
Cc: Namhyung Kim 
---
 tools/perf/util/annotate.c | 177 +
 1 file changed, 67 insertions(+), 110 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b2c6cf3..52316f3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -26,6 +26,7 @@
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
+static regex_t  arm_call_insn, arm_jump_insn;
 
 static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
@@ -449,98 +450,7 @@ static struct ins instructions_x86[] = {
{ .name = "retq",  .ops  = _ops, },
 };
 
-static struct ins instructions_arm[] = {
-   { .name = "add",   .ops  = _ops, },
-   { .name = "addl",  .ops  = _ops, },
-   { .name = "addq",  .ops  = _ops, },
-   { .name = "addw",  .ops  = _ops, },
-   { .name = "and",   .ops  = _ops, },
-   { .name = "b", .ops  = _ops, }, /* might also be a call */
-   { .name = "bcc",   .ops  = _ops, },
-   { .name = "bcs",   .ops  = _ops, },
-   { .name = "beq",   .ops  = _ops, },
-   { .name = "bge",   .ops  = _ops, },
-   { .name = "bgt",   .ops  = _ops, },
-   { .name = "bhi",   .ops  = _ops, },
-   { .name = "bl",.ops  = _ops, },
-   { .name = "bls",   .ops  = _ops, },
-   { .name = "blt",   .ops  = _ops, },
-   { .name = "blx",   .ops  = _ops, },
-   { .name = "bne",   .ops  = _ops, },
-   { .name = "bts",   .ops  = _ops, },
-   { .name = "call",  .ops  = _ops, },
-   { .name = "callq", .ops  = _ops, },
-   { .name = "cmp",   .ops  = _ops, },
-   { .name = "cmpb",  .ops  = _ops, },
-   { .name = "cmpl",  .ops  = _ops, },
-   { .name = "cmpq",  .ops  = _ops, },
-   { .name = "cmpw",  .ops  = _ops, },
-   { .name = "cmpxch", .ops  = _ops, },
-   { .name = "dec",   .ops  = _ops, },
-   { .name = "decl",  .ops  = _ops, },
-   { .name = "imul",  .ops  = _ops, },
-   { .name = "inc",   .ops  = _ops, },
-   { .name = "incl",  .ops  = _ops, },
-   { .name = "ja",.ops  = _ops, },
-   { .name = "jae",   .ops  = _ops, },
-   { .name = "jb",.ops  = _ops, },
-   { .name = "jbe",   .ops  = _ops, },
-   { .name = "jc",.ops  = _ops, },
-   { .name = "jcxz",  .ops  = _ops, },
-   { .name = "je",.ops  = _ops, },
-   { .name = "jecxz", .ops  = _ops, },
-   { .name = "jg",.ops  = _ops, },
-   { .name = "jge",   .ops  = _ops, },
-   { .name = "jl",.ops  = _ops, },
-   { .name = "jle",   .ops  = _ops, },
-   { .name = "jmp",   .ops  = _ops, },
-   { .name = "jmpq",  .ops  = _ops, },
-   { .name = "jna",   .ops  = _ops, },
-   { .name = "jnae",  .ops  = _ops, },
-   { .name = "jnb",   .ops  = _ops, },
-   { .name = "jnbe",  .ops  = _ops, },
-   { .name = "jnc",   .ops  = _ops, },
-   { .name = "jne",   .ops  = _ops, },
-   { .name = "jng",   .ops  = _ops, },
-   { .name = "jnge",  .ops  = _ops, },
-   { .name = "jnl",   .ops  = _ops, },
-   { .name = "jnle",  .ops  = _ops, },
-   { .name = "jno",   .ops  = _ops, },
-   { .name = "jnp",   .ops  = _ops, },
-   { .name = "jns",   .ops  = _ops, },
-   { .name = "jnz",   .ops  = _ops, },
-   { .name = "jo",.ops  = _ops, },
-   { .name = "jp",.ops  = _ops, },
-   { .name = "jpe",   .ops  = _ops, },
-   { .name = "jpo",   .ops  = _ops, },
-   { .name = "jrcxz", .ops  = _ops, },
-   { .name = "js",.ops  = _ops, },
-   { .name = "jz",.ops  = _ops, },
-   { .name = "lea",   .ops  = _ops, },
-   { .name = "lock",  .ops  = _ops, },
-   { .name = "mov",   .ops  = _ops, },
-   { .name = "movb",  .ops  = _ops, },
-   { .name = "movdqa",.ops  = _ops, },
-   { .name = "movl",  .ops  = _ops, },
-   { .name = "movq",  .ops  = _ops, },
-   { .name = "movslq", .ops  = _ops, },
-   { .name = "movzbl", .ops  = _ops, },
-   { .name = "movzwl", .ops  = _ops, },
-   { .name = "nop",   .ops  = _ops, },
-   { .name = "nopl",  .ops  = _ops, },
-   { .name = "nopw",  .ops  = _ops, },
-   { .name = "or",.ops  = _ops, },
-   { .name = "orl",   .ops  = _ops, },
-   { .name = "test",  .ops  = _ops, },
-   { .name = "testb", .ops  = _ops, },
-   { .name = "testl", .ops  = _ops, },
-   { .name = "xadd",  .ops  = _ops, },
-   { .name = "xbeginl", .ops  = _ops, },
-   { .name = "xbeginq", .ops  = _ops, },
-   { .name = "retq",  .ops  = _ops, },
-};
-
-struct instructions_powerpc {
+struct 

[PATCH 3/3] powerpc/pseries: Add bitmap to track updated LMBs

2016-08-26 Thread Nathan Fontenot
Recent updates to the PAPR for memory hotplug have now made the
reserved field of the ibm,dynamic-memory property a capabilities
field. To support this update we can no longer use the reserved field
to track which LMBs have been updated during a DLPAR operation.

This patch adds a bitmap to track any LMBs that are updated during
a DLPAR operation so that we can roll back to the state prior to the
DLPAR operation if an error occurs.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   25 ++-
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index a0371d1..4be1b61 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -27,6 +27,7 @@
 static struct drconf_mem {
u32 num_lmbs;
struct of_drconf_cell *lmbs;
+   unsigned long *lmb_update_map;
 } drmem;
 
 #define for_each_lmb_range(lmb, start, end)\
@@ -46,15 +47,15 @@ static void lmb_set_aa_index(u32 lmb, u32 aa_index) {
 }
 
 static bool lmb_updated(u32 lmb) {
-   return drmem.lmbs[lmb].reserved;
+   return test_bit(lmb, drmem.lmb_update_map);
 }
 
 static void mark_lmb_updated(u32 lmb) {
-   drmem.lmbs[lmb].reserved = 1;
+   set_bit(lmb, drmem.lmb_update_map);
 }
 
-static void rm_lmb_update(u32 lmb) {
-   drmem.lmbs[lmb].reserved = 0;
+static void clear_lmb_updates(void) {
+   bitmap_zero(drmem.lmb_update_map, drmem.num_lmbs);
 }
 
 static bool lmb_reserved(u32 lmb) {
@@ -99,6 +100,11 @@ static void __init update_drconf_memory(void)
p = prop->value;
drmem.num_lmbs = be32_to_cpu(*p++);
drmem.lmbs = (struct of_drconf_cell *)p;
+   
+   if (!drmem.lmb_update_map) {
+   drmem.lmb_update_map = kmalloc(BITS_TO_LONGS(drmem.num_lmbs),
+  GFP_KERNEL);
+   }
 }
 
 unsigned long pseries_memory_block_size(void)
@@ -484,8 +490,6 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
if (rc)
pr_err("Failed to add LMB back, drc index %x\n",
   lmb_drc_index(lmb));
-
-   rm_lmb_update(lmb);
}
 
rc = -EINVAL;
@@ -496,8 +500,6 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
 
pr_info("Memory at %llx was hot-removed\n",
lmb_base_address(lmb));
-
-   rm_lmb_update(lmb);
}
rc = 0;
}
@@ -588,8 +590,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, 
u32 drc_index)
if (rc)
pr_err("Failed to add LMB, drc index %x\n",
   lmb_drc_index(lmb));
-
-   rm_lmb_update(lmb);
}
rc = -EINVAL;
} else {
@@ -599,8 +599,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, 
u32 drc_index)
 
pr_info("Memory at %llx (drc index %x) was 
hot-removed\n",
lmb_base_address(lmb), lmb_drc_index(lmb));
-
-   rm_lmb_update(lmb);
}
}
 
@@ -732,7 +730,6 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 
pr_info("Memory at %llx (drc index %x) was hot-added\n",
lmb_base_address(lmb), lmb_drc_index(lmb));
-   rm_lmb_update(lmb);
}
}
 
@@ -830,7 +827,6 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 
drc_index)
 
pr_info("Memory at %llx (drc index %x) was hot-added\n",
lmb_base_address(lmb), lmb_drc_index(lmb));
-   rm_lmb_update(lmb);
}
}
 
@@ -843,6 +839,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
int rc;
 
lock_device_hotplug();
+   clear_lmb_updates();
 
switch (hp_elog->action) {
case PSERIES_HP_ELOG_ACTION_ADD:



[PATCH 2/3] powerpc/pseries: Remove no longer needed rtas_hp_event flag

2016-08-26 Thread Nathan Fontenot
Remove the use of the rtas_hp_event flag as it is no longer needed. The
management of the static dynamic-memory property does not go through
of_update_property so we do not need to set this flag.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5173e49..a0371d1 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -24,8 +24,6 @@
 #include 
 #include "pseries.h"
 
-static bool rtas_hp_event;
-
 static struct drconf_mem {
u32 num_lmbs;
struct of_drconf_cell *lmbs;
@@ -929,9 +927,6 @@ static int pseries_update_drconf_memory(struct 
of_reconfig_data *pr)
__be32 *p;
int i, rc = -EINVAL;
 
-   if (rtas_hp_event)
-   return 0;
-
update_drconf_memory();
 
memblock_size = pseries_memory_block_size();



[PATCH 1/3] powerpc/pseries: maintain single copy of ibm, dynamic-memory property

2016-08-26 Thread Nathan Fontenot
The ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory property
of the device-tree can be fairly big on systems with a large amount
of memory. On a system with 1 TB of memory (256 MB LMBs) the property
size is 94k; this equates to roughly a 30MB property size for a 32 TB
system. This file size is not necessarily huge, but the need to update
this property every time we DLPAR add or remove an LMB could be
problematic.

Every time the property is updated a new copy of the property is made
with the previous copy being added to the old_properties list. Due to
the lack of reference counting on properties old versions of a property
are never free'ed. On a large 32TB system we could easily do
several thousand memory add/remove operations and thus create
several thousand copies of this property. This seems a bit wasteful
with respect to system resources.

This patch changes the pseries hotplug memory code to maintain a static
reference to this property instead of creating a new copy for every LMB
that we add or remove. In doing this we have to ensure that the property
remains in BE format so a set of accessor methods are provided to
get/set values from the property in the proper cpu format.

This should provide an improvement in kernel resources as
we will no longer have un-referenced copies of this property.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |  520 ++-
 1 file changed, 225 insertions(+), 295 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index b708c5c..5173e49 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -26,6 +26,83 @@
 
 static bool rtas_hp_event;
 
+static struct drconf_mem {
+   u32 num_lmbs;
+   struct of_drconf_cell *lmbs;
+} drmem;
+
+#define for_each_lmb_range(lmb, start, end)\
+   for ((lmb) = (start); (lmb) < (end); (lmb)++)
+#define for_each_lmb(lmb)  for_each_lmb_range((lmb), 0, drmem.num_lmbs)
+
+static u64 lmb_base_address(u32 lmb) {
+   return be64_to_cpu(drmem.lmbs[lmb].base_addr);
+}
+
+static u32 lmb_drc_index(u32 lmb) {
+   return be32_to_cpu(drmem.lmbs[lmb].drc_index);
+}
+
+static void lmb_set_aa_index(u32 lmb, u32 aa_index) {
+   drmem.lmbs[lmb].aa_index = cpu_to_be32(aa_index);
+}
+
+static bool lmb_updated(u32 lmb) {
+   return drmem.lmbs[lmb].reserved;
+}
+
+static void mark_lmb_updated(u32 lmb) {
+   drmem.lmbs[lmb].reserved = 1;
+}
+
+static void rm_lmb_update(u32 lmb) {
+   drmem.lmbs[lmb].reserved = 0;
+}
+
+static bool lmb_reserved(u32 lmb) {
+   return be32_to_cpu(drmem.lmbs[lmb].flags) & DRCONF_MEM_RESERVED;
+}
+
+static bool lmb_assigned(u32 lmb) {
+   return be32_to_cpu(drmem.lmbs[lmb].flags) & DRCONF_MEM_ASSIGNED;
+}
+
+static void mark_lmb_assigned(u32 lmb) {
+   drmem.lmbs[lmb].flags |= cpu_to_be32(DRCONF_MEM_ASSIGNED);
+}
+
+static void mark_lmb_unassigned(u32 lmb) {
+   drmem.lmbs[lmb].flags &= cpu_to_be32(~DRCONF_MEM_ASSIGNED);
+}
+
+static int dlpar_acquire_lmb(u32 lmb) {
+   return dlpar_acquire_drc(be32_to_cpu(drmem.lmbs[lmb].drc_index));
+}
+
+static int dlpar_release_lmb(u32 lmb) {
+   return dlpar_release_drc(be32_to_cpu(drmem.lmbs[lmb].drc_index));
+}
+
+static void __init update_drconf_memory(void)
+{
+   struct device_node *dn;
+   struct property *prop;
+   __be32 *p;
+
+   dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!dn)
+   return;
+
+   prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+   of_node_put(dn);
+   if (!prop)
+   return;
+
+   p = prop->value;
+   drmem.num_lmbs = be32_to_cpu(*p++);
+   drmem.lmbs = (struct of_drconf_cell *)p;
+}
+
 unsigned long pseries_memory_block_size(void)
 {
struct device_node *np;
@@ -99,98 +176,6 @@ static struct property *dlpar_clone_property(struct 
property *prop,
return new_prop;
 }
 
-static struct property *dlpar_clone_drconf_property(struct device_node *dn)
-{
-   struct property *prop, *new_prop;
-   struct of_drconf_cell *lmbs;
-   u32 num_lmbs, *p;
-   int i;
-
-   prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
-   if (!prop)
-   return NULL;
-
-   new_prop = dlpar_clone_property(prop, prop->length);
-   if (!new_prop)
-   return NULL;
-
-   /* Convert the property to cpu endian-ness */
-   p = new_prop->value;
-   *p = be32_to_cpu(*p);
-
-   num_lmbs = *p++;
-   lmbs = (struct of_drconf_cell *)p;
-
-   for (i = 0; i < num_lmbs; i++) {
-   lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
-   lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
-   lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
-   }
-
-   return new_prop;
-}
-

[PATCH 0/3] powerpc/pseries: Manage single copy of ibm, dynamic-memory

2016-08-26 Thread Nathan Fontenot
The ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory property
of the device-tree can be fairly big on systems with a large amount
of memory. On a system with 1 TB of memory (256 MB LMBs) the property
size is 94k; this equates to roughly a 30MB property size for a 32 TB
system. This file size is not necessarily huge, but the need to update
this property every time we DLPAR add or remove an LMB could be
problematic.

Every time the property is updated a new copy of the property is made
with the previous copy being added to the old_properties list. Due to
the lack of reference counting on properties old versions of a property
are never free'ed. On a large 32TB system we could easily do
several thousand memory add/remove operations and thus create
several thousand copies of this property. This seems a bit wasteful
with respect to system resources.

Patch 1/3:
This patch changes the pseries hotplug memory code to maintain a static
reference to this property instead of creating a new copy for every LMB
that we add or remove. In doing this we have to ensure that the property
remains in BE format so a set of accessor methods are provided to
get/set values from the property in the proper cpu format.

Patch 2/3:
Remove a no longer needed rtas_hp_event flag.

Patch 3/3:
Add a bit field to track updated LMBs during DLPAR add/remove operations.
 
This should provide an improvement in kernel resources as
we will no longer have un-referenced copies of this property.

-Nathan
---

Nathan Fontenot (3):
  powerpc/pseries: maintain single copy of ibm,dynamic-memory property
  powerpc/pseries: Remove no longer needed rtas_hp_event flag
  powerpc/pseries: Add bitmap to track updated LMBs


 arch/powerpc/platforms/pseries/hotplug-memory.c |  526 ++-
 1 file changed, 224 insertions(+), 302 deletions(-)



[PATCH] powerpc/32: fix again csum_partial_copy_generic()

2016-08-26 Thread Christophe Leroy
commit 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic()
based on copy_tofrom_user()") introduced a bug when destination
address is odd and len is lower than cacheline size.

In that case the resulting csum value doesn't have to be rotated one
byte because the cache-aligned copy part is skipped so no alignment
is performed.

Fixes: 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic()
based on copy_tofrom_user()")
Cc: sta...@vger.kernel.org

Reported-by: Alessio Igor Bogani 
Signed-off-by: Christophe Leroy 
Tested-by: Alessio Igor Bogani 
---
 arch/powerpc/lib/checksum_32.S | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 0a57fe6..aa8214f 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
stw r7,12(r1)
stw r8,8(r1)
 
-   rlwinm  r0,r4,3,0x8
-   rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
-   cmplwi  cr7,r0,0/* is destination address even ? */
addic   r12,r6,0
addir6,r4,-4
neg r0,r4
addir4,r3,-4
andi.   r0,r0,CACHELINE_MASK/* # bytes to start of cache line */
+   crset   4*cr7+eq
beq 58f
 
cmplw   0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
+   rlwinm  r7,r6,3,0x8
+   rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+   cmplwi  cr7,r7,0/* is destination address even ? */
andi.   r8,r0,3 /* get it word-aligned first */
mtctr   r8
beq+61f
-- 
2.1.0



Re: Suspected regression?

2016-08-26 Thread Alessio Igor Bogani
Hi Christophe,

On 26 August 2016 at 14:46, Christophe Leroy  wrote:
[...]
> Can you try the patch below ? I have identified that in case the packet is
> smaller than a cacheline, it doesn't get cache-aligned so the result shall
> not be rotated in case of odd dest address.
>
> This patch goes in addition to the previous fix (1bc8b816cb805) as it fixes
> a different case.
>
> Christophe
>
> diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
> index 68f6862..3971cfb 100644
> --- a/arch/powerpc/lib/checksum_32.S
> +++ b/arch/powerpc/lib/checksum_32.S
> @@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
> stw r7,12(r1)
> stw r8,8(r1)
>
> -   rlwinm  r0,r4,3,0x8
> -   rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte
> */
> -   cmplwi  cr7,r0,0/* is destination address even ? */
> addic   r12,r6,0
> addir6,r4,-4
> neg r0,r4
> addir4,r3,-4
> andi.   r0,r0,CACHELINE_MASK/* # bytes to start of cache line */
> +   crset   4*cr7+eq
> beq 58f
>
> cmplw   0,r5,r0 /* is this more than total to do? */
> blt 63f /* if not much to do */
> +   rlwinm  r7,r6,3,0x8
> +   rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte
> */
> +   cmplwi  cr7,r7,0/* is destination address even ? */
> andi.   r8,r0,3 /* get it word-aligned first */
> mtctr   r8
> beq+61f

Yeah! It fixes my problem! Thank you very much!

Ciao,
Alessio


Re: Suspected regression?

2016-08-26 Thread Christophe Leroy

Hi Alessio,

Le 26/08/2016 à 04:32, Scott Wood a écrit :

On Tue, 2016-08-23 at 13:34 +0200, Christophe Leroy wrote:


Le 23/08/2016 à 11:20, Alessio Igor Bogani a écrit :


Hi Christophe,

Sorry for delay in reply I was on vacation.

On 6 August 2016 at 11:29, christophe leroy 
wrote:


Alessio,


Le 05/08/2016 à 09:51, Christophe Leroy a écrit :





Le 19/07/2016 à 23:52, Scott Wood a écrit :



On Tue, 2016-07-19 at 12:00 +0200, Alessio Igor Bogani wrote:



Hi all,

I have got two boards MVME5100 (MPC7410 cpu) and MVME7100
(MPC8641D
cpu) for which I use the same cross-compiler (ppc7400).

I tested these against kernel HEAD to found that these don't boot
anymore (PID 1 crash).

Bisecting results in first offending commit:
7aef4136566b0539a1a98391181e188905e33401

Removing it from HEAD make boards boot properly again.

A third system based on P2010 isn't affected at all.

Is it a regression or I have made something wrong?


I booted both my next branch, and Linus's master on MPC8641HPCN and
didn't see
this -- though possibly your RFS is doing something
different.  Maybe
that's
the difference with P2010 as well.

Is there any way you can debug the cause of the crash?  Or send me a
minimal
RFS that demonstrates the problem (ideally with debug symbols on the
userspace
binaries)?


I got from Alessio the below information:

systemd[1]: Caught <BUS>, core dump failed (child 137, code=killed,
status=7/BUS).
systemd[1]: Freezing execution.


What can generate SIGBUS ?
And shouldn't we also get some KERN_ERR trace, something like
"unhandled
signal 7 at ." ?


As far as I can see, SIGBUS is mainly generated from alignment
exception.
According to 7410 Reference Manual, alignment exception can happen in
the
following cases:
* An operand of a dcbz instruction is on a page that is write-through or
cache-inhibited for a virtual mode access.
* An attempt to execute a dcbz instruction occurs when the cache is
disabled
or locked.

Could try with below patch to check if the dcbz insn is causing the
SIGBUS ?

Unfortunately that patch doesn't solve the problem.

Is there a chance that cache behavior could settled by board firmware
(PPCBug on the MPC7410 board and MotLoad on the MPC8641D one)?
In that case what do you suggest me to looking for?

If the removal of dcbz doesn't solve the issue, I don't think it is a
cache related issue.
As far as I understood, your init gets a SIGBUS signal, right ? Then we
must identify the reason for that sigbus.


My guess would be errors demand-loading a page via NFS.

One approach might be to hack up the code so that both versions of
csum_partial_copy_generic() are present, and call both each time.  If the
results differ or the copied bytes are wrong, then spit out a dump of the
details.



Can you try the patch below ? I have identified that in case the packet 
is smaller than a cacheline, it doesn't get cache-aligned so the result 
shall not be rotated in case of odd dest address.


This patch goes in addition to the previous fix (1bc8b816cb805) as it 
fixes a different case.


Christophe

diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 68f6862..3971cfb 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
stw r7,12(r1)
stw r8,8(r1)

-   rlwinm  r0,r4,3,0x8
-   rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
-   cmplwi  cr7,r0,0/* is destination address even ? */
addic   r12,r6,0
addir6,r4,-4
neg r0,r4
addir4,r3,-4
andi.   r0,r0,CACHELINE_MASK/* # bytes to start of cache line */
+   crset   4*cr7+eq
beq 58f

cmplw   0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
+   rlwinm  r7,r6,3,0x8
+   rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+   cmplwi  cr7,r7,0/* is destination address even ? */
andi.   r8,r0,3 /* get it word-aligned first */
mtctr   r8
beq+61f
--



Re: [PATCH v6 2/7] perf annotate: Add cross arch annotate support

2016-08-26 Thread Ravi Bangoria
Hi Kim,

I've tested your patch on x86 and powerpc and it looks fine to me. Can you 
please
put your signed-off-by.

Please add Acked-by: Ravi Bangoria  as well.

Regards,
-Ravi

On Wednesday 24 August 2016 02:06 AM, Kim Phillips wrote:
> On Tue, 23 Aug 2016 11:17:16 +0900
> Namhyung Kim  wrote:
>
>> On Tue, Aug 23, 2016 at 8:01 AM, Kim Phillips  wrote:
>>> On Fri, 19 Aug 2016 18:29:33 +0530
>>> Ravi Bangoria  wrote:
>>>
 Changes in v6:
   - Instead of adding only those instructions defined in #ifdef __arm__,
 add all instructions from default table to arm table.
>>> Thanks, I've gone through the list and removed all not-ARM
>>> instructions, and added some missing ARM branch instructions:
>> Can we use regex patterns instead?
> Yes, that helps prevent mistakes updating instruction lists - how does
> this look?:
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index b2c6cf3..52316f3 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -26,6 +26,7 @@
>  const char   *disassembler_style;
>  const char   *objdump_path;
>  static regex_tfile_lineno;
> +static regex_tarm_call_insn, arm_jump_insn;
>
>  static struct ins *ins__find(const char *name, const char *norm_arch);
>  static int disasm_line__parse(char *line, char **namep, char **rawp);
> @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = {
>   { .name = "retq",  .ops  = _ops, },
>  };
>
> -static struct ins instructions_arm[] = {
> - { .name = "add",   .ops  = _ops, },
> - { .name = "addl",  .ops  = _ops, },
> - { .name = "addq",  .ops  = _ops, },
> - { .name = "addw",  .ops  = _ops, },
> - { .name = "and",   .ops  = _ops, },
> - { .name = "b", .ops  = _ops, }, /* might also be a call */
> - { .name = "bcc",   .ops  = _ops, },
> - { .name = "bcs",   .ops  = _ops, },
> - { .name = "beq",   .ops  = _ops, },
> - { .name = "bge",   .ops  = _ops, },
> - { .name = "bgt",   .ops  = _ops, },
> - { .name = "bhi",   .ops  = _ops, },
> - { .name = "bl",.ops  = _ops, },
> - { .name = "bls",   .ops  = _ops, },
> - { .name = "blt",   .ops  = _ops, },
> - { .name = "blx",   .ops  = _ops, },
> - { .name = "bne",   .ops  = _ops, },
> - { .name = "bts",   .ops  = _ops, },
> - { .name = "call",  .ops  = _ops, },
> - { .name = "callq", .ops  = _ops, },
> - { .name = "cmp",   .ops  = _ops, },
> - { .name = "cmpb",  .ops  = _ops, },
> - { .name = "cmpl",  .ops  = _ops, },
> - { .name = "cmpq",  .ops  = _ops, },
> - { .name = "cmpw",  .ops  = _ops, },
> - { .name = "cmpxch", .ops  = _ops, },
> - { .name = "dec",   .ops  = _ops, },
> - { .name = "decl",  .ops  = _ops, },
> - { .name = "imul",  .ops  = _ops, },
> - { .name = "inc",   .ops  = _ops, },
> - { .name = "incl",  .ops  = _ops, },
> - { .name = "ja",.ops  = _ops, },
> - { .name = "jae",   .ops  = _ops, },
> - { .name = "jb",.ops  = _ops, },
> - { .name = "jbe",   .ops  = _ops, },
> - { .name = "jc",.ops  = _ops, },
> - { .name = "jcxz",  .ops  = _ops, },
> - { .name = "je",.ops  = _ops, },
> - { .name = "jecxz", .ops  = _ops, },
> - { .name = "jg",.ops  = _ops, },
> - { .name = "jge",   .ops  = _ops, },
> - { .name = "jl",.ops  = _ops, },
> - { .name = "jle",   .ops  = _ops, },
> - { .name = "jmp",   .ops  = _ops, },
> - { .name = "jmpq",  .ops  = _ops, },
> - { .name = "jna",   .ops  = _ops, },
> - { .name = "jnae",  .ops  = _ops, },
> - { .name = "jnb",   .ops  = _ops, },
> - { .name = "jnbe",  .ops  = _ops, },
> - { .name = "jnc",   .ops  = _ops, },
> - { .name = "jne",   .ops  = _ops, },
> - { .name = "jng",   .ops  = _ops, },
> - { .name = "jnge",  .ops  = _ops, },
> - { .name = "jnl",   .ops  = _ops, },
> - { .name = "jnle",  .ops  = _ops, },
> - { .name = "jno",   .ops  = _ops, },
> - { .name = "jnp",   .ops  = _ops, },
> - { .name = "jns",   .ops  = _ops, },
> - { .name = "jnz",   .ops  = _ops, },
> - { .name = "jo",.ops  = _ops, },
> - { .name = "jp",.ops  = _ops, },
> - { .name = "jpe",   .ops  = _ops, },
> - { .name = "jpo",   .ops  = _ops, },
> - { .name = "jrcxz", .ops  = _ops, },
> - { .name = "js",.ops  = _ops, },
> - { .name = "jz",.ops  = _ops, },
> - { .name = "lea",   .ops  = _ops, },
> - { .name = "lock",  .ops  = _ops, },
> - { .name = "mov",   .ops  = _ops, },
> - { .name = "movb",  .ops  = _ops, },
> - { .name = "movdqa",.ops  = _ops, },
> - { .name = "movl",  .ops  = _ops, },
> - { .name = "movq",  .ops  = _ops, },
> - { .name = "movslq", .ops  = _ops, },
> - { .name = "movzbl", .ops  = _ops, },
> - { .name = "movzwl", .ops  = _ops, },
> - { 

Re: [PATCH] powerpc/fsl_pci: Size upper inbound window based on RAM size

2016-08-26 Thread Tillmann Heidsieck

Hi Scott,

thanks for the patch! This one works for my setup:
T4240, 12GB Ram and Radeon E6760.

On 2016-08-26 08:38, Scott Wood wrote:
This allows PCI devices that can only address (e.g.) 36 or 40 bit DMA 
to
use direct DMA, at the cost of not being able to DMA to non-RAM 
addresses

(this doesn't affect MSIs as there is a separate dedicated window for
that) which we wouldn't have been able to do anyway if the RAM size 
didn't

trigger the creation of the second inbound window.

It also fixes an off-by-one error that set dma_direct_ops on PCI 
devices

whose dma mask could address all the space below the DMA offset
(previously 40 bits), but not the window that starts at the DMA offset.

Signed-off-by: Scott Wood 
Cc: Tillmann Heidsieck 
---


Tested-by: Tillmann Heidsieck 


Re: [PATCH 00/44] usb: don't print on ENOMEM

2016-08-26 Thread Lothar Waßmann
Hi,

On Thu, 25 Aug 2016 19:38:52 +0200 Wolfram Sang wrote:
> Here is my next series to save memory by removing unneeded strings. It removes
> in the usb subsystem all unspecific error messages after calling malloc-based
> functions, i.e. (devm_)k[zcm]alloc. kmalloc prints enough information in that
> case. If the message was specific (e.g. "can't save CLEAR_TT_BUFFER state"), I
> left it. This series saves ~4.5KB of "out of memory" permutations in .text and
> .rodata. For modified lines, (x == NULL) was replaced with (!NULL) as well.
s/!NULL/!x/


Lothar Waßmann


Re: [RFC PATCH] powerpc: fsl_pci: fix inbound ATMU entries for systems with >4G RAM

2016-08-26 Thread Scott Wood
On 08/26/2016 12:55 AM, Scott Wood wrote:
> On 08/26/2016 12:26 AM, Tillmann Heidsieck wrote:
>> On 2016-08-24 23:39, Scott Wood wrote:
>>> BTW, for some reason your patch is not showing up in Patchwork.
>>
>> Are there some known pitfalls when sending patches to Patchwork?
> 
> It's not the first time I've seen certain people's patches not show up
> there, but I don't know what the root cause is.

I do see the patch on Patchwork now; I guess it was just slow.

-Scott



[PATCH] powerpc/fsl_pci: Size upper inbound window based on RAM size

2016-08-26 Thread Scott Wood
This allows PCI devices that can only address (e.g.) 36 or 40 bit DMA to
use direct DMA, at the cost of not being able to DMA to non-RAM addresses
(this doesn't affect MSIs as there is a separate dedicated window for
that) which we wouldn't have been able to do anyway if the RAM size didn't
trigger the creation of the second inbound window.

It also fixes an off-by-one error that set dma_direct_ops on PCI devices
whose dma mask could address all the space below the DMA offset
(previously 40 bits), but not the window that starts at the DMA offset.

Signed-off-by: Scott Wood 
Cc: Tillmann Heidsieck 
---
 arch/powerpc/sysdev/fsl_pci.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 0ef9df4..d3a5974 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -111,8 +111,7 @@ static struct pci_ops fsl_indirect_pcie_ops =
.write = indirect_write_config,
 };
 
-#define MAX_PHYS_ADDR_BITS 40
-static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS;
+static u64 pci64_dma_offset;
 
 #ifdef CONFIG_SWIOTLB
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -132,12 +131,10 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 
dma_mask)
return -EIO;
 
/*
-* Fixup PCI devices that are able to DMA to above the physical
-* address width of the SoC such that we can address any internal
-* SoC address from across PCI if needed
+* Fix up PCI devices that are able to DMA to the large inbound
+* mapping that allows addressing any RAM address from across PCI.
 */
-   if ((dev_is_pci(dev)) &&
-   dma_mask >= DMA_BIT_MASK(MAX_PHYS_ADDR_BITS)) {
+   if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
set_dma_ops(dev, _direct_ops);
set_dma_offset(dev, pci64_dma_offset);
}
@@ -387,6 +384,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
mem_log++;
 
piwar = (piwar & ~PIWAR_SZ_MASK) | (mem_log - 1);
+   pci64_dma_offset = 1ULL << mem_log;
 
if (setup_inbound) {
/* Setup inbound memory window */
-- 
2.7.4



Re: linux-next: build warnings after merge of the kbuild tree

2016-08-26 Thread Nicholas Mc Guire
On Fri, Aug 26, 2016 at 01:58:03PM +1000, Nicholas Piggin wrote:
> On Mon, 22 Aug 2016 20:47:58 +1000
> Nicholas Piggin  wrote:
> 
> > On Fri, 19 Aug 2016 20:44:55 +1000
> > Nicholas Piggin  wrote:
> > 
> > > On Fri, 19 Aug 2016 10:37:00 +0200
> > > Michal Marek  wrote:
> > >   
> > > > On 2016-08-19 07:09, Stephen Rothwell wrote:
> > 
> > [snip]
> > 
> > > > > 
> > > > > I may be missing something, but genksyms generates the crc's off the
> > > > > preprocessed C source code and we don't have any for the asm files 
> > > > > ...  
> > > > 
> > > > Of course you are right. Which means that we are losing type information
> > > > for these exports for CONFIG_MODVERSIONS purposes. I guess it's
> > > > acceptable, since the asm functions are pretty basic and their
> > > > signatures do not change.
> > > 
> > > I don't completely agree. It would be nice to have the functionality
> > > still there.
> > > 
> > > What happens if you just run cmd_modversions on the as rule? It relies on
> > > !defined(__ASSEMBLY__), but we're feeding the result to genksyms, not as.
> > > It would require the header be included in the .S file and be protected 
> > > for
> > > asm builds.  
> > 
> > 
> > This seems like it *could* be made to work, but there's a few problems.
> > 
> > - .h files are not made for C consumption. Matter of manually adding the
> > ifdef guards, which isn't terrible.
> > 
> > - .S files do not all include their .h where the C declaration is. Also
> > will cause some churn but doable and maybe not completely unreasonable.
> > 
> > - genksyms parser barfs when it hits the assembly of the .S file. Best
> > way to fix that seems just send the #include and EXPORT_SYMBOL lines
> > from the .S to the preprocessor. That's a bit of a rabbit hole too, with
> > some .S files being included, etc.
> > 
> > I'm not sure what to do here. If nobody cares and we lose CRCs for .S
> > exports, then okay we can whitelist those relocs easily. If we don't want
> > to lose the functionality, the above might work but it's a bit intrusive
> > and is going to require another cycle of prep patches to go through arch
> > code first.
> > 
> > Or suggestions for alternative approach?
> 
> Here is a quick patch that I think should catch missing CRCs in an
> architecture-independent way. If we merge something like this, we
> can whitelist the symbols in arch/powerpc so people get steered to
> the right place.
> 
> Powerpc seems to be the only one really catching this, and it's
> only as a side effect of a test run for CONFIG_RELOCATABLE kernels,
> which means version failures probably slipped through other archs.
> 
> I'll clean it up, do some more testing, and submit it unless
> anybody dislikes it or has a better way to do it.
> 
> Thanks,
> Nick
> 
> 
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index 4b8ffd3..1efc454 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -609,6 +609,7 @@ static void handle_modversions(struct module *mod, struct 
> elf_info *info,
>  {
>   unsigned int crc;
>   enum export export;
> + int is_crc = 0;

should that not be a bool here ?

>  
>   if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
>   strncmp(symname, "__ksymtab", 9) == 0)
> @@ -618,6 +619,7 @@ static void handle_modversions(struct module *mod, struct 
> elf_info *info,
>  
>   /* CRC'd symbol */
>   if (strncmp(symname, CRC_PFX, strlen(CRC_PFX)) == 0) {
> + is_crc = 1;

is_crc = true;

>   crc = (unsigned int) sym->st_value;
>   sym_update_crc(symname + strlen(CRC_PFX), mod, crc,
>   export);

thx!
hofrat


Re: [PATCH v6 2/7] perf annotate: Add cross arch annotate support

2016-08-26 Thread Namhyung Kim
Hi,

On Tue, Aug 23, 2016 at 03:36:17PM -0500, Kim Phillips wrote:
> On Tue, 23 Aug 2016 11:17:16 +0900
> Namhyung Kim  wrote:
> 
> > On Tue, Aug 23, 2016 at 8:01 AM, Kim Phillips  wrote:
> > > On Fri, 19 Aug 2016 18:29:33 +0530
> > > Ravi Bangoria  wrote:
> > >
> > >> Changes in v6:
> > >>   - Instead of adding only those instructions defined in #ifdef __arm__,
> > >> add all instructions from default table to arm table.
> > > Thanks, I've gone through the list and removed all not-ARM
> > > instructions, and added some missing ARM branch instructions:
> > 
> > Can we use regex patterns instead?
> 
> Yes, that helps prevent mistakes updating instruction lists - how does
> this look?:

Much better!

Thanks,
Namhyung


> 
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index b2c6cf3..52316f3 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -26,6 +26,7 @@
>  const char   *disassembler_style;
>  const char   *objdump_path;
>  static regex_t  file_lineno;
> +static regex_t  arm_call_insn, arm_jump_insn;
>  
>  static struct ins *ins__find(const char *name, const char *norm_arch);
>  static int disasm_line__parse(char *line, char **namep, char **rawp);
> @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = {
>   { .name = "retq",  .ops  = _ops, },
>  };
>  
> -static struct ins instructions_arm[] = {
> - { .name = "add",   .ops  = _ops, },
> - { .name = "addl",  .ops  = _ops, },
> - { .name = "addq",  .ops  = _ops, },
> - { .name = "addw",  .ops  = _ops, },
> - { .name = "and",   .ops  = _ops, },
> - { .name = "b", .ops  = _ops, }, /* might also be a call */
> - { .name = "bcc",   .ops  = _ops, },
> - { .name = "bcs",   .ops  = _ops, },
> - { .name = "beq",   .ops  = _ops, },
> - { .name = "bge",   .ops  = _ops, },
> - { .name = "bgt",   .ops  = _ops, },
> - { .name = "bhi",   .ops  = _ops, },
> - { .name = "bl",.ops  = _ops, },
> - { .name = "bls",   .ops  = _ops, },
> - { .name = "blt",   .ops  = _ops, },
> - { .name = "blx",   .ops  = _ops, },
> - { .name = "bne",   .ops  = _ops, },
> - { .name = "bts",   .ops  = _ops, },
> - { .name = "call",  .ops  = _ops, },
> - { .name = "callq", .ops  = _ops, },
> - { .name = "cmp",   .ops  = _ops, },
> - { .name = "cmpb",  .ops  = _ops, },
> - { .name = "cmpl",  .ops  = _ops, },
> - { .name = "cmpq",  .ops  = _ops, },
> - { .name = "cmpw",  .ops  = _ops, },
> - { .name = "cmpxch", .ops  = _ops, },
> - { .name = "dec",   .ops  = _ops, },
> - { .name = "decl",  .ops  = _ops, },
> - { .name = "imul",  .ops  = _ops, },
> - { .name = "inc",   .ops  = _ops, },
> - { .name = "incl",  .ops  = _ops, },
> - { .name = "ja",.ops  = _ops, },
> - { .name = "jae",   .ops  = _ops, },
> - { .name = "jb",.ops  = _ops, },
> - { .name = "jbe",   .ops  = _ops, },
> - { .name = "jc",.ops  = _ops, },
> - { .name = "jcxz",  .ops  = _ops, },
> - { .name = "je",.ops  = _ops, },
> - { .name = "jecxz", .ops  = _ops, },
> - { .name = "jg",.ops  = _ops, },
> - { .name = "jge",   .ops  = _ops, },
> - { .name = "jl",.ops  = _ops, },
> - { .name = "jle",   .ops  = _ops, },
> - { .name = "jmp",   .ops  = _ops, },
> - { .name = "jmpq",  .ops  = _ops, },
> - { .name = "jna",   .ops  = _ops, },
> - { .name = "jnae",  .ops  = _ops, },
> - { .name = "jnb",   .ops  = _ops, },
> - { .name = "jnbe",  .ops  = _ops, },
> - { .name = "jnc",   .ops  = _ops, },
> - { .name = "jne",   .ops  = _ops, },
> - { .name = "jng",   .ops  = _ops, },
> - { .name = "jnge",  .ops  = _ops, },
> - { .name = "jnl",   .ops  = _ops, },
> - { .name = "jnle",  .ops  = _ops, },
> - { .name = "jno",   .ops  = _ops, },
> - { .name = "jnp",   .ops  = _ops, },
> - { .name = "jns",   .ops  = _ops, },
> - { .name = "jnz",   .ops  = _ops, },
> - { .name = "jo",.ops  = _ops, },
> - { .name = "jp",.ops  = _ops, },
> - { .name = "jpe",   .ops  = _ops, },
> - { .name = "jpo",   .ops  = _ops, },
> - { .name = "jrcxz", .ops  = _ops, },
> - { .name = "js",.ops  = _ops, },
> - { .name = "jz",.ops  = _ops, },
> - { .name = "lea",   .ops  = _ops, },
> - { .name = "lock",  .ops  = _ops, },
> - { .name = "mov",   .ops  = _ops, },
> - { .name = "movb",  .ops  = _ops, },
> - { .name = "movdqa",.ops  = _ops, },
> - { .name = "movl",  .ops  = _ops, },
> - { .name = "movq",  .ops  = _ops, },
> - { .name = "movslq", .ops  = _ops, },
> - { .name = "movzbl", .ops  = _ops, },
> - { .name = "movzwl", .ops  = _ops, },
> - { .name = "nop",   .ops  = _ops, },
> - { .name = "nopl",  .ops  = _ops, },
> - { .name = "nopw",  .ops  = _ops, },
> - {