[PATCH v8 3/3] ARM: ftrace: Add MODULE_PLTS support

2021-03-30 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Teach ftrace_make_call() and ftrace_make_nop() about PLTs.
Teach PLT code about FTRACE and all its callbacks.
Otherwise the following might happen:

[ cut here ]
WARNING: CPU: 14 PID: 2265 at .../arch/arm/kernel/insn.c:14 
__arm_gen_branch+0x83/0x8c()
...
Hardware name: LSI Axxia AXM55XX
[] (unwind_backtrace) from [] (show_stack+0x11/0x14)
[] (show_stack) from [] (dump_stack+0x81/0xa8)
[] (dump_stack) from [] (warn_slowpath_common+0x69/0x90)
[] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x17/0x1c)
[] (warn_slowpath_null) from [] (__arm_gen_branch+0x83/0x8c)
[] (__arm_gen_branch) from [] (ftrace_make_nop+0xf/0x24)
[] (ftrace_make_nop) from [] 
(ftrace_process_locs+0x27b/0x3e8)
[] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44)
[] (load_module) from [] (SyS_finit_module+0x59/0x84)
[] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18)
---[ end trace e1b64ced7a89adcc ]---
[ cut here ]
WARNING: CPU: 14 PID: 2265 at .../kernel/trace/ftrace.c:1979 
ftrace_bug+0x1b1/0x234()
...
Hardware name: LSI Axxia AXM55XX
[] (unwind_backtrace) from [] (show_stack+0x11/0x14)
[] (show_stack) from [] (dump_stack+0x81/0xa8)
[] (dump_stack) from [] (warn_slowpath_common+0x69/0x90)
[] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x17/0x1c)
[] (warn_slowpath_null) from [] (ftrace_bug+0x1b1/0x234)
[] (ftrace_bug) from [] (ftrace_process_locs+0x285/0x3e8)
[] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44)
[] (load_module) from [] (SyS_finit_module+0x59/0x84)
[] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18)
---[ end trace e1b64ced7a89adcd ]---
ftrace failed to modify [] 0xe9ef7006
actual: 02:f0:3b:fa
ftrace record flags: 0
(0) expected tramp: c0314265

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/ftrace.h |  3 +++
 arch/arm/include/asm/module.h |  1 +
 arch/arm/kernel/ftrace.c  | 46 +++
 arch/arm/kernel/module-plts.c | 44 +
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 48ec1d0..a4dbac0 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -15,6 +15,9 @@ extern void __gnu_mcount_nc(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 struct dyn_arch_ftrace {
+#ifdef CONFIG_ARM_MODULE_PLTS
+   struct module *mod;
+#endif
 };
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 09b9ad5..cfffae6 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -30,6 +30,7 @@ struct plt_entries {
 
 struct mod_plt_sec {
struct elf32_shdr   *plt;
+   struct plt_entries  *plt_ent;
int plt_count;
 };
 
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 61de817..3c83b5d 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -68,9 +68,10 @@ int ftrace_arch_code_modify_post_process(void)
return 0;
 }
 
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr,
+bool warn)
 {
-   return arm_gen_branch_link(pc, addr, true);
+   return arm_gen_branch_link(pc, addr, warn);
 }
 
 static int ftrace_modify_code(unsigned long pc, unsigned long old,
@@ -104,14 +105,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
int ret;
 
pc = (unsigned long)_call;
-   new = ftrace_call_replace(pc, (unsigned long)func);
+   new = ftrace_call_replace(pc, (unsigned long)func, true);
 
ret = ftrace_modify_code(pc, 0, new, false);
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
if (!ret) {
pc = (unsigned long)_regs_call;
-   new = ftrace_call_replace(pc, (unsigned long)func);
+   new = ftrace_call_replace(pc, (unsigned long)func, true);
 
ret = ftrace_modify_code(pc, 0, new, false);
}
@@ -124,10 +125,22 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned 
long addr)
 {
unsigned long new, old;
unsigned long ip = rec->ip;
+   unsigned long aaddr = adjust_address(rec, addr);
+   struct module *mod = NULL;
+
+#ifdef CONFIG_ARM_MODULE_PLTS
+   mod = rec->arch.mod;
+#endif
 
old = ftrace_nop_replace(rec);
 
-   new = ftrace_call_replace(ip, adjust_address(rec, addr));
+   new = ftrace_call_replace(ip, aaddr, !mod);
+#ifdef CONFIG_ARM_MODULE_PLTS
+   if (!new && mod) {
+   aaddr = get_module_plt(mod, ip, aaddr);
+   new = ftrace_call_replace(ip, aaddr, true);
+   }
+#endif
 
return ftrace_modify_code(rec->ip, old, new, true);
 }
@@ -140,9 +153,9 @@ int ftrace_modify_call(struct dyn_ftrace *re

[PATCH v8 2/3] ARM: Add warn suppress parameter to arm_gen_branch_link()

2021-03-30 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Will be used in the following patch. No functional change.

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/insn.h |  8 
 arch/arm/kernel/ftrace.c|  2 +-
 arch/arm/kernel/insn.c  | 19 ++-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
index f20e08a..5475cbf 100644
--- a/arch/arm/include/asm/insn.h
+++ b/arch/arm/include/asm/insn.h
@@ -13,18 +13,18 @@ arm_gen_nop(void)
 }
 
 unsigned long
-__arm_gen_branch(unsigned long pc, unsigned long addr, bool link);
+__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn);
 
 static inline unsigned long
 arm_gen_branch(unsigned long pc, unsigned long addr)
 {
-   return __arm_gen_branch(pc, addr, false);
+   return __arm_gen_branch(pc, addr, false, true);
 }
 
 static inline unsigned long
-arm_gen_branch_link(unsigned long pc, unsigned long addr)
+arm_gen_branch_link(unsigned long pc, unsigned long addr, bool warn)
 {
-   return __arm_gen_branch(pc, addr, true);
+   return __arm_gen_branch(pc, addr, true, warn);
 }
 
 #endif
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 9a79ef6..61de817 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -70,7 +70,7 @@ int ftrace_arch_code_modify_post_process(void)
 
 static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
-   return arm_gen_branch_link(pc, addr);
+   return arm_gen_branch_link(pc, addr, true);
 }
 
 static int ftrace_modify_code(unsigned long pc, unsigned long old,
diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c
index 2e844b7..db0acbb 100644
--- a/arch/arm/kernel/insn.c
+++ b/arch/arm/kernel/insn.c
@@ -3,8 +3,9 @@
 #include 
 #include 
 
-static unsigned long
-__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link)
+static unsigned long __arm_gen_branch_thumb2(unsigned long pc,
+unsigned long addr, bool link,
+bool warn)
 {
unsigned long s, j1, j2, i1, i2, imm10, imm11;
unsigned long first, second;
@@ -12,7 +13,7 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, 
bool link)
 
offset = (long)addr - (long)(pc + 4);
if (offset < -16777216 || offset > 16777214) {
-   WARN_ON_ONCE(1);
+   WARN_ON_ONCE(warn);
return 0;
}
 
@@ -33,8 +34,8 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, 
bool link)
return __opcode_thumb32_compose(first, second);
 }
 
-static unsigned long
-__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link)
+static unsigned long __arm_gen_branch_arm(unsigned long pc, unsigned long addr,
+ bool link, bool warn)
 {
unsigned long opcode = 0xea00;
long offset;
@@ -44,7 +45,7 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, 
bool link)
 
offset = (long)addr - (long)(pc + 8);
if (unlikely(offset < -33554432 || offset > 33554428)) {
-   WARN_ON_ONCE(1);
+   WARN_ON_ONCE(warn);
return 0;
}
 
@@ -54,10 +55,10 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, 
bool link)
 }
 
 unsigned long
-__arm_gen_branch(unsigned long pc, unsigned long addr, bool link)
+__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn)
 {
if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
-   return __arm_gen_branch_thumb2(pc, addr, link);
+   return __arm_gen_branch_thumb2(pc, addr, link, warn);
else
-   return __arm_gen_branch_arm(pc, addr, link);
+   return __arm_gen_branch_arm(pc, addr, link, warn);
 }
-- 
2.10.2



[PATCH v8 1/3] ARM: PLT: Move struct plt_entries definition to header

2021-03-30 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

No functional change, later it will be re-used in several files.

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/module.h | 9 +
 arch/arm/kernel/module-plts.c | 9 -
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 4b0df09..09b9ad5 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -19,6 +19,15 @@ enum {
 };
 #endif
 
+#define PLT_ENT_STRIDE L1_CACHE_BYTES
+#define PLT_ENT_COUNT  (PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE   (sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+struct plt_entries {
+   u32 ldr[PLT_ENT_COUNT];
+   u32 lit[PLT_ENT_COUNT];
+};
+
 struct mod_plt_sec {
struct elf32_shdr   *plt;
int plt_count;
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 6e626ab..d330e9e 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -12,10 +12,6 @@
 #include 
 #include 
 
-#define PLT_ENT_STRIDE L1_CACHE_BYTES
-#define PLT_ENT_COUNT  (PLT_ENT_STRIDE / sizeof(u32))
-#define PLT_ENT_SIZE   (sizeof(struct plt_entries) / PLT_ENT_COUNT)
-
 #ifdef CONFIG_THUMB2_KERNEL
 #define PLT_ENT_LDR__opcode_to_mem_thumb32(0xf8dff000 | \
(PLT_ENT_STRIDE - 4))
@@ -24,11 +20,6 @@
(PLT_ENT_STRIDE - 8))
 #endif
 
-struct plt_entries {
-   u32 ldr[PLT_ENT_COUNT];
-   u32 lit[PLT_ENT_COUNT];
-};
-
 static bool in_init(const struct module *mod, unsigned long loc)
 {
return loc - (u32)mod->init_layout.base < mod->init_layout.size;
-- 
2.10.2



[PATCH v8 0/3] ARM: Implement MODULE_PLT support in FTRACE

2021-03-30 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

FTRACE's function tracer currently doesn't always work on ARM with
MODULE_PLT option enabled. If the module is loaded too far, FTRACE's
code modifier cannot cope with introduced veneers and turns the
function tracer off globally.

ARM64 already has a solution for the problem, refer to the following
patches:

arm64: ftrace: emit ftrace-mod.o contents through code
arm64: module-plts: factor out PLT generation code for ftrace
arm64: ftrace: fix !CONFIG_ARM64_MODULE_PLTS kernels
arm64: ftrace: fix building without CONFIG_MODULES
arm64: ftrace: add support for far branches to dynamic ftrace
arm64: ftrace: don't validate branch via PLT in ftrace_make_nop()

But the presented ARM variant has just a half of the footprint in terms of
the changed LoCs. It also retains the code validation-before-modification
instead of switching it off.

Changelog:
v8:
* Add warn suppress parameter to arm_gen_branch_link()
v7:
* rebased
v6:
* rebased
v5:
* BUILD_BUG_ON() ensures fixed_plts[] always fits one PLT block
* use "for" loop instead of "while"
* scripts/recordmcount is filtering reloc types
v4:
* Fixed build without CONFIG_FUNCTION_TRACER
* Reorganized pre-allocated PLTs handling in get_module_plt(),
  now compiler eliminates the whole FTRACE-related handling code
if ARRAY_SIZE(fixed_plts) == 0
v3:
* Only extend struct dyn_arch_ftrace when ARM_MODULE_PLTS is enabled
v2:
* As suggested by Steven Rostedt, refrain from tree-wide API modification,
  save module pointer in struct dyn_arch_ftrace instead (PowerPC way)

Alexander Sverdlin (3):
  ARM: PLT: Move struct plt_entries definition to header
  ARM: Add warn suppress parameter to arm_gen_branch_link()
  ARM: ftrace: Add MODULE_PLTS support

 arch/arm/include/asm/ftrace.h |  3 +++
 arch/arm/include/asm/insn.h   |  8 +++
 arch/arm/include/asm/module.h | 10 +
 arch/arm/kernel/ftrace.c  | 46 +---
 arch/arm/kernel/insn.c| 19 +
 arch/arm/kernel/module-plts.c | 49 +--
 6 files changed, 103 insertions(+), 32 deletions(-)

-- 
2.10.2



[PATCH v3] gpio: pl061: Support implementations without GPIOINTR line

2021-03-19 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

There are several implementations of PL061 which lack GPIOINTR signal in
hardware and only have individual GPIOMIS[7:0] interrupts. Use the
hierarchical interrupt support of the gpiolib in these cases (if at least 8
IRQs are configured for the PL061).

One in-tree example is arch/arm/boot/dts/axm55xx.dtsi, PL061 instances have
8 IRQs defined, but current driver supports only the first one, so only one
pin would work as IRQ trigger.

Link: 
https://lore.kernel.org/linux-gpio/CACRpkdZpYzpMDWqJobSYH=jhgb74hbcqihotexs+svyo6sr...@mail.gmail.com/
Signed-off-by: Alexander Sverdlin 
---
Changelog:
v3: pl061_populate_parent_fwspec() -> pl061_populate_parent_alloc_arg()
v2: Add pl061_populate_parent_fwspec()

 drivers/gpio/Kconfig  |  1 +
 drivers/gpio/gpio-pl061.c | 97 +++
 2 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index e3607ec..456c0a5 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -469,6 +469,7 @@ config GPIO_PL061
depends on ARM_AMBA
select IRQ_DOMAIN
select GPIOLIB_IRQCHIP
+   select IRQ_DOMAIN_HIERARCHY
help
  Say yes here to support the PrimeCell PL061 GPIO device
 
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index f1b53dd..5bfb5f6 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define GPIODIR 0x400
 #define GPIOIS  0x404
@@ -283,6 +284,69 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned 
int state)
return irq_set_irq_wake(pl061->parent_irq, state);
 }
 
+static int pl061_child_to_parent_hwirq(struct gpio_chip *gc, unsigned int 
child,
+  unsigned int child_type,
+  unsigned int *parent,
+  unsigned int *parent_type)
+{
+   struct amba_device *adev = to_amba_device(gc->parent);
+   unsigned int irq = adev->irq[child];
+   struct irq_data *d = irq_get_irq_data(irq);
+
+   if (!d)
+   return -EINVAL;
+
+   *parent_type = irqd_get_trigger_type(d);
+   *parent = irqd_to_hwirq(d);
+   return 0;
+}
+
+#ifdef CONFIG_OF
+static void *pl061_populate_parent_alloc_arg(struct gpio_chip *gc,
+unsigned int parent_hwirq,
+unsigned int parent_type)
+{
+   struct device_node *dn = to_of_node(gc->irq.fwnode);
+   struct of_phandle_args pha;
+   struct irq_fwspec *fwspec;
+   int i;
+
+   if (WARN_ON(!dn))
+   return NULL;
+
+   fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
+   if (!fwspec)
+   return NULL;
+
+   /*
+* This brute-force here is because of the fact PL061 is often paired
+* with GIC-v3, which has 3-cell IRQ specifier (SPI/PPI selection), and
+* unexpected range shifts in hwirq mapping (SPI IRQs are shifted by
+* 32). So this is about reversing of gic_irq_domain_translate().
+*/
+   for (i = 0; i < PL061_GPIO_NR; i++) {
+   unsigned int p, pt;
+
+   if (pl061_child_to_parent_hwirq(gc, i, parent_type, , ))
+   continue;
+   if (p == parent_hwirq)
+   break;
+   }
+   if (WARN_ON(i == PL061_GPIO_NR))
+   return NULL;
+
+   if (WARN_ON(of_irq_parse_one(dn, i, )))
+   return NULL;
+
+   fwspec->fwnode = gc->irq.parent_domain->fwnode;
+   fwspec->param_count = pha.args_count;
+   for (i = 0; i < pha.args_count; i++)
+   fwspec->param[i] = pha.args[i];
+
+   return fwspec;
+}
+#endif
+
 static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
 {
struct device *dev = >dev;
@@ -330,16 +394,35 @@ static int pl061_probe(struct amba_device *adev, const 
struct amba_id *id)
 
girq = >gc.irq;
girq->chip = >irq_chip;
-   girq->parent_handler = pl061_irq_handler;
-   girq->num_parents = 1;
-   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
-GFP_KERNEL);
-   if (!girq->parents)
-   return -ENOMEM;
-   girq->parents[0] = irq;
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
 
+   /*
+* There are some PL061 implementations which lack GPIOINTR in hardware
+* and only have individual GPIOMIS[7:0] signals. We distinguish them by
+* the number of IRQs assigned to the AMBA device.
+*/
+   if (adev->irq[PL061_GPIO_NR - 1]) {
+   girq->fwnode = dev->fwnode;
+   girq->parent_domain =
+   irq_get_irq_dat

[PATCH v2] gpio: pl061: Support implementations without GPIOINTR line

2021-03-18 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

There are several implementations of PL061 which lack GPIOINTR signal in
hardware and only have individual GPIOMIS[7:0] interrupts. Use the
hierarchical interrupt support of the gpiolib in these cases (if at least 8
IRQs are configured for the PL061).

One in-tree example is arch/arm/boot/dts/axm55xx.dtsi, PL061 instances have
8 IRQs defined, but current driver supports only the first one, so only one
pin would work as IRQ trigger.

Link: 
https://lore.kernel.org/linux-gpio/CACRpkdZpYzpMDWqJobSYH=jhgb74hbcqihotexs+svyo6sr...@mail.gmail.com/
Signed-off-by: Alexander Sverdlin 
---
Changelog:
v2: Add pl061_populate_parent_fwspec()

 drivers/gpio/Kconfig  |  1 +
 drivers/gpio/gpio-pl061.c | 91 +++
 2 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index e3607ec..456c0a5 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -469,6 +469,7 @@ config GPIO_PL061
depends on ARM_AMBA
select IRQ_DOMAIN
select GPIOLIB_IRQCHIP
+   select IRQ_DOMAIN_HIERARCHY
help
  Say yes here to support the PrimeCell PL061 GPIO device
 
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index f1b53dd..e95714a 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define GPIODIR 0x400
 #define GPIOIS  0x404
@@ -283,6 +284,64 @@ static int pl061_irq_set_wake(struct irq_data *d, unsigned 
int state)
return irq_set_irq_wake(pl061->parent_irq, state);
 }
 
+static int pl061_child_to_parent_hwirq(struct gpio_chip *gc, unsigned int 
child,
+  unsigned int child_type,
+  unsigned int *parent,
+  unsigned int *parent_type)
+{
+   struct amba_device *adev = to_amba_device(gc->parent);
+   unsigned int irq = adev->irq[child];
+   struct irq_data *d = irq_get_irq_data(irq);
+
+   if (!d)
+   return -EINVAL;
+
+   *parent_type = irqd_get_trigger_type(d);
+   *parent = irqd_to_hwirq(d);
+   return 0;
+}
+
+#ifdef CONFIG_OF
+void pl061_populate_parent_fwspec(struct gpio_chip *gc,
+ struct irq_fwspec *fwspec,
+ unsigned int parent_hwirq,
+ unsigned int parent_type)
+{
+   struct device_node *dn = to_of_node(gc->irq.fwnode);
+   struct of_phandle_args pha;
+   int i;
+
+   fwspec->param_count = 0;
+
+   if (WARN_ON(!dn))
+   return;
+
+   /*
+* This brute-force here is because of the fact PL061 is often paired
+* with GIC-v3, which has 3-cell IRQ specifier (SPI/PPI selection), and
+* unexpected range shifts in hwirq mapping (SPI IRQs are shifted by
+* 32). So this is about reversing of gic_irq_domain_translate().
+*/
+   for (i = 0; i < PL061_GPIO_NR; i++) {
+   unsigned int p, pt;
+
+   if (pl061_child_to_parent_hwirq(gc, i, parent_type, , ))
+   continue;
+   if (p == parent_hwirq)
+   break;
+   }
+   if (WARN_ON(i == PL061_GPIO_NR))
+   return;
+
+   if (WARN_ON(of_irq_parse_one(dn, i, )))
+   return;
+
+   fwspec->param_count = pha.args_count;
+   for (i = 0; i < pha.args_count; i++)
+   fwspec->param[i] = pha.args[i];
+}
+#endif
+
 static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
 {
struct device *dev = >dev;
@@ -330,16 +389,34 @@ static int pl061_probe(struct amba_device *adev, const 
struct amba_id *id)
 
girq = >gc.irq;
girq->chip = >irq_chip;
-   girq->parent_handler = pl061_irq_handler;
-   girq->num_parents = 1;
-   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
-GFP_KERNEL);
-   if (!girq->parents)
-   return -ENOMEM;
-   girq->parents[0] = irq;
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
 
+   /*
+* There are some PL061 implementations which lack GPIOINTR in hardware
+* and only have individual GPIOMIS[7:0] signals. We distinguish them by
+* the number of IRQs assigned to the AMBA device.
+*/
+   if (!adev->irq[PL061_GPIO_NR - 1]) {
+   WARN_ON(adev->irq[1]);
+
+   girq->parent_handler = pl061_irq_handler;
+   girq->num_parents = 1;
+   girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+GFP_KERNEL);
+   if (!girq->parents)
+   return -ENOMEM;
+   girq->parents[0] =

[PATCH] rapidio/mport_cdev: Fix race in mport_cdev_release()

2021-03-17 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

While get_dma_channel() is protected against concurrent calls, there is a
race against kref_put() in mport_cdev_release():

CPU0CPU1

get_dma_channel()
 kref_init(>md->dma_ref);
 ...
mport_cdev_release_dma()
 kref_put(>dma_ref,
  mport_release_def_dma);
get_dma_channel()
 if (priv->md->dma_chan) {
  ...
  kref_get(>md->dma_ref);
  mport_release_def_dma()
   md->dma_chan = NULL;

which may appear like this:

[ cut here ]
WARNING: CPU: 1 PID: 12057 at .../linux/include/linux/kref.h:46 
rio_dma_transfer.isra.12+0x8e0/0xbe8 [rio_mport_cdev]
 ...
CPU: 1 PID: 12057 Comm: ... Tainted: G   O 4.9.109-... #1
Stack : ...

Call Trace:
[] show_stack+0x90/0xb0
[] dump_stack+0x88/0xc0
[] __warn+0x108/0x120
[] rio_dma_transfer.isra.12+0x8e0/0xbe8 [rio_mport_cdev]
[] mport_cdev_ioctl+0x604/0x2988 [rio_mport_cdev]
[] do_vfs_ioctl+0xb8/0x780
[] SyS_ioctl+0x88/0xc0
[] syscall_common+0x34/0x58
---[ end trace 78842d4915cfc502 ]---

Fixes: e8de370188d0 ("rapidio: add mport char device driver")
Cc: sta...@vger.kernel.org
Signed-off-by: Alexander Sverdlin 
---
 drivers/rapidio/devices/rio_mport_cdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rapidio/devices/rio_mport_cdev.c 
b/drivers/rapidio/devices/rio_mport_cdev.c
index 8155f59..a6276dc 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -1980,6 +1980,7 @@ static void mport_cdev_release_dma(struct file *filp)
current->comm, task_pid_nr(current), wret);
}
 
+   mutex_lock(>dma_lock);
if (priv->dmach != priv->md->dma_chan) {
rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
   filp, current->comm, task_pid_nr(current));
@@ -1990,6 +1991,7 @@ static void mport_cdev_release_dma(struct file *filp)
}
 
priv->dmach = NULL;
+   mutex_unlock(>dma_lock);
 }
 #else
 #define mport_cdev_release_dma(priv) do {} while (0)
-- 
2.4.6



[PATCH 2/2] mtd: char: Get rid of Big MTD Lock

2021-02-17 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Get rid of central chrdev MTD lock, which prevents simultaneous operations
on completely independent physical MTD chips. Replace it with newly
introduced per-master mutex.

Signed-off-by: Alexander Sverdlin 
---
 drivers/mtd/mtdchar.c   | 14 --
 drivers/mtd/mtdcore.c   |  1 +
 include/linux/mtd/mtd.h |  1 +
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index f31390d..57c4a2f 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -27,8 +27,6 @@
 
 #include "mtdcore.h"
 
-static DEFINE_MUTEX(mtd_mutex);
-
 /*
  * Data structure to hold the pointer to the mtd device as well
  * as mode information of various use cases.
@@ -1020,11 +1018,14 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, 
u_long arg)
 
 static long mtdchar_unlocked_ioctl(struct file *file, u_int cmd, u_long arg)
 {
+   struct mtd_file_info *mfi = file->private_data;
+   struct mtd_info *mtd = mfi->mtd;
+   struct mtd_info *master = mtd_get_master(mtd);
int ret;
 
-   mutex_lock(_mutex);
+   mutex_lock(>master.chrdev_lock);
ret = mtdchar_ioctl(file, cmd, arg);
-   mutex_unlock(_mutex);
+   mutex_unlock(>master.chrdev_lock);
 
return ret;
 }
@@ -1045,10 +1046,11 @@ static long mtdchar_compat_ioctl(struct file *file, 
unsigned int cmd,
 {
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
+   struct mtd_info *master = mtd_get_master(mtd);
void __user *argp = compat_ptr(arg);
int ret = 0;
 
-   mutex_lock(_mutex);
+   mutex_lock(>master.chrdev_lock);
 
switch (cmd) {
case MEMWRITEOOB32:
@@ -1111,7 +1113,7 @@ static long mtdchar_compat_ioctl(struct file *file, 
unsigned int cmd,
ret = mtdchar_ioctl(file, cmd, (unsigned long)argp);
}
 
-   mutex_unlock(_mutex);
+   mutex_unlock(>master.chrdev_lock);
 
return ret;
 }
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 2d6423d..0b09597 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -773,6 +773,7 @@ static void mtd_set_dev_defaults(struct mtd_info *mtd)
 
INIT_LIST_HEAD(>partitions);
mutex_init(>master.partitions_lock);
+   mutex_init(>master.chrdev_lock);
 }
 
 /**
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 157357e..ceabc2c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -229,6 +229,7 @@ struct mtd_part {
  */
 struct mtd_master {
struct mutex partitions_lock;
+   struct mutex chrdev_lock;
unsigned int suspended : 1;
 };
 
-- 
2.10.2



[PATCH 1/2] mtd: char: Drop mtd_mutex usage from mtdchar_open()

2021-02-17 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

The mutex looks unnecessary in this function; remove it here,
with the goal of removing it completely later.

Signed-off-by: Alexander Sverdlin 
---
 drivers/mtd/mtdchar.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 323035d..f31390d 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -58,13 +58,10 @@ static int mtdchar_open(struct inode *inode, struct file 
*file)
if ((file->f_mode & FMODE_WRITE) && (minor & 1))
return -EACCES;
 
-   mutex_lock(_mutex);
mtd = get_mtd_device(NULL, devnum);
 
-   if (IS_ERR(mtd)) {
-   ret = PTR_ERR(mtd);
-   goto out;
-   }
+   if (IS_ERR(mtd))
+   return PTR_ERR(mtd);
 
if (mtd->type == MTD_ABSENT) {
ret = -ENODEV;
@@ -84,13 +81,10 @@ static int mtdchar_open(struct inode *inode, struct file 
*file)
}
mfi->mtd = mtd;
file->private_data = mfi;
-   mutex_unlock(_mutex);
return 0;
 
 out1:
put_mtd_device(mtd);
-out:
-   mutex_unlock(_mutex);
return ret;
 } /* mtdchar_open */
 
-- 
2.10.2



[PATCH 2/6] MIPS: Implement atomic_cmpxchg_relaxed()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

This will save one SYNCW on Octeon and improve tight
uncontended spinlock loop performance by 17%.

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/include/asm/atomic.h  | 3 +++
 arch/mips/include/asm/cmpxchg.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index f904084..a4e5116 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -264,4 +264,7 @@ ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd)
 
 #undef ATOMIC_SIP_OP
 
+#define atomic_cmpxchg_relaxed(v, o, n) \
+   (cmpxchg_relaxed(&((v)->counter), (o), (n)))
+
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 5b0b3a6..620f01a 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -182,6 +182,8 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long 
old,
  (unsigned long)(__typeof__(*(ptr)))(new), \
  sizeof(*(ptr
 
+#define cmpxchg_relaxedcmpxchg_local
+
 #define cmpxchg(ptr, old, new) \
 ({ \
__typeof__(*(ptr)) __res;   \
-- 
2.10.2



[PATCH 4/6] MIPS: Octeon: qspinlock: Exclude mmiowb()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

On Octeon mmiowb() is SYNCW, which is already contained in
smp_store_release(). Removing the superfluous barrier brings around 10%
performance improvement on uncontended tight spinlock loops.

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/include/asm/spinlock.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 0a707f3..fbe97b4 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -21,8 +21,10 @@
  */
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
+#ifndef CONFIG_CPU_CAVIUM_OCTEON
/* This could be optimised with ARCH_HAS_MMIOWB */
mmiowb();
+#endif
smp_store_release(>locked, 0);
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
nudge_writes();
-- 
2.10.2



[PATCH 0/6] MIPS: qspinlock: Try to reduce the spinlock regression

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

The switch to qspinlock brought a massive regression in spinlocks on
Octeon. Even after applying this series (and a patch in the
ARCH-independent code [1]), a tight contended (6 cores, 1 thread per core)
spinlock loop is still 50% slower than the previous ticket-based implementation.

This series implements some optimizations and has been tested on a 6-core
Octeon machine.

[1] Link: https://lkml.org/lkml/2021/1/27/1137

Alexander Sverdlin (6):
  MIPS: Octeon: Implement __smp_store_release()
  MIPS: Implement atomic_cmpxchg_relaxed()
  MIPS: Octeon: qspinlock: Flush write buffer
  MIPS: Octeon: qspinlock: Exclude mmiowb()
  MIPS: Provide {atomic_}xchg_relaxed()
  MIPS: cmpxchg: Use cmpxchg_local() for {cmp_}xchg_small()

 arch/mips/include/asm/atomic.h   | 5 +
 arch/mips/include/asm/barrier.h  | 9 +
 arch/mips/include/asm/cmpxchg.h  | 6 ++
 arch/mips/include/asm/spinlock.h | 5 +
 arch/mips/kernel/cmpxchg.c   | 4 ++--
 5 files changed, 27 insertions(+), 2 deletions(-)

-- 
2.10.2



[PATCH 6/6] MIPS: cmpxchg: Use cmpxchg_local() for {cmp_}xchg_small()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

It makes no sense to fold smp_mb__before_llsc()/smp_llsc_mb() again and
again, leave only one barrier pair in the outer function.

This removes one SYNCW from __xchg_small() and brings around 10%
performance improvement in a tight spinlock loop with 6 threads on a 6 core
Octeon.

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/kernel/cmpxchg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
index 89107de..122e85f 100644
--- a/arch/mips/kernel/cmpxchg.c
+++ b/arch/mips/kernel/cmpxchg.c
@@ -41,7 +41,7 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long 
val, unsigned int s
do {
old32 = load32;
new32 = (load32 & ~mask) | (val << shift);
-   load32 = cmpxchg(ptr32, old32, new32);
+   load32 = cmpxchg_local(ptr32, old32, new32);
} while (load32 != old32);
 
return (load32 & mask) >> shift;
@@ -97,7 +97,7 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned 
long old,
 */
old32 = (load32 & ~mask) | (old << shift);
new32 = (load32 & ~mask) | (new << shift);
-   load32 = cmpxchg(ptr32, old32, new32);
+   load32 = cmpxchg_local(ptr32, old32, new32);
if (load32 == old32)
return old;
}
-- 
2.10.2



[PATCH 1/6] MIPS: Octeon: Implement __smp_store_release()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

On Octeon smp_mb() translates to SYNC while wmb+rmb translates to SYNCW
only. This brings around 10% performance improvement on tight uncontended
spinlock loops.

Refer to commit 500c2e1fdbcc ("MIPS: Optimize spinlocks.") and the link
below.

On 6-core Octeon machine:
sysbench --test=mutex --num-threads=64 --memory-scope=local run

w/o patch:  1.60s
with patch: 1.51s

Link: https://lore.kernel.org/lkml/5644d08d.4080...@caviumnetworks.com/
Signed-off-by: Alexander Sverdlin 
---
 arch/mips/include/asm/barrier.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 49ff172..24c3f2c 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -113,6 +113,15 @@ static inline void wmb(void)
".set arch=octeon\n\t"  \
"syncw\n\t" \
".set pop" : : : "memory")
+
+#define __smp_store_release(p, v)  \
+do {   \
+   compiletime_assert_atomic_type(*p); \
+   __smp_wmb();\
+   __smp_rmb();\
+   WRITE_ONCE(*p, v);  \
+} while (0)
+
 #else
 #define smp_mb__before_llsc() smp_llsc_mb()
 #define __smp_mb__before_llsc() smp_llsc_mb()
-- 
2.10.2



[PATCH 5/6] MIPS: Provide {atomic_}xchg_relaxed()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

This has the effect of removing one redundant SYNCW from
queued_spin_lock_slowpath() on Octeon.

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/include/asm/atomic.h  | 2 ++
 arch/mips/include/asm/cmpxchg.h | 4 
 2 files changed, 6 insertions(+)

diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index a4e5116..3b0f54b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -266,5 +266,7 @@ ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd)
 
 #define atomic_cmpxchg_relaxed(v, o, n) \
(cmpxchg_relaxed(&((v)->counter), (o), (n)))
+#define atomic_xchg_relaxed(v, new) \
+   (xchg_relaxed(&((v)->counter), (new)))
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 620f01a..7830d81 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -110,6 +110,10 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, 
int size)
__res;  \
 })
 
+#define xchg_relaxed(ptr, x)   \
+   ((__typeof__(*(ptr)))   \
+   __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))))
+
 #define __cmpxchg_asm(ld, st, m, old, new) \
 ({ \
__typeof(*(m)) __ret;   \
-- 
2.10.2



[PATCH 3/6] MIPS: Octeon: qspinlock: Flush write buffer

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Flushing the write buffer brings around 10% performance on the tight
uncontended spinlock loops on Octeon. Refer to commit 500c2e1fdbcc
("MIPS: Optimize spinlocks.").

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/include/asm/spinlock.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 8a88eb2..0a707f3 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -24,6 +24,9 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
/* This could be optimised with ARCH_HAS_MMIOWB */
mmiowb();
	smp_store_release(&lock->locked, 0);
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+   nudge_writes();
+#endif
 }
 
 #include 
-- 
2.10.2



[PATCH 2/2] ARM: mcs_spinlock: Drop smp_wmb in arch_mcs_spin_lock_contended()

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Drop smp_wmb in arch_mcs_spin_lock_contended() after adding it into
ARCH-independent code.

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/mcs_spinlock.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/include/asm/mcs_spinlock.h 
b/arch/arm/include/asm/mcs_spinlock.h
index 529d2cf..d8fa921 100644
--- a/arch/arm/include/asm/mcs_spinlock.h
+++ b/arch/arm/include/asm/mcs_spinlock.h
@@ -8,8 +8,6 @@
 /* MCS spin-locking. */
 #define arch_mcs_spin_lock_contended(lock) \
 do {   \
-   /* Ensure prior stores are observed before we enter wfe. */ \
-   smp_mb();   \
while (!(smp_load_acquire(lock)))   \
wfe();  \
 } while (0)\
-- 
2.10.2



[PATCH 1/2] qspinlock: Ensure writes are pushed out of core write buffer

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Ensure writes are pushed out of core write buffer to prevent waiting code
on other cores from spinning longer than necessary.

6 threads running tight spinlock loop competing for the same lock
on 6 cores on MIPS/Octeon do 100 iterations...

before the patch in:4.3 sec
after the patch in: 1.2 sec

Same 6-core Octeon machine:
sysbench --test=mutex --num-threads=64 --memory-scope=local run

w/o patch:  1.53s
with patch: 1.28s

This will also allow to remove the smp_wmb() in
arch/arm/include/asm/mcs_spinlock.h (was it actually addressing the same
issue?).

Finally our internal quite diverse test suite of different IPC/network
aspects didn't detect any regressions on ARM/ARM64/x86_64.

Signed-off-by: Alexander Sverdlin 
---
 kernel/locking/mcs_spinlock.h | 5 +
 kernel/locking/qspinlock.c| 6 ++
 2 files changed, 11 insertions(+)

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5e10153..10e497a 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -89,6 +89,11 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct 
mcs_spinlock *node)
return;
}
WRITE_ONCE(prev->next, node);
+   /*
+* This is necessary to make sure that the corresponding "while" in the
+* mcs_spin_unlock() doesn't loop forever
+*/
+   smp_wmb();
 
/* Wait until the lock holder passes the lock down. */
	arch_mcs_spin_lock_contended(&node->locked);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index cbff6ba..577fe01 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -469,6 +469,12 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 
val)
 
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
+   /*
+* This is necessary to make sure that the corresponding
+* smp_cond_load_relaxed() below (running on another core)
+* doesn't spin forever.
+*/
+   smp_wmb();
 
pv_wait_node(node, prev);
	arch_mcs_spin_lock_contended(&node->locked);
-- 
2.10.2



[PATCH v7 2/2] ARM: ftrace: Add MODULE_PLTS support

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Teach ftrace_make_call() and ftrace_make_nop() about PLTs.
Teach PLT code about FTRACE and all its callbacks.
Otherwise the following might happen:

[ cut here ]
WARNING: CPU: 14 PID: 2265 at .../arch/arm/kernel/insn.c:14 
__arm_gen_branch+0x83/0x8c()
...
Hardware name: LSI Axxia AXM55XX
[] (unwind_backtrace) from [] (show_stack+0x11/0x14)
[] (show_stack) from [] (dump_stack+0x81/0xa8)
[] (dump_stack) from [] (warn_slowpath_common+0x69/0x90)
[] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x17/0x1c)
[] (warn_slowpath_null) from [] (__arm_gen_branch+0x83/0x8c)
[] (__arm_gen_branch) from [] (ftrace_make_nop+0xf/0x24)
[] (ftrace_make_nop) from [] 
(ftrace_process_locs+0x27b/0x3e8)
[] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44)
[] (load_module) from [] (SyS_finit_module+0x59/0x84)
[] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18)
---[ end trace e1b64ced7a89adcc ]---
[ cut here ]
WARNING: CPU: 14 PID: 2265 at .../kernel/trace/ftrace.c:1979 
ftrace_bug+0x1b1/0x234()
...
Hardware name: LSI Axxia AXM55XX
[] (unwind_backtrace) from [] (show_stack+0x11/0x14)
[] (show_stack) from [] (dump_stack+0x81/0xa8)
[] (dump_stack) from [] (warn_slowpath_common+0x69/0x90)
[] (warn_slowpath_common) from [] 
(warn_slowpath_null+0x17/0x1c)
[] (warn_slowpath_null) from [] (ftrace_bug+0x1b1/0x234)
[] (ftrace_bug) from [] (ftrace_process_locs+0x285/0x3e8)
[] (ftrace_process_locs) from [] (load_module+0x11e9/0x1a44)
[] (load_module) from [] (SyS_finit_module+0x59/0x84)
[] (SyS_finit_module) from [] (ret_fast_syscall+0x1/0x18)
---[ end trace e1b64ced7a89adcd ]---
ftrace failed to modify [] 0xe9ef7006
actual: 02:f0:3b:fa
ftrace record flags: 0
(0) expected tramp: c0314265

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/ftrace.h |  3 +++
 arch/arm/include/asm/module.h |  1 +
 arch/arm/kernel/ftrace.c  | 46 +--
 arch/arm/kernel/module-plts.c | 44 +
 4 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 48ec1d0..a4dbac0 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -15,6 +15,9 @@ extern void __gnu_mcount_nc(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 struct dyn_arch_ftrace {
+#ifdef CONFIG_ARM_MODULE_PLTS
+   struct module *mod;
+#endif
 };
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 09b9ad5..cfffae6 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -30,6 +30,7 @@ struct plt_entries {
 
 struct mod_plt_sec {
struct elf32_shdr   *plt;
+   struct plt_entries  *plt_ent;
int plt_count;
 };
 
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 9a79ef6..fa867a5 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -70,6 +70,19 @@ int ftrace_arch_code_modify_post_process(void)
 
 static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
+   s32 offset = addr - pc;
+   s32 blim = 0xfe08;
+   s32 flim = 0x0204;
+
+   if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+   blim = 0xff04;
+   flim = 0x0102;
+   }
+
+   if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+   (offset < blim || offset > flim))
+   return 0;
+
return arm_gen_branch_link(pc, addr);
 }
 
@@ -124,10 +137,22 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned 
long addr)
 {
unsigned long new, old;
unsigned long ip = rec->ip;
+   unsigned long aaddr = adjust_address(rec, addr);
 
old = ftrace_nop_replace(rec);
 
-   new = ftrace_call_replace(ip, adjust_address(rec, addr));
+   new = ftrace_call_replace(ip, aaddr);
+
+#ifdef CONFIG_ARM_MODULE_PLTS
+   if (!new) {
+   struct module *mod = rec->arch.mod;
+
+   if (mod) {
+   aaddr = get_module_plt(mod, ip, aaddr);
+   new = ftrace_call_replace(ip, aaddr);
+   }
+   }
+#endif
 
return ftrace_modify_code(rec->ip, old, new, true);
 }
@@ -152,12 +177,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned 
long old_addr,
 int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
 {
+   unsigned long aaddr = adjust_address(rec, addr);
unsigned long ip = rec->ip;
unsigned long old;
unsigned long new;
int ret;
 
-   old = ftrace_call_replace(ip, adjust_address(rec, addr));
+#ifdef CONFIG_ARM_MODULE_PLTS
+   /* mod is only supplied during module loading */
+   if (!mod)
+   mod = rec->arch.mod;
+   else
+   rec->arch.

[PATCH v7 0/2] ARM: Implement MODULE_PLT support in FTRACE

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

FTRACE's function tracer currently doesn't always work on ARM with
MODULE_PLT option enabled. If the module is loaded too far, FTRACE's
code modifier cannot cope with introduced veneers and turns the
function tracer off globally.

ARM64 already has a solution for the problem, refer to the following
patches:

arm64: ftrace: emit ftrace-mod.o contents through code
arm64: module-plts: factor out PLT generation code for ftrace
arm64: ftrace: fix !CONFIG_ARM64_MODULE_PLTS kernels
arm64: ftrace: fix building without CONFIG_MODULES
arm64: ftrace: add support for far branches to dynamic ftrace
arm64: ftrace: don't validate branch via PLT in ftrace_make_nop()

But the presented ARM variant has just a half of the footprint in terms of
the changed LoCs. It also retains the code validation-before-modification
instead of switching it off.

Changelog:
v7:
* rebased
v6:
* rebased
v5:
* BUILD_BUG_ON() ensures fixed_plts[] always fits one PLT block
* use "for" loop instead of "while"
* scripts/recordmcount is filtering reloc types
v4:
* Fixed build without CONFIG_FUNCTION_TRACER
* Reorganized pre-allocated PLTs handling in get_module_plt(),
  now compiler eliminates the whole FTRACE-related handling code
if ARRAY_SIZE(fixed_plts) == 0
v3:
* Only extend struct dyn_arch_ftrace when ARM_MODULE_PLTS is enabled
v2:
* As suggested by Steven Rostedt, refrain from tree-wide API modification,
  save module pointer in struct dyn_arch_ftrace instead (PowerPC way)

Alexander Sverdlin (2):
  ARM: PLT: Move struct plt_entries definition to header
  ARM: ftrace: Add MODULE_PLTS support

 arch/arm/include/asm/ftrace.h |  3 +++
 arch/arm/include/asm/module.h | 10 +
 arch/arm/kernel/ftrace.c  | 46 ++--
 arch/arm/kernel/module-plts.c | 49 +--
 4 files changed, 95 insertions(+), 13 deletions(-)

-- 
2.10.2



[PATCH v7 1/2] ARM: PLT: Move struct plt_entries definition to header

2021-01-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

No functional change, later it will be re-used in several files.

Signed-off-by: Alexander Sverdlin 
---
 arch/arm/include/asm/module.h | 9 +
 arch/arm/kernel/module-plts.c | 9 -
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 4b0df09..09b9ad5 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -19,6 +19,15 @@ enum {
 };
 #endif
 
+#define PLT_ENT_STRIDE L1_CACHE_BYTES
+#define PLT_ENT_COUNT  (PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE   (sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+struct plt_entries {
+   u32 ldr[PLT_ENT_COUNT];
+   u32 lit[PLT_ENT_COUNT];
+};
+
 struct mod_plt_sec {
struct elf32_shdr   *plt;
int plt_count;
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 6e626ab..d330e9e 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -12,10 +12,6 @@
 #include 
 #include 
 
-#define PLT_ENT_STRIDE L1_CACHE_BYTES
-#define PLT_ENT_COUNT  (PLT_ENT_STRIDE / sizeof(u32))
-#define PLT_ENT_SIZE   (sizeof(struct plt_entries) / PLT_ENT_COUNT)
-
 #ifdef CONFIG_THUMB2_KERNEL
 #define PLT_ENT_LDR__opcode_to_mem_thumb32(0xf8dff000 | \
(PLT_ENT_STRIDE - 4))
@@ -24,11 +20,6 @@
(PLT_ENT_STRIDE - 8))
 #endif
 
-struct plt_entries {
-   u32 ldr[PLT_ENT_COUNT];
-   u32 lit[PLT_ENT_COUNT];
-};
-
 static bool in_init(const struct module *mod, unsigned long loc)
 {
return loc - (u32)mod->init_layout.base < mod->init_layout.size;
-- 
2.10.2



[PATCH 2/2] MIPS: OCTEON: Don't add kernel sections into memblock allocator

2020-12-03 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Because check_kernel_sections_mem() does exactly this for all platforms.

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/cavium-octeon/setup.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index d051a8a..34cef11 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -1013,8 +1013,6 @@ void __init plat_mem_setup(void)
uint64_t crashk_end;
 #ifndef CONFIG_CRASH_DUMP
int64_t memory;
-   uint64_t kernel_start;
-   uint64_t kernel_size;
 #endif
 
total = 0;
@@ -1152,13 +1150,6 @@ void __init plat_mem_setup(void)
}
}
cvmx_bootmem_unlock();
-   /* Add the memory region for the kernel. */
-   kernel_start = (unsigned long) _text;
-   kernel_size = _end - _text;
-
-   /* Adjust for physical offset. */
-   kernel_start &= ~0x8000ULL;
-   memblock_add(kernel_start, kernel_size);
 #endif /* CONFIG_CRASH_DUMP */
 
 #ifdef CONFIG_CAVIUM_RESERVE32
-- 
2.10.2



[PATCH 1/2] MIPS: Don't round up kernel sections size for memblock_add()

2020-12-03 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Linux doesn't own the memory immediately after the kernel image. On Octeon
bootloader places a shared structure right close after the kernel _end,
refer to "struct cvmx_bootinfo *octeon_bootinfo" in cavium-octeon/setup.c.

If check_kernel_sections_mem() rounds the PFNs up, first memblock_alloc()
inside early_init_dt_alloc_memory_arch() <= device_tree_init() returns
memory block overlapping with the above octeon_bootinfo structure, which
is being overwritten afterwards.

Fixes: a94e4f24ec83 ("MIPS: init: Drop boot_mem_map")
Signed-off-by: Alexander Sverdlin 
---
 arch/mips/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ca579de..9d11f68 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -498,8 +498,8 @@ static void __init request_crashkernel(struct resource *res)
 
 static void __init check_kernel_sections_mem(void)
 {
-   phys_addr_t start = PFN_PHYS(PFN_DOWN(__pa_symbol(&_text)));
-   phys_addr_t size = PFN_PHYS(PFN_UP(__pa_symbol(&_end))) - start;
+   phys_addr_t start = __pa_symbol(&_text);
+   phys_addr_t size = __pa_symbol(&_end) - start;
 
if (!memblock_is_region_memory(start, size)) {
pr_info("Kernel sections are not in the memory maps\n");
-- 
2.10.2



[PATCH] MIPS: Octeon: irq: Alloc desc before configuring IRQ

2020-11-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Allocate the IRQ descriptors where necessary before configuring them via
irq_set_chip_and_handler(). Fixes the following soft lockup:

watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [modprobe:72]
Modules linked in:
irq event stamp: 33288
hardirqs last  enabled at (33287): [] 
restore_partial+0x74/0x150
hardirqs last disabled at (33288): [] handle_int+0x128/0x178
softirqs last  enabled at (33284): [] __do_softirq+0x5c4/0x6d0
softirqs last disabled at (33279): [] irq_exit+0xe8/0xf0
CPU: 5 PID: 72 Comm: modprobe Not tainted 4.19.80-... #1
$ 0   :  0001 0003 82bdc640
$ 4   :    
$ 8   : 0001 0001  803076cc
$12   :   817f 0800
$16   : 80a96d10 80a9 82c41780 82c41788
$20   : 0001 8013b248 80008ef28080 80bb8700
$24   : 03bf 802d0610
$28   : 80008ef2 80008ef23bd0 0006 8020d6f8
Hi: 0160
Lo: 0014
epc   : 8020d72c smp_call_function_many+0x2f4/0x370
ra: 8020d6f8 smp_call_function_many+0x2c0/0x370
Status: 10008ce3 KX SX UX KERNEL EXL IE
Cause : 40808000 (ExcCode 00)
PrId  : 000d900a (Cavium Octeon II)
CPU: 5 PID: 72 Comm: modprobe Not tainted 4.19.80-... #1
Stack : 80ab 0051801c0da0 1ce0 5e70a8a65518aeac
5e70a8a65518aeac  80008e0cfb48 8182
80008e0cfad4 f0ce6f64 0001 
801ccfb8   817f
80008531d840 80a9 fffe 
80b2  80bb3980 80bb3980
80a9 fffe 8057a760 0028
80c50028 80008ef2 80008e0cfb40 80b2
80835d6c  80008e0cfc78 5e70a8a65518aeac
80a9dbf7 80835c2c 801357a4 809bdd50
...
Call Trace:
[] show_stack+0x9c/0x130
[] dump_stack+0xdc/0x140
[] watchdog_timer_fn+0x3e8/0x478
[] __hrtimer_run_queues+0x18c/0x6d8
[] hrtimer_interrupt+0x104/0x2e8
[] c0_compare_interrupt+0x60/0x90
[] __handle_irq_event_percpu+0xb4/0x4a0
[] handle_irq_event_percpu+0x34/0x90
[] handle_percpu_irq+0x9c/0xe0
[] generic_handle_irq+0x34/0x50
[] do_IRQ+0x18/0x28
[] plat_irq_dispatch+0x90/0x128
[] handle_int+0x16c/0x178
[] smp_call_function_many+0x2f4/0x370
[] smp_call_function+0x40/0xa0
[] flush_tlb_mm+0x44/0x140
[] tlb_flush_mmu+0x38/0x90
[] arch_tlb_finish_mmu+0x4c/0x88
[] tlb_finish_mmu+0x24/0x50
[] exit_mmap+0x11c/0x1b8
[] mmput+0x84/0x138
[] do_exit+0x314/0xc88
[] do_group_exit+0x48/0xb0
[] __wake_up_parent+0x0/0x18

Signed-off-by: Alexander Sverdlin 
---
 arch/mips/cavium-octeon/octeon-irq.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/mips/cavium-octeon/octeon-irq.c 
b/arch/mips/cavium-octeon/octeon-irq.c
index a19f69e..0bccd15 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1505,10 +1505,20 @@ static int __init octeon_irq_init_ciu(
goto err;
}
 
+   r = irq_alloc_desc_at(OCTEON_IRQ_MBOX0, -1);
+   if (r < 0) {
+   pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_MBOX0");
+   goto err;
+   }
r = octeon_irq_set_ciu_mapping(
OCTEON_IRQ_MBOX0, 0, 32, 0, chip_mbox, handle_percpu_irq);
if (r)
goto err;
+   r = irq_alloc_desc_at(OCTEON_IRQ_MBOX1, -1);
+   if (r < 0) {
+   pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_MBOX1");
+   goto err;
+   }
r = octeon_irq_set_ciu_mapping(
OCTEON_IRQ_MBOX1, 0, 33, 0, chip_mbox, handle_percpu_irq);
if (r)
@@ -1546,6 +1556,11 @@ static int __init octeon_irq_init_ciu(
if (r)
goto err;
 
+   r = irq_alloc_descs(OCTEON_IRQ_WDOG0, OCTEON_IRQ_WDOG0, 16, -1);
+   if (r < 0) {
+   pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_WDOGx");
+   goto err;
+   }
/* CIU_1 */
for (i = 0; i < 16; i++) {
r = octeon_irq_set_ciu_mapping(
-- 
2.10.2



[PATCH] tty: serial: uartlite: Support probe deferral

2020-11-27 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Give uartlite a chance to be probed when the IRQ controller finally becomes
available and return a potential -EPROBE_DEFER as-is. The condition "<="
has been changed to "<" to follow the recommendation in the header of
platform_get_irq().

Signed-off-by: Alexander Sverdlin 
---
 drivers/tty/serial/uartlite.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index 09379db..f42ccc4 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -773,8 +773,8 @@ static int ulite_probe(struct platform_device *pdev)
return -ENODEV;
 
irq = platform_get_irq(pdev, 0);
-   if (irq <= 0)
-   return -ENXIO;
+   if (irq < 0)
+   return irq;
 
	pdata->clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
if (IS_ERR(pdata->clk)) {
-- 
2.10.2



[PATCH] MIPS: reserve the memblock right after the kernel

2020-11-06 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Linux doesn't own the memory immediately after the kernel image. On Octeon
bootloader places a shared structure right close after the kernel _end,
refer to "struct cvmx_bootinfo *octeon_bootinfo" in cavium-octeon/setup.c.

If check_kernel_sections_mem() rounds the PFNs up, first memblock_alloc()
inside early_init_dt_alloc_memory_arch() <= device_tree_init() returns
memory block overlapping with the above octeon_bootinfo structure, which
is being overwritten afterwards.

Cc: sta...@vger.kernel.org
Fixes: a94e4f24ec83 ("MIPS: init: Drop boot_mem_map")
Signed-off-by: Alexander Sverdlin 
---
 arch/mips/kernel/setup.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 0d42532..f6cf2f6 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -504,6 +504,12 @@ static void __init check_kernel_sections_mem(void)
if (!memblock_is_region_memory(start, size)) {
pr_info("Kernel sections are not in the memory maps\n");
memblock_add(start, size);
+   /*
+* Octeon bootloader places shared data structure right after
+* the kernel => make sure it will not be corrupted.
+*/
+   memblock_reserve(__pa_symbol(&_end),
+start + size - __pa_symbol(&_end));
}
 }
 
-- 
2.10.2



[PATCH] mtd: spi-nor: Don't copy self-pointing struct around

2020-10-05 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

spi_nor_parse_sfdp() modifies the passed structure so that it points to
itself (params.erase_map.regions to params.erase_map.uniform_region). This
makes it impossible to copy the local struct anywhere else.

Therefore only use memcpy() in backup-restore scenario. The bug may show up
like below:

BUG: unable to handle page fault for address: c9b377f8
Oops:  [#1] PREEMPT SMP NOPTI
CPU: 4 PID: 3500 Comm: flashcp Tainted: G   O  5.4.53-... #1
...
RIP: 0010:spi_nor_erase+0x8e/0x5c0
Code: 64 24 18 89 db 4d 8b b5 d0 04 00 00 4c 89 64 24 18 4c 89 64 24 20 eb 12 
a8 10 0f 85 59 02 00 00 49 83 c6 10 0f 84 4f 02 00 00 <49> 8b 06 48 89 c2 48 83 
e2 c0 48 89 d1 49 03 4e 08 48 39 cb 73 d8
RSP: 0018:c9000217fc48 EFLAGS: 00010206
RAX: 0074 RBX:  RCX: 0074
RDX: 8884550c9980 RSI: 88844f9c0bc0 RDI: 88844ede7bb8
RBP: 0074 R08: 815bfbe0 R09: 88844f9c0bc0
R10:  R11:  R12: c9000217fc60
R13: 88844ede7818 R14: c9b377f8 R15: 
FS:  7f4699780500() GS:88846ff0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: c9b377f8 CR3: 0004538ee000 CR4: 00340fe0
Call Trace:
 part_erase+0x27/0x50
 mtdchar_ioctl+0x831/0xba0
 ? filemap_map_pages+0x186/0x3d0
 ? do_filp_open+0xad/0x110
 ? _copy_to_user+0x22/0x30
 ? cp_new_stat+0x150/0x180
 mtdchar_unlocked_ioctl+0x2a/0x40
 do_vfs_ioctl+0xa0/0x630
 ? __do_sys_newfstat+0x3c/0x60
 ksys_ioctl+0x70/0x80
 __x64_sys_ioctl+0x16/0x20
 do_syscall_64+0x6a/0x200
 ? prepare_exit_to_usermode+0x50/0xd0
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f46996b6817

Fixes: 1c1d8d98e1c7 ("mtd: spi-nor: Split spi_nor_init_params()")
Cc: sta...@vger.kernel.org
Tested-by: Baurzhan Ismagulov 
Co-developed-by: Matija Glavinic Pecotic 
Signed-off-by: Matija Glavinic Pecotic 
Signed-off-by: Alexander Sverdlin 
---
 drivers/mtd/spi-nor/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 2add4a0..cce0670 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2701,11 +2701,10 @@ static void spi_nor_sfdp_init_params(struct spi_nor 
*nor)
 
	memcpy(&sfdp_params, nor->params, sizeof(sfdp_params));
 
-   if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
+   if (spi_nor_parse_sfdp(nor, nor->params)) {
+   memcpy(nor->params, _params, sizeof(*nor->params));
nor->addr_width = 0;
nor->flags &= ~SNOR_F_4B_OPCODES;
-   } else {
-   memcpy(nor->params, &sfdp_params, sizeof(*nor->params));
}
 }
 
-- 
2.10.2



[PATCH] net: octeon: mgmt: Repair filling of RX ring

2020-05-29 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

The removal of mips_swiotlb_ops exposed a problem in octeon_mgmt Ethernet
driver. mips_swiotlb_ops had an mb() after most of the operations and the
removal of the ops had broken the receive functionality of the driver.
My code inspection has shown no other places except
octeon_mgmt_rx_fill_ring() where an explicit barrier would be obviously
missing. The latter function however has to make sure that "ringing the
bell" doesn't happen before RX ring entry is really written.

The patch has been successfully tested on Octeon II.

Fixes: a33db9ed ("MIPS: remove mips_swiotlb_ops")
Cc: sta...@vger.kernel.org
Signed-off-by: Alexander Sverdlin 
---
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 07b960e..79c110a 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -235,6 +235,11 @@ static void octeon_mgmt_rx_fill_ring(struct net_device 
*netdev)
 
/* Put it in the ring.  */
p->rx_ring[p->rx_next_fill] = re.d64;
+   /* Make sure there is no reorder of filling the ring and ringing
+* the bell
+*/
+   wmb();
+
dma_sync_single_for_device(p->dev, p->rx_ring_handle,
   
ring_size_to_bytes(OCTEON_MGMT_RX_RING_SIZE),
   DMA_BIDIRECTIONAL);
-- 
2.10.2



[PATCH] macvlan: Skip loopback packets in RX handler

2020-05-26 Thread Alexander A Sverdlin
From: Alexander Sverdlin 

Ignore loopback-originating packets soon enough and don't try to process L2
header where it doesn't exist. The very similar br_handle_frame() in bridge
code performs exactly the same check.

This is an example of such ICMPv6 packet:

skb len=96 headroom=40 headlen=96 tailroom=56
mac=(40,0) net=(40,40) trans=80
shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
csum(0xae2e9a2f ip_summed=1 complete_sw=0 valid=0 level=0)
hash(0xc97ebd88 sw=1 l4=1) proto=0x86dd pkttype=5 iif=24
dev name=etha01.212 feat=0x0x40005000
skb headroom: : 00 7c 86 52 84 88 ff ff 00 00 00 00 00 00 08 00
skb headroom: 0010: 45 00 00 9e 5d 5c 40 00 40 11 33 33 00 00 00 01
skb headroom: 0020: 02 40 43 80 00 00 86 dd
skb linear:   : 60 09 88 bd 00 38 3a ff fe 80 00 00 00 00 00 00
skb linear:   0010: 00 40 43 ff fe 80 00 00 ff 02 00 00 00 00 00 00
skb linear:   0020: 00 00 00 00 00 00 00 01 86 00 61 00 40 00 00 2d
skb linear:   0030: 00 00 00 00 00 00 00 00 03 04 40 e0 00 00 01 2c
skb linear:   0040: 00 00 00 78 00 00 00 00 fd 5f 42 68 23 87 a8 81
skb linear:   0050: 00 00 00 00 00 00 00 00 01 01 02 40 43 80 00 00
skb tailroom: : ...
skb tailroom: 0010: ...
skb tailroom: 0020: ...
skb tailroom: 0030: ...

Call Trace, how it happens exactly:
 ...
 macvlan_handle_frame+0x321/0x425 [macvlan]
 ? macvlan_forward_source+0x110/0x110 [macvlan]
 __netif_receive_skb_core+0x545/0xda0
 ? enqueue_task_fair+0xe5/0x8e0
 ? __netif_receive_skb_one_core+0x36/0x70
 __netif_receive_skb_one_core+0x36/0x70
 process_backlog+0x97/0x140
 net_rx_action+0x1eb/0x350
 ? __hrtimer_run_queues+0x136/0x2e0
 __do_softirq+0xe3/0x383
 do_softirq_own_stack+0x2a/0x40
 
 do_softirq.part.4+0x4e/0x50
 netif_rx_ni+0x60/0xd0
 dev_loopback_xmit+0x83/0xf0
 ip6_finish_output2+0x575/0x590 [ipv6]
 ? ip6_cork_release.isra.1+0x64/0x90 [ipv6]
 ? __ip6_make_skb+0x38d/0x680 [ipv6]
 ? ip6_output+0x6c/0x140 [ipv6]
 ip6_output+0x6c/0x140 [ipv6]
 ip6_send_skb+0x1e/0x60 [ipv6]
 rawv6_sendmsg+0xc4b/0xe10 [ipv6]
 ? proc_put_long+0xd0/0xd0
 ? rw_copy_check_uvector+0x4e/0x110
 ? sock_sendmsg+0x36/0x40
 sock_sendmsg+0x36/0x40
 ___sys_sendmsg+0x2b6/0x2d0
 ? proc_dointvec+0x23/0x30
 ? addrconf_sysctl_forward+0x8d/0x250 [ipv6]
 ? dev_forward_change+0x130/0x130 [ipv6]
 ? _raw_spin_unlock+0x12/0x30
 ? proc_sys_call_handler.isra.14+0x9f/0x110
 ? __call_rcu+0x213/0x510
 ? get_max_files+0x10/0x10
 ? trace_hardirqs_on+0x2c/0xe0
 ? __sys_sendmsg+0x63/0xa0
 __sys_sendmsg+0x63/0xa0
 do_syscall_64+0x6c/0x1e0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Alexander Sverdlin 
---
 drivers/net/macvlan.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e7289d6..7cea2fa 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -447,6 +447,10 @@ static rx_handler_result_t macvlan_handle_frame(struct 
sk_buff **pskb)
int ret;
rx_handler_result_t handle_res;
 
+   /* Packets from dev_loopback_xmit() do not have L2 header, bail out */
+   if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+   return RX_HANDLER_PASS;
+
port = macvlan_port_get_rcu(skb->dev);
if (is_multicast_ether_addr(eth->h_dest)) {
unsigned int hash;
-- 
2.10.2