[PATCH] selftests/powerpc: Add a test of wild bctr

2018-07-23 Thread Michael Ellerman
This tests that a bctr (Branch to counter and link), ie. a function
call, to a wildly out-of-bounds address is handled correctly.

Some old kernel versions didn't handle it correctly, see eg:

  "powerpc/slb: Force a full SLB flush when we insert for a bad EA"
  https://lists.ozlabs.org/pipermail/linuxppc-dev/2017-April/157397.html
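
The core idea, as a minimal user-space sketch (illustrative only; the
selftest itself uses the harness in utils.h, poisons a range of GPRs and
checks the register values reported to the SIGSEGV handler):

  #include <setjmp.h>
  #include <signal.h>
  #include <stdio.h>

  #define BAD_ADDR 0x788c545a1800ull	/* wildly out-of-bounds address */

  static jmp_buf env;

  static void segv(int sig)
  {
  	longjmp(env, 1);	/* recover after the wild branch faults */
  }

  int main(void)
  {
  	signal(SIGSEGV, segv);
  	if (setjmp(env) == 0)
  		((int (*)(void))BAD_ADDR)();	/* indirect call, ie. a bctrl */
  	printf("survived the wild branch\n");
  	return 0;
  }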

Signed-off-by: Michael Ellerman 
---
 tools/testing/selftests/powerpc/include/reg.h  |   1 +
 tools/testing/selftests/powerpc/mm/.gitignore  |   3 +-
 tools/testing/selftests/powerpc/mm/Makefile|   2 +-
 tools/testing/selftests/powerpc/mm/wild_bctr.c | 153 +
 4 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/wild_bctr.c

diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 7f348c059bc2..52b4710469d2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
: "memory")
 
 #define mb()   asm volatile("sync" : : : "memory");
+#define barrier()  asm volatile("" : : : "memory");
 
 #define SPRN_MMCR2 769
 #define SPRN_MMCRA 770
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 7d7c42ed6de9..ba919308fe30 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
 subpage_prot
 tempfile
 prot_sao
-segv_errors
\ No newline at end of file
+segv_errors
+wild_bctr
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 8ebbe96d80a8..893ad41460af 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
 TEST_GEN_FILES := tempfile
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
new file mode 100644
index ..f8a2901dc44e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+
+#define BAD_NIP			0x788c545a1800ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+   struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+   memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+   save_regs(ctxt_v);
+   longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+   save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+   printf("Everything is OK in here.\n");
+   return 0;
+}
+
+#define REG_POISON 0x5a5aUL
+#define POISONED_REG(n)	((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+   #define POISON_REG(n)   \
+ "lis  " __stringify(n) "," __stringify(REG_POISON) ";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+ "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+ "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) 
";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+   asm (POISON_REG(15)
+POISON_REG(16)
+POISON_REG(17)
+POISON_REG(18)
+POISON_REG(19)
+POISON_REG(20)
+POISON_REG(21)
+POISON_REG(22)
+POISON_REG(23)
+POISON_REG(24)
+POISON_REG(25)
+POISON_REG(26)
+POISON_REG(27)
+POISON_REG(28)
+POISON_REG(29)
+: // inputs
+: // outputs
+: "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+  "26", "27", "28", "29"
+   );
+   #undef POISON_REG
+}
+
+static int check_regs(void)
+{
+   unsigned long i;
+
+   for (i = 15; i <= 29; i++)
+   FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+   printf("Regs OK\n");
+   return 0;
+}
+
+static void dump_regs(void)
+{
+   for (int i = 0; i < 32; i += 4) {
+   printf("r%02d 0x%016lx  r%02d 0x%016lx  " \
+  "r%02d 0x%016lx  r%02d 0x%016lx\n",
+  i, signal_regs.gpr[i],
+  i+1, signal_regs.gpr[i+1],
+   

[PATCH kernel for v4.14 and v4.17 stable] KVM: PPC: Check if IOMMU page is contained in the pinned physical page

2018-07-23 Thread Alexey Kardashevskiy
A VM which has:
 - a DMA capable device passed through to it (eg. network card);
 - running a malicious kernel that ignores H_PUT_TCE failure;
 - capability of using IOMMU pages bigger than physical pages
can create an IOMMU mapping that exposes (for example) 16MB of
the host physical memory to the device when only 64K was allocated to the VM.

The remaining 16MB - 64K will be some other content of host memory, possibly
including pages of the VM, but also pages of host kernel memory, host
programs or other VMs.

The attacking VM does not control the location of the page it can map,
and is only allowed to map as many pages as it has pages of RAM.

We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
an IOMMU page is contained in the physical page so the PCI hardware won't
get access to unassigned host memory; however this check is missing in
the KVM fastpath (H_PUT_TCE accelerated code). We have been lucky so far
and have not hit this yet because, the very first time the mapping happens,
we do not have tbl::it_userspace allocated yet and fall back to
userspace, which in turn calls the VFIO IOMMU driver; that fails and
the guest does not retry.

This stores the smallest preregistered page size in the preregistered
region descriptor and changes the mm_iommu_xxx API to check this against
the IOMMU page size.

This calculates the maximum page size as the minimum of the natural region
alignment and the compound page size. For the page shift this uses the shift
returned by find_linux_pte(), which indicates how the page is mapped in
the current userspace: if the page is huge and the shift is non-zero, then
it is a leaf pte and the page is mapped within the range.
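
For illustration, the containment check this adds to mm_iommu_ua_to_hpa()
boils down to roughly the following (simplified sketch; mem->pageshift is
the smallest page shift recorded when the region was preregistered):

	if (pageshift > mem->pageshift)
		return -EFAULT;	/* IOMMU page not contained in the pinned page */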

Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO")
Cc: sta...@vger.kernel.org # v4.12+
Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
Signed-off-by: Michael Ellerman 
(cherry picked from commit 76fa4975f3ed12d15762bc979ca44078598ed8ee)
Signed-off-by: Alexey Kardashevskiy 
---

The original patch did not apply because of fad953ce which fixed
all vmalloc's to use array_size() so the backport is pretty trivial
and applies to v4.17 stable as well.


---
 arch/powerpc/include/asm/mmu_context.h |  4 ++--
 arch/powerpc/kvm/book3s_64_vio.c   |  2 +-
 arch/powerpc/kvm/book3s_64_vio_hv.c|  6 --
 arch/powerpc/mm/mmu_context_iommu.c| 37 --
 drivers/vfio/vfio_iommu_spapr_tce.c|  2 +-
 5 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 44fdf47..6f67ff5 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
-   unsigned long ua, unsigned long *hpa);
+   unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-   unsigned long ua, unsigned long *hpa);
+   unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 4dffa61..e14cec6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
return H_TOO_HARD;
 
-   if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+   if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
return H_HARDWARE;
 
if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe..648cf6c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -262,7 +262,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
if (!mem)
return H_TOO_HARD;
 
-   if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+   if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
+   &hpa)))
return H_HARDWARE;
 
pua = (void *) vmalloc_to_phys(pua);
@@ -431,7 +432,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
if (mem)
-   prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &hpa) == 0;
+ 

Re: [PATCH net-next] wan/fsl_ucc_hdlc: use IS_ERR_VALUE() to check return value of qe_muram_alloc

2018-07-23 Thread David Miller
From: YueHaibing 
Date: Mon, 23 Jul 2018 22:12:33 +0800

> qe_muram_alloc returns an unsigned long integer, which should not be
> compared with zero. Check it using IS_ERR_VALUE() to fix this.
> 
> Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC")
> Signed-off-by: YueHaibing 

Applied, thanks.


Re: [PATCH v07 2/9] hotplug/cpu: Add operation queuing function

2018-07-23 Thread Nathan Fontenot

On 07/13/2018 03:18 PM, Michael Bringmann wrote:

migration/dlpar: This patch adds function dlpar_queue_action()
which queues up information about a CPU/Memory 'readd'
operation according to resource type, action code, and DRC index.
At a subsequent point, the list of operations can be run/played
in series.  Examples of such operations include 'readd' of CPU
and Memory blocks identified as having changed their associativity
during an LPAR migration event.
Signed-off-by: Michael Bringmann 
---
Changes in patch:
   -- Correct drc_index before adding to pseries_hp_errorlog struct
   -- Correct text of notice
   -- Revise queuing model to save up all of the DLPAR actions for
  later execution.
   -- Restore list init statement missing from patch
   -- Move call to apply queued operations into 'mobility.c'
   -- Compress some code
   -- Rename some of queueing function APIs
   -- Revise implementation to push execution of queued operations
  to a workqueue task.
   -- Cleanup reference to outdated queuing operation.
---
  arch/powerpc/include/asm/rtas.h   |2 +
  arch/powerpc/platforms/pseries/dlpar.c|   61 +
  arch/powerpc/platforms/pseries/mobility.c |4 ++
  arch/powerpc/platforms/pseries/pseries.h  |2 +
  4 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c..4f601c7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
struct { __be32 count, index; } ic;
chardrc_name[1];
} _drc_u;
+   struct list_head list;
  };

  #define PSERIES_HP_ELOG_RESOURCE_CPU  1
  #define PSERIES_HP_ELOG_RESOURCE_MEM  2
  #define PSERIES_HP_ELOG_RESOURCE_SLOT 3
  #define PSERIES_HP_ELOG_RESOURCE_PHB  4
+#define PSERIES_HP_ELOG_RESOURCE_PMT   5

  #define PSERIES_HP_ELOG_ACTION_ADD1
  #define PSERIES_HP_ELOG_ACTION_REMOVE 2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..7264b8e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 

  static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
return 0;
  }

+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
  static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
  {
int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_RESOURCE_CPU:
rc = dlpar_cpu(hp_elog);
break;
+   case PSERIES_HP_ELOG_RESOURCE_PMT:
+   rc = dlpar_pmt(hp_elog);
+   break;
default:
pr_warn_ratelimited("Invalid resource (%d) specified\n",
hp_elog->resource);
@@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
}
  }

+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+   struct pseries_hp_errorlog *hp_errlog;
+
+   hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+   if (!hp_errlog)
+   return -ENOMEM;
+
+   hp_errlog->resource = resource;
+   hp_errlog->action = action;
+   hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+   hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+   list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
+
+   return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+   struct list_head *pos, *q;
+
+   ssleep(15);
+
+   list_for_each_safe(pos, q, &dlpar_delayed_list) {
+   struct pseries_hp_errorlog *tmp;
+
+   tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+   handle_dlpar_errorlog(tmp);
+
+   list_del(pos);
+   kfree(tmp);
+
+   ssleep(10);
+   }
+
+   return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+   if (!list_empty(&dlpar_delayed_list)) {
+   struct pseries_hp_errorlog hp_errlog;
+
+   hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+   hp_errlog.action = 0;
+   hp_errlog.id_type = 0;
+
+   queue_hotplug_event(&hp_errlog, 0, 0);
+   }
+   return 0;
+}


I'm a bit confused by this. Is there a reason this needs to queue a
hotplug event instead of just walking the list as is done in dlpar_pmt?

-Nathan


+
  static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
  {
char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f6364d9..d0d1cae 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ 

Re: [PATCH v07 1/9] hotplug/cpu: Conditionally acquire/release DRC index

2018-07-23 Thread Nathan Fontenot

On 07/13/2018 03:17 PM, Michael Bringmann wrote:

powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
skipping of DRC index acquire or release operations during the CPU
add or remove operations.  This is intended to support subsequent
changes to provide a 'CPU readd' operation.

Signed-off-by: Michael Bringmann 
---
Changes in patch:
   -- Move new validity check added to pseries_smp_notifier
  to another patch
   -- Revise one of checks for 'acquire_drc' in dlpar_cpu_add.
   -- Revise one of checks for 'release_drc' in dlpar_cpu_remove.
---
  arch/powerpc/platforms/pseries/hotplug-cpu.c |   71 +++---
  1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..7ede3b0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
return found;
  }

-static ssize_t dlpar_cpu_add(u32 drc_index)
+static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
  {
struct device_node *dn, *parent;
int rc, saved_rc;
@@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return -EINVAL;
}

-   rc = dlpar_acquire_drc(drc_index);
-   if (rc) {
-   pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
-   rc, drc_index);
-   of_node_put(parent);
-   return -EINVAL;
+   if (acquire_drc) {
+   rc = dlpar_acquire_drc(drc_index);
+   if (rc) {
+   pr_warn("Failed to acquire DRC, rc: %d, drc index: 
%x\n",
+   rc, drc_index);
+   of_node_put(parent);
+   return -EINVAL;
+   }
}

dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
if (!dn) {
pr_warn("Failed call to configure-connector, drc index: %x\n",
drc_index);
-   dlpar_release_drc(drc_index);
+   if (acquire_drc)
+   dlpar_release_drc(drc_index);
of_node_put(parent);
return -EINVAL;
}
@@ -484,9 +487,11 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
dn->name, rc, drc_index);

-   rc = dlpar_release_drc(drc_index);
-   if (!rc)
-   dlpar_free_cc_nodes(dn);
+   if (acquire_drc) {
+   rc = dlpar_release_drc(drc_index);
+   if (!rc)
+   dlpar_free_cc_nodes(dn);
+   }

return saved_rc;
}
@@ -498,7 +503,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
dn->name, rc, drc_index);

rc = dlpar_detach_node(dn);
-   if (!rc)
+   if (!rc && acquire_drc)
dlpar_release_drc(drc_index);

return saved_rc;
@@ -566,7 +571,8 @@ static int dlpar_offline_cpu(struct device_node *dn)

  }

-static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
+   bool release_drc)
  {
int rc;

@@ -579,12 +585,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
return -EINVAL;
}

-   rc = dlpar_release_drc(drc_index);
-   if (rc) {
-   pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
-   drc_index, dn->name, rc);
-   dlpar_online_cpu(dn);
-   return rc;
+   if (release_drc) {
+   rc = dlpar_release_drc(drc_index);
+   if (rc) {
+   pr_warn("Failed to release drc (%x) for CPU %s, rc: 
%d\n",
+   drc_index, dn->name, rc);
+   dlpar_online_cpu(dn);
+   return rc;
+   }
}

rc = dlpar_detach_node(dn);
@@ -593,8 +601,9 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)

pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);

-   rc = dlpar_acquire_drc(drc_index);
-   if (!rc)
+   if (release_drc)
+   rc = dlpar_acquire_drc(drc_index);
+   if (!release_drc || !rc)
dlpar_online_cpu(dn);


This is likely wrong. At this point you're in an if (rc) block, so rc is
already non-zero. If release_drc is false this checks an invalid rc state.

-Nathan



return saved_rc;
@@ -622,7 +631,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 

Re: [PATCH v4 00/11] hugetlb: Factorize hugetlb architecture primitives

2018-07-23 Thread Alex Ghiti

Ok will do and report when done.

Thanks for your feedback,

Alex

On 07/23/2018 02:00 PM, Michael Ellerman wrote:

Alex Ghiti  writes:


Does anyone have any suggestion about those patches ?

Cross compiling it for some non-x86 arches would be a good start :)

There are cross compilers available here:

   https://mirrors.edge.kernel.org/pub/tools/crosstool/


cheers


On 07/09/2018 02:16 PM, Michal Hocko wrote:

[CC hugetlb guys - http://lkml.kernel.org/r/20180705110716.3919-1-a...@ghiti.fr]

On Thu 05-07-18 11:07:05, Alexandre Ghiti wrote:

In order to reduce copy/paste of functions across architectures and then
make riscv hugetlb port (and future ports) simpler and smaller, this
patchset intends to factorize the numerous hugetlb primitives that are
defined across all the architectures.

Except for prepare_hugepage_range, this patchset moves the versions that
are just pass-through to standard pte primitives into
asm-generic/hugetlb.h by using the same #ifdef semantic that can be
found in asm-generic/pgtable.h, i.e. __HAVE_ARCH_***.

s390 architecture has not been tackled in this serie since it does not
use asm-generic/hugetlb.h at all.
powerpc could be factorized a bit more (cf huge_ptep_set_wrprotect).

This patchset has been compiled on x86 only.

Changelog:

v4:
Fix powerpc build error due to misplacing of #include
 outside of #ifdef CONFIG_HUGETLB_PAGE, as
pointed by Christophe Leroy.

v1, v2, v3:
Same version, just problems with email provider and misuse of
--batch-size option of git send-email

Alexandre Ghiti (11):
hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h
hugetlb: Introduce generic version of hugetlb_free_pgd_range
hugetlb: Introduce generic version of set_huge_pte_at
hugetlb: Introduce generic version of huge_ptep_get_and_clear
hugetlb: Introduce generic version of huge_ptep_clear_flush
hugetlb: Introduce generic version of huge_pte_none
hugetlb: Introduce generic version of huge_pte_wrprotect
hugetlb: Introduce generic version of prepare_hugepage_range
hugetlb: Introduce generic version of huge_ptep_set_wrprotect
hugetlb: Introduce generic version of huge_ptep_set_access_flags
hugetlb: Introduce generic version of huge_ptep_get

   arch/arm/include/asm/hugetlb-3level.h| 32 +-
   arch/arm/include/asm/hugetlb.h   | 33 +--
   arch/arm64/include/asm/hugetlb.h | 39 +++-
   arch/ia64/include/asm/hugetlb.h  | 47 ++-
   arch/mips/include/asm/hugetlb.h  | 40 +++--
   arch/parisc/include/asm/hugetlb.h| 33 +++
   arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +
   arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
   arch/powerpc/include/asm/hugetlb.h   | 43 ++
   arch/powerpc/include/asm/nohash/32/pgtable.h |  2 +
   arch/powerpc/include/asm/nohash/64/pgtable.h |  1 +
   arch/sh/include/asm/hugetlb.h| 54 ++---
   arch/sparc/include/asm/hugetlb.h | 40 +++--
   arch/x86/include/asm/hugetlb.h   | 72 +--
   include/asm-generic/hugetlb.h| 88 +++-
   15 files changed, 143 insertions(+), 384 deletions(-)

--
2.16.2




[PATCH] net: ethernet: fs-enet: Use generic CRC32 implementation

2018-07-23 Thread Krzysztof Kozlowski
Use generic kernel CRC32 implementation because it:
1. Should be faster (uses lookup tables),
2. Removes duplicated CRC generation code,
3. Uses well-proven algorithm instead of coding it one more time.
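
For reference, the removed open-coded loop is equivalent to a single call
to the kernel's Ethernet CRC32 helper (a sketch; ether_crc() is declared
in <linux/crc32.h>):

	u32 crc = ether_crc(6, mac);	/* CRC-32 over the 6-byte MAC address */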

Suggested-by: Eric Biggers 
Signed-off-by: Krzysztof Kozlowski 

---

Not tested on hardware.
---
 drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index 1fc27c97e3b2..99fe2c210d0f 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -176,21 +177,10 @@ static void set_multicast_start(struct net_device *dev)
 static void set_multicast_one(struct net_device *dev, const u8 *mac)
 {
struct fs_enet_private *fep = netdev_priv(dev);
-   int temp, hash_index, i, j;
+   int temp, hash_index;
u32 crc, csrVal;
-   u8 byte, msb;
-
-   crc = 0x;
-   for (i = 0; i < 6; i++) {
-   byte = mac[i];
-   for (j = 0; j < 8; j++) {
-   msb = crc >> 31;
-   crc <<= 1;
-   if (msb ^ (byte & 0x1))
-   crc ^= FEC_CRC_POLY;
-   byte >>= 1;
-   }
-   }
+
+   crc = ether_crc(6, mac);
 
temp = (crc & 0x3f) >> 1;
hash_index = ((temp & 0x01) << 4) |
-- 
2.14.1



Re: [PATCH v07 2/9] hotplug/cpu: Add operation queuing function

2018-07-23 Thread John Allen

On Fri, Jul 13, 2018 at 03:18:01PM -0500, Michael Bringmann wrote:

migration/dlpar: This patch adds function dlpar_queue_action()
which queues up information about a CPU/Memory 'readd'
operation according to resource type, action code, and DRC index.
At a subsequent point, the list of operations can be run/played
in series.  Examples of such operations include 'readd' of CPU
and Memory blocks identified as having changed their associativity
during an LPAR migration event.

Signed-off-by: Michael Bringmann 
---
Changes in patch:
 -- Correct drc_index before adding to pseries_hp_errorlog struct
 -- Correct text of notice
 -- Revise queuing model to save up all of the DLPAR actions for
later execution.
 -- Restore list init statement missing from patch
 -- Move call to apply queued operations into 'mobility.c'
 -- Compress some code
 -- Rename some of queueing function APIs
 -- Revise implementation to push execution of queued operations
to a workqueue task.
 -- Cleanup reference to outdated queuing operation.
---
arch/powerpc/include/asm/rtas.h   |2 +
arch/powerpc/platforms/pseries/dlpar.c|   61 +
arch/powerpc/platforms/pseries/mobility.c |4 ++
arch/powerpc/platforms/pseries/pseries.h  |2 +
4 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c..4f601c7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
struct { __be32 count, index; } ic;
chardrc_name[1];
} _drc_u;
+   struct list_head list;
};

#define PSERIES_HP_ELOG_RESOURCE_CPU1
#define PSERIES_HP_ELOG_RESOURCE_MEM2
#define PSERIES_HP_ELOG_RESOURCE_SLOT   3
#define PSERIES_HP_ELOG_RESOURCE_PHB4
+#define PSERIES_HP_ELOG_RESOURCE_PMT   5

#define PSERIES_HP_ELOG_ACTION_ADD  1
#define PSERIES_HP_ELOG_ACTION_REMOVE   2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..7264b8e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
#include 
#include 
#include 
+#include 
#include 

static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
return 0;
}

+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
{
int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_RESOURCE_CPU:
rc = dlpar_cpu(hp_elog);
break;
+   case PSERIES_HP_ELOG_RESOURCE_PMT:
+   rc = dlpar_pmt(hp_elog);
+   break;
default:
pr_warn_ratelimited("Invalid resource (%d) specified\n",
hp_elog->resource);
@@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
}
}

+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+   struct pseries_hp_errorlog *hp_errlog;
+
+   hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+   if (!hp_errlog)
+   return -ENOMEM;
+
+   hp_errlog->resource = resource;
+   hp_errlog->action = action;
+   hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+   hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+   list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
+
+   return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+   struct list_head *pos, *q;
+
+   ssleep(15);


Why do we need to sleep for so long here?

-John


+
+   list_for_each_safe(pos, q, &dlpar_delayed_list) {
+   struct pseries_hp_errorlog *tmp;
+
+   tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+   handle_dlpar_errorlog(tmp);
+
+   list_del(pos);
+   kfree(tmp);
+
+   ssleep(10);
+   }
+
+   return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+   if (!list_empty(&dlpar_delayed_list)) {
+   struct pseries_hp_errorlog hp_errlog;
+
+   hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+   hp_errlog.action = 0;
+   hp_errlog.id_type = 0;
+
+   queue_hotplug_event(&hp_errlog, 0, 0);
+   }
+   return 0;
+}
+
static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
{
char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index f6364d9..d0d1cae 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -378,6 +378,10 @@ static ssize_t migration_store(struct class *class,
return rc;

post_mobility_fixup();
+

Re: [PATCH v2 2/2] powerpc/pseries: Wait for completion of hotplug events during PRRN handling

2018-07-23 Thread John Allen

On Mon, Jul 23, 2018 at 11:41:24PM +1000, Michael Ellerman wrote:

John Allen  writes:


While handling PRRN events, the time to handle the actual hotplug events
dwarfs the time it takes to perform the device tree updates and queue the
hotplug events. In the case that PRRN events are being queued continuously,
hotplug events have been observed to be queued faster than the kernel can
actually handle them. This patch avoids the problem by waiting for a
hotplug request to complete before queueing more hotplug events.


So do we need the hotplug work queue at all? Can we just call
handle_dlpar_errorlog() directly?

Or are we using the work queue to serialise things? And if so would a
mutex be better?


Right, the workqueue is meant to serialize all hotplug events and it 
gets used for more than just PRRN events. I believe the motivation for 
using the workqueue over a mutex is that KVM guests initiate hotplug 
events through the hotplug interrupt and can queue fairly large requests 
meaning that in this scenario, waiting for a lock would block interrupts
for a while. Using the workqueue allows us to serialize hotplug events 
from different sources in the same way without worrying about the 
context in which the event is generated.




It looks like prrn_update_node() is called via at least, prrn_work_fn()
and post_mobility_fixup().

The latter is called from migration_store(), which seems like it would
be harmless. But also from pseries_suspend_enable_irqs() which I'm less
clear on.


Yeah, that doesn't seem to make sense based on the function name. Odd 
that prrn_update_node is being called from anywhere outside of handling 
PRRN events. Perhaps if other code paths are using the function, it 
needs a more generic name.


-John



cheers


diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 8a8033a249c7..49930848fa78 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -242,6 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 static void prrn_update_node(__be32 phandle)
 {
struct pseries_hp_errorlog *hp_elog;
+   struct completion hotplug_done;
struct device_node *dn;

/*
@@ -263,7 +264,9 @@ static void prrn_update_node(__be32 phandle)
hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
hp_elog->_drc_u.drc_index = phandle;

-   queue_hotplug_event(hp_elog, NULL, NULL);
+   init_completion(&hotplug_done);
+   queue_hotplug_event(hp_elog, &hotplug_done, NULL);
+   wait_for_completion(&hotplug_done);

kfree(hp_elog);
 }
--
2.17.1






Re: powerpc/ps3: Set driver coherent_dma_mask

2018-07-23 Thread Michael Ellerman
On Wed, 2018-07-18 at 22:08:33 UTC, Geoff Levand wrote:
> Set the coherent_dma_mask for the PS3 ehci, ohci, and snd devices.
> 
> Silences WARN_ON_ONCE messages emitted by the dma_alloc_attrs() routine.
> 
> Reported-by: Fredrik Noring 
> Signed-off-by: Geoff Levand 
> Acked-by: Greg Kroah-Hartman 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/48e91846865cd61bfdb701eb737de2

cheers


Re: [v2] powerpc/prom_init: remove linux,stdout-package property

2018-07-23 Thread Michael Ellerman
On Wed, 2018-07-18 at 16:15:44 UTC, Murilo Opsfelder Araujo wrote:
> This property was added in 2004 and the only use of it, which was already 
> inside
> `#if 0`, was removed a month later.
> 
> Signed-off-by: Murilo Opsfelder Araujo 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ec9336396a98f61290f45e8bb94245

cheers


Re: powerpc/hugetlbpage: Remove unhelpful HUGEPD_*_SHIFT macros

2018-07-23 Thread Michael Ellerman
On Tue, 2018-07-17 at 04:24:30 UTC, David Gibson wrote:
> The HUGEPD_*_SHIFT macros are always defined to be PGDIR_SHIFT and
> PUD_SHIFT, and have to have those values to work properly.  They once used
> to have different values, but that was really only because they were used
> to mean different things in different contexts.
> 
> 6fa50483 "powerpc/mm/hugetlb: initialize the pagetable cache correctly for
> hugetlb" removed that double meaning, but left the now useless constants.
> 
> Signed-off-by: David Gibson 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fdf743c5c51d5b45db4dada3910954

cheers


Re: chrp/nvram.c: add MODULE_LICENSE()

2018-07-23 Thread Michael Ellerman
On Sat, 2018-07-14 at 04:27:48 UTC, Randy Dunlap wrote:
> From: Randy Dunlap 
> 
> Add MODULE_LICENSE() to the chrp nvram.c driver to fix the build
> warning message:
> 
> WARNING: modpost: missing MODULE_LICENSE() in 
> arch/powerpc/platforms/chrp/nvram.o
> 
> Signed-off-by: Randy Dunlap 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a8bf9e504a6a2bb058b4f020d4ccc5

cheers


Re: [1/3] selftests/powerpc: Remove Power9 copy_unaligned test

2018-07-23 Thread Michael Ellerman
On Wed, 2018-07-11 at 07:10:15 UTC, Michael Ellerman wrote:
> This is a test of the ISA 3.0 "copy" instruction. That instruction has
> an L field, which if set to 1 specifies that "the instruction
> identifies the beginning of a move group" (pp 858). That's also
> referred to as "copy first" vs "copy".
> 
> In ISA 3.0B the copy instruction does not have an L field, and the
> corresponding bit in the instruction must be set to 1.
> 
> This test is generating a "copy" instruction, not a "copy first", and
> so on Power9 (which implements 3.0B), this results in an illegal
> instruction.
> 
> So just drop the test entirely. We still have copy_first_unaligned to
> test the "copy first" behaviour.
> 
> Signed-off-by: Michael Ellerman 
> Acked-by: Michael Neuling 

Series applied to powerpc next.

https://git.kernel.org/powerpc/c/83039f22ba2f6aff935a2acbb6bf67

cheers


Re: powerpc/8xx: fix handling of early NULL pointer dereference

2018-07-23 Thread Michael Ellerman
On Fri, 2018-07-13 at 13:10:47 UTC, Christophe Leroy wrote:
> NULL pointers are pointers to user memory space. So user pagetable
> has to be set in order to avoid random behaviour in case of NULL
> pointer dereference, otherwise we may encounter random memory
> access hence Machine Check Exception from TLB Miss handlers.
> 
> Set user pagetable as early as possible in order to properly
> catch early kernel NULL pointer dereference.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/8c8c10b90d88bfaad41ea34df17bf6

cheers


Re: cxl: Fix wrong comparison in cxl_adapter_context_get()

2018-07-23 Thread Michael Ellerman
On Wed, 2018-07-04 at 15:28:33 UTC, Vaibhav Jain wrote:
> Function atomic_inc_unless_negative() returns a bool to indicate
> success/failure. However cxl_adapter_context_get() wrongly compares
> the return value against '>=0' which will always be true. The patch
> fixes this comparison to '==0' there by also fixing this compile time
> warning:
> 
>   drivers/misc/cxl/main.c:290 cxl_adapter_context_get()
>   warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned
> 
> Cc: sta...@vger.kernel.org
> Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists")
> Reported-by: Dan Carpenter 
> Signed-off-by: Vaibhav Jain 
> Acked-by: Andrew Donnellan 
> Acked-by: Frederic Barrat 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ef6cb5f1a048fdf91ccee6d63d2bfa

cheers


Re: powerpc/mpic: Cleanup irq vector accounting

2018-07-23 Thread Michael Ellerman
On Fri, 2018-06-29 at 10:24:32 UTC, Bharat Bhushan wrote:
> Available vector space accounts for IPIs and timer interrupts,
> while the spurious vector was not accounted for. Also,
> mpic_setup_error_int() later skips one more vector, seemingly
> assuming one spurious vector.
> 
> Signed-off-by: Bharat Bhushan 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fca7bf946e31688edce446fdaa571c

cheers


Re: [2/2] powernv/npu: Add a debugfs setting to change ATSD threshold

2018-07-23 Thread Michael Ellerman
On Tue, 2018-04-17 at 09:11:29 UTC, Alistair Popple wrote:
> The threshold at which it becomes more efficient to coalesce a range of
> ATSDs into a single per-PID ATSD is currently not well understood due to a
> lack of real-world work loads. This patch adds a debugfs parameter allowing
> the threshold to be altered at runtime in order to aid future development
> and refinement of the value.
> 
> Signed-off-by: Alistair Popple 
> Acked-by: Balbir Singh 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/99c3ce33a00bc40cb218af770ef00c

cheers


Re: Mark ams driver as orphaned in MAINTAINERS

2018-07-23 Thread Michael Ellerman
On Mon, 2018-01-29 at 22:40:09 UTC, Michael Hanselmann wrote:
> I no longer have any hardware with the Apple motion sensor and thus
> relinquish maintainership of the driver.
> 
> Signed-off-by: Michael Hanselmann 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d69ccc00c497a4d81fca2dca9bda9f

cheers


[PATCH 5/5] powerpc/powernv: Query firmware for count cache flush settings

2018-07-23 Thread Michael Ellerman
Look for fw-features properties to determine the appropriate settings
for the count cache flush, and then call the generic powerpc code to
set it up based on the security feature flags.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/platforms/powernv/setup.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index f96df0a25d05..0988d050becd 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -78,6 +78,12 @@ static void init_fw_feat_flags(struct device_node *np)
if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
 
+   if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
+   security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+   if (fw_feature_is("enabled", 
"needs-count-cache-flush-on-context-switch", np))
+   security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
/*
 * The features below are enabled by default, so we instead look to see
 * if firmware has *disabled* them, and clear them if so.
@@ -125,6 +131,7 @@ static void pnv_setup_rfi_flush(void)
 
setup_rfi_flush(type, enable);
setup_barrier_nospec();
+   setup_count_cache_flush();
 }
 
 static void __init pnv_setup_arch(void)
-- 
2.14.1



[PATCH 4/5] powerpc/pseries: Query hypervisor for count cache flush settings

2018-07-23 Thread Michael Ellerman
Use the existing hypercall to determine the appropriate settings for
the count cache flush, and then call the generic powerpc code to set
it up based on the security feature flags.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/hvcall.h  | 2 ++
 arch/powerpc/platforms/pseries/setup.c | 7 +++
 2 files changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 662c8347d699..a0b17f9f1ea4 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -342,10 +342,12 @@
 #define H_CPU_CHAR_BRANCH_HINTS_HONORED(1ull << 58) // IBM bit 5
 #define H_CPU_CHAR_THREAD_RECONFIG_CTRL(1ull << 57) // IBM bit 6
 #define H_CPU_CHAR_COUNT_CACHE_DISABLED(1ull << 56) // IBM bit 7
+#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST  (1ull << 54) // IBM bit 9
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR   (1ull << 62) // IBM bit 1
 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR  (1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FLUSH_COUNT_CACHE  (1ull << 58) // IBM bit 5
 
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK 0x18
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..04805a79cbda 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -484,6 +484,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
 
+   if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
+   security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+   if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
+   security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
/*
 * The features below are enabled by default, so we instead look to see
 * if firmware has *disabled* them, and clear them if so.
@@ -535,6 +541,7 @@ void pseries_setup_rfi_flush(void)
 
setup_rfi_flush(types, enable);
setup_barrier_nospec();
+   setup_count_cache_flush();
 }
 
 #ifdef CONFIG_PCI_IOV
-- 
2.14.1



[PATCH 3/5] powerpc/64s: Add support for software count cache flush

2018-07-23 Thread Michael Ellerman
Some CPU revisions support a mode where the count cache needs to be
flushed by software on context switch. Additionally some revisions may
have a hardware accelerated flush, in which case the software flush
sequence can be shortened.

If we detect the appropriate flag from firmware we patch a branch
into _switch() which takes us to a count cache flush sequence.

That sequence in turn may be patched to return early if we detect that
the CPU supports accelerating the flush sequence in hardware.

Add debugfs support for reporting the state of the flush, as well as
runtime disabling it.

And modify the spectre_v2 sysfs file to report the state of the
software flush.
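
For example, with the software flush active the sysfs file could report
something like (illustrative output only; the exact strings depend on
which mitigations are detected):

  $ cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
  Mitigation: Software count cache flush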

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/asm-prototypes.h|  6 ++
 arch/powerpc/include/asm/security_features.h |  1 +
 arch/powerpc/kernel/entry_64.S   | 54 
 arch/powerpc/kernel/security.c   | 96 ++--
 4 files changed, 152 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 769567b66c0c..70fdc5b9b9fb 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -143,4 +143,10 @@ struct kvm_vcpu;
 void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 
+/* Patch sites */
+extern s32 patch__call_flush_count_cache;
+extern s32 patch__flush_count_cache_return;
+
+extern long flush_count_cache;
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index a0d47bc18a5c..759597bf0fd8 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -22,6 +22,7 @@ enum stf_barrier_type {
 
 void setup_stf_barrier(void);
 void do_stf_barrier_fixups(enum stf_barrier_type types);
+void setup_count_cache_flush(void);
 
 static inline void security_ftr_set(unsigned long feature)
 {
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0357f87a013c..017cf70f01d7 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -504,6 +505,57 @@ _GLOBAL(ret_from_kernel_thread)
li  r3,0
b   .Lsyscall_exit
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define FLUSH_COUNT_CACHE  \
+1: nop;\
+   patch_site 1b, patch__call_flush_count_cache
+
+
+#define BCCTR_FLUSH.long 0x4c400420
+
+.macro nops number
+   .rept \number
+   nop
+   .endr
+.endm
+
+.balign 32
+.global flush_count_cache
+flush_count_cache:
+   /* Save LR into r9 */
+   mflrr9
+
+   .rept 64
+   bl  .+4
+   .endr
+   b   1f
+   nops6
+
+   .balign 32
+   /* Restore LR */
+1: mtlrr9
+   li  r9,0x7fff
+   mtctr   r9
+
+   BCCTR_FLUSH
+
+2: nop
+   patch_site 2b, patch__flush_count_cache_return
+
+   nops3
+
+   .rept 278
+   .balign 32
+   BCCTR_FLUSH
+   nops7
+   .endr
+
+   blr
+#else
+#define FLUSH_COUNT_CACHE
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -535,6 +587,8 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3)  /* Set old stack pointer */
 
+   FLUSH_COUNT_CACHE
+
/*
 * On SMP kernels, care must be taken because a task may be
 * scheduled off CPUx and on to CPUy. Memory ordering must be
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 4cb8f1f7b593..fa9366b53eb7 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,8 @@
 #include 
 #include 
 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -15,6 +17,13 @@
 
 unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
+enum count_cache_flush_type {
+   COUNT_CACHE_FLUSH_NONE  = 0x1,
+   COUNT_CACHE_FLUSH_SW= 0x2,
+   COUNT_CACHE_FLUSH_HW= 0x4,
+};
+static enum count_cache_flush_type count_cache_flush_type;
+
 bool barrier_nospec_enabled;
 
 static void enable_barrier_nospec(bool enable)
@@ -147,17 +156,29 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
 
-   if (bcs || ccd) {
+   if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+   bool comma = false;
seq_buf_printf(, "Mitigation: ");
 
-   if (bcs)
+   if (bcs) {
+   seq_buf_printf(&s, 

[PATCH 2/5] powerpc/64s: Add new security feature flags for count cache flush

2018-07-23 Thread Michael Ellerman
Add security feature flags to indicate the need for software to flush
the count cache on context switch, and for the presence of a hardware
assisted count cache flush.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/security_features.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 44989b22383c..a0d47bc18a5c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -59,6 +59,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Indirect branch prediction cache disabled
 #define SEC_FTR_COUNT_CACHE_DISABLED   0x0020ull
 
+// bcctr 2,0,0 triggers a hardware assisted count cache flush
+#define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0800ull
+
 
 // Features indicating need for Spectre/Meltdown mitigations
 
@@ -74,6 +77,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Firmware configuration indicates user favours security over performance
 #define SEC_FTR_FAVOUR_SECURITY0x0200ull
 
+// Software required to flush count cache on context switch
+#define SEC_FTR_FLUSH_COUNT_CACHE  0x0400ull
+
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
-- 
2.14.1



[PATCH 1/5] powerpc/asm: Add a patch_site macro & helpers for patching instructions

2018-07-23 Thread Michael Ellerman
Add a macro and some helper C functions for patching single asm
instructions.

The gas macro means we can do something like:

  1:nop
patch_site 1b, patch__foo

Which is less visually distracting than defining a GLOBAL symbol at 1,
and also doesn't pollute the symbol table which can confuse eg. perf.

These are obviously similar to our existing feature sections, but are
not automatically patched based on CPU/MMU features, rather they are
designed to be manually patched by C code at some arbitrary point.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/code-patching-asm.h | 18 ++
 arch/powerpc/include/asm/code-patching.h |  2 ++
 arch/powerpc/lib/code-patching.c | 16 
 3 files changed, 36 insertions(+)
 create mode 100644 arch/powerpc/include/asm/code-patching-asm.h

diff --git a/arch/powerpc/include/asm/code-patching-asm.h b/arch/powerpc/include/asm/code-patching-asm.h
new file mode 100644
index ..ed7b1448493a
--- /dev/null
+++ b/arch/powerpc/include/asm/code-patching-asm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H
+#define _ASM_POWERPC_CODE_PATCHING_ASM_H
+
+/* Define a "site" that can be patched */
+.macro patch_site label name
+   .pushsection ".rodata"
+   .balign 4
+   .global \name
+\name:
+   .4byte  \label - .
+   .popsection
+.endm
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 812535f40124..b2051234ada8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -32,6 +32,8 @@ unsigned int create_cond_branch(const unsigned int *addr,
 int patch_branch(unsigned int *addr, unsigned long target, int flags);
 int patch_instruction(unsigned int *addr, unsigned int instr);
 int raw_patch_instruction(unsigned int *addr, unsigned int instr);
+int patch_instruction_site(s32 *addr, unsigned int instr);
+int patch_branch_site(s32 *site, unsigned long target, int flags);
 
 int instr_is_relative_branch(unsigned int instr);
 int instr_is_relative_link_branch(unsigned int instr);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index e0d881ab304e..850f3b8f4da5 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -195,6 +195,22 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags)
return patch_instruction(addr, create_branch(addr, target, flags));
 }
 
+int patch_branch_site(s32 *site, unsigned long target, int flags)
+{
+   unsigned int *addr;
+
+   addr = (unsigned int *)((unsigned long)site + *site);
+   return patch_instruction(addr, create_branch(addr, target, flags));
+}
+
+int patch_instruction_site(s32 *site, unsigned int instr)
+{
+   unsigned int *addr;
+
+   addr = (unsigned int *)((unsigned long)site + *site);
+   return patch_instruction(addr, instr);
+}
+
 bool is_offset_in_branch_range(long offset)
 {
/*
-- 
2.14.1



Re: [PATCH v2 1/2] powerpc/pseries: Avoid blocking rtas polling handling multiple PRRN events

2018-07-23 Thread John Allen

On Mon, Jul 23, 2018 at 11:27:56PM +1000, Michael Ellerman wrote:

Hi John,

I'm a bit puzzled by this one.

John Allen  writes:

When a PRRN event is being handled and another PRRN event comes in, the
second event will block rtas polling waiting on the first to complete,
preventing any further rtas events from being handled. This can be
especially problematic in case that PRRN events are continuously being
queued in which case rtas polling gets indefinitely blocked completely.

This patch introduces a mutex that prevents any subsequent PRRN events from
running while there is a prrn event being handled, allowing rtas polling to
continue normally.

Signed-off-by: John Allen 
---
v2:
  -Unlock prrn_lock when PRRN operations are complete, not after handler is
   scheduled.
  -Remove call to flush_work, the previous broken method of serializing
   PRRN events.
---
 arch/powerpc/kernel/rtasd.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d59d..845fc5aec178 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -284,15 +286,17 @@ static void prrn_work_fn(struct work_struct *work)
 */
pseries_devicetree_update(-prrn_update_scope);
numa_update_cpu_topology(false);
+   mutex_unlock(&prrn_lock);
 }

 static DECLARE_WORK(prrn_work, prrn_work_fn);

 static void prrn_schedule_update(u32 scope)
 {
-   flush_work(&prrn_work);


This seems like it's actually the core of the change. Previously we were
basically blocking on the flush before continuing.


The idea here is to replace the blocking flush_work with a non-blocking 
mutex. So rather than waiting on the running PRRN event to complete, we 
bail out since a PRRN event is already running. The situation this is 
meant to address is flooding the workqueue with PRRN events which, as in 
the situation in patch 2/2, can be queued up faster than they can 
actually be handled.





-   prrn_update_scope = scope;


I don't really understand the scope. With the old code we always ran the
work function once for call, now we potentially throw away the scope
value (if the try lock fails).


So anytime we actually want to run with the scope (in the event the 
trylock succeeds), we schedule the work with the scope value set 
accordingly as seen in the code below. In the case that we actually 
don't want to run a PRRN event (if one is already running) we do throw 
away the scope and ignore the request entirely.





-   schedule_work(&prrn_work);
+   if (mutex_trylock(&prrn_lock)) {
+   prrn_update_scope = scope;
+   schedule_work(&prrn_work);
+   }


Ignoring the scope, the addition of the mutex should not actually make
any difference. If you see the doco for schedule_work() it says:

* This puts a job in the kernel-global workqueue if it was not already
* queued and leaves it in the same position on the kernel-global
* workqueue otherwise.


So the mutex basically implements that existing behaviour. But maybe the
scope is the issue? Like I said I don't really understand the scope
value.


So I guess I'm wondering if we just need to drop the flush_work() and
the rest is not required?


To sum up the above, the behavior without the mutex is not the same as 
with the mutex. Without the mutex, that means that anytime we get a PRRN 
event, it will get queued on the workqueue which can get flooded if PRRN 
events are queued continuously. With the mutex, only one PRRN event can 
be queued for handling at once.


Hope that clears things up!

-John



cheers





Re: [PATCH v7 4/4] kexec_file: Load kernel at top of system RAM if required

2018-07-23 Thread Michal Hocko
On Thu 19-07-18 23:17:53, Baoquan He wrote:
> Kexec has been a formal feature in our distro, and customers owning
> those kinds of very large machines can make use of this feature to speed
> up the reboot process. On UEFI machines, the kexec_file loading will
> search for a place to put the kernel under 4G, from top to bottom. As we
> know, the first 4G of space is the DMA32 zone; DMA, PCI mmcfg, BIOS, etc.
> all try to consume it, so it is possible that no usable space is found
> for the kernel/initrd. Searching top-down across the whole memory space,
> we don't have this worry. 

I do not have the full context here but let me note that you should be
careful when doing top-down reservation because you can easily get into
hotplugable memory and break the hotremove usecase. We even warn when
this is done. See memblock_find_in_range_node
-- 
Michal Hocko
SUSE Labs


[PATCH net-next] wan/fsl_ucc_hdlc: use IS_ERR_VALUE() to check return value of qe_muram_alloc

2018-07-23 Thread YueHaibing
qe_muram_alloc returns an unsigned long integer, which should not be
compared with zero. Check it using IS_ERR_VALUE() to fix this.
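
To illustrate the problem (a sketch, not part of the patch): the returned
offset is an unsigned long, so a "< 0" test can never be true, while
IS_ERR_VALUE() recognises a negative errno encoded into the unsigned value:

	unsigned long off = qe_muram_alloc(32, 32);

	if (off < 0)			/* always false for an unsigned type */
		goto err;
	if (IS_ERR_VALUE(off))		/* correct failure check */
		goto err;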

Fixes: c19b6d246a35 ("drivers/net: support hdlc function for QE-UCC")
Signed-off-by: YueHaibing 
---
 drivers/net/wan/fsl_ucc_hdlc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 9b09c9d..5f0366a 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -192,7 +192,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
priv->ucc_pram_offset = qe_muram_alloc(sizeof(struct ucc_hdlc_param),
ALIGNMENT_OF_UCC_HDLC_PRAM);
 
-   if (priv->ucc_pram_offset < 0) {
+   if (IS_ERR_VALUE(priv->ucc_pram_offset)) {
dev_err(priv->dev, "Can not allocate MURAM for hdlc 
parameter.\n");
ret = -ENOMEM;
goto free_tx_bd;
@@ -230,14 +230,14 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
 
/* Alloc riptr, tiptr */
riptr = qe_muram_alloc(32, 32);
-   if (riptr < 0) {
+   if (IS_ERR_VALUE(riptr)) {
dev_err(priv->dev, "Cannot allocate MURAM mem for Receive 
internal temp data pointer\n");
ret = -ENOMEM;
goto free_tx_skbuff;
}
 
tiptr = qe_muram_alloc(32, 32);
-   if (tiptr < 0) {
+   if (IS_ERR_VALUE(tiptr)) {
dev_err(priv->dev, "Cannot allocate MURAM mem for Transmit 
internal temp data pointer\n");
ret = -ENOMEM;
goto free_riptr;
-- 
2.7.0




Re: [PATCH v4 00/11] hugetlb: Factorize hugetlb architecture primitives

2018-07-23 Thread Michael Ellerman
Alex Ghiti  writes:

> Does anyone have any suggestion about those patches ?

Cross compiling it for some non-x86 arches would be a good start :)

There are cross compilers available here:

  https://mirrors.edge.kernel.org/pub/tools/crosstool/


cheers

> On 07/09/2018 02:16 PM, Michal Hocko wrote:
>> [CC hugetlb guys - 
>> http://lkml.kernel.org/r/20180705110716.3919-1-a...@ghiti.fr]
>>
>> On Thu 05-07-18 11:07:05, Alexandre Ghiti wrote:
>>> In order to reduce copy/paste of functions across architectures and then
>>> make riscv hugetlb port (and future ports) simpler and smaller, this
>>> patchset intends to factorize the numerous hugetlb primitives that are
>>> defined across all the architectures.
>>>
>>> Except for prepare_hugepage_range, this patchset moves the versions that
>>> are just pass-through to standard pte primitives into
>>> asm-generic/hugetlb.h by using the same #ifdef semantic that can be
>>> found in asm-generic/pgtable.h, i.e. __HAVE_ARCH_***.
>>>
>>> s390 architecture has not been tackled in this serie since it does not
>>> use asm-generic/hugetlb.h at all.
>>> powerpc could be factorized a bit more (cf huge_ptep_set_wrprotect).
>>>
>>> This patchset has been compiled on x86 only.
>>>
>>> Changelog:
>>>
>>> v4:
>>>Fix powerpc build error due to misplacing of #include
>>> outside of #ifdef CONFIG_HUGETLB_PAGE, as
>>>pointed by Christophe Leroy.
>>>
>>> v1, v2, v3:
>>>Same version, just problems with email provider and misuse of
>>>--batch-size option of git send-email
>>>
>>> Alexandre Ghiti (11):
>>>hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h
>>>hugetlb: Introduce generic version of hugetlb_free_pgd_range
>>>hugetlb: Introduce generic version of set_huge_pte_at
>>>hugetlb: Introduce generic version of huge_ptep_get_and_clear
>>>hugetlb: Introduce generic version of huge_ptep_clear_flush
>>>hugetlb: Introduce generic version of huge_pte_none
>>>hugetlb: Introduce generic version of huge_pte_wrprotect
>>>hugetlb: Introduce generic version of prepare_hugepage_range
>>>hugetlb: Introduce generic version of huge_ptep_set_wrprotect
>>>hugetlb: Introduce generic version of huge_ptep_set_access_flags
>>>hugetlb: Introduce generic version of huge_ptep_get
>>>
>>>   arch/arm/include/asm/hugetlb-3level.h| 32 +-
>>>   arch/arm/include/asm/hugetlb.h   | 33 +--
>>>   arch/arm64/include/asm/hugetlb.h | 39 +++-
>>>   arch/ia64/include/asm/hugetlb.h  | 47 ++-
>>>   arch/mips/include/asm/hugetlb.h  | 40 +++--
>>>   arch/parisc/include/asm/hugetlb.h| 33 +++
>>>   arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +
>>>   arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
>>>   arch/powerpc/include/asm/hugetlb.h   | 43 ++
>>>   arch/powerpc/include/asm/nohash/32/pgtable.h |  2 +
>>>   arch/powerpc/include/asm/nohash/64/pgtable.h |  1 +
>>>   arch/sh/include/asm/hugetlb.h| 54 ++---
>>>   arch/sparc/include/asm/hugetlb.h | 40 +++--
>>>   arch/x86/include/asm/hugetlb.h   | 72 +--
>>>   include/asm-generic/hugetlb.h| 88 
>>> +++-
>>>   15 files changed, 143 insertions(+), 384 deletions(-)
>>>
>>> -- 
>>> 2.16.2


Re: [PATCH v7 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups

2018-07-23 Thread Michael Ellerman
Shilpasri G Bhat  writes:
> diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
> index f829dad..99afbf7 100644
> --- a/drivers/hwmon/ibmpowernv.c
> +++ b/drivers/hwmon/ibmpowernv.c
> @@ -292,12 +344,126 @@ static u32 get_sensor_hwmon_index(struct sensor_data 
> *sdata,
>   return ++sensor_groups[sdata->type].hwmon_index;
>  }
>  
> +static int init_sensor_group_data(struct platform_device *pdev,
> +   struct platform_data *pdata)
> +{
> + struct sensor_group_data *sgrp_data;
> + struct device_node *groups, *sgrp;
> + enum sensors type;
> + int count = 0, ret = 0;
> +
> + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
> + if (!groups)
> + return ret;
> +
> + for_each_child_of_node(groups, sgrp) {
> + type = get_sensor_type(sgrp);
> + if (type != MAX_SENSOR_TYPE)
> + pdata->nr_sensor_groups++;
> + }
> +
> + if (!pdata->nr_sensor_groups)
> + goto out;
> +
> + sgrp_data = devm_kcalloc(&pdev->dev, pdata->nr_sensor_groups,
> +  sizeof(*sgrp_data), GFP_KERNEL);
> + if (!sgrp_data) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for_each_child_of_node(groups, sgrp) {
> + const __be32 *phandles;
> + int len, gid;
> +
> + type = get_sensor_type(sgrp);
> + if (type == MAX_SENSOR_TYPE)
> + continue;
> +
> + if (of_property_read_u32(sgrp, "sensor-group-id", &gid))
> + continue;
> +
> + phandles = of_get_property(sgrp, "sensors", &len);
> + if (!phandles)
> + continue;

You should be able to use the more modern OF APIs, eg:

rc = of_count_phandle_with_args(sgrp, "sensors", NULL);

> + len /= sizeof(u32);
> + if (!len)
> + continue;

Which would make that check unnecessary.

> + sensor_groups[type].attr_count++;
> + sgrp_data[count].gid = gid;
> + mutex_init(&sgrp_data[count].mutex);
> + sgrp_data[count++].enable = false;
> + }
> +
> + pdata->sgrp_data = sgrp_data;
> +out:
> + of_node_put(groups);
> + return ret;
> +}
> +
> +static struct sensor_group_data *get_sensor_group(struct platform_data 
> *pdata,
> +   struct device_node *node,
> +   enum sensors gtype)
> +{
> + struct sensor_group_data *sgrp_data = pdata->sgrp_data;
> + struct device_node *groups, *sgrp;
> +
> + groups = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
> + if (!groups)
> + return NULL;
> +
> + for_each_child_of_node(groups, sgrp) {
> + const __be32 *phandles;
> + int len, gid, i;
> + enum sensors type;
> +
> + type = get_sensor_type(sgrp);
> + if (type != gtype)
> + continue;
> +
> + if (of_property_read_u32(sgrp, "sensor-group-id", &gid))
> + continue;
> +
> + phandles = of_get_property(sgrp, "sensors", &len);
> + if (!phandles)
> + continue;
> +
> + len /= sizeof(u32);
> + if (!len)
> + continue;
> +
> + while (--len >= 0)
> + if (be32_to_cpu(phandles[len]) == node->phandle)
> + break;

Likewise, here you could use of_for_each_phandle().
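
For illustration, a rough sketch of how that lookup could look with the
iterator API (untested; the iterator field names and the refcounting
details should be double-checked against include/linux/of.h):

    struct of_phandle_iterator it;
    bool found = false;
    int err;

    of_for_each_phandle(&it, err, sgrp, "sensors", NULL, 0) {
            if (it.phandle == node->phandle) {
                    of_node_put(it.node);   /* drop the ref held by the iterator */
                    found = true;
                    break;
            }
    }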


cheers


Re: [PATCH v2 2/2] powerpc/pseries: Wait for completion of hotplug events during PRRN handling

2018-07-23 Thread Michael Ellerman
John Allen  writes:

> While handling PRRN events, the time to handle the actual hotplug events
> dwarfs the time it takes to perform the device tree updates and queue the
> hotplug events. In the case that PRRN events are being queued continuously,
> hotplug events have been observed to be queued faster than the kernel can
> actually handle them. This patch avoids the problem by waiting for a
> hotplug request to complete before queueing more hotplug events.

So do we need the hotplug work queue at all? Can we just call
handle_dlpar_errorlog() directly?

Or are we using the work queue to serialise things? And if so would a
mutex be better?
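
A rough sketch of the direct-call variant (untested; it assumes
handle_dlpar_errorlog() is callable from this context and uses a
hypothetical mutex purely for serialisation):

    static DEFINE_MUTEX(prrn_hotplug_mutex);        /* hypothetical */

    static void prrn_update_node(__be32 phandle)
    {
            struct pseries_hp_errorlog *hp_elog;

            /* ... allocate and fill in hp_elog as the existing code does ... */

            mutex_lock(&prrn_hotplug_mutex);
            handle_dlpar_errorlog(hp_elog);         /* no work queue round-trip;
                                                       return value handling omitted */
            mutex_unlock(&prrn_hotplug_mutex);

            kfree(hp_elog);
    }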

It looks like prrn_update_node() is called via at least prrn_work_fn()
and post_mobility_fixup().

The latter is called from migration_store(), which seems like it would
be harmless. But also from pseries_suspend_enable_irqs() which I'm less
clear on.

cheers

> diff --git a/arch/powerpc/platforms/pseries/mobility.c 
> b/arch/powerpc/platforms/pseries/mobility.c
> index 8a8033a249c7..49930848fa78 100644
> --- a/arch/powerpc/platforms/pseries/mobility.c
> +++ b/arch/powerpc/platforms/pseries/mobility.c
> @@ -242,6 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 
> drc_index)
>  static void prrn_update_node(__be32 phandle)
>  {
>   struct pseries_hp_errorlog *hp_elog;
> + struct completion hotplug_done;
>   struct device_node *dn;
>  
>   /*
> @@ -263,7 +264,9 @@ static void prrn_update_node(__be32 phandle)
>   hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
>   hp_elog->_drc_u.drc_index = phandle;
>  
> - queue_hotplug_event(hp_elog, NULL, NULL);
> + init_completion(&hotplug_done);
> + queue_hotplug_event(hp_elog, &hotplug_done, NULL);
> + wait_for_completion(&hotplug_done);
>  
>   kfree(hp_elog);
>  }
> -- 
> 2.17.1


Re: [PATCH v2 1/2] powerpc/pseries: Avoid blocking rtas polling handling multiple PRRN events

2018-07-23 Thread Michael Ellerman
Hi John,

I'm a bit puzzled by this one.

John Allen  writes:
> When a PRRN event is being handled and another PRRN event comes in, the
> second event will block rtas polling waiting on the first to complete,
> preventing any further rtas events from being handled. This can be
> especially problematic when PRRN events are continuously being
> queued, in which case rtas polling is blocked indefinitely.
>
> This patch introduces a mutex that prevents any subsequent PRRN events from
> running while a PRRN event is being handled, allowing rtas polling to
> continue normally.
>
> Signed-off-by: John Allen 
> ---
> v2:
>   -Unlock prrn_lock when PRRN operations are complete, not after handler is
>scheduled.
>   -Remove call to flush_work, the previous broken method of serializing
>PRRN events.
> ---
>  arch/powerpc/kernel/rtasd.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
> index 44d66c33d59d..845fc5aec178 100644
> --- a/arch/powerpc/kernel/rtasd.c
> +++ b/arch/powerpc/kernel/rtasd.c
> @@ -284,15 +286,17 @@ static void prrn_work_fn(struct work_struct *work)
>*/
>   pseries_devicetree_update(-prrn_update_scope);
>   numa_update_cpu_topology(false);
> + mutex_unlock(&prrn_lock);
>  }
>  
>  static DECLARE_WORK(prrn_work, prrn_work_fn);
>  
>  static void prrn_schedule_update(u32 scope)
>  {
> - flush_work(&prrn_work);

This seems like it's actually the core of the change. Previously we were
basically blocking on the flush before continuing.

> - prrn_update_scope = scope;

I don't really understand the scope. With the old code we always ran the
work function once per call; now we potentially throw away the scope
value (if the trylock fails).

> - schedule_work(&prrn_work);
> + if (mutex_trylock(&prrn_lock)) {
> + prrn_update_scope = scope;
> + schedule_work(&prrn_work);
> + }

Ignoring the scope, the addition of the mutex should not actually make
any difference. If you see the doco for schedule_work() it says:

 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.


So the mutex basically implements that existing behaviour. But maybe the
scope is the issue? Like I said I don't really understand the scope
value.


So I guess I'm wondering if we just need to drop the flush_work() and
the rest is not required?

cheers


Re: [PATCH 4/7] x86,tlb: make lazy TLB mode lazier

2018-07-23 Thread Rik van Riel
On Fri, 2018-07-20 at 10:30 +0200, Peter Zijlstra wrote:
> On Thu, Jul 19, 2018 at 10:04:09AM -0700, Andy Lutomirski wrote:
> > I added some more arch maintainers.  The idea here is that, on x86
> > at
> > least, task->active_mm and all its refcounting is pure
> > overhead.  When
> > a process exits, __mmput() gets called, but the core kernel has a
> > longstanding "optimization" in which other tasks (kernel threads
> > and
> > idle tasks) may have ->active_mm pointing at this mm.  This is
> > nasty,
> > complicated, and hurts performance on large systems, since it
> > requires
> > extra atomic operations whenever a CPU switches between real users
> > threads and idle/kernel threads.
> > 
> > It's also almost completely worthless on x86 at least, since
> > __mmput()
> > frees pagetables, and that operation *already* forces a remote TLB
> > flush, so we might as well zap all the active_mm references at the
> > same time.
> 
> So I disagree that active_mm is complicated (the code is less than
> ideal
> but that is actually fixable). And aside from the process exit case,
> it
> does avoid CR3 writes when switching between user and kernel threads
> (which can be far more often than exit if you have longer running
> tasks).
> 
> Now agreed, recent x86 work has made that less important.
> 
> And I of course also agree that not doing those refcount atomics is
> better.

It might be cleaner to keep the ->active_mm pointer
in place for now (at least in the first patch), even 
on architectures where we end up dropping the refcounting.

That way the code is more similar everywhere, and
we just get rid of the expensive instructions.

Let me try coding this up...
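
(For context, the refcounting being discussed is the mmgrab()/mmdrop()
pair on the lazy-TLB switch path. Roughly, from memory of
kernel/sched/core.c, so the details may differ:)

    if (!next->mm) {                        /* kernel thread or idle */
            next->active_mm = prev->active_mm;
            mmgrab(prev->active_mm);        /* the atomic inc in question */
            enter_lazy_tlb(prev->active_mm, next);
    } else {
            switch_mm_irqs_off(prev->active_mm, next->mm, next);
    }

    /* ... and later, in finish_task_switch(), the matching mmdrop() is
     * the atomic dec when the borrowed mm is finally released. */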

-- 
All Rights Reversed.

signature.asc
Description: This is a digitally signed message part


Re: [RFC 0/4] Virtio uses DMA API for all devices

2018-07-23 Thread Michael S. Tsirkin
On Mon, Jul 23, 2018 at 11:58:23AM +0530, Anshuman Khandual wrote:
> On 07/20/2018 06:46 PM, Michael S. Tsirkin wrote:
> > On Fri, Jul 20, 2018 at 09:29:37AM +0530, Anshuman Khandual wrote:
> >> This patch series is the follow up on the discussions we had before about
> >> the RFC titled [RFC,V2] virtio: Add platform specific DMA API translation
> >> for virito devices (https://patchwork.kernel.org/patch/10417371/). There
> >> were suggestions about doing away with two different paths of transactions
> >> with the host/QEMU, first being the direct GPA and the other being the DMA
> >> API based translations.
> >>
> >> First patch attempts to create a direct GPA mapping based DMA operations
> >> structure called 'virtio_direct_dma_ops' with exact same implementation
> >> of the direct GPA path which virtio core currently has but just wrapped in
> >> a DMA API format. Virtio core must use 'virtio_direct_dma_ops' instead of
> >> the arch default in absence of VIRTIO_F_IOMMU_PLATFORM flag to preserve the
> >> existing semantics. The second patch does exactly that inside the function
> >> virtio_finalize_features(). The third patch removes the default direct GPA
> >> path from virtio core forcing it to use DMA API callbacks for all devices.
> >> Now with that change, every device must have a DMA operations structure
> >> associated with it. The fourth patch adds an additional hook which gives
> >> the platform an opportunity to do yet another override if required. This
> >> platform hook can be used on POWER Ultravisor based protected guests to
> >> load up SWIOTLB DMA callbacks to do the required (as discussed previously
> >> in the above mentioned thread how host is allowed to access only parts of
> >> the guest GPA range) bounce buffering into the shared memory for all I/O
> >> scatter gather buffers to be consumed on the host side.
> >>
> >> Please go through these patches and review whether this approach broadly
> >> makes sense. I will appreciate suggestions, inputs, comments regarding
> >> the patches or the approach in general. Thank you.
> > I like how patches 1-3 look. Could you test performance
> > with/without to see whether the extra indirection through
> > use of DMA ops causes a measurable slow-down?
> 
> I ran this simple DD command 10 times where /dev/vda is a virtio block
> device of 10GB size.
> 
> dd if=/dev/zero of=/dev/vda bs=8M count=1024 oflag=direct
> 
> With and without the patches, the bandwidth (which varies over a fairly
> wide range) does not look that different.
> 
> Without patches
> ===
> 
> -- 1 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.95557 s, 4.4 GB/s
> -- 2 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.05176 s, 4.2 GB/s
> -- 3 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.88314 s, 4.6 GB/s
> -- 4 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.84899 s, 4.6 GB/s
> -- 5 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 5.37184 s, 1.6 GB/s
> -- 6 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.9205 s, 4.5 GB/s
> -- 7 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.85166 s, 1.3 GB/s
> -- 8 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.74049 s, 4.9 GB/s
> -- 9 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.31699 s, 1.4 GB/s
> -- 10 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.47057 s, 3.5 GB/s
> 
> 
> With patches
> 
> 
> -- 1 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.25993 s, 3.8 GB/s
> -- 2 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.82438 s, 4.7 GB/s
> -- 3 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.93856 s, 4.4 GB/s
> -- 4 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.83405 s, 4.7 GB/s
> -- 5 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 7.50199 s, 1.1 GB/s
> -- 6 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.28742 s, 3.8 GB/s
> -- 7 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 5.74958 s, 1.5 GB/s
> -- 8 -
> 1024+0 records in
> 1024+0 records out
> 8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.99149 s, 4.3 GB/s
> -- 9 -
> 1024+0 

Re: [PATCH] scsi: prevent ISA driver from building on PPC32

2018-07-23 Thread Christoph Hellwig
On Sat, Jul 21, 2018 at 12:58:21PM -0700, Randy Dunlap wrote:
> From: Randy Dunlap 
> 
> Prevent drivers from building on PPC32 if they use isa_bus_to_virt(),
> isa_virt_to_bus(), or isa_page_to_bus(), which are not available and
> thus cause build errors.

Please don't introduce weird arch dependencies, and add a
CONFIG_ISA_VIRT_TO_BUS instead.


Re: [PATCH] scsi: prevent ISA driver from building on PPC32

2018-07-23 Thread Christoph Hellwig
On Mon, Jul 23, 2018 at 01:18:11AM -0700, Christoph Hellwig wrote:
> On Sat, Jul 21, 2018 at 12:58:21PM -0700, Randy Dunlap wrote:
> > From: Randy Dunlap 
> > 
> > Prevent drivers from building on PPC32 if they use isa_bus_to_virt(),
> > isa_virt_to_bus(), or isa_page_to_bus(), which are not available and
> > thus cause build errors.
> 
> Please don't introduce weird arch dependencies, and add a
> CONFIG_ISA_VIRT_TO_BUS instead.

And in fact we have so few drivers that we should just kill off the
API entirely instead.  I'll take care of aha1542 in the next week or
so.


Re: [PATCH 2/3] [v2] m68k: mac: use time64_t in RTC handling

2018-07-23 Thread Geert Uytterhoeven
Hi Finn,

On Sun, Jul 22, 2018 at 1:56 PM Finn Thain  wrote:
> On Wed, 18 Jul 2018, I wrote:
> > On Wed, 18 Jul 2018, Arnd Bergmann wrote:
> > > I'd suggest we do it like below to make it consistent with the rest
> > > again, using the 1904..2040 range of dates and no warning for invalid
> > > dates.
> > >
> > > If you agree, I'll send that as a proper patch.
> >
> > Geert may instead wish to fixup or revert the patch he has committed
> > already...
>
> Geert, how do you want to handle this?
>
> Do you want a fixup patch or a v3 patch with the WARN_ON and the other two
> issues addressed?

Please send a fixup patch, for the m68k/master branch, which is non-rebasing.
I'll fold it into the original commit on the m68k/for-next branch.

> I'm willing to send either one if Arnd is okay with that. I'd really like
> to resolve this before the merge window opens, since my PMU patch series
> is affected.

+1. If it's not resolved (a few days) before the merge window opens, I may have
to revert the patch instead.

Thanks!

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


[RFC 1/1] cpuidle : Move saving and restoring of sprs to opal

2018-07-23 Thread Abhishek Goel
This patch moves the saving and restoring of SPRs for P9 cpuidle
from the kernel to OPAL. It still uses the existing code to detect the
first thread in the core.
In an attempt to make the powernv idle code backward compatible,
and to some extent forward compatible, add support for pre-stop entry
and post-stop exit actions in OPAL. If a kernel knows about these
OPAL calls, then only a firmware update is needed to support newer
hardware, instead of waiting for kernel updates.
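
Roughly, the intended call flow from the kernel side is sketched below
(simplified to C; the patch itself does this from idle_book3s.S, and the
scope/psscr/srr1 variable names here are placeholders):

    /* On the thread entering a deep stop state (first thread in core):
     * hand the SPR save area to firmware before stopping. */
    opal_cpuidle_save(local_paca->opal_stop_sprs, scope, psscr);

    /* ... enter stop, wake up ... */

    /* On wakeup with state loss: let firmware restore the SPRs. */
    opal_cpuidle_restore(local_paca->opal_stop_sprs, scope, psscr, srr1);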

Signed-off-by: Abhishek Goel 
---

Link to the Skiboot patch corresponding to this patch:
http://patchwork.ozlabs.org/patch/947568/

 arch/powerpc/include/asm/cpuidle.h|  10 --
 arch/powerpc/include/asm/opal-api.h   |   4 +-
 arch/powerpc/include/asm/opal.h   |   3 +
 arch/powerpc/include/asm/paca.h   |   5 +-
 arch/powerpc/kernel/asm-offsets.c |  10 +-
 arch/powerpc/kernel/idle_book3s.S | 130 ++
 arch/powerpc/platforms/powernv/idle.c |   4 +
 .../powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/xmon/xmon.c  |  12 +-
 9 files changed, 61 insertions(+), 119 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index e210a83eb196..c10f47af9a55 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -68,16 +68,6 @@
 #define ERR_DEEP_STATE_ESL_MISMATCH-2
 
 #ifndef __ASSEMBLY__
-/* Additional SPRs that need to be saved/restored during stop */
-struct stop_sprs {
-   u64 pid;
-   u64 ldbar;
-   u64 fscr;
-   u64 hfscr;
-   u64 mmcr1;
-   u64 mmcr2;
-   u64 mmcra;
-};
 
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 3bab299eda49..6792a737bc9a 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -208,7 +208,9 @@
 #define OPAL_SENSOR_READ_U64   162
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   165
-#define OPAL_LAST  165
+#define OPAL_IDLE_SAVE 168
+#define OPAL_IDLE_RESTORE  169
+#define OPAL_LAST  169
 
 #define QUIESCE_HOLD   1 /* Spin all calls at entry */
 #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e1b2910c6e81..12d57aeacde2 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -356,6 +356,9 @@ extern void opal_kmsg_init(void);
 
 extern int opal_event_request(unsigned int opal_event_nr);
 
+extern int opal_cpuidle_save(u64 *stop_sprs, int scope, u64 psscr);
+extern int opal_cpuidle_restore(u64 *stop_sprs, int scope, u64 psscr, u64 srr1);
+
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
 unsigned long vmalloc_size);
 void opal_free_sg_list(struct opal_sg_list *sg);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 6d34bd71139d..765524e76beb 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -195,11 +195,12 @@ struct paca_struct {
/* The PSSCR value that the kernel requested before going to stop */
u64 requested_psscr;
 
+   u64 wakeup_psscr;
/*
-* Save area for additional SPRs that need to be
+* Save area for SPRs that need to be
 * saved/restored during cpuidle stop.
 */
-   struct stop_sprs stop_sprs;
+   u64 *opal_stop_sprs;
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 0a0544335950..65a3d8582017 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -769,14 +769,8 @@ int main(void)
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
-#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
-   STOP_SPR(STOP_PID, pid);
-   STOP_SPR(STOP_LDBAR, ldbar);
-   STOP_SPR(STOP_FSCR, fscr);
-   STOP_SPR(STOP_HFSCR, hfscr);
-   STOP_SPR(STOP_MMCR1, mmcr1);
-   STOP_SPR(STOP_MMCR2, mmcr2);
-   STOP_SPR(STOP_MMCRA, mmcra);
+   OFFSET(PACA_WAKEUP_PSSCR, paca_struct, wakeup_psscr);
+   OFFSET(STOP_SPRS, paca_struct, opal_stop_sprs);
 #endif
 
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/idle_book3s.S 
b/arch/powerpc/kernel/idle_book3s.S
index e734f6e45abc..66fc955abee3 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -45,6 +45,9 @@
 
 #define 

Re: [RFC 0/4] Virtio uses DMA API for all devices

2018-07-23 Thread Anshuman Khandual
On 07/20/2018 06:46 PM, Michael S. Tsirkin wrote:
> On Fri, Jul 20, 2018 at 09:29:37AM +0530, Anshuman Khandual wrote:
>> This patch series is the follow up on the discussions we had before about
>> the RFC titled [RFC,V2] virtio: Add platform specific DMA API translation
>> for virito devices (https://patchwork.kernel.org/patch/10417371/). There
>> were suggestions about doing away with two different paths of transactions
>> with the host/QEMU, first being the direct GPA and the other being the DMA
>> API based translations.
>>
>> First patch attempts to create a direct GPA mapping based DMA operations
>> structure called 'virtio_direct_dma_ops' with exact same implementation
>> of the direct GPA path which virtio core currently has but just wrapped in
>> a DMA API format. Virtio core must use 'virtio_direct_dma_ops' instead of
>> the arch default in absence of VIRTIO_F_IOMMU_PLATFORM flag to preserve the
>> existing semantics. The second patch does exactly that inside the function
>> virtio_finalize_features(). The third patch removes the default direct GPA
>> path from virtio core forcing it to use DMA API callbacks for all devices.
>> Now with that change, every device must have a DMA operations structure
>> associated with it. The fourth patch adds an additional hook which gives
>> the platform an opportunity to do yet another override if required. This
>> platform hook can be used on POWER Ultravisor based protected guests to
>> load up SWIOTLB DMA callbacks to do the required (as discussed previously
>> in the above mentioned thread how host is allowed to access only parts of
>> the guest GPA range) bounce buffering into the shared memory for all I/O
>> scatter gather buffers to be consumed on the host side.
>>
>> Please go through these patches and review whether this approach broadly
>> makes sense. I will appreciate suggestions, inputs, comments regarding
>> the patches or the approach in general. Thank you.
> I like how patches 1-3 look. Could you test performance
> with/without to see whether the extra indirection through
> use of DMA ops causes a measurable slow-down?

I ran this simple DD command 10 times where /dev/vda is a virtio block
device of 10GB size.

dd if=/dev/zero of=/dev/vda bs=8M count=1024 oflag=direct

With and without the patches, the bandwidth (which varies over a fairly
wide range) does not look that different.

Without patches
===

-- 1 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.95557 s, 4.4 GB/s
-- 2 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.05176 s, 4.2 GB/s
-- 3 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.88314 s, 4.6 GB/s
-- 4 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.84899 s, 4.6 GB/s
-- 5 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 5.37184 s, 1.6 GB/s
-- 6 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.9205 s, 4.5 GB/s
-- 7 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.85166 s, 1.3 GB/s
-- 8 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.74049 s, 4.9 GB/s
-- 9 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 6.31699 s, 1.4 GB/s
-- 10 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.47057 s, 3.5 GB/s


With patches


-- 1 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.25993 s, 3.8 GB/s
-- 2 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.82438 s, 4.7 GB/s
-- 3 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.93856 s, 4.4 GB/s
-- 4 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.83405 s, 4.7 GB/s
-- 5 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 7.50199 s, 1.1 GB/s
-- 6 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.28742 s, 3.8 GB/s
-- 7 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 5.74958 s, 1.5 GB/s
-- 8 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 1.99149 s, 4.3 GB/s
-- 9 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 5.67647 s, 1.5 GB/s
-- 10 -
1024+0 records in
1024+0 records out
8589934592 bytes (8.6 GB, 8.0 GiB) copied, 2.93957 s, 2.9 GB/s

Does this look okay?
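
For readers following along, a minimal sketch of what a direct-GPA
mapping wrapped in DMA-API form could look like (untested; the function
and structure names here are illustrative, not the actual patch
contents):

    static dma_addr_t virtio_direct_map_page(struct device *dev, struct page *page,
                                             unsigned long offset, size_t size,
                                             enum dma_data_direction dir,
                                             unsigned long attrs)
    {
            /* 1:1 guest-physical mapping, matching the legacy direct path */
            return page_to_phys(page) + offset;
    }

    static void virtio_direct_unmap_page(struct device *dev, dma_addr_t addr,
                                         size_t size, enum dma_data_direction dir,
                                         unsigned long attrs)
    {
            /* nothing to tear down for a direct mapping */
    }

    static const struct dma_map_ops virtio_direct_dma_ops_sketch = {
            .map_page       = virtio_direct_map_page,
            .unmap_page     = virtio_direct_unmap_page,
            /* .map_sg, .alloc, .free etc. would follow the same pattern */
    };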