[PATCH] powerpc/pseries: Perform full re-add of CPU for topology update post-migration

2018-10-29 Thread Nathan Fontenot
On pseries systems, performing a partition migration can result in
altering the nodes a CPU is assigned to on the destination system. For
example, pre-migration on the source system CPUs are in nodes 1 and 3,
post-migration on the destination system CPUs are in nodes 2 and 3.

Handling the node change for a CPU can cause corruption in the slab
cache if we hit a timing where a CPU's node is changed while cache_reap()
is invoked. The corruption occurs because the slab cache code appears
to rely on the CPU and slab cache pages being on the same node.

The current dynamic updating of a CPU's node done in arch/powerpc/mm/numa.c
does not prevent us from hitting this scenario.

Changing the device tree property update notification handler that
recognizes an affinity change for a CPU to do a full DLPAR remove and
add of the CPU instead of dynamically changing its node resolves this
issue.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/include/asm/topology.h  |2 ++
 arch/powerpc/mm/numa.c   |9 +
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   19 +++
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index a4a718dbfec6..f85e2b01c3df 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -132,6 +132,8 @@ static inline void shared_proc_topology_init(void) {}
 #define topology_sibling_cpumask(cpu)  (per_cpu(cpu_sibling_map, cpu))
 #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)  (cpu_to_core_id(cpu))
+
+int dlpar_cpu_readd(int cpu);
 #endif
 #endif
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 693ae1c1acba..bb6a7b56bef7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1461,13 +1461,6 @@ static void reset_topology_timer(void)
 
 #ifdef CONFIG_SMP
 
-static void stage_topology_update(int core_id)
-{
-   cpumask_or(_associativity_changes_mask,
-   _associativity_changes_mask, cpu_sibling_mask(core_id));
-   reset_topology_timer();
-}
-
 static int dt_update_callback(struct notifier_block *nb,
unsigned long action, void *data)
 {
@@ -1480,7 +1473,7 @@ static int dt_update_callback(struct notifier_block *nb,
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
of_property_read_u32(update->dn, "reg", _id);
-   stage_topology_update(core_id);
+   rc = dlpar_cpu_readd(core_id);
rc = NOTIFY_OK;
}
break;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 2f8e62163602..97feb6e79f1a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -802,6 +802,25 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
return rc;
 }
 
+int dlpar_cpu_readd(int cpu)
+{
+   struct device_node *dn;
+   struct device *dev;
+   u32 drc_index;
+   int rc;
+
+   dev = get_cpu_device(cpu);
+   dn = dev->of_node;
+
+   rc = of_property_read_u32(dn, "ibm,my-drc-index", _index);
+
+   rc = dlpar_cpu_remove_by_index(drc_index);
+   if (!rc)
+   rc = dlpar_cpu_add(drc_index);
+
+   return rc;
+}
+
 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 {
u32 count, drc_index;



Re: [PATCH v04 3/4] migration/memory: Evaluate LMB assoc changes

2018-10-10 Thread Nathan Fontenot
On 10/09/2018 03:37 PM, Michael Bringmann wrote:
> migration/memory: This patch adds code that recognizes changes to
> the associativity of memory blocks described by the device-tree
> properties in order to drive equivalent 'hotplug' operations to
> update local and general kernel data structures to reflect those
> changes.  These differences may include:
> 
> * Evaluate 'ibm,dynamic-memory' properties when processing the
>   updated device-tree properties of the system during Post Migration
>   events (migration_store).  The new functionality looks for changes
>   to the aa_index values for each drc_index/LMB to identify any memory
>   blocks that should be readded.
> 
> * In an LPAR migration scenario, the "ibm,associativity-lookup-arrays"
>   property may change.  In the event that a row of the array differs,
>   locate all assigned memory blocks with that 'aa_index' and 're-add'
>   them to the system memory block data structures.  In the process of
>   the 're-add', the system routines will update the corresponding entry
>   for the memory in the LMB structures and any other relevant kernel
>   data structures.
> 
> A number of previous extensions made to the DRMEM code for scanning
> device-tree properties and creating LMB arrays are used here to
> ensure that the resulting code is simpler and more usable:
> 
> * Use new paired list iterator for the DRMEM LMB info arrays to find
>   differences in old and new versions of properties.
> * Use new iterator for copies of the DRMEM info arrays to evaluate
>   completely new structures.
> * Combine common code for parsing and evaluating memory description
>   properties based on the DRMEM LMB array model to greatly simplify
>   extension from the older property 'ibm,dynamic-memory' to the new
>   property model of 'ibm,dynamic-memory-v2'.
> 
> For support, add a new pseries hotplug action for DLPAR operations,
> PSERIES_HP_ELOG_ACTION_READD_MULTIPLE.  It is a variant of the READD
> operation which performs the action upon multiple instances of the
> resource at one time.  The operation is to be triggered by device-tree
> analysis of updates by RTAS events analyzed by 'migration_store' during
> post-migration processing.  It will be used for memory updates,
> initially.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in v04:
>   -- Move dlpar_memory_readd_multiple() function definition and use
>  into previous patch along with action constant definition.
>   -- Correct spacing in patch
> Changes in v03:
>   -- Modify the code that parses the memory affinity attributes to
>  mark relevant DRMEM LMB array entries using the internal_flags
>  mechanism instead of generating unique hotplug actions for each
>  memory block to be readded.  The change is intended to both
>  simplify the code, and to require fewer resources on systems
>  with huge amounts of memory.
>   -- Save up notice about any all LMB entries until the end of the
>  'migration_store' operation at which point a single action is
>  queued to scan the entire DRMEM array.
>   -- Add READD_MULTIPLE function for memory that scans the DRMEM
>  array to identify multiple entries that were marked previously.
>  The corresponding memory blocks are to be readded to the system
>  to update relevant data structures outside of the powerpc-
>  specific code.
>   -- Change dlpar_memory_pmt_changes_action to directly queue worker
>  to pseries work queue.
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  189 
> +++
>  arch/powerpc/platforms/pseries/mobility.c   |4 
>  arch/powerpc/platforms/pseries/pseries.h|4 
>  3 files changed, 163 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index bf2420a..a7ca22e 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -534,8 +534,11 @@ static int dlpar_memory_readd_by_index(u32 drc_index)
>   }
>   }
> 
> - if (!lmb_found)
> - rc = -EINVAL;
> + if (!lmb_found) {
> + pr_info("Failed to update memory for drc index %lx\n",
> + (unsigned long) drc_index);
> + return -EINVAL;
> + }
> 
>   if (rc)
>   pr_info("Failed to update memory at %llx\n",
> @@ -1002,13 +1005,43 @@ static int pseries_add_mem_node(struct device_node 
> *np)
>   return (ret < 0) ? -EINVAL : 0;
>  }
> 
> -static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
> +static int pmt_changes = 0;
> +
> +void dlpar_memory_pmt_changes_set(void)
> +{
> + pmt_changes = 1;
> +}
> +
> +void dlpar_memory_pmt_changes_clear(void)
> +{
> + pmt_changes = 0;
> +}
> +
> +int dlpar_memory_pmt_changes(void)
> +{
> + return pmt_changes;
> +}
> +
> +void dlpar_memory_pmt_changes_action(void)
> +{
> + if 

Re: [PATCH v04 3/5] migration/memory: Add hotplug READD_MULTIPLE

2018-10-10 Thread Nathan Fontenot
On 10/09/2018 03:36 PM, Michael Bringmann wrote:
> migration/memory: This patch adds a new pseries hotplug action
> for CPU and memory operations, PSERIES_HP_ELOG_ACTION_READD_MULTIPLE.
> This is a variant of the READD operation which performs the action
> upon multiple instances of the resource at one time.  The operation
> is to be triggered by device-tree analysis of updates by RTAS events
> analyzed by 'migration_store' during post-migration processing.  It
> will be used for memory updates, initially.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in v04:
>   -- Move init of 'lmb->internal_flags' in init_drmem_v2_lmbs to
>  previous patch.
>   -- Pull in implementation of dlpar_memory_readd_multiple() to go
>  with operation flag.
> ---
>  arch/powerpc/include/asm/rtas.h |1 +
>  arch/powerpc/platforms/pseries/hotplug-memory.c |   31 
> +++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 0183e95..cc00451 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -333,6 +333,7 @@ struct pseries_hp_errorlog {
>  #define PSERIES_HP_ELOG_ACTION_ADD   1
>  #define PSERIES_HP_ELOG_ACTION_REMOVE2
>  #define PSERIES_HP_ELOG_ACTION_READD 3
> +#define PSERIES_HP_ELOG_ACTION_READD_MULTIPLE4
> 
>  #define PSERIES_HP_ELOG_ID_DRC_NAME  1
>  #define PSERIES_HP_ELOG_ID_DRC_INDEX 2
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 9a15d39..bf2420a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -546,6 +546,30 @@ static int dlpar_memory_readd_by_index(u32 drc_index)
>   return rc;
>  }
> 
> +static int dlpar_memory_readd_multiple(void)
> +{
> + struct drmem_lmb *lmb;
> + int rc;
> +
> + pr_info("Attempting to update multiple LMBs\n");
> +
> + for_each_drmem_lmb(lmb) {
> + if (drmem_lmb_update(lmb)) {
> + rc = dlpar_remove_lmb(lmb);
> +
> + if (!rc) {
> + rc = dlpar_add_lmb(lmb);
> + if (rc)
> + dlpar_release_drc(lmb->drc_index);
> + }

The work you're doing here is essentially the same as what is done in
dlpar_memory_readd_by_index(). Perhaps pull the common bits of both
routines into a helper routine. This could include the success/failure
messages in dlpar_memory_readd_by_index().

-Nathan

> +
> + drmem_remove_lmb_update(lmb);
> + }
> + }
> +
> + return rc;
> +}
> +
>  static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
>  {
>   struct drmem_lmb *lmb, *start_lmb, *end_lmb;
> @@ -646,6 +670,10 @@ static int dlpar_memory_readd_by_index(u32 drc_index)
>  {
>   return -EOPNOTSUPP;
>  }
> +static int dlpar_memory_readd_multiple(void)
> +{
> + return -EOPNOTSUPP;
> +}
> 
>  static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
>  {
> @@ -923,6 +951,9 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
>   drc_index = hp_elog->_drc_u.drc_index;
>   rc = dlpar_memory_readd_by_index(drc_index);
>   break;
> + case PSERIES_HP_ELOG_ACTION_READD_MULTIPLE:
> + rc = dlpar_memory_readd_multiple();
> + break;
>   default:
>   pr_err("Invalid action (%d) specified\n", hp_elog->action);
>   rc = -EINVAL;
> 



Re: [PATCH v04 1/5] powerpc/drmem: Export 'dynamic-memory' loader

2018-10-10 Thread Nathan Fontenot
On 10/09/2018 03:36 PM, Michael Bringmann wrote:
> powerpc/drmem: Export many of the functions of DRMEM to parse
> "ibm,dynamic-memory" and "ibm,dynamic-memory-v2" during hotplug
> operations and for Post Migration events.
> 
> Also modify the DRMEM initialization code to allow it to,
> 
> * Be called after system initialization
> * Provide a separate user copy of the LMB array that it produces
> * Free the user copy upon request
> 
> In addition, a couple of changes were made to make the creation
> of additional copies of the LMB array more useful including,
> 
> * Add new iterator to work through a pair of drmem_info arrays.
> * Modify DRMEM code to replace usages of dt_root_addr_cells, and
>   dt_mem_next_cell, as these are only available at first boot.
> 
> Signed-off-by: Michael Bringmann 
> ---
>  arch/powerpc/include/asm/drmem.h |   15 
>  arch/powerpc/mm/drmem.c  |   75 
> --
>  2 files changed, 70 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index 7c1d8e7..1fbb684 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -35,6 +35,18 @@ struct drmem_lmb_info {
>   _info->lmbs[0],   \
>   _info->lmbs[drmem_info->n_lmbs - 1])
> 
> +#define for_each_dinfo_lmb(dinfo, lmb)   \
> + for_each_drmem_lmb_in_range((lmb),  \
> + >lmbs[0],\
> + >lmbs[dinfo->n_lmbs - 1])
> +
> +#define for_each_pair_dinfo_lmb(dinfo1, lmb1, dinfo2, lmb2)  \
> + for ((lmb1) = (>lmbs[0]),   \
> +  (lmb2) = (>lmbs[0]);   \
> +  ((lmb1) <= (>lmbs[dinfo1->n_lmbs - 1])) && \
> +  ((lmb2) <= (>lmbs[dinfo2->n_lmbs - 1]));   \
> +  (lmb1)++, (lmb2)++)
> +

The macros for traversing seem to be getting a bit unwieldy with these
updates. I wonder if we should move to just using a walk routine
for all traversal of the drmem LMBs.

>  /*
>   * The of_drconf_cell_v1 struct defines the layout of the LMB data
>   * specified in the ibm,dynamic-memory device tree property.
> @@ -94,6 +106,9 @@ void __init walk_drmem_lmbs(struct device_node *dn,
>   void (*func)(struct drmem_lmb *, const __be32 **));
>  int drmem_update_dt(void);
> 
> +struct drmem_lmb_info *drmem_lmbs_init(struct property *prop);
> +void drmem_lmbs_free(struct drmem_lmb_info *dinfo);
> +
>  #ifdef CONFIG_PPC_PSERIES
>  void __init walk_drmem_lmbs_early(unsigned long node,
>   void (*func)(struct drmem_lmb *, const __be32 **));
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 3f18036..13d2abb 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -20,6 +20,7 @@
> 
>  static struct drmem_lmb_info __drmem_info;
>  struct drmem_lmb_info *drmem_info = &__drmem_info;
> +static int n_root_addr_cells;
> 
>  u64 drmem_lmb_memory_max(void)
>  {
> @@ -193,12 +194,13 @@ int drmem_update_dt(void)
>   return rc;
>  }
> 
> -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
> +static void read_drconf_v1_cell(struct drmem_lmb *lmb,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> 
> - lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
> + lmb->base_addr = of_read_number(p, n_root_addr_cells);
> + p += n_root_addr_cells;

Any reason this can't just be
lmb->base_addr = dt_mem_next_cell(n_root_addr_cells, &p);

>   lmb->drc_index = of_read_number(p++, 1);
> 
>   p++; /* skip reserved field */
> @@ -209,7 +211,7 @@ static void __init read_drconf_v1_cell(struct drmem_lmb 
> *lmb,
>   *prop = p;
>  }
> 
> -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 
> *usm,
> +static void __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
>   void (*func)(struct drmem_lmb *, const __be32 **))
>  {
>   struct drmem_lmb lmb;
> @@ -225,13 +227,14 @@ static void __init __walk_drmem_v1_lmbs(const __be32 
> *prop, const __be32 *usm,
>   }
>  }
> 
> -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
> +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> 
>   dr_cell->seq_lmbs = of_read_number(p++, 1);
> - dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
> + dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
> + p += n_root_addr_cells;

Same here.

-Nathan

>   dr_cell->drc_index = of_read_number(p++, 1);
>   dr_cell->aa_index = of_read_number(p++, 1);
>   dr_cell->flags = of_read_number(p++, 1);
> @@ -239,7 +242,7 @@ static void __init read_drconf_v2_cell(struct 
> of_drconf_cell_v2 *dr_cell,
>   

Re: [PATCH 1/2] powerpc/pseries: PAPR persistent memory support

2018-10-10 Thread Nathan Fontenot
On 10/10/2018 01:08 AM, Oliver O'Halloran wrote:
> This patch implements support for discovering storage class memory
> devices at boot and for handling hotplug of new regions via RTAS
> hotplug events.
> 
> Signed-off-by: Oliver O'Halloran 
> ---
>  arch/powerpc/include/asm/firmware.h   |  3 ++-
>  arch/powerpc/include/asm/hvcall.h | 10 +-
>  arch/powerpc/include/asm/rtas.h   |  2 ++
>  arch/powerpc/kernel/rtasd.c   |  2 ++
>  arch/powerpc/platforms/pseries/Makefile   |  2 +-
>  arch/powerpc/platforms/pseries/dlpar.c|  4 
>  arch/powerpc/platforms/pseries/firmware.c |  1 +
>  arch/powerpc/platforms/pseries/pseries.h  |  5 +
>  arch/powerpc/platforms/pseries/ras.c  |  3 ++-
>  9 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/firmware.h 
> b/arch/powerpc/include/asm/firmware.h
> index 7a051bd21f87..113c64d5d394 100644
> --- a/arch/powerpc/include/asm/firmware.h
> +++ b/arch/powerpc/include/asm/firmware.h
> @@ -52,6 +52,7 @@
>  #define FW_FEATURE_PRRN  ASM_CONST(0x0002)
>  #define FW_FEATURE_DRMEM_V2  ASM_CONST(0x0004)
>  #define FW_FEATURE_DRC_INFO  ASM_CONST(0x0008)
> +#define FW_FEATURE_PAPR_SCM  ASM_CONST(0x0010)
> 
>  #ifndef __ASSEMBLY__
> 
> @@ -69,7 +70,7 @@ enum {
>   FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
>   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
>   FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
> - FW_FEATURE_DRC_INFO,
> + FW_FEATURE_DRC_INFO | FW_FEATURE_PAPR_SCM,
>   FW_FEATURE_PSERIES_ALWAYS = 0,
>   FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
>   FW_FEATURE_POWERNV_ALWAYS = 0,
> diff --git a/arch/powerpc/include/asm/hvcall.h 
> b/arch/powerpc/include/asm/hvcall.h
> index a0b17f9f1ea4..0e81ef83b35a 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -295,7 +295,15 @@
>  #define H_INT_ESB   0x3C8
>  #define H_INT_SYNC  0x3CC
>  #define H_INT_RESET 0x3D0
> -#define MAX_HCALL_OPCODE H_INT_RESET
> +#define H_SCM_READ_METADATA 0x3E4
> +#define H_SCM_WRITE_METADATA0x3E8
> +#define H_SCM_BIND_MEM  0x3EC
> +#define H_SCM_UNBIND_MEM0x3F0
> +#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
> +#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
> +#define H_SCM_MEM_QUERY  0x3FC
> +#define H_SCM_BLOCK_CLEAR   0x400
> +#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
> 
>  /* H_VIOCTL functions */
>  #define H_GET_VIOA_DUMP_SIZE 0x01
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 71e393c46a49..1e81f3d55457 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
>  #define RTAS_TYPE_INFO   0xE2
>  #define RTAS_TYPE_DEALLOC0xE3
>  #define RTAS_TYPE_DUMP   0xE4
> +#define RTAS_TYPE_HOTPLUG0xE5
>  /* I don't add PowerMGM events right now, this is a different topic */ 
>  #define RTAS_TYPE_PMGM_POWER_SW_ON   0x60
>  #define RTAS_TYPE_PMGM_POWER_SW_OFF  0x61
> @@ -316,6 +317,7 @@ struct pseries_hp_errorlog {
>  #define PSERIES_HP_ELOG_RESOURCE_MEM 2
>  #define PSERIES_HP_ELOG_RESOURCE_SLOT3
>  #define PSERIES_HP_ELOG_RESOURCE_PHB 4
> +#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
> 
>  #define PSERIES_HP_ELOG_ACTION_ADD   1
>  #define PSERIES_HP_ELOG_ACTION_REMOVE2
> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
> index 6fafc82c04b0..fad0baddfcba 100644
> --- a/arch/powerpc/kernel/rtasd.c
> +++ b/arch/powerpc/kernel/rtasd.c
> @@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
>   return "Dump Notification Event";
>   case RTAS_TYPE_PRRN:
>   return "Platform Resource Reassignment Event";
> + case RTAS_TYPE_HOTPLUG:
> + return "Hotplug Event";
>   }
> 
>   return rtas_type[0];
> diff --git a/arch/powerpc/platforms/pseries/Makefile 
> b/arch/powerpc/platforms/pseries/Makefile
> index 7e89d5c47068..892b27ced973 100644
> --- a/arch/powerpc/platforms/pseries/Makefile
> +++ b/arch/powerpc/platforms/pseries/Makefile
> @@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE)+= kexec.o
>  obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
> 
>  obj-$(CONFIG_HOTPLUG_CPU)+= hotplug-cpu.o
> -obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
> +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o
> 
>  obj-$(CONFIG_HVC_CONSOLE)+= hvconsole.o
>  obj-$(CONFIG_HVCS)   += hvcserver.o
> diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
> b/arch/powerpc/platforms/pseries/dlpar.c
> index a0b20c03f078..795996fefdb9 100644
> --- a/arch/powerpc/platforms/pseries/dlpar.c
> +++ b/arch/powerpc/platforms/pseries/dlpar.c
> @@ -357,6 +357,10 @@ static int 

Re: [PATCH v02] powerpc/mobility: Extend start/stop topology update scope

2018-10-10 Thread Nathan Fontenot
On 10/09/2018 03:12 PM, Michael Bringmann wrote:
> The PPC mobility code may receive RTAS requests to perform PRRN
> topology changes at any time, including during LPAR migration
> operations.  In some configurations where the affinity of CPUs
> or memory is being changed on that platform, the PRRN requests
> may apply or refer to outdated information prior to the complete
> update of the device-tree.  This patch changes the duration for
> which topology updates are suppressed during LPAR migrations from
> just the rtas_ibm_suspend_me / 'ibm,suspend-me' call(s) to cover
> the entire 'migration_store' operation to allow all changes to
> the device-tree to be applied prior to accepting and applying any
> PRRN requests.
> 
> For tracking purposes, pr_info notices are added to the functions
> start_topology_update() and stop_topology_update() of 'numa.c'.
> 
> Signed-off-by: Michael Bringmann 

Reviewed-by: Nathan Fontenot 

> ---
> Changes in v02:
>   -- Rebase to latest powerpc next tree.
> ---
>  arch/powerpc/kernel/rtas.c|2 --
>  arch/powerpc/mm/numa.c|6 ++
>  arch/powerpc/platforms/pseries/mobility.c |5 +
>  3 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 2c7ed31..e02ac37 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -982,7 +982,6 @@ int rtas_ibm_suspend_me(u64 handle)
>   }
> 
>   cpu_hotplug_disable();
> - stop_topology_update();
> 
>   /* Call function on all CPUs.  One of us will make the
>* rtas call
> @@ -995,7 +994,6 @@ int rtas_ibm_suspend_me(u64 handle)
>   if (atomic_read() != 0)
>   printk(KERN_ERR "Error doing global join\n");
> 
> - start_topology_update();
>   cpu_hotplug_enable();
> 
>   /* Take down CPUs not online prior to suspend */
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index b5a71ba..0ade0a1 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1518,6 +1518,10 @@ int start_topology_update(void)
>   }
>   }
> 
> + pr_info("Starting topology update%s%s\n",
> + (prrn_enabled ? " prrn_enabled" : ""),
> + (vphn_enabled ? " vphn_enabled" : ""));
> +
>   return rc;
>  }
> 
> @@ -1539,6 +1543,8 @@ int stop_topology_update(void)
>   rc = del_timer_sync(_timer);
>   }
> 
> + pr_info("Stopping topology update\n");
> +
>   return rc;
>  }
> 
> diff --git a/arch/powerpc/platforms/pseries/mobility.c 
> b/arch/powerpc/platforms/pseries/mobility.c
> index 2f0f512..7da222d 100644
> --- a/arch/powerpc/platforms/pseries/mobility.c
> +++ b/arch/powerpc/platforms/pseries/mobility.c
> @@ -367,6 +367,8 @@ static ssize_t migration_store(struct class *class,
>   if (rc)
>   return rc;
> 
> + stop_topology_update();
> +
>   do {
>   rc = rtas_ibm_suspend_me(streamid);
>   if (rc == -EAGAIN)
> @@ -377,6 +379,9 @@ static ssize_t migration_store(struct class *class,
>   return rc;
> 
>   post_mobility_fixup();
> +
> + start_topology_update();
> +
>   return count;
>  }
> 
> 



Re: [PATCH] powerpc/pseries: Export maximum memory value

2018-10-10 Thread Nathan Fontenot
On 10/10/2018 05:22 AM, Aravinda Prasad wrote:
> This patch exports the maximum possible amount of memory
> configured on the system via /proc/powerpc/lparcfg.
> 
> Signed-off-by: Aravinda Prasad 
> ---
>  arch/powerpc/platforms/pseries/lparcfg.c |   13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/lparcfg.c 
> b/arch/powerpc/platforms/pseries/lparcfg.c
> index 7c872dc..aa82f55 100644
> --- a/arch/powerpc/platforms/pseries/lparcfg.c
> +++ b/arch/powerpc/platforms/pseries/lparcfg.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -36,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "pseries.h"
> 
> @@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m)
>   seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
>  }
> 
> +static void maxmem_data(struct seq_file *m)
> +{
> + unsigned long maxmem = 0;
> +
> + maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
> + maxmem += hugetlb_total_pages() * PAGE_SIZE;
> +
> + seq_printf(m, "MaxMem=%ld\n", maxmem);

Should this be MaxPossibleMem?

At least for the drmem memory the value calculated is the maximum possible
memory. I wonder if calling it MaxMem would lead users to think they have
that much memory available to them.

-Nathan

> +}
> +
>  static int pseries_lparcfg_data(struct seq_file *m, void *v)
>  {
>   int partition_potential_processors;
> @@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void 
> *v)
>   seq_printf(m, "slb_size=%d\n", mmu_slb_size);
>  #endif
>   parse_em_data(m);
> + maxmem_data(m);
> 
>   return 0;
>  }
> 



Re: [PATCH v3 -next] powerpc/pseries/memory-hotplug: Fix return value type of find_aa_index

2018-10-10 Thread Nathan Fontenot
On 10/09/2018 08:59 AM, YueHaibing wrote:
> 'aa_index' is defined as an unsigned value, but find_aa_index
> may return -1 when dlpar_clone_property fails. So change 
> find_aa_index return value type to bool, which indicates whether
> 'aa_index' was found or not.
> 
> Fixes: c05a5a40969e ("powerpc/pseries: Dynamic add entires to associativity 
> lookup array")
> Signed-off-by: YueHaibing 

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
 
> ---
> v3: change find_aa_index return type to bool
> v2: use 'rc' track the validation of aa_index
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c | 61 
> -
>  1 file changed, 28 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index d26a771..4db510f 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -101,11 +101,12 @@ static struct property *dlpar_clone_property(struct 
> property *prop,
>   return new_prop;
>  }
> 
> -static u32 find_aa_index(struct device_node *dr_node,
> -  struct property *ala_prop, const u32 *lmb_assoc)
> +static bool find_aa_index(struct device_node *dr_node,
> +  struct property *ala_prop,
> +  const u32 *lmb_assoc, u32 *aa_index)
>  {
> - u32 *assoc_arrays;
> - u32 aa_index;
> + u32 *assoc_arrays, new_prop_size;
> + struct property *new_prop;
>   int aa_arrays, aa_array_entries, aa_array_sz;
>   int i, index;
> 
> @@ -121,46 +122,39 @@ static u32 find_aa_index(struct device_node *dr_node,
>   aa_array_entries = be32_to_cpu(assoc_arrays[1]);
>   aa_array_sz = aa_array_entries * sizeof(u32);
> 
> - aa_index = -1;
>   for (i = 0; i < aa_arrays; i++) {
>   index = (i * aa_array_entries) + 2;
> 
>   if (memcmp(_arrays[index], _assoc[1], aa_array_sz))
>   continue;
> 
> - aa_index = i;
> - break;
> + *aa_index = i;
> + return true;
>   }
> 
> - if (aa_index == -1) {
> - struct property *new_prop;
> - u32 new_prop_size;
> -
> - new_prop_size = ala_prop->length + aa_array_sz;
> - new_prop = dlpar_clone_property(ala_prop, new_prop_size);
> - if (!new_prop)
> - return -1;
> -
> - assoc_arrays = new_prop->value;
> + new_prop_size = ala_prop->length + aa_array_sz;
> + new_prop = dlpar_clone_property(ala_prop, new_prop_size);
> + if (!new_prop)
> + return false;
> 
> - /* increment the number of entries in the lookup array */
> - assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
> + assoc_arrays = new_prop->value;
> 
> - /* copy the new associativity into the lookup array */
> - index = aa_arrays * aa_array_entries + 2;
> - memcpy(_arrays[index], _assoc[1], aa_array_sz);
> + /* increment the number of entries in the lookup array */
> + assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
> 
> - of_update_property(dr_node, new_prop);
> + /* copy the new associativity into the lookup array */
> + index = aa_arrays * aa_array_entries + 2;
> + memcpy(_arrays[index], _assoc[1], aa_array_sz);
> 
> - /*
> -  * The associativity lookup array index for this lmb is
> -  * number of entries - 1 since we added its associativity
> -  * to the end of the lookup array.
> -  */
> - aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
> - }
> + of_update_property(dr_node, new_prop);
> 
> - return aa_index;
> + /*
> +  * The associativity lookup array index for this lmb is
> +  * number of entries - 1 since we added its associativity
> +  * to the end of the lookup array.
> +  */
> + *aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
> + return true;
>  }
> 
>  static int update_lmb_associativity_index(struct drmem_lmb *lmb)
> @@ -169,6 +163,7 @@ static int update_lmb_associativity_index(struct 
> drmem_lmb *lmb)
>   struct property *ala_prop;
>   const u32 *lmb_assoc;
>   u32 aa_index;
> + bool is_found;
> 
>   parent = of_find_node_by_path("/");
>   if (!parent)
> @@ -200,11 +195,11 @@ static int update_lmb_associativity_index(struct 
> drmem_lmb *lmb)
>   return -ENODEV;
>   }
> 
> - aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
> + is_found = find_aa_index(dr_node, ala_prop, lmb_assoc, _index);
> 
>   dlpar_free_cc_nodes(lmb_node);
> 
> - if (aa_index < 0) {
> + if (!is_found) {
>   pr_err("Could not find LMB associativity\n");
>   return -1;
>   }
> 



Re: [PATCH v03 1/5] powerpc/drmem: Export 'dynamic-memory' loader

2018-10-03 Thread Nathan Fontenot
On 10/02/2018 08:00 PM, Michael Ellerman wrote:
> Michael Bringmann  writes:
> 
>> powerpc/drmem: Export many of the functions of DRMEM to parse
>> "ibm,dynamic-memory" and "ibm,dynamic-memory-v2" during hotplug
>> operations and for Post Migration events.
> 
> This isn't a criticism of your patch, but I think the drmem.c code
> should be moved into platforms/pseries.
> 
> That would then make most of it private to platforms/pseries and we
> wouldn't need to export things in arch/powerpc/include/asm.

I don't have an issue with moving it to platforms/pseries. I originally
put it in arch/powerpc/mm because the numa code also uses the drmem code.

The numa code was updated so that it could just be given an lmb struct
instead of having to parse the device tree directly for dynamic
reconfiguration memory. Since we have to support two versions of this dt
property, this made more sense.

-Nathan

> 
> 
>> Also modify the DRMEM initialization code to allow it to,
>>
>> * Be called after system initialization
>> * Provide a separate user copy of the LMB array that it produces
>> * Free the user copy upon request
> 
> Is there any reason those can't be done as separate patches?
> 
>> In addition, a couple of changes were made to make the creation
>> of additional copies of the LMB array more useful including,
>>
>> * Add new iterator to work through a pair of drmem_info arrays.
>> * Modify DRMEM code to replace usages of dt_root_addr_cells, and
>>   dt_mem_next_cell, as these are only available at first boot.
> 
> Likewise?
> 
> cheers
> 
>> diff --git a/arch/powerpc/include/asm/drmem.h 
>> b/arch/powerpc/include/asm/drmem.h
>> index ce242b9..b0e70fd 100644
>> --- a/arch/powerpc/include/asm/drmem.h
>> +++ b/arch/powerpc/include/asm/drmem.h
>> @@ -35,6 +35,18 @@ struct drmem_lmb_info {
>>  _info->lmbs[0],   \
>>  _info->lmbs[drmem_info->n_lmbs - 1])
>>  
>> +#define for_each_dinfo_lmb(dinfo, lmb)  \
>> +for_each_drmem_lmb_in_range((lmb),  \
>> +>lmbs[0],\
>> +>lmbs[dinfo->n_lmbs - 1])
>> +
>> +#define for_each_pair_dinfo_lmb(dinfo1, lmb1, dinfo2, lmb2) \
>> +for ((lmb1) = (>lmbs[0]),   \
>> + (lmb2) = (>lmbs[0]);   \
>> + ((lmb1) <= (>lmbs[dinfo1->n_lmbs - 1])) && \
>> + ((lmb2) <= (>lmbs[dinfo2->n_lmbs - 1]));   \
>> + (lmb1)++, (lmb2)++)
>> +
>>  /*
>>   * The of_drconf_cell_v1 struct defines the layout of the LMB data
>>   * specified in the ibm,dynamic-memory device tree property.
>> @@ -94,6 +106,9 @@ void __init walk_drmem_lmbs(struct device_node *dn,
>>  void (*func)(struct drmem_lmb *, const __be32 **));
>>  int drmem_update_dt(void);
>>  
>> +struct drmem_lmb_info *drmem_lmbs_init(struct property *prop);
>> +void drmem_lmbs_free(struct drmem_lmb_info *dinfo);
>> +
>>  #ifdef CONFIG_PPC_PSERIES
>>  void __init walk_drmem_lmbs_early(unsigned long node,
>>  void (*func)(struct drmem_lmb *, const __be32 **));
>> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
>> index 3f18036..13d2abb 100644
>> --- a/arch/powerpc/mm/drmem.c
>> +++ b/arch/powerpc/mm/drmem.c
>> @@ -20,6 +20,7 @@
>>  
>>  static struct drmem_lmb_info __drmem_info;
>>  struct drmem_lmb_info *drmem_info = &__drmem_info;
>> +static int n_root_addr_cells;
>>  
>>  u64 drmem_lmb_memory_max(void)
>>  {
>> @@ -193,12 +194,13 @@ int drmem_update_dt(void)
>>  return rc;
>>  }
>>  
>> -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
>> +static void read_drconf_v1_cell(struct drmem_lmb *lmb,
>> const __be32 **prop)
>>  {
>>  const __be32 *p = *prop;
>>  
>> -lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
>> +lmb->base_addr = of_read_number(p, n_root_addr_cells);
>> +p += n_root_addr_cells;
>>  lmb->drc_index = of_read_number(p++, 1);
>>  
>>  p++; /* skip reserved field */
>> @@ -209,7 +211,7 @@ static void __init read_drconf_v1_cell(struct drmem_lmb 
>> *lmb,
>>  *prop = p;
>>  }
>>  
>> -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 
>> *usm,
>> +static void __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
>>  void (*func)(struct drmem_lmb *, const __be32 **))
>>  {
>>  struct drmem_lmb lmb;
>> @@ -225,13 +227,14 @@ static void __init __walk_drmem_v1_lmbs(const __be32 
>> *prop, const __be32 *usm,
>>  }
>>  }
>>  
>> -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>> +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>> const __be32 **prop)
>>  {
>>  const __be32 *p = *prop;
>>  
>>  dr_cell->seq_lmbs = of_read_number(p++, 1);
>> -dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
>> +dr_cell->base_addr = of_read_number(p, 

Re: [PATCH] powerpc/mobility: Extend start/stop topology update scope

2018-10-02 Thread Nathan Fontenot
On 10/01/2018 01:56 PM, Michael Bringmann wrote:
> The PPC mobility code may receive RTAS requests to perform PRRN
> topology changes at any time, including during LPAR migration
> operations.  In some configurations where the affinity of CPUs
> or memory is being changed on that platform, the PRRN requests
> may apply or refer to outdated information prior to the complete
> update of the device-tree.  This patch changes the duration for
> which topology updates are suppressed during LPAR migrations from
> just the rtas_ibm_suspend_me / 'ibm,suspend-me' call(s) to cover
> the entire 'migration_store' operation to allow all changes to
> the device-tree to be applied prior to accepting and applying any
> PRRN requests.
> 
> For tracking purposes, pr_info notices are added to the functions
> start_topology_update() and stop_topology_update() of 'numa.c'.
> 
> Signed-off-by: Michael Bringmann 

Reviewed-by: Nathan Fontenot 

> ---
>  arch/powerpc/kernel/rtas.c|4 
>  arch/powerpc/mm/numa.c|6 ++
>  arch/powerpc/platforms/pseries/mobility.c |5 +
>  3 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 8afd146..28d8b57 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -981,8 +981,6 @@ int rtas_ibm_suspend_me(u64 handle)
>   goto out;
>   }
> 
> - stop_topology_update();
> -
>   /* Call function on all CPUs.  One of us will make the
>* rtas call
>*/
> @@ -994,8 +992,6 @@ int rtas_ibm_suspend_me(u64 handle)
>   if (atomic_read() != 0)
>   printk(KERN_ERR "Error doing global join\n");
> 
> - start_topology_update();
> -
>   /* Take down CPUs not online prior to suspend */
>   cpuret = rtas_offline_cpus_mask(offline_mask);
>   if (cpuret)
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index b5a71ba..0ade0a1 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1518,6 +1518,10 @@ int start_topology_update(void)
>   }
>   }
> 
> + pr_info("Starting topology update%s%s\n",
> + (prrn_enabled ? " prrn_enabled" : ""),
> + (vphn_enabled ? " vphn_enabled" : ""));
> +
>   return rc;
>  }
> 
> @@ -1539,6 +1543,8 @@ int stop_topology_update(void)
>   rc = del_timer_sync(_timer);
>   }
> 
> + pr_info("Stopping topology update\n");
> +
>   return rc;
>  }
> 
> diff --git a/arch/powerpc/platforms/pseries/mobility.c 
> b/arch/powerpc/platforms/pseries/mobility.c
> index 23fb9ac..49ebefd 100644
> --- a/arch/powerpc/platforms/pseries/mobility.c
> +++ b/arch/powerpc/platforms/pseries/mobility.c
> @@ -373,6 +373,8 @@ static ssize_t migration_store(struct class *class,
>   if (rc)
>   return rc;
> 
> + stop_topology_update();
> +
>   do {
>   rc = rtas_ibm_suspend_me(streamid);
>   if (rc == -EAGAIN)
> @@ -383,6 +385,9 @@ static ssize_t migration_store(struct class *class,
>   return rc;
> 
>   post_mobility_fixup();
> +
> + start_topology_update();
> +
>   return count;
>  }
> 
> 



[PATCH v2] powerpc/pseries: Track LMB nid instead of using device tree

2018-10-02 Thread Nathan Fontenot
When removing memory we need to remove the memory from the node
it was added to instead of looking up the node it should be in
in the device tree.

During testing we have seen scenarios where the affinity for a
LMB changes due to a partition migration or PRRN event. In these
cases the node the LMB exists in may not match the node the device
tree indicates it belongs in. This can lead to a system crash
when trying to DLPAR remove the LMB after a migration or PRRN
event. The current code looks up the node in the device tree to
remove the LMB from, the crash occurs when we try to offline this
node and it does not have any data, i.e. node_data[nid] == NULL.

36:mon> e
cpu 0x36: Vector: 300 (Data Access) at [c001828b7810]
pc: c036d08c: try_offline_node+0x2c/0x1b0
lr: c03a14ec: remove_memory+0xbc/0x110
sp: c001828b7a90
   msr: 8280b033
   dar: 9a28
 dsisr: 4000
  current = 0xc006329c4c80
  paca= 0xc7a55200   softe: 0irq_happened: 0x01
pid   = 76926, comm = kworker/u320:3

36:mon> t
[link register   ] c03a14ec remove_memory+0xbc/0x110
[c001828b7a90] c006a1cc arch_remove_memory+0x9c/0xd0 (unreliable)
[c001828b7ad0] c03a14e0 remove_memory+0xb0/0x110
[c001828b7b20] c00c7db4 dlpar_remove_lmb+0x94/0x160
[c001828b7b60] c00c8ef8 dlpar_memory+0x7e8/0xd10
[c001828b7bf0] c00bf828 handle_dlpar_errorlog+0xf8/0x160
[c001828b7c60] c00bf8cc pseries_hp_work_fn+0x3c/0xa0
[c001828b7c90] c0128cd8 process_one_work+0x298/0x5a0
[c001828b7d20] c0129068 worker_thread+0x88/0x620
[c001828b7dc0] c013223c kthread+0x1ac/0x1c0
[c001828b7e30] c000b45c ret_from_kernel_thread+0x5c/0x80

To resolve this we need to track the node a LMB belongs to when
it is added to the system so we can remove it from that node instead
of the node that the device tree indicates it should belong to.

Signed-off-by: Nathan Fontenot 
---

Updates:
  V2: Fix builds to only track nid for LMBs when MEMORY_HOTPLUG
  is configured.

 arch/powerpc/include/asm/drmem.h|   21 +
 arch/powerpc/mm/drmem.c |6 +-
 arch/powerpc/platforms/pseries/hotplug-memory.c |   17 -
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 7c1d8e74b25d..7f3279b014db 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,9 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
+#ifdef CONFIG_MEMORY_HOTPLUG
+   int nid;
+#endif
 };
 
 struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void 
invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0x;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+   lmb->nid = -1;
+}
+#else
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+}
+#endif
+
 #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..641891df2046 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
 
-   for_each_drmem_lmb(lmb)
+   for_each_drmem_lmb(lmb) {
read_drconf_v1_cell(lmb, );
+   lmb_set_nid(lmb);
+   }
 }
 
 static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
+
+   lmb_set_nid(lmb);
}
}
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9a15d39995e5..58feca665b8c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -382,7 +382,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
unsigned long block_sz;
-   int nid, rc;
+   int rc;
 
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -392,14 +392,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
 
block_sz = pseries_memory_block_size();
-   nid = memory_add_physaddr_to_nid(lmb->base_addr);
 
-   remove_memory(nid, lmb->base_addr, block_sz);
+   remove_memory(lmb->nid, lmb->base_addr, block_

Re: [PATCH v2] powerpc/rtas: Fix a potential race between CPU-Offline & Migration

2018-10-01 Thread Nathan Fontenot
On 10/01/2018 05:40 AM, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" 
> 
> Live Partition Migrations require all the present CPUs to execute the
> H_JOIN call, and hence rtas_ibm_suspend_me() onlines any offline CPUs
> before initiating the migration for this purpose.
> 
> The commit 85a88cabad57
> ("powerpc/pseries: Disable CPU hotplug across migrations")
> disables any CPU-hotplug operations once all the offline CPUs are
> brought online to prevent any further state change. Once the
> CPU-Hotplug operation is disabled, the code assumes that all the CPUs
> are online.
> 
> However, there is a minor window in rtas_ibm_suspend_me() between
> onlining the offline CPUs and disabling CPU-Hotplug when a concurrent
> CPU-offline operation initiated by userspace can succeed, thereby
> nullifying the aforementioned assumption. In this unlikely case
> these offlined CPUs will not call H_JOIN, resulting in a system hang.
> 
> Fix this by verifying that all the present CPUs are actually online
> after CPU-Hotplug has been disabled, failing which we restore the
> state of the offline CPUs in rtas_ibm_suspend_me() and return an
> -EBUSY.
> 
> Cc: Nathan Fontenot 
> Cc: Tyrel Datwyler 
> Suggested-by: Michael Ellerman 
> Signed-off-by: Gautham R. Shenoy 

Reviewed-by: Nathan Fontenot 

> ---
> v2: Restore the state of the offline CPUs if all CPUs aren't onlined.
> 
>  arch/powerpc/kernel/rtas.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 2c7ed31..d4468cb 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -982,6 +982,15 @@ int rtas_ibm_suspend_me(u64 handle)
>   }
> 
>   cpu_hotplug_disable();
> +
> + /* Check if we raced with a CPU-Offline Operation */
> + if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
> + pr_err("%s: Raced against a concurrent CPU-Offline\n",
> +__func__);
> + atomic_set(, -EBUSY);
> + goto out_hotplug_enable;
> + }
> +
>   stop_topology_update();
> 
>   /* Call function on all CPUs.  One of us will make the
> @@ -996,6 +1005,8 @@ int rtas_ibm_suspend_me(u64 handle)
>   printk(KERN_ERR "Error doing global join\n");
> 
>   start_topology_update();
> +
> +out_hotplug_enable:
>   cpu_hotplug_enable();
> 
>   /* Take down CPUs not online prior to suspend */
> 



Re: [PATCH] powerpc/rtas: Fix a potential race between CPU-Offline & Migration

2018-09-28 Thread Nathan Fontenot
On 09/28/2018 02:02 AM, Gautham R Shenoy wrote:
> Hi Nathan,
> 
> On Thu, Sep 27, 2018 at 12:31:34PM -0500, Nathan Fontenot wrote:
>> On 09/27/2018 11:51 AM, Gautham R. Shenoy wrote:
>>> From: "Gautham R. Shenoy" 
>>>
>>> Live Partition Migrations require all the present CPUs to execute the
>>> H_JOIN call, and hence rtas_ibm_suspend_me() onlines any offline CPUs
>>> before initiating the migration for this purpose.
>>>
>>> The commit 85a88cabad57
>>> ("powerpc/pseries: Disable CPU hotplug across migrations")
>>> disables any CPU-hotplug operations once all the offline CPUs are
>>> brought online to prevent any further state change. Once the
>>> CPU-Hotplug operation is disabled, the code assumes that all the CPUs
>>> are online.
>>>
>>> However, there is a minor window in rtas_ibm_suspend_me() between
>>> onlining the offline CPUs and disabling CPU-Hotplug when a concurrent
>>> CPU-offline operation initiated by userspace can succeed, thereby
>>> nullifying the aforementioned assumption. In this unlikely case
>>> these offlined CPUs will not call H_JOIN, resulting in a system hang.
>>>
>>> Fix this by verifying that all the present CPUs are actually online
>>> after CPU-Hotplug has been disabled, failing which we return from
>>> rtas_ibm_suspend_me() with -EBUSY.
>>
>> Would we also want to have the ability to re-try onlining all of the cpus
>> before failing the migration?
> 
> Given that we haven't been able to hit the issue in practice after your
> fix to disable CPU Hotplug after migrations, it indicates that the
> race-window, if it is not merely a theoretical one, is extremely
> narrow. So, this current patch addresses the safety aspect, as in,
> should someone manage to exploit this narrow race-window, it ensures
> that the system doesn't go to a hang state.
> 
> Having the ability to retry onlining all the CPUs is only required for
> progress of LPM in this rarest of cases. We should add the code to
> retry onlining the CPUs if the consequence of failing an LPM is high,
> even in this rarest of cases. Otherwise IMHO we should be ok not adding
> the additional code.

I believe you're correct. One small update to the patch below...

> 
>>
>> This would involve a bigger code change as the current code to online all
>> CPUs would not work in its current form.
>>
>> -Nathan
>>
>>>
>>> Cc: Nathan Fontenot 
>>> Cc: Tyrel Datwyler 
>>> Suggested-by: Michael Ellerman 
>>> Signed-off-by: Gautham R. Shenoy 
>>> ---
>>>  arch/powerpc/kernel/rtas.c | 10 ++
>>>  1 file changed, 10 insertions(+)
>>>
>>> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
>>> index 2c7ed31..27f6fd3 100644
>>> --- a/arch/powerpc/kernel/rtas.c
>>> +++ b/arch/powerpc/kernel/rtas.c
>>> @@ -982,6 +982,16 @@ int rtas_ibm_suspend_me(u64 handle)
>>> }
>>>
>>> cpu_hotplug_disable();
>>> +
>>> +   /* Check if we raced with a CPU-Offline Operation */
>>> +   if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
>>> +   pr_err("%s: Raced against a concurrent CPU-Offline\n",
>>> +  __func__);
>>> +   atomic_set(, -EBUSY);
>>> +   cpu_hotplug_enable();

Before returning, we return all CPUs that were offline prior to the migration
back to the offline state. We should be doing that here as well. This should
be as simple as adding a call to rtas_offline_cpus_mask() here.

-Nathan

>>> +   goto out;
>>> +   }
>>> +
>>> stop_topology_update();
>>>
>>> /* Call function on all CPUs.  One of us will make the
>>>



Re: [PATCH -next] powerpc/pseries/memory-hotplug: Fix return value type of find_aa_index

2018-09-28 Thread Nathan Fontenot
On 09/21/2018 05:37 AM, YueHaibing wrote:
> find_aa_index will return -1 when dlpar_clone_property fails,
> its return value type should be int. Also the caller
> update_lmb_associativity_index should use an int variable to
> get it, then compare it with 0.

The aa_index that we are handling here is defined as an unsigned value
in the PAPR, so I'm a little hesitant to change it to a signed value.
Also, even after changing the aa_index to be signed, we would still assign
it to the u32 lmb->aa_index.

There are some other places where the aa_index is treated as a signed value
in find_aa_index(). Perhaps the better solution is to use an rc value to track
the success of finding the aa_index instead of relying on the aa_index value
itself.

-Nathan 

> 
> Fixes: c05a5a40969e ("powerpc/pseries: Dynamic add entires to associativity 
> lookup array")
> Signed-off-by: YueHaibing 
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 9a15d39..6aad17c 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -101,13 +101,12 @@ static struct property *dlpar_clone_property(struct 
> property *prop,
>   return new_prop;
>  }
> 
> -static u32 find_aa_index(struct device_node *dr_node,
> +static int find_aa_index(struct device_node *dr_node,
>struct property *ala_prop, const u32 *lmb_assoc)
>  {
>   u32 *assoc_arrays;
> - u32 aa_index;
>   int aa_arrays, aa_array_entries, aa_array_sz;
> - int i, index;
> + int i, index, aa_index;
> 
>   /*
>* The ibm,associativity-lookup-arrays property is defined to be
> @@ -168,7 +167,7 @@ static int update_lmb_associativity_index(struct 
> drmem_lmb *lmb)
>   struct device_node *parent, *lmb_node, *dr_node;
>   struct property *ala_prop;
>   const u32 *lmb_assoc;
> - u32 aa_index;
> + int aa_index;
> 
>   parent = of_find_node_by_path("/");
>   if (!parent)
> 



Re: [PATCH] powerpc/rtas: Fix a potential race between CPU-Offline & Migration

2018-09-27 Thread Nathan Fontenot
On 09/27/2018 11:51 AM, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" 
> 
> Live Partition Migrations require all the present CPUs to execute the
> H_JOIN call, and hence rtas_ibm_suspend_me() onlines any offline CPUs
> before initiating the migration for this purpose.
> 
> The commit 85a88cabad57
> ("powerpc/pseries: Disable CPU hotplug across migrations")
> disables any CPU-hotplug operations once all the offline CPUs are
> brought online to prevent any further state change. Once the
> CPU-Hotplug operation is disabled, the code assumes that all the CPUs
> are online.
> 
> However, there is a minor window in rtas_ibm_suspend_me() between
> onlining the offline CPUs and disabling CPU-Hotplug when a concurrent
> CPU-offline operation initiated by userspace can succeed, thereby
> nullifying the aforementioned assumption. In this unlikely case
> these offlined CPUs will not call H_JOIN, resulting in a system hang.
> 
> Fix this by verifying that all the present CPUs are actually online
> after CPU-Hotplug has been disabled, failing which we return from
> rtas_ibm_suspend_me() with -EBUSY.

Would we also want to have the ability to re-try onlining all of the cpus
before failing the migration?

This would involve a bigger code change as the current code to online all
CPUs would not work in its current form.

-Nathan

> 
> Cc: Nathan Fontenot 
> Cc: Tyrel Datwyler 
> Suggested-by: Michael Ellerman 
> Signed-off-by: Gautham R. Shenoy 
> ---
>  arch/powerpc/kernel/rtas.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index 2c7ed31..27f6fd3 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -982,6 +982,16 @@ int rtas_ibm_suspend_me(u64 handle)
>   }
> 
>   cpu_hotplug_disable();
> +
> + /* Check if we raced with a CPU-Offline Operation */
> + if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
> + pr_err("%s: Raced against a concurrent CPU-Offline\n",
> +__func__);
> + atomic_set(, -EBUSY);
> + cpu_hotplug_enable();
> + goto out;
> + }
> +
>   stop_topology_update();
> 
>   /* Call function on all CPUs.  One of us will make the
> 



Re: [PATCH] powerpc/pseries: Track LMB nid instead of using device tree

2018-09-24 Thread Nathan Fontenot
On 09/19/2018 11:38 PM, Michael Ellerman wrote:
> Nathan Fontenot  writes:
> 
>> When removing memory we need to remove the memory from the node
>> it was added to instead of looking up the node it should be in
>> in the device tree.
>>
>> During testing we have seen scenarios where the affinity for a
>> LMB changes due to a partition migration or PRRN event. In these
>> cases the node the LMB exists in may not match the node the device
>> tree indicates it belongs in. This can lead to a system crash
>> when trying to DLPAR remove the LMB after a migration or PRRN
>> event. The current code looks up the node in the device tree to
>> remove the LMB from, the crash occurs when we try to offline this
>> node and it does not have any data, i.e. node_data[nid] == NULL.
> 
> This isn't building for 32-bit etc:
> 
> arch/powerpc/mm/drmem.c: In function 'init_drmem_v1_lmbs':
> arch/powerpc/mm/drmem.c:371:14: error: implicit declaration of function 
> 'memory_add_physaddr_to_nid' [-Werror=implicit-function-declaration]
>lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
>   ^
> cc1: all warnings being treated as errors
> scripts/Makefile.build:317: recipe for target 'arch/powerpc/mm/drmem.o' failed
> 
> See the failed checks here:
>   https://patchwork.ozlabs.org/patch/969150/
> 
> 
> Probably drmem.c should only be compiled for 64-bit NUMA etc.

Looks like the root cause is that memory hotplug relies on sparsemem which
is not supported on 32-bit.

This patch is also going to need a refresh to apply cleanly due to other
patches that have gone in. I'll re-submit after looking at the build break 
issues more.

-Nathan

> 
> cheers
> 



Re: [PATCH] powerpc/pseries: Disable CPU hotplug across migrations

2018-09-24 Thread Nathan Fontenot
On 09/24/2018 03:56 AM, Gautham R Shenoy wrote:
> Hi Michael,
> 
> On Mon, Sep 24, 2018 at 05:00:42PM +1000, Michael Ellerman wrote:
>> Nathan Fontenot  writes:
>>> On 09/18/2018 05:32 AM, Gautham R Shenoy wrote:
>>>> Hi Nathan,
>>>> On Tue, Sep 18, 2018 at 1:05 AM Nathan Fontenot
>>>>  wrote:
>>>>>
>>>>> When performing partition migrations all present CPUs must be online
>>>>> as all present CPUs must make the H_JOIN call as part of the migration
>>>>> process. Once all present CPUs make the H_JOIN call, one CPU is returned
>>>>> to make the rtas call to perform the migration to the destination system.
>>>>>
>>>>> During testing of migration and changing the SMT state we have found
>>>>> instances where CPUs are offlined, as part of the SMT state change,
>>>>> before they make the H_JOIN call. This results in a hung system where
>>>>> every CPU is either in H_JOIN or offline.
>>>>>
>>>>> To prevent this, this patch disables CPU hotplug during the migration
>>>>> process.
>>>>>
>>>>> Signed-off-by: Nathan Fontenot 
>>>>> ---
>>>>>  arch/powerpc/kernel/rtas.c |2 ++
>>>>>  1 file changed, 2 insertions(+)
>>>>>
>>>>> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
>>>>> index 8afd146bc9c7..2c7ed31c736e 100644
>>>>> --- a/arch/powerpc/kernel/rtas.c
>>>>> +++ b/arch/powerpc/kernel/rtas.c
>>>>> @@ -981,6 +981,7 @@ int rtas_ibm_suspend_me(u64 handle)
>>>>> goto out;
>>>>> }
>>>>>
>>>>> +   cpu_hotplug_disable();
>>>>
>>>> So, some of the onlined CPUs ( via
>>>> rtas_online_cpus_mask(offline_mask);) can still go offline,
>>>> if the userspace issues an offline command, just before we execute
>>>> cpu_hotplug_disable().
>>>>
>>>> So we are narrowing down the race, but it still exists. Am I missing 
>>>> something ?
>>>
>>> You're correct, this narrows the window in which a CPU can go offline.
>>>
>>> In testing with this patch we have not been able to re-create the failure 
>>> but
>>> there is still a small window.
>>
>> Well let's close it.
>>
>> We just need to check that all present CPUs are online after we've
>> called cpu_hotplug_disable() don't we?
> 
> Yes. However, we cannot use the cpu_up() API to bring the offline CPUs
> online, since it will return with an -EBUSY if CPU-Hotplug has been
> disabled. _cpu_up() works, but it is (understandably) a static
> function in kernel/cpu.c
> 
> So, we might need new APIs along the lines of
> disable_nonboot_cpus()/enable_nonboot_cpus() 
> that is currently being used by the suspend subsystem, only that we
> would need the APIs to
>   - Disable hotplug and online all the CPUs in an atomic
>   fashion. Would be good if the API returns the cpumask of CPUs
>   which were offline, which were brought online by this API.
> 
>   - Restore the state of the machine by offlining the CPUs which
>   we brought online, and enable hotplug again. 
>   

There is already code in the LPM path that saves a cpu mask of the offline
cpus prior to bringing them all online so we can offline them again after
the migration.

The missing piece to fully close the window is an API that will allow us to
online cpus while cpu hotplug is disabled.

Since we have not been able to re-create the failure with this patch would
it be ok to pull in this patch while other options are explored?

-Nathan

>>
>> cheers
>>



Re: [PATCH] powerpc/pseries: Disable CPU hotplug across migrations

2018-09-20 Thread Nathan Fontenot
On 09/18/2018 05:32 AM, Gautham R Shenoy wrote:
> Hi Nathan,
> On Tue, Sep 18, 2018 at 1:05 AM Nathan Fontenot
>  wrote:
>>
>> When performing partition migrations all present CPUs must be online
>> as all present CPUs must make the H_JOIN call as part of the migration
>> process. Once all present CPUs make the H_JOIN call, one CPU is returned
>> to make the rtas call to perform the migration to the destination system.
>>
>> During testing of migration and changing the SMT state we have found
>> instances where CPUs are offlined, as part of the SMT state change,
>> before they make the H_JOIN call. This results in a hung system where
>> every CPU is either in H_JOIN or offline.
>>
>> To prevent this, this patch disables CPU hotplug during the migration
>> process.
>>
>> Signed-off-by: Nathan Fontenot 
>> ---
>>  arch/powerpc/kernel/rtas.c |2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
>> index 8afd146bc9c7..2c7ed31c736e 100644
>> --- a/arch/powerpc/kernel/rtas.c
>> +++ b/arch/powerpc/kernel/rtas.c
>> @@ -981,6 +981,7 @@ int rtas_ibm_suspend_me(u64 handle)
>> goto out;
>> }
>>
>> +   cpu_hotplug_disable();
> 
> So, some of the onlined CPUs ( via
> rtas_online_cpus_mask(offline_mask);) can still go offline,
> if the userspace issues an offline command, just before we execute
> cpu_hotplug_disable().
> 
> So we are narrowing down the race, but it still exists. Am I missing 
> something ?

You're correct, this narrows the window in which a CPU can go offline.

In testing with this patch we have not been able to re-create the failure but
there is still a small window.

-Nathan

> 
>> stop_topology_update();
>>
>> /* Call function on all CPUs.  One of us will make the
>> @@ -995,6 +996,7 @@ int rtas_ibm_suspend_me(u64 handle)
>> printk(KERN_ERR "Error doing global join\n");
>>
>> start_topology_update();
>> +   cpu_hotplug_enable();
>>
>> /* Take down CPUs not online prior to suspend */
>> cpuret = rtas_offline_cpus_mask(offline_mask);
>>
> 
> 



[PATCH] powerpc/pseries: Disable CPU hotplug across migrations

2018-09-17 Thread Nathan Fontenot
When performing partition migrations all present CPUs must be online
as all present CPUs must make the H_JOIN call as part of the migration
process. Once all present CPUs make the H_JOIN call, one CPU is returned
to make the rtas call to perform the migration to the destination system.

During testing of migration and changing the SMT state we have found
instances where CPUs are offlined, as part of the SMT state change,
before they make the H_JOIN call. This results in a hung system where
every CPU is either in H_JOIN or offline.

To prevent this, this patch disables CPU hotplug during the migration
process.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/kernel/rtas.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8afd146bc9c7..2c7ed31c736e 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,6 +981,7 @@ int rtas_ibm_suspend_me(u64 handle)
goto out;
}
 
+   cpu_hotplug_disable();
stop_topology_update();
 
/* Call function on all CPUs.  One of us will make the
@@ -995,6 +996,7 @@ int rtas_ibm_suspend_me(u64 handle)
printk(KERN_ERR "Error doing global join\n");
 
start_topology_update();
+   cpu_hotplug_enable();
 
/* Take down CPUs not online prior to suspend */
cpuret = rtas_offline_cpus_mask(offline_mask);



[PATCH] powerpc/pseries: Track LMB nid instead of using device tree

2018-09-12 Thread Nathan Fontenot
When removing memory we need to remove the memory from the node
it was added to instead of looking up the node it should be in
in the device tree.

During testing we have seen scenarios where the affinity for a
LMB changes due to a partition migration or PRRN event. In these
cases the node the LMB exists in may not match the node the device
tree indicates it belongs in. This can lead to a system crash
when trying to DLPAR remove the LMB after a migration or PRRN
event. The current code looks up the node in the device tree to
remove the LMB from, the crash occurs when we try to offline this
node and it does not have any data, i.e. node_data[nid] == NULL.

36:mon> e
cpu 0x36: Vector: 300 (Data Access) at [c001828b7810]
pc: c036d08c: try_offline_node+0x2c/0x1b0
lr: c03a14ec: remove_memory+0xbc/0x110
sp: c001828b7a90
   msr: 8280b033
   dar: 9a28
 dsisr: 4000
  current = 0xc006329c4c80
  paca= 0xc7a55200   softe: 0irq_happened: 0x01
pid   = 76926, comm = kworker/u320:3

36:mon> t
[link register   ] c03a14ec remove_memory+0xbc/0x110
[c001828b7a90] c006a1cc arch_remove_memory+0x9c/0xd0 (unreliable)
[c001828b7ad0] c03a14e0 remove_memory+0xb0/0x110
[c001828b7b20] c00c7db4 dlpar_remove_lmb+0x94/0x160
[c001828b7b60] c00c8ef8 dlpar_memory+0x7e8/0xd10
[c001828b7bf0] c00bf828 handle_dlpar_errorlog+0xf8/0x160
[c001828b7c60] c00bf8cc pseries_hp_work_fn+0x3c/0xa0
[c001828b7c90] c0128cd8 process_one_work+0x298/0x5a0
[c001828b7d20] c0129068 worker_thread+0x88/0x620
[c001828b7dc0] c013223c kthread+0x1ac/0x1c0
[c001828b7e30] c000b45c ret_from_kernel_thread+0x5c/0x80

To resolve this we need to track the node a LMB belongs to when
it is added to the system so we can remove it from that node instead
of the node that the device tree indicates it should belong to.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/include/asm/drmem.h|1 +
 arch/powerpc/mm/drmem.c |7 ++-
 arch/powerpc/platforms/pseries/hotplug-memory.c |   18 +-
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index ce242b9ea8c6..d099123f4f26 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,7 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
+   int nid;
 };
 
 struct drmem_lmb_info {
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..2baf00945b33 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,11 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
 
-   for_each_drmem_lmb(lmb)
+   for_each_drmem_lmb(lmb) {
read_drconf_v1_cell(lmb, );
+   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+   }
+
 }
 
 static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +415,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
 
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
+
+   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
}
}
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c1578f54c626..fc3a1e2d98b8 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -221,6 +221,9 @@ static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
 
lmb->aa_index = aa_index;
 
+   /* Find the node id for this address */
+   lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+
rtas_hp_event = true;
rc = drmem_update_dt();
rtas_hp_event = false;
@@ -234,6 +237,7 @@ static int dlpar_remove_device_tree_lmb(struct drmem_lmb 
*lmb)
 
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
lmb->aa_index = 0x;
+   lmb->nid = -1;
 
rtas_hp_event = true;
rc = drmem_update_dt();
@@ -411,7 +415,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
unsigned long block_sz;
-   int nid, rc;
+   int rc;
 
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -421,9 +425,8 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
 
block_sz = pseries_memory_block_size();
-   nid = memory_add_physaddr_to_nid(lmb->base_addr);
 
-   remove_memory(nid, lmb->base_addr, block_sz);
+   remove_memory(lmb->nid, lmb->base_addr, block_sz);
 
/* Update memory region

[PATCH v5 2/2] powerpc/pseries:Remove unneeded uses of dlpar work queue

2018-09-10 Thread Nathan Fontenot
There are three instances in which dlpar hotplug events are invoked:
handling a hotplug interrupt (in a kvm guest), handling a dlpar
request through sysfs, and updating LMB affinity when handling a
PRRN event. Only in the case of handling a hotplug interrupt do we
have to put the work on a workqueue; the other cases can handle the
dlpar request directly.

This patch exports the handle_dlpar_errorlog() function so that
dlpar hotplug events can be handled directly and updates the two
instances mentioned above to use the direct invocation.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/platforms/pseries/dlpar.c|   37 +++--
 arch/powerpc/platforms/pseries/mobility.c |   18 +-
 arch/powerpc/platforms/pseries/pseries.h  |5 ++--
 arch/powerpc/platforms/pseries/ras.c  |2 +-
 4 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c03f078..052c4f2ba0a0 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq;
 struct pseries_hp_work {
struct work_struct work;
struct pseries_hp_errorlog *errlog;
-   struct completion *hp_completion;
-   int *rc;
 };
 
 struct cc_workarea {
@@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index)
return 0;
 }
 
-static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
int rc;
 
@@ -371,20 +369,13 @@ static void pseries_hp_work_fn(struct work_struct *work)
struct pseries_hp_work *hp_work =
container_of(work, struct pseries_hp_work, work);
 
-   if (hp_work->rc)
-   *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
-   else
-   handle_dlpar_errorlog(hp_work->errlog);
-
-   if (hp_work->hp_completion)
-   complete(hp_work->hp_completion);
+   handle_dlpar_errorlog(hp_work->errlog);
 
kfree(hp_work->errlog);
kfree((void *)work);
 }
 
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
-struct completion *hotplug_done, int *rc)
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
 {
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
@@ -397,13 +388,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog 
*hp_errlog,
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
-   work->hp_completion = hotplug_done;
-   work->rc = rc;
queue_work(pseries_hp_wq, (struct work_struct *)work);
} else {
-   *rc = -ENOMEM;
kfree(hp_errlog_copy);
-   complete(hotplug_done);
}
 }
 
@@ -521,18 +508,15 @@ static int dlpar_parse_id_type(char **cmd, struct 
pseries_hp_errorlog *hp_elog)
 static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
   const char *buf, size_t count)
 {
-   struct pseries_hp_errorlog *hp_elog;
-   struct completion hotplug_done;
+   struct pseries_hp_errorlog hp_elog;
char *argbuf;
char *args;
int rc;
 
args = argbuf = kstrdup(buf, GFP_KERNEL);
-   hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
-   if (!hp_elog || !argbuf) {
+   if (!argbuf) {
pr_info("Could not allocate resources for DLPAR operation\n");
kfree(argbuf);
-   kfree(hp_elog);
return -ENOMEM;
}
 
@@ -540,25 +524,22 @@ static ssize_t dlpar_store(struct class *class, struct 
class_attribute *attr,
 * Parse out the request from the user, this will be in the form:
 *
 */
-   rc = dlpar_parse_resource(, hp_elog);
+   rc = dlpar_parse_resource(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_action(, hp_elog);
+   rc = dlpar_parse_action(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_id_type(, hp_elog);
+   rc = dlpar_parse_id_type(, _elog);
if (rc)
goto dlpar_store_out;
 
-   init_completion(_done);
-   queue_hotplug_event(hp_elog, _done, );
-   wait_for_completion(_done);
+   rc = handle_dlpar_errorlog(_elog);
 
 dlpar_store_out:
kfree(argbuf);
-   kfree(hp_elog);
 
if (rc)
pr_err("Could not handle DLPAR request \"%s\"\n", buf);
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index f0e30dc94988..6f27d00505cf 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platform

[PATCH v5 1/2] powerpc/pseries: Remove prrn_work workqueue

2018-09-10 Thread Nathan Fontenot
When a PRRN event is received we are already running in a worker
thread. Instead of spawning off another worker thread on the prrn_work
workqueue to handle the PRRN event we can just call the PRRN handler
routine directly.

With this update we can also pass the scope variable for the PRRN
event directly to the handler instead of it being a global variable.

This patch fixes the following oops message we are seeing in PRRN testing:

Oops: Bad kernel stack pointer, sig: 6 [#1]
SMP NR_CPUS=2048 NUMA pSeries
Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver 
nfs lockd grace sunrpc fscache binfmt_misc reiserfs vfat fat rpadlpar_io(X) 
rpaphp(X) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag 
af_packet xfs libcrc32c dm_service_time ibmveth(X) ses enclosure 
scsi_transport_sas rtc_generic btrfs xor raid6_pq sd_mod ibmvscsi(X) 
scsi_transport_srp ipr(X) libata sg dm_multipath dm_mod scsi_dh_rdac 
scsi_dh_emc scsi_dh_alua scsi_mod autofs4
Supported: Yes, External 54
CPU: 7 PID: 18967 Comm: kworker/u96:0 Tainted: G X 
4.4.126-94.22-default #1
Workqueue: pseries hotplug workque pseries_hp_work_fn
task: c00775367790 ti: c0001ebd4000 task.ti: c0070d14
NIP:  LR: 1fb3d050 CTR: 
REGS: c0001ebd7d40 TRAP: 0700   Tainted: G X  
(4.4.126-94.22-default)
MSR: 800102081000 <41,VEC,ME5  CR: 2802  XER: 20040018   4
CFAR: 1fb3d084 40 419   13
GPR00: 400010007 1400 00041fffe200
GPR04: 00805 1fb15fa8 00050500
GPR08: 0001f40040001  05:5200040002
GPR12: 5c7a05400 c00e89f8 1ed9f668
GPR16: 1fbeff9441fbeff94 1fb545e4 00600060
GPR20: 4  
GPR24: 540001fb3c000  1fb1b040
GPR28: 1fb2400041fb440d8 0008 
NIP [] 5 (null)
LR [1fb3d050] 031fb3d050
Call Trace:4
Instruction dump:  4   5:47 122
  X4XX     
  X5XX  6000 6000 6000 6000
---[ end trace aa5627b04a7d9d6b ]---   3NMI 
watchdog: BUG: soft lockup - CPU#27 stuck for 23s! [kworker/27:0:13903]
Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver 
nfs lockd grace sunrpc fscache binfmt_misc reiserfs vfat fat rpadlpar_io(X) 
rpaphp(X) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag 
af_packet xfs libcrc32c dm_service_time ibmveth(X) ses enclosure 
scsi_transport_sas rtc_generic btrfs xor raid6_pq sd_mod ibmvscsi(X) 
scsi_transport_srp ipr(X) libata sg dm_multipath dm_mod scsi_dh_rdac 
scsi_dh_emc scsi_dh_alua scsi_mod autofs4
Supported: Yes, External
CPU: 27 PID: 13903 Comm: kworker/27:0 Tainted: G  D  X 
4.4.126-94.22-default #1
Workqueue: events prrn_work_fn
task: c00747cfa390 ti: c0074712c000 task.ti: c0074712c000
NIP: c08002a8 LR: c0090770 CTR: 0032e088
REGS: c0074712f7b0 TRAP: 0901   Tainted: G  D  X  
(4.4.126-94.22-default)
MSR: 80019033   CR: 22482044  XER: 2004
CFAR: c08002c4 SOFTE: 1
GPR00: c0090770 c0074712fa30 c0f09800 c0fa1928 6:02
GPR04: c00775f5e000 fffe 0001 c0f42db8
GPR08: 0001 8007  
GPR12: 800621008318 c7a14400
NIP [c08002a8] _raw_spin_lock+0x68/0xd0
LR [c0090770] mobility_rtas_call+0x50/0x100
Call Trace:595
[c0074712fa60] [c0090770] mobility_rtas_call+0x50/0x100
[c0074712faf0] [c0090b08] pseries_devicetree_update+0xf8/0x530
[c0074712fc20] [c0031ba4] prrn_work_fn+0x34/0x50
[c0074712fc40] [c00e0390] process_one_work+0x1a0/0x4e0
[c0074712fcd0] [c00e0870] worker_thread+0x1a0/0x6105:57   2
[c0074712fd80] [c00e8b18] kthread+0x128/0x150
[c0074712fe30] [c00096f8] ret_from_kernel_thread+0x5c/0x64
Instruction dump:
2c09 40c20010 7d40192d 40c2fff0 7c2004ac 2fa9 40de0018 5:540030   3
e8010010 ebe1fff8 7c0803a6 4e800020 <7c210b78> e92d 89290009 792affe3

Signed-off-by: John Allen 
Signed-off-by: Haren Myneni 
---
v5:
  - Update commit message to include oops message
v4:
  - Remove prrn_work workqueue as suggested by Michael Ellerman
  - Make the PRRN event scope passed in as opposed to a global, suggested
by Michael Ellerman
v3:
  -Scrap the mutex as it only 

[PATCH v5 0/2] powerpc/pseries: Improve serialization of PRRN events

2018-09-10 Thread Nathan Fontenot
Stress testing has uncovered issues with handling continuously queued PRRN
events. Running PRRN events in this way can seriously load the system given
the sheer volume of dlpar actions being handled, eventually resulting
in a system oops (see below). This patchset ensures that PRRN
events are handled more synchronously. It also updates dlpar invocation
so that it can be done directly instead of waiting on a workqueue.

Oops: Bad kernel stack pointer, sig: 6 [#1]
SMP NR_CPUS=2048 NUMA pSeries
Supported: Yes, External 54
CPU: 7 PID: 18967 Comm: kworker/u96:0 Tainted: G X 
4.4.126-94.22-default #1
Workqueue: pseries hotplug workque pseries_hp_work_fn
task: c00775367790 ti: c0001ebd4000 task.ti: c0070d14
NIP:  LR: 1fb3d050 CTR: 
REGS: c0001ebd7d40 TRAP: 0700   Tainted: G X  
(4.4.126-94.22-default)
MSR: 800102081000 <41,VEC,ME5  CR: 2802  XER: 20040018   4
CFAR: 1fb3d084 40 419   13
GPR00: 400010007 1400 00041fffe200 
GPR04: 00805 1fb15fa8 00050500 
GPR08: 0001f40040001  05:5200040002
GPR12: 5c7a05400 c00e89f8 1ed9f668 
GPR16: 1fbeff9441fbeff94 1fb545e4 00600060 
GPR20: 4   
GPR24: 540001fb3c000  1fb1b040 
GPR28: 1fb2400041fb440d8 0008  
NIP [] 5 (null)
LR [1fb3d050] 031fb3d050
Call Trace:4
Instruction dump:  4   5:47 122
  X4XX      
  X5XX  6000 6000 6000 6000 

-Nathan
---

Nathan Fontenot (2):
  powerpc/pseries: Remove prrn_work workqueue
  powerpc/pseries:Remove unneeded uses of dlpar work queue


 arch/powerpc/kernel/rtasd.c   |   17 ++---
 arch/powerpc/platforms/pseries/dlpar.c|   37 +++--
 arch/powerpc/platforms/pseries/mobility.c |   18 +-
 arch/powerpc/platforms/pseries/pseries.h  |5 ++--
 arch/powerpc/platforms/pseries/ras.c  |2 +-
 5 files changed, 22 insertions(+), 57 deletions(-)



[PATCH v4 2/2] powerpc/pseries:Remove unneeded uses of dlpar work queue

2018-08-30 Thread Nathan Fontenot
There are three instances in which dlpar hotplug events are invoked:
handling a hotplug interrupt (in a kvm guest), handling a dlpar
request through sysfs, and updating LMB affinity when handling a
PRRN event. Only in the case of handling a hotplug interrupt do we
have to put the work on a workqueue; the other cases can handle the
dlpar request directly.

This patch exports the handle_dlpar_errorlog() function so that
dlpar hotplug events can be handled directly and updates the two
instances mentioned above to use the direct invocation.

Signed-off-by: Nathan Fontenot 
---
 arch/powerpc/platforms/pseries/dlpar.c|   37 +++--
 arch/powerpc/platforms/pseries/mobility.c |   18 +-
 arch/powerpc/platforms/pseries/pseries.h  |5 ++--
 arch/powerpc/platforms/pseries/ras.c  |2 +-
 4 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c03f078..052c4f2ba0a0 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq;
 struct pseries_hp_work {
struct work_struct work;
struct pseries_hp_errorlog *errlog;
-   struct completion *hp_completion;
-   int *rc;
 };
 
 struct cc_workarea {
@@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index)
return 0;
 }
 
-static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
int rc;
 
@@ -371,20 +369,13 @@ static void pseries_hp_work_fn(struct work_struct *work)
struct pseries_hp_work *hp_work =
container_of(work, struct pseries_hp_work, work);
 
-   if (hp_work->rc)
-   *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
-   else
-   handle_dlpar_errorlog(hp_work->errlog);
-
-   if (hp_work->hp_completion)
-   complete(hp_work->hp_completion);
+   handle_dlpar_errorlog(hp_work->errlog);
 
kfree(hp_work->errlog);
kfree((void *)work);
 }
 
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
-struct completion *hotplug_done, int *rc)
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
 {
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
@@ -397,13 +388,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog 
*hp_errlog,
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
-   work->hp_completion = hotplug_done;
-   work->rc = rc;
queue_work(pseries_hp_wq, (struct work_struct *)work);
} else {
-   *rc = -ENOMEM;
kfree(hp_errlog_copy);
-   complete(hotplug_done);
}
 }
 
@@ -521,18 +508,15 @@ static int dlpar_parse_id_type(char **cmd, struct 
pseries_hp_errorlog *hp_elog)
 static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
   const char *buf, size_t count)
 {
-   struct pseries_hp_errorlog *hp_elog;
-   struct completion hotplug_done;
+   struct pseries_hp_errorlog hp_elog;
char *argbuf;
char *args;
int rc;
 
args = argbuf = kstrdup(buf, GFP_KERNEL);
-   hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
-   if (!hp_elog || !argbuf) {
+   if (!argbuf) {
pr_info("Could not allocate resources for DLPAR operation\n");
kfree(argbuf);
-   kfree(hp_elog);
return -ENOMEM;
}
 
@@ -540,25 +524,22 @@ static ssize_t dlpar_store(struct class *class, struct 
class_attribute *attr,
 * Parse out the request from the user, this will be in the form:
 *
 */
-   rc = dlpar_parse_resource(, hp_elog);
+   rc = dlpar_parse_resource(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_action(, hp_elog);
+   rc = dlpar_parse_action(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_id_type(, hp_elog);
+   rc = dlpar_parse_id_type(, _elog);
if (rc)
goto dlpar_store_out;
 
-   init_completion(_done);
-   queue_hotplug_event(hp_elog, _done, );
-   wait_for_completion(_done);
+   rc = handle_dlpar_errorlog(_elog);
 
 dlpar_store_out:
kfree(argbuf);
-   kfree(hp_elog);
 
if (rc)
pr_err("Could not handle DLPAR request \"%s\"\n", buf);
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index f0e30dc94988..6f27d00505cf 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platform

[PATCH v4 1/2] powerpc/pseries: Remove prrn_work workqueue

2018-08-30 Thread Nathan Fontenot
When a PRRN event is received we are already running in a worker
thread. Instead of spawning off another worker thread on the prrn_work
workqueue to handle the PRRN event we can just call the PRRN handler
routine directly.

With this update we can also pass the scope variable for the PRRN
event directly to the handler instead of it being a global variable.

Signed-off-by: John Allen 
Signed-off-by: Haren Myneni 
---
v4:
  - Remove prrn_work workqueue as suggested by Michael Ellerman
  - Make the PRRN event scope passed in as opposed to a global, suggested
by Michael Ellerman
v3:
  -Scrap the mutex as it only replicates existing workqueue behavior.
v2:
  -Unlock prrn_lock when PRRN operations are complete, not after handler is
   scheduled.
  -Remove call to flush_work, the previous broken method of serializing
   PRRN events.
---
 arch/powerpc/kernel/rtasd.c |   17 +++--
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d59d..23b88b923f06 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -274,27 +274,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, 
int fatal)
 }
 
 #ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
+static void handle_prrn_event(s32 scope)
 {
/*
 * For PRRN, we must pass the negative of the scope value in
 * the RTAS event.
 */
-   pseries_devicetree_update(-prrn_update_scope);
+   pseries_devicetree_update(-scope);
numa_update_cpu_topology(false);
 }
 
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-static void prrn_schedule_update(u32 scope)
-{
-   flush_work(_work);
-   prrn_update_scope = scope;
-   schedule_work(_work);
-}
-
 static void handle_rtas_event(const struct rtas_error_log *log)
 {
if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
@@ -303,7 +292,7 @@ static void handle_rtas_event(const struct rtas_error_log 
*log)
/* For PRRN Events the extended log length is used to denote
 * the scope for calling rtas update-nodes.
 */
-   prrn_schedule_update(rtas_error_extended_log_length(log));
+   handle_prrn_event(rtas_error_extended_log_length(log));
 }
 
 #else



[PATCH v4 0/2] powerpc/pseries: Improve serialization of PRRN events

2018-08-30 Thread Nathan Fontenot
Stress testing has uncovered issues with handling continuously queued PRRN
events. Running PRRN events in this way can seriously load the system given
the sheer volume of dlpar actions being handled. This patchset ensures that PRRN
events are handled more synchronously. It also updates dlpar invocation
so that it can be done directly instead of waiting on a workqueue.

-Nathan
---
v4:
  -Update patch 1/2 to remove prrn workqueue
  -Replace patch 2/2 to allow for direct dlpar invocation
v3:
  -Scrap the PRRN mutex as it only replicates existing workqueue behavior.
v2:
  -Unlock prrn_lock when PRRN operations are complete, not after handler is
   scheduled.
  -Remove call to flush_work, the previous broken method of serializing
   PRRN events.

Nathan Fontenot (2):
  powerpc/pseries: Remove prrn_work workqueue
  powerpc/pseries:Remove unneeded uses of dlpar work queue


 arch/powerpc/kernel/rtasd.c   |   17 ++---
 arch/powerpc/platforms/pseries/dlpar.c|   37 +++--
 arch/powerpc/platforms/pseries/mobility.c |   18 +-
 arch/powerpc/platforms/pseries/pseries.h  |5 ++--
 arch/powerpc/platforms/pseries/ras.c  |2 +-
 5 files changed, 22 insertions(+), 57 deletions(-)



Re: [PATCH v3 2/2] powerpc/pseries: Wait for completion of hotplug events during PRRN handling

2018-08-09 Thread Nathan Fontenot
On 08/08/2018 10:29 AM, John Allen wrote:
> While handling PRRN events, the time to handle the actual hotplug events
> dwarfs the time it takes to perform the device tree updates and queue the
> hotplug events. In the case that PRRN events are being queued continuously,
> hotplug events have been observed to be queued faster than the kernel can
> actually handle them. This patch avoids the problem by waiting for a
> hotplug request to complete before queueing more hotplug events.
> 
> Signed-off-by: John Allen 

In the V2 thread it was mentioned that we could just call the DLPAR operation
directly instead of going through the workqueue. I have written a patch to do
this that also cleans up some of the request handling.

Requests that come through the hotplug interrupt still use the workqueue. The
other requests, PRRN and sysfs, just call the dlpar handler directly. This
eliminates the need for a wait conditional and return code handling in the
workqueue handler and should solve the issue that John solves with his patch.

This still needs testing but wanted to get people's thoughts.

-Nathan

---
 arch/powerpc/platforms/pseries/dlpar.c|   37 +++--
 arch/powerpc/platforms/pseries/mobility.c |   18 +-
 arch/powerpc/platforms/pseries/pseries.h  |5 ++--
 arch/powerpc/platforms/pseries/ras.c  |2 +-
 4 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c03f078..052c4f2ba0a0 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq;
 struct pseries_hp_work {
struct work_struct work;
struct pseries_hp_errorlog *errlog;
-   struct completion *hp_completion;
-   int *rc;
 };
 
 struct cc_workarea {
@@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index)
return 0;
 }
 
-static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
int rc;
 
@@ -371,20 +369,13 @@ static void pseries_hp_work_fn(struct work_struct *work)
struct pseries_hp_work *hp_work =
container_of(work, struct pseries_hp_work, work);
 
-   if (hp_work->rc)
-   *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
-   else
-   handle_dlpar_errorlog(hp_work->errlog);
-
-   if (hp_work->hp_completion)
-   complete(hp_work->hp_completion);
+   handle_dlpar_errorlog(hp_work->errlog);
 
kfree(hp_work->errlog);
kfree((void *)work);
 }
 
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
-struct completion *hotplug_done, int *rc)
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
 {
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
@@ -397,13 +388,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog 
*hp_errlog,
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
-   work->hp_completion = hotplug_done;
-   work->rc = rc;
queue_work(pseries_hp_wq, (struct work_struct *)work);
} else {
-   *rc = -ENOMEM;
kfree(hp_errlog_copy);
-   complete(hotplug_done);
}
 }
 
@@ -521,18 +508,15 @@ static int dlpar_parse_id_type(char **cmd, struct 
pseries_hp_errorlog *hp_elog)
 static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
   const char *buf, size_t count)
 {
-   struct pseries_hp_errorlog *hp_elog;
-   struct completion hotplug_done;
+   struct pseries_hp_errorlog hp_elog;
char *argbuf;
char *args;
int rc;
 
args = argbuf = kstrdup(buf, GFP_KERNEL);
-   hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
-   if (!hp_elog || !argbuf) {
+   if (!argbuf) {
pr_info("Could not allocate resources for DLPAR operation\n");
kfree(argbuf);
-   kfree(hp_elog);
return -ENOMEM;
}
 
@@ -540,25 +524,22 @@ static ssize_t dlpar_store(struct class *class, struct 
class_attribute *attr,
 * Parse out the request from the user, this will be in the form:
 *
 */
-   rc = dlpar_parse_resource(, hp_elog);
+   rc = dlpar_parse_resource(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_action(, hp_elog);
+   rc = dlpar_parse_action(, _elog);
if (rc)
goto dlpar_store_out;
 
-   rc = dlpar_parse_id_type(, hp_elog);
+   rc = dlpar_parse_id_type(, _elog);
if (rc)
goto dlpar_store_out;
 
-   init_completion(_done);
-   

Re: [PATCH v07 6/9] pmt/numa: Disable arch_update_cpu_topology during CPU readd

2018-07-24 Thread Nathan Fontenot

On 07/13/2018 03:18 PM, Michael Bringmann wrote:

pmt/numa: Disable arch_update_cpu_topology during post migration
CPU readd updates when evaluating device-tree changes after LPM
to avoid thread deadlocks trying to update node assignments.
System timing between all of the threads and timers restarted in
a migrated system overlapped frequently allowing tasks to start
acquiring resources (get_online_cpus) needed by rebuild_sched_domains.
Defer the operation of that function until after the CPU readd has
completed.

Signed-off-by: Michael Bringmann 
---
  arch/powerpc/platforms/pseries/hotplug-cpu.c |9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 1906ee57..df1791b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -26,6 +26,7 @@
  #include /* for idle_task_exit */
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -684,9 +685,15 @@ static int dlpar_cpu_readd_by_index(u32 drc_index)

pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);

+   arch_update_cpu_topology_suspend();
rc = dlpar_cpu_remove_by_index(drc_index, false);
-   if (!rc)
+   arch_update_cpu_topology_resume();
+
+   if (!rc) {
+   arch_update_cpu_topology_suspend();
rc = dlpar_cpu_add(drc_index, false);
+   arch_update_cpu_topology_resume();
+   }



A couple of questions... Why not disable across the entire remove and add
operations instead of disabling for each operation?

Also, what about other CPU add/remove routines, do they need to do
similar disabling?

-Nathan


if (rc)
pr_info("Failed to update cpu at drc_index %lx\n",





Re: [PATCH v07 2/9] hotplug/cpu: Add operation queuing function

2018-07-23 Thread Nathan Fontenot

On 07/13/2018 03:18 PM, Michael Bringmann wrote:

migration/dlpar: This patch adds function dlpar_queue_action()
which will queue up information about a CPU/Memory 'readd'
operation according to resource type, action code, and DRC index.
At a subsequent point, the list of operations can be run/played
in series.  Examples of such operations include 'readd' of CPU
and Memory blocks identified as having changed their associativity
during an LPAR migration event.
Signed-off-by: Michael Bringmann 
---
Changes in patch:
   -- Correct drc_index before adding to pseries_hp_errorlog struct
   -- Correct text of notice
   -- Revise queuing model to save up all of the DLPAR actions for
  later execution.
   -- Restore list init statement missing from patch
   -- Move call to apply queued operations into 'mobility.c'
   -- Compress some code
   -- Rename some of queueing function APIs
   -- Revise implementation to push execution of queued operations
  to a workqueue task.
   -- Cleanup reference to outdated queuing operation.
---
  arch/powerpc/include/asm/rtas.h   |2 +
  arch/powerpc/platforms/pseries/dlpar.c|   61 +
  arch/powerpc/platforms/pseries/mobility.c |4 ++
  arch/powerpc/platforms/pseries/pseries.h  |2 +
  4 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c..4f601c7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
struct { __be32 count, index; } ic;
chardrc_name[1];
} _drc_u;
+   struct list_head list;
  };

  #define PSERIES_HP_ELOG_RESOURCE_CPU  1
  #define PSERIES_HP_ELOG_RESOURCE_MEM  2
  #define PSERIES_HP_ELOG_RESOURCE_SLOT 3
  #define PSERIES_HP_ELOG_RESOURCE_PHB  4
+#define PSERIES_HP_ELOG_RESOURCE_PMT   5

  #define PSERIES_HP_ELOG_ACTION_ADD1
  #define PSERIES_HP_ELOG_ACTION_REMOVE 2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..7264b8e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 

  static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
return 0;
  }

+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
  static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
  {
int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog 
*hp_elog)
case PSERIES_HP_ELOG_RESOURCE_CPU:
rc = dlpar_cpu(hp_elog);
break;
+   case PSERIES_HP_ELOG_RESOURCE_PMT:
+   rc = dlpar_pmt(hp_elog);
+   break;
default:
pr_warn_ratelimited("Invalid resource (%d) specified\n",
hp_elog->resource);
@@ -407,6 +413,61 @@ void queue_hotplug_event(struct pseries_hp_errorlog 
*hp_errlog,
}
  }

+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+   struct pseries_hp_errorlog *hp_errlog;
+
+   hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+   if (!hp_errlog)
+   return -ENOMEM;
+
+   hp_errlog->resource = resource;
+   hp_errlog->action = action;
+   hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+   hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+   list_add_tail(_errlog->list, _delayed_list);
+
+   return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+   struct list_head *pos, *q;
+
+   ssleep(15);
+
+   list_for_each_safe(pos, q, _delayed_list) {
+   struct pseries_hp_errorlog *tmp;
+
+   tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+   handle_dlpar_errorlog(tmp);
+
+   list_del(pos);
+   kfree(tmp);
+
+   ssleep(10);
+   }
+
+   return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+   if (!list_empty(_delayed_list)) {
+   struct pseries_hp_errorlog hp_errlog;
+
+   hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+   hp_errlog.action = 0;
+   hp_errlog.id_type = 0;
+
+   queue_hotplug_event(_errlog, 0, 0); > +   }
+   return 0;
+}


I'm a bit confused by this. Is there a reason this needs to queue a
hotplug event instead of just walking the list as is done in dlpar_pmt?

-Nathan


+
  static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog 
*hp_elog)
  {
char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index f6364d9..d0d1cae 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ 

Re: [PATCH v07 1/9] hotplug/cpu: Conditionally acquire/release DRC index

2018-07-23 Thread Nathan Fontenot

On 07/13/2018 03:17 PM, Michael Bringmann wrote:

powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
skipping of DRC index acquire or release operations during the CPU
add or remove operations.  This is intended to support subsequent
changes to provide a 'CPU readd' operation.

Signed-off-by: Michael Bringmann 
---
Changes in patch:
   -- Move new validity check added to pseries_smp_notifier
  to another patch
   -- Revise one of checks for 'acquire_drc' in dlpar_cpu_add.
   -- Revise one of checks for 'release_drc' in dlpar_cpu_remove.
---
  arch/powerpc/platforms/pseries/hotplug-cpu.c |   71 +++---
  1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..7ede3b0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, 
u32 drc_index)
return found;
  }

-static ssize_t dlpar_cpu_add(u32 drc_index)
+static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
  {
struct device_node *dn, *parent;
int rc, saved_rc;
@@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return -EINVAL;
}

-   rc = dlpar_acquire_drc(drc_index);
-   if (rc) {
-   pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
-   rc, drc_index);
-   of_node_put(parent);
-   return -EINVAL;
+   if (acquire_drc) {
+   rc = dlpar_acquire_drc(drc_index);
+   if (rc) {
+   pr_warn("Failed to acquire DRC, rc: %d, drc index: 
%x\n",
+   rc, drc_index);
+   of_node_put(parent);
+   return -EINVAL;
+   }
}

dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
if (!dn) {
pr_warn("Failed call to configure-connector, drc index: %x\n",
drc_index);
-   dlpar_release_drc(drc_index);
+   if (acquire_drc)
+   dlpar_release_drc(drc_index);
of_node_put(parent);
return -EINVAL;
}
@@ -484,9 +487,11 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
dn->name, rc, drc_index);

-   rc = dlpar_release_drc(drc_index);
-   if (!rc)
-   dlpar_free_cc_nodes(dn);
+   if (acquire_drc) {
+   rc = dlpar_release_drc(drc_index);
+   if (!rc)
+   dlpar_free_cc_nodes(dn);
+   }

return saved_rc;
}
@@ -498,7 +503,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
dn->name, rc, drc_index);

rc = dlpar_detach_node(dn);
-   if (!rc)
+   if (!rc && acquire_drc)
dlpar_release_drc(drc_index);

return saved_rc;
@@ -566,7 +571,8 @@ static int dlpar_offline_cpu(struct device_node *dn)

  }

-static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
+   bool release_drc)
  {
int rc;

@@ -579,12 +585,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, 
u32 drc_index)
return -EINVAL;
}

-   rc = dlpar_release_drc(drc_index);
-   if (rc) {
-   pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
-   drc_index, dn->name, rc);
-   dlpar_online_cpu(dn);
-   return rc;
+   if (release_drc) {
+   rc = dlpar_release_drc(drc_index);
+   if (rc) {
+   pr_warn("Failed to release drc (%x) for CPU %s, rc: 
%d\n",
+   drc_index, dn->name, rc);
+   dlpar_online_cpu(dn);
+   return rc;
+   }
}

rc = dlpar_detach_node(dn);
@@ -593,8 +601,9 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 
drc_index)

pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);

-   rc = dlpar_acquire_drc(drc_index);
-   if (!rc)
+   if (release_drc)
+   rc = dlpar_acquire_drc(drc_index);
+   if (!release_drc || !rc)
dlpar_online_cpu(dn);


This is likely wrong. At this point you're inside an if (rc) block, so rc is
already non-zero. If release_drc is false, this checks an invalid rc state.

-Nathan



return saved_rc;
@@ -622,7 +631,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 

Re: [PATCH v2 2/2] powerpc/pseries: Wait for completion of hotplug events during PRRN handling

2018-07-20 Thread Nathan Fontenot

On 07/17/2018 02:40 PM, John Allen wrote:

While handling PRRN events, the time to handle the actual hotplug events
dwarfs the time it takes to perform the device tree updates and queue the
hotplug events. In the case that PRRN events are being queued continuously,
hotplug events have been observed to be queued faster than the kernel can
actually handle them. This patch avoids the problem by waiting for a
hotplug request to complete before queueing more hotplug events.

Signed-off-by: John Allen 


Reviewed-by: Nathan Fontenot 


---
  arch/powerpc/platforms/pseries/mobility.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 8a8033a249c7..49930848fa78 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -242,6 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 
drc_index)
  static void prrn_update_node(__be32 phandle)
  {
struct pseries_hp_errorlog *hp_elog;
+   struct completion hotplug_done;
struct device_node *dn;

/*
@@ -263,7 +264,9 @@ static void prrn_update_node(__be32 phandle)
hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
hp_elog->_drc_u.drc_index = phandle;

-   queue_hotplug_event(hp_elog, NULL, NULL);
+   init_completion(_done);
+   queue_hotplug_event(hp_elog, _done, NULL);
+   wait_for_completion(_done);

kfree(hp_elog);
  }





Re: [PATCH v2 1/2] powerpc/pseries: Avoid blocking rtas polling handling multiple PRRN events

2018-07-20 Thread Nathan Fontenot

On 07/17/2018 02:40 PM, John Allen wrote:

When a PRRN event is being handled and another PRRN event comes in, the
second event will block rtas polling waiting on the first to complete,
preventing any further rtas events from being handled. This can be
especially problematic in case that PRRN events are continuously being
queued in which case rtas polling gets indefinitely blocked completely.

This patch introduces a mutex that prevents any subsequent PRRN events from
running while there is a prrn event being handled, allowing rtas polling to
continue normally.

Signed-off-by: John Allen 


Reviewed-by: Nathan Fontenot 


---
v2:
   -Unlock prrn_lock when PRRN operations are complete, not after handler is
scheduled.
   -Remove call to flush_work, the previous broken method of serializing
PRRN events.
---
  arch/powerpc/kernel/rtasd.c | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 44d66c33d59d..845fc5aec178 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -35,6 +35,8 @@

  static DEFINE_SPINLOCK(rtasd_log_lock);

+static DEFINE_MUTEX(prrn_lock);
+
  static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);

  static char *rtas_log_buf;
@@ -284,15 +286,17 @@ static void prrn_work_fn(struct work_struct *work)
 */
pseries_devicetree_update(-prrn_update_scope);
numa_update_cpu_topology(false);
+   mutex_unlock(_lock);
  }

  static DECLARE_WORK(prrn_work, prrn_work_fn);

  static void prrn_schedule_update(u32 scope)
  {
-   flush_work(_work);
-   prrn_update_scope = scope;
-   schedule_work(_work);
+   if (mutex_trylock(_lock)) {
+   prrn_update_scope = scope;
+   schedule_work(_work);
+   }
  }

  static void handle_rtas_event(const struct rtas_error_log *log)





Re: [powerpc/powervm]kernel BUG at mm/memory_hotplug.c:1864!

2018-06-26 Thread Nathan Fontenot
On 06/12/2018 05:28 AM, Balbir Singh wrote:
> 
> 
> On 11/06/18 17:41, vrbagal1 wrote:
>> On 2018-06-08 17:45, Oscar Salvador wrote:
>>> On Fri, Jun 08, 2018 at 05:11:24PM +0530, vrbagal1 wrote:
 On 2018-06-08 16:58, Oscar Salvador wrote:
> On Fri, Jun 08, 2018 at 04:44:24PM +0530, vrbagal1 wrote:
>> Greetings!!!
>>
>> I am seeing kernel bug followed by oops message and system reboots,
>> while
>> running dlpar memory hotplug test.
>>
>> Machine Details: Power6 PowerVM Platform
>> GCC version: (gcc version 4.8.3 20140911 (Red Hat 4.8.3-7) (GCC))
>> Test case: dlpar memory hotplug test 
>> (https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/memhotplug.py)
>> Kernel Version: Linux version 4.17.0-autotest
>>
>> I am seeing this bug on rc7 as well.
>>
>> Observing similar traces on linux next kernel: 4.17.0-next-20180608-autotest
>>
>>  Block size [0x400] unaligned hotplug range: start 0x22000, size 
>> 0x100
> 
> size < block_size in this case, why? how? Could you confirm that the block 
> size is 64MB and you're trying to remove 16MB?
> 

I was not able to re-create this failure exactly (I don't have a Power6 system),
but was able to get a similar re-create on a Power9 with a few modifications.

I think the issue you're seeing is due to a change in the validation of memory
done in remove_memory to ensure the amount of memory being removed spans an
entire memory block. The pseries memory remove code (see
pseries_remove_memblock) tries to remove each section of a memory block
instead of the entire memory block.

Could you try the patch below, which updates the pseries code to remove the
entire memory block instead of doing it one section at a time?

-Nathan
---

 arch/powerpc/platforms/pseries/hotplug-memory.c |   18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c1578f54c626..6072efc793e1 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -316,11 +316,11 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb)
return dlpar_change_lmb_state(lmb, false);
 }
 
-static int pseries_remove_memblock(unsigned long base, unsigned int 
memblock_size)
+static int pseries_remove_memblock(unsigned long base,
+  unsigned int memblock_sz)
 {
-   unsigned long block_sz, start_pfn;
-   int sections_per_block;
-   int i, nid;
+   unsigned long start_pfn;
+   int nid;
 
start_pfn = base >> PAGE_SHIFT;
 
@@ -329,18 +329,12 @@ static int pseries_remove_memblock(unsigned long base, 
unsigned int memblock_siz
if (!pfn_valid(start_pfn))
goto out;
 
-   block_sz = pseries_memory_block_size();
-   sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
nid = memory_add_physaddr_to_nid(base);
-
-   for (i = 0; i < sections_per_block; i++) {
-   remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
-   base += MIN_MEMORY_BLOCK_SIZE;
-   }
+   remove_memory(nid, base, memblock_sz);
 
 out:
/* Update memory regions for memory remove */
-   memblock_remove(base, memblock_size);
+   memblock_remove(base, memblock_sz);
unlock_device_hotplug();
return 0;
 }



Re: [RFC v4 4/4] hotplug/drcinfo: Code cleanup for devices

2018-05-22 Thread Nathan Fontenot
On 05/22/2018 11:37 AM, Michael Bringmann wrote:
> This patch extends the use of a common parse function for the
> ibm,drc-info property that can be modified by a callback function
> to the hotplug device processing.  Candidate code is replaced by
> a call to the parser including a pointer to a local context-specific
> functions, and local data.
> 
> In addition, several more opportunities to compress and reuse
> common code between the old and new property parsers were applied.
> 
> Finally, a bug with the registration of slots was observed on some
> systems, and the code was rewritten to prevent its reoccurrence.
> 
> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V4:
>   -- Update code to account for latest kernel checkins.
>   -- Fix bug searching for virtual device slots.
>   -- Rebased to 4.17-rc5 kernel
>   -- Patch cleanup
> ---
>  drivers/pci/hotplug/rpaphp_core.c |  181 
> ++---
>  1 file changed, 126 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index 435c1a0..dc4ec68 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -222,47 +222,51 @@ static int rpaphp_check_drc_props_v1(struct device_node 
> *dn, char *drc_name,
>   return -EINVAL;
>  }
> 
> -static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
> - char *drc_type, unsigned int my_index)
> +struct check_drc_props_v2_struct {
> + char *drc_name;
> + char *drc_type;
> + unsigned int my_index;
> +};
> +
> +static int check_drc_props_v2_cb(struct of_drc_info *drc, void *idata,
> + void *not_used, int *ret_code)
>  {
> - struct property *info;
> - unsigned int entries;
> - struct of_drc_info drc;
> - const __be32 *value;
> + struct check_drc_props_v2_struct *cdata = idata;
>   char cell_drc_name[MAX_DRC_NAME_LEN];
> - int j, fndit;
> 
> - info = of_find_property(dn->parent, "ibm,drc-info", NULL);
> - if (info == NULL)
> - return -EINVAL;
> + (*ret_code) = -EINVAL;
> 
> - value = info->value;
> - entries = of_read_number(value++, 1);
> -
> - for (j = 0; j < entries; j++) {
> - of_read_drc_info_cell(, , );
> -
> - /* Should now know end of current entry */
> -
> - if (my_index > drc.last_drc_index)
> - continue;
> + if (cdata->my_index > drc->last_drc_index)
> + return 0;
> 
> - fndit = 1;
> - break;
> + /* Found drc_index.  Now match the rest. */
> + sprintf(cell_drc_name, "%s%d", drc->drc_name_prefix,
> + cdata->my_index - drc->drc_index_start +
> + drc->drc_name_suffix_start);
> +
> + if (((cdata->drc_name == NULL) ||
> +  (cdata->drc_name && !strcmp(cdata->drc_name, cell_drc_name))) &&
> + ((cdata->drc_type == NULL) ||
> +  (cdata->drc_type && !strcmp(cdata->drc_type, drc->drc_type {
> + (*ret_code) = 0;
> + return 1;
>   }
> - /* Found it */
> 
> - if (fndit)
> - sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix, 
> - my_index);
> + return 0;
> +}
> 
> - if (((drc_name == NULL) ||
> -  (drc_name && !strcmp(drc_name, cell_drc_name))) &&
> - ((drc_type == NULL) ||
> -  (drc_type && !strcmp(drc_type, drc.drc_type
> - return 0;
> +static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
> + char *drc_type, unsigned int my_index)
> +{
> + struct device_node *root = dn;
> + struct check_drc_props_v2_struct cdata = {
> + drc_name, drc_type, be32_to_cpu(my_index) };
> 
> - return -EINVAL;
> + if (!drc_type || (drc_type && strcmp(drc_type, "SLOT")))
> + root = dn->parent;
> +
> + return drc_info_parser(root, check_drc_props_v2_cb,
> + drc_type, );
>  }
> 
>  int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
> @@ -285,7 +289,6 @@ int rpaphp_check_drc_props(struct device_node *dn, char 
> *drc_name,
>  }
>  EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
> 
> -
>  static int is_php_type(char *drc_type)
>  {
>   unsigned long value;
> @@ -345,17 +348,41 @@ static int is_php_dn(struct device_node *dn, const int 
> **indexes,
>   *
>   * To remove a slot, it suffices to call rpaphp_deregister_slot().
>   */
> -int rpaphp_add_slot(struct device_node *dn)
> +
> +static int rpaphp_add_slot_common(struct device_node *dn,
> + u32 drc_index, char *drc_name, char *drc_type,
> + u32 drc_power_domain)
>  {
>   struct slot *slot;
>  

Re: [RFC v4 3/4] hotplug/drcinfo: Fix hot-add CPU issues

2018-05-22 Thread Nathan Fontenot
On 05/22/2018 11:37 AM, Michael Bringmann wrote:
> This patch applies a common parse function for the ibm,drc-info
> property that can be modified by a callback function to the
> hot-add CPU code.  Candidate code is replaced by a call to the
> parser including a pointer to a local context-specific functions,
> and local data.
> 
> In addition, a bug in the release of the previous patch set may
> break things in some of the CPU DLPAR operations.  For instance,
> when attempting to hot-add a new CPU or set of CPUs, the original
> patch failed to always properly calculate the available resources,
> and aborted the operation.
> 
> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V4:
>   -- Update code to account for latest kernel checkins.
>   -- Rebased to 4.17-rc5 kernel
>   -- Compress some more code
> ---
>  arch/powerpc/platforms/pseries/hotplug-cpu.c|  118 
> +--
>  arch/powerpc/platforms/pseries/pseries_energy.c |  107 +++--
>  2 files changed, 141 insertions(+), 84 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 6ef77ca..ceacad9 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -411,27 +411,63 @@ static bool dlpar_cpu_exists(struct device_node 
> *parent, u32 drc_index)
>   return found;
>  }
> 
> -static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
> +static bool check_cpu_drc_index(struct device_node *parent,
> + int (*cb)(struct of_drc_info *drc,
> + void *data, void *not_used,
> + int *ret_code),
> + void *cdata)
>  {
>   bool found = false;
> - int rc, index;
> 
> - index = 0;
> - while (!found) {
> - u32 drc;
> + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
> + if (drc_info_parser(parent, cb, "CPU", cdata))
> + found = true;
> + } else {
> + int index = 0;
> 
> - rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
> - index++, );
> - if (rc)
> - break;
> + while (!found) {
> + u32 drc;
> 
> - if (drc == drc_index)
> - found = true;
> + if (of_property_read_u32_index(parent,
> + "ibm,drc-indexes",
> + index++, ))
> + break;
> + if (cb(NULL, cdata, , NULL))
> + found = true;
> + }
>   }
> 
>   return found;
>  }
> 
> +struct valid_drc_index_struct {
> + u32 targ_drc_index;
> +};

Can you help me understand the need to encapsulate the drc_index as a struct?

> +
> +static int valid_drc_index_cb(struct of_drc_info *drc, void *idata,
> + void *drc_index, int *ret_code)
> +{
> + struct valid_drc_index_struct *cdata = idata;
> +
> + if (drc) {
> + if (!((drc->drc_index_start <= cdata->targ_drc_index) &&
> + (cdata->targ_drc_index <= drc->last_drc_index)))
> + return 0;
> + } else {
> + if (*((u32 *)drc_index) != cdata->targ_drc_index)
> + return 0;
> + }
> + (*ret_code) = 1;
> + return 1;
> +}
> +
> +static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
> +{
> + struct valid_drc_index_struct cdata = { drc_index };
> +
> + return check_cpu_drc_index(parent, valid_drc_index_cb, );
> +}
> +
>  static ssize_t dlpar_cpu_add(u32 drc_index)
>  {
>   struct device_node *dn, *parent;
> @@ -721,11 +757,43 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
>   return rc;
>  }
> 
> +struct cpus_to_add_struct {
> + struct device_node *parent;
> + u32 *cpu_drcs;
> + u32 cpus_to_add;
> + u32 cpus_found;
> +};
> +
> +static int cpus_to_add_cb(struct of_drc_info *drc, void *idata,
> + void *drc_index, int *ret_code)
> +{
> + struct cpus_to_add_struct *cdata = idata;
> +
> + if (drc) {
> + int k;
> +
> + for (k = 0; (k < drc->num_sequential_elems) &&
> + (cdata->cpus_found < cdata->cpus_to_add); k++) {
> + u32 idrc = drc->drc_index_start +
> + (k * drc->sequential_inc);
> +
> + if (dlpar_cpu_exists(cdata->parent, idrc))
> + continue;
> + cdata->cpu_drcs[cdata->cpus_found++] = idrc;
> 

Re: [RFC v4 2/4] hotplug/drcinfo: Provide parser with callback

2018-05-22 Thread Nathan Fontenot
On 05/22/2018 11:37 AM, Michael Bringmann wrote:
> This patch provides a common parse function for the ibm,drc-info
> property that can be modified by a callback function.  The caller
> provides a pointer to the function and a pointer to their unique
> data, and the parser provides the current lmb set from the struct.
> The callback function may return codes indicating that the parsing
> is complete, or should continue, along with an error code that may
> be returned to the caller.
> 
> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V4:
>   -- Update code to account for latest kernel checkins.
>   -- Rebased to 4.17-rc5 kernel
>   -- Some patch cleanup including file combination
> ---
>  arch/powerpc/include/asm/prom.h |7 +
>  arch/powerpc/platforms/pseries/of_helpers.c |   37 
> +++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index b04c5ce..2e947b3 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -94,6 +94,13 @@ struct of_drc_info {
>  extern int of_read_drc_info_cell(struct property **prop,
>   const __be32 **curval, struct of_drc_info *data);
> 
> +extern int drc_info_parser(struct device_node *dn,
> + int (*usercb)(struct of_drc_info *drc,
> + void *data,
> + void *optional_data,
> + int *ret_code),
> + char *opt_drc_type,
> + void *data);

After looking at patch 3 in this series, I think a couple of comments and
a small change may help. It was not clear at first what the callback function
was supposed to return. After reading users of this routine it appears that the
callback function is returning a bool value indicating whether or not the parser
should continue. I think documenting this and having the callback routine
return a bool may make this clearer.

Also, I see other places in the kernel name these types of routines as walk_*,
perhaps a slight name change to walk_drc_info_entries() may also make it clearer
what the code is doing.

-Nathan

> 
>  /*
>   * There are two methods for telling firmware what our capabilities are.
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 11b2ef1..a588ee6 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -6,6 +6,9 @@
>  #include 
> 
>  #include "of_helpers.h"
> +#include "pseries.h"
> +
> +#define  MAX_DRC_NAME_LEN 64
> 
>  /**
>   * pseries_of_derive_parent - basically like dirname(1)
> @@ -87,3 +90,37 @@ int of_read_drc_info_cell(struct property **prop, const 
> __be32 **curval,
>   return 0;
>  }
>  EXPORT_SYMBOL(of_read_drc_info_cell);
> +
> +int drc_info_parser(struct device_node *dn,
> + int (*usercb)(struct of_drc_info *drc,
> + void *data,
> + void *optional_data,
> + int *ret_code),
> + char *opt_drc_type,
> + void *data)
> +{
> + struct property *info;
> + unsigned int entries;
> + struct of_drc_info drc;
> + const __be32 *value;
> + int j, done = 0, ret_code = -EINVAL;
> +
> + info = of_find_property(dn, "ibm,drc-info", NULL);
> + if (info == NULL)
> + return -EINVAL;
> +
> + value = info->value;
> + entries = of_read_number(value++, 1);
> +
> + for (j = 0, done = 0; (j < entries) && (!done); j++) {
> + of_read_drc_info_cell(, , );
> +
> + if (opt_drc_type && strcmp(opt_drc_type, drc.drc_type))
> + continue;
> +
> + done = usercb(, data, NULL, _code);
> + }
> +
> + return ret_code;
> +}
> +EXPORT_SYMBOL(drc_info_parser);
> 



Re: [RFC v4 2/4] hotplug/drcinfo: Provide parser with callback

2018-05-22 Thread Nathan Fontenot
On 05/22/2018 11:37 AM, Michael Bringmann wrote:
> This patch provides a common parse function for the ibm,drc-info
> property that can be modified by a callback function.  The caller
> provides a pointer to the function and a pointer to their unique
> data, and the parser provides the current lmb set from the struct.
> The callback function may return codes indicating that the parsing
> is complete, or should continue, along with an error code that may
> be returned to the caller.
> 
> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V4:
>   -- Update code to account for latest kernel checkins.
>   -- Rebased to 4.17-rc5 kernel
>   -- Some patch cleanup including file combination
> ---
>  arch/powerpc/include/asm/prom.h |7 +
>  arch/powerpc/platforms/pseries/of_helpers.c |   37 
> +++
>  2 files changed, 44 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index b04c5ce..2e947b3 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -94,6 +94,13 @@ struct of_drc_info {
>  extern int of_read_drc_info_cell(struct property **prop,
>   const __be32 **curval, struct of_drc_info *data);
> 
> +extern int drc_info_parser(struct device_node *dn,
> + int (*usercb)(struct of_drc_info *drc,
> + void *data,
> + void *optional_data,

The optional_data parameter to the callback routine doesn't seem to be used.

-Nathan

> + int *ret_code),
> + char *opt_drc_type,
> + void *data);
> 
>  /*
>   * There are two methods for telling firmware what our capabilities are.
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 11b2ef1..a588ee6 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -6,6 +6,9 @@
>  #include 
> 
>  #include "of_helpers.h"
> +#include "pseries.h"
> +
> +#define  MAX_DRC_NAME_LEN 64
> 
>  /**
>   * pseries_of_derive_parent - basically like dirname(1)
> @@ -87,3 +90,37 @@ int of_read_drc_info_cell(struct property **prop, const 
> __be32 **curval,
>   return 0;
>  }
>  EXPORT_SYMBOL(of_read_drc_info_cell);
> +
> +int drc_info_parser(struct device_node *dn,
> + int (*usercb)(struct of_drc_info *drc,
> + void *data,
> + void *optional_data,
> + int *ret_code),
> + char *opt_drc_type,
> + void *data)
> +{
> + struct property *info;
> + unsigned int entries;
> + struct of_drc_info drc;
> + const __be32 *value;
> + int j, done = 0, ret_code = -EINVAL;
> +
> + info = of_find_property(dn, "ibm,drc-info", NULL);
> + if (info == NULL)
> + return -EINVAL;
> +
> + value = info->value;
> + entries = of_read_number(value++, 1);
> +
> + for (j = 0, done = 0; (j < entries) && (!done); j++) {
> + of_read_drc_info_cell(, , );
> +
> + if (opt_drc_type && strcmp(opt_drc_type, drc.drc_type))
> + continue;
> +
> + done = usercb(, data, NULL, _code);
> + }
> +
> + return ret_code;
> +}
> +EXPORT_SYMBOL(drc_info_parser);
> 



Re: [RFC v4 1/4] hotplug/drcinfo: Simplify parse ibm, drc-info structs

2018-05-22 Thread Nathan Fontenot
On 05/22/2018 11:37 AM, Michael Bringmann wrote:
> Replace use of of_prop_next_u32() when parsing the 'ibm,drc-info'
> structure to simplify and reduce parsing code.
>

You mention that this patch is to fix the parsing of the drc-info struct, but
you end up making changes to the parsing code in pseries_energy.c and
rpaphp_core.c. If there is a bug in the parsing code in those files, that
should be submitted as a separate patch outside of the drc-info fixups.

-Nathan
 
> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info 
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V4:
>   -- Rebased patch to 4.17-rc5 kernel
>   -- Replace of_prop_next_u32() by of_read_number()
> ---
>  arch/powerpc/platforms/pseries/of_helpers.c |   20 +---
>  arch/powerpc/platforms/pseries/pseries_energy.c |   10 --
>  drivers/pci/hotplug/rpaphp_core.c   |5 ++---
>  3 files changed, 11 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 6df192f..11b2ef1 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -65,29 +65,19 @@ int of_read_drc_info_cell(struct property **prop, const 
> __be32 **curval,
> 
>   /* Get drc-index-start:encode-int */
>   p2 = (const __be32 *)p;
> - p2 = of_prop_next_u32(*prop, p2, >drc_index_start);
> - if (!p2)
> - return -EINVAL;
> + data->drc_index_start = of_read_number(p2++, 1);
> 
>   /* Get drc-name-suffix-start:encode-int */
> - p2 = of_prop_next_u32(*prop, p2, >drc_name_suffix_start);
> - if (!p2)
> - return -EINVAL;
> + data->drc_name_suffix_start = of_read_number(p2++, 1);
> 
>   /* Get number-sequential-elements:encode-int */
> - p2 = of_prop_next_u32(*prop, p2, >num_sequential_elems);
> - if (!p2)
> - return -EINVAL;
> + data->num_sequential_elems = of_read_number(p2++, 1);
> 
>   /* Get sequential-increment:encode-int */
> - p2 = of_prop_next_u32(*prop, p2, >sequential_inc);
> - if (!p2)
> - return -EINVAL;
> + data->sequential_inc = of_read_number(p2++, 1);
> 
>   /* Get drc-power-domain:encode-int */
> - p2 = of_prop_next_u32(*prop, p2, >drc_power_domain);
> - if (!p2)
> - return -EINVAL;
> + data->drc_power_domain = of_read_number(p2++, 1);
> 
>   /* Should now know end of current entry */
>   (*curval) = (void *)p2;
> diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
> b/arch/powerpc/platforms/pseries/pseries_energy.c
> index 6ed2212..5261975 100644
> --- a/arch/powerpc/platforms/pseries/pseries_energy.c
> +++ b/arch/powerpc/platforms/pseries/pseries_energy.c
> @@ -61,9 +61,8 @@ static u32 cpu_to_drc_index(int cpu)
>   if (info == NULL)
>   goto err_of_node_put;
> 
> - value = of_prop_next_u32(info, NULL, _set_entries);
> - if (!value)
> - goto err_of_node_put;
> + value = info->value;
> + num_set_entries = of_read_number(value++, 1);
> 
>   for (j = 0; j < num_set_entries; j++) {
> 
> @@ -123,9 +122,8 @@ static int drc_index_to_cpu(u32 drc_index)
>   if (info == NULL)
>   goto err_of_node_put;
> 
> - value = of_prop_next_u32(info, NULL, _set_entries);
> - if (!value)
> - goto err_of_node_put;
> + value = info->value;
> + num_set_entries = of_read_number(value++, 1);
> 
>   for (j = 0; j < num_set_entries; j++) {
> 
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index fb5e084..435c1a0 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -236,9 +236,8 @@ static int rpaphp_check_drc_props_v2(struct device_node 
> *dn, char *drc_name,
>   if (info == NULL)
>   return -EINVAL;
> 
> - value = of_prop_next_u32(info, NULL, );
> - if (!value)
> - return -EINVAL;
> + value = info->value;
> + entries = of_read_number(value++, 1);
> 
>   for (j = 0; j < entries; j++) {
>   of_read_drc_info_cell(, , );
> 



Re: [RFC v5 2/6] powerpc/cpu: Conditionally acquire/release DRC index

2018-05-22 Thread Nathan Fontenot
On 05/21/2018 12:52 PM, Michael Bringmann wrote:
> powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
> skipping of DRC index acquire or release operations during the CPU
> add or remove operations.  This is intended to support subsequent
> changes to provide a 'CPU readd' operation.
> 
> Signed-off-by: Michael Bringmann 
> ---
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |   71 
> +++---
>  1 file changed, 42 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a408217..ec78cc6 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -474,7 +474,7 @@ static bool valid_cpu_drc_index(struct device_node 
> *parent, u32 drc_index)
>   );
>  }
> 
> -static ssize_t dlpar_cpu_add(u32 drc_index)
> +static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
>  {
>   struct device_node *dn, *parent;
>   int rc, saved_rc;
> @@ -499,19 +499,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   return -EINVAL;
>   }
> 
> - rc = dlpar_acquire_drc(drc_index);
> - if (rc) {
> - pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
> - rc, drc_index);
> - of_node_put(parent);
> - return -EINVAL;
> + if (acquire_drc) {
> + rc = dlpar_acquire_drc(drc_index);
> + if (rc) {
> + pr_warn("Failed to acquire DRC, rc: %d, drc index: 
> %x\n",
> + rc, drc_index);
> + of_node_put(parent);
> + return -EINVAL;
> + }
>   }
> 
>   dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
>   if (!dn) {
>   pr_warn("Failed call to configure-connector, drc index: %x\n",
>   drc_index);
> - dlpar_release_drc(drc_index);
> + if (acquire_drc)
> + dlpar_release_drc(drc_index);
>   of_node_put(parent);
>   return -EINVAL;
>   }
> @@ -526,8 +529,9 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
>   dn->name, rc, drc_index);
> 
> - rc = dlpar_release_drc(drc_index);
> - if (!rc)
> + if (acquire_drc)
> + rc = dlpar_release_drc(drc_index);
> + if (!rc || acquire_drc)
>   dlpar_free_cc_nodes(dn);
> 
>   return saved_rc;
> @@ -540,7 +544,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   dn->name, rc, drc_index);
> 
>   rc = dlpar_detach_node(dn);
> - if (!rc)
> + if (!rc && acquire_drc)
>   dlpar_release_drc(drc_index);
> 
>   return saved_rc;
> @@ -608,7 +612,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
> 
>  }
> 
> -static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
> +static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
> + bool release_drc)
>  {
>   int rc;
> 
> @@ -621,12 +626,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, 
> u32 drc_index)
>   return -EINVAL;
>   }
> 
> - rc = dlpar_release_drc(drc_index);
> - if (rc) {
> - pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
> - drc_index, dn->name, rc);
> - dlpar_online_cpu(dn);
> - return rc;
> + if (release_drc) {
> + rc = dlpar_release_drc(drc_index);
> + if (rc) {
> + pr_warn("Failed to release drc (%x) for CPU %s, rc: 
> %d\n",
> + drc_index, dn->name, rc);
> + dlpar_online_cpu(dn);
> + return rc;
> + }
>   }
> 
>   rc = dlpar_detach_node(dn);
> @@ -635,7 +642,10 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, 
> u32 drc_index)
> 
>   pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
> 
> - rc = dlpar_acquire_drc(drc_index);
> + if (release_drc)
> + rc = dlpar_acquire_drc(drc_index);
> + else
> + rc = 0;
>   if (!rc)
>   dlpar_online_cpu(dn);
> 
> @@ -664,7 +674,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 
> drc_index)
>   return dn;
>  }
> 
> -static int dlpar_cpu_remove_by_index(u32 drc_index)
> +static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
>  {
>   struct device_node *dn;
>   int rc;
> @@ -676,7 +686,7 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
>   return -ENODEV;
>   }
> 
> - rc = 

Re: [PATCH] pseries/memory-hotplug: Only update DT once per memory DLPAR request

2018-05-22 Thread Nathan Fontenot
Hi Michael,

I sent this patch out several weeks ago; I just wanted to make sure it hasn't
fallen off your radar.

Thanks,
-Nathan

On 04/20/2018 03:29 PM, Nathan Fontenot wrote:
> The updates to powerpc numa and memory hotplug code now use the
> in-kernel LMB array instead of the device tree. This change
> allows the pseries memory DLPAR code to only update the device
> tree once after successfully handling a DLPAR request.
> 
> Prior to the in-kernel LMB array, the numa code looked up the
> affinity for memory being added in the device tree, the code
> now looks this up in the LMB array. This change means the
> memory hotplug code can just update the affinity for an LMB
> in the LMB array instead of updating the device tree.
> 
> This also provides a savings in kernel memory. When updating the
> device tree old properties are never free'ed since there is no
> usecount on properties. This behavior leads to a new copy of the
> property being allocated every time a LMB is added or removed
> (i.e. a request to add 100 LMBs creates 100 new copies of the
> property). With this update only a single new property is created
> when a DLPAR request completes successfully.
> 
> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/drmem.h|5 ++
>  arch/powerpc/platforms/pseries/hotplug-memory.c |   55 
> +++
>  2 files changed, 21 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index ce242b9ea8c6..7c1d8e74b25d 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
>   void (*func)(struct drmem_lmb *, const __be32 **));
>  #endif
> 
> +static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
> +{
> + lmb->aa_index = 0x;
> +}
> +
>  #endif /* _ASM_POWERPC_LMB_H */
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index c1578f54c626..9a15d39995e5 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -163,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
>   return aa_index;
>  }
> 
> -static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
> +static int update_lmb_associativity_index(struct drmem_lmb *lmb)
>  {
>   struct device_node *parent, *lmb_node, *dr_node;
>   struct property *ala_prop;
> @@ -203,43 +203,14 @@ static u32 lookup_lmb_associativity_index(struct 
> drmem_lmb *lmb)
>   aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
> 
>   dlpar_free_cc_nodes(lmb_node);
> - return aa_index;
> -}
> -
> -static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
> -{
> - int rc, aa_index;
> -
> - lmb->flags |= DRCONF_MEM_ASSIGNED;
> 
> - aa_index = lookup_lmb_associativity_index(lmb);
>   if (aa_index < 0) {
> - pr_err("Couldn't find associativity index for drc index %x\n",
> -lmb->drc_index);
> - return aa_index;
> + pr_err("Could not find LMB associativity\n");
> + return -1;
>   }
> 
>   lmb->aa_index = aa_index;
> -
> - rtas_hp_event = true;
> - rc = drmem_update_dt();
> - rtas_hp_event = false;
> -
> - return rc;
> -}
> -
> -static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
> -{
> - int rc;
> -
> - lmb->flags &= ~DRCONF_MEM_ASSIGNED;
> - lmb->aa_index = 0x;
> -
> - rtas_hp_event = true;
> - rc = drmem_update_dt();
> - rtas_hp_event = false;
> -
> - return rc;
> + return 0;
>  }
> 
>  static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
> @@ -428,7 +399,9 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
>   /* Update memory regions for memory remove */
>   memblock_remove(lmb->base_addr, block_sz);
> 
> - dlpar_remove_device_tree_lmb(lmb);
> + invalidate_lmb_associativity_index(lmb);
> + lmb->flags &= ~DRCONF_MEM_ASSIGNED;
> +
>   return 0;
>  }
> 
> @@ -688,10 +661,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
>   if (lmb->flags & DRCONF_MEM_ASSIGNED)
>   return -EINVAL;
> 
> - rc = dlpar_add_device_tree_lmb(lmb);
> + rc = update_lmb_associativity_index(lmb);
>   if (rc) {
> - pr_err("Couldn't update device tree for drc index %x\n

Re: [RFC v3 1/4] powerpc/hotplug/drcinfo: Fix bugs parsing ibm,drc-info structs

2018-05-18 Thread Nathan Fontenot
On 05/17/2018 05:41 PM, Michael Bringmann wrote:
> [Replace/withdraw previous patch submission to ensure that testing
> of related patches on similar hardware progresses together.]
> 
> This patch fixes a memory parsing bug when using of_prop_next_u32
> calls at the start of a structure.  Depending upon the value of
> "cur" memory pointer argument to of_prop_next_u32, it will or it
> won't advance the value of the returned memory pointer by the
> size of one u32.  This patch corrects the code to deal with that
> indexing feature when parsing the ibm,drc-info structs for CPUs.
> Also, need to advance the pointer at the end of_read_drc_info_cell
> for same reason.
> 

I see that you provide an update for of_read_drc_info_cell to fix the
unexpected behavior you're seeing, but I'm not sure why you're updating
the code in pseries_energy.c and rpaphp_core.c. Can you provide some
additional information as to why these functions also need to be updated?

> Signed-off-by: Michael Bringmann 
> Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info 
> firmware feature" -- end of patch series applied to powerpc next)
> ---
> Changes in V3:
>   -- Rebased patch to 4.17-rc5 kernel
> ---
>  arch/powerpc/platforms/pseries/of_helpers.c |5 ++---
>  arch/powerpc/platforms/pseries/pseries_energy.c |2 ++
>  drivers/pci/hotplug/rpaphp_core.c   |1 +
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 6df192f..20598b2 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const 
> __be32 **curval,
> 
>   /* Get drc-index-start:encode-int */
>   p2 = (const __be32 *)p;
> - p2 = of_prop_next_u32(*prop, p2, >drc_index_start);
> - if (!p2)
> - return -EINVAL;
> + data->drc_index_start = of_read_number(p2, 1);

This appears to resolve advancing the pointer for the beginning of a struct.

> 
>   /* Get drc-name-suffix-start:encode-int */
>   p2 = of_prop_next_u32(*prop, p2, >drc_name_suffix_start);
> @@ -88,6 +86,7 @@ int of_read_drc_info_cell(struct property **prop, const 
> __be32 **curval,
>   p2 = of_prop_next_u32(*prop, p2, >drc_power_domain);
>   if (!p2)
>   return -EINVAL;
> + p2++;

...but why is the advancement needed here? of_prop_next_u32 should have 
advanced it, correct?

-Nathan

> 
>   /* Should now know end of current entry */
>   (*curval) = (void *)p2;
> diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
> b/arch/powerpc/platforms/pseries/pseries_energy.c
> index 6ed2212..c7d84aa 100644
> --- a/arch/powerpc/platforms/pseries/pseries_energy.c
> +++ b/arch/powerpc/platforms/pseries/pseries_energy.c
> @@ -64,6 +64,7 @@ static u32 cpu_to_drc_index(int cpu)
>   value = of_prop_next_u32(info, NULL, _set_entries);
>   if (!value)
>   goto err_of_node_put;
> + value++;
> 
>   for (j = 0; j < num_set_entries; j++) {
> 
> @@ -126,6 +127,7 @@ static int drc_index_to_cpu(u32 drc_index)
>   value = of_prop_next_u32(info, NULL, _set_entries);
>   if (!value)
>   goto err_of_node_put;
> + value++;
> 
>   for (j = 0; j < num_set_entries; j++) {
> 
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index fb5e084..dccdf62 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -239,6 +239,7 @@ static int rpaphp_check_drc_props_v2(struct device_node 
> *dn, char *drc_name,
>   value = of_prop_next_u32(info, NULL, );
>   if (!value)
>   return -EINVAL;
> + value++;
> 
>   for (j = 0; j < entries; j++) {
>   of_read_drc_info_cell(, , );
> 



Re: [RFC v4 2/3] powerpc migration/cpu: Associativity & cpu changes

2018-05-18 Thread Nathan Fontenot
On 05/17/2018 05:26 PM, Michael Bringmann wrote:
> powerpc migration/cpu: Now apply changes to the associativity of cpus
> for the topology of LPARS in Post Migration events.  Recognize more
> changes to the associativity of memory blocks described by the
> 'cpu' properties when processing the topology of LPARS in Post Migration
> events.  Previous efforts only recognized whether a memory block's
> assignment had changed in the property.  Changes here include:
> 
> * Provide hotplug CPU 'readd by index' operation
> * Checking for changes in cpu associativity and making 'readd' calls
>   when differences are observed.
> * Queue up  changes to CPU properties so that they may take place
>   after all PowerPC device-tree changes have been applied i.e. after
>   the device hotplug is released in the mobility code.

This kinda feels like three different patches in one. Any reason not to split
this into three patches? Perhaps at the least split the last item into its
own patch.

> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes include:
>   -- Rearrange patches to co-locate CPU property-related changes.
>   -- Modify dlpar_cpu_add & dlpar_cpu_remove to skip DRC index acquire
>  or release operations during the CPU readd process.
>   -- Correct a bug in DRC index selection for queued operation.
>   -- Rebase to 4.17-rc5 kernel
> ---
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |  123 
> +++---
>  arch/powerpc/platforms/pseries/mobility.c|3 +
>  2 files changed, 95 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a408217..23d4cb8 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -474,7 +474,7 @@ static bool valid_cpu_drc_index(struct device_node 
> *parent, u32 drc_index)
>   );
>  }
> 
> -static ssize_t dlpar_cpu_add(u32 drc_index)
> +static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
>  {
>   struct device_node *dn, *parent;
>   int rc, saved_rc;
> @@ -499,19 +499,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   return -EINVAL;
>   }
> 
> - rc = dlpar_acquire_drc(drc_index);
> - if (rc) {
> - pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
> - rc, drc_index);
> - of_node_put(parent);
> - return -EINVAL;
> + if (acquire_drc) {
> + rc = dlpar_acquire_drc(drc_index);
> + if (rc) {
> + pr_warn("Failed to acquire DRC, rc: %d, drc index: 
> %x\n",
> + rc, drc_index);
> + of_node_put(parent);
> + return -EINVAL;
> + }
>   }
> 
>   dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
>   if (!dn) {
>   pr_warn("Failed call to configure-connector, drc index: %x\n",
>   drc_index);
> - dlpar_release_drc(drc_index);
> + if (acquire_drc)
> + dlpar_release_drc(drc_index);
>   of_node_put(parent);
>   return -EINVAL;
>   }
> @@ -526,8 +529,9 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
>   dn->name, rc, drc_index);
> 
> - rc = dlpar_release_drc(drc_index);
> - if (!rc)
> + if (acquire_drc)
> + rc = dlpar_release_drc(drc_index);
> + if (!rc || acquire_drc)
>   dlpar_free_cc_nodes(dn);

This seems like it would be more readable if everything were inside the
if (acquire_drc) block.

> 
>   return saved_rc;
> @@ -540,7 +544,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>   dn->name, rc, drc_index);
> 
>   rc = dlpar_detach_node(dn);
> - if (!rc)
> + if (!rc && acquire_drc)
>   dlpar_release_drc(drc_index);
> 
>   return saved_rc;
> @@ -608,12 +612,13 @@ static int dlpar_offline_cpu(struct device_node *dn)
> 
>  }
> 
> -static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
> +static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
> + bool release_drc)
>  {
>   int rc;
> 
> - pr_debug("Attempting to remove CPU %s, drc index: %x\n",
> -  dn->name, drc_index);
> + pr_debug("Attempting to remove CPU %s, drc index: %x (%d)\n",
> +  dn->name, drc_index, release_drc);
> 
>   rc = dlpar_offline_cpu(dn);
>   if (rc) {
> @@ -621,12 +626,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, 
> u32 drc_index)
>   return -EINVAL;
>   }
> 
> - rc = dlpar_release_drc(drc_index);
> - if (rc) {
> -  

Re: [RFC v4 1/3] powerpc migration/drmem: Modify DRMEM code to export more features

2018-05-18 Thread Nathan Fontenot
On 05/17/2018 05:26 PM, Michael Bringmann wrote:
> powerpc migration/drmem: Export many of the functions of DRMEM to
> parse "ibm,dynamic-memory" and "ibm,dynamic-memory-v2" during
> hotplug operations and for Post Migration events.
> 
> Also modify the DRMEM initialization code to allow it to,
> 
> * Be called after system initialization
> * Provide a separate user copy of the LMB array that is produces
> * Free the user copy upon request
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in RFC:
>   -- Separate DRMEM changes into a standalone patch
>   -- Do not export excess functions.  Make exported names more explicit.
>   -- Add new iterator to work through a pair of drmem_info arrays.
>   -- Modify DRMEM code to replace usages of dt_root_addr_cells, and
>  dt_mem_next_cell, as these are only available at first boot.
>   -- Rebase to 4.17-rc5 kernel
> ---
>  arch/powerpc/include/asm/drmem.h |   10 +
>  arch/powerpc/mm/drmem.c  |   78 
> +++---
>  2 files changed, 66 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index ce242b9..c964b89 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -35,6 +35,13 @@ struct drmem_lmb_info {
>   _info->lmbs[0],   \
>   _info->lmbs[drmem_info->n_lmbs - 1])
> 
> +#define for_each_pair_drmem_lmb(dinfo1, lmb1, dinfo2, lmb2)  \
> + for ((lmb1) = (>lmbs[0]),   \
> +  (lmb2) = (>lmbs[0]);   \
> + ((lmb1) <= (>lmbs[dinfo1->n_lmbs - 1])) &&  \
> + ((lmb2) <= (>lmbs[dinfo2->n_lmbs - 1]));\
> +  (lmb1)++, (lmb2)++)
> +
>  /*
>   * The of_drconf_cell_v1 struct defines the layout of the LMB data
>   * specified in the ibm,dynamic-memory device tree property.
> @@ -94,6 +101,9 @@ void __init walk_drmem_lmbs(struct device_node *dn,
>   void (*func)(struct drmem_lmb *, const __be32 **));
>  int drmem_update_dt(void);
> 
> +struct drmem_lmb_info* drmem_init_lmbs(struct property *prop);
> +void drmem_lmbs_free(struct drmem_lmb_info *dinfo);
> +
>  #ifdef CONFIG_PPC_PSERIES
>  void __init walk_drmem_lmbs_early(unsigned long node,
>   void (*func)(struct drmem_lmb *, const __be32 **));
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 3f18036..d9b281c 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -20,6 +20,7 @@
> 
>  static struct drmem_lmb_info __drmem_info;
>  struct drmem_lmb_info *drmem_info = &__drmem_info;
> +static int n_root_addr_cells;
> 
>  u64 drmem_lmb_memory_max(void)
>  {
> @@ -193,12 +194,13 @@ int drmem_update_dt(void)
>   return rc;
>  }
> 
> -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
> +static void read_drconf_v1_cell(struct drmem_lmb *lmb,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> 
> - lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
> + lmb->base_addr = of_read_number(p, n_root_addr_cells);
> + p += n_root_addr_cells;
>   lmb->drc_index = of_read_number(p++, 1);
> 
>   p++; /* skip reserved field */
> @@ -209,7 +211,7 @@ static void __init read_drconf_v1_cell(struct drmem_lmb 
> *lmb,
>   *prop = p;
>  }
> 
> -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 
> *usm,
> +static void __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *data,
>   void (*func)(struct drmem_lmb *, const __be32 **))
>  {
>   struct drmem_lmb lmb;
> @@ -221,17 +223,18 @@ static void __init __walk_drmem_v1_lmbs(const __be32 
> *prop, const __be32 *usm,
> 
>   for (i = 0; i < n_lmbs; i++) {
>   read_drconf_v1_cell(, );
> - func(, );
> + func(, );

Is there a need to change the variable name from usm to data (both here and
below)?

>   }
>  }
> 
> -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
> +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> 
>   dr_cell->seq_lmbs = of_read_number(p++, 1);
> - dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, );
> + dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
> + p += n_root_addr_cells;
>   dr_cell->drc_index = of_read_number(p++, 1);
>   dr_cell->aa_index = of_read_number(p++, 1);
>   dr_cell->flags = of_read_number(p++, 1);
> @@ -239,7 +242,7 @@ static void __init read_drconf_v2_cell(struct 
> of_drconf_cell_v2 *dr_cell,
>   *prop = p;
>  }
> 
> -static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 
> *usm,
> +static void __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *data,
>   void 

Re: [RFC v2 3/3] postmigration/memory: Associativity & ibm,dynamic-memory-v2

2018-04-26 Thread Nathan Fontenot
On 04/24/2018 04:35 PM, Michael Bringmann wrote:
> See below.
> 
> On 04/24/2018 12:17 PM, Nathan Fontenot wrote:
>> On 02/26/2018 02:53 PM, Michael Bringmann wrote:
>>> postmigration/memory: Now apply changes to the associativity of memory
>>> blocks described by the 'ibm,dynamic-memory-v2' property regarding
>>> the topology of LPARS in Post Migration events.
>>>
>>> * Extend the previous work done for the 'ibm,associativity-lookup-array'
>>>   to apply to either property 'ibm,dynamic-memory' or
>>>   'ibm,dynamic-memory-v2', whichever is present.
>>> * Add new code to parse the 'ibm,dynamic-memory-v2' property looking
>>>   for differences in block 'assignment', associativity indexes per
>>>   block, and any other difference currently known.
>>> * Rewrite some of the original code to parse the 'ibm,dynamic-memory'
>>>   property to take advantage of LMB parsing code.
>>>
>>> When block differences are recognized, the memory block may be removed,
>>> added, or updated depending upon the state of the new device tree
>>> property and differences from the migrated value of the property.
>>>
>>
>> The only thing we need to check during LPM is affinity updates, memory
>> is not added or removed as part of LPM.
>>
>> I think a slightly different approach to this may be worth considering.
>> One of the goals of the drmem.c code was to remove the need to parse the
>> device tree for memory directly. For this update, I think we could modify
>> the code that builds the drmem_info data so that it can return a drmem_info
>> struct instead of assuming to set the global one.
>>
>> This change would allow you to do a straight compare on the global vs. the
>> new info from the updated device tree property. I think this would be cleaner
>> and may be able to use the same routine for V1 and V2 properties.
> 
> The code dealing with the 'ibm,associativity' array updated cleanly to use
> the same function to scan the LMBs regardless of the version of the 
> properties.
> 
> The code dealing with changes to 'ibm,dynamic-memory-v2' is a mirror of the
> code in 'pseries_update_drconf_memory' that deals with changes to the property
> 'ibm,dynamic-memory', so it should also be updated.  On the other hand, do we
> need to consider the memory requirements of creating/cloning the drmem_info
> structure to provide a copy based on the new 'dynamic-memory' property?
> Or is this not an issue?

If done correctly, using the drmem code to create a drmem_info struct from
the new memory property would allow us to use the same comparison routine
for v1 and v2 versions of the property.

The size of the drmem_info data is not big enough to be concerned about
memory requirements when making a copy. 

-Nathan

> 
>>
>>> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>
>>> ---
>>> Changes in RFC v2:
>>>   -- Reuse existing parser code from 'drmem.c' in parsing property
>>>  'imb,dynamic-memory-v2' for migration.
>>>   -- Fix crash during migration that occurs on non-VPHN systems
>>>  when attempting to reset topology timer.
>>>   -- Change section of a support function + variable from __init 
>>>  to normal runtime to make them visible to migration code.
>>> ---
>>>  arch/powerpc/include/asm/drmem.h|8 +
>>>  arch/powerpc/mm/drmem.c |   23 ++-
>>>  arch/powerpc/mm/numa.c  |3 
>>>  arch/powerpc/platforms/pseries/hotplug-memory.c |  175 
>>> +++
>>>  drivers/of/fdt.c|4 -
>>>  include/linux/of_fdt.h  |2 
>>>  6 files changed, 170 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/drmem.h 
>>> b/arch/powerpc/include/asm/drmem.h
>>> index 47a7012..e4773c9 100644
>>> --- a/arch/powerpc/include/asm/drmem.h
>>> +++ b/arch/powerpc/include/asm/drmem.h
>>> @@ -92,6 +92,14 @@ void __init walk_drmem_lmbs(struct device_node *dn,
>>> void (*func)(struct drmem_lmb *, const __be32 **));
>>>  int drmem_update_dt(void);
>>>
>>> +void walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *data,
>>> +   void (*func)(struct drmem_lmb *, const __be32 **));
>>> +
>>> +void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>>> +   const __be32 **prop);
>>> +void walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *data,
>>

Re: [RFC v2 1/3] hotplug/mobility: Apply assoc updates for Post Migration Topo

2018-04-26 Thread Nathan Fontenot
On 04/24/2018 04:33 PM, Michael Bringmann wrote:
> See comments below:
> 
> On 04/24/2018 11:56 AM, Nathan Fontenot wrote:
>> On 02/26/2018 02:52 PM, Michael Bringmann wrote:
>>> hotplug/mobility: Recognize more changes to the associativity of
>>> memory blocks described by the 'ibm,dynamic-memory' and 'cpu'
>>> properties when processing the topology of LPARS in Post Migration
>>> events.  Previous efforts only recognized whether a memory block's
>>> assignment had changed in the property.  Changes here include:
>>>
>>> * Checking the aa_index values of the old/new properties and 'readd'
>>>   any block for which the setting has changed.
>>> * Checking for changes in cpu associativity and making 'readd' calls
>>>   when differences are observed.
>>
>> As part of the post-migration updates do you need to hold a lock
>> so that we don't attempt to process any of the cpu/memory changes
>> while the device tree is being updated?
>>
>> You may be able to grab the device hotplug lock for this.
> 
> The CPU Re-add process reuses the dlpar_cpu_remove / dlpar_cpu_add
> code for POWERPC.  These functions end up invoking device_online() /
> device_offline() which in turn end up invoking the 'cpus_write_lock/unlock'
> around every kernel change to the CPU structures.  It was modeled
> on the Memory Re-add process as we discussed a while back, which
> also uses device_online and a corresponding write lock for each
> LMB processed.
> 
> Do you see a need for a coarser granularity of locking around
> all or a group of the cpu/memory changes?  The data structures
> that the kernel outside of powerpc uses for CPUs and LMBs seem
> to be quite well isolated from the device-tree properties.

My thinking was for memory and CPU updates, the idea being that all
updates are queued up until after the post-LPM device tree updates happen.
Grabbing the device_hotplug lock while updating the device tree would
prevent any of the queued CPU/memory updates from happening.

> 
>>
>>>
>>> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>
>>> ---
>>> Changes in RFC:
>>>   -- Simplify code to update CPU nodes during mobility checks.
>>>  Remove functions to generate extra HP_ELOG messages in favor
>>>  of direct function calls to dlpar_cpu_readd_by_index.
>>>   -- Move check for "cpu" node type from pseries_update_cpu to
>>>  pseries_smp_notifier in 'hotplug-cpu.c'
>>>   -- Remove functions 'pseries_memory_readd_by_index' and
>>>  'pseries_cpu_readd_by_index' as no longer needed outside of
>>>  'mobility.c'.
>>> ---
>>>  arch/powerpc/platforms/pseries/hotplug-cpu.c|   69 
>>> +++
>>>  arch/powerpc/platforms/pseries/hotplug-memory.c |6 ++
>>>  2 files changed, 75 insertions(+)
>>>
>>> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
>>> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>>> index a7d14aa7..91ef22a 100644
>>> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
>>> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>>> @@ -636,6 +636,27 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
>>> return rc;
>>>  }
>>>
>>> +static int dlpar_cpu_readd_by_index(u32 drc_index)
>>> +{
>>> +   int rc = 0;
>>> +
>>> +   pr_info("Attempting to update CPU, drc index %x\n", drc_index);
>>
>> Should make this say we are re-adding the CPU, it's a bit more specific as
>> to what is really happening.
> 
> Okay.  I will update the notice from dlpar_memory_readd_by_index, as well.

Looks like your current message mirrors what the memory readd routine has,
so let's just keep the message as is.

-Nathan

>>
>>> +
>>> +   if (dlpar_cpu_remove_by_index(drc_index))
>>> +   rc = -EINVAL;
>>> +   else if (dlpar_cpu_add(drc_index))
>>> +   rc = -EINVAL;
>>> +
>>> +   if (rc)
>>> +   pr_info("Failed to update cpu at drc_index %lx\n",
>>> +   (unsigned long int)drc_index);
>>> +   else
>>> +   pr_info("CPU at drc_index %lx was updated\n",
>>> +   (unsigned long int)drc_index);
>>> +
>>> +   return rc;
>>> +}
>>> +
>>>  static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
>>>  {
>>> struct device_node *dn;
>>> @@ -826,6 +847,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *

Re: [RFC v2 3/3] postmigration/memory: Associativity & ibm,dynamic-memory-v2

2018-04-24 Thread Nathan Fontenot
On 02/26/2018 02:53 PM, Michael Bringmann wrote:
> postmigration/memory: Now apply changes to the associativity of memory
> blocks described by the 'ibm,dynamic-memory-v2' property regarding
> the topology of LPARS in Post Migration events.
> 
> * Extend the previous work done for the 'ibm,associativity-lookup-array'
>   to apply to either property 'ibm,dynamic-memory' or
>   'ibm,dynamic-memory-v2', whichever is present.
> * Add new code to parse the 'ibm,dynamic-memory-v2' property looking
>   for differences in block 'assignment', associativity indexes per
>   block, and any other difference currently known.
> * Rewrite some of the original code to parse the 'ibm,dynamic-memory'
>   property to take advantage of LMB parsing code.
> 
> When block differences are recognized, the memory block may be removed,
> added, or updated depending upon the state of the new device tree
> property and differences from the migrated value of the property.
> 

The only thing we need to check during LPM is affinity updates; memory
is not added or removed as part of LPM.

I think a slightly different approach to this may be worth considering.
One of the goals of the drmem.c code was to remove the need to parse the
device tree for memory directly. For this update, I think we could modify
the code that builds the drmem_info data so that it can return a drmem_info
struct instead of assuming it should set the global one.

This change would allow you to do a straight compare on the global vs. the
new info from the updated device tree property. I think this would be cleaner
and may be able to use the same routine for V1 and V2 properties.

> Signed-off-by: Michael Bringmann 
> ---
> Changes in RFC v2:
>   -- Reuse existing parser code from 'drmem.c' in parsing property
>  'ibm,dynamic-memory-v2' for migration.
>   -- Fix crash during migration that occurs on non-VPHN systems
>  when attempting to reset topology timer.
>   -- Change section of a support function + variable from __init 
>  to normal runtime to make them visible to migration code.
> ---
>  arch/powerpc/include/asm/drmem.h|8 +
>  arch/powerpc/mm/drmem.c |   23 ++-
>  arch/powerpc/mm/numa.c  |3 
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  175 
> +++
>  drivers/of/fdt.c|4 -
>  include/linux/of_fdt.h  |2 
>  6 files changed, 170 insertions(+), 45 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index 47a7012..e4773c9 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -92,6 +92,14 @@ void __init walk_drmem_lmbs(struct device_node *dn,
>   void (*func)(struct drmem_lmb *, const __be32 **));
>  int drmem_update_dt(void);
> 
> +void walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *data,
> + void (*func)(struct drmem_lmb *, const __be32 **));
> +
> +void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
> + const __be32 **prop);
> +void walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *data,
> + void (*func)(struct drmem_lmb *, const __be32 **));
> +
>  #ifdef CONFIG_PPC_PSERIES
>  void __init walk_drmem_lmbs_early(unsigned long node,
>   void (*func)(struct drmem_lmb *, const __be32 **));
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 31dbe14..e47a6e0 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -192,7 +192,7 @@ int drmem_update_dt(void)
>   return rc;
>  }
> 
> -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
> +static void read_drconf_v1_cell(struct drmem_lmb *lmb,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> @@ -208,7 +208,7 @@ static void __init read_drconf_v1_cell(struct drmem_lmb 
> *lmb,
>   *prop = p;
>  }
> 
> -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 
> *usm,
> +void walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *data,
>   void (*func)(struct drmem_lmb *, const __be32 **))
>  {
>   struct drmem_lmb lmb;
> @@ -218,11 +218,12 @@ static void __init __walk_drmem_v1_lmbs(const __be32 
> *prop, const __be32 *usm,
> 
>   for (i = 0; i < n_lmbs; i++) {
>   read_drconf_v1_cell(&lmb, &prop);
> - func(&lmb, &usm);
> + func(&lmb, &data);
>   }
>  }
> +EXPORT_SYMBOL(walk_drmem_v1_lmbs);
> 
> -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
> +void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
>  const __be32 **prop)
>  {
>   const __be32 *p = *prop;
> @@ -235,8 +236,9 @@ static void __init read_drconf_v2_cell(struct 
> of_drconf_cell_v2 *dr_cell,
> 
>   *prop = p;
>  }
> 

Re: [RFC v2 2/3] postmigration/memory: Review assoc lookup array changes

2018-04-24 Thread Nathan Fontenot


On 02/26/2018 02:53 PM, Michael Bringmann wrote:
> postmigration/memory: In an LPAR migration scenario, the property
> "ibm,associativity-lookup-arrays" may change.  In the event that a
> row of the array differs, locate all assigned memory blocks with that
> 'aa_index' and 're-add' them to the system memory block data structures.
> In the process of the 're-add', the appropriate entry of the property
> 'ibm,dynamic-memory' would be updated as well as any other applicable
> system data structures.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in RFC v2:
>   -- Simplify code to update memory nodes during mobility checks.
>  Remove functions to generate extra HP_ELOG messages in favor
>  of direct function calls to dlpar_memory_readd_by_index.
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  120 
> +++
>  1 file changed, 120 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 2341eae..b63181d 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -1051,6 +1051,123 @@ static int pseries_update_drconf_memory(struct 
> of_reconfig_data *pr)
>   return rc;
>  }
> 
> +struct assoc_arrays {
> + u32 n_arrays;
> + u32 array_sz;
> + const __be32 *arrays;
> +};
> +
> +static int pseries_update_ala_memory_aai(int aa_index,
> + struct property *dmprop)
> +{
> + struct of_drconf_cell *drmem;
> + u32 entries;
> + __be32 *p;
> + int i;
> + int rc = 0;
> +
> + p = (__be32 *) dmprop->value;
> + if (!p)
> + return -EINVAL;
> +
> + /* The first int of the property is the number of lmb's
> +  * described by the property. This is followed by an array
> +  * of of_drconf_cell entries. Get the number of entries
> +  * and skip to the array of of_drconf_cell's.
> +  */
> + entries = be32_to_cpu(*p++);
> + drmem = (struct of_drconf_cell *)p;
> +
> + for (i = 0; i < entries; i++) {
> + if ((be32_to_cpu(drmem[i].aa_index) != aa_index) &&
> + (be32_to_cpu(drmem[i].flags) & DRCONF_MEM_ASSIGNED)) {
> + rc = dlpar_memory_readd_by_index(
> + be32_to_cpu(drmem[i].drc_index));
> + }
> + }
> +
> + return rc;
> +}
> +
> +static int pseries_update_ala_memory(struct of_reconfig_data *pr)
> +{
> + struct assoc_arrays new_ala, old_ala;
> + struct device_node *dn;
> + struct property *dmprop;
> + __be32 *p;
> + int i, lim;
> +
> + if (rtas_hp_event)
> + return 0;
> +
> + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
> + if (!dn)
> + return -ENODEV;
> +
> + dmprop = of_find_property(dn, "ibm,dynamic-memory", NULL);
> + if (!dmprop) {
> + of_node_put(dn);
> + return -ENODEV;
> + }
> +
> + /*
> +  * The layout of the ibm,associativity-lookup-arrays
> +  * property is a number N indicating the number of
> +  * associativity arrays, followed by a number M
> +  * indicating the size of each associativity array,
> +  * followed by a list of N associativity arrays.
> +  */
> +
> + p = (__be32 *) pr->old_prop->value;
> + if (!p) {
> + of_node_put(dn);
> + return -EINVAL;
> + }
> + old_ala.n_arrays = of_read_number(p++, 1);
> + old_ala.array_sz = of_read_number(p++, 1);
> + old_ala.arrays = p;
> +
> + p = (__be32 *) pr->prop->value;
> + if (!p) {
> + of_node_put(dn);
> + return -EINVAL;
> + }
> + new_ala.n_arrays = of_read_number(p++, 1);
> + new_ala.array_sz = of_read_number(p++, 1);
> + new_ala.arrays = p;
> +
> + lim = (new_ala.n_arrays > old_ala.n_arrays) ? old_ala.n_arrays :
> + new_ala.n_arrays;
> +
> + if (old_ala.array_sz == new_ala.array_sz) {
> +
> + for (i = 0; i < lim; i++) {
> + int index = (i * new_ala.array_sz);
> +
> + if (!memcmp(_ala.arrays[index],
> + _ala.arrays[index],
> + new_ala.array_sz))
> + continue;
> +
> + pseries_update_ala_memory_aai(i, dmprop);
> + }
> +
> + for (i = lim; i < new_ala.n_arrays; i++)
> + pseries_update_ala_memory_aai(i, dmprop);
> +
> + } else {
> + /* Update all entries representing these rows;
> +  * as all rows have different sizes, none can
> +  * have equivalent values.
> +  */
> + for (i = 0; i < lim; i++)
> + pseries_update_ala_memory_aai(i, dmprop);
> + }
> +
> + of_node_put(dn);

Re: [RFC v2 1/3] hotplug/mobility: Apply assoc updates for Post Migration Topo

2018-04-24 Thread Nathan Fontenot
On 02/26/2018 02:52 PM, Michael Bringmann wrote:
> hotplug/mobility: Recognize more changes to the associativity of
> memory blocks described by the 'ibm,dynamic-memory' and 'cpu'
> properties when processing the topology of LPARS in Post Migration
> events.  Previous efforts only recognized whether a memory block's
> assignment had changed in the property.  Changes here include:
> 
> * Checking the aa_index values of the old/new properties and 'readd'
>   any block for which the setting has changed.
> * Checking for changes in cpu associativity and making 'readd' calls
>   when differences are observed.

As part of the post-migration updates do you need to hold a lock
so that we don't attempt to process any of the cpu/memory changes
while the device tree is being updated?

You may be able to grab the device hotplug lock for this.

> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in RFC:
>   -- Simplify code to update CPU nodes during mobility checks.
>  Remove functions to generate extra HP_ELOG messages in favor
>  of direct function calls to dlpar_cpu_readd_by_index.
>   -- Move check for "cpu" node type from pseries_update_cpu to
>  pseries_smp_notifier in 'hotplug-cpu.c'
>   -- Remove functions 'pseries_memory_readd_by_index' and
>  'pseries_cpu_readd_by_index' as no longer needed outside of
>  'mobility.c'.
> ---
>  arch/powerpc/platforms/pseries/hotplug-cpu.c|   69 
> +++
>  arch/powerpc/platforms/pseries/hotplug-memory.c |6 ++
>  2 files changed, 75 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a7d14aa7..91ef22a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -636,6 +636,27 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
>   return rc;
>  }
> 
> +static int dlpar_cpu_readd_by_index(u32 drc_index)
> +{
> + int rc = 0;
> +
> + pr_info("Attempting to update CPU, drc index %x\n", drc_index);

Should make this say we are re-adding the CPU, it's a bit more specific as
to what is really happening.

> +
> + if (dlpar_cpu_remove_by_index(drc_index))
> + rc = -EINVAL;
> + else if (dlpar_cpu_add(drc_index))
> + rc = -EINVAL;
> +
> + if (rc)
> + pr_info("Failed to update cpu at drc_index %lx\n",
> + (unsigned long int)drc_index);
> + else
> + pr_info("CPU at drc_index %lx was updated\n",
> + (unsigned long int)drc_index);
> +
> + return rc;
> +}
> +
>  static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
>  {
>   struct device_node *dn;
> @@ -826,6 +847,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
>   else
>   rc = -EINVAL;
>   break;
> + case PSERIES_HP_ELOG_ACTION_READD:
> + rc = dlpar_cpu_readd_by_index(drc_index);
> + break;
>   default:
>   pr_err("Invalid action (%d) specified\n", hp_elog->action);
>   rc = -EINVAL;
> @@ -876,12 +900,53 @@ static ssize_t dlpar_cpu_release(const char *buf, 
> size_t count)
> 
>  #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
> 
> +static int pseries_update_cpu(struct of_reconfig_data *pr)
> +{
> + u32 old_entries, new_entries;
> + __be32 *p, *old_assoc, *new_assoc;
> + int rc = 0;
> +
> + /* So far, we only handle the 'ibm,associativity' property,
> +  * here.
> +  * The first int of the property is the number of domains
> +  * described.  This is followed by an array of level values.
> +  */
> + p = (__be32 *) pr->old_prop->value;
> + if (!p)
> + return -EINVAL;
> + old_entries = be32_to_cpu(*p++);
> + old_assoc = p;
> +
> + p = (__be32 *)pr->prop->value;
> + if (!p)
> + return -EINVAL;
> + new_entries = be32_to_cpu(*p++);
> + new_assoc = p;
> +
> + if (old_entries == new_entries) {
> + int sz = old_entries * sizeof(int);
> +
> + if (!memcmp(old_assoc, new_assoc, sz))
> + rc = dlpar_cpu_readd_by_index(
> + be32_to_cpu(pr->dn->phandle));
> +
> + } else {
> + rc = dlpar_cpu_readd_by_index(
> + be32_to_cpu(pr->dn->phandle));
> + }
> +
> + return rc;
> +}

Do we need to do the full compare of the new vs. the old affinity property?

I would think we would only get an updated property if the property changes.
We don't care what changes in the property at this point, only that it changed.
You could just call dlpar_cpu_readd_by_index() directly.

-Nathan

> +
>  static int pseries_smp_notifier(struct notifier_block *nb,
>   unsigned long action, void *data)
>  {
>   struct of_reconfig_data *rd = data;
>   int err 

[PATCH] pseries/memory-hotplug: Only update DT once per memory DLPAR request

2018-04-20 Thread Nathan Fontenot
The updates to powerpc numa and memory hotplug code now use the
in-kernel LMB array instead of the device tree. This change
allows the pseries memory DLPAR code to only update the device
tree once after successfully handling a DLPAR request.

Prior to the in-kernel LMB array, the numa code looked up the
affinity for memory being added in the device tree, the code
now looks this up in the LMB array. This change means the
memory hotplug code can just update the affinity for an LMB
in the LMB array instead of updating the device tree.

This also provides a savings in kernel memory. When updating the
device tree old properties are never freed since there is no
usecount on properties. This behavior leads to a new copy of the
property being allocated every time an LMB is added or removed
(i.e. a request to add 100 LMBs creates 100 new copies of the
property). With this update only a single new property is created
when a DLPAR request completes successfully.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/drmem.h|5 ++
 arch/powerpc/platforms/pseries/hotplug-memory.c |   55 +++
 2 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index ce242b9ea8c6..7c1d8e74b25d 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
void (*func)(struct drmem_lmb *, const __be32 **));
 #endif
 
+static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+   lmb->aa_index = 0xffffffff;
+}
+
 #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c1578f54c626..9a15d39995e5 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -163,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
return aa_index;
 }
 
-static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
+static int update_lmb_associativity_index(struct drmem_lmb *lmb)
 {
struct device_node *parent, *lmb_node, *dr_node;
struct property *ala_prop;
@@ -203,43 +203,14 @@ static u32 lookup_lmb_associativity_index(struct 
drmem_lmb *lmb)
aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
 
dlpar_free_cc_nodes(lmb_node);
-   return aa_index;
-}
-
-static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
-{
-   int rc, aa_index;
-
-   lmb->flags |= DRCONF_MEM_ASSIGNED;
 
-   aa_index = lookup_lmb_associativity_index(lmb);
if (aa_index < 0) {
-   pr_err("Couldn't find associativity index for drc index %x\n",
-  lmb->drc_index);
-   return aa_index;
+   pr_err("Could not find LMB associativity\n");
+   return -1;
}
 
lmb->aa_index = aa_index;
-
-   rtas_hp_event = true;
-   rc = drmem_update_dt();
-   rtas_hp_event = false;
-
-   return rc;
-}
-
-static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
-{
-   int rc;
-
-   lmb->flags &= ~DRCONF_MEM_ASSIGNED;
-   lmb->aa_index = 0xffffffff;
-
-   rtas_hp_event = true;
-   rc = drmem_update_dt();
-   rtas_hp_event = false;
-
-   return rc;
+   return 0;
 }
 
 static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
@@ -428,7 +399,9 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
 
-   dlpar_remove_device_tree_lmb(lmb);
+   invalidate_lmb_associativity_index(lmb);
+   lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+
return 0;
 }
 
@@ -688,10 +661,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
 
-   rc = dlpar_add_device_tree_lmb(lmb);
+   rc = update_lmb_associativity_index(lmb);
if (rc) {
-   pr_err("Couldn't update device tree for drc index %x\n",
-  lmb->drc_index);
dlpar_release_drc(lmb->drc_index);
return rc;
}
@@ -704,14 +675,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
/* Add the memory */
rc = add_memory(nid, lmb->base_addr, block_sz);
if (rc) {
-   dlpar_remove_device_tree_lmb(lmb);
+   invalidate_lmb_associativity_index(lmb);
return rc;
}
 
rc = dlpar_online_lmb(lmb);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
-   dlpar_remove_device_tree_lmb(lmb);
+   invalidate_lmb_associativity_index(lmb);
} el

Re: [RFC PATCH v0 2/2] powerpc, drmem: Rename DRMEM_LMB_RESERVED to DRMEM_LMB_ISOLATED

2018-02-22 Thread Nathan Fontenot
On 02/21/2018 04:36 AM, Bharata B Rao wrote:
> Memory hotplug code uses a temporary LMB flags bit DRMEM_LMB_RESERVED
> to mark the LMB which is currently undergoing hotplug or unplug.
> It is easy to confuse DRMEM_LMB_RESERVED to mean the LMB is reserved
> for which a separate flag bit already exists DRCONF_MEM_RESERVED. Since
> both DRMEM_LMB_RESERVED and DRCONF_MEM_RESERVED operate on the same
> LMB flags word, rename the former to DRMEM_LMB_ISOLATED.
> 
> Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com>

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>

> ---
>  arch/powerpc/include/asm/drmem.h| 14 -
>  arch/powerpc/mm/drmem.c |  2 +-
>  arch/powerpc/platforms/pseries/hotplug-memory.c | 40 
> -
>  3 files changed, 28 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index ce242b9..b3fa3f7 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -72,21 +72,21 @@ static inline u32 drmem_lmb_size(void)
>   return drmem_info->lmb_size;
>  }
> 
> -#define DRMEM_LMB_RESERVED   0x8000
> +#define DRMEM_LMB_ISOLATED   0x8000
> 
> -static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
> +static inline void drmem_mark_lmb_isolated(struct drmem_lmb *lmb)
>  {
> - lmb->flags |= DRMEM_LMB_RESERVED;
> + lmb->flags |= DRMEM_LMB_ISOLATED;
>  }
> 
> -static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
> +static inline void drmem_remove_lmb_isolation(struct drmem_lmb *lmb)
>  {
> - lmb->flags &= ~DRMEM_LMB_RESERVED;
> + lmb->flags &= ~DRMEM_LMB_ISOLATED;
>  }
> 
> -static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
> +static inline bool drmem_lmb_isolated(struct drmem_lmb *lmb)
>  {
> - return lmb->flags & DRMEM_LMB_RESERVED;
> + return lmb->flags & DRMEM_LMB_ISOLATED;
>  }
> 
>  u64 drmem_lmb_memory_max(void);
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 3f18036..652bf3a 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -35,7 +35,7 @@ static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
>* Return the value of the lmb flags field minus the reserved
>* bit used internally for hotplug processing.
>*/
> - return lmb->flags & ~DRMEM_LMB_RESERVED;
> + return lmb->flags & ~DRMEM_LMB_ISOLATED;
>  }
> 
>  static struct property *clone_property(struct property *prop, u32 prop_sz)
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index c1578f5..2f5ca29 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -467,7 +467,7 @@ static int dlpar_memory_remove_by_count(u32 
> lmbs_to_remove)
>   /* Mark this lmb so we can add it later if all of the
>* requested LMBs cannot be removed.
>*/
> - drmem_mark_lmb_reserved(lmb);
> + drmem_mark_lmb_isolated(lmb);
> 
>   lmbs_removed++;
>   if (lmbs_removed == lmbs_to_remove)
> @@ -478,7 +478,7 @@ static int dlpar_memory_remove_by_count(u32 
> lmbs_to_remove)
>   pr_err("Memory hot-remove failed, adding LMB's back\n");
> 
>   for_each_drmem_lmb(lmb) {
> - if (!drmem_lmb_reserved(lmb))
> + if (!drmem_lmb_isolated(lmb))
>   continue;
> 
>   rc = dlpar_add_lmb(lmb);
> @@ -486,20 +486,20 @@ static int dlpar_memory_remove_by_count(u32 
> lmbs_to_remove)
>   pr_err("Failed to add LMB back, drc index %x\n",
>  lmb->drc_index);
> 
> - drmem_remove_lmb_reservation(lmb);
> + drmem_remove_lmb_isolation(lmb);
>   }
> 
>   rc = -EINVAL;
>   } else {
>   for_each_drmem_lmb(lmb) {
> - if (!drmem_lmb_reserved(lmb))
> + if (!drmem_lmb_isolated(lmb))
>   continue;
> 
>   dlpar_release_drc(lmb->drc_index);
>   pr_info("Memory at %llx was hot-removed\n",
>   lmb->base_addr);
> 
> - drmem_remove_lmb_reservation(lmb);
> + drmem_remove_lmb_isolation(lm

Re: [RFC PATCH v0 1/2] powerpc, drmem: Fix unexpected flag value in ibm, dynamic-memory-v2

2018-02-22 Thread Nathan Fontenot
On 02/21/2018 04:36 AM, Bharata B Rao wrote:
> Memory addition and removal by count and indexed-count methods
> temporarily mark the LMBs that are being added/removed by a special
> flag value DRMEM_LMB_RESERVED. Accessing flags value directly at
> a few places without proper accessor method is causing two unexpected
> side-effects:
> 
> - DRMEM_LMB_RESERVED bit is becoming part of the flags word of
>   drconf_cell_v2 entries in ibm,dynamic-memory-v2 DT property.
> - This results in extra drconf_cell entries in ibm,dynamic-memory-v2.
>   For example if 1G memory is added, it leads to one entry for 3 LMBs
>   and 1 separate entry for the last LMB. All the 4 LMBs should be
>   defined by one entry here.
> 
> Fix this by always accessing the flags by its accessor method
> drmem_lmb_flags().
> 
> Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com>

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>

> ---
>  arch/powerpc/mm/drmem.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 916844f..3f18036 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 
> *dr_cell,
>   dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
>   dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
>   dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
> - dr_cell->flags = cpu_to_be32(lmb->flags);
> + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
>  }
> 
>  static int drmem_update_dt_v2(struct device_node *memory,
> @@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
>   }
> 
>   if (prev_lmb->aa_index != lmb->aa_index ||
> - prev_lmb->flags != lmb->flags)
> + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
>   lmb_sets++;
> 
>   prev_lmb = lmb;
> @@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory,
>   }
> 
>   if (prev_lmb->aa_index != lmb->aa_index ||
> - prev_lmb->flags != lmb->flags) {
> + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
>   /* end of one set, start of another */
>   dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
>   dr_cell++;
> 



[PATCH] pseries/drmem: Check for zero filled ibm, dynamic-memory property.

2018-02-15 Thread Nathan Fontenot
Some versions of QEMU will produce an ibm,dynamic-reconfiguration-memory
node with an ibm,dynamic-memory property that is zero-filled. This causes
the drmem code to oops trying to parse this property.

The fix for this is to validate that the property does contain LMB
entries before trying to parse it and bail if the count is zero.

Oops: Kernel access of bad area, sig: 11 [#1]
SMP NR_CPUS=2048
NUMA
pSeries
Modules linked in:
Supported: Yes
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.14-11.2-default #1
task: c0007e639680 task.stack: c0007e648000
NIP: c0c709a4 LR: c0c70998 CTR: 
REGS: c0007e64b8d0 TRAP: 0300   Not tainted  (4.12.14-11.2-default)
MSR: 80010280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]>
  CR: 84000248  XER: 
CFAR: c067018c DAR: 0010 DSISR: 4200 SOFTE: 1
GPR00: c0c70998 c0007e64bb50 c1157b00 
GPR04: c0007e64bb70  002f 0022
GPR08: 0003 c6f63fac c6f63fb0 001e
GPR12:  cfa8 c000dca8 
GPR16:    
GPR20:    
GPR24: c0cccb98 c0c636f0 c0c56cd0 0007
GPR28: c0cccba8 c0007c30 c0007e64bbf0 0010
NIP [c0c709a4] read_drconf_v1_cell+0x54/0x9c
LR [c0c70998] read_drconf_v1_cell+0x48/0x9c
Call Trace:
[c0007e64bb50] [c0c56cd0] __param_initcall_debug+0x0/0x28 
(unreliable)
[c0007e64bb90] [c0c70e24] drmem_init+0x144/0x2f8
[c0007e64bc40] [c000d034] do_one_initcall+0x64/0x1d0
[c0007e64bd00] [c0c643d0] kernel_init_freeable+0x298/0x38c
[c0007e64bdc0] [c000dcc4] kernel_init+0x24/0x160
[c0007e64be30] [c000b428] ret_from_kernel_thread+0x5c/0xb4
Instruction dump:
7c9e2378 6000 e9429050 e93e 7c240b78 7c7f1b78 f9240021 e86a0002
4804e41d 6000 e9210020 39490004  f9410020 39490010 7d004c2c

The ibm,dynamic-reconfiguration-memory device tree property
generated that causes this:

ibm,dynamic-reconfiguration-memory {
ibm,lmb-size = <0x0 0x1000>;
ibm,memory-flags-mask = <0xff>;
ibm,dynamic-memory = <0x0 0x0 0x0 0x0 0x0 0x0>;
linux,phandle = <0x7e57eed8>;
ibm,associativity-lookup-arrays = <0x1 0x4 0x0 0x0 0x0 0x0>;
ibm,memory-preservation-time = <0x0>;
};

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/drmem.c |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 1604110c4238..916844f99c64 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, 
const __be32 *usm,
u32 i, n_lmbs;
 
n_lmbs = of_read_number(prop++, 1);
+   if (n_lmbs == 0)
+   return;
 
for (i = 0; i < n_lmbs; i++) {
read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, 
const __be32 *usm,
u32 i, j, lmb_sets;
 
lmb_sets = of_read_number(prop++, 1);
+   if (lmb_sets == 0)
+   return;
 
for (i = 0; i < lmb_sets; i++) {
read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
struct drmem_lmb *lmb;
 
drmem_info->n_lmbs = of_read_number(prop++, 1);
+   if (drmem_info->n_lmbs == 0)
+   return;
 
drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
   GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
int lmb_index;
 
lmb_sets = of_read_number(prop++, 1);
+   if (lmb_sets == 0)
+   return;
 
/* first pass, calculate the number of LMBs */
p = prop;



Re: 4.16-rc1 virtual machine crash on boot

2018-02-14 Thread Nathan Fontenot
On 02/14/2018 03:37 PM, Tyrel Datwyler wrote:
> On 02/13/2018 10:15 PM, Cyril Bur wrote:
>> On Tue, 2018-02-13 at 21:12 -0800, Tyrel Datwyler wrote:
>>> On 02/13/2018 05:20 PM, Cyril Bur wrote:
 Hello all,
>>>
>>> Does reverting commit 02ef6dd8109b581343ebeb1c4c973513682535d6 alleviate 
>>> the issue?
>>>
>>
>> Hi Tyrel,
>>
>> No it doesn't. Same backtrace.
> 
> What about commit 0c38ed6f6f0b78a404fe46767d21504b37af8705? After a little 
> closer look I think the series that commit enabled is the culprit. 
> Especially, since Michael Ellerman complained of the same crash.
> 
> -Tyrel

This wouldn't fix the issue. That patch enabled support for ibm,dynamic-memory-v2,
but in the stack trace we see that the device tree has the old (v1) version of
the ibm,dynamic-memory property.

-Nathan
> 
>>>

 I'm seeing this crash trying to boot a KVM virtual machine. This kernel
 was compiled with pseries_le_defconfig and run using the following qemu
 commandline:

 qemu-system-ppc64 -enable-kvm -cpu POWER8 -smp 4 -m 4G -M pseries
 -nographic -vga none -drive file=vm.raw,if=virtio,format=raw -drive
 file=mkvmconf2xeO,if=virtio,format=raw -netdev type=user,id=net0
 -device virtio-net-pci,netdev=net0 -kernel vmlinux_tscr -append
 'root=/dev/vdb1 rw cloud-init=disabled'

 qemu-system-ppc64 --version
 QEMU emulator version 2.5.0 (Debian 1:2.5+dfsg-5ubuntu10.16), Copyright
 (c) 2003-2008 Fabrice Bellard


 Key type dns_resolver registered
 Unable to handle kernel paging request for data at address 0x0010
 Faulting instruction address: 0xc18f2bbc
 Oops: Kernel access of bad area, sig: 11 [#1]
 LE SMP NR_CPUS=2048 NUMA pSeries
 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1v4.16-rc1 #8
 NIP:  c18f2bbc LR: c18f2bb4 CTR: 
 REGS: c000fea838d0 TRAP: 0380   Not tainted  (4.16.0-rc1v4.16-rc1)
 MSR:  82009033   CR: 84000248  XER:
 2000
 CFAR: c19591a0 SOFTE: 0 
 GPR00: c18f2bb4 c000fea83b50 c1bd8400
  
 GPR04: c000fea83b70  002f
 0022 
 GPR08:  c22a3e90 
 0220 
 GPR12:  cfb40980 c000d698
  
 GPR16:   
  
 GPR20:   
  
 GPR24:  c18b9248 c18e36d8
 c19738a8 
 GPR28: 0007 c000fc68 c000fea83bf0
 0010 
 NIP [c18f2bbc] read_drconf_v1_cell+0x50/0x9c
 LR [c18f2bb4] read_drconf_v1_cell+0x48/0x9c
 Call Trace:
 [c000fea83b50] [c18f2bb4] read_drconf_v1_cell+0x48/0x9c
 (unreliable)
 [c000fea83b90] [c18f305c] drmem_init+0x13c/0x2ec
 [c000fea83c40] [c18e4288] do_one_initcall+0xdc/0x1ac
 [c000fea83d00] [c18e45d4] kernel_init_freeable+0x27c/0x358
 [c000fea83dc0] [c000d6bc] kernel_init+0x2c/0x160
 [c000fea83e30] [c000bc20] ret_from_kernel_thread+0x5c/0xbc
 Instruction dump:
 7c7f1b78 6000 6000 7c240b78 3d22ffdc 3929f0a4 e95e
 e8690002 
 f9440021 4806657d 6000 e9210020  39090004 39490010
 f9010020 
 ---[ end trace bd9f49f482d30e03 ]---

 Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b

 WARNING: CPU: 1 PID: 1 at drivers/tty/vt/vt.c:3883
 do_unblank_screen+0x1f0/0x270
 CPU: 1 PID: 1 Comm: swapper/0 Tainted: G  D  4.16.0-
 rc1v4.16-rc1 #8
 NIP:  c09aa800 LR: c09aa63c CTR: c148f5f0
 REGS: c000fea832c0 TRAP: 0700   Tainted:
 G  D   (4.16.0-rc1v4.16-rc1)
 MSR:  82029033   CR: 2800  XER:
 2000
 CFAR: c09aa658 SOFTE: 1 
 GPR00: c09aa63c c000fea83540 c1bd8400
  
 GPR04: 0001 c000fb0c200e 1dd7
 c000fea834d0 
 GPR08: fe43  
 0001 
 GPR12: 28002428 cfb40980 c000d698
  
 GPR16:   
  
 GPR20:   
  
 GPR24: c000fea4 c000feadf910 c1a4a7a8
 c1cc4ea0 
 GPR28: c173f4f0 c1cc4ec8 
  
 NIP [c09aa800] do_unblank_screen+0x1f0/0x270
 LR [c09aa63c] do_unblank_screen+0x2c/0x270
 Call Trace:
 [c000fea83540] [c09aa63c] 

Re: [PATCH V3 0/9] powerpc: Support for ibm,dynamic-memory-v2

2018-02-14 Thread Nathan Fontenot
On 02/14/2018 03:30 PM, Tyrel Datwyler wrote:
> On 12/03/2017 09:13 PM, Michael Ellerman wrote:
>> Nathan Fontenot <nf...@linux.vnet.ibm.com> writes:
>>
>>> This patch set provides a series of updates to de-couple the LMB
>>> information provided in the device tree property from the device
>>> tree property format. This eases the ability to support a new
>>> format for the dynamic memory property, ibm,dynamic-memory-v2.
>>
>> Something in here is still blowing up for me in a KVM guest:
> 
> So, it looks like this series was applied despite observing this KVM guest 
> crash. Cyril posted yesterday to the list about hitting this same issue with 
> 4.16-rc1.
> 
> -Tyrel
> 

Yes, Michael pointed out that he hit this on his system but I have never
been able to replicate this error.

Now that others are seeing it, any help I could get on re-creating the failure
would be appreciated.

-Nathan

>>
>> OF stdout device is: /vdevice/vty@7100
>> Preparing to boot Linux version 4.14.0-rc2-gcc6x-g9e1fc7e 
>> (kerkins@alpine1-p1) (gcc version 6.4.1 20171202 (Custom 6328ca9eaa476138)) 
>> #1 SMP Sun Dec 3 21:45:32 AEDT 2017
>> Detected machine type: 0101
>> command line: 
>> Max number of cores passed to firmware: 256 (NR_CPUS = 2048)
>> Calling ibm,client-architecture-support... done
>> memory layout at init:
>>   memory_limit :  (16 MB aligned)
>>   alloc_bottom : 015c
>>   alloc_top: 3000
>>   alloc_top_hi : 0001
>>   rmo_top  : 3000
>>   ram_top  : 0001
>> instantiating rtas at 0x2fff... done
>> prom_hold_cpus: skipped
>> copying OF device tree...
>> Building dt strings...
>> Building dt structure...
>> Device tree strings 0x017d -> 0x017d09d8
>> Device tree struct  0x017e -> 0x017f
>> Quiescing Open Firmware ...
>> Booting Linux via __start() @ 0x0040 ...
>> [0.00] bootconsole [udbg0] enabled
>> [0.00] Allocated 2883584 bytes for 2048 pacas at cfd4
>> [0.00] hash-mmu: Page sizes from device-tree:
>> [0.00] hash-mmu: base_shift=12: shift=12, sllp=0x, 
>> avpnm=0x, tlbiel=1, penc=0
>> [0.00] hash-mmu: base_shift=16: shift=16, sllp=0x0110, 
>> avpnm=0x, tlbiel=1, penc=1
>> [0.00]  -> fw_vec5_feature_init()
>> [0.00]  <- fw_vec5_feature_init()
>> [0.00]  -> fw_hypertas_feature_init()
>> [0.00]  <- fw_hypertas_feature_init()
>> [0.00] Page orders: linear mapping = 16, virtual = 16, io = 16, 
>> vmemmap = 16
>> [0.00] Using 1TB segments
>> [0.00] hash-mmu: Initializing hash mmu with SLB
>> [0.00] Linux version 4.14.0-rc2-gcc6x-g9e1fc7e 
>> (kerkins@alpine1-p1) (gcc version 6.4.1 20171202 (Custom 6328ca9eaa476138)) 
>> #1 SMP Sun Dec 3 21:45:32 AEDT 2017
>> [0.00] Found initrd at 0xc15c:0xc178d70b
>> [0.00] Machine is LPAR !
>> [0.00]  -> pseries_init()
>> [0.00]  -> fw_cmo_feature_init()
>> [0.00] CMO not available
>> [0.00]  <- fw_cmo_feature_init()
>> [0.00]  <- pseries_init()
>> [0.00] Using pSeries machine description
>> [0.00] Partition configured for 16 cpus.
>> [0.00] CPU maps initialized for 8 threads per core
>> [0.00]  (thread shift is 3)
>> [0.00] Freed 2818048 bytes for unused pacas
>> [0.00] -
>> [0.00] ppc64_pft_size= 0x19
>> [0.00] phys_mem_size = 0x1
>> [0.00] dcache_bsize  = 0x80
>> [0.00] icache_bsize  = 0x80
>> [0.00] cpu_features  = 0x17dc7aec18500249
>> [0.00]   possible= 0xdfdf18500649
>> [0.00]   always  = 0x18100040
>> [0.00] cpu_user_features = 0xdc0065c2 0xef00
>> [0.00] mmu_features  = 0x78006001
>> [0.00] firmware_features = 0x0001405a440b
>> [0.00] htab_hash_mask= 0x3
>> [0.00] -
>> [0.00] numa:   NODE_DATA [mem 

[PATCH] pseries: Fix build break for SPLPAR=n and CPU hotplug

2018-02-09 Thread Nathan Fontenot
Build break fix for SPLPAR=n builds and CPU hotplug.

arch/powerpc/platforms/pseries/hotplug-cpu.o: In function `.dlpar_online_cpu':
hotplug-cpu.c:(.text+0xc40): undefined reference to `.find_and_online_cpu_nid'

Move the declaration of find_and_online_cpu_nid() to topology.h where
we can define it as a no-op for SPLPAR=n builds.

Fixes: e67e02a ("powerpc/pseries: Fix cpu hotplug crash with memoryless nodes")
Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/topology.h  |9 +
 arch/powerpc/platforms/pseries/hotplug-cpu.c |3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index 88187c285c70..4c5704a14f0d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -105,6 +105,15 @@ extern int timed_topology_update(int nsecs);
 #endif /* CONFIG_PPC_SPLPAR */
 #endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */
 
+#if defined(CONFIG_PPC_SPLPAR)
+int find_and_online_cpu_nid(int cpu);
+#else
+static inline int find_and_online_cpu_nid(int cpu)
+{
+   return 0;
+}
+#endif
+
 #include 
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..ca6e363c2d75 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -340,8 +341,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
 }
 
-extern int find_and_online_cpu_nid(int cpu);
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
int rc = 0;



Re: [PATCH RESEND V2 ] powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove

2018-01-29 Thread Nathan Fontenot
On 01/27/2018 02:58 AM, Michael Ellerman wrote:
> Nathan Fontenot <nf...@linux.vnet.ibm.com> writes:
> 
>> When DLPAR removing a CPU, the unmapping of the cpu from a node in
>> unmap_cpu_from_node() should also invalidate the CPUs entry in the
>> numa_cpu_lookup_table. There is not a guarantee that on a subsequent
>> DLPAR add of the CPU the associativity will be the same and thus
>> could be in a different node. Invalidating the entry in the
>> numa_cpu_lookup_table causes the associativity to be read from the
>> device tree at the time of the add.
> 
> This last part seems to contradict the change log of commit d4edc5b6c480
> ("powerpc: Fix the setup of CPU-to-Node mappings during CPU online"),
> which seems to say that we shouldn't be looking at the device tree.
> 
> Can you explain to me what I'm missing?

The commit you refer to addresses CPU online/offline behavior and is correct
that we shouldn't reference the device tree. The cpu-to-node mapping shouldn't
change across an offline/online operation since the CPU remains assigned to
the partition the entire time.

This patch addresses CPUs that have been DLPAR removed, and as such the CPU
is no longer assigned to the partition. Given this we don't have a guarantee
that the CPU will have the same node-to-cpu mapping when it is assigned
back to the partition on a subsequent DLPAR add operation.

Without this patch, the CPU is put back in the node it was in previously
which may not match the node firmware states it belongs to.

> 
> Also when did this break, always? Which commit should I mark this as
> fixing?
As far as I know this has always been broken. I've looked through the git logs
for the numa and pseries cpu hotplug code and don't see a specific
commit I can point at for breaking this.

-Nathan



[PATCH RESEND V2 ] powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove

2018-01-26 Thread Nathan Fontenot
When DLPAR removing a CPU, the unmapping of the cpu from a node in
unmap_cpu_from_node() should also invalidate the CPUs entry in the
numa_cpu_lookup_table. There is not a guarantee that on a subsequent
DLPAR add of the CPU the associativity will be the same and thus
could be in a different node. Invalidating the entry in the
numa_cpu_lookup_table causes the associativity to be read from the
device tree at the time of the add.

The current behavior of not invalidating the CPUs entry in the
numa_cpu_lookup_table can result in scenarios where the topology
layout of CPUs in the partition does not match the device tree
or the topology reported by the HMC.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Originally sent Dec. 5 2017, no reply, resending.

Updates for V2: Move the invalidation from unmap_cpu_from_node to
pseries_remove_processor, the former routine is also called during cpu
offline and we do not want to invalidate during cpu offline.

 arch/powerpc/include/asm/topology.h  |5 +
 arch/powerpc/mm/numa.c   |5 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c |2 ++
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index 88187c285c70..1c02e6900f78 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int 
nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+   numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
int nid;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 268c7a2d9a5b..7ec3a0d787d3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-   numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a7d14aa7bb7c..09083ad82f7a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+   update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)



Re: [PATCH V2] powerpc/kernel: Add 'ibm,thread-groups' property for CPU allocation

2018-01-12 Thread Nathan Fontenot
On 01/08/2018 11:19 AM, Michael Bringmann wrote:
> Add code to parse the new property 'ibm,thread-groups' when it is
> present.  The content of this property explicitly defines the number
> of threads per core as well as the PowerPC 'threads_core_mask'.
> The design provides a common device-tree for both P9 normal core and
> P9 fused core systems.  The new property has been observed to be
> available on P9 pHyp systems, but it is not always present on
> OpenPower BMC systems.
> 
> The property updates the kernel to know which CPUs/threads of each
> core are actually present, and then use the map when adding cores
> to the system at boot, or during hotplug operations.
> 
> * Previously, the information about the number of threads per core
>   was inferred solely from the "ibm,ppc-interrupt-server#s" property
>   in the system device tree.
> * Also previous to this property, The mask of threads per CPU was
>   inferred to be a strict linear series from 0..(nthreads-1).
> * After reading the "ibm,thread-groups" property, we can determine
>   the number of threads per core to be the 'bitmask weight' of the
>   CPU thread mask.
> * Also after reading the property, we can determine which of the
>   possible threads we are allowed to online for each CPU.  It is no
>   longer a simple linear sequence, but may be discontinuous e.g.
>   activate threads 1,2,3,5,6,7 on a core instead of 0-5 sequentially.
> 
> Implementation of the "ibm,thread-groups" property is spread across
> a few files in the powerpc specific code:
> 
> * prom.c: Parse the property and create 'ppc_thread_group_mask'.
>   Use the mask in operation of early_init_dt_scan_cpus().
> * setup-common.c: Import 'ppc_thread_group_mask' and use the value
>   in the operation of cpu_init_thread_core_maps(), and
>   smp_setup_cpu_maps.
> * hotplug-cpu.c: Use 'ppc_thread_group_mask' in several locations
>   where the code previously expected to iterate over a
>   linear series of active threads (0..nthreads-1).
> 
> Note that the "ibm,thread-groups" property also includes semantics
> of 'thread-group' i.e. define one or more subgroups of the available
> threads, each group of threads to be used for a specific class of
> task.  Translating thread group semantics into Linux kernel features
> is TBD.

One thing I don't see addressed in the comments or in the code is
migration support. I think we need to update the thread group mask
post-migration to reflect the threads per core on the new system.

-Nathan

> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V2:
>   -- Add more information and examples to the patch description.
>   -- Rename 'pseries_thread_group_mask' to 'ppc_thread_group_mask'
>   -- Remove unnecessary debug message complaining about absence of
>  property.
>   -- Reduce indent complexity of early_init_dt_scan_cpus().
> ---
>  arch/powerpc/include/asm/cputhreads.h|2 +
>  arch/powerpc/kernel/prom.c   |   74 
> ++
>  arch/powerpc/kernel/setup-common.c   |   30 +++
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |   13 -
>  4 files changed, 107 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cputhreads.h 
> b/arch/powerpc/include/asm/cputhreads.h
> index d71a909..8e444d4 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -31,6 +31,8 @@
>  #define threads_core_mask(*get_cpu_mask(0))
>  #endif
> 
> +extern cpumask_t ppc_thread_group_mask;
> +
>  /* cpu_thread_mask_to_cores - Return a cpumask of one per cores
>   *hit by the argument
>   *
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index b15bae2..0a49231 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -68,6 +68,9 @@
>  #define DBG(fmt...)
>  #endif
> 
> +cpumask_t ppc_thread_group_mask;
> +EXPORT_SYMBOL(ppc_thread_group_mask);
> +
>  #ifdef CONFIG_PPC64
>  int __initdata iommu_is_off;
>  int __initdata iommu_force_on;
> @@ -303,6 +306,71 @@ static void __init check_cpu_feature_properties(unsigned 
> long node)
>   }
>  }
> 
> +static void __init early_init_setup_thread_group_mask(unsigned long node,
> + cpumask_t *thread_group_mask)
> +{
> + const __be32 *thrgrp;
> + int len, rc = 0;
> + u32 cc_type = 0, no_split = 0, thr_per_split = 0;
> + int j, k;
> +
> + cpumask_clear(thread_group_mask);
> +
> + thrgrp = of_get_flat_dt_prop(node, "ibm,thread-groups", &len);
> + if (!thrgrp)
> + return;
> +
> + /* Process the thread groups for the Core thread mask */
> + /* Characteristic type per table */
> + cc_type = of_read_number(thrgrp++, 1);
> +
> + /*
> +  * 1 : Group shares common L1, translation cache, and
> +  * instruction data flow
> +  * >1 : Reserved
> +  */
> + 

Re: [PATCH V8 3/3] hotplug/cpu: Fix crash with memoryless nodes

2018-01-08 Thread Nathan Fontenot
On 11/28/2017 04:58 PM, Michael Bringmann wrote:
> On powerpc systems with shared configurations of CPUs and memory and
> memoryless nodes at boot, an event ordering problem was observed on
> a SLES12 build platform with the hot-add of CPUs to the memoryless
> nodes.
> 
> * The most common error occurred when the memory SLAB driver attempted
>   to reference the memoryless node to which a CPU was being added
>   before the kernel had finished initializing all of the data structures
>   for the CPU and exited 'device_online' under DLPAR/hot-add.
> 
>   Normally the memoryless node would be initialized through the call
>   path device_online ... arch_update_cpu_topology ... find_cpu_nid
>   ...  try_online_node.  This patch ensures that the powerpc node will
>   be initialized as early as possible, even if it was memoryless and
>   CPU-less at the point when we are trying to hot-add a new CPU to it.
> 
> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>

> ---
> Changes in V8:
>   -- Change a 'printk(KERN_INFO ...)' statement to be a pr_debug()
>  statement.
>   -- Rename 'find_cpu_nid' to 'find_and_online_cpu_nid' for better
>  clarity of its function.
> ---
>  arch/powerpc/mm/numa.c   |4 +++-
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |3 +++
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 6b08dd8..a182f9e 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1307,7 +1307,7 @@ static long vphn_get_associativity(unsigned long cpu,
>   return rc;
>  }
> 
> -static inline int find_and_online_cpu_nid(int cpu)
> +int find_and_online_cpu_nid(int cpu)
>  {
>   __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>   int new_nid;
> @@ -1340,6 +1340,8 @@ static inline int find_and_online_cpu_nid(int cpu)
>  #endif
>   }
> 
> + pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
> + cpu, new_nid);
>   return new_nid;
>  }
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a7d14aa7..dceb514 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node 
> *np)
>   cpu_maps_update_done();
>  }
> 
> +extern int find_and_online_cpu_nid(int cpu);
> +
>  static int dlpar_online_cpu(struct device_node *dn)
>  {
>   int rc = 0;
> @@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn)
>   != CPU_STATE_OFFLINE);
>   cpu_maps_update_done();
>   timed_topology_update(1);
> + find_and_online_cpu_nid(cpu);
>   rc = device_online(get_cpu_device(cpu));
>   if (rc)
>   goto out;
> 



Re: [PATCH V8 2/3] poserpc/initnodes: Ensure nodes initialized for hotplug

2018-01-08 Thread Nathan Fontenot
On 11/28/2017 04:58 PM, Michael Bringmann wrote:
> On powerpc systems which allow 'hot-add' of CPU, it may occur that
> the new resources are to be inserted into nodes that were not used
> for memory resources at bootup.  Many different configurations of
> PowerPC resources may need to be supported depending upon the
> environment.  Important characteristics of the nodes and operating
> environment include:
> 
> * Dedicated vs. shared CPUs.  Shared CPUs require information such
>   as the VPHN hcall for CPU assignment to nodes, since shared CPUs
>   have their affinity set to node 0 at boot and when hot-added.
>   Associativity decisions made based on dedicated resource rules,
>   such as associativity properties in the device tree, may vary from
>   decisions made using the values returned by the VPHN hcall.
> * memoryless nodes at boot.  Nodes need to be defined as 'possible'
>   at boot for operation with other code modules.  Previously, the
>   powerpc code would limit the set of possible nodes to those which
>   have memory assigned at boot, and were thus online.  Subsequent
>   add/remove of CPUs or memory would only work with this subset of
>   possible nodes.
> * memoryless nodes with CPUs at boot.  Due to the previous restriction
>   on nodes, nodes that had CPUs but no memory were being collapsed
>   into other nodes that did have memory at boot.  In practice this
>   meant that the node assignment presented by the runtime kernel
>   differed from the affinity and associativity attributes presented
>   by the device tree or VPHN hcalls.  Nodes that might be known to
>   the pHyp were not 'possible' in the runtime kernel because they did
>   not have memory at boot.
> 
> This patch fixes some problems encountered at runtime with
> configurations that support memory-less nodes, or that hot-add CPUs
> into nodes that are memoryless during system execution after boot.
> The problems of interest include,
> 
> * Nodes known to powerpc to be memoryless at boot, but to have
>   CPUs in them are allowed to be 'possible' and 'online'.  Memory
>   allocations for those nodes are taken from another node that does
>   have memory until and if memory is hot-added to the node.
> * Nodes which have no resources assigned at boot, but which may still
>   be referenced subsequently by affinity or associativity attributes,
>   are kept in the list of 'possible' nodes for powerpc.  Hot-add of
>   memory or CPUs to the system can reference these nodes and bring
>   them online instead of redirecting the references to one of the set
>   of nodes known to have memory at boot.
> 
> Note that this software operates under the context of CPU hotplug.
> We are not doing memory hotplug in this code, but rather updating
> the kernel's CPU topology (i.e. arch_update_cpu_topology /
> numa_update_cpu_topology).  We are initializing a node that may be
> used by CPUs or memory before it can be referenced as invalid by a
> CPU hotplug operation.  CPU hotplug operations are protected by a
> range of APIs including cpu_maps_update_begin/cpu_maps_update_done,
> cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
> Memory hotplug operations, including try_online_node, are protected
> by mem_hotplug_begin/mem_hotplug_done, device locks, and more.  In
> the case of CPUs being hot-added to a previously memoryless node, the
> try_online_node operation occurs wholly within the CPU locks with no
> overlap.  Using HMC hot-add/hot-remove operations, we have been able
> to add and remove CPUs to any possible node without failures.  HMC
> operations involve a degree of self-serialization, though.
> 
> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>

> ---
> Changes in V8:
>   -- Clarify 'resources' as CPUs in patch description regarding
>  VPHN call.  Add another clause to statement mentioning that
>  shared CPUs start in node 0, and are finally assigned per
>  VPHN information.
>   -- Rename 'find_cpu_nid' to 'find_and_online_cpu_nid' for better
>  clarity of its function.
>   -- Restore '__init' tag to definition of 'setup_node_data'
> ---
>  arch/powerpc/mm/numa.c |   49 
> ++--
>  1 file changed, 39 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 735e3fd..6b08dd8 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long lcpu)
>   nid = of_node_to_nid_single(cpu);
> 
>  out_present:
> - if (nid < 0 || !node_online(nid))
> + if (nid < 0 || !node_possible(nid))
>   nid = first_on

Re: [PATCH V8 1/3] powerpc/nodes: Ensure enough nodes avail for operations

2018-01-08 Thread Nathan Fontenot
On 11/28/2017 04:58 PM, Michael Bringmann wrote:
> On powerpc systems which allow 'hot-add' of CPU or memory resources,
> it may occur that the new resources are to be inserted into nodes
> that were not used for these resources at bootup.  In the kernel,
> any node that is used must be defined and initialized.  These empty
> nodes may occur when,
> 
> * Dedicated vs. shared resources.  Shared resources require
>   information such as the VPHN hcall for CPU assignment to nodes.
>   Associativity decisions made based on dedicated resource rules,
>   such as associativity properties in the device tree, may vary
>   from decisions made using the values returned by the VPHN hcall.
> * memoryless nodes at boot.  Nodes need to be defined as 'possible'
>   at boot for operation with other code modules.  Previously, the
>   powerpc code would limit the set of possible nodes to those which
>   have memory assigned at boot, and were thus online.  Subsequent
>   add/remove of CPUs or memory would only work with this subset of
>   possible nodes.
> * memoryless nodes with CPUs at boot.  Due to the previous restriction
>   on nodes, nodes that had CPUs but no memory were being collapsed
>   into other nodes that did have memory at boot.  In practice this
>   meant that the node assignment presented by the runtime kernel
>   differed from the affinity and associativity attributes presented
>   by the device tree or VPHN hcalls.  Nodes that might be known to
>   the pHyp were not 'possible' in the runtime kernel because they did
>   not have memory at boot.
> 
> This patch ensures that sufficient nodes are defined to support
> configuration requirements after boot, as well as at boot.  This
> patch set fixes a couple of problems.
> 
> * Nodes known to powerpc to be memoryless at boot, but to have
>   CPUs in them are allowed to be 'possible' and 'online'.  Memory
>   allocations for those nodes are taken from another node that does
>   have memory until and if memory is hot-added to the node.
> * Nodes which have no resources assigned at boot, but which may still
>   be referenced subsequently by affinity or associativity attributes,
>   are kept in the list of 'possible' nodes for powerpc.  Hot-add of
>   memory or CPUs to the system can reference these nodes and bring
>   them online instead of redirecting to one of the set of nodes that
>   were known to have memory at boot.
> 
> This patch extracts the value of the lowest domain level (number of
> allocable resources) from the device tree property
> "ibm,max-associativity-domains" to use as the maximum number of nodes
> to setup as possibly available in the system.  This new setting will
> override the instruction,
> 
> nodes_and(node_possible_map, node_possible_map, node_online_map);
> 
> presently seen in the function arch/powerpc/mm/numa.c:initmem_init().
> 
> If the "ibm,max-associativity-domains" property is not present at boot,
> no operation will be performed to define or enable additional nodes, or
> enable the above 'nodes_and()'.
> 
> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>

Reviewed-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
 
> ---
> Changes in V8:
>   -- Remove unneeded pr_info() statement
> ---
>  arch/powerpc/mm/numa.c |   37 ++---
>  1 file changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index adb6364f..735e3fd 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -892,6 +892,34 @@ static void __init setup_node_data(int nid, u64 
> start_pfn, u64 end_pfn)
>   NODE_DATA(nid)->node_spanned_pages = spanned_pages;
>  }
> 
> +static void __init find_possible_nodes(void)
> +{
> + struct device_node *rtas;
> + u32 numnodes, i;
> +
> + if (min_common_depth <= 0)
> + return;
> +
> + rtas = of_find_node_by_path("/rtas");
> + if (!rtas)
> + return;
> +
> + if (of_property_read_u32_index(rtas,
> + "ibm,max-associativity-domains",
> + min_common_depth, &numnodes))
> + goto out;
> +
> + for (i = 0; i < numnodes; i++) {
> + if (!node_possible(i)) {
> + setup_node_data(i, 0, 0);
> + node_set(i, node_possible_map);
> + }
> + }
> +
> +out:
> + of_node_put(rtas);
> +}
> +
>  void __init initmem_init(void)
>  {
>   int nid, cpu;
> @@ -905,12 +933,15 @@ void __init initmem_init(void)
>   memblock_dump_all();
> 
>   /*
> -  * Reduce the possible NUMA nod

Re: [PATCH V2] powerpc/pseries: Invalidate numa_cpu_lookup_table on cpu remove

2018-01-08 Thread Nathan Fontenot
Hi Michael,

I hadn't seen any update on this patch, just wanted to make sure you saw it.

-Nathan 


On 12/05/2017 09:33 PM, Nathan Fontenot wrote:
> When DLPAR removing a CPU we need to invalidate its entry in the
> numa_cpu_lookup_table. There is no guarantee that on a subsequent
> DLPAR add of the CPU the associativity will be the same and thus it
> could be in a different node. Invalidating the entry in the
> numa_cpu_lookup_table causes the associativity to be read from the
> device tree at the time of the add.
> 
> The current behavior of not invalidating the CPUs entry in the
> numa_cpu_lookup_table can result in scenarios where the topology
> layout of CPUs in the partition does not match the device tree
> or the topology reported by the HMC.
> 
> This patch moves the update_numa_cpu_lookup_table to topology.h so it is
> available outside of numa.c. A call to this routine is added in
> pseries_remove_processor().
> 
> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
> ---
> 
> Updates for V2: Move the invalidation from unmap_cpu_from_node to
> pseries_remove_processor, the former routine is also called during cpu
> offline and we do not want to invalidate during cpu offline.
> 
>  arch/powerpc/include/asm/topology.h  |5 +
>  arch/powerpc/mm/numa.c   |5 -
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |2 ++
>  3 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/topology.h 
> b/arch/powerpc/include/asm/topology.h
> index 88187c285c70..1c02e6900f78 100644
> --- a/arch/powerpc/include/asm/topology.h
> +++ b/arch/powerpc/include/asm/topology.h
> @@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, 
> int nid);
>  extern void sysfs_remove_device_from_node(struct device *dev, int nid);
>  extern int numa_update_cpu_topology(bool cpus_locked);
> 
> +static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
> +{
> + numa_cpu_lookup_table[cpu] = node;
> +}
> +
>  static inline int early_cpu_to_node(int cpu)
>  {
>   int nid;
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index adb6364f4091..09be66fcea68 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -142,11 +142,6 @@ static void reset_numa_cpu_lookup_table(void)
>   numa_cpu_lookup_table[cpu] = -1;
>  }
> 
> -static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
> -{
> - numa_cpu_lookup_table[cpu] = node;
> -}
> -
>  static void map_cpu_to_node(int cpu, int node)
>  {
>   update_numa_cpu_lookup_table(cpu, node);
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a7d14aa7bb7c..09083ad82f7a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -36,6 +36,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "pseries.h"
>  #include "offline_states.h"
> @@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node 
> *np)
>   BUG_ON(cpu_online(cpu));
>   set_cpu_present(cpu, false);
>   set_hard_smp_processor_id(cpu, -1);
> + update_numa_cpu_lookup_table(cpu, -1);
>   break;
>   }
>   if (cpu >= nr_cpu_ids)
> 



[PATCH V2] powerpc/pseries: Invalidate numa_cpu_lookup_table on cpu remove

2017-12-05 Thread Nathan Fontenot
When DLPAR removing a CPU we need to invalidate its entry in the
numa_cpu_lookup_table. There is no guarantee that on a subsequent
DLPAR add of the CPU the associativity will be the same and thus it
could be in a different node. Invalidating the entry in the
numa_cpu_lookup_table causes the associativity to be read from the
device tree at the time of the add.

The current behavior of not invalidating the CPUs entry in the
numa_cpu_lookup_table can result in scenarios where the topology
layout of CPUs in the partition does not match the device tree
or the topology reported by the HMC.

This patch moves the update_numa_cpu_lookup_table to topology.h so it is
available outside of numa.c. A call to this routine is added in
pseries_remove_processor().

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V2: Move the invalidation from unmap_cpu_from_node to
pseries_remove_processor, the former routine is also called during cpu
offline and we do not want to invalidate during cpu offline.

 arch/powerpc/include/asm/topology.h  |5 +
 arch/powerpc/mm/numa.c   |5 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c |2 ++
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index 88187c285c70..1c02e6900f78 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int 
nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+   numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
int nid;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index adb6364f4091..09be66fcea68 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -142,11 +142,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-   numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a7d14aa7bb7c..09083ad82f7a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+   update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)



Re: [PATCH] powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove

2017-12-05 Thread Nathan Fontenot
Disregard. I'll send an updated patch soon.

-Nathan

On 12/05/2017 02:55 PM, Nathan Fontenot wrote:
> When DLPAR removing a CPU, the unmapping of the cpu from a node in
> unmap_cpu_from_node() should also invalidate the CPUs entry in the
> numa_cpu_lookup_table. There is not a guarantee that on a subsequent
> DLPAR add of the CPU the associativity will be the same and thus
> could be in a different node. Invalidating the entry in the
> numa_cpu_lookup_table causes the associativity to be read from the
> device tree at the time of the add.
> 
> The current behavior of not invalidating the CPUs entry in the
> numa_cpu_lookup_table can result in scenarios where the topology
> layout of CPUs in the partition does not match the device tree
> or the topology reported by the HMC.
> 
> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/mm/numa.c |2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index adb6364f4091..415b1a76b429 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -164,6 +164,8 @@ static void unmap_cpu_from_node(unsigned long cpu)
> 
>   dbg("removing cpu %lu from node %d\n", cpu, node);
> 
> + update_numa_cpu_lookup_table(cpu, -1);
> +
>   if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
>   cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
>   } else {
> 



[PATCH] powerpc/numa: Invalidate numa_cpu_lookup_table on cpu remove

2017-12-05 Thread Nathan Fontenot
When DLPAR removing a CPU, the unmapping of the cpu from a node in
unmap_cpu_from_node() should also invalidate the CPUs entry in the
numa_cpu_lookup_table. There is not a guarantee that on a subsequent
DLPAR add of the CPU the associativity will be the same and thus
could be in a different node. Invalidating the entry in the
numa_cpu_lookup_table causes the associativity to be read from the
device tree at the time of the add.

The current behavior of not invalidating the CPUs entry in the
numa_cpu_lookup_table can result in scenarios where the topology
layout of CPUs in the partition does not match the device tree
or the topology reported by the HMC.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index adb6364f4091..415b1a76b429 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -164,6 +164,8 @@ static void unmap_cpu_from_node(unsigned long cpu)
 
dbg("removing cpu %lu from node %d\n", cpu, node);
 
+   update_numa_cpu_lookup_table(cpu, -1);
+
if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
} else {



[PATCH V3 9/9] powerpc: Enable support of ibm,dynamic-memory-v2

2017-12-01 Thread Nathan Fontenot
Add required bits to the architecture vector to enable support
of the ibm,dynamic-memory-v2 device tree property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/firmware.h   |3 ++-
 arch/powerpc/include/asm/prom.h   |1 +
 arch/powerpc/kernel/prom_init.c   |1 +
 arch/powerpc/platforms/pseries/firmware.c |1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/firmware.h 
b/arch/powerpc/include/asm/firmware.h
index 8645897472b1..832df61f30ef 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,7 @@
 #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x8000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0001)
 #define FW_FEATURE_PRRNASM_CONST(0x0002)
+#define FW_FEATURE_DRMEM_V2ASM_CONST(0x0004)
 
 #ifndef __ASSEMBLY__
 
@@ -67,7 +68,7 @@ enum {
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
-   FW_FEATURE_HPT_RESIZE,
+   FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index f0a30a003bd8..9f27866e3126 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -143,6 +143,7 @@ extern int of_get_ibm_chip_id(struct device_node *np);
 #define OV5_PFO_HW_842 0x1140  /* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR0x1120  /* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS 0x1501  /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2   0x1680  /* ibm,dynamic-reconfiguration-v2 */
 #define OV5_XIVE_SUPPORT   0x17C0  /* XIVE Exploitation Support Mask */
 #define OV5_XIVE_LEGACY0x1700  /* XIVE legacy mode Only */
 #define OV5_XIVE_EXPLOIT   0x1740  /* XIVE exploitation mode Only */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..acf4b2e0530c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -869,6 +869,7 @@ struct ibm_arch_vec __cacheline_aligned 
ibm_architecture_vec = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
+   .byte22 = OV5_FEAT(OV5_DRMEM_V2),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
diff --git a/arch/powerpc/platforms/pseries/firmware.c 
b/arch/powerpc/platforms/pseries/firmware.c
index 63cc82ad58ac..aac3ea2911b2 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -114,6 +114,7 @@ static __initdata struct vec5_fw_feature
 vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN,   OV5_PRRN},
+   {FW_FEATURE_DRMEM_V2,   OV5_DRMEM_V2},
 };
 
 static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)



[PATCH V3 8/9] powerpc/drmem: Add support for ibm,dynamic-memory-v2 property

2017-12-01 Thread Nathan Fontenot
The Power Hypervisor has introduced a new device tree format for
the property describing the dynamic reconfiguration LMBs for a system,
ibm,dynamic-memory-v2. This new format condenses the size of the
property, especially on large memory systems, by reporting sets
of LMBs that have the same properties (flags and associativity array
index).

This patch updates the powerpc/mm/drmem.c code to provide routines
that can parse the new device tree format during the walk_drmem_lmb*
routines used during boot, the creation of the LMB array, and updating
the device tree to create a new property in the proper format for
ibm,dynamic-memory-v2.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>

---

Updates for V3: Provide parsing routines for ibm,dynamic-memory-v2
to be called from the walk_drmem_lmb* routines.

 arch/powerpc/include/asm/drmem.h |   12 ++
 arch/powerpc/mm/drmem.c  |  192 +-
 2 files changed, 200 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index afa7dce89a67..47a701263b03 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -49,6 +49,18 @@ struct of_drconf_cell_v1 {
__be32  flags;
 };
 
+/* Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+   u32 seq_lmbs;
+   u64 base_addr;
+   u32 drc_index;
+   u32 aa_index;
+   u32 flags;
+} __packed;
+
 #define DRCONF_MEM_ASSIGNED0x0008
 #define DRCONF_MEM_AI_INVALID  0x0040
 #define DRCONF_MEM_RESERVED0x0080
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 5fe3944ebf28..31dbe14f1d96 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -91,6 +91,84 @@ static int drmem_update_dt_v1(struct device_node *memory,
return 0;
 }
 
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+   struct drmem_lmb *lmb)
+{
+   dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+   dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+   dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+   dr_cell->flags = cpu_to_be32(lmb->flags);
+}
+
+static int drmem_update_dt_v2(struct device_node *memory,
+ struct property *prop)
+{
+   struct property *new_prop;
+   struct of_drconf_cell_v2 *dr_cell;
+   struct drmem_lmb *lmb, *prev_lmb;
+   u32 lmb_sets, prop_sz, seq_lmbs;
+   u32 *p;
+
+   /* First pass, determine how many LMB sets are needed. */
+   lmb_sets = 0;
+   prev_lmb = NULL;
+   for_each_drmem_lmb(lmb) {
+   if (!prev_lmb) {
+   prev_lmb = lmb;
+   lmb_sets++;
+   continue;
+   }
+
+   if (prev_lmb->aa_index != lmb->aa_index ||
+   prev_lmb->flags != lmb->flags)
+   lmb_sets++;
+
+   prev_lmb = lmb;
+   }
+
+   prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+   new_prop = clone_property(prop, prop_sz);
+   if (!new_prop)
+   return -1;
+
+   p = new_prop->value;
+   *p++ = cpu_to_be32(lmb_sets);
+
+   dr_cell = (struct of_drconf_cell_v2 *)p;
+
+   /* Second pass, populate the LMB set data */
+   prev_lmb = NULL;
+   seq_lmbs = 0;
+   for_each_drmem_lmb(lmb) {
+   if (prev_lmb == NULL) {
+   /* Start of first LMB set */
+   prev_lmb = lmb;
+   init_drconf_v2_cell(dr_cell, lmb);
+   seq_lmbs++;
+   continue;
+   }
+
+   if (prev_lmb->aa_index != lmb->aa_index ||
+   prev_lmb->flags != lmb->flags) {
+   /* end of one set, start of another */
+   dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+   dr_cell++;
+
+   init_drconf_v2_cell(dr_cell, lmb);
+   seq_lmbs = 1;
+   } else {
+   seq_lmbs++;
+   }
+
+   prev_lmb = lmb;
+   }
+
+   /* close out last LMB set */
+   dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+   of_update_property(memory, new_prop);
+   return 0;
+}
+
 int drmem_update_dt(void)
 {
struct device_node *memory;
@@ -102,8 +180,13 @@ int drmem_update_dt(void)
return -1;
 
prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
-   if (prop)
+   if (prop) {
rc = drmem_update_dt_v1(memory, prop);
+   } else {
+   prop = of_find_property(memory, "ibm,d

[PATCH V3 7/9] powerpc: Move of_drconf_cell struct to asm/drmem.h

2017-12-01 Thread Nathan Fontenot
Now that the powerpc code parses dynamic reconfiguration memory
LMB information from the LMB array and not the device tree
directly we can move the of_drconf_cell struct to drmem.h where
it fits better.

In addition, the struct is renamed to of_drconf_cell_v1 in
anticipation of upcoming support for version 2 of the dynamic
reconfiguration property and the members are typed as __be*
values to reflect how they exist in the device tree.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/drmem.h|   18 ++
 arch/powerpc/include/asm/prom.h |   16 
 arch/powerpc/mm/drmem.c |4 ++--
 arch/powerpc/platforms/pseries/hotplug-memory.c |6 +++---
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 96d7a908146f..afa7dce89a67 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -35,6 +35,24 @@ extern struct drmem_lmb_info *drmem_info;
_info->lmbs[0],   \
_info->lmbs[drmem_info->n_lmbs - 1])
 
+/* The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+   __be64  base_addr;
+   __be32  drc_index;
+   __be32  reserved;
+   __be32  aa_index;
+   __be32  flags;
+};
+
+#define DRCONF_MEM_ASSIGNED0x0008
+#define DRCONF_MEM_AI_INVALID  0x0040
+#define DRCONF_MEM_RESERVED0x0080
+
 static inline u32 drmem_lmb_size(void)
 {
return drmem_info->lmb_size;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 825bd5998701..f0a30a003bd8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -80,22 +80,6 @@ extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);
 
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
-   u64 base_addr;
-   u32 drc_index;
-   u32 reserved;
-   u32 aa_index;
-   u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED0x0008
-#define DRCONF_MEM_AI_INVALID  0x0040
-#define DRCONF_MEM_RESERVED0x0080
-
 /*
  * There are two methods for telling firmware what our capabilities are.
  * Newer machines have an "ibm,client-architecture-support" method on the
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index e9cf79eb1257..5fe3944ebf28 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -65,7 +65,7 @@ static int drmem_update_dt_v1(struct device_node *memory,
  struct property *prop)
 {
struct property *new_prop;
-   struct of_drconf_cell *dr_cell;
+   struct of_drconf_cell_v1 *dr_cell;
struct drmem_lmb *lmb;
u32 *p;
 
@@ -76,7 +76,7 @@ static int drmem_update_dt_v1(struct device_node *memory,
p = new_prop->value;
*p++ = cpu_to_be32(drmem_info->n_lmbs);
 
-   dr_cell = (struct of_drconf_cell *)p;
+   dr_cell = (struct of_drconf_cell_v1 *)p;
 
for_each_drmem_lmb(lmb) {
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 2043bc2b77b3..c1578f54c626 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -996,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np)
 
 static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
 {
-   struct of_drconf_cell *new_drmem, *old_drmem;
+   struct of_drconf_cell_v1 *new_drmem, *old_drmem;
unsigned long memblock_size;
u32 entries;
__be32 *p;
@@ -1019,11 +1019,11 @@ static int pseries_update_drconf_memory(struct 
of_reconfig_data *pr)
 * of_drconf_cell's.
 */
entries = be32_to_cpu(*p++);
-   old_drmem = (struct of_drconf_cell *)p;
+   old_drmem = (struct of_drconf_cell_v1 *)p;
 
p = (__be32 *)pr->prop->value;
p++;
-   new_drmem = (struct of_drconf_cell *)p;
+   new_drmem = (struct of_drconf_cell_v1 *)p;
 
for (i = 0; i < entries; i++) {
if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&



[PATCH V3 6/9] powerpc/pseries: Update memory hotplug code to use drmem LMB array

2017-12-01 Thread Nathan Fontenot
Update the pseries memory hotplug code to use the newly added
dynamic reconfiguration LMB array. Doing this is required for the
upcoming support of version 2 of the dynamic reconfiguration
device tree property.

In addition, making this change cleans up the code that parses the
LMB information as we no longer need to worry about device tree
format. This allows us to discard one of the first steps on memory
hotplug where we make a working copy of the device tree property and
convert the entire property to cpu format. Instead we just use the
LMB array directly while holding the memory hotplug lock.

This patch also moves the updating of the device tree property to
powerpc/mm/drmem.c. This allows the hotplug code to work without
needing to know the device tree format and provides a single
routine for updating the device tree property. This new routine
will handle determination of the proper device tree format and
generate a properly formatted device tree property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V2: Correct build issues with uninitialized variables

 arch/powerpc/include/asm/drmem.h|   18 +
 arch/powerpc/mm/drmem.c |   80 
 arch/powerpc/platforms/pseries/hotplug-memory.c |  516 +--
 3 files changed, 296 insertions(+), 318 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 679da30a1bea..96d7a908146f 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -40,9 +40,27 @@ static inline u32 drmem_lmb_size(void)
return drmem_info->lmb_size;
 }
 
+#define DRMEM_LMB_RESERVED 0x8000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+   lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+   lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+   return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
 u64 drmem_lmb_memory_max(void);
 void __init walk_drmem_lmbs(struct device_node *dn,
void (*func)(struct drmem_lmb *, const __be32 **));
+int drmem_update_dt(void);
 
 #ifdef CONFIG_PPC_PSERIES
 void __init walk_drmem_lmbs_early(unsigned long node,
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 5888ac3ca8a9..e9cf79eb1257 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -29,6 +29,86 @@ u64 drmem_lmb_memory_max(void)
return last_lmb->base_addr + drmem_lmb_size();
 }
 
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+   /* Return the value of the lmb flags field minus the reserved
+* bit used internally for hotplug processing.
+*/
+   return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+   struct property *new_prop;
+
+   new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+   if (!new_prop)
+   return NULL;
+
+   new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+   new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+   if (!new_prop->name || !new_prop->value) {
+   kfree(new_prop->name);
+   kfree(new_prop->value);
+   kfree(new_prop);
+   return NULL;
+   }
+
+   new_prop->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+   of_property_set_flag(new_prop, OF_DYNAMIC);
+#endif
+   return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+ struct property *prop)
+{
+   struct property *new_prop;
+   struct of_drconf_cell *dr_cell;
+   struct drmem_lmb *lmb;
+   u32 *p;
+
+   new_prop = clone_property(prop, prop->length);
+   if (!new_prop)
+   return -1;
+
+   p = new_prop->value;
+   *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+   dr_cell = (struct of_drconf_cell *)p;
+
+   for_each_drmem_lmb(lmb) {
+   dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+   dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+   dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+   dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+   dr_cell++;
+   }
+
+   of_update_property(memory, new_prop);
+   return 0;
+}
+
+int drmem_update_dt(void)
+{
+   struct device_node *memory;
+   struct property *prop;
+   int rc = -1;
+
+   memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!memory)
+   return -1;
+
+   prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+   if (prop)
+   rc = drmem_update_dt_v1(memory, prop);
+
+   of_node_put(memory);
+   return rc;
+}

[PATCH V3 5/9] powerpc/numa: Update numa code use walk_drmem_lmbs

2017-12-01 Thread Nathan Fontenot
Update code in powerpc/numa.c to use the walk_drmem_lmbs()
routine instead of parsing the device tree directly. This is
in anticipation of introducing a new ibm,dynamic-memory-v2
property with a different format. This will allow the numa code
to use a single initialization routine per-LMB regardless of
the device tree format.

Additionally, to support additional routines in numa.c that need
to look up LMB information, a late_init routine is added to drmem.c
to allocate the array of LMB information. This LMB array will provide
per-LMB information to separate the LMB data from the device tree
format.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V3: Convert numa.c to use walk_drmem_lmbs routine and
add the late init call for allocating the LMB array.

Updates for V2: Removed unused device node parameter to
numa_setup_drmem_lmbs() and hot_add_drconf_scn_to_nid().

 arch/powerpc/include/asm/drmem.h |4 +
 arch/powerpc/mm/drmem.c  |  100 +
 arch/powerpc/mm/numa.c   |  222 +-
 3 files changed, 154 insertions(+), 172 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index c7fc5c4d8a7c..679da30a1bea 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -40,6 +40,10 @@ static inline u32 drmem_lmb_size(void)
return drmem_info->lmb_size;
 }
 
+u64 drmem_lmb_memory_max(void);
+void __init walk_drmem_lmbs(struct device_node *dn,
+   void (*func)(struct drmem_lmb *, const __be32 **));
+
 #ifdef CONFIG_PPC_PSERIES
 void __init walk_drmem_lmbs_early(unsigned long node,
void (*func)(struct drmem_lmb *, const __be32 **));
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index f8ee0f355405..5888ac3ca8a9 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -21,7 +21,14 @@
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
-#ifdef CONFIG_PPC_PSERIES
+u64 drmem_lmb_memory_max(void)
+{
+   struct drmem_lmb *last_lmb;
+
+   last_lmb = _info->lmbs[drmem_info->n_lmbs - 1];
+   return last_lmb->base_addr + drmem_lmb_size();
+}
+
 static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
   const __be32 **prop)
 {
@@ -52,6 +59,7 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, 
const __be32 *usm,
}
 }
 
+#ifdef CONFIG_PPC_PSERIES
 void __init walk_drmem_lmbs_early(unsigned long node,
void (*func)(struct drmem_lmb *, const __be32 **))
 {
@@ -74,3 +82,93 @@ void __init walk_drmem_lmbs_early(unsigned long node,
 }
 
 #endif
+
+static int __init init_drmem_lmb_size(struct device_node *dn)
+{
+   const __be32 *prop;
+   int len;
+
+   if (drmem_info->lmb_size)
+   return 0;
+
+   prop = of_get_property(dn, "ibm,lmb-size", );
+   if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+   pr_info("Could not determine LMB size\n");
+   return -1;
+   }
+
+   drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, );
+   return 0;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+   const __be32 *prop;
+   u32 len;
+
+   prop = of_get_property(dn, "linux,drconf-usable-memory", );
+   if (!prop || len < sizeof(unsigned int))
+   return NULL;
+
+   return prop;
+}
+
+void __init walk_drmem_lmbs(struct device_node *dn,
+   void (*func)(struct drmem_lmb *, const __be32 **))
+{
+   const __be32 *prop, *usm;
+
+   if (init_drmem_lmb_size(dn))
+   return;
+
+   usm = of_get_usable_memory(dn);
+
+   prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+   if (prop)
+   __walk_drmem_v1_lmbs(prop, usm, func);
+}
+
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+   struct drmem_lmb *lmb;
+
+   drmem_info->n_lmbs = of_read_number(prop++, 1);
+
+   drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+  GFP_KERNEL);
+   if (!drmem_info->lmbs)
+   return;
+
+   for_each_drmem_lmb(lmb)
+   read_drconf_v1_cell(lmb, );
+}
+
+static int __init drmem_init(void)
+{
+   struct device_node *dn;
+   const __be32 *prop;
+
+   dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!dn) {
+   pr_info("No dynamic reconfiguration memory found\n");
+   return 0;
+   }
+
+   if (init_drmem_lmb_size(dn)) {
+   of_node_put(dn);
+   

[PATCH V3 4/9] powerpc/mm: Separate ibm,dynamic-memory data from DT format

2017-12-01 Thread Nathan Fontenot
We currently have code to parse the dynamic reconfiguration LMB
information from the ibm,dynamic-memory device tree property in
multiple locations; numa.c, prom.c, and pseries/hotplug-memory.c.
In anticipation of adding support for a version 2 of the
ibm,dynamic-memory property this patch aims to separate the device
tree information from the device tree format.

Doing this requires a two step process to avoid a possibly very large
bootmem allocation early in boot. During initial boot, new routines
are provided to walk the device tree property and make a call-back
for each LMB.

The second step (introduced in later patches) will allocate an
array of LMB information that can be used directly without needing
to know the DT format.

This approach provides the benefit of consolidating the device tree
property parsing to a single location and (eventually) providing
a common data structure for retrieving LMB information.

This patch introduces a routine to walk the ibm,dynamic-memory
property in the flattened device tree and updates the prom.c code
to use this to initialize memory.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V3: Converted from doing a bootmem allocation early in
boot for the LMB array to providing a routine to walk the memory
property and provide a callback for each LMB.

Updates for V2: Correct build break for non-pseries builds.

 arch/powerpc/include/asm/drmem.h |   48 
 arch/powerpc/kernel/prom.c   |  114 --
 arch/powerpc/mm/Makefile |2 -
 arch/powerpc/mm/drmem.c  |   76 +
 4 files changed, 173 insertions(+), 67 deletions(-)
 create mode 100644 arch/powerpc/include/asm/drmem.h
 create mode 100644 arch/powerpc/mm/drmem.c

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644
index ..c7fc5c4d8a7c
--- /dev/null
+++ b/arch/powerpc/include/asm/drmem.h
@@ -0,0 +1,48 @@
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+struct drmem_lmb {
+   u64 base_addr;
+   u32 drc_index;
+   u32 aa_index;
+   u32 flags;
+};
+
+struct drmem_lmb_info {
+   struct drmem_lmb*lmbs;
+   int n_lmbs;
+   u32 lmb_size;
+};
+
+extern struct drmem_lmb_info *drmem_info;
+
+#define for_each_drmem_lmb_in_range(lmb, start, end)   \
+   for ((lmb) = (start); (lmb) <= (end); (lmb)++)
+
+#define for_each_drmem_lmb(lmb)\
+   for_each_drmem_lmb_in_range((lmb),  \
+   _info->lmbs[0],   \
+   _info->lmbs[drmem_info->n_lmbs - 1])
+
+static inline u32 drmem_lmb_size(void)
+{
+   return drmem_info->lmb_size;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init walk_drmem_lmbs_early(unsigned long node,
+   void (*func)(struct drmem_lmb *, const __be32 **));
+#endif
+#endif
+
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b15bae265c90..b800f1acc4fc 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -455,92 +456,73 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned 
long node,
 
 #ifdef CONFIG_PPC_PSERIES
 /*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+   const __be32 **usm)
 {
-   const __be32 *dm, *ls, *usm;
-   int l;
-   unsigned long n, flags;
-   u64 base, size, memblock_size;
-   unsigned int is_kexec_kdump = 0, rngs;
-
-   ls = of_get_flat_dt_prop(node, "ibm,lmb-size", );
-   if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
-   return 0;
-   memblock_size = dt_mem_next_cell(dt_root_size_cells, );
+   u64 base, size;
+   int is_kexec_kdump = 0, rngs;
 
-   dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", );
-   if (dm == NULL || l < sizeof(__be32))
-   return 0;
+   base = lmb->base_addr;
+   size = drmem_lmb_size();
+   rngs = 1;
 
-   n = of_read_number(dm++, 1);/* number of entri

[PATCH V3 3/9] powerpc/numa: Look up associativity array in of_drconf_to_nid_single

2017-12-01 Thread Nathan Fontenot
Look up the associativity arrays in of_drconf_to_nid_single when
deriving the nid for a LMB instead of having it passed in as a
parameter.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c |   40 +---
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index baba6403488b..d25278adaead 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -509,26 +509,30 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
  */
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
-  struct assoc_arrays *aa)
+static int of_drconf_to_nid_single(struct of_drconf_cell *drmem)
 {
+   struct assoc_arrays aa = { .arrays = NULL };
int default_nid = 0;
int nid = default_nid;
-   int index;
+   int rc, index;
+
+   rc = of_get_assoc_arrays();
+   if (rc)
+   return default_nid;
 
-   if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
+   if (min_common_depth > 0 && min_common_depth <= aa.array_sz &&
!(drmem->flags & DRCONF_MEM_AI_INVALID) &&
-   drmem->aa_index < aa->n_arrays) {
-   index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
-   nid = of_read_number(>arrays[index], 1);
+   drmem->aa_index < aa.n_arrays) {
+   index = drmem->aa_index * aa.array_sz + min_common_depth - 1;
+   nid = of_read_number([index], 1);
 
if (nid == 0x || nid >= MAX_NUMNODES)
nid = default_nid;
 
if (nid > 0) {
-   index = drmem->aa_index * aa->array_sz;
+   index = drmem->aa_index * aa.array_sz;
initialize_distance_lookup_table(nid,
-   >arrays[index]);
+   [index]);
}
}
 
@@ -664,10 +668,9 @@ static inline int __init read_usm_ranges(const __be32 
**usm)
 static void __init parse_drconf_memory(struct device_node *memory)
 {
const __be32 *uninitialized_var(dm), *usm;
-   unsigned int n, rc, ranges, is_kexec_kdump = 0;
+   unsigned int n, ranges, is_kexec_kdump = 0;
unsigned long lmb_size, base, size, sz;
int nid;
-   struct assoc_arrays aa = { .arrays = NULL };
 
n = of_get_drconf_memory(memory, );
if (!n)
@@ -677,10 +680,6 @@ static void __init parse_drconf_memory(struct device_node 
*memory)
if (!lmb_size)
return;
 
-   rc = of_get_assoc_arrays();
-   if (rc)
-   return;
-
/* check if this is a kexec/kdump kernel */
usm = of_get_usable_memory();
if (usm != NULL)
@@ -711,7 +710,7 @@ static void __init parse_drconf_memory(struct device_node 
*memory)
base = read_n_cells(n_mem_addr_cells, );
size = read_n_cells(n_mem_size_cells, );
}
-   nid = of_drconf_to_nid_single(, );
+   nid = of_drconf_to_nid_single();
fake_numa_create_new_node(
((base + size) >> PAGE_SHIFT),
   );
@@ -999,9 +998,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node 
*memory,
 unsigned long scn_addr)
 {
const __be32 *dm;
-   unsigned int drconf_cell_cnt, rc;
+   unsigned int drconf_cell_cnt;
unsigned long lmb_size;
-   struct assoc_arrays aa;
int nid = -1;
 
drconf_cell_cnt = of_get_drconf_memory(memory, );
@@ -1012,10 +1010,6 @@ static int hot_add_drconf_scn_to_nid(struct device_node 
*memory,
if (!lmb_size)
return -1;
 
-   rc = of_get_assoc_arrays();
-   if (rc)
-   return -1;
-
for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
struct of_drconf_cell drmem;
 
@@ -1031,7 +1025,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node 
*memory,
|| (scn_addr >= (drmem.base_addr + lmb_size)))
continue;
 
-   nid = of_drconf_to_nid_single(, );
+   nid = of_drconf_to_nid_single();
break;
}
 



[PATCH V3 2/9] powerpc/numa: Look up device node in of_get_usable_memory()

2017-12-01 Thread Nathan Fontenot
Look up the device node for the usable memory property instead
of having it passed in as a parameter. This change precedes an update
in which the calling routines for of_get_usable_memory() will not have
the device node pointer to pass in.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c |   12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a0214aa2dd78..baba6403488b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -184,11 +184,19 @@ static const __be32 *of_get_associativity(struct 
device_node *dev)
  * it exists (the property exists only in kexec/kdump kernels,
  * added by kexec-tools)
  */
-static const __be32 *of_get_usable_memory(struct device_node *memory)
+static const __be32 *of_get_usable_memory(void)
 {
+   struct device_node *memory;
const __be32 *prop;
u32 len;
+
+   memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!memory)
+   return NULL;
+
prop = of_get_property(memory, "linux,drconf-usable-memory", );
+   of_node_put(memory);
+
if (!prop || len < sizeof(unsigned int))
return NULL;
return prop;
@@ -674,7 +682,7 @@ static void __init parse_drconf_memory(struct device_node 
*memory)
return;
 
/* check if this is a kexec/kdump kernel */
-   usm = of_get_usable_memory(memory);
+   usm = of_get_usable_memory();
if (usm != NULL)
is_kexec_kdump = 1;
 



[PATCH V3 1/9] powerpc/numa: Look up device node in of_get_assoc_arrays()

2017-12-01 Thread Nathan Fontenot
Look up the device node for the associativity array property instead
of having it passed in as a parameter. This change precedes an update
in which the calling routines for of_get_assoc_arrays() will not have
the device node pointer to pass in.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c |   18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index adb6364f4091..a0214aa2dd78 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -466,19 +466,27 @@ struct assoc_arrays {
  * indicating the size of each associativity array, followed by a list
  * of N associativity arrays.
  */
-static int of_get_assoc_arrays(struct device_node *memory,
-  struct assoc_arrays *aa)
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
 {
+   struct device_node *memory;
const __be32 *prop;
u32 len;
 
+   memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!memory)
+   return -1;
+
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
-   if (!prop || len < 2 * sizeof(unsigned int))
+   if (!prop || len < 2 * sizeof(unsigned int)) {
+   of_node_put(memory);
return -1;
+   }
 
aa->n_arrays = of_read_number(prop++, 1);
aa->array_sz = of_read_number(prop++, 1);
 
+   of_node_put(memory);
+
/* Now that we know the number of arrays and size of each array,
 * revalidate the size of the property read in.
 */
@@ -661,7 +669,7 @@ static void __init parse_drconf_memory(struct device_node 
*memory)
if (!lmb_size)
return;
 
-   rc = of_get_assoc_arrays(memory, &aa);
+   rc = of_get_assoc_arrays(&aa);
if (rc)
return;
 
@@ -996,7 +1004,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node 
*memory,
if (!lmb_size)
return -1;
 
-   rc = of_get_assoc_arrays(memory, &aa);
+   rc = of_get_assoc_arrays(&aa);
if (rc)
return -1;
 



[PATCH V3 0/9] powerpc: Support for ibm,dynamic-memory-v2

2017-12-01 Thread Nathan Fontenot
This patch set provides a series of updates to de-couple the LMB
information provided in the device tree property from the device
tree property format. This eases the ability to support a new
format for the dynamic memory property, ibm,dynamic-memory-v2.

This series of patches consolidates the routines for parsing the
LMB device tree properties into a new file, powerpc/mm/drmem.c,
and provides the ability to retrieve LMB information without having
to know the backing device tree format.

To do this, a set of routines are introduced that will walk the
device tree property and make a call back for each LMB represented
in the device tree. These are to be used by init routines during
boot, currently in prom.c and numa.c.

A late_initcall is used to allocate and initialize a LMB array to
provide a common data structure of per-LMB data. This array provides
a data structure to retrieve LMB information without knowing the
backing tree format. This is used in numa.c and pseries/hotplug-memory.c.

This is a big design change from the first two versions of the patch set
that attempted to allocate the LMB array with bootmem very early.
Testing showed that this allocation can fail which pushed the change
to the current design.

To support memory hotplug needing to update the device tree, a
common routine is introduced to create a new copy of the device
tree property in the proper format.

The first three patches update the of_get_assoc_arrays(),
of_get_usable_memory(), and of_drconf_to_nid_single() routines
to do device tree lookups for information they need instead of having
the nodes/properties passed in. These are updates needed for later
changes.

The fourth patch adds the walk_drmem_lmbs_early() routine to provide
parsing of the flattened device tree and make a per-LMB call back,
used in prom.c

The fifth patch provides a walk_drmem_lmbs() routine to parse the
device tree and provide a per-LMB call back, used in numa.c. This
also allocates and initializes the LMB array, and updates numa.c
to use the array.

The sixth patch updates the pseries hotplug code to use the new LMB array
data instead of parsing the device tree directly, and introduces the
common routine to create a new device tree property.

The seventh patch moves the of_drconf_cell struct to drmem.h where it
fits better than prom.h

The eighth patch introduces support for the ibm,dynamic-memory-v2
property format by updating the new drmem.c code to be able to parse
and create this new device tree format.

The last patch in the series updates the architecture vector to indicate
support for ibm,dynamic-memory-v2.

-Nathan
---

Nathan Fontenot (9):
  powerpc/numa: Look up device node in of_get_assoc_arrays()
  powerpc/numa: Look up device node in of_get_usable_memory()
  powerpc/numa: Look up associativity array in of_drconf_to_nid_single
  powerpc/mm: Separate ibm,dynamic-memory data from DT format
  powerpc/numa: Update numa code use walk_drmem_lmbs
  powerpc/pseries: Update memory hotplug code to use drmem LMB array
  powerpc: Move of_drconf_cell struct to asm/drmem.h
  powerpc/drmem: Add support for ibm,dynamic-memory-v2 property
  powerpc: Enable support of ibm,dynamic-memory-v2


 arch/powerpc/include/asm/drmem.h|  100 
 arch/powerpc/include/asm/firmware.h |3 
 arch/powerpc/include/asm/prom.h |   17 -
 arch/powerpc/kernel/prom.c  |  114 ++---
 arch/powerpc/kernel/prom_init.c |1 
 arch/powerpc/mm/Makefile|2 
 arch/powerpc/mm/drmem.c |  438 +++
 arch/powerpc/mm/numa.c  |  252 +++
 arch/powerpc/platforms/pseries/firmware.c   |1 
 arch/powerpc/platforms/pseries/hotplug-memory.c |  522 +--
 10 files changed, 864 insertions(+), 586 deletions(-)
 create mode 100644 arch/powerpc/include/asm/drmem.h
 create mode 100644 arch/powerpc/mm/drmem.c



Re: Resend: [PATCH V5 3/4] hotplug/drc-info: Add code to search ibm,drc-info property

2017-11-30 Thread Nathan Fontenot


On 11/28/2017 05:07 PM, Michael Bringmann wrote:
> rpadlpar_core.c: Provide parallel routines to search the older device-
> tree properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> The interface to examine the DRC information is changed from a "get"
> function that returns values for local verification elsewhere, to a
> "check" function that validates the 'name' and/or 'type' of a device
> node.  This update hides the format of the underlying device-tree
> properties, and concentrates the value checks into a single function
> without requiring the user to verify whether a search was successful.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V5:
>   -- Simplify of_prop_next_u32 invocation
>   -- Fix some spacing within arguments
> ---
>  drivers/pci/hotplug/rpadlpar_core.c |   13 ++--
>  drivers/pci/hotplug/rpaphp.h|4 +
>  drivers/pci/hotplug/rpaphp_core.c   |  109 
> +++
>  3 files changed, 91 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
> b/drivers/pci/hotplug/rpadlpar_core.c
> index a3449d7..fc01d7d 100644
> --- a/drivers/pci/hotplug/rpadlpar_core.c
> +++ b/drivers/pci/hotplug/rpadlpar_core.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "../pci.h"
>  #include "rpaphp.h"
> @@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char 
> *drc_name)
>  {
>   struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
>   struct device_node *dn = NULL;
> - char *name;
>   int rc;
> 
>   if (!parent)
>   return NULL;
> 
>   while ((dn = of_get_next_child(parent, dn))) {
> - rc = rpaphp_get_drc_props(dn, NULL, &name, NULL, NULL);
> - if ((rc == 0) && (!strcmp(drc_name, name)))
> + rc = rpaphp_check_drc_props(dn, drc_name, NULL);
> + if (rc == 0)
>   break;
>   }
> 
> @@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char 
> *drc_name,
> char *drc_type)
>  {
>   struct device_node *np = NULL;
> - char *name;
> - char *type;
>   int rc;
> 
>   while ((np = of_find_node_by_name(np, "pci"))) {
> - rc = rpaphp_get_drc_props(np, NULL, &name, &type, NULL);
> + rc = rpaphp_check_drc_props(np, drc_name, drc_type);
>   if (rc == 0)
> - if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
> - break;
> + break;
>   }
> 
>   return np;
> diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
> index 7db024e..8db5f2e 100644
> --- a/drivers/pci/hotplug/rpaphp.h
> +++ b/drivers/pci/hotplug/rpaphp.h
> @@ -91,8 +91,8 @@ struct slot {
> 
>  /* rpaphp_core.c */
>  int rpaphp_add_slot(struct device_node *dn);
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain);
> +int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
> + char *drc_type);
> 
>  /* rpaphp_slot.c */
>  void dealloc_slot_struct(struct slot *slot);
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index 1e29aba..6da613a 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include/* for eeh_add_device() */
>  #include /* rtas_call */
>  #include   /* for pci_controller */
> @@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, 
> const int **drc_indexes,
>   return 0;
>  }
> 
> -/* To get the DRC props describing the current node, first obtain it's
> - * my-drc-index property.  Next obtain the DRC list from it's parent.  Use
> - * the my-drc-index for correlation, and obtain the requested properties.
> +
> +/* Verify the existence of 'drc_name' and/or 'drc_type' within the
> + * current node.  First obtain it's my-drc-index property.  Next,
> + * obtain the DRC info from it's parent.  Use the my-drc-index for
> + * correlation, and obtain/validate the requested properties.
>   */
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain)
> +
> +static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
> + char *drc_type, unsigned int my_index)
>  {
> + char *name_tmp, *type_tmp;
>   const int *indexes, *names;
>   const int *types, *domains;
> - const unsigned int *my_index;
> - char *name_tmp, *type_tmp;
>   int i, rc;
> 
> - my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
> - if (!my_index) {
> - /* Node 

Re: Resend: [PATCH V5 2/4] pseries/drc-info: Search DRC properties for CPU indexes

2017-11-30 Thread Nathan Fontenot
On 11/28/2017 05:07 PM, Michael Bringmann wrote:
> pseries/drc-info: Provide parallel routines to convert between
> drc_index and CPU numbers at runtime, using the older device-tree
> properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V5:
>   -- Simplify of_prop_next_u32 invocation
>   -- Remove unnecessary WARN_ON() tests

I'm not sure the WARN_ON()'s that you removed are unnecessary; I had just asked
that they get moved to read_drc_info_cell(). If you think they are not needed,
perhaps make them pr_debug() instead.
 
> ---
>  arch/powerpc/include/asm/prom.h |   15 +++
>  arch/powerpc/platforms/pseries/of_helpers.c |   60 +++
>  arch/powerpc/platforms/pseries/pseries_energy.c |  126 
> ++-
>  3 files changed, 173 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index 3243455..0ef41b1 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -96,6 +96,21 @@ struct of_drconf_cell {
>  #define DRCONF_MEM_AI_INVALID0x0040
>  #define DRCONF_MEM_RESERVED  0x0080
> 
> +struct of_drc_info {
> + char *drc_type;
> + char *drc_name_prefix;
> + u32 drc_index_start;
> + u32 drc_name_suffix_start;
> + u32 num_sequential_elems;
> + u32 sequential_inc;
> + u32 drc_power_domain;
> + u32 last_drc_index;
> +};
> +
> +extern int of_read_drc_info_cell(struct property **prop,
> + const __be32 **curval, struct of_drc_info *data);
> +
> +
>  /*
>   * There are two methods for telling firmware what our capabilities are.
>   * Newer machines have an "ibm,client-architecture-support" method on the
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 7e75101..6df192f 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -3,6 +3,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "of_helpers.h"
> 
> @@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char 
> *path)
>   kfree(parent_path);
>   return parent ? parent : ERR_PTR(-EINVAL);
>  }
> +
> +
> +/* Helper Routines to convert between drc_index to cpu numbers */
> +
> +int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
> + struct of_drc_info *data)
> +{
> + const char *p;
> + const __be32 *p2;
> +
> + if (!data)
> + return -EINVAL;
> +
> + /* Get drc-type:encode-string */
> + p = data->drc_type = (char*) (*curval);
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-name-prefix:encode-string */
> + data->drc_name_prefix = (char *)p;
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-index-start:encode-int */
> + p2 = (const __be32 *)p;
> + p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get drc-name-suffix-start:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get number-sequential-elements:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get sequential-increment:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get drc-power-domain:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Should now know end of current entry */
> + (*curval) = (void *)p2;
> + data->last_drc_index = data->drc_index_start +
> + ((data->num_sequential_elems - 1) * data->sequential_inc);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(of_read_drc_info_cell);
> diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
> b/arch/powerpc/platforms/pseries/pseries_energy.c
> index 35c891a..f96677b 100644
> --- a/arch/powerpc/platforms/pseries/pseries_energy.c
> +++ b/arch/powerpc/platforms/pseries/pseries_energy.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
> 
>  #define MODULE_VERS "1.0"
> @@ -38,26 +39,58 @@
>  static u32 cpu_to_drc_index(int cpu)
>  {
>   struct device_node *dn = NULL;
> - const int *indexes;
> - int i;
> + int thread_index;
>   int rc = 1;
>   u32 ret = 0;
> 
>   dn = of_find_node_by_path("/cpus");
>   if (dn == NULL)
>   goto err;
> - indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
> - if (indexes 

Re: [PATCH V7 3/3] hotplug/cpu: Fix crash with memoryless nodes

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 11:28 AM, Michael Bringmann wrote:
> On powerpc systems with shared configurations of CPUs and memory and
> memoryless nodes at boot, an event ordering problem was observed on
> a SLES12 build platforms with the hot-add of CPUs to the memoryless
> nodes.
> 
> * The most common error occurred when the memory SLAB driver attempted
>   to reference the memoryless node to which a CPU was being added
>   before the kernel had finished initializing all of the data structures
>   for the CPU and exited 'device_online' under DLPAR/hot-add.
> 
>   Normally the memoryless node would be initialized through the call
>   path device_online ... arch_update_cpu_topology ... find_cpu_nid
>   ...  try_online_node.  This patch ensures that the powerpc node will
>   be initialized as early as possible, even if it was memoryless and
>   CPU-less at the point when we are trying to hot-add a new CPU to it.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V7:
>   -- Make function find_cpu_nid() externally visible/usable so that
>  it may be used from hotplug-cpu.c
> ---
>  arch/powerpc/mm/numa.c   |3 ++-
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |3 +++
>  2 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 163f4cc..d6d4f7c 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -1310,7 +1310,7 @@ static long vphn_get_associativity(unsigned long cpu,
>   return rc;
>  }
> 
> -static inline int find_cpu_nid(int cpu)
> +int find_cpu_nid(int cpu)
>  {
>   __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>   int new_nid;
> @@ -1343,6 +1343,7 @@ static inline int find_cpu_nid(int cpu)
>  #endif
>   }
> 
> + printk(KERN_INFO "%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__, cpu, 
> new_nid);

This seems like a more likely pr_debug statement.

>   return new_nid;
>  }
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a7d14aa7..df8c732 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node 
> *np)
>   cpu_maps_update_done();
>  }
> 
> +extern int find_cpu_nid(int cpu);
> +
>  static int dlpar_online_cpu(struct device_node *dn)
>  {
>   int rc = 0;
> @@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn)
>   != CPU_STATE_OFFLINE);
>   cpu_maps_update_done();
>   timed_topology_update(1);
> + find_cpu_nid(cpu);

We don't use the returned node from this call, so I'm not sure why it gets
called. Perhaps it's the possible call to try_online_node() that may get called
in find_cpu_nid(); if so, perhaps naming the routine something slightly
different would be good, like find_and_online_cpu_nid?

-Nathan
>   rc = device_online(get_cpu_device(cpu));
>   if (rc)
>   goto out;
> 



Re: RESEND [PATCH V7 2/3] poserpc/initnodes: Ensure nodes initialized for hotplug

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 11:27 AM, Michael Bringmann wrote:
> On powerpc systems which allow 'hot-add' of CPU, it may occur that
> the new resources are to be inserted into nodes that were not used
> for memory resources at bootup.  Many different configurations of
> PowerPC resources may need to be supported depending upon the
> environment.  Important characteristics of the nodes and operating
> environment include:
> 
> * Dedicated vs. shared resources.  Shared resources require

This should read "Shared CPUs require..." since shared CPUs have their
affinity set to node 0 at boot and when hot-added.

>   information such as the VPHN hcall for CPU assignment to nodes.
>   Associativity decisions made based on dedicated resource rules,
>   such as associativity properties in the device tree, may vary
>   from decisions made using the values returned by the VPHN hcall.
> * memoryless nodes at boot.  Nodes need to be defined as 'possible'
>   at boot for operation with other code modules.  Previously, the
>   powerpc code would limit the set of possible nodes to those which
>   have memory assigned at boot, and were thus online.  Subsequent
>   add/remove of CPUs or memory would only work with this subset of
>   possible nodes.
> * memoryless nodes with CPUs at boot.  Due to the previous restriction
>   on nodes, nodes that had CPUs but no memory were being collapsed
>   into other nodes that did have memory at boot.  In practice this
>   meant that the node assignment presented by the runtime kernel
>   differed from the affinity and associativity attributes presented
>   by the device tree or VPHN hcalls.  Nodes that might be known to
>   the pHyp were not 'possible' in the runtime kernel because they did
>   not have memory at boot.
> 
> This patch fixes some problems encountered at runtime with
> configurations that support memory-less nodes, or that hot-add CPUs
> into nodes that are memoryless during system execution after boot.
> The problems of interest include,
> 
> * Nodes known to powerpc to be memoryless at boot, but to have
>   CPUs in them are allowed to be 'possible' and 'online'.  Memory
>   allocations for those nodes are taken from another node that does
>   have memory until and if memory is hot-added to the node.
> * Nodes which have no resources assigned at boot, but which may still
>   be referenced subsequently by affinity or associativity attributes,
>   are kept in the list of 'possible' nodes for powerpc.  Hot-add of
>   memory or CPUs to the system can reference these nodes and bring
>   them online instead of redirecting the references to one of the set
>   of nodes known to have memory at boot.
> 
> Note that this software operates under the context of CPU hotplug.
> We are not doing memory hotplug in this code, but rather updating
> the kernel's CPU topology (i.e. arch_update_cpu_topology /
> numa_update_cpu_topology).  We are initializing a node that may be
> used by CPUs or memory before it can be referenced as invalid by a
> CPU hotplug operation.  CPU hotplug operations are protected by a
> range of APIs including cpu_maps_update_begin/cpu_maps_update_done,
> cpus_read/write_lock / cpus_read/write_unlock, device locks, and more.
> Memory hotplug operations, including try_online_node, are protected
> by mem_hotplug_begin/mem_hotplug_done, device locks, and more.  In
> the case of CPUs being hot-added to a previously memoryless node, the
> try_online_node operation occurs wholly within the CPU locks with no
> overlap.  Using HMC hot-add/hot-remove operations, we have been able
> to add and remove CPUs to any possible node without failures.  HMC
> operations involve a degree self-serialization, though.

This may be able to be stated as simply saying that cpu hotplug operations
are serialized with the device_hotplug_lock.

> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V6:
>   -- Add some needed node initialization to runtime code that maps
>  CPUs based on VPHN associativity
>   -- Add error checks and alternate recovery for compile flag
>  CONFIG_MEMORY_HOTPLUG
>   -- Add alternate node selection recovery for !CONFIG_MEMORY_HOTPLUG
>   -- Add more information to the patch introductory text
> ---
>  arch/powerpc/mm/numa.c |   51 
> ++--
>  1 file changed, 40 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 334a1ff..163f4cc 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long lcpu)
>   nid = of_node_to_nid_single(cpu);
> 
>  out_present:
> - if (nid < 0 || !node_online(nid))
> + if (nid < 0 || !node_possible(nid))
>   nid = first_online_node;
> 
>   map_cpu_to_node(lcpu, nid);
> @@ -867,7 +867,7 @@ void __init dump_numa_cpu_topology(void)
>  }
> 
>  /* Initialize NODE_DATA for a node on the local memory */
> -static void __init setup_node_data(int 

Re: [PATCH V7 1/3] powerpc/nodes: Ensure enough nodes avail for operations

2017-11-20 Thread Nathan Fontenot


On 11/16/2017 11:24 AM, Michael Bringmann wrote:
> On powerpc systems which allow 'hot-add' of CPU or memory resources,
> it may occur that the new resources are to be inserted into nodes
> that were not used for these resources at bootup.  In the kernel,
> any node that is used must be defined and initialized.  These empty
> nodes may occur when,
> 
> * Dedicated vs. shared resources.  Shared resources require
>   information such as the VPHN hcall for CPU assignment to nodes.
>   Associativity decisions made based on dedicated resource rules,
>   such as associativity properties in the device tree, may vary
>   from decisions made using the values returned by the VPHN hcall.
> * memoryless nodes at boot.  Nodes need to be defined as 'possible'
>   at boot for operation with other code modules.  Previously, the
>   powerpc code would limit the set of possible nodes to those which
>   have memory assigned at boot, and were thus online.  Subsequent
>   add/remove of CPUs or memory would only work with this subset of
>   possible nodes.
> * memoryless nodes with CPUs at boot.  Due to the previous restriction
>   on nodes, nodes that had CPUs but no memory were being collapsed
>   into other nodes that did have memory at boot.  In practice this
>   meant that the node assignment presented by the runtime kernel
>   differed from the affinity and associativity attributes presented
>   by the device tree or VPHN hcalls.  Nodes that might be known to
>   the pHyp were not 'possible' in the runtime kernel because they did
>   not have memory at boot.
> 
> This patch ensures that sufficient nodes are defined to support
> configuration requirements after boot, as well as at boot.  This
> patch set fixes a couple of problems.
> 
> * Nodes known to powerpc to be memoryless at boot, but to have
>   CPUs in them are allowed to be 'possible' and 'online'.  Memory
>   allocations for those nodes are taken from another node that does
>   have memory until and if memory is hot-added to the node.
> * Nodes which have no resources assigned at boot, but which may still
>   be referenced subsequently by affinity or associativity attributes,
>   are kept in the list of 'possible' nodes for powerpc.  Hot-add of
>   memory or CPUs to the system can reference these nodes and bring
>   them online instead of redirecting to one of the set of nodes that
>   were known to have memory at boot.
> 
> This patch extracts the value of the lowest domain level (number of
> allocable resources) from the device tree property
> "ibm,max-associativity-domains" to use as the maximum number of nodes
> to setup as possibly available in the system.  This new setting will
> override the instruction,
> 
> nodes_and(node_possible_map, node_possible_map, node_online_map);
> 
> presently seen in the function arch/powerpc/mm/numa.c:initmem_init().
> 
> If the "ibm,max-associativity-domains" property is not present at boot,
> no operation will be performed to define or enable additional nodes, or
> enable the above 'nodes_and()'.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V6:
>   -- Remove some node initialization/allocation from boot setup
>  to later in runtime to try to limit memory needs early on
>   -- Augment descriptive documentation for patch
> ---
>  arch/powerpc/mm/numa.c |   40 +---
>  1 file changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index eb604b3..334a1ff 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -892,6 +892,37 @@ static void __init setup_node_data(int nid, u64 
> start_pfn, u64 end_pfn)
>   NODE_DATA(nid)->node_spanned_pages = spanned_pages;
>  }
> 
> +static void __init find_possible_nodes(void)
> +{
> + struct device_node *rtas;
> + u32 numnodes, i;
> +
> + if (min_common_depth <= 0)
> + return;
> +
> + rtas = of_find_node_by_path("/rtas");
> + if (!rtas)
> + return;
> +
> + if (of_property_read_u32_index(rtas,
> + "ibm,max-associativity-domains",
> + min_common_depth, &numnodes))
> + goto out;
> +
> + pr_info("numa: Nodes = %d (mcd = %d)\n", numnodes,
> + min_common_depth);

numa.c already has a pr_fmt define, so there is no need to prepend "numa:" to
the information message.

-Nathan

> +
> + for (i = 0; i < numnodes; i++) {
> + if (!node_possible(i)) {
> + setup_node_data(i, 0, 0);
> + node_set(i, node_possible_map);
> + }
> + }
> +
> +out:
> + of_node_put(rtas);
> +}
> +
>  void __init initmem_init(void)
>  {
>   int nid, cpu;
> @@ -905,12 +936,15 @@ void __init initmem_init(void)
>   memblock_dump_all();
> 
>   /*
> -  * Reduce the possible NUMA nodes to the online NUMA nodes,
> -  * since we do not support node hotplug. This ensures that  we
> - 

Re: [PATCH V2 3/3] postmigration/memory: Associativity & ibm,dynamic-memory-v2

2017-11-20 Thread Nathan Fontenot
We may want to wait on this patch. I have been working on patches to separate
the LMB information from the device tree property format. Once those patches
are acceptable we can use a common routine for affinity updates.

-Nathan

On 11/16/2017 11:51 AM, Michael Bringmann wrote:
> postmigration/memory: Now apply changes to the associativity of memory
> blocks described by the 'ibm,dynamic-memory-v2' property regarding
> the topology of LPARS in Post Migration events.
> 
> * Extend the previous work done for the 'ibm,associativity-lookup-array'
>   to apply to either property 'ibm,dynamic-memory' or
>   'ibm,dynamic-memory-v2', whichever is present.
> * Add new code to parse the 'ibm,dynamic-memory-v2' property looking
>   for differences in block 'assignment', associativity indexes per
>   block, and any other difference currently known.
> 
> When block differences are recognized, the memory block may be removed,
> added, or updated depending upon the state of the new device tree
> property and differences from the migrated value of the property.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V2:
>   -- Remove unnecessary spacing changes from patch.
>   -- Improve patch description.
> ---
>  arch/powerpc/include/asm/prom.h |   12 ++
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  169 
> ++-
>  2 files changed, 172 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index 825bd59..e16ef0f 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -92,6 +92,18 @@ struct of_drconf_cell {
>   u32 flags;
>  };
> 
> +/* The of_drconf_cell_v2 struct defines the layout of the LMB array
> + * specified in the device tree property
> + * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory-v2
> + */
> +struct of_drconf_cell_v2 {
> + u32 num_seq_lmbs;
> + u64 base_address;
> + u32 drc_index;
> + u32 aa_index;
> + u32 flags;
> +} __attribute__((packed));
> +
>  #define DRCONF_MEM_ASSIGNED  0x0008
>  #define DRCONF_MEM_AI_INVALID0x0040
>  #define DRCONF_MEM_RESERVED  0x0080
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index b37e6ad..bf9687b 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -1171,14 +1171,111 @@ static int pseries_update_drconf_memory(struct 
> of_reconfig_data *pr)
>   return rc;
>  }
> 
> +static inline int pseries_memory_v2_find_drc(u32 drc_index,
> + u64 *base_addr, unsigned long memblock_size,
> + struct of_drconf_cell_v2 **drmem,
> + struct of_drconf_cell_v2 *last_drmem)
> +{
> + struct of_drconf_cell_v2 *dm = (*drmem);
> +
> + while (dm < last_drmem) {
> + if ((be32_to_cpu(dm->drc_index) <= drc_index) &&
> + (drc_index <= (be32_to_cpu(dm->drc_index)+
> + be32_to_cpu(dm->num_seq_lmbs)-1))) {
> + int offset = drc_index - be32_to_cpu(dm->drc_index);
> + (*base_addr) = be64_to_cpu(dm->base_address) +
> + (offset * memblock_size);
> + break;
> + } else if (drc_index > (be32_to_cpu(dm->drc_index)+
> + be32_to_cpu(dm->num_seq_lmbs)-1)) {
> + dm++;
> + (*drmem) = dm;
> + } else if (be32_to_cpu(dm->drc_index) > drc_index) {
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int pseries_update_drconf_memory_v2(struct of_reconfig_data *pr)
> +{
> + struct of_drconf_cell_v2 *new_drmem, *old_drmem, *last_old_drmem;
> + unsigned long memblock_size;
> + u32 new_entries, old_entries;
> + u64 old_base_addr;
> + __be32 *p;
> + int i, rc = 0;
> +
> + if (rtas_hp_event)
> + return 0;
> +
> + memblock_size = pseries_memory_block_size();
> + if (!memblock_size)
> + return -EINVAL;
> +
> + /* The first int of the property is the number of lmb's
> +  * described by the property. This is followed by an array
> +  * of of_drconf_cell_v2 entries. Get the number of entries
> +  * and skip to the array of of_drconf_cell_v2's.
> +  */
> + p = (__be32 *) pr->old_prop->value;
> + if (!p)
> + return -EINVAL;
> + old_entries = be32_to_cpu(*p++);
> + old_drmem = (struct of_drconf_cell_v2 *)p;
> + last_old_drmem = old_drmem +
> + (sizeof(struct of_drconf_cell_v2) * old_entries);
> +
> + p = (__be32 *)pr->prop->value;
> + new_entries = be32_to_cpu(*p++);
> + new_drmem = (struct of_drconf_cell_v2 *)p;
> +
> + for (i = 0; i < new_entries; i++) {

Re: [PATCH V2 1/3] hotplug/mobility: Apply assoc updates for Post Migration Topo

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 11:50 AM, Michael Bringmann wrote:
> hotplug/mobility: Recognize more changes to the associativity of
> memory blocks described by the 'ibm,dynamic-memory' and 'cpu'
> properties when processing the topology of LPARS in Post Migration
> events.  Previous efforts only recognized whether a memory block's
> assignment had changed in the property.  Changes here include:
> 
> * Checking the aa_index values of the old/new properties and 'readd'
>   any block for which the setting has changed.
> * Checking for changes in cpus and submitting 'readd' ops for them.
> * Creating some common support routines for the submission of memory
>   or cpu 'readd' operations.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V2:
>   -- Try to improve patch header documentation.
> ---
>  arch/powerpc/platforms/pseries/hotplug-cpu.c|   64 
> +++
>  arch/powerpc/platforms/pseries/hotplug-memory.c |6 ++
>  arch/powerpc/platforms/pseries/mobility.c   |   47 +
>  arch/powerpc/platforms/pseries/pseries.h|2 +
>  4 files changed, 109 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index fadb95e..d127c3a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -634,6 +634,27 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
>   return rc;
>  }
> 
> +static int dlpar_cpu_readd_by_index(u32 drc_index)
> +{
> + int rc = 0;
> +
> + pr_info("Attempting to update CPU, drc index %x\n", drc_index);
> +
> + if (dlpar_cpu_remove_by_index(drc_index))
> + rc = -EINVAL;
> + else if (dlpar_cpu_add(drc_index))
> + rc = -EINVAL;
> +
> + if (rc)
> + pr_info("Failed to update cpu at drc_index %lx\n",
> + (unsigned long int)drc_index);
> + else
> + pr_info("CPU at drc_index %lx was updated\n",
> + (unsigned long int)drc_index);
> +
> + return rc;
> +}
> +
>  static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
>  {
>   struct device_node *dn;
> @@ -824,6 +845,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
>   else
>   rc = -EINVAL;
>   break;
> + case PSERIES_HP_ELOG_ACTION_READD:
> + rc = dlpar_cpu_readd_by_index(drc_index);
> + break;
>   default:
>   pr_err("Invalid action (%d) specified\n", hp_elog->action);
>   rc = -EINVAL;
> @@ -874,6 +898,42 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t 
> count)
> 
>  #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
> 
> +static int pseries_update_drconf_cpu(struct of_reconfig_data *pr)

I think we can drop the 'drconf' piece from this function name.

I think you got that from the memory routines that use drconf, which
is really short for dynamic reconfiguration. This was used to state the
routine worked on memory represented in the dynamic-reconfiguration 
properties.

> +{
> + u32 old_entries, new_entries;
> + __be32 *p, *old_assoc, *new_assoc;
> +
> + if (strcmp(pr->dn->type, "cpu"))
> + return 0;
> +
> + /* The first int of the property is the number of domains's
> +  * described.  This is followed by an array of level values.
> +  */
> + p = (__be32 *) pr->old_prop->value;
> + if (!p)
> + return -EINVAL;
> + old_entries = be32_to_cpu(*p++);
> + old_assoc = p;
> +
> + p = (__be32 *)pr->prop->value;
> + if (!p)
> + return -EINVAL;
> + new_entries = be32_to_cpu(*p++);
> + new_assoc = p;
> +
> + if (old_entries == new_entries) {
> + int sz = old_entries * sizeof(int);
> +
> + if (!memcmp(old_assoc, new_assoc, sz))
> + pseries_cpu_readd_by_index(pr->dn->phandle);
> +
> + } else {
> + pseries_cpu_readd_by_index(pr->dn->phandle);
> + }
> +
> + return 0;
> +}
> +
>  static int pseries_smp_notifier(struct notifier_block *nb,
>   unsigned long action, void *data)
>  {
> @@ -887,6 +947,10 @@ static int pseries_smp_notifier(struct notifier_block 
> *nb,
>   case OF_RECONFIG_DETACH_NODE:
>   pseries_remove_processor(rd->dn);
>   break;
> + case OF_RECONFIG_UPDATE_PROPERTY:
> + if (!strcmp(rd->prop->name, "ibm,associativity"))
> + err = pseries_update_drconf_cpu(rd);
> + break;
>   }
>   return notifier_from_errno(err);
>  }
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 1d48ab4..c61cfc6 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -1160,6 +1160,12 @@ static 

Re: Subject: [PATCH V4 3/4] hotplug/drc-info: Add code to search ibm,drc-info property

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 02:11 PM, Michael Bringmann wrote:
> rpadlpar_core.c: Provide parallel routines to search the older device-
> tree properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> The interface to examine the DRC information is changed from a "get"
> function that returns values for local verification elsewhere, to a
> "check" function that validates the 'name' and/or 'type' of a device
> node.  This update hides the format of the underlying device-tree
> properties, and concentrates the value checks into a single function
> without requiring the user to verify whether a search was successful.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V4:
>   -- Rename of_one_drc_info to of_read_drc_info_cell
>   -- Fix some spacing within arguments
> ---
>  drivers/pci/hotplug/rpadlpar_core.c |   13 ++--
>  drivers/pci/hotplug/rpaphp.h|4 +
>  drivers/pci/hotplug/rpaphp_core.c   |  110 
> +++
>  3 files changed, 92 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
> b/drivers/pci/hotplug/rpadlpar_core.c
> index a3449d7..fc01d7d 100644
> --- a/drivers/pci/hotplug/rpadlpar_core.c
> +++ b/drivers/pci/hotplug/rpadlpar_core.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "../pci.h"
>  #include "rpaphp.h"
> @@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char 
> *drc_name)
>  {
>   struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
>   struct device_node *dn = NULL;
> - char *name;
>   int rc;
> 
>   if (!parent)
>   return NULL;
> 
>   while ((dn = of_get_next_child(parent, dn))) {
> - rc = rpaphp_get_drc_props(dn, NULL, , NULL, NULL);
> - if ((rc == 0) && (!strcmp(drc_name, name)))
> + rc = rpaphp_check_drc_props(dn, drc_name, NULL);
> + if (rc == 0)
>   break;
>   }
> 
> @@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char 
> *drc_name,
> char *drc_type)
>  {
>   struct device_node *np = NULL;
> - char *name;
> - char *type;
>   int rc;
> 
>   while ((np = of_find_node_by_name(np, "pci"))) {
> - rc = rpaphp_get_drc_props(np, NULL, , , NULL);
> + rc = rpaphp_check_drc_props(np, drc_name, drc_type);
>   if (rc == 0)
> - if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
> - break;
> + break;
>   }
> 
>   return np;
> diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
> index 7db024e..8db5f2e 100644
> --- a/drivers/pci/hotplug/rpaphp.h
> +++ b/drivers/pci/hotplug/rpaphp.h
> @@ -91,8 +91,8 @@ struct slot {
> 
>  /* rpaphp_core.c */
>  int rpaphp_add_slot(struct device_node *dn);
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain);
> +int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
> + char *drc_type);
> 
>  /* rpaphp_slot.c */
>  void dealloc_slot_struct(struct slot *slot);
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index 1e29aba..0a3b5f5 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include/* for eeh_add_device() */
>  #include /* rtas_call */
>  #include   /* for pci_controller */
> @@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, 
> const int **drc_indexes,
>   return 0;
>  }
> 
> -/* To get the DRC props describing the current node, first obtain it's
> - * my-drc-index property.  Next obtain the DRC list from it's parent.  Use
> - * the my-drc-index for correlation, and obtain the requested properties.
> +
> +/* Verify the existence of 'drc_name' and/or 'drc_type' within the
> + * current node.  First obtain it's my-drc-index property.  Next,
> + * obtain the DRC info from it's parent.  Use the my-drc-index for
> + * correlation, and obtain/validate the requested properties.
>   */
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain)
> +
> +static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
> + char *drc_type, unsigned int my_index)
>  {
> + char *name_tmp, *type_tmp;
>   const int *indexes, *names;
>   const int *types, *domains;
> - const unsigned int *my_index;
> - char *name_tmp, *type_tmp;
>   int i, rc;
> 
> - my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
> - if (!my_index) {
> - 

Re: [PATCH V4 2/4] pseries/drc-info: Search DRC properties for CPU indexes

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 02:11 PM, Michael Bringmann wrote:
> pseries/drc-info: Provide parallel routines to convert between
> drc_index and CPU numbers at runtime, using the older device-tree
> properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V4:
>   -- Rename of_one_drc_info to of_read_drc_info_cell
>   -- Fix some spacing within expressions
>   -- Make some style corrections
> ---
>  arch/powerpc/include/asm/prom.h |   15 +++
>  arch/powerpc/platforms/pseries/of_helpers.c |   60 ++
>  arch/powerpc/platforms/pseries/pseries_energy.c |  138 
> ++-
>  3 files changed, 185 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index 3243455..0ef41b1 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -96,6 +96,21 @@ struct of_drconf_cell {
>  #define DRCONF_MEM_AI_INVALID0x0040
>  #define DRCONF_MEM_RESERVED  0x0080
> 
> +struct of_drc_info {
> + char *drc_type;
> + char *drc_name_prefix;
> + u32 drc_index_start;
> + u32 drc_name_suffix_start;
> + u32 num_sequential_elems;
> + u32 sequential_inc;
> + u32 drc_power_domain;
> + u32 last_drc_index;
> +};
> +
> +extern int of_read_drc_info_cell(struct property **prop,
> + const __be32 **curval, struct of_drc_info *data);
> +
> +
>  /*
>   * There are two methods for telling firmware what our capabilities are.
>   * Newer machines have an "ibm,client-architecture-support" method on the
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 2798933..b36f1ae 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -2,6 +2,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "of_helpers.h"
> 
> @@ -36,3 +37,62 @@ struct device_node *pseries_of_derive_parent(const char 
> *path)
>   kfree(parent_path);
>   return parent ? parent : ERR_PTR(-EINVAL);
>  }
> +
> +
> +/* Helper Routines to convert between drc_index to cpu numbers */
> +
> +int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
> + struct of_drc_info *data)
> +{
> + const char *p;
> + const __be32 *p2;
> +
> + if (!data)
> + return -EINVAL;
> +
> + /* Get drc-type:encode-string */
> + p = data->drc_type = (char*) (*curval);
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-name-prefix:encode-string */
> + data->drc_name_prefix = (char *)p;
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-index-start:encode-int */
> + p2 = (const __be32 *)p;
> + p2 = of_prop_next_u32(*prop, p2, >drc_index_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get drc-name-suffix-start:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >drc_name_suffix_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get number-sequential-elements:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >num_sequential_elems);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get sequential-increment:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >sequential_inc);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get drc-power-domain:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >drc_power_domain);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Should now know end of current entry */
> + (*curval) = (void *)p2;
> + data->last_drc_index = data->drc_index_start +
> + ((data->num_sequential_elems - 1) * data->sequential_inc);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(of_read_drc_info_cell);
> diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
> b/arch/powerpc/platforms/pseries/pseries_energy.c
> index 35c891a..b8f6603 100644
> --- a/arch/powerpc/platforms/pseries/pseries_energy.c
> +++ b/arch/powerpc/platforms/pseries/pseries_energy.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
> 
>  #define MODULE_VERS "1.0"
> @@ -38,26 +39,64 @@
>  static u32 cpu_to_drc_index(int cpu)
>  {
>   struct device_node *dn = NULL;
> - const int *indexes;
> - int i;
> + int thread_index;
>   int rc = 1;
>   u32 ret = 0;
> 
>   dn = of_find_node_by_path("/cpus");
>   if (dn == NULL)
>   goto err;
> - indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
> - if (indexes == NULL)
> - goto err_of_node_put;
> +
>   /* Convert logical cpu number to core number */
> - i = cpu_core_index_of_thread(cpu);
> -

Re: [PATCH v2 0/8] powerpc: Support ibm,dynamic-memory-v2 property

2017-11-20 Thread Nathan Fontenot
On 11/16/2017 10:51 PM, Bharata B Rao wrote:
> 
> On Thu, Nov 16, 2017 at 9:31 PM, Nathan Fontenot <nf...@linux.vnet.ibm.com 
> <mailto:nf...@linux.vnet.ibm.com>> wrote:
> 
> 
> 
> On 11/15/2017 11:37 PM, Bharata B Rao wrote:
> > On Fri, Oct 20, 2017 at 6:51 PM, Nathan Fontenot 
> <nf...@linux.vnet.ibm.com <mailto:nf...@linux.vnet.ibm.com> 
> <mailto:nf...@linux.vnet.ibm.com <mailto:nf...@linux.vnet.ibm.com>>> wrote:
> >
> >     This patch set provides a set of updates to de-couple the LMB 
> information
> >     provided in the ibm,dynamic-memory device tree property from the 
> device
> >     tree property format. A part of this patch series introduces a new
> >     device tree property format for dynamic memory, 
> ibm,dynamic-memory-v2.
> >     By separating the device tree format from the information provided 
> by
> >     the device tree property consumers of this information need not know
> >     what format is currently being used and provide multiple parsing 
> routines
> >     for the information.
> >
> >     The first two patches update the of_get_assoc_arrays() and
> >     of_get_usable_memory() routines to look up the device node for the
> >     properties they parse. This is needed because the calling routines 
> for
> >     these two functions will not have the device node to pass in in
> >     subsequent patches.
> >
> >     The third patch adds a new kernel structure, struct drmem_lmb, that
> >     is used to represent each of the possible LMBs specified in the
> >     ibm,dynamic-memory* device tree properties. The patch adds code
> >     to parse the property and build the LMB array data, and updates 
> prom.c
> >     to use this new data structure instead of parsing the device tree 
> directly.
> >
> >     The fourth and fifth patches update the numa and pseries hotplug 
> code
> >     respectively to use the new LMB array data instead of parsing the
> >     device tree directly.
> >
> >     The sixth patch moves the of_drconf_cell struct to drmem.h where it
> >     fits better than prom.h
> >
> >     The seventh patch introduces support for the ibm,dynamic-memory-v2
> >     property format by updating the new drmem.c code to be able to parse
> >     and create this new device tree format.
> >
> >     The last patch in the series updates the architecture vector to 
> indicate
> >     support for ibm,dynamic-memory-v2.
> >
> >
> > Here we are consolidating LMBs into LMB sets but still end up working 
> with individual LMBs during hotplug. Can we instead start working with LMB 
> sets together during hotplug ? In other words
> 
> In a sense we do do this when handling memory DLPAR indexed-count 
> requests. This takes a starting
> drc index for a LMB and adds/removes the following  contiguous 
> LMBs. This operation is
> all-or-nothing, if any LMB fails to add/remove we revert back to the 
> original state.
> 
> 
> I am aware of count-indexed and we do use it for memory hotplug/unplug for 
> KVM on Power. However the RTAS and configure-connector calls there are still 
> per-LMB.
> 
> 
> This isn't exactly what you're asking for but...
> >
> > - The RTAS calls involved during DRC acquire stage can be done only 
> once per LMB set.
> > - One configure-connector call for the entire LMB set.
> 
> these two interfaces work on a single drc index, not a set of drc 
> indexes. Working on a set
> of LMBs would require extending the current rtas calls or creating new 
> ones.
> 
> 
> Yes.
>  
> 
> 
> One thing we can look into doing for indexed-count requests is to perform 
> each of the
> steps for all LMBs in the set at once, i.e. make the acquire call for 
> LMBs, then make the
> configure-connector calls for all the LMBs...
> 
> 
> That is what I am hinting at to check the feasibility of such a mechanism. 
> Given that all the LMBs of the set are supposed to have similar attributes 
> (like node associativity etc), it makes sense to have a single DRC acquire 
> call and single configure-connector call for the entire set.

I agree. I'll talk to pHyp development to see if this is something they are 
interested in pursuing.
If not we can submit updates to the PAPR to implement these new rtas calls even 
if they do not
support them in pHyp.

> 
> 
> The only drawback is this approach would make handling failures and 
> back

Re: RESEND [PATCH V3 3/4] hotplug/drc-info: Add code to search ibm,drc-info property

2017-11-16 Thread Nathan Fontenot


On 11/15/2017 12:09 PM, Michael Bringmann wrote:
> rpadlpar_core.c: Provide parallel routines to search the older device-
> tree properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> The interface to examine the DRC information is changed from a "get"
> function that returns values for local verification elsewhere, to a
> "check" function that validates the 'name' and/or 'type' of a device
> node.  This update hides the format of the underlying device-tree
> properties, and concentrates the value checks into a single function
> without requiring the user to verify whether a search was successful.
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V3:
>   -- Now passing more values by structure reducing use of local
>  declarations / initialization.
>   -- Improve some code spacing for better clarity.
> ---
>  drivers/pci/hotplug/rpadlpar_core.c |   13 ++--
>  drivers/pci/hotplug/rpaphp.h|4 +
>  drivers/pci/hotplug/rpaphp_core.c   |  110 
> +++
>  3 files changed, 92 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/rpadlpar_core.c 
> b/drivers/pci/hotplug/rpadlpar_core.c
> index a3449d7..fc01d7d 100644
> --- a/drivers/pci/hotplug/rpadlpar_core.c
> +++ b/drivers/pci/hotplug/rpadlpar_core.c
> @@ -27,6 +27,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "../pci.h"
>  #include "rpaphp.h"
> @@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char 
> *drc_name)
>  {
>   struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
>   struct device_node *dn = NULL;
> - char *name;
>   int rc;
> 
>   if (!parent)
>   return NULL;
> 
>   while ((dn = of_get_next_child(parent, dn))) {
> - rc = rpaphp_get_drc_props(dn, NULL, , NULL, NULL);
> - if ((rc == 0) && (!strcmp(drc_name, name)))
> + rc = rpaphp_check_drc_props(dn, drc_name, NULL);
> + if (rc == 0)
>   break;
>   }
> 
> @@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char 
> *drc_name,
> char *drc_type)
>  {
>   struct device_node *np = NULL;
> - char *name;
> - char *type;
>   int rc;
> 
>   while ((np = of_find_node_by_name(np, "pci"))) {
> - rc = rpaphp_get_drc_props(np, NULL, , , NULL);
> + rc = rpaphp_check_drc_props(np, drc_name, drc_type);
>   if (rc == 0)
> - if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
> - break;
> + break;
>   }
> 
>   return np;
> diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
> index 7db024e..8db5f2e 100644
> --- a/drivers/pci/hotplug/rpaphp.h
> +++ b/drivers/pci/hotplug/rpaphp.h
> @@ -91,8 +91,8 @@ struct slot {
> 
>  /* rpaphp_core.c */
>  int rpaphp_add_slot(struct device_node *dn);
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain);
> +int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
> + char *drc_type);
> 
>  /* rpaphp_slot.c */
>  void dealloc_slot_struct(struct slot *slot);
> diff --git a/drivers/pci/hotplug/rpaphp_core.c 
> b/drivers/pci/hotplug/rpaphp_core.c
> index 1e29aba..6606175 100644
> --- a/drivers/pci/hotplug/rpaphp_core.c
> +++ b/drivers/pci/hotplug/rpaphp_core.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include/* for eeh_add_device() */
>  #include /* rtas_call */
>  #include   /* for pci_controller */
> @@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, 
> const int **drc_indexes,
>   return 0;
>  }
> 
> -/* To get the DRC props describing the current node, first obtain it's
> - * my-drc-index property.  Next obtain the DRC list from it's parent.  Use
> - * the my-drc-index for correlation, and obtain the requested properties.
> +
> +/* Verify the existence of 'drc_name' and/or 'drc_type' within the
> + * current node.  First obtain it's my-drc-index property.  Next,
> + * obtain the DRC info from it's parent.  Use the my-drc-index for
> + * correlation, and obtain/validate the requested properties.
>   */
> -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
> - char **drc_name, char **drc_type, int *drc_power_domain)
> +
> +static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
> + char *drc_type, unsigned int my_index)
>  {
> + char *name_tmp, *type_tmp;
>   const int *indexes, *names;
>   const int *types, *domains;
> - const unsigned int *my_index;
> - char *name_tmp, *type_tmp;
>   int i, rc;
> 
> - my_index = of_get_property(dn, 

Re: RESEND [PATCH V3 2/4] pseries/drc-info: Search DRC properties for CPU indexes

2017-11-16 Thread Nathan Fontenot
On 11/15/2017 12:09 PM, Michael Bringmann wrote:
> pseries/drc-info: Provide parallel routines to convert between
> drc_index and CPU numbers at runtime, using the older device-tree
> properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types"
> and "ibm,drc-power-domains"), or the new property "ibm,drc-info".
> 
> Signed-off-by: Michael Bringmann 
> ---
> Changes in V3:
>   -- Some code compression and use of data structures for value passing.
> ---
>  arch/powerpc/include/asm/prom.h |   15 ++
>  arch/powerpc/platforms/pseries/of_helpers.c |   60 ++
>  arch/powerpc/platforms/pseries/pseries_energy.c |  139 
> ++-
>  3 files changed, 186 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index 3243455..007430a 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -96,6 +96,21 @@ struct of_drconf_cell {
>  #define DRCONF_MEM_AI_INVALID0x0040
>  #define DRCONF_MEM_RESERVED  0x0080
> 
> +struct of_drc_info {
> + char *drc_type;
> + char *drc_name_prefix;
> + u32 drc_index_start;
> + u32 drc_name_suffix_start;
> + u32 num_sequential_elems;
> + u32 sequential_inc;
> + u32 drc_power_domain;
> + u32 last_drc_index;
> +};
> +
> +extern int of_one_drc_info(struct property **prop, void **curval,
> + struct of_drc_info *data);

I'm not sure if prom.h is where this really belongs but I also do
not see an existing header file that it really makes sense to put it in.
 
> +
> +
>  /*
>   * There are two methods for telling firmware what our capabilities are.
>   * Newer machines have an "ibm,client-architecture-support" method on the
> diff --git a/arch/powerpc/platforms/pseries/of_helpers.c 
> b/arch/powerpc/platforms/pseries/of_helpers.c
> index 2798933..62dc8e9 100644
> --- a/arch/powerpc/platforms/pseries/of_helpers.c
> +++ b/arch/powerpc/platforms/pseries/of_helpers.c
> @@ -2,6 +2,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "of_helpers.h"
> 
> @@ -36,3 +37,62 @@ struct device_node *pseries_of_derive_parent(const char 
> *path)
>   kfree(parent_path);
>   return parent ? parent : ERR_PTR(-EINVAL);
>  }
> +
> +
> +/* Helper Routines to convert between drc_index to cpu numbers */
> +
> +int of_one_drc_info(struct property **prop, void **curval,
> + struct of_drc_info *data)

Small nit, this should probably be of_read_drc_info_cell.
 
> +{
> + const char *p;
> + const __be32 *p2;
> +
> + if (!data)
> + return -EINVAL;
> +
> + /* Get drc-type:encode-string */
> + p = data->drc_type = (*curval);
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-name-prefix:encode-string */
> + data->drc_name_prefix = (char *)p;
> + p = of_prop_next_string(*prop, p);
> + if (!p)
> + return -EINVAL;
> +
> + /* Get drc-index-start:encode-int */
> + p2 = (const __be32 *)p;
> + p2 = of_prop_next_u32(*prop, p2, >drc_index_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get/skip drc-name-suffix-start:encode-int */

You're getting the suffix, should probably drop 'skip' in the comment.

> + p2 = of_prop_next_u32(*prop, p2, >drc_name_suffix_start);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get number-sequential-elements:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >num_sequential_elems);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get sequential-increment:encode-int */
> + p2 = of_prop_next_u32(*prop, p2, >sequential_inc);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Get/skip drc-power-domain:encode-int */

Same here.

> + p2 = of_prop_next_u32(*prop, p2, >drc_power_domain);
> + if (!p2)
> + return -EINVAL;
> +
> + /* Should now know end of current entry */
> + (*curval) = (void *)p2;
> + data->last_drc_index = data->drc_index_start +
> + ((data->num_sequential_elems-1)*data->sequential_inc);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(of_one_drc_info);
> diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c 
> b/arch/powerpc/platforms/pseries/pseries_energy.c
> index 35c891a..7160855 100644
> --- a/arch/powerpc/platforms/pseries/pseries_energy.c
> +++ b/arch/powerpc/platforms/pseries/pseries_energy.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
> 
>  #define MODULE_VERS "1.0"
> @@ -38,26 +39,65 @@
>  static u32 cpu_to_drc_index(int cpu)
>  {
>   struct device_node *dn = NULL;
> - const int *indexes;
> - int i;
> + int thread_index;
>   int rc = 1;
>   u32 ret = 0;
> 
>   dn = of_find_node_by_path("/cpus");
>   if (dn == NULL)
>   goto err;
> - indexes = of_get_property(dn, 

Re: RESEND [PATCH V3 1/4] powerpc/firmware: Add definitions for new drc-info firmware feature

2017-11-16 Thread Nathan Fontenot
On 11/15/2017 12:09 PM, Michael Bringmann wrote:
> Firmware Features: Define new bit flag representing the presence of
> new device tree property "ibm,drc-info".  The flag is used to tell
> the front end processor whether the Linux kernel supports the new
> property, and by the front end processor to tell the Linux kernel
> that the new property is present in the device tree.

This patch seems to be adding a bit for the drc-info feature so that
we can use the firmware_has_feature() interface to determine if the
device tree has the new ibm,drc-info properties.

I'm not sure what front-end processor you're referring to? Is this
in reference to the architecture vector that is exchanged with firmware?

-Nathan

> 
> Signed-off-by: Michael Bringmann 
> ---
>  arch/powerpc/include/asm/firmware.h   |3 ++-
>  arch/powerpc/include/asm/prom.h   |1 +
>  arch/powerpc/platforms/pseries/firmware.c |1 +
>  3 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/firmware.h 
> b/arch/powerpc/include/asm/firmware.h
> index 8645897..329d537 100644
> --- a/arch/powerpc/include/asm/firmware.h
> +++ b/arch/powerpc/include/asm/firmware.h
> @@ -51,6 +51,7 @@
>  #define FW_FEATURE_BEST_ENERGY   ASM_CONST(0x8000)
>  #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0001)
>  #define FW_FEATURE_PRRN  ASM_CONST(0x0002)
> +#define FW_FEATURE_DRC_INFO  ASM_CONST(0x0004)
> 
>  #ifndef __ASSEMBLY__
> 
> @@ -67,7 +68,7 @@ enum {
>   FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
>   FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
>   FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
> - FW_FEATURE_HPT_RESIZE,
> + FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRC_INFO,
>   FW_FEATURE_PSERIES_ALWAYS = 0,
>   FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
>   FW_FEATURE_POWERNV_ALWAYS = 0,
> diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
> index 825bd59..3243455 100644
> --- a/arch/powerpc/include/asm/prom.h
> +++ b/arch/powerpc/include/asm/prom.h
> @@ -175,6 +175,7 @@ struct of_drconf_cell {
>  #define OV5_HASH_GTSE0x1940  /* Guest Translation Shoot Down 
> Avail */
>  /* Radix Table Extensions */
>  #define OV5_RADIX_GTSE   0x1A40  /* Guest Translation Shoot Down 
> Avail */
> +#define OV5_DRC_INFO 0x1640  /* Redef Prop Structures: drc-info   */
> 
>  /* Option Vector 6: IBM PAPR hints */
>  #define OV6_LINUX0x02/* Linux is our OS */
> diff --git a/arch/powerpc/platforms/pseries/firmware.c 
> b/arch/powerpc/platforms/pseries/firmware.c
> index 63cc82a..757d757 100644
> --- a/arch/powerpc/platforms/pseries/firmware.c
> +++ b/arch/powerpc/platforms/pseries/firmware.c
> @@ -114,6 +114,7 @@ struct vec5_fw_feature {
>  vec5_fw_features_table[] = {
>   {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
>   {FW_FEATURE_PRRN,   OV5_PRRN},
> + {FW_FEATURE_DRC_INFO,   OV5_DRC_INFO},
>  };
> 
>  static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
> 



Re: [PATCH 2/2] powerpc/hotplug: Ensure nodes initialized for hotplug

2017-11-16 Thread Nathan Fontenot


On 11/15/2017 12:28 PM, Michael Bringmann wrote:
> Hello:
> See below.
> 
> On 10/16/2017 07:54 AM, Michael Ellerman wrote:
>> Michael Bringmann  writes:
>>
>>> powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU,
>>> it may occur that the new resources are to be inserted into nodes
>>> that were not used for memory resources at bootup.  Many different
>>> configurations of PowerPC resources may need to be supported depending
>>> upon the environment.
>>
>> Give me some detail please?!
> 
> The most important characteristics that I have observed are:
> 
> * Dedicated vs. shared resources.  Shared resources require information
>   such as the VPHN hcall for CPU assignment to nodes.
> * memoryless nodes at boot.  Nodes need to be defined as 'possible' at
>   boot for operation with other code modules.  Previously, the powerpc
>   code would limit the set of possible/online nodes to those which have
>   memory assigned at boot.  Subsequent add/remove of CPUs or memory would
>   only work with this subset of possible nodes.
> * memoryless nodes with CPUs at boot.  Due to the previous restriction on
>   nodes, nodes that had CPUs but no memory were being collapsed into other
>   nodes that did have memory at boot.  In practice this meant that the
>   node assignment presented by the runtime kernel differed from the affinity
>   and associativity attributes presented by the device tree or VPHN hcalls.
>   Nodes that might be known to the pHyp were not 'possible' in the runtime
>   kernel because they did not have memory at boot.
> 
>>
>>> This patch fixes some problems encountered at
>>
>> What problems?
> 
> This patch set fixes a couple of problems.
> 
> * Nodes known to powerpc to be memoryless at boot, but to have CPUs in them
>   are allowed to be 'possible' and 'online'.  Memory allocations for those
>   nodes are taken from another node that does have memory until and if memory
>   is hot-added to the node.
> * Nodes which have no resources assigned at boot, but which may still be
>   referenced subsequently by affinity or associativity attributes, are kept
>   in the list of 'possible' nodes for powerpc.  Hot-add of memory or CPUs
>   to the system can reference these nodes and bring them online instead of
>   redirecting the resources to the set of nodes known to have memory at boot.
> 
>>
>>> runtime with configurations that support memory-less nodes, but which
>>> allow CPUs to be added at and after boot.
>>
>> How does it fix those problems?
> 
> This problem was fixed in a couple of ways.  First, the code now checks
> whether the node to which a CPU is mapped by 'numa_update_cpu_topology' /
> 'arch_update_cpu_topology' has been initialized and has memory available.
> If either test is false, a call is made to 'try_online_node()' to finish
> the data structure initialization.  Only if we are unable to initialize
> the node at this point will the CPU node assignment be collapsed into an
> existing node.  After initialization by 'try_online_node()', calls to
> 'local_memory_node' no longer crash for these memoryless nodes.
> 
>>
>>> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
>>> index b385cd0..e811dd1 100644
>>> --- a/arch/powerpc/mm/numa.c
>>> +++ b/arch/powerpc/mm/numa.c
>>> @@ -1325,6 +1325,17 @@ static long vphn_get_associativity(unsigned long cpu,
>>> return rc;
>>>  }
>>>  
>>> +static int verify_node_preparation(int nid)
>>> +{
>>
>> I would not expect a function called "verify" ...
>>
>>> +   if ((NODE_DATA(nid) == NULL) ||
>>> +   (NODE_DATA(nid)->node_spanned_pages == 0)) {
>>> +   if (try_online_node(nid))
>>
>> .. to do something like online a node.
> 
> We have changed the function name to 'find_cpu_nid'.

Ok, but I would still not expect 'find_cpu_nid' to online the node.

> 
>>
>>> +   return first_online_node;
>>> +   }
>>> +
>>> +   return nid;
>>> +}
>>> +
>>>  /*
>>>   * Update the CPU maps and sysfs entries for a single CPU when its NUMA
>>>   * characteristics change. This function doesn't perform any locking and is
>>> @@ -1433,9 +1444,11 @@ int numa_update_cpu_topology(bool cpus_locked)
>>> /* Use associativity from first thread for all siblings */
>>> vphn_get_associativity(cpu, associativity);
>>> new_nid = associativity_to_nid(associativity);
>>> -   if (new_nid < 0 || !node_online(new_nid))
>>> +   if (new_nid < 0 || !node_possible(new_nid))
>>> new_nid = first_online_node;
>>>  
>>> +   new_nid = verify_node_preparation(new_nid);
>>
>> You're being called part-way through CPU hotplug here, are we sure it's
>> safe to go and do memory hotplug from there? What's the locking
>> situation?
> 
> We are not doing memory hotplug.  We are initializing a node that may be used
> by CPUs or memory before it can be referenced as invalid by a CPU hotplug
> operation.  CPU hotplug operations are protected by a 

Re: [PATCH v2 0/8] powerpc: Support ibm,dynamic-memory-v2 property

2017-11-16 Thread Nathan Fontenot


On 11/15/2017 11:37 PM, Bharata B Rao wrote:
> On Fri, Oct 20, 2017 at 6:51 PM, Nathan Fontenot <nf...@linux.vnet.ibm.com 
> <mailto:nf...@linux.vnet.ibm.com>> wrote:
> 
> This patch set provides a set of updates to de-couple the LMB information
> provided in the ibm,dynamic-memory device tree property from the device
> tree property format. A part of this patch series introduces a new
> device tree property format for dynamic memory, ibm,dynamic-memory-v2.
> By separating the device tree format from the information provided by
> the device tree property consumers of this information need not know
> what format is currently being used and provide multiple parsing routines
> for the information.
> 
> The first two patches update the of_get_assoc_arrays() and
> of_get_usable_memory() routines to look up the device node for the
> properties they parse. This is needed because the calling routines for
> these two functions will not have the device node to pass in in
> subsequent patches.
> 
> The third patch adds a new kernel structure, struct drmem_lmb, that
> is used to represent each of the possible LMBs specified in the
> ibm,dynamic-memory* device tree properties. The patch adds code
> to parse the property and build the LMB array data, and updates prom.c
> to use this new data structure instead of parsing the device tree 
> directly.
> 
> The fourth and fifth patches update the numa and pseries hotplug code
> respectively to use the new LMB array data instead of parsing the
> device tree directly.
> 
> The sixth patch moves the of_drconf_cell struct to drmem.h where it
> fits better than prom.h
> 
> The seventh patch introduces support for the ibm,dynamic-memory-v2
> property format by updating the new drmem.c code to be able to parse
> and create this new device tree format.
> 
> The last patch in the series updates the architecture vector to indicate
> support for ibm,dynamic-memory-v2.
> 
> 
> Here we are consolidating LMBs into LMB sets but still end up working with 
> individual LMBs during hotplug. Can we instead start working with LMB sets 
> together during hotplug ? In other words

In a sense we do do this when handling memory DLPAR indexed-count requests. 
This takes a starting
drc index for a LMB and adds/removes the following  contiguous LMBs. 
This operation is
all-or-nothing, if any LMB fails to add/remove we revert back to the original 
state.

This isn't exactly what you're asking for but...
> 
> - The RTAS calls involved during DRC acquire stage can be done only once per 
> LMB set.
> - One configure-connector call for the entire LMB set.

these two interfaces work on a single drc index, not a set of drc indexes. 
Working on a set
of LMBs would require extending the current rtas calls or creating new ones.

One thing we can look into doing for indexed-count requests is to perform each 
of the
steps for all LMBs in the set at once, i.e. make the acquire call for LMBs, 
then make the
configure-connector calls for all the LMBs...

The only drawback is this approach would make handling failures and backing out 
of the
updates a bit messier, but I've never really thought that optimizing for the 
failure
case to be as important.

-Nathan

> 
> I think this should help hotplugging of large amounts of memory. Other than 
> that, if we choose to use LMB representation for PMEM, it will be useful 
> there too to handle all the LMBs of a PMEM range as one set.
> 
> Regards,
> Bharata.



Re: [PATCH] [net-next,v3] ibmvnic: Feature implementation of Vital Product Data (VPD) for the ibmvnic driver

2017-11-13 Thread Nathan Fontenot
On 11/10/2017 01:13 PM, Desnes Augusto Nunes do Rosário wrote:
> 
> 
> On 11/10/2017 12:54 PM, Nathan Fontenot wrote:
>> On 11/10/2017 08:41 AM, Desnes Augusto Nunes do Rosário wrote:
>>>
>>>
>>> On 11/09/2017 06:31 PM, Nathan Fontenot wrote:
>>>> On 11/09/2017 01:00 PM, Desnes Augusto Nunes do Rosario wrote:
>>>>> This patch implements and enables VPD support for the ibmvnic driver.
>>>>> Moreover, it includes the implementation of suitable structs, signal
>>>>>    transmission/handling and functions which allow the retrieval of 
>>>>> firmware
>>>>>    information from the ibmvnic card through the ethtool command.
>>>>>
>>>>> Signed-off-by: Desnes A. Nunes do Rosario <desn...@linux.vnet.ibm.com>
>>>>> Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
>>>>> ---
>>>>>    drivers/net/ethernet/ibm/ibmvnic.c | 149 
>>>>> -
>>>>>    drivers/net/ethernet/ibm/ibmvnic.h |  27 ++-
>>>>>    2 files changed, 173 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
>>>>> b/drivers/net/ethernet/ibm/ibmvnic.c
>>>>> index d0cff28..693b502 100644
>>>>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>>>>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>>>>> @@ -573,6 +573,15 @@ static int reset_tx_pools(struct ibmvnic_adapter 
>>>>> *adapter)
>>>>>    return 0;
>>>>>    }
>>>>>
>>>>> +static void release_vpd_data(struct ibmvnic_adapter *adapter)
>>>>> +{
>>>>> +    if (!adapter->vpd)
>>>>> +    return;
>>>>> +
>>>>> +    kfree(adapter->vpd->buff);
>>>>> +    kfree(adapter->vpd);
>>>>> +}
>>>>> +
>>>>>    static void release_tx_pools(struct ibmvnic_adapter *adapter)
>>>>>    {
>>>>>    struct ibmvnic_tx_pool *tx_pool;
>>>>> @@ -753,6 +762,8 @@ static void release_resources(struct ibmvnic_adapter 
>>>>> *adapter)
>>>>>    {
>>>>>    int i;
>>>>>
>>>>> +    release_vpd_data(adapter);
>>>>> +
>>>>>    release_tx_pools(adapter);
>>>>>    release_rx_pools(adapter);
>>>>>
>>>>> @@ -833,6 +844,53 @@ static int set_real_num_queues(struct net_device 
>>>>> *netdev)
>>>>>    return rc;
>>>>>    }
>>>>>
>>>>> +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
>>>>> +{
>>>>> +    struct device *dev = >vdev->dev;
>>>>> +    union ibmvnic_crq crq;
>>>>> +    dma_addr_t dma_addr;
>>>>> +    int len;
>>>>> +
>>>>> +    if (adapter->vpd->buff)
>>>>> +    len = adapter->vpd->len;
>>>>> +
>>>>> +    reinit_completion(>fw_done);
>>>>> +    crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
>>>>> +    crq.get_vpd_size.cmd = GET_VPD_SIZE;
>>>>> +    ibmvnic_send_crq(adapter, );
>>>>> +    wait_for_completion(>fw_done);
>>>>> +
>>>>
>>>> Shouldn't there be a check for the return code when getting the
>>>> vpd size?
>>>
>>> Hello Nathan,
>>>
>>> This check is already being performed on the handle_vpd_size_rsp() function 
>>> down below.
>>>
>>> In short, a GET_VPD_SIZE signal is sent here through a ibmvnic_crq union in 
>>> ibmvnic_send_crq(), whereas handle_query_ip_offload_rsp() receives from the 
>>> VNIC adapter a GET_VPD_SIZE_RSP containing a ibmvnic_crq union with the vpd 
>>> size information and the rc.code. If successful, a >fw_done is 
>>> sent and this part of the code continues; however if not, a dev_error() is 
>>> thrown. Same logic applies to GET_VPD/GET_VPD_RSP.
>>>
>>
>> Yes, I did see that code. You do a complete of the completion variable for 
>> both success and failure,
>> this then lets this routine continue regardless of the results of the get 
>> vpd size request. The
>> call to dev_err will print the error message but does not prevent us from 
>> continuing if the
>> get vpd size fails. Perhaps setting vpd->len to -1 to indi

Re: [PATCH v2 3/8] powerpc/mm: Separate ibm, dynamic-memory data from DT format

2017-11-13 Thread Nathan Fontenot
On 11/12/2017 06:43 AM, Michael Ellerman wrote:
> Hi Nathan,
> 
> Nathan Fontenot <nf...@linux.vnet.ibm.com> writes:
>> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
>> index f83056297441..917184c13890 100644
>> --- a/arch/powerpc/kernel/prom.c
>> +++ b/arch/powerpc/kernel/prom.c
>> @@ -454,92 +455,93 @@ static int __init 
>> early_init_dt_scan_chosen_ppc(unsigned long node,
> ...
>>  
>>  static int __init early_init_dt_scan_memory_ppc(unsigned long node,
>>  const char *uname,
>>  int depth, void *data)
>>  {
>> +int rc;
>> +
>>  if (depth == 1 &&
>> -strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
>> -return early_init_dt_scan_drconf_memory(node);
>> +strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
>> +rc = init_drmem_lmbs(node);
>> +if (!rc)
>> +early_init_dt_scan_drmem_lmbs(node);
>> +
>> +return rc;
>> +}
>>  
>>  return early_init_dt_scan_memory(node, uname, depth, data);
>>  }
> 
> There's one bug in here which is that you return rc as returned by
> init_drmem_lmbs(). Returning non-zero from these scan routines
> terminates the scan, which means if anything goes wrong in
> init_drmem_lmbs() we may not call early_init_dt_scan_memory()
> in which case we won't have any memory at all.
> 

I didn't know this would stop scanning the device tree, thanks for letting me 
know.

> I say "may not" because it depends on the order of the nodes in the
> device tree whether you hit the memory nodes or the dynamic reconfig mem
> info first. And the order of the nodes in the device tree is arbitrary
> so we can't rely on that.
> 
> 
>> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
>> new file mode 100644
>> index ..8ad7cf36b2c4
>> --- /dev/null
>> +++ b/arch/powerpc/mm/drmem.c
>> @@ -0,0 +1,84 @@
> ...
>> +
>> +int __init init_drmem_lmbs(unsigned long node)
>> +{
>> +struct drmem_lmb *lmb;
>> +const __be32 *prop;
>> +int prop_sz;
>> +u32 len;
>> +
>> +prop = of_get_flat_dt_prop(node, "ibm,lmb-size", );
>> +if (!prop || len < dt_root_size_cells * sizeof(__be32))
>> +return -1;
>> +
>> +drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, );
>> +
>> +prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", );
>> +if (!prop || len < dt_root_size_cells * sizeof(__be32))
>> +return -1;
>> +
>> +drmem_info->n_lmbs = of_read_number(prop++, 1);
>> +prop_sz = drmem_info->n_lmbs * sizeof(struct of_drconf_cell)
>> +  + sizeof(__be32);
>> +if (prop_sz < len)
>> +return -1;
>> +
>> +drmem_info->lmbs = alloc_bootmem(drmem_info->n_lmbs * sizeof(*lmb));
>> +if (!drmem_info->lmbs)
>> +return -1;
> 
> The bigger problem we have though is that you're trying to allocate
> memory, in order to find out what memory we have :)
> 
> I suspect it works in some cases because you hit the memory@0 node first
> in the device tree, and add that memory to memblock, which means
> init_drmem_lmbs() *can* allocate memory, and everything's good.
> 
> But if we hit init_drmem_lmbs() first, or there's not enough space in
> memory@0, then allocating memory in order to discover memory is not
> going to work.
> 
> I'm not sure what the best solution is. One option would be to
> statically allocate some space, so that we can discover some of the LMBs
> without doing an allocation. But we wouldn't be able to guarantee that
> we had enough space in that static allocation, so the code would need to
> handle doing that and then potentially finding more LMBs later using a
> dynamic alloc. So that could be a bit messy.
> 
> The other option would be for the early_init_dt_scan_drmem_lmbs() code
> to still work on the device tree directly, rather than using the
> drmem_info array. That would make for uglier code, but may be necessary.
> 

I have been thinking about my initial approach, and the more I look at it
the more I do not like trying to do the bootmem allocation. As you mention
there is just too much that could go wrong with that.

I have started looking at a design where an interface similar to
walk_memory_range() is used for the prom and numa code so we do not have to rely
on making the allocation for the lmb array early in boot. The lmb array
could then be allocated in the late_initcall in drmem.c at which point
the general kernel allocator is available.

I'm still working on getting this coded up and will send out a new patch set
once it's ready unless anyone has objections to this approach.

-Nathan



Re: [PATCH] [net-next,v3] ibmvnic: Feature implementation of Vital Product Data (VPD) for the ibmvnic driver

2017-11-10 Thread Nathan Fontenot
On 11/10/2017 08:41 AM, Desnes Augusto Nunes do Rosário wrote:
> 
> 
> On 11/09/2017 06:31 PM, Nathan Fontenot wrote:
>> On 11/09/2017 01:00 PM, Desnes Augusto Nunes do Rosario wrote:
>>> This patch implements and enables VPD support for the ibmvnic driver.
>>> Moreover, it includes the implementation of suitable structs, signal
>>>   transmission/handling and functions which allow the retrieval of firmware
>>>   information from the ibmvnic card through the ethtool command.
>>>
>>> Signed-off-by: Desnes A. Nunes do Rosario <desn...@linux.vnet.ibm.com>
>>> Signed-off-by: Thomas Falcon <tlfal...@linux.vnet.ibm.com>
>>> ---
>>>   drivers/net/ethernet/ibm/ibmvnic.c | 149 
>>> -
>>>   drivers/net/ethernet/ibm/ibmvnic.h |  27 ++-
>>>   2 files changed, 173 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
>>> b/drivers/net/ethernet/ibm/ibmvnic.c
>>> index d0cff28..693b502 100644
>>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>>> @@ -573,6 +573,15 @@ static int reset_tx_pools(struct ibmvnic_adapter 
>>> *adapter)
>>>   return 0;
>>>   }
>>>
>>> +static void release_vpd_data(struct ibmvnic_adapter *adapter)
>>> +{
>>> +    if (!adapter->vpd)
>>> +    return;
>>> +
>>> +    kfree(adapter->vpd->buff);
>>> +    kfree(adapter->vpd);
>>> +}
>>> +
>>>   static void release_tx_pools(struct ibmvnic_adapter *adapter)
>>>   {
>>>   struct ibmvnic_tx_pool *tx_pool;
>>> @@ -753,6 +762,8 @@ static void release_resources(struct ibmvnic_adapter 
>>> *adapter)
>>>   {
>>>   int i;
>>>
>>> +    release_vpd_data(adapter);
>>> +
>>>   release_tx_pools(adapter);
>>>   release_rx_pools(adapter);
>>>
>>> @@ -833,6 +844,53 @@ static int set_real_num_queues(struct net_device 
>>> *netdev)
>>>   return rc;
>>>   }
>>>
>>> +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
>>> +{
>>> +    struct device *dev = >vdev->dev;
>>> +    union ibmvnic_crq crq;
>>> +    dma_addr_t dma_addr;
>>> +    int len;
>>> +
>>> +    if (adapter->vpd->buff)
>>> +    len = adapter->vpd->len;
>>> +
>>> +    reinit_completion(>fw_done);
>>> +    crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
>>> +    crq.get_vpd_size.cmd = GET_VPD_SIZE;
>>> +    ibmvnic_send_crq(adapter, );
>>> +    wait_for_completion(>fw_done);
>>> +
>>
>> Shouldn't there be a check for the return code when getting the
>> vpd size?
> 
> Hello Nathan,
> 
> This check is already being performed on the handle_vpd_size_rsp() function 
> down below.
> 
> In short, a GET_VPD_SIZE signal is sent here through a ibmvnic_crq union in 
> ibmvnic_send_crq(), whereas handle_query_ip_offload_rsp() receives from the 
> VNIC adapter a GET_VPD_SIZE_RSP containing a ibmvnic_crq union with the vpd 
> size information and the rc.code. If successful, a >fw_done is sent 
> and this part of the code continues; however if not, a dev_error() is thrown. 
> Same logic applies to GET_VPD/GET_VPD_RSP.
> 

Yes, I did see that code. You do a complete of the completion variable for both 
success and failure,
this then lets this routine continue regardless of the results of the get vpd 
size request. The
call to dev_err will print the error message but does not prevent us from 
continuing if the
get vpd size fails. Perhaps setting vpd->len to -1 to indicate the get vpd call 
failed which could
then be checked by the requester.

-Nathan


> What I am adding on the next version of the patch is a check if 
> adapter->vpd->len is different than 0 before allocating adapter->vpd->buff, 
> since in case of a failure, adapter->vpd->len will be 0.
> 
> Best Regards,
> 
>>
>>
>>> +    if (!adapter->vpd->buff)
>>> +    adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
>>> +    else if (adapter->vpd->len != len)
>>> +    adapter->vpd->buff =
>>> +    krealloc(adapter->vpd->buff,
>>> + adapter->vpd->len, GFP_KERNEL);
>>> +
>>> +    if (!adapter->vpd->buff) {
>>> +    dev_err(dev, "Could allocate VPD buffer\n");
>>> + 

Re: [PATCH] [net-next,v3] ibmvnic: Feature implementation of Vital Product Data (VPD) for the ibmvnic driver

2017-11-09 Thread Nathan Fontenot
On 11/09/2017 01:00 PM, Desnes Augusto Nunes do Rosario wrote:
> This patch implements and enables VPD support for the ibmvnic driver.
> Moreover, it includes the implementation of suitable structs, signal
>  transmission/handling and functions which allow the retrieval of firmware
>  information from the ibmvnic card through the ethtool command.
> 
> Signed-off-by: Desnes A. Nunes do Rosario 
> Signed-off-by: Thomas Falcon 
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 149 
> -
>  drivers/net/ethernet/ibm/ibmvnic.h |  27 ++-
>  2 files changed, 173 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index d0cff28..693b502 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -573,6 +573,15 @@ static int reset_tx_pools(struct ibmvnic_adapter 
> *adapter)
>   return 0;
>  }
> 
> +static void release_vpd_data(struct ibmvnic_adapter *adapter)
> +{
> + if (!adapter->vpd)
> + return;
> +
> + kfree(adapter->vpd->buff);
> + kfree(adapter->vpd);
> +}
> +
>  static void release_tx_pools(struct ibmvnic_adapter *adapter)
>  {
>   struct ibmvnic_tx_pool *tx_pool;
> @@ -753,6 +762,8 @@ static void release_resources(struct ibmvnic_adapter 
> *adapter)
>  {
>   int i;
> 
> + release_vpd_data(adapter);
> +
>   release_tx_pools(adapter);
>   release_rx_pools(adapter);
> 
> @@ -833,6 +844,53 @@ static int set_real_num_queues(struct net_device *netdev)
>   return rc;
>  }
> 
> +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
> +{
> + struct device *dev = >vdev->dev;
> + union ibmvnic_crq crq;
> + dma_addr_t dma_addr;
> + int len;
> +
> + if (adapter->vpd->buff)
> + len = adapter->vpd->len;
> +
> + reinit_completion(>fw_done);
> + crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
> + crq.get_vpd_size.cmd = GET_VPD_SIZE;
> + ibmvnic_send_crq(adapter, );
> + wait_for_completion(>fw_done);
> +

Shouldn't there be a check for the return code when getting the
vpd size?


> + if (!adapter->vpd->buff)
> + adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
> + else if (adapter->vpd->len != len)
> + adapter->vpd->buff =
> + krealloc(adapter->vpd->buff,
> +  adapter->vpd->len, GFP_KERNEL);
> +
> + if (!adapter->vpd->buff) {
> + dev_err(dev, "Could allocate VPD buffer\n");
> + return -ENOMEM;
> + }
> +
> + adapter->vpd->dma_addr =
> + dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
> +DMA_FROM_DEVICE);
> + if (dma_mapping_error(dev, dma_addr)) {
> + dev_err(dev, "Could not map VPD buffer\n");
> + return -ENOMEM;
> + }
> +
> + reinit_completion(>fw_done);
> + crq.get_vpd.first = IBMVNIC_CRQ_CMD;
> + crq.get_vpd.cmd = GET_VPD;
> + crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
> + crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
> + ibmvnic_send_crq(adapter, );
> + wait_for_completion(>fw_done);
> +
> + return 0;
> +}
> +
>  static int init_resources(struct ibmvnic_adapter *adapter)
>  {
>   struct net_device *netdev = adapter->netdev;
> @@ -850,6 +908,10 @@ static int init_resources(struct ibmvnic_adapter 
> *adapter)
>   if (rc)
>   return rc;
> 
> + adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
> + if (!adapter->vpd)
> + return -ENOMEM;
> +
>   adapter->map_id = 1;
>   adapter->napi = kcalloc(adapter->req_rx_queues,
>   sizeof(struct napi_struct), GFP_KERNEL);
> @@ -950,6 +1012,10 @@ static int ibmvnic_open(struct net_device *netdev)
> 
>   rc = __ibmvnic_open(netdev);
>   netif_carrier_on(netdev);
> +
> + /* Vital Product Data (VPD) */
> + ibmvnic_get_vpd(adapter);
> +
>   mutex_unlock(>reset_lock);
> 
>   return rc;
> @@ -1878,11 +1944,15 @@ static int ibmvnic_get_link_ksettings(struct 
> net_device *netdev,
>   return 0;
>  }
> 
> -static void ibmvnic_get_drvinfo(struct net_device *dev,
> +static void ibmvnic_get_drvinfo(struct net_device *netdev,
>   struct ethtool_drvinfo *info)
>  {
> + struct ibmvnic_adapter *adapter = netdev_priv(netdev);
> +
>   strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
>   strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
> + strlcpy(info->fw_version, adapter->fw_version,
> + sizeof(info->fw_version));
>  }
> 
>  static u32 ibmvnic_get_msglevel(struct net_device *netdev)
> @@ -3076,6 +3146,77 @@ static void send_cap_queries(struct ibmvnic_adapter 
> *adapter)
>   ibmvnic_send_crq(adapter, );
>  }
> 
> +static void 

Re: [PATCH v2 3/8] powerpc/mm: Separate ibm, dynamic-memory data from DT format

2017-10-24 Thread Nathan Fontenot
On 10/24/2017 01:08 AM, Michael Ellerman wrote:
> Nathan Fontenot <nf...@linux.vnet.ibm.com> writes:
> 
>> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
>> new file mode 100644
>> index ..8ad7cf36b2c4
>> --- /dev/null
>> +++ b/arch/powerpc/mm/drmem.c
>> @@ -0,0 +1,84 @@
>> +/*
>> + * Dynamic reconfiguration memory support
>> + *
>> + * Copyright 2017 IBM Corporation
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * as published by the Free Software Foundation; either version
>> + * 2 of the License, or (at your option) any later version.
>> + */
>> +
>> +#define pr_fmt(fmt) "drmem: " fmt
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +static struct drmem_lmb_info __drmem_info;
>> +struct drmem_lmb_info *drmem_info = &__drmem_info;
>> +
>> +int __init init_drmem_lmbs(unsigned long node)
>> +{
> 
> Something in here is blowing up for me.
> 
> This is a p8 LPAR, which uses petitboot so we kexec into the kernel,
> gives me:
> 
>   [   29.020618] kexec_core: Starting new kernel
>   [0.00] bootconsole [udbg0] enabled
>-> early_setup(), dt_ptr: 0x1ec6
>   [0.00] bootmem alloc of 3024 bytes failed!
>   [0.00] Kernel panic - not syncing: Out of memory
>   [0.00] CPU: 0 PID: 0 Comm: swapper Not tainted 
> 4.14.0-rc2-gcc6-gf7212f8 #1
>   [0.00] Call Trace:
>   [0.00] [c0f57b80] [c0a2bae0] dump_stack+0xb0/0xf0 
> (unreliable)
>   [0.00] [c0f57bc0] [c00fcdb0] panic+0x148/0x338
>   [0.00] [c0f57c60] [c0d4bb08] 
> ___alloc_bootmem.part.1+0x3c/0x40
>   [0.00] [c0f57cc0] [c0d4bf18] 
> __alloc_bootmem+0x3c/0x50
>   [0.00] [c0f57cf0] [c0d226c4] 
> init_drmem_lmbs+0xf8/0x31c
>   [0.00] [c0f57d70] [c0d1a05c] 
> early_init_dt_scan_memory_ppc+0x88/0x25c
>   [0.00] [c0f57e10] [00d78e78] 0xd78e78
>   [0.00] [c0f57e70] [00d1a92c] 0xd1a92c
>   [0.00] [c0f57f10] [00d1be3c] 0xd1be3c
>   [0.00] [c0f57f90] [b13c] 0xb13c
>   [0.00] Rebooting in 180 seconds..
>   [0.00] System Halted, OK to turn off power
> 
> 

Looks like we're out of memory when trying to allocate the LMB array, not sure 
why.
I'll try to re-create this  on one of my systems.

I found a patch where you tried to remove traces of bootmem from a couple of
years ago. Not sure if it makes a difference but should I be using
memblock_virt_alloc() instead of alloc_bootmem()?

-Nathan

> I'm at kernel summit so haven't had time to look any further sorry.
> 
> cheers
> 



[PATCH v2 8/8] powerpc: Enable support of ibm,dynamic-memory-v2

2017-10-20 Thread Nathan Fontenot
Add required bits to the architecture vector to enable support
of the ibm,dynamic-memory-v2 device tree property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/firmware.h   |3 ++-
 arch/powerpc/include/asm/prom.h   |1 +
 arch/powerpc/kernel/prom_init.c   |1 +
 arch/powerpc/platforms/pseries/firmware.c |1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/firmware.h 
b/arch/powerpc/include/asm/firmware.h
index 8645897472b1..832df61f30ef 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,6 +51,7 @@
 #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x8000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0001)
 #define FW_FEATURE_PRRNASM_CONST(0x0002)
+#define FW_FEATURE_DRMEM_V2ASM_CONST(0x0004)
 
 #ifndef __ASSEMBLY__
 
@@ -67,7 +68,7 @@ enum {
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
-   FW_FEATURE_HPT_RESIZE,
+   FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index f0a30a003bd8..9f27866e3126 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -143,6 +143,7 @@ extern int of_get_ibm_chip_id(struct device_node *np);
 #define OV5_PFO_HW_842 0x1140  /* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR0x1120  /* PFO Encryption Accelerator */
 #define OV5_SUB_PROCESSORS 0x1501  /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2   0x1680  /* ibm,dynamic-reconfiguration-v2 */
 #define OV5_XIVE_SUPPORT   0x17C0  /* XIVE Exploitation Support Mask */
 #define OV5_XIVE_LEGACY0x1700  /* XIVE legacy mode Only */
 #define OV5_XIVE_EXPLOIT   0x1740  /* XIVE exploitation mode Only */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..acf4b2e0530c 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -869,6 +869,7 @@ struct ibm_arch_vec __cacheline_aligned 
ibm_architecture_vec = {
.reserved2 = 0,
.reserved3 = 0,
.subprocessors = 1,
+   .byte22 = OV5_FEAT(OV5_DRMEM_V2),
.intarch = 0,
.mmu = 0,
.hash_ext = 0,
diff --git a/arch/powerpc/platforms/pseries/firmware.c 
b/arch/powerpc/platforms/pseries/firmware.c
index 63cc82ad58ac..aac3ea2911b2 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -114,6 +114,7 @@ static __initdata struct vec5_fw_feature
 vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN,   OV5_PRRN},
+   {FW_FEATURE_DRMEM_V2,   OV5_DRMEM_V2},
 };
 
 static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)



[PATCH v2 7/8] powerpc/pseries: Add support for ibm, dynamic-memory-v2 property

2017-10-20 Thread Nathan Fontenot
The Power Hypervisor has introduced a new device tree format for
the property describing the dynamic reconfiguration LMBs for a system.
This new format condenses the size of the property, especially
on large memory systems.

Instead of the current format that contains an entry for every
possible LMB, the new format contains an entry for every range
of LMBs that possess the same flags and associativity index.

This patch updates the powerpc/mm/drmem.c code to parse the new
property format at boot and create the LMB array. This also updates
the device tree updating routine to build a device tree property
in this format.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/drmem.h |   12 ++
 arch/powerpc/mm/drmem.c  |  188 ++
 2 files changed, 182 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 32d859c84202..b7becafc528d 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -49,6 +49,18 @@ struct of_drconf_cell_v1 {
u32 flags;
 };
 
+/* Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+   u32 seq_lmbs;
+   u64 base_addr;
+   u32 drc_index;
+   u32 aa_index;
+   u32 flags;
+} __attribute__((packed));
+
 #define DRCONF_MEM_ASSIGNED0x0008
 #define DRCONF_MEM_AI_INVALID  0x0040
 #define DRCONF_MEM_RESERVED0x0080
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 9cd9e680874e..b5b8b8f46292 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -21,25 +21,13 @@
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
-int __init init_drmem_lmbs(unsigned long node)
+static int __init init_drmem_lmbs_v1(const __be32 *prop, u32 len)
 {
struct drmem_lmb *lmb;
-   const __be32 *prop;
-   int prop_sz;
-   u32 len;
-
-   prop = of_get_flat_dt_prop(node, "ibm,lmb-size", );
-   if (!prop || len < dt_root_size_cells * sizeof(__be32))
-   return -1;
-
-   drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, );
-
-   prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", );
-   if (!prop || len < dt_root_size_cells * sizeof(__be32))
-   return -1;
+   u32 prop_sz;
 
drmem_info->n_lmbs = of_read_number(prop++, 1);
-   prop_sz = drmem_info->n_lmbs * sizeof(struct of_drconf_cell)
+   prop_sz = drmem_info->n_lmbs * sizeof(struct of_drconf_cell_v1)
  + sizeof(__be32);
if (prop_sz < len)
return -1;
@@ -61,6 +49,89 @@ int __init init_drmem_lmbs(unsigned long node)
return 0;
 }
 
+static void read_one_drconf_v2_cell(const __be32 **cell,
+   struct of_drconf_cell_v2 *dr_cell)
+{
+   const __be32 *p = *cell;
+
+   dr_cell->seq_lmbs = of_read_number(p++, 1);
+   dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+   dr_cell->drc_index = of_read_number(p++, 1);
+   dr_cell->aa_index = of_read_number(p++, 1);
+   dr_cell->flags = of_read_number(p++, 1);
+
+   *cell = p;
+}
+
+static int __init init_drmem_lmbs_v2(const __be32 *prop, u32 len)
+{
+   struct drmem_lmb *lmb;
+   struct of_drconf_cell_v2 dr_cell;
+   const __be32 *p;
+   u32 lmb_sets, prop_sz;
+   int i, j, lmb_index;
+
+   lmb_sets = of_read_number(prop++, 1);
+   prop_sz = lmb_sets * sizeof(struct of_drconf_cell_v2)
+ + sizeof(__be32);
+   if (prop_sz < len)
+   return -1;
+
+   /* first pass, calculate the number of LMBs */
+   p = prop;
+   for (i = 0; i < lmb_sets; i++) {
+   read_one_drconf_v2_cell(&p, &dr_cell);
+   drmem_info->n_lmbs += dr_cell.seq_lmbs;
+   }
+
+   drmem_info->lmbs = alloc_bootmem(drmem_info->n_lmbs * sizeof(*lmb));
+   if (!drmem_info->lmbs)
+   return -1;
+
+   lmb_index = 0;
+   p = prop;
+   for (i = 0; i < lmb_sets; i++) {
+   read_one_drconf_v2_cell(&p, &dr_cell);
+
+   for (j = 0; j < dr_cell.seq_lmbs; j++) {
+   lmb = &drmem_info->lmbs[lmb_index++];
+
+   lmb->base_addr = dr_cell.base_addr;
+   dr_cell.base_addr += drmem_info->lmb_size;
+
+   lmb->drc_index = dr_cell.drc_index;
+   dr_cell.drc_index++;
+
+   lmb->aa_index = dr_cell.aa_index;
+   lmb->flags = dr_cell.flags;
+   }
+   }
+
+   return 0;
+}
+
+int __init init_drmem_lmbs(unsigned long no

[PATCH v2 6/8] powerpc: Move of_drconf_cell struct to asm/drmem.h

2017-10-20 Thread Nathan Fontenot
Now that the powerpc code parses dynamic reconfiguration memory
LMB information from the LMB array and not the device tree
directly, we can move the of_drconf_cell struct to drmem.h, where
it fits better.

In addition, the struct is renamed to of_drconf_cell_v1 in
anticipation of upcoming support for version 2 of the dynamic
reconfiguration property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/drmem.h|   18 ++
 arch/powerpc/include/asm/prom.h |   16 
 arch/powerpc/mm/drmem.c |4 ++--
 arch/powerpc/platforms/pseries/hotplug-memory.c |6 +++---
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index ccd8e1aa0cec..32d859c84202 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -35,6 +35,24 @@ extern struct drmem_lmb_info *drmem_info;
_info->lmbs[0],   \
_info->lmbs[drmem_info->n_lmbs - 1])
 
+/* The of_drconf_cell struct defines the layout of the LMB array
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+   u64 base_addr;
+   u32 drc_index;
+   u32 reserved;
+   u32 aa_index;
+   u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x0008
+#define DRCONF_MEM_AI_INVALID  0x0040
+#define DRCONF_MEM_RESERVED0x0080
+
 static inline u32 drmem_lmb_size(void)
 {
return drmem_info->lmb_size;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 825bd5998701..f0a30a003bd8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -80,22 +80,6 @@ extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);
 
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
-   u64 base_addr;
-   u32 drc_index;
-   u32 reserved;
-   u32 aa_index;
-   u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED 0x0008
-#define DRCONF_MEM_AI_INVALID  0x0040
-#define DRCONF_MEM_RESERVED0x0080
-
 /*
  * There are two methods for telling firmware what our capabilities are.
  * Newer machines have an "ibm,client-architecture-support" method on the
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 5aaee23b315c..9cd9e680874e 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -100,7 +100,7 @@ static int drmem_update_dt_v1(struct device_node *memory,
  struct property *prop)
 {
struct property *new_prop;
-   struct of_drconf_cell *dr_cell;
+   struct of_drconf_cell_v1 *dr_cell;
struct drmem_lmb *lmb;
u32 *p;
 
@@ -111,7 +111,7 @@ static int drmem_update_dt_v1(struct device_node *memory,
p = new_prop->value;
*p++ = cpu_to_be32(drmem_info->n_lmbs);
 
-   dr_cell = (struct of_drconf_cell *)p;
+   dr_cell = (struct of_drconf_cell_v1 *)p;
 
for_each_drmem_lmb(lmb) {
dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 2043bc2b77b3..c1578f54c626 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -996,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np)
 
 static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
 {
-   struct of_drconf_cell *new_drmem, *old_drmem;
+   struct of_drconf_cell_v1 *new_drmem, *old_drmem;
unsigned long memblock_size;
u32 entries;
__be32 *p;
@@ -1019,11 +1019,11 @@ static int pseries_update_drconf_memory(struct 
of_reconfig_data *pr)
 * of_drconf_cell's.
 */
entries = be32_to_cpu(*p++);
-   old_drmem = (struct of_drconf_cell *)p;
+   old_drmem = (struct of_drconf_cell_v1 *)p;
 
p = (__be32 *)pr->prop->value;
p++;
-   new_drmem = (struct of_drconf_cell *)p;
+   new_drmem = (struct of_drconf_cell_v1 *)p;
 
for (i = 0; i < entries; i++) {
if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&



[PATCH v2 5/8] powerpc/pseries: Update memory hotplug code to use drmem LMB array

2017-10-20 Thread Nathan Fontenot
Update the pseries memory hotplug code to use the newly added
dynamic reconfiguration LMB array. Doing this is required for the
upcoming support of version 2 of the dynamic reconfiguration
device tree property.

In addition, making this change cleans up the code that parses the
LMB information as we no longer need to worry about device tree
format. This allows us to discard one of the first steps on memory
hotplug where we make a working copy of the device tree property and
convert the entire property to cpu format. Instead we just use the
LMB array directly while holding the memory hotplug lock.

This patch also moves the updating of the device tree property to
powerpc/mm/drmem.c. This allows the hotplug code to work without
needing to know the device tree format and provides a single
routine for updating the device tree property. This new routine
will handle determination of the proper device tree format and
generate a properly formatted device tree property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V2: Correct build issues with uninitialized variables
---
 arch/powerpc/include/asm/drmem.h|   18 +
 arch/powerpc/mm/drmem.c |   81 
 arch/powerpc/platforms/pseries/hotplug-memory.c |  516 +--
 3 files changed, 297 insertions(+), 318 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 912712ecf6c6..ccd8e1aa0cec 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -40,7 +40,25 @@ static inline u32 drmem_lmb_size(void)
return drmem_info->lmb_size;
 }
 
+#define DRMEM_LMB_RESERVED 0x8000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+   lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+   lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+   return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
 extern int __init init_drmem_lmbs(unsigned long node);
 extern u64 drmem_lmb_memory_max(void);
+extern int drmem_update_dt(void);
 
 #endif
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 96e453e1fdd7..5aaee23b315c 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -69,6 +69,87 @@ u64 drmem_lmb_memory_max(void)
return last_lmb->base_addr + drmem_lmb_size();
 }
 
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+   return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+   struct property *new_prop;
+
+   new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+   if (!new_prop)
+   return NULL;
+
+   new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+   new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+   if (!new_prop->name || !new_prop->value) {
+   kfree(new_prop->name);
+   kfree(new_prop->value);
+   kfree(new_prop);
+   return NULL;
+   }
+
+   new_prop->length = prop_sz;
+   of_property_set_flag(new_prop, OF_DYNAMIC);
+   return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+ struct property *prop)
+{
+   struct property *new_prop;
+   struct of_drconf_cell *dr_cell;
+   struct drmem_lmb *lmb;
+   u32 *p;
+
+   new_prop = clone_property(prop, prop->length);
+   if (!new_prop)
+   return -1;
+
+   p = new_prop->value;
+   *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+   dr_cell = (struct of_drconf_cell *)p;
+
+   for_each_drmem_lmb(lmb) {
+   dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+   dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+   dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+
+   /* Do not copy out the bit we use internally to mark
+* an lmb as reserved during hotplug processing.
+*/
+   dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+   dr_cell++;
+   }
+
+   of_update_property(memory, new_prop);
+   return 0;
+}
+
+int drmem_update_dt(void)
+{
+   struct device_node *memory;
+   struct property *prop;
+   int rc;
+
+   memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+   if (!memory)
+   return -1;
+
+   prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+   if (prop)
+   rc = drmem_update_dt_v1(memory, prop);
+   else
+   rc = -1;
+
+   of_node_put(memory);
+   return rc;
+}
+
 static int __init drmem_init(void)
 {
struct drmem_lmb *lmbs;
diff --git a/arch/powerpc/platforms/pseri

[PATCH v2 4/8] powerpc/numa: Update numa code use drmem LMB array

2017-10-20 Thread Nathan Fontenot
Update code in powerpc/numa.c to use the array of dynamic
reconfiguration memory LMBs instead of parsing the device tree
property directly. This allows for the removal of several
helper routines used to read dynamic reconfiguration memory
device tree property information and eases the gathering of
LMB information.

This patch also prepares the numa code for support of the
version 2 dynamic reconfiguration memory property.

Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
---

Updates for V2: Removed unused device node parameter to
numa_setup_drmem_lmbs() and hot_add_drconf_scn_to_nid().
---
 arch/powerpc/include/asm/drmem.h |1 
 arch/powerpc/mm/drmem.c  |9 ++
 arch/powerpc/mm/numa.c   |  158 --
 3 files changed, 42 insertions(+), 126 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index cafe1a3b7da6..912712ecf6c6 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -41,5 +41,6 @@ static inline u32 drmem_lmb_size(void)
 }
 
 extern int __init init_drmem_lmbs(unsigned long node);
+extern u64 drmem_lmb_memory_max(void);
 
 #endif
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 8ad7cf36b2c4..96e453e1fdd7 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -61,6 +61,14 @@ int __init init_drmem_lmbs(unsigned long node)
return 0;
 }
 
+u64 drmem_lmb_memory_max(void)
+{
+   struct drmem_lmb *last_lmb;
+
+   last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+   return last_lmb->base_addr + drmem_lmb_size();
+}
+
 static int __init drmem_init(void)
 {
struct drmem_lmb *lmbs;
@@ -81,4 +89,3 @@ static int __init drmem_init(void)
 }
 
 late_initcall(drmem_init);
-
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a9aa353d41cd..6cddc7e73b21 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static int numa_enabled = 1;
 
@@ -395,69 +396,6 @@ static unsigned long read_n_cells(int n, const __be32 
**buf)
return result;
 }
 
-/*
- * Read the next memblock list entry from the ibm,dynamic-memory property
- * and return the information in the provided of_drconf_cell structure.
- */
-static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 
**cellp)
-{
-   const __be32 *cp;
-
-   drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
-
-   cp = *cellp;
-   drmem->drc_index = of_read_number(cp, 1);
-   drmem->reserved = of_read_number(&cp[1], 1);
-   drmem->aa_index = of_read_number(&cp[2], 1);
-   drmem->flags = of_read_number(&cp[3], 1);
-
-   *cellp = cp + 4;
-}
-
-/*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
- *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries.  Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
- */
-static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
-{
-   const __be32 *prop;
-   u32 len, entries;
-
-   prop = of_get_property(memory, "ibm,dynamic-memory", &len);
-   if (!prop || len < sizeof(unsigned int))
-   return 0;
-
-   entries = of_read_number(prop++, 1);
-
-   /* Now that we know the number of entries, revalidate the size
-* of the property read in to ensure we have everything
-*/
-   if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
-   return 0;
-
-   *dm = prop;
-   return entries;
-}
-
-/*
- * Retrieve and validate the ibm,lmb-size property for drconf memory
- * from the device tree.
- */
-static u64 of_get_lmb_size(struct device_node *memory)
-{
-   const __be32 *prop;
-   u32 len;
-
-   prop = of_get_property(memory, "ibm,lmb-size", &len);
-   if (!prop || len < sizeof(unsigned int))
-   return 0;
-
-   return read_n_cells(n_mem_size_cells, &prop);
-}
-
 struct assoc_arrays {
u32 n_arrays;
u32 array_sz;
@@ -509,7 +447,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
  * This is like of_node_to_nid_single() for memory represented in the
  * ibm,dynamic-reconfiguration-memory node.
  */
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
+static int of_drconf_to_nid_single(struct drmem_lmb *lmb,
   struct assoc_arrays *aa)
 {
int default_nid = 0;
@@ -517,16 +455,16 @@ static int of_drconf_to_nid_single(struct of_drconf_cell 
*drmem,
int index;
 
if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
-   !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
-   drmem->aa_index < aa->n_arrays) {
-   ind

  1   2   3   4   5   6   7   8   >