commit powerpc-utils for openSUSE:Factory

Source-Sync Sat, 10 Apr 2021 06:27:51 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package powerpc-utils for openSUSE:Factory 
checked in at 2021-04-10 15:26:41
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/powerpc-utils (Old)
 and      /work/SRC/openSUSE:Factory/.powerpc-utils.new.2401 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "powerpc-utils"

Sat Apr 10 15:26:41 2021 rev:111 rq:883877 version:1.3.8

Changes:
--------
--- /work/SRC/openSUSE:Factory/powerpc-utils/powerpc-utils.changes      
2021-04-01 14:16:31.663913372 +0200
+++ /work/SRC/openSUSE:Factory/.powerpc-utils.new.2401/powerpc-utils.changes    
2021-04-10 15:27:31.626389028 +0200
@@ -1,0 +2,15 @@
+Thu Apr  8 16:48:27 UTC 2021 - Michal Suchanek <[email protected]>
+
+- Take into account NUMA topology when removing memory (bsc#1183958 ltc#192149).
+   + 0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
+   + 0002-drmgr-read-the-CPU-NUMA-topology.patch
+   + 0003-drmgr-introduce-NUMA-based-LMB-removal.patch
+
+-------------------------------------------------------------------
+Tue Apr  6 14:54:35 UTC 2021 - Michal Suchanek <[email protected]>
+
+- The Install section of hcn-init should also refer to NetworkManager
+  (bsc#1184136 ltc#192155).
+   * Refresh powerpc-utils-hcn-init-NM.patch
+
+-------------------------------------------------------------------

New:
----
  0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
  0002-drmgr-read-the-CPU-NUMA-topology.patch
  0003-drmgr-introduce-NUMA-based-LMB-removal.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ powerpc-utils.spec ++++++
--- /var/tmp/diff_new_pack.8R7iey/_old  2021-04-10 15:27:32.166389663 +0200
+++ /var/tmp/diff_new_pack.8R7iey/_new  2021-04-10 15:27:32.166389663 +0200
@@ -41,8 +41,12 @@
 Patch15:        ofpathname-Use-NVMe-controller-physical-nsid.patch
 Patch16:        powerpc-utils-Fix-ofpathname-race-with-udev-rename.patch
 Patch17:        powerpc-utils-Update-ppc64-cpu-usage.patch
+Patch18:        0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
+Patch19:        0002-drmgr-read-the-CPU-NUMA-topology.patch
+Patch20:        0003-drmgr-introduce-NUMA-based-LMB-removal.patch
 BuildRequires:  autoconf
 BuildRequires:  automake
+BuildRequires:  libnuma-devel
 BuildRequires:  librtas-devel
 BuildRequires:  pkgconfig
 BuildRequires:  systemd-rpm-macros

++++++ 0001-drmgr-don-t-open-sysfs-file-for-each-command.patch ++++++
>From 8b03106e699de1f19b8842ebbb5a91b9a6e5fa6d Mon Sep 17 00:00:00 2001
From: Laurent Dufour <[email protected]>
Date: Tue, 24 Nov 2020 19:28:48 +0100
Subject: [PATCH 1/4] drmgr: don't open sysfs file for each command

The new do_kernel_dlpar_common() API will be used in a later commit to
remove LMBs one at a time by DRC index. This avoids opening and closing
the fd each time.

The fd closing will now be done at the process exit time.

In addition, add an optional parameter to silently ignore some errors.

Also, change the log level of the "success" message to debug to match
the previous one saying "Trying.."

Signed-off-by: Laurent Dufour <[email protected]>
---
 src/drmgr/common.c | 30 +++++++++++++++++-------------
 src/drmgr/dr.h     |  6 +++++-
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/drmgr/common.c b/src/drmgr/common.c
index 5e8135bcf77e..341777250feb 100644
--- a/src/drmgr/common.c
+++ b/src/drmgr/common.c
@@ -1462,39 +1462,43 @@ int kernel_dlpar_exists(void)
 }
 
 /**
- * do_kernel_dlpar
+ * do_kernel_dlpar_common
  * @brief Use the in-kernel dlpar capabilities to perform the requested
  *        dlpar operation.
  *
  * @param cmd command string to write to sysfs
+ * @param silent_error if not 0, the error is not reported; it is up to the caller
  * @returns 0 on success, !0 otherwise
  */
-int do_kernel_dlpar(const char *cmd, int cmdlen)
+int do_kernel_dlpar_common(const char *cmd, int cmdlen, int silent_error)
 {
-       int fd, rc;
-       int my_errno;
+       static int fd = -1;
+       int rc;
 
        say(DEBUG, "Initiating kernel DLPAR \"%s\"\n", cmd);
 
        /* write to file */
-       fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
-       if (fd <= 0) {
-               say(ERROR, "Could not open %s to initiate DLPAR request\n",
-                   SYSFS_DLPAR_FILE);
-               return -1;
+       if (fd == -1) {
+               fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
+               if (fd < 0) {
+                       say(ERROR,
+                           "Could not open %s to initiate DLPAR request\n",
+                           SYSFS_DLPAR_FILE);
+                       return -1;
+               }
        }
 
        rc = write(fd, cmd, cmdlen);
-       my_errno = errno;
-       close(fd);
        if (rc <= 0) {
+               if (silent_error)
+                       return (errno == 0) ? -1 : -errno;
                /* write does not set errno for rc == 0 */
                say(ERROR, "Failed to write to %s: %s\n", SYSFS_DLPAR_FILE,
-                   (rc == 0) ? "wrote 0 bytes" : strerror(my_errno));
+                   (rc == 0) ? "wrote 0 bytes" : strerror(errno));
                return -1;
        }
 
-       say(INFO, "Success\n");
+       say(DEBUG, "Success\n");
        return 0;
 }
 
diff --git a/src/drmgr/dr.h b/src/drmgr/dr.h
index f171bfea73c3..ffbcfdb15cc0 100644
--- a/src/drmgr/dr.h
+++ b/src/drmgr/dr.h
@@ -172,5 +172,9 @@ enum drc_type to_drc_type(const char *);
 int handle_prrn(void);
 
 int kernel_dlpar_exists(void);
-int do_kernel_dlpar(const char *, int);
+int do_kernel_dlpar_common(const char *, int, int);
+static inline int do_kernel_dlpar(const char *cmd, int len)
+{
+       return do_kernel_dlpar_common(cmd, len, 0);
+}
 #endif
-- 
2.31.1

++++++ 0002-drmgr-read-the-CPU-NUMA-topology.patch ++++++
>From 0ab164955a58fc0e563e417e0cd868a96b2ae38e Mon Sep 17 00:00:00 2001
From: Laurent Dufour <[email protected]>
Date: Wed, 2 Dec 2020 16:10:57 +0100
Subject: [PATCH 2/4] drmgr: read the CPU NUMA topology

This will be used in the next commit to compute LMB removal based on the
NUMA topology.

The NUMA topology is read using the libnuma, so a dependency against it is
added in the configure file.

Signed-off-by: Laurent Dufour <[email protected]>
---
 Makefile.am             |   5 +-
 configure.ac            |   4 +
 src/drmgr/common_numa.c | 271 ++++++++++++++++++++++++++++++++++++++++
 src/drmgr/common_numa.h |  84 +++++++++++++
 4 files changed, 363 insertions(+), 1 deletion(-)
 create mode 100644 src/drmgr/common_numa.c
 create mode 100644 src/drmgr/common_numa.h

diff --git a/Makefile.am b/Makefile.am
index 2ff2232537df..422503efd07c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -155,6 +155,7 @@ src_drmgr_drmgr_SOURCES = \
        src/drmgr/common_cpu.c \
        src/drmgr/common_ofdt.c \
        src/drmgr/common_pci.c \
+       src/drmgr/common_numa.c \
        src/drmgr/drmgr.c \
        src/drmgr/drmig_chrp_pmig.c \
        src/drmgr/drslot_chrp_cpu.c \
@@ -171,13 +172,14 @@ noinst_HEADERS += \
        src/drmgr/drcpu.h \
        src/drmgr/dr.h \
        src/drmgr/drmem.h \
+       src/drmgr/common_numa.h \
        src/drmgr/drpci.h \
        src/drmgr/rtas_calls.h \
        src/drmgr/ofdt.h \
        src/drmgr/rtas_calls.h \
        src/drmgr/options.c
 
-src_drmgr_drmgr_LDADD = -lrtas
+src_drmgr_drmgr_LDADD = -lrtas -lnuma
 
 src_drmgr_lsslot_SOURCES = \
        src/drmgr/lsslot.c \
@@ -186,6 +188,7 @@ src_drmgr_lsslot_SOURCES = \
        src/drmgr/common_cpu.c \
        src/drmgr/common_pci.c \
        src/drmgr/common_ofdt.c \
+       src/drmgr/common_numa.c \
        src/drmgr/rtas_calls.c \
        src/drmgr/drslot_chrp_mem.c \
        $(pseries_platform_SOURCES)
diff --git a/configure.ac b/configure.ac
index de3c6758389a..0239754cc4f4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,6 +42,10 @@ AC_CHECK_HEADER(zlib.h,
                [AC_CHECK_LIB(z, inflate, [], [AC_MSG_FAILURE([zlib library is 
required for compilation])])],
                [AC_MSG_FAILURE([zlib.h is required for compiliation])])
 
+AC_CHECK_HEADER(numa.h,
+               [AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_FAILURE([numa 
library is required for compilation])])],
+               [AC_MSG_FAILURE([numa.h is required for compiliation])])
+
 # check for librtas
 AC_ARG_WITH([librtas],
     [AS_HELP_STRING([--without-librtas],
diff --git a/src/drmgr/common_numa.c b/src/drmgr/common_numa.c
new file mode 100644
index 000000000000..95da06e7c2d9
--- /dev/null
+++ b/src/drmgr/common_numa.c
@@ -0,0 +1,271 @@
+/**
+ * @file common_numa.c
+ *
+ * Copyright (C) IBM Corporation 2020
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 
USA.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <numa.h>
+
+#include "dr.h"
+#include "ofdt.h"
+#include "drmem.h"             /* for DYNAMIC_RECONFIG_MEM */
+#include "common_numa.h"
+
+#define RTAS_DIRECTORY         "/proc/device-tree/rtas"
+#define CHOSEN_DIRECTORY       "/proc/device-tree/chosen"
+#define ASSOC_REF_POINTS       "ibm,associativity-reference-points"
+#define ASSOC_LOOKUP_ARRAYS    "ibm,associativity-lookup-arrays"
+#define ARCHITECTURE_VEC_5     "ibm,architecture-vec-5"
+
+/*
+ * Allocate and read a property, return the size.
+ * The read property is not converted to the host endianess.
+ */
+static int load_property(const char *dir, const char *prop, uint32_t **buf)
+{
+       int size;
+
+       size = get_property_size(dir, prop);
+       if (!size)
+               return -ENOENT;
+
+       *buf = zalloc(size);
+       if (!*buf) {
+               say(ERROR, "Could not allocate buffer read %s (%d bytes)\n",
+                   prop, size);
+               return -ENOMEM;
+       }
+
+       if (get_property(dir, prop, *buf, size)) {
+               free(*buf);
+               say(ERROR, "Can't retrieve %s/%s\n", dir, prop);
+               return -EINVAL;
+       }
+
+       return size;
+}
+
+/*
+ * Get the minimal common depth, based on the form 1 of the ibm,associativ-
+ * ity-reference-points property. We only support that form.
+ *
+ * We should check that the "ibm,architecture-vec-5" property byte 5 bit 0
+ * has the value of one.
+ */
+static int get_min_common_depth(struct ppcnuma_topology *numa)
+{
+       int size;
+       uint32_t *p;
+       unsigned char val;
+
+       size = load_property(CHOSEN_DIRECTORY, ARCHITECTURE_VEC_5, &p);
+       if (size < 0)
+               return size;
+
+       /* PAPR byte start at 1 (and not 0) but there is the length field */
+       if (size < 6) {
+               report_unknown_error(__FILE__, __LINE__);
+               free(p);
+               return -EINVAL;
+       }
+       val = ((unsigned char *)p)[5];
+       free(p);
+
+       if (!(val & 0x80))
+               return -ENOTSUP;
+
+       size = load_property(RTAS_DIRECTORY, ASSOC_REF_POINTS, &p);
+       if (size <= 0)
+               return size;
+       if (size < sizeof(uint32_t)) {
+               report_unknown_error(__FILE__, __LINE__);
+               free(p);
+               return -EINVAL;
+       }
+
+       /* Get the first entry */
+       numa->min_common_depth = be32toh(*p);
+       free(p);
+       return 0;
+}
+
+static int get_assoc_arrays(struct ppcnuma_topology *numa)
+{
+       int size;
+       int rc;
+       uint32_t *prop, i;
+       struct assoc_arrays *aa = &numa->aa;
+
+       size = load_property(DYNAMIC_RECONFIG_MEM, ASSOC_LOOKUP_ARRAYS, &prop);
+       if (size < 0)
+               return size;
+
+       size /= sizeof(uint32_t);
+       if (size < 2) {
+               say(ERROR, "Could not find the associativity lookup arrays\n");
+               free(prop);
+               return -EINVAL;
+       }
+
+       aa->n_arrays = be32toh(prop[0]);
+       aa->array_sz = be32toh(prop[1]);
+
+       rc = -EINVAL;
+       if (numa->min_common_depth > aa->array_sz) {
+               say(ERROR, "Bad min common depth or associativity array 
size\n");
+               goto out_free;
+       }
+
+       /* Sanity check */
+       if (size != (aa->n_arrays * aa->array_sz + 2)) {
+               say(ERROR, "Bad size of the associativity lookup arrays\n");
+               goto out_free;
+       }
+
+       aa->min_array = zalloc(aa->n_arrays * sizeof(uint32_t));
+
+       /* Keep only the most significant value */
+       for (i = 0; i < aa->n_arrays; i++) {
+               int prop_index = i * aa->array_sz + numa->min_common_depth + 1;
+
+               aa->min_array[i] = be32toh(prop[prop_index]);
+       }
+       rc = 0;
+
+out_free:
+       free(prop);
+       return rc;
+}
+
+struct ppcnuma_node *ppcnuma_fetch_node(struct ppcnuma_topology *numa, int nid)
+{
+       struct ppcnuma_node *node;
+
+       if (nid > MAX_NUMNODES) {
+               report_unknown_error(__FILE__, __LINE__);
+               return NULL;
+       }
+
+       node = numa->nodes[nid];
+       if (node)
+               return node;
+
+       node = zalloc(sizeof(struct ppcnuma_node));
+       if (!node) {
+               say(ERROR, "Can't allocate a new node\n");
+               return NULL;
+       }
+
+       node->node_id = nid;
+
+       if (!numa->node_count || nid < numa->node_min)
+               numa->node_min = nid;
+       if (nid > numa->node_max)
+               numa->node_max = nid;
+
+       numa->nodes[nid] = node;
+       numa->node_count++;
+
+       return node;
+}
+
+/*
+ * Read the number of CPU for each node using the libnuma to get the details
+ * from sysfs.
+ */
+static int read_numa_topology(struct ppcnuma_topology *numa)
+{
+       struct bitmask *cpus;
+       struct ppcnuma_node *node;
+       int rc, max_node, nid, i;
+
+       if (numa_available() < 0)
+               return -ENOENT;
+
+       max_node = numa_max_node();
+       if (max_node >= MAX_NUMNODES) {
+               say(ERROR, "Too many nodes %d (max:%d)\n",
+                   max_node, MAX_NUMNODES);
+               return -EINVAL;
+       }
+
+       rc = 0;
+
+       /* In case of allocation error, the libnuma is calling exit() */
+       cpus = numa_allocate_cpumask();
+
+       for (nid = 0; nid <= max_node; nid++) {
+
+               if (!numa_bitmask_isbitset(numa_nodes_ptr, nid))
+                       continue;
+
+               node = ppcnuma_fetch_node(numa, nid);
+               if (!node) {
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               rc = numa_node_to_cpus(nid, cpus);
+               if (rc < 0)
+                       break;
+
+               /* Count the CPUs in that node */
+               for (i = 0; i < cpus->size; i++)
+                       if (numa_bitmask_isbitset(cpus, i))
+                               node->n_cpus++;
+
+               numa->cpu_count += node->n_cpus;
+       }
+
+       numa_bitmask_free(cpus);
+
+       if (rc) {
+               ppcnuma_foreach_node(numa, nid, node)
+                       node->n_cpus = 0;
+               numa->cpu_count = 0;
+       }
+
+       return rc;
+}
+
+int ppcnuma_get_topology(struct ppcnuma_topology *numa)
+{
+       int rc;
+
+       rc = numa_available();
+       if (rc < 0)
+               return rc;
+
+       rc = get_min_common_depth(numa);
+       if (rc)
+               return rc;
+
+       rc = get_assoc_arrays(numa);
+       if (rc)
+               return rc;
+
+       rc = read_numa_topology(numa);
+       if (rc)
+               return rc;
+
+       if (!numa->node_count)
+               return -1;
+
+       return 0;
+}
diff --git a/src/drmgr/common_numa.h b/src/drmgr/common_numa.h
new file mode 100644
index 000000000000..acc744d489df
--- /dev/null
+++ b/src/drmgr/common_numa.h
@@ -0,0 +1,84 @@
+/**
+ * @file common_numa.h
+ *
+ * Copyright (C) IBM Corporation 2020
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 
USA.
+ */
+#ifndef _NUMA_H_
+#define _NUMA_H_
+
+#define MAX_NUMNODES   256
+#define NUMA_NO_NODE   -1
+
+struct ppcnuma_node {
+       int             node_id;
+       unsigned int    n_cpus;
+       unsigned int    n_lmbs;
+       unsigned int    ratio;
+       struct dr_node  *lmbs;                  /* linked by lmb_numa_next */
+       struct ppcnuma_node *ratio_next;
+};
+
+struct assoc_arrays {
+       uint32_t        n_arrays;
+       uint32_t        array_sz;
+       uint32_t        *min_array;
+};
+
+struct ppcnuma_topology {
+       unsigned int            cpu_count;
+       unsigned int            lmb_count;
+       unsigned int            cpuless_node_count;
+       unsigned int            cpuless_lmb_count;
+       unsigned int            node_count, node_min, node_max;
+       struct ppcnuma_node     *nodes[MAX_NUMNODES];
+       struct ppcnuma_node     *ratio;
+       uint32_t                min_common_depth;
+       struct assoc_arrays     aa;
+};
+
+int ppcnuma_get_topology(struct ppcnuma_topology *numa);
+struct ppcnuma_node *ppcnuma_fetch_node(struct ppcnuma_topology *numa,
+                                       int node_id);
+
+static inline int ppcnuma_aa_index_to_node(struct ppcnuma_topology *numa,
+                                          uint32_t aa_index)
+{
+       if (aa_index < numa->aa.n_arrays)
+               return numa->aa.min_array[aa_index];
+       return NUMA_NO_NODE;
+}
+
+static inline int ppcnuma_next_node(struct ppcnuma_topology *numa, int nid,
+                                   struct ppcnuma_node **node)
+{
+       for (nid++; nid <= numa->node_max; nid++)
+               if (numa->nodes[nid]) {
+                       *node = numa->nodes[nid];
+                       break;
+               }
+       return nid;
+}
+
+#define ppcnuma_foreach_node(numa, nid, node)                          \
+       for (nid = (numa)->node_min, node = (numa)->nodes[nid]; \
+            nid <= (numa)->node_max;                                   \
+            nid = ppcnuma_next_node(numa, nid, &(node)))
+
+#define ppcnuma_foreach_node_by_ratio(numa, node)                              
\
+       for (node = (numa)->ratio; node; node = node->ratio_next)
+
+#endif /* _NUMA_H_ */
-- 
2.31.1

++++++ 0003-drmgr-introduce-NUMA-based-LMB-removal.patch ++++++
>From 7923af336de2cea6887d70c2801cdce42b9420ba Mon Sep 17 00:00:00 2001
From: Laurent Dufour <[email protected]>
Date: Wed, 25 Nov 2020 18:03:45 +0100
Subject: [PATCH 3/4] drmgr: introduce NUMA based LMB removal

When the NUMA topology can be read, all the LMBs found in the Device Tree
are linked to the corresponding node. LMBs not associated with a node are
considered as not used.

LMBs associated with CPU-less nodes are accounted separately because they
will be targeted first for removal. The LMBs are removed from the CPU-less
nodes to reach an average number of LMBs per CPU-less node.

Nodes with CPUs have a ratio indexed on their number of CPUs. The more CPUs
a node has, the fewer LMBs will be removed from it. This way nodes with a
high number of CPUs will end up with a higher amount of memory.

When a LMB can't be removed (because its memory can't be offlined by the
kernel), the LMB count for node is decremented and the LMB is removed from
the node's LMB list. This way, it is no more accounted as 'active' and the
removal operation will continue without taking it in account anymore.

The removal is done through the remove by DRC index API, which allows
removing one LMB at a time. One future optimization would be to extend that
API to remove a linear range of LMBs each time.

When the requested amount of LMBs could not be removed, a partial status is
reported. This is a major difference, since currently the kernel adds back
the removed LMBs when the requested amount to remove cannot be reached.
That's odd, and reporting a partial status is better when the user wants to
remove as much memory as possible.

If the NUMA topology can't be read, we fallback using the legacy remove
way.

Signed-off-by: Laurent Dufour <[email protected]>
---
 src/drmgr/drslot_chrp_mem.c | 336 +++++++++++++++++++++++++++++++++++-
 src/drmgr/ofdt.h            |   2 +
 2 files changed, 337 insertions(+), 1 deletion(-)

diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
index 502aa3e9fff0..c58f8649f3f6 100644
--- a/src/drmgr/drslot_chrp_mem.c
+++ b/src/drmgr/drslot_chrp_mem.c
@@ -31,12 +31,16 @@
 #include "dr.h"
 #include "ofdt.h"
 #include "drmem.h"
+#include "common_numa.h"
 
 static int block_sz_bytes = 0;
 static char *state_strs[] = {"offline", "online"};
 
 static char *usagestr = "-c mem {-a | -r} {-q <quantity> -p {variable_weight | 
ent_capacity} | {-q <quantity> | -s [<drc_name> | <drc_index>]}}";
 
+static struct ppcnuma_topology numa;
+static int numa_enabled = 0;
+
 /**
  * mem_usage
  * @brief return usage string
@@ -306,6 +310,31 @@ get_mem_node_lmbs(struct lmb_list_head *lmb_list)
        return rc;
 }
 
+static int link_lmb_to_numa_node(struct dr_node *lmb)
+{
+       int nid;
+       struct ppcnuma_node *node;
+
+       nid = ppcnuma_aa_index_to_node(&numa, lmb->lmb_aa_index);
+       if (nid == NUMA_NO_NODE)
+               return 0;
+
+       node = ppcnuma_fetch_node(&numa, nid);
+       if (!node)
+               return -ENOMEM;
+
+       lmb->lmb_numa_next = node->lmbs;
+       node->lmbs = lmb;
+       node->n_lmbs++;
+
+       if (node->n_cpus)
+               numa.lmb_count++;
+       else
+               numa.cpuless_lmb_count++;
+
+       return 0;
+}
+
 int add_lmb(struct lmb_list_head *lmb_list, uint32_t drc_index,
            uint64_t address, uint64_t lmb_sz, uint32_t aa_index,
            uint32_t flags)
@@ -324,6 +353,9 @@ int add_lmb(struct lmb_list_head *lmb_list, uint32_t 
drc_index,
        lmb->lmb_address = address;
        lmb->lmb_aa_index = aa_index;
 
+       if (numa_enabled && link_lmb_to_numa_node(lmb))
+               return -ENOMEM;
+
        if (flags & DRMEM_ASSIGNED) {
                int rc;
 
@@ -490,7 +522,7 @@ get_dynamic_reconfig_lmbs(struct lmb_list_head *lmb_list)
 
        if (stat(DYNAMIC_RECONFIG_MEM_V1, &sbuf) == 0) {
                rc = get_dynamic_reconfig_lmbs_v1(lmb_sz, lmb_list);
-       } else if (is_lsslot_cmd &&
+       } else if ((is_lsslot_cmd || numa_enabled) &&
                   stat(DYNAMIC_RECONFIG_MEM_V2, &sbuf) == 0) {
                rc = get_dynamic_reconfig_lmbs_v2(lmb_sz, lmb_list);
        } else {
@@ -1424,11 +1456,313 @@ int valid_mem_options(void)
        return 0;
 }
 
+static int remove_lmb_by_index(uint32_t drc_index)
+{
+       char cmdbuf[128];
+       int offset;
+
+       offset = sprintf(cmdbuf, "memory remove index 0x%x", drc_index);
+
+       return do_kernel_dlpar_common(cmdbuf, offset,
+                                     1 /* Don't report error */);
+}
+
+static int remove_lmb_from_node(struct ppcnuma_node *node, uint32_t count)
+{
+       struct dr_node *lmb;
+       int err, done = 0, unlinked = 0;
+
+       say(DEBUG, "Try removing %d / %d LMBs from node %d\n",
+           count, node->n_lmbs, node->node_id);
+
+       for (lmb = node->lmbs; lmb && done < count; lmb = lmb->lmb_numa_next) {
+               unlinked ++;
+               err = remove_lmb_by_index(lmb->drc_index);
+               if (err)
+                       say(WARN,"Can't remove LMB node:%d index:0x%x: %s\n",
+                           node->node_id, lmb->drc_index, strerror(-err));
+               else
+                       done++;
+       }
+
+       /*
+        * Decrement the node LMB's count since whatever is the success
+        * of the removal operation, it will not be tried again on that
+        * LMB.
+        */
+       node->n_lmbs -= unlinked;
+
+       /*
+        * Update the node's list of LMB to not process the one we removed or
+        * tried to removed again.
+        */
+       node->lmbs = lmb;
+
+       /* Update numa's counters */
+       if (node->n_cpus)
+               numa.lmb_count -= unlinked;
+       else
+               numa.cpuless_node_count -= unlinked;
+
+       if (!node->n_lmbs) {
+               node->ratio = 0; /* for sanity only */
+               if (node->n_cpus)
+                       numa.cpu_count -= node->n_cpus;
+               else
+                       numa.cpuless_node_count--;
+       }
+
+       say(INFO, "Removed %d LMBs from node %d\n", done, node->node_id);
+       return done;
+}
+
+#define min(a,b) ((a < b) ? a : b)
+
+static void update_cpuless_node_ratio(void)
+{
+       struct ppcnuma_node *node;
+       int nid;
+
+       /*
+        * Assumptions:
+        * 1. numa->cpuless_node_count is up to date
+        * 2. numa->cpuless_lmb_count is up to date
+        * Nodes with no memory and nodes with CPUs are ignored here.
+        */
+       ppcnuma_foreach_node(&numa, nid, node) {
+               if (node->n_cpus ||!node->n_lmbs)
+                       continue;
+               node->ratio = (node->n_lmbs * 100) / numa.cpuless_lmb_count;
+       }
+}
+
+/*
+ * Remove LMBs from node without CPUs only.
+ * The more the node has LMBs, the more LMBs will be removed from it.
+ *
+ * We have to retry the operation multiple times because some LMB cannot be
+ * removed due to the page usage in the kernel. In that case, that LMB is no
+ * more taken in account and the node's LMB count is decremented, assuming that
+ * LMB is unremovable at this time. Thus each node's ratio has to be computed 
on
+ * each iteration. This is not a big deal, usually, there are not so much 
nodes.
+ */
+static int remove_cpuless_lmbs(uint32_t count)
+{
+       struct ppcnuma_node *node;
+       int nid;
+       uint32_t total = count, todo, done = 0, this_loop;
+
+       while (count) {
+               count = min(count, numa.cpuless_lmb_count);
+               if (!count)
+                       break;
+
+               update_cpuless_node_ratio();
+
+               this_loop = 0;
+               ppcnuma_foreach_node(&numa, nid, node) {
+                       if (!node->n_lmbs || node->n_cpus)
+                               continue;
+
+                       todo = (count * node->ratio) / 100;
+                       todo = min(todo, node->n_lmbs);
+                       /* Fix rounded value to 0 */
+                       if (!todo && node->n_lmbs)
+                               todo = (count - this_loop);
+
+                       if (todo)
+                               todo = remove_lmb_from_node(node, todo);
+
+                       this_loop += todo;
+                       done += todo;
+                       if (done >= total)
+                               break;
+               }
+
+               /* Don't continue if we didn't make any progress. */
+               if (!this_loop)
+                       break;
+
+               count -= this_loop;
+       }
+
+       say(DEBUG, "%d / %d LMBs removed from the CPU less nodes\n",
+           done, total);
+       return done;
+}
+
+static void update_node_ratio(void)
+{
+       int nid;
+       struct ppcnuma_node *node, *n, **p;
+       uint32_t cpu_ratio, mem_ratio;
+
+       /*
+        * Assumptions:
+        * 1. numa->cpu_count is up to date
+        * 2. numa->lmb_count is up to date
+        * Nodes with no memory and nodes with no CPU are ignored here.
+        */
+
+       numa.ratio = NULL;
+       ppcnuma_foreach_node(&numa, nid, node) {
+               if (!node->n_lmbs || !node->n_cpus)
+                       continue;
+               cpu_ratio = (node->n_cpus * 100) / numa.cpu_count;
+               mem_ratio = (node->n_lmbs * 100) / numa.lmb_count;
+
+               /* Say that CPU ratio is 90% of the ratio */
+               node->ratio = (cpu_ratio * 9 + mem_ratio) / 10;
+       }
+
+       /* Create an ordered link of the nodes */
+       ppcnuma_foreach_node(&numa, nid, node) {
+               if (!node->n_lmbs || !node->n_cpus)
+                       continue;
+
+               p = &numa.ratio;
+               for (n = numa.ratio;
+                    n && n->ratio < node->ratio; n = n->ratio_next)
+                       p = &n->ratio_next;
+               *p = node;
+               node->ratio_next = n;
+       }
+}
+
+/*
+ * Remove LMBs from node with CPUs.
+ *
+ * The less a node has CPU, the more memory will be removed from it.
+ *
+ * As for the CPU less nodes, we must iterate because some LMBs may not be
+ * removable at this time.
+ */
+static int remove_cpu_lmbs(uint32_t count)
+{
+       struct ppcnuma_node *node;
+       uint32_t total = count, todo, done = 0, this_loop;
+       uint32_t new_lmb_count;
+
+       while(count) {
+               count = min(count, numa.lmb_count);
+               if (!count)
+                       break;
+
+               update_node_ratio();
+
+               new_lmb_count = numa.lmb_count - count;
+
+               this_loop = 0;
+               ppcnuma_foreach_node_by_ratio(&numa, node) {
+                       if (!node->n_lmbs || !node->n_cpus)
+                               continue;
+
+                       todo = (new_lmb_count * node->ratio)  / 100;
+                       todo = node->n_lmbs - min(todo, node->n_lmbs);
+                       todo = min(count, todo);
+
+                       if (todo) {
+                               todo = remove_lmb_from_node(node, todo);
+                               count -= todo;
+                               this_loop += todo;
+                       }
+
+                       if (!count)
+                               break;
+               }
+
+               /* Don't continue if we didn't make any progress. */
+               if (!this_loop)
+                       break;
+               done += this_loop;
+       }
+
+       say(DEBUG, "%d / %d LMBs removed from the CPU nodes\n",
+           done, total);
+       return done;
+}
+
+static void build_numa_topology(void)
+{
+       int rc;
+
+       rc = ppcnuma_get_topology(&numa);
+       if (rc)
+               return;
+
+       numa_enabled = 1;
+}
+
+static void clear_numa_lmb_links(void)
+{
+       int nid;
+       struct ppcnuma_node *node;
+
+       ppcnuma_foreach_node(&numa, nid, node)
+               node->lmbs = NULL;
+}
+
+static int numa_based_remove(uint32_t count)
+{
+       struct lmb_list_head *lmb_list;
+       struct ppcnuma_node *node;
+       int nid;
+       uint32_t done = 0;
+
+       /*
+        * Read the LMBs
+        * Link the LMBs to their node
+        * Update global counter
+        */
+       lmb_list = get_lmbs(LMB_NORMAL_SORT);
+       if (lmb_list == NULL) {
+               clear_numa_lmb_links();
+               return -1;
+       }
+
+       if (!numa.node_count) {
+               clear_numa_lmb_links();
+               free_lmbs(lmb_list);
+               return -EINVAL;
+       }
+
+       ppcnuma_foreach_node(&numa, nid, node) {
+               say(INFO, "node %4d %4d CPUs %8d LMBs\n",
+                   nid, node->n_cpus, node->n_lmbs);
+       }
+
+       done += remove_cpuless_lmbs(count);
+       count -= done;
+
+       done += remove_cpu_lmbs(count);
+
+       report_resource_count(done);
+
+       clear_numa_lmb_links();
+       free_lmbs(lmb_list);
+       return 0;
+}
+
 int do_mem_kernel_dlpar(void)
 {
        char cmdbuf[128];
        int rc, offset;
 
+
+       if (usr_action == REMOVE && usr_drc_count) {
+               build_numa_topology();
+               if (numa_enabled) {
+                       if (!numa_based_remove(usr_drc_count))
+                               return 0;
+
+                       /*
+                        * If the NUMA based removal failed, lets try the legacy
+                        * way.
+                        */
+                       say(WARN, "Can't do NUMA based removal operation.\n");
+               }
+       }
+
        offset = sprintf(cmdbuf, "%s ", "memory");
 
        switch (usr_action) {
diff --git a/src/drmgr/ofdt.h b/src/drmgr/ofdt.h
index 3850a77229b4..3c2840b2e0ee 100644
--- a/src/drmgr/ofdt.h
+++ b/src/drmgr/ofdt.h
@@ -92,6 +92,7 @@ struct dr_node {
                        uint32_t        _lmb_aa_index;
                        struct mem_scn  *_mem_scns;
                        struct of_node  *_of_node;
+                       struct dr_node  *_numa_next;
                } _smem;
 
 #define lmb_address    _node_u._smem._address
@@ -99,6 +100,7 @@ struct dr_node {
 #define lmb_aa_index   _node_u._smem._lmb_aa_index
 #define lmb_mem_scns   _node_u._smem._mem_scns
 #define lmb_of_node    _node_u._smem._of_node
+#define lmb_numa_next  _node_u._smem._numa_next
 
                struct hea_info {
                        uint            _port_no;
-- 
2.31.1

++++++ powerpc-utils-hcn-init-NM.patch ++++++
--- /var/tmp/diff_new_pack.8R7iey/_old  2021-04-10 15:27:32.258389771 +0200
+++ /var/tmp/diff_new_pack.8R7iey/_new  2021-04-10 15:27:32.258389771 +0200
@@ -1,9 +1,22 @@
-Make hcn-init start with NetworkManager.
+From 3bf554da8ca6edab25a8662acb8be0bf96e93871 Mon Sep 17 00:00:00 2001
+From: Michal Suchanek <[email protected]>
+Date: Tue, 30 Mar 2021 21:06:10 +0200
+Subject: [PATCH] hcn-init.service: Start together with NetworkManager.
 
-diff -u powerpc-utils-1.3.8.orig/systemd/hcn-init.service.in 
powerpc-utils-1.3.8/systemd/hcn-init.service.in
---- powerpc-utils-1.3.8.orig/systemd/hcn-init.service.in       2020-08-31 
18:53:55.000000000 +0200
-+++ powerpc-utils-1.3.8/systemd/hcn-init.service.in    2021-03-30 
11:45:35.121304822 +0200
-@@ -1,7 +1,8 @@
+hcn-init uses NetworkManager so it does not make sense to run it when
+a different connection manager is in use. Also, when NetworkManager is in
+use it should be started automatically.
+
+Signed-off-by: Michal Suchanek <[email protected]>
+---
+ systemd/hcn-init.service.in | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/systemd/hcn-init.service.in b/systemd/hcn-init.service.in
+index d0aa2adf7522..768a69e0084b 100644
+--- a/systemd/hcn-init.service.in
++++ b/systemd/hcn-init.service.in
+@@ -1,11 +1,12 @@
  [Unit]
  Description=hybrid virtual network scan and config
 -After=network-online.target
@@ -14,3 +27,11 @@
  
  [Service]
  Type=oneshot
+ ExecStart=@sbindir@/hcnmgr -s
+ 
+ [Install]
+-WantedBy=multi-user.target
++WantedBy=NetworkManager.service
+-- 
+2.26.2
+

commit powerpc-utils for openSUSE:Factory

Reply via email to