While the numa_emulation (fake-numa) driver allows *all* physical nodes to
be split by a given ratio / size, sometimes what is desired is to split a
single memory range into multiple nodes. Also, numa_emulation can only do
this for injecting a fake SRAT, but splitting a NUMA node may also involve
splitting the NFIT. ACPI table injection is more flexible and capable than
what can be described on the kernel command line to the numa_emulation
driver.

Add support to the daxctl utility for splitting SRAT, SLIT, and NFIT by
proximity domain. Yes, this might be a better fit in acpica-tools, but out
of laziness and familiarity I picked daxctl.

Also, for simplicity, this only supports dividing a node by a power-of-2
factor. Conceivably this could later be extended for custom splits, but for
now this covers the most likely common case of "split in half".

Signed-off-by: Dan Williams <[email protected]>
---
 acpi.h             |  132 ++++++++
 daxctl/Makefile.am |    1 
 daxctl/acpi.c      |  870 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 daxctl/builtin.h   |    1 
 daxctl/daxctl.c    |    1 
 5 files changed, 1002 insertions(+), 3 deletions(-)
 create mode 100644 daxctl/acpi.c

diff --git a/acpi.h b/acpi.h
index 06685aa2c90a..e714e28e2354 100644
--- a/acpi.h
+++ b/acpi.h
@@ -17,11 +17,12 @@
 #include <stdint.h>
 #include <linux/uuid.h>
 
+static const uuid_le uuid_pmem = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 
0x43, 0x0d,
+                       0x33, 0x18, 0xb7, 0x8c, 0xdb);
+
 static inline void nfit_spa_uuid_pm(void *uuid)
 {
-       uuid_le uuid_pm = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d,
-                       0x33, 0x18, 0xb7, 0x8c, 0xdb);
-       memcpy(uuid, &uuid_pm, 16);
+       memcpy(uuid, &uuid_pmem, 16);
 }
 
 enum {
@@ -54,6 +55,18 @@ struct nfit {
        uint32_t reserved;
 } __attribute__((packed));
 
+enum acpi_nfit_type {
+       ACPI_NFIT_TYPE_SYSTEM_ADDRESS = 0,
+       ACPI_NFIT_TYPE_MEMORY_MAP = 1,
+       ACPI_NFIT_TYPE_INTERLEAVE = 2,
+       ACPI_NFIT_TYPE_SMBIOS = 3,
+       ACPI_NFIT_TYPE_CONTROL_REGION = 4,
+       ACPI_NFIT_TYPE_DATA_REGION = 5,
+       ACPI_NFIT_TYPE_FLUSH_ADDRESS = 6,
+       ACPI_NFIT_TYPE_CAPABILITIES = 7,
+       ACPI_NFIT_TYPE_RESERVED = 8     /* 8 and greater are reserved */
+};
+
 /**
  * struct nfit_spa - System Physical Address Range Descriptor Table
  */
@@ -70,6 +83,91 @@ struct nfit_spa {
        uint64_t mem_attr;
 } __attribute__((packed));
 
+struct nfit_map {
+       uint16_t type;
+       uint16_t length;
+       uint32_t device_handle;
+       uint16_t physical_id;
+       uint16_t region_id;
+       uint16_t range_index;
+       uint16_t region_index;
+       uint64_t region_size;
+       uint64_t region_offset;
+       uint64_t address;
+       uint16_t interleave_index;
+       uint16_t interleave_ways;
+       uint16_t flags;
+       uint16_t reserved;           /* Reserved, must be zero */
+} __attribute__((packed));
+
+struct srat {
+       struct acpi_header h;
+       uint32_t revision;
+       uint64_t reserved;
+} __attribute__((packed));
+
+enum acpi_srat_type {
+       ACPI_SRAT_TYPE_CPU_AFFINITY = 0,
+       ACPI_SRAT_TYPE_MEMORY_AFFINITY = 1,
+       ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY = 2,
+       ACPI_SRAT_TYPE_GICC_AFFINITY = 3,
+       ACPI_SRAT_TYPE_GIC_ITS_AFFINITY = 4,    /* ACPI 6.2 */
+       ACPI_SRAT_TYPE_GENERIC_AFFINITY = 5,    /* ACPI 6.3 */
+       ACPI_SRAT_TYPE_RESERVED = 6     /* 5 and greater are reserved */
+};
+
+struct srat_cpu {
+       uint8_t type;
+       uint8_t length;
+       uint8_t proximity_domain_lo;
+       uint8_t apic_id;
+       uint32_t flags;
+       uint8_t local_sapic_eid;
+       uint8_t proximity_domain_hi[3];
+       uint32_t clock_domain;
+} __attribute__((packed));
+
+struct srat_generic {
+       uint8_t type;
+       uint8_t length;
+       uint8_t reserved;
+       uint8_t device_handle_type;
+       uint32_t proximity_domain;
+       uint8_t device_handle[16];
+       uint32_t flags;
+       uint32_t reserved1;
+} __attribute__((packed));
+
+struct srat_mem {
+       uint8_t type;
+       uint8_t length;
+       uint32_t proximity_domain;
+       uint16_t reserved;
+       uint64_t spa_base;
+       uint64_t spa_length;
+       uint32_t reserved1;
+       uint32_t flags;
+       uint64_t reserved2;
+} __attribute__((packed));
+
+struct acpi_subtable8 {
+       uint8_t type;
+       uint8_t length;
+       uint8_t buf[];
+} __attribute__((packed));
+
+struct acpi_subtable16 {
+       uint16_t type;
+       uint16_t length;
+       uint8_t buf[];
+} __attribute__((packed));
+
+struct slit {
+       struct acpi_header h;
+       uint64_t count;
+       uint8_t entry[]; /* size = count^2 */
+} __attribute__((packed));
+
 static inline unsigned char acpi_checksum(void *buf, size_t size)
 {
         unsigned char sum, *data = buf;
@@ -107,4 +205,32 @@ static inline void writeb(unsigned char v, void *a)
 
        *p = v;
 }
+
+static inline uint64_t readq(void *a)
+{
+       uint64_t *p = a;
+
+       return le64toh(*p);
+}
+
+static inline uint32_t readl(void *a)
+{
+       uint32_t *p = a;
+
+       return le32toh(*p);
+}
+
+static inline uint16_t readw(void *a)
+{
+       uint16_t *p = a;
+
+       return le16toh(*p);
+}
+
+static inline uint8_t readb(void *a)
+{
+       uint8_t *p = a;
+
+       return *p;
+}
 #endif /* __ACPI_H__ */
diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am
index ca1b86748bfb..9b1313ac547f 100644
--- a/daxctl/Makefile.am
+++ b/daxctl/Makefile.am
@@ -13,6 +13,7 @@ config.h: $(srcdir)/Makefile.am
 
 daxctl_SOURCES =\
                daxctl.c \
+               acpi.c \
                list.c \
                migrate.c \
                device.c \
diff --git a/daxctl/acpi.c b/daxctl/acpi.c
new file mode 100644
index 000000000000..5d0e3df15aa9
--- /dev/null
+++ b/daxctl/acpi.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2020 Intel Corporation. All rights reserved. */
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <endian.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ccan/list/list.h>
+#include <util/bitmap.h>
+#include <ccan/minmax/minmax.h>
+#include <util/parse-options.h>
+#include <util/size.h>
+
+#include <acpi.h>
+
+static bool verbose;
+
+struct srat_container {
+       struct srat *srat;
+       struct list_head ents;
+};
+
+struct srat_ent {
+       struct list_node list;
+       struct acpi_subtable8 *tbl;
+};
+
+struct nfit_container {
+       struct nfit *nfit;
+       struct list_head ents;
+};
+
+struct nfit_ent {
+       struct list_node list;
+       struct acpi_subtable16 *tbl;
+};
+
+static void free_srat_container(struct srat_container *container)
+{
+       struct srat_ent *ent, *_e;
+
+       if (!container)
+               return;
+
+       list_for_each_safe(&container->ents, ent, _e, list) {
+               list_del_from(&container->ents, &ent->list);
+               free(ent);
+       }
+       free(container->srat);
+       free(container);
+}
+
+static void free_nfit_container(struct nfit_container *container)
+{
+       struct nfit_ent *ent, *_e;
+
+       if (!container)
+               return;
+
+       list_for_each_safe(&container->ents, ent, _e, list) {
+               list_del_from(&container->ents, &ent->list);
+               free(ent);
+       }
+       free(container->nfit);
+       free(container);
+}
+
+static void *read_table(int fd, const char *sig)
+{
+       int rc, len;
+       uint8_t checksum;
+       struct acpi_header hdr;
+       struct acpi_header *data = NULL;
+
+       rc = read(fd, &hdr, sizeof(hdr));
+       if (rc < (int) sizeof(hdr)) {
+               error("failed to read header\n");
+               rc = rc < 0 ? -errno : -EINVAL;
+               goto out;
+       }
+
+       if (strncmp((char *) hdr.signature, sig, 4) != 0) {
+               error("invalid %s header\n", sig);
+               rc = -EINVAL;
+               goto out;
+       }
+
+       data = calloc(1, hdr.length);
+       if (!data) {
+               error("failed to alloc %d bytes\n", hdr.length);
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       for (len = hdr.length; len > 0;) {
+               int offset = hdr.length - len;
+
+               rc = pread(fd, ((char *) data) + offset, len, offset);
+               if (rc < 0)
+                       break;
+               len -= rc;
+       }
+
+       if (rc < 0) {
+               error("failed to read %s\n", sig);
+               rc = rc < 0 ? -errno : -EINVAL;
+               goto out;
+       }
+
+       checksum = data->checksum;
+       data->checksum = 0;
+       if (acpi_checksum(data, data->length) != checksum) {
+               error("bad %s checksum\n", sig);
+               rc = -EINVAL;
+               goto out;
+       }
+out:
+       close(fd);
+       if (rc < 0) {
+               free(data);
+               data = NULL;
+       }
+       return data;
+}
+
+static struct nfit_container *read_nfit(int fd)
+{
+       void *buf;
+       int rc = 0;
+       unsigned int length;
+       struct nfit *nfit = NULL;
+       struct nfit_container *container = NULL;
+
+       nfit = read_table(fd, "NFIT");
+       if (!nfit)
+               return NULL;
+
+       container = calloc(1, sizeof(*container));
+       if (!container) {
+               error("failed to alloc %d bytes\n", nfit->h.length);
+               rc = -ENOMEM;
+               goto out;
+       }
+       list_head_init(&container->ents);
+       container->nfit = nfit;
+
+       length = nfit->h.length - sizeof(*nfit);
+       if (!length) {
+               error("no sub-tables found in SRAT\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       buf = nfit + 1;
+       while (length) {
+               struct nfit_ent *ent = calloc(1, sizeof(*ent));
+
+               if (!ent) {
+                       error("failed to alloc %zd bytes\n", sizeof(*ent));
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               ent->tbl = (struct acpi_subtable16 *) buf;
+               list_add_tail(&container->ents, &ent->list);
+               if (readw(&ent->tbl->length) > length
+                               || !readw(&ent->tbl->length)) {
+                       error("failed to validate all SRAT entries\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+               length -= readw(&ent->tbl->length);
+               buf += readw(&ent->tbl->length);
+       }
+out:
+       if (rc < 0) {
+               if (container)
+                       free_nfit_container(container);
+               else
+                       free(nfit);
+               container = NULL;
+       }
+       return container;
+}
+
+static struct srat_container *read_srat(int fd)
+{
+       void *buf;
+       int rc = 0;
+       unsigned int length;
+       struct srat *srat = NULL;
+       struct srat_container *container = NULL;
+
+       srat = read_table(fd, "SRAT");
+       if (!srat)
+               return NULL;
+
+       container = calloc(1, sizeof(*container));
+       if (!container) {
+               error("failed to alloc %d bytes\n", srat->h.length);
+               rc = -ENOMEM;
+               goto out;
+       }
+       list_head_init(&container->ents);
+       container->srat = srat;
+
+       length = srat->h.length - sizeof(*srat);
+       if (!length) {
+               error("no sub-tables found in SRAT\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       buf = srat + 1;
+       while (length) {
+               struct srat_ent *ent = calloc(1, sizeof(*ent));
+
+               if (!ent) {
+                       error("failed to alloc %zd bytes\n", sizeof(*ent));
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               ent->tbl = (struct acpi_subtable8 *) buf;
+               list_add_tail(&container->ents, &ent->list);
+               if (readb(&ent->tbl->length) > length
+                               || !readb(&ent->tbl->length)) {
+                       error("failed to validate all SRAT entries\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+               length -= readb(&ent->tbl->length);
+               buf += readb(&ent->tbl->length);
+       }
+out:
+       if (rc < 0) {
+               if (container)
+                       free_srat_container(container);
+               else
+                       free(srat);
+               container = NULL;
+       }
+       return container;
+}
+
+enum acpi_table {
+       ACPI_SRAT,
+       ACPI_SLIT,
+       ACPI_NFIT,
+       ACPI_TABLES,
+};
+
+static const char *acpi_table_name(enum acpi_table id)
+{
+       const char *names[ACPI_TABLES] = {
+               [ACPI_SRAT] = "srat",
+               [ACPI_SLIT] = "slit",
+               [ACPI_NFIT] = "nfit",
+       };
+
+       return names[id];
+}
+
+struct parameters {
+       char *table[ACPI_TABLES];
+       char *new_table[ACPI_TABLES];
+       int in_fd[ACPI_TABLES];
+       int out_fd[ACPI_TABLES];
+       int nodes;
+       int pxm;
+       const char *path;
+} param = {
+       .nodes = 2,
+};
+
+struct split_context {
+       uint64_t address;
+       uint64_t length;
+       int max_pxm;
+       int max_region_id;
+       int max_range_index;
+};
+
+static int create_nfit(struct parameters *p, struct nfit_container *container,
+               struct list_head *mems)
+{
+       unsigned int oem_revision;
+       size_t orig_size, size;
+       struct nfit_ent *ent;
+       struct nfit *nfit;
+       void *buf;
+       int rc;
+
+       orig_size = readl(&container->nfit->h.length);
+       size = orig_size;
+       list_for_each(mems, ent, list)
+               size += readw(&ent->tbl->length);
+
+       buf = calloc(1, size);
+       if (!buf)
+               return -ENOMEM;
+
+       nfit = buf;
+       memcpy(nfit, container->nfit, sizeof(*nfit));
+       writel(size, &nfit->h.length);
+       oem_revision = readl(&nfit->h.oem_revision);
+       writel(oem_revision + 1, &nfit->h.oem_revision);
+       buf += sizeof(*nfit);
+
+       list_append_list(&container->ents, mems);
+       list_for_each(&container->ents, ent, list) {
+               memcpy(buf, ent->tbl, readw(&ent->tbl->length));
+               buf += readw(&ent->tbl->length);
+       }
+
+       writeb(acpi_checksum(nfit, size), &nfit->h.checksum);
+
+       rc = write(p->out_fd[ACPI_NFIT], nfit, size);
+       free(nfit);
+
+       if (rc < 0)
+               return -errno;
+       return 0;
+}
+
+static int create_srat(struct parameters *p, struct srat_container *container,
+               struct list_head *mems)
+{
+       unsigned int oem_revision;
+       size_t orig_size, size;
+       struct srat_ent *ent;
+       struct srat *srat;
+       void *buf;
+       int rc;
+
+       orig_size = readl(&container->srat->h.length);
+       size = orig_size;
+       list_for_each(mems, ent, list)
+               size += readb(&ent->tbl->length);
+
+       buf = calloc(1, size);
+       if (!buf)
+               return -ENOMEM;
+
+       srat = buf;
+       memcpy(srat, container->srat, sizeof(*srat));
+       writel(size, &srat->h.length);
+       oem_revision = readl(&srat->h.oem_revision);
+       writel(oem_revision + 1, &srat->h.oem_revision);
+       buf += sizeof(*srat);
+
+       list_append_list(&container->ents, mems);
+       list_for_each(&container->ents, ent, list) {
+               memcpy(buf, ent->tbl, readb(&ent->tbl->length));
+               buf += readb(&ent->tbl->length);
+       }
+
+       writeb(acpi_checksum(srat, size), &srat->h.checksum);
+
+       rc = write(p->out_fd[ACPI_SRAT], srat, size);
+       free(srat);
+
+       if (rc < 0)
+               return -errno;
+       return 0;
+}
+
+#define dbg(fmt, ...) \
+       ({if (verbose) { \
+               fprintf(stderr, fmt, ##__VA_ARGS__); \
+       } else { \
+               do { } while (0); \
+       }})
+
+static int split_srat(struct parameters *p, struct split_context *split)
+{
+       struct srat_container *srat = read_srat(p->in_fd[ACPI_SRAT]);
+       struct srat_ent *ent, *found_ent = NULL;
+       int count = 0, max_pxm = 0, i, rc;
+       uint64_t length, address;
+       struct srat_mem *m;
+       LIST_HEAD(mems);
+
+       list_for_each(&srat->ents, ent, list) {
+               struct srat_generic *g;
+               struct srat_cpu *c;
+               int pxm, type;
+
+               type = readb(&ent->tbl->type);
+               switch (type) {
+               case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
+                       m = (struct srat_mem *) ent->tbl;
+                       pxm = readl(&m->proximity_domain);
+                       break;
+               case ACPI_SRAT_TYPE_CPU_AFFINITY:
+                       c = (struct srat_cpu *) ent->tbl;
+                       pxm = readb(&c->proximity_domain_lo);
+                       pxm |= readw(&c->proximity_domain_hi[0]) << 8;
+                       pxm |= readb(&c->proximity_domain_hi[2]) << 24;
+                       break;
+               case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+                       g = (struct srat_generic *) ent->tbl;
+                       pxm = readl(&g->proximity_domain);
+                       break;
+               default:
+                       pxm = -1;
+                       break;
+               }
+               max_pxm = max(pxm, max_pxm);
+
+               if (type != ACPI_SRAT_TYPE_MEMORY_AFFINITY)
+                       continue;
+
+               if (p->pxm == pxm) {
+                       found_ent = ent;
+                       count++;
+               }
+
+               if (count > 1) {
+                       error("SRAT: no support for splitting multiple entry 
proximity domains\n");
+                       return -ENXIO;
+               }
+
+       }
+
+       if (!found_ent) {
+               error("SRAT: proximity domain to split not found\n");
+               free_srat_container(srat);
+               return -ENOENT;
+       }
+       ent = found_ent;
+       m = (struct srat_mem *) ent->tbl;
+       address = readq(&m->spa_base);
+       length = readq(&m->spa_length);
+
+       *split = (struct split_context) {
+               .address = address,
+               .length = length,
+               .max_pxm = max_pxm,
+       };
+
+       length /= p->nodes;
+       writeq(length, &m->spa_length);
+       dbg("SRAT: edit: %#llx@%#llx pxm: %d\n", (unsigned long long) length,
+                       (unsigned long long) address, p->pxm);
+
+       address += length;
+
+       for (i = 0; i < p->nodes - 1; i++) {
+               struct srat_mem *srat_mem = calloc(1, sizeof(*srat_mem));
+
+               if (!srat_mem) {
+                       error("failed to alloc srat entry\n");
+                       return -ENOMEM;
+               }
+
+               ent = calloc(1, sizeof(*ent));
+               if (!ent) {
+                       error("failed to alloc srat entry\n");
+                       free(srat_mem);
+                       return -ENOMEM;
+               }
+
+               ent->tbl = (struct acpi_subtable8 *) srat_mem;
+                writeb(ACPI_SRAT_TYPE_MEMORY_AFFINITY, &srat_mem->type);
+                writeb(sizeof(*srat_mem), &srat_mem->length);
+                writel(max_pxm + 1 + i, &srat_mem->proximity_domain);
+                writeq(address, &srat_mem->spa_base);
+                writeq(length, &srat_mem->spa_length);
+               srat_mem->flags = m->flags;
+               dbg("SRAT:  add: %#llx@%#llx pxm: %d\n",
+                               (unsigned long long) length,
+                               (unsigned long long) address, max_pxm + 1 + i);
+
+               address += length;
+               list_add_tail(&mems, &ent->list);
+       }
+
+       rc = create_srat(p, srat, &mems);
+       free_srat_container(srat);
+       if (rc < 0)
+               return rc;
+       return max_pxm;
+}
+
+static int split_slit(struct parameters *p, struct split_context *split)
+{
+       unsigned int oem_revision;
+       int max_pxm = split->max_pxm;
+       int nodes = max_pxm + p->nodes;
+       struct slit *slit, *slit_old;
+       int old_nodes, rc, i, j;
+       size_t size;
+
+       size = sizeof(*slit) + nodes * nodes;
+       slit = calloc(1, size);
+       if (!slit) {
+               error("failed to allocated %zd bytes\n", size);
+               return -ENOMEM;
+       }
+
+       slit_old = read_table(p->in_fd[ACPI_SLIT], "SLIT");
+       if (!slit_old) {
+               error("failed to read SLIT\n");
+               free(slit);
+               return -ENOMEM;
+       }
+
+       *slit = *slit_old;
+       old_nodes = readq(&slit_old->count);
+       writeq(nodes, &slit->count);
+       writel(size, &slit->h.length);
+       oem_revision = readl(&slit->h.oem_revision);
+       writel(oem_revision + 1, &slit->h.oem_revision);
+       for (i = 0; i < nodes; i++)
+               for (j = 0; j < nodes; j++) {
+                       u8 val = 10;
+
+                       if (i > max_pxm && j > max_pxm)
+                               val = 10;
+                       else if (i <= max_pxm && j <= max_pxm)
+                               val = slit_old->entry[i * old_nodes + j];
+                       else if (i > max_pxm)
+                               val = slit_old->entry[p->pxm * old_nodes + j];
+                       else if (j > max_pxm)
+                               val = slit_old->entry[i * old_nodes + p->pxm];
+
+                       /*
+                        * Linux requires distance 10 for the i == j
+                        * case and rejects distance 10 rejects the SLIT
+                        * if 10 is found anywhere else. Fixup val per
+                        * these constraints.
+                        */
+                       if (val == 10 && i != j)
+                               val = 11;
+
+                       slit->entry[i * nodes + j] = val;
+               }
+       writeb(acpi_checksum(slit, size), &slit->h.checksum);
+
+       rc = write(p->out_fd[ACPI_SLIT], slit, size);
+       free(slit);
+       free(slit_old);
+       return rc;
+}
+
+static int split_nfit_map(struct parameters *p, struct nfit_map *map,
+               struct list_head *maps, struct split_context *split)
+{
+       int rc, i, max_region_id = split->max_region_id,
+           max_range_index = split->max_range_index;
+       uint64_t region_offset, region_size;
+       struct nfit_ent *ent, *_ent;
+
+       region_offset = readq(&map->region_offset);
+       region_size = readq(&map->region_size);
+       region_size /= p->nodes;
+       writeq(region_size, &map->region_size);
+       dbg("NFIT: edit: %#llx@%#llx region_id: %d\n",
+                       (unsigned long long) region_size,
+                       (unsigned long long) region_offset,
+                       readw(&map->region_id));
+       region_offset += region_size;
+
+       for (i = 0; i < p->nodes - 1; i++) {
+               struct nfit_map *nfit_map = calloc(1, sizeof(*nfit_map));
+
+               if (!nfit_map) {
+                       error("failed to alloc nfit entry\n");
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               ent = calloc(1, sizeof(*ent));
+               if (!ent) {
+                       error("failed to alloc nfit entry\n");
+                       free(nfit_map);
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               ent->tbl = (struct acpi_subtable16 *) nfit_map;
+               *nfit_map = *map;
+               writew(max_region_id + 1 + i, &nfit_map->region_id);
+               writew(max_range_index + 1 + i, &nfit_map->range_index);
+               writeq(region_size, &nfit_map->region_size);
+               writeq(region_offset, &nfit_map->region_offset);
+
+               dbg("NFIT:  add: %#llx@%#llx region_id: %d\n",
+                               (unsigned long long) region_size,
+                               (unsigned long long) region_offset,
+                               max_region_id + 1 + i);
+
+               region_offset += region_size;
+               list_add_tail(maps, &ent->list);
+       }
+
+       if (i < p->nodes - 1)
+               list_for_each_safe(maps, ent, _ent, list) {
+                       list_del(&ent->list);
+                       free(ent->tbl);
+                       free(ent);
+                       return rc;
+               }
+
+       split->max_region_id = max_region_id + i;
+       return 0;
+}
+
+static int split_nfit(struct parameters *p, struct split_context *split)
+{
+       int count = 0, max_pxm = split->max_pxm, i, rc, max_range_index = 0,
+           max_region_id = 0;
+       struct nfit_container *nfit = read_nfit(p->in_fd[ACPI_NFIT]);
+       struct nfit_ent *ent, *_ent, *found_ent = NULL;
+       uint64_t length, address;
+       struct nfit_spa *spa;
+       struct nfit_map *map;
+       LIST_HEAD(new_maps);
+       LIST_HEAD(mems);
+       LIST_HEAD(maps);
+
+       list_for_each(&nfit->ents, ent, list) {
+               int pxm, type, range_index, region_id;
+
+               type = readw(&ent->tbl->type);
+               if (type == ACPI_NFIT_TYPE_MEMORY_MAP) {
+                       map = (struct nfit_map *) ent->tbl;
+                       region_id = readw(&map->region_id);
+                       max_region_id = max(max_region_id, region_id);
+                       continue;
+               }
+
+               if (type != ACPI_NFIT_TYPE_SYSTEM_ADDRESS)
+                       continue;
+
+               spa = (struct nfit_spa *) ent->tbl;
+               range_index = readw(&spa->range_index);
+               max_range_index = max(range_index, max_range_index);
+
+               if (memcmp(&spa->type_uuid, &uuid_pmem, sizeof(uuid_pmem)) != 0)
+                       continue;
+
+               pxm = readl(&spa->proximity_domain);
+               if (pxm != p->pxm)
+                       continue;
+
+               if (split->address != readq(&spa->spa_base))
+                       continue;
+
+               if (split->length != readq(&spa->spa_length))
+                       continue;
+
+               found_ent = ent;
+               count++;
+
+               if (count > 1) {
+                       error("NFIT: no support for splitting multiple entry 
proximity domains\n");
+                       return -ENXIO;
+               }
+       }
+
+       if (!found_ent) {
+               dbg("NFIT: proximity domain to split not found\n");
+               free_nfit_container(nfit);
+               return -ENOENT;
+       }
+       ent = found_ent;
+       spa = (struct nfit_spa *) ent->tbl;
+       address = readq(&spa->spa_base);
+       length = readq(&spa->spa_length) / p->nodes;
+       writeq(length, &spa->spa_length);
+       dbg("NFIT: edit: %#llx@%#llx pxm: %d\n", (unsigned long long) length,
+                       (unsigned long long) address, p->pxm);
+       address += length;
+
+       for (i = 0; i < p->nodes - 1; i++) {
+               struct nfit_spa *nfit_spa = calloc(1, sizeof(*nfit_spa));
+
+               if (!nfit_spa) {
+                       error("failed to alloc nfit entry\n");
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               ent = calloc(1, sizeof(*ent));
+               if (!ent) {
+                       error("failed to alloc nfit entry\n");
+                       free(nfit_spa);
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               ent->tbl = (struct acpi_subtable16 *) nfit_spa;
+               *nfit_spa = *spa;
+               writew(max_range_index + i + 1, &nfit_spa->range_index);
+                writel(max_pxm + 1 + i, &nfit_spa->proximity_domain);
+               writeq(address, &nfit_spa->spa_base);
+               writeq(length, &nfit_spa->spa_length);
+
+               dbg("NFIT:  add: %#llx@%#llx pxm: %d\n",
+                               (unsigned long long) length,
+                               (unsigned long long) address, max_pxm + 1 + i);
+
+               address += length;
+               list_add_tail(&mems, &ent->list);
+       }
+
+       if (i < p->nodes - 1)
+               list_for_each_safe(&mems, ent, _ent, list) {
+                       list_del(&ent->list);
+                       free(ent->tbl);
+                       free(ent);
+                       return rc;
+               }
+
+       /*
+        * Find and split the maps that might be referring to split
+        * address range.
+        */
+       split->max_region_id = max_region_id;
+       split->max_range_index = max_range_index;
+       list_for_each_safe(&nfit->ents, ent, _ent, list) {
+               unsigned int type;
+
+               type = readw(&ent->tbl->type);
+               if (type != ACPI_NFIT_TYPE_MEMORY_MAP)
+                       continue;
+               map = (struct nfit_map *) ent->tbl;
+               if (map->range_index != spa->range_index)
+                       continue;
+               list_del_from(&nfit->ents, &ent->list);
+               list_add_tail(&maps, &ent->list);
+       }
+
+       list_for_each(&maps, ent, list) {
+               map = (struct nfit_map *) ent->tbl;
+               rc = split_nfit_map(p, map, &new_maps, split);
+               if (rc)
+                       return rc;
+       }
+
+       list_append_list(&maps, &new_maps);
+       list_append_list(&mems, &maps);
+
+       rc = create_nfit(p, nfit, &mems);
+       free_nfit_container(nfit);
+       if (rc < 0)
+               return rc;
+       return max_pxm;
+}
+
+static int do_split(struct parameters *p)
+{
+       struct split_context split;
+       int rc = split_srat(p, &split);
+
+       if (rc < 0)
+               return rc;
+       fprintf(stderr, "created: %s\n", p->new_table[ACPI_SRAT]);
+
+       rc = split_slit(p, &split);
+       if (rc < 0)
+               return rc;
+       fprintf(stderr, "created: %s\n", p->new_table[ACPI_SLIT]);
+
+       rc = split_nfit(p, &split);
+       if (rc == -ENOENT) {
+               unlink(p->new_table[ACPI_NFIT]);
+               return 0;
+       }
+
+       if (rc < 0)
+               return rc;
+
+       fprintf(stderr, "created: %s\n", p->new_table[ACPI_NFIT]);
+       return 0;
+}
+
+int cmd_split_acpi(int argc, const char **argv, void *ctx)
+{
+       int i, rc = 0;
+       const char * const u[] = {
+               "daxctl split-acpi <options>",
+               NULL
+       };
+       const struct option options[] = {
+       OPT_STRING('d', "directory", &param.path, "path",
+                       "Path to ACPI tables dumped by \"acpixtract -a\""),
+       OPT_INTEGER('p', "pxm", &param.pxm,
+                       "Proximity domain to split"),
+       OPT_INTEGER('n', "nodes", &param.nodes,
+                       "Number of nodes to split capacity (default 2)"),
+       OPT_BOOLEAN('v', "verbose", &verbose, "Enable verbose output"),
+       OPT_END(),
+       };
+
+        argc = parse_options(argc, argv, options, u, 0);
+
+       for (i = 0; i < argc; i++) {
+               error("unknown parameter \"%s\"\n", argv[i]);
+               rc = -EINVAL;
+       }
+
+       if (param.nodes < 2) {
+               error("--nodes=%d, must be greater than 2\n", param.nodes);
+               rc = -EINVAL;
+       }
+
+       if (!is_power_of_2(param.nodes)) {
+               error("--nodes=%d, must be power of 2\n", param.nodes);
+               rc = -EINVAL;
+       }
+
+       if (rc)
+               usage_with_options(u, options);
+
+       for (i = 0; i < ACPI_TABLES; i++) {
+               rc = asprintf(&param.table[i], "%s/%s.dat", param.path
+                               ? param.path : ".", acpi_table_name(i));
+               if (rc < 0) {
+                       error("failed to allocate path for %s\n",
+                                       acpi_table_name(i));
+                       break;
+               }
+
+               rc = open(param.table[i], O_RDONLY);
+               if (rc < 0 && i > ACPI_SLIT) {
+                       error("failed to open required %s\n", param.table[i]);
+                       break;
+               }
+
+               if (rc < 0)
+                       continue;
+               param.in_fd[i] = rc;
+
+               rc = asprintf(&param.new_table[i], "%s/%s.dat.new", param.path
+                               ? param.path : ".", acpi_table_name(i));
+               if (rc < 0) {
+                       error("failed to allocate path for %s.new\n",
+                                       acpi_table_name(i));
+                       break;
+               }
+
+               rc = open(param.new_table[i], O_RDWR | O_TRUNC | O_CREAT, 0640);
+               if (rc < 0 && i <= ACPI_SLIT) {
+                       error("failed to open %s\n", param.new_table[i]);
+                       break;
+               }
+               param.out_fd[i] = rc;
+       }
+
+       if (rc < 0) {
+               rc = EXIT_FAILURE;
+               goto out;
+       }
+
+       rc = do_split(&param);
+out:
+       for (i = 0; i < ACPI_TABLES; i++) {
+               free(param.table[i]);
+               free(param.new_table[i]);
+               if (param.in_fd[i] > 0)
+                       close(param.in_fd[i]);
+               if (param.out_fd[i] > 0)
+                       close(param.out_fd[i]);
+       }
+       return rc;
+}
diff --git a/daxctl/builtin.h b/daxctl/builtin.h
index f5a0147f0e11..cbee984971cf 100644
--- a/daxctl/builtin.h
+++ b/daxctl/builtin.h
@@ -9,4 +9,5 @@ int cmd_migrate(int argc, const char **argv, struct daxctl_ctx 
*ctx);
 int cmd_reconfig_device(int argc, const char **argv, struct daxctl_ctx *ctx);
 int cmd_online_memory(int argc, const char **argv, struct daxctl_ctx *ctx);
 int cmd_offline_memory(int argc, const char **argv, struct daxctl_ctx *ctx);
+int cmd_split_acpi(int argc, const char **argv, struct daxctl_ctx *ctx);
 #endif /* _DAXCTL_BUILTIN_H_ */
diff --git a/daxctl/daxctl.c b/daxctl/daxctl.c
index 1ab073200313..ec366dd71707 100644
--- a/daxctl/daxctl.c
+++ b/daxctl/daxctl.c
@@ -70,6 +70,7 @@ static struct cmd_struct commands[] = {
        { "version", .d_fn = cmd_version },
        { "list", .d_fn = cmd_list },
        { "help", .d_fn = cmd_help },
+       { "split-acpi", .d_fn = cmd_split_acpi, },
        { "migrate-device-model", .d_fn = cmd_migrate },
        { "reconfigure-device", .d_fn = cmd_reconfig_device },
        { "online-memory", .d_fn = cmd_online_memory },
_______________________________________________
Linux-nvdimm mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to