[PATCH v7 04/12] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2024-04-18 Thread nifan . cxl
From: Fan Ni 

With this change, DC regions can be created when setting up memory for a
type3 memory device.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To keep things simple, the other region
parameters (region base, length, and block size) are hard coded. If needed,
properties for these parameters can be added easily.

With this change and proper kernel-side support, DC regions can be created
as shown below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5d6d3ab87d..5ceed0ab4c 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -567,6 +568,46 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * TODO: dc region configuration will be updated once host backend and address
+ * space support is added for DCD.
+ */
+static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 2 * GiB;
+uint64_t blk_size = 2 * MiB;
+CXLDCRegion *region;
+MemoryRegion *mr;
+
+if (ct3d->hostvmem) {
+mr = host_memory_backend_get_memory(ct3d->hostvmem);
+region_base += memory_region_size(mr);
+}
+if (ct3d->hostpmem) {
+mr = host_memory_backend_get_memory(ct3d->hostpmem);
+region_base += memory_region_size(mr);
+}
+assert(region_base % CXL_CAPACITY_MULTIPLIER == 0);
+
+for (i = 0, region = >dc.regions[0];
+ i < ct3d->dc.num_regions;
+ i++, region++, region_base += region_len) {
+*region = (CXLDCRegion) {
+.base = region_base,
+.decode_len = decode_len,
+.len = region_len,
+.block_size = blk_size,
+/* dsmad_handle set when creating CDAT table entries */
+.flags = 0,
+};
+}
+
+return true;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -635,6 +676,13 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (ct3d->dc.num_regions > 0) {
+if (!cxl_create_dc_regions(ct3d, errp)) {
+error_setg(errp, "setup DC regions failed");
+return false;
+}
+}
+
 return true;
 }
 
@@ -931,6 +979,7 @@ static Property ct3_props[] = {
  HostMemoryBackend *),
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.43.0




[PATCH v7 10/12] hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions

2024-04-18 Thread nifan . cxl
From: Fan Ni 

All DPA ranges in the DC regions are invalid to access until an extent
covering the range has been successfully accepted by the host. A bitmap
is added to each region to record whether a DC block in the region has
been backed by a DC extent. Each bit in the bitmap represents a DC block.
When a DC extent is accepted, all the bits representing the blocks in the
extent are set, which will be cleared when the extent is released.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  3 ++
 hw/mem/cxl_type3.c  | 76 +
 include/hw/cxl/cxl_device.h |  7 
 3 files changed, 86 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 3569902e9e..57f1ce9cce 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1655,6 +1655,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 /* Remove the first extent group in the pending list*/
 cxl_extent_group_list_delete_front(>dc.extents_pending);
@@ -1813,10 +1814,12 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
  * list and update the extent count;
  */
 QTAILQ_FOREACH_SAFE(ent, >dc.extents, node, ent_next) {
+ct3_clear_region_block_backed(ct3d, ent->start_dpa, ent->len);
 cxl_remove_extent_from_extent_list(>dc.extents, ent);
 }
 copy_extent_list(>dc.extents, _list);
 QTAILQ_FOREACH_SAFE(ent, _list, node, ent_next) {
+ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len);
 cxl_remove_extent_from_extent_list(_list, ent);
 }
 ct3d->dc.total_extent_count = updated_list_size;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index e892b3de7b..a3e1a5de25 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -665,6 +665,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 .flags = 0,
 };
 ct3d->dc.total_capacity += region->len;
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
 }
 QTAILQ_INIT(>dc.extents);
 QTAILQ_INIT(>dc.extents_pending);
@@ -676,6 +677,8 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 {
 CXLDCExtent *ent, *ent_next;
 CXLDCExtentGroup *group, *group_next;
+int i;
+CXLDCRegion *region;
 
 QTAILQ_FOREACH_SAFE(ent, >dc.extents, node, ent_next) {
 cxl_remove_extent_from_extent_list(>dc.extents, ent);
@@ -688,6 +691,11 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 }
 g_free(group);
 }
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+g_free(region->blk_bitmap);
+}
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -920,6 +928,70 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
+ * happens when a DC extent is added and accepted by the host.
+ */
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+CXLDCRegion *region;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+   len / region->block_size);
+}
+
+/*
+ * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
+ * Used when validating read/write to dc regions
+ */
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+  uint64_t len)
+{
+CXLDCRegion *region;
+uint64_t nbits;
+long nr;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return false;
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = DIV_ROUND_UP(len, region->block_size);
+/*
+ * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
+ * backed with DC extents, return true; else return false.
+ */
+return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + 
nbits;
+}
+
+/*
+ * Mark the DPA range [dpa, dpa + len - 1] to be unbacked and inaccessible.
+ * This happens when a dc extent is released by the host.
+ */
+void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+   uint64_t len)
+{
+CXLDCRegion *region;
+uint64_t nbits;
+long nr;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = len / region->block_size;
+bitmap_clear(region->blk_bitmap, nr, nbits);
+}
+
 static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, 

[PATCH v7 05/12] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument

2024-04-18 Thread nifan . cxl
From: Fan Ni 

The function ct3_build_cdat_entries_for_mr only uses size of the passed
memory region argument, refactor the function definition to make the passed
arguments more specific.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5ceed0ab4c..a1fe268560 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -44,7 +44,7 @@ enum {
 };
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-  int dsmad_handle, MemoryRegion *mr,
+  int dsmad_handle, uint64_t size,
   bool is_pmem, uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
@@ -63,7 +63,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .DSMADhandle = dsmad_handle,
 .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -132,7 +132,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  */
 .EFI_memory_type_attr = is_pmem ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -149,6 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
@@ -163,6 +164,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+vmr_size = memory_region_size(volatile_mr);
 }
 
 if (ct3d->hostpmem) {
@@ -171,21 +173,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+pmr_size = memory_region_size(nonvolatile_mr);
 }
 
 table = g_malloc0(len * sizeof(*table));
 
 /* Now fill them in */
 if (volatile_mr) {
-ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
   false, 0);
 cur_ent = CT3_CDAT_NUM_ENTRIES;
 }
 
 if (nonvolatile_mr) {
-uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
+uint64_t base = vmr_size;
 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-  nonvolatile_mr, true, base);
+  pmr_size, true, base);
 cur_ent += CT3_CDAT_NUM_ENTRIES;
 }
 assert(len == cur_ent);
-- 
2.43.0




[PATCH v7 11/12] hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support

2024-04-18 Thread nifan . cxl
From: Fan Ni 

With the change, we extend the extent release mailbox command processing
to allow more flexible release. As long as the DPA range of the extent to
release is covered by accepted extent(s) in the device, the release can be
performed.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 57f1ce9cce..89f0ab8116 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1704,6 +1704,13 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 dpa = in->updated_entries[i].start_dpa;
 len = in->updated_entries[i].len;
 
+/* Check if the DPA range is not fully backed with valid extents */
+if (!ct3_test_region_block_backed(ct3d, dpa, len)) {
+ret = CXL_MBOX_INVALID_PA;
+goto free_and_exit;
+}
+
+/* After this point, extent overflow is the only error that can happen */
 while (len > 0) {
 QTAILQ_FOREACH(ent, updated_list, node) {
 range_init_nofail(, ent->start_dpa, ent->len);
@@ -1718,14 +1725,7 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 if (range_contains(, dpa + len - 1)) {
 len2 = ent_start_dpa + ent_len - dpa - len;
 } else {
-/*
- * TODO: we reject the attempt to remove an extent
- * that overlaps with multiple extents in the device
- * for now. We will allow it once superset release
- * support is added.
- */
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
+dpa = ent_start_dpa + ent_len;
 }
 len_done = ent_len - len1 - len2;
 
@@ -1752,14 +1752,9 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 }
 
 len -= len_done;
-/* len == 0 here until superset release is added */
 break;
 }
 }
-if (len) {
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
-}
 }
 }
 free_and_exit:
-- 
2.43.0




[PATCH v7 12/12] hw/mem/cxl_type3: Allow to release extent superset in QMP interface

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Before this change, the QMP interface used to add/release DC extents
only allowed releasing an extent whose DPA range is contained by a single
accepted extent in the device.

With the change, we relax the constraints.  As long as the DPA range of
the extent is covered by accepted extents, we allow the release.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a3e1a5de25..9e725647f1 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1941,7 +1941,7 @@ static void 
qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
"cannot release extent with pending DPA range");
 return;
 }
-if (!cxl_extents_contains_dpa_range(>dc.extents, dpa, len)) {
+if (!ct3_test_region_block_backed(dcd, dpa, len)) {
 error_setg(errp,
"cannot release extent with non-existing DPA 
range");
 return;
-- 
2.43.0




[PATCH v7 07/12] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Add a dynamic capacity extent list representative to the definition of
CXLType3Dev and implement the Get Dynamic Capacity Extent List mailbox
command per CXL r3.1 section 8.2.9.9.9.2.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 73 -
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 22 +++
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 831cef0567..1915959015 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1322,7 +1323,8 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct 
cxl_cmd *cmd,
  * to use.
  */
 stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
-stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count);
 stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
 stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
 
@@ -1330,6 +1332,72 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED *out = (void *)payload_out;
+uint32_t start_extent_id = in->start_extent_id;
+CXLDCExtentList *extent_list = >dc.extents;
+uint16_t record_count = 0, i = 0, record_done = 0;
+uint16_t out_pl_len, size;
+CXLDCExtent *ent;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out);
+record_count = MIN(record_count, size / sizeof(out->records[0]));
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+
+stl_le_p(>count, record_count);
+stl_le_p(>total_extents, ct3d->dc.total_extent_count);
+stl_le_p(>generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+CXLDCExtentRaw *out_rec = >records[record_done];
+
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(_rec->start_dpa, ent->start_dpa);
+stq_le_p(_rec->len, ent->len);
+memcpy(_rec->tag, ent->tag, 0x10);
+stw_le_p(_rec->shared_seq, ent->shared_seq);
+
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1377,6 +1445,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index ac87398089..9fffeae613 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -666,6 +666,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 };
 ct3d->dc.total_capacity += region->len;
 }
+QTAILQ_INIT(>dc.extents);
 
 return true;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index c2c3df0d2a..6aec6ac983 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -424,6 +424,25 @@ typedef QLIST_HEAD(, 

[PATCH v7 08/12] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

To process the above two commands, we use a two-pass approach.
Pass 1: Check whether the input payload is valid or not; if not, skip
Pass 2 and return mailbox process error.
Pass 2: Do the real work--add or release extents, respectively.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 394 
 hw/mem/cxl_type3.c  |  11 +
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 409 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 1915959015..9d54e10cd4 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -19,6 +19,7 @@
 #include "qemu/units.h"
 #include "qemu/uuid.h"
 #include "sysemu/hostmem.h"
+#include "qemu/range.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
@@ -85,6 +86,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1398,6 +1401,391 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+  unsigned long size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+int i;
+CXLDCRegion *region = >dc.regions[0];
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = >dc.regions[i];
+if (dpa >= region->base) {
+if (dpa + len > region->base + region->len) {
+return NULL;
+}
+return region;
+}
+}
+
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCExtent *extent;
+
+extent = g_new0(CXLDCExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+CXLDCExtent *extent)
+{
+QTAILQ_REMOVE(list, extent, node);
+g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+uint32_t num_entries_updated;
+uint8_t flags;
+uint8_t rsvd[3];
+/* CXL r3.1 Table 8-169: Updated Extent */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+const CXLUpdateDCExtentListInPl *in)
+{
+uint64_t min_block_size = UINT64_MAX;
+CXLDCRegion *region;
+CXLDCRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
+g_autofree unsigned long *blk_bitmap = NULL;
+uint64_t dpa, len;
+uint32_t i;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+min_block_size = MIN(min_block_size, region->block_size);
+}
+
+blk_bitmap = 

[PATCH v7 03/12] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 49c7944d93..0f2ad58a14 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -803,7 +803,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(>total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+ cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>volatile_capacity,
@@ -1179,7 +1179,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b0a7e9f11b..5d6d3ab87d 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -838,7 +838,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1011,7 +1011,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index e839370266..f7f56b44e3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -234,7 +234,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 
-- 
2.43.0




[PATCH v7 06/12] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Add (file/memory backed) host backend for DCD. All the dynamic capacity
regions will share a single, large enough host backend. Set up address
space for DC regions to support read/write operations to dynamic capacity
for DCD.

With the change, the following support is added:
1. Add a new property to type3 device "volatile-dc-memdev" to point to host
   memory backend for dynamic capacity. Currently, all DC regions share one
   host backend;
2. Add an address space for dynamic capacity to support read/write;
3. Create cdat entries for each dynamic capacity region.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  16 ++--
 hw/mem/cxl_type3.c  | 172 +---
 include/hw/cxl/cxl_device.h |   8 ++
 3 files changed, 160 insertions(+), 36 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0f2ad58a14..831cef0567 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -622,7 +622,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = _TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = >cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -636,7 +637,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -793,7 +795,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = >cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -835,9 +838,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1179,7 +1184,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a1fe268560..ac87398089 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -45,7 +45,8 @@ enum {
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
   int dsmad_handle, uint64_t size,
-  bool is_pmem, uint64_t dpa_base)
+  bool is_pmem, bool is_dynamic,
+  uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
 CDATDslbis *dslbis0;
@@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
+ (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
 .DPA_base = dpa_base,
 .DPA_length = size,
 };
@@ -149,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+MemoryRegion *dc_mr = NULL;
 uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
 
-if (!ct3d->hostpmem && !ct3d->hostvmem) {
+if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
 return 0;
 }
 
@@ -176,21 

[PATCH v7 09/12] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-04-18 Thread nifan . cxl
From: Fan Ni 

To simulate FM functionalities for initiating Dynamic Capacity Add
(Opcode 5604h) and Dynamic Capacity Release (Opcode 5605h) as in CXL spec
r3.1 7.6.7.6.5 and 7.6.7.6.6, we implemented two QMP interfaces to issue
add/release dynamic capacity extents requests.

With this change, an extent can be released only when its DPA range
is contained by a single accepted extent in the device. That is to say,
extent superset release is not supported yet.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "hid": 0,
  "selection-policy": 2,
  "region-id": 0,
  "tag": "",
  "extents": [
  {
  "offset": 0,
  "len": 134217728
  },
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "hid": 0,
  "flags": 1,
  "region-id": 0,
  "tag": "",
  "extents": [
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  62 +--
 hw/mem/cxl_type3.c  | 311 +++-
 hw/mem/cxl_type3_stubs.c|  20 +++
 include/hw/cxl/cxl_device.h |  22 +++
 include/hw/cxl/cxl_events.h |  18 +++
 qapi/cxl.json   |  69 
 6 files changed, 489 insertions(+), 13 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 9d54e10cd4..3569902e9e 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1405,7 +1405,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
   unsigned long size)
 {
 unsigned long res = find_next_bit(addr, size + nr, nr);
@@ -1444,7 +1444,7 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
  uint64_t dpa,
  uint64_t len,
  uint8_t *tag,
@@ -1470,6 +1470,44 @@ void cxl_remove_extent_from_extent_list(CXLDCExtentList 
*list,
 g_free(extent);
 }
 
+/*
+ * Add a new extent to the extent "group" if group exists;
+ * otherwise, create a new group
+ * Return value: return the group where the extent is inserted.
+ */
+CXLDCExtentGroup *cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group,
+uint64_t dpa,
+uint64_t len,
+uint8_t *tag,
+uint16_t shared_seq)
+{
+if (!group) {
+group = g_new0(CXLDCExtentGroup, 1);
+QTAILQ_INIT(>list);
+}
+cxl_insert_extent_to_extent_list(>list, dpa, len,
+ tag, shared_seq);
+return group;
+}
+
+void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list,
+   CXLDCExtentGroup *group)
+{
+QTAILQ_INSERT_TAIL(list, group, node);
+}
+
+void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list)
+{
+CXLDCExtent *ent, *ent_next;
+CXLDCExtentGroup *group = QTAILQ_FIRST(list);
+
+QTAILQ_REMOVE(list, group, node);
+QTAILQ_FOREACH_SAFE(ent, >list, node, ent_next) {
+cxl_remove_extent_from_extent_list(>list, ent);
+}
+g_free(group);
+}
+
 /*
  * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
  * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
@@ -1541,6 +1579,7 @@ static CXLRetCode 
cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d,
 {
 uint32_t i;
 CXLDCExtent *ent;
+CXLDCExtentGroup *ext_group;
 uint64_t dpa, len;
 Range range1, range2;
 
@@ -1551,9 +1590,13 @@ static CXLRetCode 
cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d,
 range_init_nofail(, dpa, len);
 
 /*
- * TODO: once the pending extent list is added, check against
- * the list will be added here.
+ * The host-accepted DPA range must be contained by 

[PATCH v7 02/12] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Per cxl spec r3.1, add dynamic capacity region representative based on
Table 8-165 and extend the cxl type3 device definition to include DC region
information. Also, based on info in 8.2.9.9.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: we store region decode length as byte-wise length on the device, which
should be divided by 256 * MiB before being returned to the host
for "Get Dynamic Capacity Configuration" mailbox command per
specification.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 96 +
 include/hw/cxl/cxl_device.h | 16 +++
 2 files changed, 112 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ba1d9901df..49c7944d93 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -22,6 +22,8 @@
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
+#define CXL_NUM_EXTENTS_SUPPORTED 512
+#define CXL_NUM_TAGS_SUPPORTED 0
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -80,6 +82,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1238,6 +1242,88 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint8_t region_cnt;
+uint8_t start_rid;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint8_t num_regions;
+uint8_t regions_returned;
+uint8_t rsvd1[6];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED *out = (void *)payload_out;
+struct {
+uint32_t num_extents_supported;
+uint32_t num_extents_available;
+uint32_t num_tags_supported;
+uint32_t num_tags_available;
+} QEMU_PACKED *extra_out;
+uint16_t record_count;
+uint16_t i;
+uint16_t out_pl_len;
+uint8_t start_rid;
+
+start_rid = in->start_rid;
+if (start_rid >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+extra_out = (void *)(payload_out + out_pl_len);
+out_pl_len += sizeof(*extra_out);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+out->num_regions = ct3d->dc.num_regions;
+out->regions_returned = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(>records[i].base,
+ ct3d->dc.regions[start_rid + i].base);
+stq_le_p(>records[i].decode_len,
+ ct3d->dc.regions[start_rid + i].decode_len /
+ CXL_CAPACITY_MULTIPLIER);
+stq_le_p(>records[i].region_len,
+ ct3d->dc.regions[start_rid + i].len);
+stq_le_p(>records[i].block_size,
+ ct3d->dc.regions[start_rid + i].block_size);
+stl_le_p(>records[i].dsmadhandle,
+ ct3d->dc.regions[start_rid + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_rid + i].flags;
+}
+/*
+ * TODO: Assign values once extents and tags are introduced
+ * to use.
+ */
+stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
+stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1282,6 +1368,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_clear_poison, 72, 0 },
 };
 
+static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
+[DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
+

[PATCH v7 01/12] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2024-04-18 Thread nifan . cxl
From: Fan Ni 

Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4bcd727f4c..ba1d9901df 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = >cxl_dstate;
@@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(>inject_poison_limit, 0);
+stw_le_p(>dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.43.0




[PATCH v7 00/12] Enabling DCD emulation support in Qemu

2024-04-18 Thread nifan . cxl
A git tree of this series can be found here (with one extra commit on top
for printing out accepted/pending extent list): 
https://github.com/moking/qemu/tree/dcd-v7

v6->v7:

1. Fixed the dvsec range register issue mentioned in the cover letter in v6.
   Only relevant bits are set to mark the device ready (Patch 6). (Jonathan)
2. Moved the if statement in cxl_setup_memory from Patch 6 to Patch 4. 
(Jonathan)
3. Used MIN instead of if statement to get record_count in Patch 7. (Jonathan)
4. Added "Reviewed-by" tag to Patch 7.
5. Modified cxl_dc_extent_release_dry_run so the updated extent list can be
   reused in cmd_dcd_release_dyn_cap to simplify the process in Patch 8. 
(Jørgen) 
6. Added comments to indicate further "TODO" items in cmd_dcd_add_dyn_cap_rsp.
(Jonathan)
7. Avoided irrelevant code reformat in Patch 8. (Jonathan)
8. Modified QMP interfaces for adding/releasing DC extents to allow passing
   tags, selection policy, flags in the interface. (Jonathan, Gregory)
9. Redesigned the pending list so extents in the same requests are grouped
together. A new data structure is introduced to represent "extent group"
in pending list.  (Jonathan)
10. Added support in QMP interface for "More" flag. 
11. Check "Forced removal" flag for release request and not let it pass through.
12. Removed the dynamic capacity log type from CxlEventLog definition in 
cxl.json
   to avoid the side effect it may introduce to inject error to DC event log.
   (Jonathan)
13. Hard coded the event log type to dynamic capacity event log in QMP
interfaces. (Jonathan)
14. Adding space in between "-1]". (Jonathan)
15. Some minor comment fixes.

The code is tested with similar setup and has passed similar tests as listed
in the cover letter of v5[1] and v6[2].
Also, the code is tested with the latest DCD kernel patchset[3].

[1] Qemu DCD patchset v5: 
https://lore.kernel.org/linux-cxl/20240304194331.1586191-1-nifan@gmail.com/T/#t
[2] Qemu DCD patchset v6: 
https://lore.kernel.org/linux-cxl/20240325190339.696686-1-nifan@gmail.com/T/#t
[3] DCD kernel patches: 
https://lore.kernel.org/linux-cxl/20240324-dcd-type2-upstream-v1-0-b7b00d623...@intel.com/T/#m11c571e21c4fe17c7d04ec5c2c7bc7cbf2cd07e3


Fan Ni (12):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr
size instead of mr as argument
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions
  hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support
  hw/mem/cxl_type3: Allow to release extent superset in QMP interface

 hw/cxl/cxl-mailbox-utils.c  | 620 ++-
 hw/mem/cxl_type3.c  | 633 +---
 hw/mem/cxl_type3_stubs.c|  20 ++
 include/hw/cxl/cxl_device.h |  81 -
 include/hw/cxl/cxl_events.h |  18 +
 qapi/cxl.json   |  69 
 6 files changed, 1396 insertions(+), 45 deletions(-)

-- 
2.43.0




[PATCH v6 00/12] Enabling DCD emulation support in Qemu

2024-03-25 Thread nifan . cxl
From: Fan Ni 

A git tree of this series can be found here (with one extra commit on top
for printing out accepted/pending extent list): 
https://github.com/moking/qemu/tree/dcd-v6

v5->v6:
1. Picked up tags;
2. Renamed start_region_id to start_rid; (Jonathan)
3. For get extent list mailbox command, add logic to adjust returned extent
   count based on output payload size; (Jonathan)
4. Use Range to detect extent comparison and overlaps; (Jonathan)
5. Renamed extents_pending_to_add to extents_pending; (Jonathan)
6. Updated the commit log of the qmp interface patch by renaming "dpa" to offset
to align with the code. (Gregory)
7. For DC extent add response and release mailbox command, we use a 2 pass
   approach. The first pass is to detect any potential errors, and the second
   pass to update the in-device data structure;
8. For QMP interface for add/release DC extents, use 2 pass approach with the
   first pass detecting any faulty input and second pass filling the event log.
   Note, based on sswg discussion, we disallow releasing extents whose DPA
   range has not been accepted by the host yet;
9. We enforce the in-order process of the pending list for DC extent release
   mailbox command, and the head of pending list is handled accordingly.
10. The last patch from v5 has been removed from this series.

Note: we do not drop the DC changes in build_dvsecs, although Jonathan
suggested dropping them. The reason is that, during testing, we found that
the current kernel code checks the dvsec range registers when checking
whether the media is ready (in cxl_await_media_ready). For a DCD device, if
we leave the dvsec range registers unset, the device cannot be put into the
"ready" state, which leaves the device inactive.
The related code is below,
https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/tree/drivers/cxl/core/pci.c?h=fixes=d206a76d7d2726f3b096037f2079ce0bd3ba329b#n195

Compared to v5[1], PATCH 8-9 and PATCH 11-12 are almost entirely rewritten,
so they need more care during review.

The code is tested with similar setup and has passed similar tests as listed
in the cover letter of v5.
Also, the code passes similar tests with the latest DCD kernel patchset[2].

[1] Qemu DCD patches v5: 
https://lore.kernel.org/linux-cxl/20240304194331.1586191-1-nifan@gmail.com/T/#t
[2] DCD kernel patches: 
https://lore.kernel.org/linux-cxl/20240324-dcd-type2-upstream-v1-0-b7b00d623...@intel.com/T/#m11c571e21c4fe17c7d04ec5c2c7bc7cbf2cd07e3



Fan Ni (12):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr
size instead of mr as argument
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions
  hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support
  hw/mem/cxl_type3: Allow to release extent superset in QMP interface

 hw/cxl/cxl-mailbox-utils.c  | 644 +++-
 hw/mem/cxl_type3.c  | 580 +---
 hw/mem/cxl_type3_stubs.c|  14 +
 include/hw/cxl/cxl_device.h |  67 +++-
 include/hw/cxl/cxl_events.h |  18 +
 qapi/cxl.json   |  61 +++-
 6 files changed, 1334 insertions(+), 50 deletions(-)

-- 
2.43.0




[PATCH v6 07/12] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Add dynamic capacity extent list representative to the definition of
CXLType3Dev and implement get DC extent list mailbox command per
CXL spec r3.1 section 8.2.9.9.9.2.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 75 -
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 22 +++
 3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 831cef0567..30ef46a036 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1322,7 +1323,8 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct 
cxl_cmd *cmd,
  * to use.
  */
 stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
-stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count);
 stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
 stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
 
@@ -1330,6 +1332,74 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED *out = (void *)payload_out;
+uint32_t start_extent_id = in->start_extent_id;
+CXLDCExtentList *extent_list = >dc.extents;
+uint16_t record_count = 0, i = 0, record_done = 0;
+uint16_t out_pl_len, size;
+CXLDCExtent *ent;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out);
+if (size / sizeof(out->records[0]) < record_count) {
+record_count = size / sizeof(out->records[0]);
+}
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+
+stl_le_p(>count, record_count);
+stl_le_p(>total_extents, ct3d->dc.total_extent_count);
+stl_le_p(>generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+CXLDCExtentRaw *out_rec = >records[record_done];
+
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(_rec->start_dpa, ent->start_dpa);
+stq_le_p(_rec->len, ent->len);
+memcpy(_rec->tag, ent->tag, 0x10);
+stw_le_p(_rec->shared_seq, ent->shared_seq);
+
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1377,6 +1447,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 75ea9b20e1..5be3c904ba 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -673,6 +673,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 };
 ct3d->dc.total_capacity += region->len;
 }
+QTAILQ_INIT(>dc.extents);
 
 return true;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index c2c3df0d2a..6aec6ac983 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -424,6 +424,25 @@ 

[PATCH v6 02/12] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Per cxl spec r3.1, add dynamic capacity region representative based on
Table 8-165 and extend the cxl type3 device definition to include dc region
information. Also, based on info in 8.2.9.9.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: we store region decode length as byte-wise length on the device, which
should be divided by 256 * MiB before being returned to the host
for "Get Dynamic Capacity Configuration" mailbox command per
specification.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 96 +
 include/hw/cxl/cxl_device.h | 16 +++
 2 files changed, 112 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ba1d9901df..49c7944d93 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -22,6 +22,8 @@
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
+#define CXL_NUM_EXTENTS_SUPPORTED 512
+#define CXL_NUM_TAGS_SUPPORTED 0
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -80,6 +82,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1238,6 +1242,88 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint8_t region_cnt;
+uint8_t start_rid;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint8_t num_regions;
+uint8_t regions_returned;
+uint8_t rsvd1[6];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED *out = (void *)payload_out;
+struct {
+uint32_t num_extents_supported;
+uint32_t num_extents_available;
+uint32_t num_tags_supported;
+uint32_t num_tags_available;
+} QEMU_PACKED *extra_out;
+uint16_t record_count;
+uint16_t i;
+uint16_t out_pl_len;
+uint8_t start_rid;
+
+start_rid = in->start_rid;
+if (start_rid >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+extra_out = (void *)(payload_out + out_pl_len);
+out_pl_len += sizeof(*extra_out);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+out->num_regions = ct3d->dc.num_regions;
+out->regions_returned = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(>records[i].base,
+ ct3d->dc.regions[start_rid + i].base);
+stq_le_p(>records[i].decode_len,
+ ct3d->dc.regions[start_rid + i].decode_len /
+ CXL_CAPACITY_MULTIPLIER);
+stq_le_p(>records[i].region_len,
+ ct3d->dc.regions[start_rid + i].len);
+stq_le_p(>records[i].block_size,
+ ct3d->dc.regions[start_rid + i].block_size);
+stl_le_p(>records[i].dsmadhandle,
+ ct3d->dc.regions[start_rid + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_rid + i].flags;
+}
+/*
+ * TODO: Assign values once extents and tags are introduced
+ * to use.
+ */
+stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
+stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1282,6 +1368,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_clear_poison, 72, 0 },
 };
 
+static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
+[DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
+

[PATCH v6 10/12] hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions

2024-03-25 Thread nifan . cxl
From: Fan Ni 

All dpa ranges in the DC regions are invalid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a DC block in the region has been backed by DC extent.
For the bitmap, a bit in the bitmap represents a DC block. When a DC
extent is added, all the bits of the blocks in the extent will be set,
which will be cleared when the extent is released.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  6 +++
 hw/mem/cxl_type3.c  | 76 +
 include/hw/cxl/cxl_device.h |  7 
 3 files changed, 89 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 7094e007b9..a0d2239176 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1620,6 +1620,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 
 ent = QTAILQ_FIRST(>dc.extents_pending);
 cxl_remove_extent_from_extent_list(>dc.extents_pending, ent);
@@ -1798,18 +1799,23 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 
 cxl_remove_extent_from_extent_list(extent_list, ent);
 ct3d->dc.total_extent_count -= 1;
+ct3_clear_region_block_backed(ct3d, ent_start_dpa,
+  ent_len);
 
 if (len1) {
 cxl_insert_extent_to_extent_list(extent_list,
  ent_start_dpa,
  len1, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, ent_start_dpa,
+len1);
 }
 if (len2) {
 cxl_insert_extent_to_extent_list(extent_list,
  dpa + len,
  len2, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa + len, len2);
 }
 
 len -= len_done;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 74cb64e843..2628a6f50f 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -672,6 +672,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 .flags = 0,
 };
 ct3d->dc.total_capacity += region->len;
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
 }
 QTAILQ_INIT(>dc.extents);
 QTAILQ_INIT(>dc.extents_pending);
@@ -682,6 +683,8 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 {
 CXLDCExtent *ent, *ent_next;
+int i;
+CXLDCRegion *region;
 
 QTAILQ_FOREACH_SAFE(ent, >dc.extents, node, ent_next) {
 cxl_remove_extent_from_extent_list(>dc.extents, ent);
@@ -690,6 +693,11 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 cxl_remove_extent_from_extent_list(>dc.extents_pending,
ent);
 }
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+g_free(region->blk_bitmap);
+}
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -921,6 +929,70 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
+ * happens when a DC extent is added and accepted by the host.
+ */
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+CXLDCRegion *region;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+   len / region->block_size);
+}
+
+/*
+ * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
+ * Used when validating read/write to dc regions
+ */
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+  uint64_t len)
+{
+CXLDCRegion *region;
+uint64_t nbits;
+long nr;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return false;
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = DIV_ROUND_UP(len, region->block_size);
+/*
+ * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
+ * backed with DC extents, return true; else return false.
+ */
+return 

[PATCH v6 12/12] hw/mem/cxl_type3: Allow to release extent superset in QMP interface

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Before the change, the QMP interface used to add/release DC extents
only allowed releasing an extent whose DPA range is contained by a single
accepted extent in the device.

With the change, we relax the constraints.  As long as the DPA range of
the extent is covered by accepted extents, we allow the release.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 2628a6f50f..62c2022477 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1935,8 +1935,7 @@ static void qmp_cxl_process_dynamic_capacity(const char 
*path, CxlEventLog log,
"cannot release extent with pending DPA range");
 return;
 }
-if (!cxl_extents_contains_dpa_range(>dc.extents,
-dpa, len)) {
+if (!ct3_test_region_block_backed(dcd, dpa, len)) {
 error_setg(errp,
"cannot release extent with non-existing DPA 
range");
 return;
-- 
2.43.0




[PATCH v6 11/12] hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support

2024-03-25 Thread nifan . cxl
From: Fan Ni 

With the change, we extend the extent release mailbox command processing
to allow more flexible release. As long as the DPA range of the extent to
release is covered by accepted extent(s) in the device, the release can be
performed.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 41 ++
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index a0d2239176..3b7949c364 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1674,6 +1674,12 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 dpa = in->updated_entries[i].start_dpa;
 len = in->updated_entries[i].len;
 
+/* Check if the DPA range is not fully backed with valid extents */
+if (!ct3_test_region_block_backed(ct3d, dpa, len)) {
+ret = CXL_MBOX_INVALID_PA;
+goto free_and_exit;
+}
+/* After this point, extent overflow is the only error that can happen */
 while (len > 0) {
 QTAILQ_FOREACH(ent, _list, node) {
 range_init_nofail(, ent->start_dpa, ent->len);
@@ -1713,25 +1719,27 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 goto free_and_exit;
 }
 } else {
-/*
- * TODO: we reject the attempt to remove an extent
- * that overlaps with multiple extents in the device
- * for now, we will allow it once superset release
- * support is added.
- */
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
+len1 = dpa - ent_start_dpa;
+len2 = 0;
+len_done = ent_len - len1 - len2;
+
+cxl_remove_extent_from_extent_list(_list, ent);
+cnt_delta--;
+if (len1) {
+cxl_insert_extent_to_extent_list(_list,
+ ent_start_dpa,
+ len1, NULL, 0);
+cnt_delta++;
+}
 }
 
 len -= len_done;
-/* len == 0 here until superset release is added */
+if (len) {
+dpa = ent_start_dpa + ent_len;
+}
 break;
 }
 }
-if (len) {
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
-}
 }
 }
 free_and_exit:
@@ -1819,10 +1827,9 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 }
 
 len -= len_done;
-/*
- * len will always be 0 until superset release is add.
- * TODO: superset release will be added.
- */
+if (len > 0) {
+dpa = ent_start_dpa + ent_len;
+}
 break;
 }
 }
-- 
2.43.0




[PATCH v6 03/12] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 49c7944d93..0f2ad58a14 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -803,7 +803,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(>total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+ cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>volatile_capacity,
@@ -1179,7 +1179,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b679dfae1c..0836e9135b 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -837,7 +837,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1010,7 +1010,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index e839370266..f7f56b44e3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -234,7 +234,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 
-- 
2.43.0




[PATCH v6 04/12] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2024-03-25 Thread nifan . cxl
From: Fan Ni 

With the change, when setting up memory for type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To make it easier, other region parameters
like region base, length, and block size are hard coded. If needed,
these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support like below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 47 ++
 1 file changed, 47 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0836e9135b..c83d6f8004 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -567,6 +568,46 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * TODO: dc region configuration will be updated once host backend and address
+ * space support is added for DCD.
+ */
+static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 2 * GiB;
+uint64_t blk_size = 2 * MiB;
+CXLDCRegion *region;
+MemoryRegion *mr;
+
+if (ct3d->hostvmem) {
+mr = host_memory_backend_get_memory(ct3d->hostvmem);
+region_base += memory_region_size(mr);
+}
+if (ct3d->hostpmem) {
+mr = host_memory_backend_get_memory(ct3d->hostpmem);
+region_base += memory_region_size(mr);
+}
+assert(region_base % CXL_CAPACITY_MULTIPLIER == 0);
+
+for (i = 0, region = >dc.regions[0];
+ i < ct3d->dc.num_regions;
+ i++, region++, region_base += region_len) {
+*region = (CXLDCRegion) {
+.base = region_base,
+.decode_len = decode_len,
+.len = region_len,
+.block_size = blk_size,
+/* dsmad_handle set when creating CDAT table entries */
+.flags = 0,
+};
+}
+
+return true;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -635,6 +676,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (!cxl_create_dc_regions(ct3d, errp)) {
+error_setg(errp, "setup DC regions failed");
+return false;
+}
+
 return true;
 }
 
@@ -930,6 +976,7 @@ static Property ct3_props[] = {
  HostMemoryBackend *),
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.43.0




[PATCH v6 06/12] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Add a (file/memory backed) host backend; all the dynamic capacity regions
will share a single, large enough host backend. Set up an address space for
DC regions to support read/write operations to dynamic capacity for DCD.

With this change, the following support is added:
1. Add a new property to type3 device "volatile-dc-memdev" to point to host
   memory backend for dynamic capacity. Currently, all dc regions share one
   host backend.
2. Add an address space for dynamic capacity for read/write support;
3. Create cdat entries for each dynamic capacity region;

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  16 ++-
 hw/mem/cxl_type3.c  | 187 +---
 include/hw/cxl/cxl_device.h |   8 ++
 3 files changed, 172 insertions(+), 39 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 0f2ad58a14..831cef0567 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -622,7 +622,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = _TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = >cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -636,7 +637,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -793,7 +795,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = >cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -835,9 +838,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1179,7 +1184,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a9e8bdc436..75ea9b20e1 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -45,7 +45,8 @@ enum {
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
   int dsmad_handle, uint64_t size,
-  bool is_pmem, uint64_t dpa_base)
+  bool is_pmem, bool is_dynamic,
+  uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
 CDATDslbis *dslbis0;
@@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
+ (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
 .DPA_base = dpa_base,
 .DPA_length = size,
 };
@@ -149,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+MemoryRegion *dc_mr = NULL;
 uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
 
-if (!ct3d->hostpmem && !ct3d->hostvmem) {
+if (!ct3d->hostpmem && !ct3d->hostvmem && !ct3d->dc.num_regions) {
 return 0;
 }
 
@@ -176,21 +179,54 @@ static 

[PATCH v6 05/12] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument

2024-03-25 Thread nifan . cxl
From: Fan Ni 

The function ct3_build_cdat_entries_for_mr only uses the size of the passed
memory region argument; refactor the function definition to make the passed
arguments more specific.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index c83d6f8004..a9e8bdc436 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -44,7 +44,7 @@ enum {
 };
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-  int dsmad_handle, MemoryRegion *mr,
+  int dsmad_handle, uint64_t size,
   bool is_pmem, uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
@@ -63,7 +63,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .DSMADhandle = dsmad_handle,
 .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -132,7 +132,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  */
 .EFI_memory_type_attr = is_pmem ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -149,6 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
@@ -163,6 +164,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+vmr_size = memory_region_size(volatile_mr);
 }
 
 if (ct3d->hostpmem) {
@@ -171,21 +173,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+pmr_size = memory_region_size(nonvolatile_mr);
 }
 
 table = g_malloc0(len * sizeof(*table));
 
 /* Now fill them in */
 if (volatile_mr) {
-ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
   false, 0);
 cur_ent = CT3_CDAT_NUM_ENTRIES;
 }
 
 if (nonvolatile_mr) {
-uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
+uint64_t base = vmr_size;
 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-  nonvolatile_mr, true, base);
+  pmr_size, true, base);
 cur_ent += CT3_CDAT_NUM_ENTRIES;
 }
 assert(len == cur_ent);
-- 
2.43.0




[PATCH v6 09/12] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-03-25 Thread nifan . cxl
From: Fan Ni 

To simulate FM functionalities for initiating Dynamic Capacity Add
(Opcode 5604h) and Dynamic Capacity Release (Opcode 5605h) as in CXL spec
r3.1 7.6.7.6.5 and 7.6.7.6.6, we implemented two QMP interfaces to issue
add/release dynamic capacity extents requests.

With this change, an extent can be released only when its DPA range
is contained by a single accepted extent in the device. That is to say,
extent superset release is not supported yet.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "offset": 0,
  "len": 134217728
  },
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  26 ++--
 hw/mem/cxl_type3.c  | 252 +++-
 hw/mem/cxl_type3_stubs.c|  14 ++
 include/hw/cxl/cxl_device.h |   8 ++
 include/hw/cxl/cxl_events.h |  18 +++
 qapi/cxl.json   |  61 -
 6 files changed, 367 insertions(+), 12 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index a9eca516c8..7094e007b9 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1407,7 +1407,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
   unsigned long size)
 {
 unsigned long res = find_next_bit(addr, size + nr, nr);
@@ -1446,7 +1446,7 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
  uint64_t dpa,
  uint64_t len,
  uint8_t *tag,
@@ -1552,10 +1552,11 @@ static CXLRetCode 
cxl_dcd_add_dyn_cap_rsp_dry_run(CXLType3Dev *ct3d,
 
 range_init_nofail(, dpa, len);
 
-/*
- * TODO: once the pending extent list is added, check against
- * the list will be added here.
- */
+/* host-accepted DPA range must be contained by pending extent */
+if (!cxl_extents_contains_dpa_range(>dc.extents_pending,
+dpa, len)) {
+return CXL_MBOX_INVALID_PA;
+}
 
 /* to-be-added range should not overlap with range already accepted */
 QTAILQ_FOREACH(ent, >dc.extents, node) {
@@ -1585,9 +1586,13 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 CXLDCExtentList *extent_list = >dc.extents;
 uint32_t i;
 uint64_t dpa, len;
+CXLDCExtent *ent;
 CXLRetCode ret;
 
 if (in->num_entries_updated == 0) {
+/* Always remove the first pending extent when response received. */
+ent = QTAILQ_FIRST(>dc.extents_pending);
+cxl_remove_extent_from_extent_list(>dc.extents_pending, ent);
 return CXL_MBOX_SUCCESS;
 }
 
@@ -1604,6 +1609,8 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 ret = cxl_dcd_add_dyn_cap_rsp_dry_run(ct3d, in);
 if (ret != CXL_MBOX_SUCCESS) {
+ent = QTAILQ_FIRST(>dc.extents_pending);
+cxl_remove_extent_from_extent_list(>dc.extents_pending, ent);
 return ret;
 }
 
@@ -1613,10 +1620,9 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
-/*
- * TODO: we will add a pending extent list based on event log record
- * and process the list according here.
- */
+
+ent = QTAILQ_FIRST(>dc.extents_pending);
+cxl_remove_extent_from_extent_list(>dc.extents_pending, ent);
 }
 
 return CXL_MBOX_SUCCESS;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 951bd79a82..74cb64e843 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -674,6 +674,7 @@ static 

[PATCH v6 01/12] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4bcd727f4c..ba1d9901df 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = >cxl_dstate;
@@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(>inject_poison_limit, 0);
+stw_le_p(>dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.43.0




[PATCH v6 08/12] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-03-25 Thread nifan . cxl
From: Fan Ni 

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

To process the above two commands, we use a two-pass approach.
Pass 1: Check whether the input payload is valid; if not, skip
Pass 2 and return a mailbox processing error.
Pass 2: Do the real work--add or release extents, respectively.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 433 +++-
 hw/mem/cxl_type3.c  |  11 +
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 444 insertions(+), 4 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 30ef46a036..a9eca516c8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -19,6 +19,7 @@
 #include "qemu/units.h"
 #include "qemu/uuid.h"
 #include "sysemu/hostmem.h"
+#include "qemu/range.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
@@ -85,6 +86,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1400,6 +1403,422 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+  unsigned long size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+int i;
+CXLDCRegion *region = >dc.regions[0];
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents 
that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = >dc.regions[i];
+if (dpa >= region->base) {
+if (dpa + len > region->base + region->len) {
+return NULL;
+}
+return region;
+}
+}
+
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCExtent *extent;
+
+extent = g_new0(CXLDCExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+CXLDCExtent *extent)
+{
+QTAILQ_REMOVE(list, extent, node);
+g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+uint32_t num_entries_updated;
+uint8_t flags;
+uint8_t rsvd[3];
+/* CXL r3.1 Table 8-169: Updated Extent */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+const CXLUpdateDCExtentListInPl *in)
+{
+uint64_t min_block_size = UINT64_MAX;
+CXLDCRegion *region = >dc.regions[0];
+CXLDCRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
+g_autofree unsigned long *blk_bitmap = NULL;
+uint64_t dpa, len;
+uint32_t i;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+min_block_size = MIN(min_block_size, region->block_size);
+   

[PATCH v5 00/13] Enabling DCD emulation support in Qemu

2024-03-04 Thread nifan . cxl
From: Fan Ni 

The code is rebased on mainstream QEMU with the following patch series:
hw/cxl/mailbox: change CCI cmd set structure to be a member, not a reference
hw/cxl/mailbox: interface to add CCI commands to an existing CCI

v4->v5:
1. Did spell check for the patchset and fixed 2 typos;
2. Fixed a misuse of stq_le_p and replaced it with stl_le_p in
   cmd_dcd_get_dyn_cap_config;
3. Removed unnecessary switch case in patch 9;
4. Capitalized "opcode" to "Opcode" in two code comments;
5. Updated one reference text to the spec;
6. Minor text change in qapi/cxl.json.
7. Removed the definition of CXL_SPEC_AFTER_R30
8. Removed the unnecessary input and output mailbox payload struct type
   definition;
9. Set num_extents_supported and num_extents_available based on extents;
10. Renamed DCD_MAX_REGION_NUM to DCD_MAX_NUM_REGION;
11. Updated the commit log in Patch 4 to remove the unwanted text;
12. Fixed some indent issues;
13. Fail removal if it causes extents overflow after removal;
14. Capture cases where an add response would cause the device to exceed its
extent tracking ability;
15. Using memory_region_size instead of directly accessing the field in
cxl_create_dc_regions;
16. Renamed DCDRegion as DCRegion;

Changes 1-5 are suggested by Wonjae Lee (wj28@gmail.com), 
Changes 6-14 are suggested by Jonathan.
Changes 15-16 are suggested by Jorgen.

Thanks for the review of v4.

For this version, the main code changes (PATCH 11~12) extend DC extent
release processing, including mailbox command and QMP interface processing.

Based on Jonathan's suggestion, we updated the code, and now we can process
extent release requests more flexibly. We can now support superset release
(actually we can do even more: as long as the DPA range is covered by
accepted extents, we can release it).

For superset extent release, there is only one restriction: if part of the
requested DPA range is still pending to be added while the rest is accepted,
we reject the request through the QMP interface.

Last but not least, in this series we introduced a new patch (PATCH 13),
which adds new QMP interfaces to print out the accepted and
pending-to-add extent lists in the device to a file.

The latest code branch is https://github.com/moking/qemu/tree/dcd-v5.

The code is tested with Ira's last kernel DCD patch set [2] with some minor
bug fixes[3]. Tested operations include:

Extra extent release test,
1. Add multiple extents, and removing them one by one, passed;
2. Superset release: add multiple extents with continuous DPA ranges, and
   Remove all of them with a single extent request with extent covering the
   whole DPA range, passed;
3. Partial extent release: add a large extent and release only part of it,
   passed;
4. Partial+superset release: add multiple extents, and release them with some
   leftover using one request with a single extent. For example, add extents
   [0-128M] and [128M-256M], release [64M-256M], passed;
5. release extent not aligned to block size, failed as expected;
6. extents have overlaps, fail the request as expected;
7. extent has uncovered DPA range, skip the extent as expected; 
8. OS tries to release a non-exist extent, return invalid PA as expected;
 
[1] v4: 
https://lore.kernel.org/linux-cxl/20240221182020.1086096-1-nifan@gmail.com/T/#m580c59be5deca2639dd7158e57f4bc1141d1c885
[2] https://github.com/weiny2/linux-kernel/tree/dcd-v3-2023-10-30
[3] 
https://github.com/moking/linux-dcd/commit/9d24fa6e5d39f934623220953caecc080f93e964

Fan Ni (13):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr
size instead of mr as argument
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions
  hw/cxl/cxl-mailbox-utils: Add partial and superset extent release
mailbox support
  hw/mem/cxl_type3: Allow to release partial extent and extent superset
in QMP interface
  qapi/cxl.json: Add QMP interfaces to print out accepted and pending DC
extents

 hw/cxl/cxl-mailbox-utils.c  | 682 -
 hw/mem/cxl_type3.c  | 724 ++--
 hw/mem/cxl_type3_stubs.c|  26 ++
 include/hw/cxl/cxl_device.h |  61 ++-
 include/hw/cxl/cxl_events.h |  18 +
 qapi/cxl.json   |  93 -
 6 files changed, 1558 

[PATCH v5 10/13] hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Not all DPA ranges in the DC regions are valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a DC block in the region has been backed by a DC extent.
In the bitmap, each bit represents a DC block. When a DC
extent is added, all the bits of the blocks in the extent are set;
they are cleared when the extent is released.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  4 ++
 hw/mem/cxl_type3.c  | 76 +
 include/hw/cxl/cxl_device.h |  7 
 3 files changed, 87 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 53ebc526ae..b538297bb5 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1606,6 +1606,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 
 /*
@@ -1681,17 +1682,20 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 found = true;
 cxl_remove_extent_from_extent_list(extent_list, ent);
 ct3d->dc.total_extent_count -= 1;
+ct3_clear_region_block_backed(ct3d, ent_start_dpa, 
ent_len);
 
 if (len1) {
 cxl_insert_extent_to_extent_list(extent_list,
  ent_start_dpa, len1,
  NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, ent_start_dpa, len1);
 }
 if (len2) {
 cxl_insert_extent_to_extent_list(extent_list, dpa + 
len,
  len2, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa + len, len2);
 }
 break;
 } else {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index e9c8994cdb..c164cf4580 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -672,6 +672,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 
 region_base += region->len;
 ct3d->dc.total_capacity += region->len;
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
 }
 QTAILQ_INIT(>dc.extents);
 QTAILQ_INIT(>dc.extents_pending_to_add);
@@ -682,6 +683,8 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 {
 CXLDCExtent *ent;
+int i;
+CXLDCRegion *region;
 
 while (!QTAILQ_EMPTY(>dc.extents)) {
 ent = QTAILQ_FIRST(>dc.extents);
@@ -693,6 +696,11 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
ent);
 }
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+g_free(region->blk_bitmap);
+}
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -924,6 +932,70 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * Mark the DPA range [dpa, dpa + len) to be backed and accessible. This
+ * happens when a DC extent is added and accepted by the host.
+ */
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+CXLDCRegion *region;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+   len / region->block_size);
+}
+
+/*
+ * Check whether the DPA range [dpa, dpa + len) is backed with DC extents.
+ * Used when validating read/write to dc regions
+ */
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+  uint64_t len)
+{
+CXLDCRegion *region;
+uint64_t nbits;
+long nr;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return false;
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = DIV_ROUND_UP(len, region->block_size);
+/*
+ * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
+ * backed with DC extents, return true; else return false.
+ */
+return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + 
nbits;
+}
+
+/*
+ * Mark the DPA range [dpa, dpa + len) to be unbacked and inaccessible. This
+ * happens when a dc extent is released by the host.
+ */
+void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+   

[PATCH v5 06/13] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Add a (file/memory backed) host backend; all the dynamic capacity regions
share a single, large enough host backend. Set up an address space for
DC regions to support read/write operations to dynamic capacity for DCD.

With the change, the following support is added:
1. Add a new property "volatile-dc-memdev" to the type3 device to point to the
   host memory backend for dynamic capacity. Currently, all DC regions share
   one host backend.
2. Add an address space for dynamic capacity for read/write support;
3. Create CDAT entries for each dynamic capacity region;
4. Fix DVSEC range registers to include DC regions.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  16 ++-
 hw/mem/cxl_type3.c  | 189 +---
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 170 insertions(+), 39 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 853dadba39..8309f27a2b 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -622,7 +622,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = _TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = >cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -636,7 +637,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -793,7 +795,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = >cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -835,9 +838,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1179,7 +1184,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index c045fee32d..2b380a260b 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -45,7 +45,8 @@ enum {
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
   int dsmad_handle, uint64_t size,
-  bool is_pmem, uint64_t dpa_base)
+  bool is_pmem, bool is_dynamic,
+  uint64_t dpa_base)
 {
 g_autofree CDATDsmas *dsmas = NULL;
 g_autofree CDATDslbis *dslbis0 = NULL;
@@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
+ (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
 .DPA_base = dpa_base,
 .DPA_length = size,
 };
@@ -149,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+MemoryRegion *dc_mr = NULL;
 uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
 
-if (!ct3d->hostpmem && !ct3d->hostvmem) {
+if (!ct3d->hostpmem && 

[PATCH v5 02/13] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Per CXL spec r3.1, add a dynamic capacity region representation based on
Table 8-165 and extend the CXL type3 device definition to include DC region
information. Also, based on section 8.2.9.9.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: we store region decode length as byte-wise length on the device, which
should be divided by 256 * MiB before being returned to the host
for "Get Dynamic Capacity Configuration" mailbox command per
specification.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 99 +
 include/hw/cxl/cxl_device.h | 16 ++
 2 files changed, 115 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ba1d9901df..5792010c12 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -22,6 +22,8 @@
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
+#define CXL_NUM_EXTENTS_SUPPORTED 512
+#define CXL_NUM_TAGS_SUPPORTED 0
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -80,6 +82,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1238,6 +1242,91 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint8_t region_cnt;
+uint8_t start_region_id;
+} QEMU_PACKED *in;
+struct {
+uint8_t num_regions;
+uint8_t regions_returned;
+uint8_t rsvd1[6];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED *out;
+struct {
+uint32_t num_extents_supported;
+uint32_t num_extents_available;
+uint32_t num_tags_supported;
+uint32_t num_tags_available;
+} QEMU_PACKED *extra_out;
+uint16_t record_count;
+uint16_t i;
+uint16_t out_pl_len;
+uint8_t start_region_id;
+
+in = (void *)payload_in;
+out = (void *)payload_out;
+start_region_id = in->start_region_id;
+if (start_region_id >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
+in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+extra_out = (void *)(payload_out + out_pl_len);
+out_pl_len += sizeof(*extra_out);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+out->num_regions = ct3d->dc.num_regions;
+out->regions_returned = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(>records[i].base,
+ ct3d->dc.regions[start_region_id + i].base);
+stq_le_p(>records[i].decode_len,
+ ct3d->dc.regions[start_region_id + i].decode_len /
+ CXL_CAPACITY_MULTIPLIER);
+stq_le_p(>records[i].region_len,
+ ct3d->dc.regions[start_region_id + i].len);
+stq_le_p(>records[i].block_size,
+ ct3d->dc.regions[start_region_id + i].block_size);
+stl_le_p(>records[i].dsmadhandle,
+ ct3d->dc.regions[start_region_id + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
+}
+/*
+ * TODO: will assign proper values when extents and tags are introduced
+ * to use.
+ */
+stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
+stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1282,6 +1371,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_clear_poison, 72, 0 },
 };
 
+static const struct cxl_cmd 

[PATCH v5 04/13] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2024-03-04 Thread nifan . cxl
From: Fan Ni 

With the change, when setting up memory for type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To make it easier, other region parameters
like region base, length, and block size are hard coded. If needed,
these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support like below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 244d2b5fd5..a191211009 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -567,6 +568,45 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * TODO: dc region configuration will be updated once host backend and address
+ * space support is added for DCD.
+ */
+static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 2 * GiB;
+uint64_t blk_size = 2 * MiB;
+CXLDCRegion *region;
+MemoryRegion *mr;
+
+if (ct3d->hostvmem) {
+mr = host_memory_backend_get_memory(ct3d->hostvmem);
+region_base += memory_region_size(mr);
+}
+if (ct3d->hostpmem) {
+mr = host_memory_backend_get_memory(ct3d->hostpmem);
+region_base += memory_region_size(mr);
+}
+assert(region_base % CXL_CAPACITY_MULTIPLIER == 0);
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+region->base = region_base;
+region->decode_len = decode_len;
+region->len = region_len;
+region->block_size = blk_size;
+/* dsmad_handle is set when creating cdat table entries */
+region->flags = 0;
+
+region_base += region->len;
+}
+
+return true;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -635,6 +675,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (!cxl_create_dc_regions(ct3d, errp)) {
+error_setg(errp, "setup DC regions failed");
+return false;
+}
+
 return true;
 }
 
@@ -930,6 +975,7 @@ static Property ct3_props[] = {
  HostMemoryBackend *),
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.43.0




[PATCH v5 13/13] qapi/cxl.json: Add QMP interfaces to print out accepted and pending DC extents

2024-03-04 Thread nifan . cxl
From: Fan Ni 

With the change, we add the following two QMP interfaces to print out
extent information in the device:
1. cxl-display-accepted-dc-extents: print out the accepted DC extents in
   the device;
2. cxl-display-pending-to-add-dc-extents: print out the pending-to-add
   DC extents in the device.
The output is appended to the file passed to the command; by default,
the file is /tmp/dc-extent.txt.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c   | 80 
 hw/mem/cxl_type3_stubs.c | 12 ++
 qapi/cxl.json| 32 
 3 files changed, 124 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5bd64e604e..6a08e7ae40 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -2089,6 +2089,86 @@ void qmp_cxl_release_dynamic_capacity(const char *path, 
uint8_t region_id,
  region_id, records, errp);
 }
 
+static void cxl_dcd_display_extent_list(const CXLType3Dev *dcd, const char *f,
+bool accepted_list, Error **errp)
+{
+const CXLDCExtentList *list;
+CXLDCExtent *ent;
+FILE *fp = NULL;
+int i = 0;
+
+if (!dcd->dc.num_regions) {
+error_setg(errp, "No dynamic capacity support from the device");
+return;
+}
+
+if (!f) {
+fp = fopen("/tmp/dc-extent.txt", "a+");
+} else {
+fp = fopen(f, "a+");
+}
+
+if (!fp) {
+error_setg(errp, "Open log file failed");
+return;
+}
+if (accepted_list) {
+list = >dc.extents;
+fprintf(fp, "Print accepted extent info:\n");
+} else {
+list = >dc.extents_pending_to_add;
+fprintf(fp, "Print pending-to-add extent info:\n");
+}
+
+QTAILQ_FOREACH(ent, list, node) {
+fprintf(fp, "%d: [0x%lx - 0x%lx]\n", i++, ent->start_dpa,
+   ent->start_dpa + ent->len);
+}
+fprintf(fp, "In total, %d extents printed!\n", i);
+fclose(fp);
+}
+
+void qmp_cxl_display_accepted_dc_extents(const char *path, const char *f,
+ Error **errp)
+{
+Object *obj;
+CXLType3Dev *dcd;
+
+obj = object_resolve_path(path, NULL);
+if (!obj) {
+error_setg(errp, "Unable to resolve path");
+return;
+}
+if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+error_setg(errp, "Path not point to a valid CXL type3 device");
+return;
+}
+
+dcd = CXL_TYPE3(obj);
+cxl_dcd_display_extent_list(dcd, f, true, errp);
+}
+
+void qmp_cxl_display_pending_to_add_dc_extents(const char *path, const char *f,
+   Error **errp)
+{
+Object *obj;
+CXLType3Dev *dcd;
+
+obj = object_resolve_path(path, NULL);
+if (!obj) {
+error_setg(errp, "Unable to resolve path");
+return;
+}
+if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+error_setg(errp, "Path not point to a valid CXL type3 device");
+return;
+}
+
+
+dcd = CXL_TYPE3(obj);
+cxl_dcd_display_extent_list(dcd, f, false, errp);
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index d913b11b4d..d896758301 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -81,3 +81,15 @@ void qmp_cxl_release_dynamic_capacity(const char *path, 
uint8_t region_id,
 {
 error_setg(errp, "CXL Type 3 support is not compiled in");
 }
+
+void qmp_cxl_display_accepted_dc_extents(const char *path, const char *f,
+ Error **errp)
+{
+error_setg(errp, "CXL Type 3 support is not compiled in");
+}
+
+void qmp_cxl_display_pending_to_add_dc_extents(const char *path, const char *f,
+   Error **errp)
+{
+error_setg(errp, "CXL Type 3 support is not compiled in");
+}
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 2645004666..6f10300ec6 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -420,3 +420,35 @@
 'extents': [ 'CXLDCExtentRecord' ]
}
 }
+
+##
+# @cxl-display-accepted-dc-extents:
+#
+# Command to print out all the accepted DC extents in the device
+#
+# @path: CXL DCD canonical QOM path
+# @output: path of output file to dump the results to
+#
+# Since : 9.0
+##
+{ 'command': 'cxl-display-accepted-dc-extents',
+  'data': { 'path': 'str',
+'output': 'str'
+   }
+}
+
+##
+# @cxl-display-pending-to-add-dc-extents:
+#
+# Command to print out all the pending-to-add DC extents in the device
+#
+# @path: CXL DCD canonical QOM path
+# @output: path of output file to dump the results to
+#
+# Since : 9.0
+##
+{ 'command': 'cxl-display-pending-to-add-dc-extents',
+  'data': { 'path': 'str',
+'output': 'str'
+   }
+}
-- 
2.43.0




[PATCH v5 03/13] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 5792010c12..853dadba39 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -803,7 +803,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(>total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+ cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>volatile_capacity,
@@ -1179,7 +1179,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index e8801805b9..244d2b5fd5 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -837,7 +837,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1010,7 +1010,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 93ce047b28..f82d018422 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -234,7 +234,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 
-- 
2.43.0




[PATCH v5 11/13] hw/cxl/cxl-mailbox-utils: Add partial and superset extent release mailbox support

2024-03-04 Thread nifan . cxl
From: Fan Ni 

With the change, we extend the extent release mailbox command processing
to allow more flexible release. As long as the DPA range of the extent to
release is covered by valid extent(s) in the device, the release can be
performed.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 211 +
 1 file changed, 188 insertions(+), 23 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index b538297bb5..eaff5c4c93 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1617,6 +1617,155 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Return value: the id of the DC region that covers the DPA range
+ * [dpa, dpa+len) The assumption is that the range is valid and within
+ * a DC region.
+ */
+static uint8_t cxl_find_dc_region_id(const CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+int i;
+const CXLDCRegion *region;
+
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = >dc.regions[i];
+if (dpa >= region->base) {
+break;
+}
+}
+return i;
+}
+
+/*
+ * Copy extent list from src to dst
+ * Return value: number of extents copied
+ */
+static uint32_t copy_extent_list(CXLDCExtentList *dst,
+ const CXLDCExtentList *src)
+{
+uint32_t cnt = 0;
+CXLDCExtent *ent;
+
+if (!dst || !src) {
+return 0;
+}
+
+QTAILQ_FOREACH(ent, src, node) {
+cxl_insert_extent_to_extent_list(dst, ent->start_dpa, ent->len,
+ ent->tag, ent->shared_seq);
+cnt++;
+}
+return cnt;
+}
+
+/*
+ * Detect potential extent overflow caused by extent split during processing
+ * extent release requests, also allow releasing superset of extents where the
+ * extent to release covers the range of multiple extents in the device.
+ * Note:
+ * 1. We will reject releasing an extent if some portion of its range is
+ * not covered by valid extents.
+ * 2. This function is called after cxl_detect_malformed_extent_list so checks
+ * already performed there will be skipped.
+ */
+static CXLRetCode cxl_detect_extent_overflow(const CXLType3Dev *ct3d,
+const CXLUpdateDCExtentListInPl *in)
+{
+uint64_t nbits, offset;
+const CXLDCRegion *region;
+unsigned long **bitmaps_copied;
+uint64_t dpa, len;
+int i, rid;
+CXLRetCode ret = CXL_MBOX_SUCCESS;
+long extent_cnt_delta = 0;
+CXLDCExtentList tmp_list;
+CXLDCExtent *ent;
+
+QTAILQ_INIT(_list);
+copy_extent_list(_list, >dc.extents);
+
+bitmaps_copied = g_new0(unsigned long *, ct3d->dc.num_regions);
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+nbits = region->len / region->block_size;
+bitmaps_copied[i] = bitmap_new(nbits);
+bitmap_copy(bitmaps_copied[i], region->blk_bitmap, nbits);
+}
+
+for (i = 0; i < in->num_entries_updated; i++) {
+dpa = in->updated_entries[i].start_dpa;
+len = in->updated_entries[i].len;
+
+rid = cxl_find_dc_region_id(ct3d, dpa, len);
+region = >dc.regions[rid];
+offset = (dpa - region->base) / region->block_size;
+nbits = len / region->block_size;
+
+/* Check whether range [dpa, dpa + len) is covered by valid range */
+if (find_next_zero_bit(bitmaps_copied[rid], offset + nbits, offset) <
+   offset + nbits) {
+ret = CXL_MBOX_INVALID_PA;
+goto free_and_exit;
+}
+
+QTAILQ_FOREACH(ent, _list, node) {
+/* Only split within an extent can cause extent count increase */
+if (ent->start_dpa <= dpa &&
+dpa + len <= ent->start_dpa + ent->len) {
+uint64_t ent_start_dpa = ent->start_dpa;
+uint64_t ent_len = ent->len;
+uint64_t len1 = dpa - ent_start_dpa;
+uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
+
+extent_cnt_delta += len1 && len2 ? 2 : (len1 || len2 ? 1 : 0);
+extent_cnt_delta -= 1;
+if (ct3d->dc.total_extent_count + extent_cnt_delta >
+CXL_NUM_EXTENTS_SUPPORTED) {
+ret = CXL_MBOX_RESOURCES_EXHAUSTED;
+goto free_and_exit;
+}
+
+offset = (ent->start_dpa - region->base) / region->block_size;
+nbits = ent->len / region->block_size;
+bitmap_clear(bitmaps_copied[rid], offset, nbits);
+cxl_remove_extent_from_extent_list(_list, ent);
+
+ if (len1) {
+offset = (dpa - region->base) / region->block_size;
+nbits = len1 / region->block_size;
+bitmap_set(bitmaps_copied[rid], offset, nbits);
+

[PATCH v5 07/13] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Add a dynamic capacity extent list representation to the definition of
CXLType3Dev and add the Get Dynamic Capacity Extent List mailbox command
per CXL spec r3.1 section 8.2.9.9.9.2.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 71 -
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 22 
 3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 8309f27a2b..425b378a2c 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1325,7 +1326,8 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct 
cxl_cmd *cmd,
  * to use.
  */
 stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
-stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count);
 stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
 stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
 
@@ -1333,6 +1335,70 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED *out = (void *)payload_out;
+uint16_t record_count = 0, i = 0, record_done = 0;
+uint16_t out_pl_len;
+uint32_t start_extent_id = in->start_extent_id;
+CXLDCExtentList *extent_list = >dc.extents;
+CXLDCExtent *ent;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+stl_le_p(>count, record_count);
+stl_le_p(>total_extents, ct3d->dc.total_extent_count);
+stl_le_p(>generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(>records[record_done].start_dpa, ent->start_dpa);
+stq_le_p(>records[record_done].len, ent->len);
+memcpy(>records[record_done].tag, ent->tag, 0x10);
+stw_le_p(>records[record_done].shared_seq, ent->shared_seq);
+
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1380,6 +1446,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 2b380a260b..102fa8151e 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -673,6 +673,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 region_base += region->len;
 ct3d->dc.total_capacity += region->len;
 }
+QTAILQ_INIT(>dc.extents);
 
 return true;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 265679302c..8148bcc34b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -424,6 +424,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_NUM_REGION 8
 
+typedef struct 

[PATCH v5 12/13] hw/mem/cxl_type3: Allow to release partial extent and extent superset in QMP interface

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Before the change, the QMP interface used to add/release DC extents
only allows releasing extents that exist in either the pending-to-add list
or the accepted list in the device, which means the DPA range of the extent
must exactly match that of an extent in either list. Otherwise, the release
request will be ignored.

With the change, we relax the constraints. As long as the DPA range of the
extent to release is covered by extents in one of the two lists
mentioned above, we allow the release.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 110 -
 1 file changed, 89 insertions(+), 21 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index c164cf4580..5bd64e604e 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1802,28 +1802,79 @@ typedef enum CXLDCEventType {
 } CXLDCEventType;
 
 /*
- * Check whether the exact extent exists in the list
- * Return value: the extent pointer in the list; else null
+ * Test whether the DPA range [dpa, dpa + len) is covered by
+ * extents in the list.
  */
-static CXLDCExtent *cxl_dc_extent_exists(CXLDCExtentList *list,
- CXLDCExtentRaw *ext)
+static bool cxl_test_dpa_range_covered_by_extents(CXLDCExtentList *list,
+  uint64_t dpa, uint64_t len)
 {
 CXLDCExtent *ent;
 
-if (!ext || !list) {
-return NULL;
+if (!list) {
+return false;
 }
 
-QTAILQ_FOREACH(ent, list, node) {
-if (ent->start_dpa != ext->start_dpa) {
-continue;
-}
+while (len) {
+bool has_leftover = false;
 
-/* Found exact extent */
-return ent->len == ext->len ? ent : NULL;
+QTAILQ_FOREACH(ent, list, node) {
+if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
+if (dpa + len <= ent->start_dpa + ent->len) {
+return true;
+} else {
+len = dpa + len - ent->start_dpa - ent->len;
+dpa = ent->start_dpa + ent->len;
+has_leftover = true;
+break;
+}
+}
+}
+if (!has_leftover) {
+break;
+}
 }
+return false;
+}
+
+/*
+ * Remove all extents whose DPA range overlaps with the DPA range
+ * [dpa, dpa + len) from the list, and delete the overlapped portion.
+ * Note:
+ * 1. If the removed extent is fully within the DPA range, delete the extent;
+ * 2. Otherwise, keep the portion that does not overlap, inserting new extents
+ * into the list if needed for the non-overlapped part.
+ */
+static void cxl_delist_extent_by_dpa_range(CXLDCExtentList *list,
+   uint64_t dpa, uint64_t len)
+{
+CXLDCExtent *ent;
 
-return NULL;
+process_leftover:
+QTAILQ_FOREACH(ent, list, node) {
+if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
+uint64_t ent_start_dpa = ent->start_dpa;
+uint64_t ent_len = ent->len;
+uint64_t len1 = dpa - ent_start_dpa;
+
+cxl_remove_extent_from_extent_list(list, ent);
+if (len1) {
+cxl_insert_extent_to_extent_list(list, ent_start_dpa,
+ len1, NULL, 0);
+}
+
+if (dpa + len <= ent_start_dpa + ent_len) {
+uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
+if (len2) {
+cxl_insert_extent_to_extent_list(list, dpa + len,
+ len2, NULL, 0);
+}
+} else {
+len = dpa + len - ent_start_dpa - ent_len;
+dpa = ent_start_dpa + ent_len;
+goto process_leftover;
+}
+}
+}
 }
 
 /*
@@ -1915,8 +1966,8 @@ static void qmp_cxl_process_dynamic_capacity(const char 
*path, CxlEventLog log,
 list = records;
 extents = g_new0(CXLDCExtentRaw, num_extents);
 while (list) {
-CXLDCExtent *ent;
 bool skip_extent = false;
+CXLDCExtentList *extent_list;
 
 offset = list->value->offset;
 len = list->value->len;
@@ -1933,15 +1984,32 @@ static void qmp_cxl_process_dynamic_capacity(const char 
*path, CxlEventLog log,
  * remove it from the pending extent list, so later when the add
  * response for the extent arrives, the device can reject the
  * extent as it is not in the pending list.
+ * Now, we can handle the case where the extent covers the DPA
+ * range of multiple extents in the pending_to_add list.
+ * TODO: for now, we do not allow the extent to cover ranges of extents
+ * in both the pending_to_add list and the accepted list at the same time.
  */
-ent = 

[PATCH v5 09/13] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

Note: we skip any FM-issued extent release request if the exact extent
does not exist in the extent list of the device. We will loosen the
restriction later once we have partial release support in the kernel.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 0,
  "len": 134217728
  },
  {
  "dpa": 134217728,
  "len": 134217728
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 134217728,
  "len": 134217728
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  26 ++--
 hw/mem/cxl_type3.c  | 245 +++-
 hw/mem/cxl_type3_stubs.c|  14 +++
 include/hw/cxl/cxl_device.h |   6 +
 include/hw/cxl/cxl_events.h |  18 +++
 qapi/cxl.json   |  61 -
 6 files changed, 361 insertions(+), 9 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 8c59635a9f..53ebc526ae 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1405,7 +1405,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
   unsigned long size)
 {
 unsigned long res = find_next_bit(addr, size + nr, nr);
@@ -1444,7 +1444,7 @@ CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
  uint64_t dpa,
  uint64_t len,
  uint8_t *tag,
@@ -1591,16 +1591,28 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 }
 }
 
-/*
- * TODO: we will add a pending extent list based on event log record
- * and verify the input response; also, the "More" flag is not
- * considered at the moment.
- */
+QTAILQ_FOREACH(ent, >dc.extents_pending_to_add, node) {
+if (ent->start_dpa <= dpa &&
+dpa + len <= ent->start_dpa + ent->len) {
+break;
+}
+}
+if (!ent) {
+return CXL_MBOX_INVALID_PA;
+}
+
+cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
+   ent);
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
 }
 
+/*
+ * TODO: extents_pending_to_add needs to be cleared so the extents not
+ * accepted can be reclaimed based on spec r3.1: 8.2.9.9.9.3
+ */
+
 return CXL_MBOX_SUCCESS;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index dccfaaad3a..e9c8994cdb 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -674,6 +674,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 ct3d->dc.total_capacity += region->len;
 }
 QTAILQ_INIT(>dc.extents);
+QTAILQ_INIT(>dc.extents_pending_to_add);
 
 return true;
 }
@@ -686,6 +687,12 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 ent = QTAILQ_FIRST(>dc.extents);
 cxl_remove_extent_from_extent_list(>dc.extents, ent);
 }
+
+while (!QTAILQ_EMPTY(>dc.extents_pending_to_add)) {
+ent = QTAILQ_FIRST(>dc.extents_pending_to_add);
+cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
+   ent);
+}
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -1451,7 +1458,8 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
 return CXL_EVENT_TYPE_FAIL;
 case CXL_EVENT_LOG_FATAL:
 return CXL_EVENT_TYPE_FATAL;
-/* DCD not yet supported */
+case CXL_EVENT_LOG_DYNCAP:
+return CXL_EVENT_TYPE_DYNAMIC_CAP;
 default:
 return -EINVAL;
 }
@@ -1702,6 

[PATCH v5 05/13] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument

2024-03-04 Thread nifan . cxl
From: Fan Ni 

The function ct3_build_cdat_entries_for_mr only uses the size of the passed
memory region argument, so refactor the function definition to make the passed
arguments more specific.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a191211009..c045fee32d 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -44,7 +44,7 @@ enum {
 };
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-  int dsmad_handle, MemoryRegion *mr,
+  int dsmad_handle, uint64_t size,
   bool is_pmem, uint64_t dpa_base)
 {
 g_autofree CDATDsmas *dsmas = NULL;
@@ -63,7 +63,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .DSMADhandle = dsmad_handle,
 .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -132,7 +132,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  */
 .EFI_memory_type_attr = is_pmem ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -149,6 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
@@ -163,6 +164,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+vmr_size = memory_region_size(volatile_mr);
 }
 
 if (ct3d->hostpmem) {
@@ -171,21 +173,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+pmr_size = memory_region_size(nonvolatile_mr);
 }
 
 table = g_malloc0(len * sizeof(*table));
 
 /* Now fill them in */
 if (volatile_mr) {
-ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
   false, 0);
 cur_ent = CT3_CDAT_NUM_ENTRIES;
 }
 
 if (nonvolatile_mr) {
-uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
+uint64_t base = vmr_size;
 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-  nonvolatile_mr, true, base);
+  pmr_size, true, base);
 cur_ent += CT3_CDAT_NUM_ENTRIES;
 }
 assert(len == cur_ent);
-- 
2.43.0




[PATCH v5 01/13] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4bcd727f4c..ba1d9901df 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = >cxl_dstate;
@@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(>inject_poison_limit, 0);
+stw_le_p(>dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.43.0




[PATCH v5 08/13] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-03-04 Thread nifan . cxl
From: Fan Ni 

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 310 
 hw/mem/cxl_type3.c  |  12 ++
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 326 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 425b378a2c..8c59635a9f 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -85,6 +85,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1399,6 +1401,308 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+  unsigned long size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+int i;
+CXLDCRegion *region = >dc.regions[0];
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = >dc.regions[i];
+if (dpa >= region->base) {
+if (dpa + len > region->base + region->len) {
+return NULL;
+}
+return region;
+}
+}
+
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCExtent *extent;
+
+extent = g_new0(CXLDCExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+CXLDCExtent *extent)
+{
+QTAILQ_REMOVE(list, extent, node);
+g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+uint32_t num_entries_updated;
+uint8_t flags;
+uint8_t rsvd[3];
+/* CXL r3.1 Table 8-169: Updated Extent */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+const CXLUpdateDCExtentListInPl *in)
+{
+uint64_t min_block_size = UINT64_MAX;
+CXLDCRegion *region = >dc.regions[0];
+CXLDCRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
+g_autofree unsigned long *blk_bitmap = NULL;
+uint64_t dpa, len;
+uint32_t i;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+min_block_size = MIN(min_block_size, region->block_size);
+}
+
+blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
+ ct3d->dc.regions[0].base) / min_block_size);
+
+for (i = 0; i < in->num_entries_updated; i++) {
+dpa = in->updated_entries[i].start_dpa;
+len = in->updated_entries[i].len;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return CXL_MBOX_INVALID_PA;
+}
+
+dpa -= ct3d->dc.regions[0].base;

[PATCH v2] hw/cxl/cxl-mailbox-utils: remove unneeded mailbox output payload space zeroing

2024-02-21 Thread nifan . cxl
From: Fan Ni 

The whole mailbox output payload space is already zeroed after copying
out the input payload, which happens before processing the specific mailbox
command:
https://elixir.bootlin.com/qemu/v8.2.1/source/hw/cxl/cxl-device-utils.c#L204

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index e5eb97cb91..fda88470a3 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -226,7 +226,6 @@ static CXLRetCode cmd_events_get_records(const struct 
cxl_cmd *cmd,
 log_type = payload_in[0];
 
 pl = (CXLGetEventPayload *)payload_out;
-memset(pl, 0, sizeof(*pl));
 
 max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) /
 CXL_EVENT_RECORD_SIZE;
@@ -264,7 +263,6 @@ static CXLRetCode cmd_events_get_interrupt_policy(const 
struct cxl_cmd *cmd,
 CXLEventLog *log;
 
 policy = (CXLEventInterruptPolicy *)payload_out;
-memset(policy, 0, sizeof(*policy));
 
 log = >event_logs[CXL_EVENT_TYPE_INFO];
 if (log->irq_enabled) {
@@ -363,7 +361,6 @@ static CXLRetCode cmd_infostat_identify(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18);
 
 is_identify = (void *)payload_out;
-memset(is_identify, 0, sizeof(*is_identify));
 is_identify->pcie_vid = class->vendor_id;
 is_identify->pcie_did = class->device_id;
 if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_USP)) {
@@ -597,7 +594,6 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*bg_op_status) != 8);
 
 bg_op_status = (void *)payload_out;
-memset(bg_op_status, 0, sizeof(*bg_op_status));
 bg_op_status->status = cci->bg.complete_pct << 1;
 if (cci->bg.runtime > 0) {
 bg_op_status->status |= 1U << 0;
@@ -636,7 +632,6 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 }
 
 fw_info = (void *)payload_out;
-memset(fw_info, 0, sizeof(*fw_info));
 
 fw_info->slots_supported = 2;
 fw_info->slot_info = BIT(0) | BIT(3);
@@ -792,7 +787,6 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 }
 
 id = (void *)payload_out;
-memset(id, 0, sizeof(*id));
 
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
@@ -1079,7 +1073,6 @@ static CXLRetCode cmd_media_get_poison_list(const struct 
cxl_cmd *cmd,
 out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
 assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
 
-memset(out, 0, out_pl_len);
 QLIST_FOREACH(ent, poison_list, node) {
 uint64_t start, stop;
 
-- 
2.43.0




[PATCH] hw/cxl/cxl-mailbox-utils: remove unneeded mailbox output payload space zeroing

2024-02-21 Thread nifan . cxl
From: Fan Ni 

The whole mailbox output payload space is already zeroed after copying
out the input payload, which happens before processing the specific mailbox
command:
https://elixir.bootlin.com/qemu/latest/source/hw/cxl/cxl-device-utils.c#L204

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index e5eb97cb91..fda88470a3 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -226,7 +226,6 @@ static CXLRetCode cmd_events_get_records(const struct 
cxl_cmd *cmd,
 log_type = payload_in[0];
 
 pl = (CXLGetEventPayload *)payload_out;
-memset(pl, 0, sizeof(*pl));
 
 max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) /
 CXL_EVENT_RECORD_SIZE;
@@ -264,7 +263,6 @@ static CXLRetCode cmd_events_get_interrupt_policy(const 
struct cxl_cmd *cmd,
 CXLEventLog *log;
 
 policy = (CXLEventInterruptPolicy *)payload_out;
-memset(policy, 0, sizeof(*policy));
 
 log = >event_logs[CXL_EVENT_TYPE_INFO];
 if (log->irq_enabled) {
@@ -363,7 +361,6 @@ static CXLRetCode cmd_infostat_identify(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18);
 
 is_identify = (void *)payload_out;
-memset(is_identify, 0, sizeof(*is_identify));
 is_identify->pcie_vid = class->vendor_id;
 is_identify->pcie_did = class->device_id;
 if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_USP)) {
@@ -597,7 +594,6 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*bg_op_status) != 8);
 
 bg_op_status = (void *)payload_out;
-memset(bg_op_status, 0, sizeof(*bg_op_status));
 bg_op_status->status = cci->bg.complete_pct << 1;
 if (cci->bg.runtime > 0) {
 bg_op_status->status |= 1U << 0;
@@ -636,7 +632,6 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 }
 
 fw_info = (void *)payload_out;
-memset(fw_info, 0, sizeof(*fw_info));
 
 fw_info->slots_supported = 2;
 fw_info->slot_info = BIT(0) | BIT(3);
@@ -792,7 +787,6 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 }
 
 id = (void *)payload_out;
-memset(id, 0, sizeof(*id));
 
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
@@ -1079,7 +1073,6 @@ static CXLRetCode cmd_media_get_poison_list(const struct 
cxl_cmd *cmd,
 out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
 assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
 
-memset(out, 0, out_pl_len);
 QLIST_FOREACH(ent, poison_list, node) {
 uint64_t start, stop;
 
-- 
2.43.0




[PATCH v4 01/10] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 4bcd727f4c..ba1d9901df 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = >cxl_dstate;
@@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(>inject_poison_limit, 0);
+stw_le_p(>dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.43.0




[PATCH v4 02/10] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Per CXL spec r3.1, add a dynamic capacity region representation based on
Table 8-165 and extend the CXL type3 device definition to include DC region
information. Also, based on the info in 8.2.9.9.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: decode_len of a DC region is aligned to 256 MiB, and is divided by
256 MiB before being returned to the host for the "Get Dynamic Capacity
Configuration" mailbox command.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 110 
 include/hw/cxl/cxl_device.h |  16 ++
 2 files changed, 126 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index ba1d9901df..88e3b733e3 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -22,6 +22,7 @@
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
+#define CXL_SPEC_AFTER_R30
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -80,6 +81,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1238,6 +1241,103 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct get_dyn_cap_config_in_pl {
+uint8_t region_cnt;
+uint8_t start_region_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_out_pl {
+uint8_t num_regions;
+uint8_t regions_returned;
+uint8_t rsvd1[6];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+/*
+ * if cxl spec version >= 3.1, extra output payload as defined
+ * in struct get_dyn_cap_config_out_pl_extra comes here.
+ */
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_in_pl *in = (void *)payload_in;
+struct get_dyn_cap_config_out_pl *out = (void *)payload_out;
+uint16_t record_count = 0;
+uint16_t i;
+uint16_t out_pl_len;
+uint8_t start_region_id = in->start_region_id;
+#ifdef CXL_SPEC_AFTER_R30
+struct get_dyn_cap_config_out_pl_extra {
+uint32_t num_extents_supported;
+uint32_t num_extents_available;
+uint32_t num_tags_supported;
+uint32_t num_tags_available;
+} QEMU_PACKED;
+struct get_dyn_cap_config_out_pl_extra *extra_out;
+#endif
+
+if (start_region_id >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
+in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+#ifdef CXL_SPEC_AFTER_R30
+extra_out = (struct get_dyn_cap_config_out_pl_extra *)(payload_out +
+out_pl_len);
+out_pl_len += sizeof(struct get_dyn_cap_config_out_pl_extra);
+#endif
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+out->num_regions = ct3d->dc.num_regions;
+#ifdef CXL_SPEC_AFTER_R30
+out->regions_returned = record_count;
+#endif
+for (i = 0; i < record_count; i++) {
+stq_le_p(>records[i].base,
+ct3d->dc.regions[start_region_id + i].base);
+stq_le_p(>records[i].decode_len,
+ct3d->dc.regions[start_region_id + i].decode_len /
+CXL_CAPACITY_MULTIPLIER);
+stq_le_p(>records[i].region_len,
+ct3d->dc.regions[start_region_id + i].len);
+stq_le_p(>records[i].block_size,
+ct3d->dc.regions[start_region_id + i].block_size);
+stl_le_p(>records[i].dsmadhandle,
+ct3d->dc.regions[start_region_id + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
+}
+#ifdef CXL_SPEC_AFTER_R30
+/*FIXME: need to set valid values in the future*/
+stq_le_p(_out->num_extents_supported, 0);
+stq_le_p(_out->num_extents_available, 0);
+stq_le_p(_out->num_tags_supported, 0);
+stq_le_p(_out->num_tags_available, 0);
+#endif
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 

[PATCH v4 00/10] Enabling DCD emulation support in Qemu

2024-02-21 Thread nifan . cxl
From: Fan Ni 

v3[1]->v4: 

The code is rebased on mainstream QEMU with the following patch series:

hw/cxl/mailbox: change CCI cmd set structure to be a member, not a reference
hw/cxl/mailbox: interface to add CCI commands to an existing CCI

Main changes include:

1. Updated the specification references to align with cxl spec r3.1.
2. Added extra elements to the get dc region configuration output payload and
processed them accordingly in mailbox command 4800h.
3. Removed the unwanted space.
4. Refactored ct3_build_cdat_entries_for_mr and extract it as a separate patch.
5. Updated cxl_create_dc_regions function to derive region len from host
backend size.
6. Changed the logic for creating DC regions when host backend and address
space processing is introduced, now cxl_create_dc_regions is called only
when host backend exists.
7. Updated the name of the definitions related to DC extents for consistency.
8. Updated dynamic capacity event record definition to align with spec r3.1.
9. Changed the dynamic capacity request processing logic: for a release request,
extra checks are done against the pending list to remove an extent not yet added.
10. Changed the return value of cxl_create_dc_regions so the return value can be
used to remove the extent from the list if needed.
11. offset and size in the qmp interface are changed to be byte-wise while the
original is MiB-wise.
12. Fixed bugs in handling bitmap for dpa range existence.
13. NOTE: in previous version DC is set to non-volatile, while in this version
we change it to volatile per Jonathan's suggestion.
14. Updated the doc in qapi/cxl.json.

Thanks to Jonathan for the detailed review of the last version[1].

The code is tested with Ira's last kernel DCD patch set [2] with some minor
bug fixes[3]. Tested operations include:
1. create DC region;
2. Add/release DC extents;
3. convert DC capacity into system RAM (no real read/write to DCD tested);


v3: 
[1] 
https://lore.kernel.org/linux-cxl/20231107180907.553451-1-nifan@gmail.com/T/#t
[2] https://github.com/weiny2/linux-kernel/tree/dcd-v3-2023-10-30
[3] 
https://github.com/moking/linux-dcd/commit/9d24fa6e5d39f934623220953caecc080f93e964

Fan Ni (10):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr
size instead of mr as argument
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions

 hw/cxl/cxl-mailbox-utils.c  | 507 +++-
 hw/mem/cxl_type3.c  | 559 +---
 hw/mem/cxl_type3_stubs.c|  14 +
 include/hw/cxl/cxl_device.h |  61 +++-
 include/hw/cxl/cxl_events.h |  18 ++
 qapi/cxl.json   |  61 +++-
 6 files changed, 1174 insertions(+), 46 deletions(-)

-- 
2.43.0




[PATCH v4 03/10] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 88e3b733e3..7d2b74c9c5 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -802,7 +802,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(>total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+ cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>volatile_capacity,
@@ -1178,7 +1178,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index e8801805b9..244d2b5fd5 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -837,7 +837,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1010,7 +1010,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 6df7fecdf1..80188db670 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -234,7 +234,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 
-- 
2.43.0




[PATCH v4 10/10] hw/mem/cxl_type3: Add dpa range validation for accesses to DC regions

2024-02-21 Thread nifan . cxl
From: Fan Ni 

A DPA range in a DC region is not valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a DC block in the region has been backed by a DC extent.
Each bit in the bitmap represents one DC block. When a DC extent is
added, the bits of all the blocks in the extent are set; they are
cleared when the extent is released.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  3 ++
 hw/mem/cxl_type3.c  | 82 +
 include/hw/cxl/cxl_device.h |  7 
 3 files changed, 92 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 34c4ebbd12..fd3be2f9cf 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1676,17 +1676,20 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 found = true;
 cxl_remove_extent_from_extent_list(extent_list, ent);
 ct3d->dc.total_extent_count -= 1;
+ct3_clear_region_block_backed(ct3d, ent_start_dpa, ent_len);
 
 if (len1) {
 cxl_insert_extent_to_extent_list(extent_list,
  ent_start_dpa, len1,
  NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, ent_start_dpa, len1);
 }
 if (len2) {
 cxl_insert_extent_to_extent_list(extent_list, dpa + len,
  len2, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa + len, len2);
 }
 break;
 /*Currently we reject the attempt to remove a superset*/
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b8c4273e99..a56906db25 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -660,6 +660,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 
 region_base += region->len;
 ct3d->dc.total_capacity += region->len;
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
 }
 QTAILQ_INIT(>dc.extents);
 QTAILQ_INIT(>dc.extents_pending_to_add);
@@ -667,6 +668,17 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 return true;
 }
 
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+int i;
+struct CXLDCDRegion *region;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+g_free(region->blk_bitmap);
+}
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -860,6 +872,7 @@ err_free_special_ops:
 g_free(regs->special_ops);
 err_address_space_free:
 if (ct3d->dc.host_dc) {
+cxl_destroy_dc_regions(ct3d);
 address_space_destroy(>dc.host_dc_as);
 }
 if (ct3d->hostpmem) {
@@ -881,6 +894,7 @@ static void ct3_exit(PCIDevice *pci_dev)
 cxl_doe_cdat_release(cxl_cstate);
 g_free(regs->special_ops);
 if (ct3d->dc.host_dc) {
+cxl_destroy_dc_regions(ct3d);
 address_space_destroy(>dc.host_dc_as);
 }
 if (ct3d->hostpmem) {
@@ -891,6 +905,70 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * Mark the DPA range [dpa, dpa + len) to be backed and accessible. This
+ * happens when a DC extent is added and accepted by the host.
+ */
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+CXLDCDRegion *region;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+   len / region->block_size);
+}
+
+/*
+ * Check whether the DPA range [dpa, dpa + len) is backed with DC extents.
+ * Used when validating read/write to dc regions
+ */
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+  uint64_t len)
+{
+CXLDCDRegion *region;
+uint64_t nbits;
+long nr;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return false;
+}
+
+nr = (dpa - region->base) / region->block_size;
+nbits = DIV_ROUND_UP(len, region->block_size);
+/*
+ * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
+ * backed with DC extents, return true; else return false.
+ */
+return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + 
nbits;
+}
+
+/*
+ * Mark the DPA range [dpa, dpa + len) to be unbacked and inaccessible. This
+ * happens when a dc extent is released by the host.
+ */
+void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+

[PATCH v4 06/10] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Add a (file/memory backed) host backend for dynamic capacity. All the
dynamic capacity regions share a single, large enough host backend. Set up
an address space for DC regions to support read/write operations to dynamic
capacity for DCD.

With the change, the following support is added:
1. Add a new property "volatile-dc-memdev" to the type3 device to point to the
   host memory backend for dynamic capacity. Currently, all DC regions share
   one host backend.
2. Add an address space for dynamic capacity for read/write support;
3. Create cdat entries for each dynamic capacity region;
4. Fix dvsec range registers to include DC regions.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  16 ++-
 hw/mem/cxl_type3.c  | 188 
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 165 insertions(+), 43 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 7d2b74c9c5..f95e417683 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -621,7 +621,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = _TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = >cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -635,7 +636,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -792,7 +794,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = >cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -834,9 +837,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1178,7 +1183,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 6e5f908fb1..b966fa4f10 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -44,7 +45,8 @@ enum {
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
   int dsmad_handle, uint64_t size,
-  bool is_pmem, uint64_t dpa_base)
+  bool is_pmem, bool is_dynamic,
+  uint64_t dpa_base)
 {
 g_autofree CDATDsmas *dsmas = NULL;
 g_autofree CDATDslbis *dslbis0 = NULL;
@@ -60,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
+ (is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
 .DPA_base = dpa_base,
 .DPA_length = size,
 };
@@ -148,12 +151,13 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+MemoryRegion *dc_mr = NULL;
 uint64_t 

[PATCH v4 09/10] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

Note: we skip any FM-issued extent release request if the exact extent
does not exist in the extent list of the device. We will loosen this
restriction later once we have partial release support in the kernel.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 0,
  "len": 134217728
  },
  {
  "dpa": 134217728,
  "len": 134217728
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 134217728,
  "len": 134217728
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  26 +++-
 hw/mem/cxl_type3.c  | 249 +++-
 hw/mem/cxl_type3_stubs.c|  14 ++
 include/hw/cxl/cxl_device.h |   7 +
 include/hw/cxl/cxl_events.h |  18 +++
 qapi/cxl.json   |  61 -
 6 files changed, 366 insertions(+), 9 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 65ed28f700..34c4ebbd12 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1418,7 +1418,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, int nr, int size)
+bool test_any_bits_set(const unsigned long *addr, int nr, int size)
 {
 unsigned long res = find_next_bit(addr, size + nr, nr);
 
@@ -1456,7 +1456,7 @@ CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
  uint64_t dpa,
  uint64_t len,
  uint8_t *tag,
@@ -1597,16 +1597,28 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 }
 }
 
-/*
- * TODO: we will add a pending extent list based on event log record
- * and verify the input response; also, the "More" flag is not
- * considered at the moment.
- */
+QTAILQ_FOREACH(ent, >dc.extents_pending_to_add, node) {
+if (ent->start_dpa <= dpa &&
+dpa + len <= ent->start_dpa + ent->len) {
+break;
+}
+}
+if (!ent) {
+return CXL_MBOX_INVALID_PA;
+}
+
+cxl_remove_extent_from_extent_list(>dc.extents_pending_to_add,
+   ent);
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
 }
 
+/*
+ * TODO: extents_pending_to_add needs to be cleared so the extents not
+ * accepted can be reclaimed base on spec r3.1: 8.2.9.9.9.3
+ */
+
 return CXL_MBOX_SUCCESS;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index f4edada303..b8c4273e99 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -662,6 +662,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 ct3d->dc.total_capacity += region->len;
 }
 QTAILQ_INIT(>dc.extents);
+QTAILQ_INIT(>dc.extents_pending_to_add);
 
 return true;
 }
@@ -1424,7 +1425,8 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
 return CXL_EVENT_TYPE_FAIL;
 case CXL_EVENT_LOG_FATAL:
 return CXL_EVENT_TYPE_FATAL;
-/* DCD not yet supported */
+case CXL_EVENT_LOG_DYNCAP:
+return CXL_EVENT_TYPE_DYNAMIC_CAP;
 default:
 return -EINVAL;
 }
@@ -1675,6 +1677,251 @@ void qmp_cxl_inject_memory_module_event(const char 
*path, CxlEventLog log,
 }
 }
 
+/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
+static const QemuUUID dynamic_capacity_uuid = {
+.data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
+ 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
+};
+
+typedef enum CXLDCEventType {
+DC_EVENT_ADD_CAPACITY = 0x0,
+DC_EVENT_RELEASE_CAPACITY = 0x1,
+DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
+DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
+DC_EVENT_ADD_CAPACITY_RSP = 

[PATCH v4 05/10] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument

2024-02-21 Thread nifan . cxl
From: Fan Ni 

The function ct3_build_cdat_entries_for_mr only uses the size of the passed
memory region argument, so refactor the function definition to make the
passed arguments more specific.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index c61cd2b5ac..6e5f908fb1 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -43,7 +43,7 @@ enum {
 };
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-  int dsmad_handle, MemoryRegion *mr,
+  int dsmad_handle, uint64_t size,
   bool is_pmem, uint64_t dpa_base)
 {
 g_autofree CDATDsmas *dsmas = NULL;
@@ -62,7 +62,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .DSMADhandle = dsmad_handle,
 .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -131,7 +131,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  */
 .EFI_memory_type_attr = is_pmem ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -148,6 +148,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
@@ -162,6 +163,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+vmr_size = memory_region_size(volatile_mr);
 }
 
 if (ct3d->hostpmem) {
@@ -170,21 +172,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+pmr_size = memory_region_size(nonvolatile_mr);
 }
 
 table = g_malloc0(len * sizeof(*table));
 
 /* Now fill them in */
 if (volatile_mr) {
-ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
   false, 0);
 cur_ent = CT3_CDAT_NUM_ENTRIES;
 }
 
 if (nonvolatile_mr) {
-uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
+uint64_t base = vmr_size;
 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-  nonvolatile_mr, true, base);
+  pmr_size, true, base);
 cur_ent += CT3_CDAT_NUM_ENTRIES;
 }
 assert(len == cur_ent);
-- 
2.43.0




[PATCH v4 08/10] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 288 
 include/hw/cxl/cxl_device.h |   2 +
 2 files changed, 290 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index dae7fe00ed..65ed28f700 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1412,6 +1414,286 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, int nr, int size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+int i;
+CXLDCDRegion *region = >dc.regions[0];
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents 
that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = >dc.regions[i];
+if (dpa >= region->base) {
+if (dpa + len > region->base + region->len) {
+return NULL;
+}
+return region;
+}
+}
+
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCExtent *extent;
+
+extent = g_new0(CXLDCExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+static void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+   CXLDCExtent *extent)
+{
+QTAILQ_REMOVE(list, extent, node);
+g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+uint32_t num_entries_updated;
+uint8_t flags;
+uint8_t rsvd[3];
+/* CXL r3.1 Table 8-169: Updated Extent List */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+const CXLUpdateDCExtentListInPl *in)
+{
+uint64_t min_block_size = UINT64_MAX;
+CXLDCDRegion *region = >dc.regions[0];
+CXLDCDRegion *lastregion = >dc.regions[ct3d->dc.num_regions - 1];
+g_autofree unsigned long *blk_bitmap = NULL;
+uint64_t dpa, len;
+uint32_t i;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+min_block_size = MIN(min_block_size, region->block_size);
+}
+
+blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
+ ct3d->dc.regions[0].base) / min_block_size);
+
+for (i = 0; i < in->num_entries_updated; i++) {
+dpa = in->updated_entries[i].start_dpa;
+len = in->updated_entries[i].len;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return CXL_MBOX_INVALID_PA;
+}
+
+dpa -= ct3d->dc.regions[0].base;
+if (dpa % region->block_size || len % region->block_size) 

[PATCH v4 04/10] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2024-02-21 Thread nifan . cxl
From: Fan Ni 

With the change, when setting up memory for type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To make it easier, other region parameters
like region base, length, and block size are hard coded. If needed,
these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support like below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

However, we cannot really read/write to the DC regions due to lack of
1. host backend and address space setup for DC regions;
2. mailbox command support for adding/releasing DC extents.

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 40 
 1 file changed, 40 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 244d2b5fd5..c61cd2b5ac 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -567,6 +567,40 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * TODO: dc region configuration will be updated once host backend and address
+ * space support is added for DCD.
+ */
+static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 2 * GiB;
+uint64_t blk_size = 2 * MiB;
+CXLDCDRegion *region;
+
+if (ct3d->hostvmem) {
+region_base += ct3d->hostvmem->size;
+}
+if (ct3d->hostpmem) {
+region_base += ct3d->hostpmem->size;
+}
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = >dc.regions[i];
+region->base = region_base;
+region->decode_len = decode_len;
+region->len = region_len;
+region->block_size = blk_size;
+/* dsmad_handle is set when creating cdat table entries */
+region->flags = 0;
+
+region_base += region->len;
+}
+
+return true;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -635,6 +669,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (!cxl_create_dc_regions(ct3d, errp)) {
+error_setg(errp, "setup DC regions failed");
+return false;
+}
+
 return true;
 }
 
@@ -930,6 +969,7 @@ static Property ct3_props[] = {
  HostMemoryBackend *),
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.43.0




[PATCH v4 07/10] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2024-02-21 Thread nifan . cxl
From: Fan Ni 

Add a dynamic capacity extent list representation to the definition of
CXLType3Dev and add the Get Dynamic Capacity Extent List mailbox command per
CXL spec r3.1, section 8.2.9.9.9.2.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 71 +
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 23 
 3 files changed, 95 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index f95e417683..dae7fe00ed 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -83,6 +83,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1344,6 +1345,73 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct get_dyn_cap_ext_list_in_pl {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_out_pl {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_in_pl *in = (void *)payload_in;
+struct get_dyn_cap_ext_list_out_pl *out = (void *)payload_out;
+uint16_t record_count = 0, i = 0, record_done = 0;
+CXLDCExtentList *extent_list = >dc.extents;
+CXLDCExtent *ent;
+uint16_t out_pl_len;
+uint32_t start_extent_id = in->start_extent_id;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+stl_le_p(>count, record_count);
+stl_le_p(>total_extents, ct3d->dc.total_extent_count);
+stl_le_p(>generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(>records[record_done].start_dpa, ent->start_dpa);
+stq_le_p(>records[record_done].len, ent->len);
+memcpy(>records[record_done].tag, ent->tag, 0x10);
+stw_le_p(>records[record_done].shared_seq, ent->shared_seq);
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1391,6 +1459,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b966fa4f10..f4edada303 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -661,6 +661,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 region_base += region->len;
 ct3d->dc.total_capacity += region->len;
 }
+QTAILQ_INIT(>dc.extents);
 
 return true;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 2f244da9a1..12a6fb47a9 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -424,6 +424,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_REGION_NUM 8
 
+typedef struct CXLDCExtentRaw {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+} QEMU_PACKED CXLDCExtentRaw;
+
+typedef struct CXLDCExtent {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+
+QTAILQ_ENTRY(CXLDCExtent) node;
+} CXLDCExtent;
+typedef 

[PATCH v3 8/9] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Since fabric manager emulation is not supported yet, the change implements
the functions to add/release dynamic capacity extents as QMP interfaces.

Note: we block any FM-issued extent release request if the exact extent
does not exist in the extent list of the device. We will loosen this
restriction later once we have partial release support in the kernel.

1. Add dynamic capacity extents:

For example, the command to add two contiguous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 0,
  "len": 128
  },
  {
  "dpa": 128,
  "len": 128
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "region-id": 0,
  "extents": [
  {
  "dpa": 128,
  "len": 128
  }
  ]
  }
}

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  25 +++-
 hw/mem/cxl_type3.c  | 225 +++-
 hw/mem/cxl_type3_stubs.c|  14 +++
 include/hw/cxl/cxl_device.h |   8 +-
 include/hw/cxl/cxl_events.h |  15 +++
 qapi/cxl.json   |  60 +-
 6 files changed, 338 insertions(+), 9 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 9f788b03b6..8e6a98753a 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1362,7 +1362,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
  * Check whether any bit between addr[nr, nr+size) is set,
  * return true if any bit is set, otherwise return false
  */
-static bool test_any_bits_set(const unsigned long *addr, int nr, int size)
+bool test_any_bits_set(const unsigned long *addr, int nr, int size)
 {
 unsigned long res = find_next_bit(addr, size + nr, nr);
 
@@ -1400,7 +1400,7 @@ CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 return NULL;
 }
 
-static void cxl_insert_extent_to_extent_list(CXLDCDExtentList *list,
+void cxl_insert_extent_to_extent_list(CXLDCDExtentList *list,
  uint64_t dpa,
  uint64_t len,
  uint8_t *tag,
@@ -1538,15 +1538,28 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 }
 }
 
-/*
- * TODO: add a pending extent list based on event log record and
- * verify the input response
- */
+QTAILQ_FOREACH(ent, >dc.extents_pending_to_add, node) {
+if (ent->start_dpa <= dpa &&
+dpa + len <= ent->start_dpa + ent->len) {
+break;
+}
+}
+if (ent) {
+QTAILQ_REMOVE(>dc.extents_pending_to_add, ent, node);
+g_free(ent);
+} else {
+return CXL_MBOX_INVALID_PA;
+}
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
 }
 
+/*
+ * TODO: extents_pending_to_add needs to be cleared so the extents not
+ * accepted can be reclaimed base on spec r3.0: 8.2.9.8.9.3
+ */
+
 return CXL_MBOX_SUCCESS;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 482329a499..43cea3d818 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -813,6 +813,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 region_base += region->len;
 }
 QTAILQ_INIT(>dc.extents);
+QTAILQ_INIT(>dc.extents_pending_to_add);
 
 return 0;
 }
@@ -1616,7 +1617,8 @@ static int ct3d_qmp_cxl_event_log_enc(CxlEventLog log)
 return CXL_EVENT_TYPE_FAIL;
 case CXL_EVENT_LOG_FATAL:
 return CXL_EVENT_TYPE_FATAL;
-/* DCD not yet supported */
+case CXL_EVENT_LOG_DYNCAP:
+return CXL_EVENT_TYPE_DYNAMIC_CAP;
 default:
 return -EINVAL;
 }
@@ -1867,6 +1869,227 @@ void qmp_cxl_inject_memory_module_event(const char 
*path, CxlEventLog log,
 }
 }
 
+/* CXL r3.0 Table 8-47: Dynamic Capacity Event Record */
+static const QemuUUID dynamic_capacity_uuid = {
+.data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
+ 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
+};
+
+typedef enum CXLDCEventType {
+DC_EVENT_ADD_CAPACITY = 0x0,
+DC_EVENT_RELEASE_CAPACITY = 0x1,
+DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
+DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
+DC_EVENT_ADD_CAPACITY_RSP = 0x4,
+DC_EVENT_CAPACITY_RELEASED = 0x5,
+DC_EVENT_NUM
+} CXLDCEventType;
+
+/*
+ * Check whether the exact extent exists in the list
+ * 

[PATCH v3 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Not every dpa range in the dc regions is valid to access until an extent
covering the range has been added. Add a bitmap for each region to
record whether a dc block in the region has been backed by a dc extent.
Each bit in the bitmap represents one dc block. When a dc extent is
added, the bits of all the blocks in the extent are set; they are
cleared again when the extent is released.

Signed-off-by: Fan Ni 

--
JC changes:
- Rebase on what will be next gitlab.com/jic23/qemu CXL staging tree.
- Drop unnecessary handling of failed bitmap allocations. In common with
  most QEMU allocations they fail hard anyway.
- Use previously factored out cxl_find_region() helper
- Minor editorial stuff in comments such as spec version references
  according to the standard form I'm trying to push through the code.
Picked up Jørgen's fix:
https://lore.kernel.org/qemu-devel/d0d7ca1d-81bc-19b3-4904-d60046ded...@wdc.com/T/#u
---
 hw/cxl/cxl-mailbox-utils.c  | 31 +--
 hw/mem/cxl_type3.c  | 78 +
 include/hw/cxl/cxl_device.h | 15 +--
 3 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 8e6a98753a..6be92fb5ba 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1401,10 +1401,9 @@ CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, 
uint64_t dpa, uint64_t len)
 }
 
 void cxl_insert_extent_to_extent_list(CXLDCDExtentList *list,
- uint64_t dpa,
- uint64_t len,
- uint8_t *tag,
- uint16_t shared_seq)
+  uint64_t dpa, uint64_t len,
+  uint8_t *tag,
+  uint16_t shared_seq)
 {
 CXLDCDExtent *extent;
 
@@ -1421,6 +1420,13 @@ void cxl_insert_extent_to_extent_list(CXLDCDExtentList 
*list,
 QTAILQ_INSERT_TAIL(list, extent, node);
 }
 
+static void cxl_remove_extent_to_extent_list(CXLDCDExtentList *list,
+ CXLDCDExtent *ent)
+{
+QTAILQ_REMOVE(list, ent, node);
+g_free(ent);
+}
+
 /*
  * CXL r3.0 Table 8-129: Add Dynamic Capacity Response Input Payload
  * CXL r3.0 Table 8-131: Release Dynamic Capacity Input Payload
@@ -1545,14 +1551,15 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 }
 }
 if (ent) {
-QTAILQ_REMOVE(&ct3d->dc.extents_pending_to_add, ent, node);
-g_free(ent);
+cxl_remove_extent_to_extent_list(&ct3d->dc.extents_pending_to_add,
+ ent);
 } else {
 return CXL_MBOX_INVALID_PA;
 }
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 
 /*
@@ -1601,16 +1608,22 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 uint64_t len1 = dpa - ent->start_dpa;
 uint64_t len2 = ent->start_dpa + ent->len - dpa - len;
 
+cxl_remove_extent_to_extent_list(extent_list, ent);
+ct3d->dc.total_extent_count -= 1;
+ct3_clear_region_block_backed(ct3d, dpa, len);
+
 if (len1) {
 cxl_insert_extent_to_extent_list(extent_list,
  ent->start_dpa, len1,
  NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 if (len2) {
 cxl_insert_extent_to_extent_list(extent_list, dpa + len,
  len2, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 break;
 /*Currently we reject the attempt to remove a superset*/
@@ -1621,11 +1634,7 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
 }
 }
 
-if (ent) {
-QTAILQ_REMOVE(extent_list, ent, node);
-g_free(ent);
-ct3d->dc.total_extent_count -= 1;
-} else {
+if (!ent) {
 /* Try to remove a non-existing extent */
 return CXL_MBOX_INVALID_PA;
 }
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 43cea3d818..4ec65a751a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -810,6 +810,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 /* dsmad_handle is set when creating cdat table entries */
 region->flags = 0;
 
+

[PATCH v3 6/9] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Add a dynamic capacity extent list representation to the definition of
CXLType3Dev and add the Get Dynamic Capacity Extent List mailbox command
per CXL r3.0 section 8.2.9.8.9.2.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 73 +
 hw/mem/cxl_type3.c  |  1 +
 include/hw/cxl/cxl_device.h | 23 
 3 files changed, 97 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 1f512b3e6b..56f4aa237a 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -82,6 +82,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1286,6 +1287,75 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.0 section 8.2.9.8.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct get_dyn_cap_ext_list_in_pl {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_out_pl {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED;
+
+struct get_dyn_cap_ext_list_in_pl *in = (void *)payload_in;
+struct get_dyn_cap_ext_list_out_pl *out = (void *)payload_out;
+uint16_t record_count = 0, i = 0, record_done = 0;
+CXLDCDExtentList *extent_list = &ct3d->dc.extents;
+CXLDCDExtent *ent;
+uint16_t out_pl_len;
+uint32_t start_extent_id = in->start_extent_id;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+/* May need more processing here in the future */
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+memset(out, 0, out_pl_len);
+stl_le_p(&out->count, record_count);
+stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
+stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(&out->records[record_done].start_dpa, ent->start_dpa);
+stq_le_p(&out->records[record_done].len, ent->len);
+memcpy(&out->records[record_done].tag, ent->tag, 0x10);
+stw_le_p(&out->records[record_done].shared_seq, ent->shared_seq);
+record_done++;
+if (record_done == record_count) {
+break;
+}
+}
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1333,6 +1403,9 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
 [DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
 cmd_dcd_get_dyn_cap_config, 2, 0 },
+[DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
+"DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
+8, 0 },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 152a51306d..c9d792a725 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -811,6 +811,7 @@ static int cxl_create_dc_regions(CXLType3Dev *ct3d)
 
 region_base += region->len;
 }
+QTAILQ_INIT(&ct3d->dc.extents);
 
 return 0;
 }
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 3dc6928bc5..5738c6f434 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -420,6 +420,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_REGION_NUM 8
 
+typedef struct CXLDCDExtentRaw {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+} QEMU_PACKED CXLDCExtentRaw;
+
+typedef struct CXLDCDExtent {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+
+QTAILQ_ENTRY(CXLDCDExtent) node;
+} 

[PATCH v3 4/9] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2023-11-07 Thread nifan . cxl
From: Fan Ni 

With the change, when setting up memory for type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To make it easier, other region parameters
like region base, length, and block size are hard coded. If needed,
these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support as below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region> /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Signed-off-by: Fan Ni 
---
 hw/mem/cxl_type3.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 754c885cd1..2d67d2015c 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -721,6 +721,36 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+static int cxl_create_dc_regions(CXLType3Dev *ct3d)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 8; /* 8*256MB */
+uint64_t blk_size = 2 * MiB;
+CXLDCDRegion *region;
+
+if (ct3d->hostvmem) {
+region_base += ct3d->hostvmem->size;
+}
+if (ct3d->hostpmem) {
+region_base += ct3d->hostpmem->size;
+}
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = &ct3d->dc.regions[i];
+region->base = region_base;
+region->decode_len = decode_len;
+region->len = region_len;
+region->block_size = blk_size;
+/* dsmad_handle is set when creating cdat table entries */
+region->flags = 0;
+
+region_base += region->len;
+}
+
+return 0;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -789,6 +819,10 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (cxl_create_dc_regions(ct3d)) {
+return false;
+}
+
 return true;
 }
 
@@ -1108,6 +1142,7 @@ static Property ct3_props[] = {
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
 DEFINE_PROP_UINT16("spdm", CXLType3Dev, spdm_port, 0),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.42.0




[PATCH v3 1/9] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Based on CXL spec 3.0 Table 8-94 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index f1145e9671..8eceedfa87 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -753,8 +754,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
@@ -780,6 +782,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(&id->inject_poison_limit, 0);
+stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
2.42.0




[PATCH v3 0/9] Enabling DCD emulation support in Qemu

2023-11-07 Thread nifan . cxl
From: Fan Ni 


This patch series is based on Jonathan's branch cxl-2023-09-26.

The main changes include,
1. Update cxl_find_dc_region to detect the case where the range of an extent
crosses multiple DC regions.
2. Add comments to explain the checks performed in function
cxl_detect_malformed_extent_list. (Jonathan)
3. Minimize the checks in cmd_dcd_add_dyn_cap_rsp.(Jonathan)
4. Update total_extent_count in add/release dynamic capacity response function.
(Ira and Jorgen Hansen).
5. Fix the logic issue in test_bits and rename it to
test_any_bits_set to clarify its function.
6. Add pending extent list for dc extent add event.
7. When add extent response is received, use the pending-to-add list to
verify the extents are valid.
8. Add test_any_bits_set and cxl_insert_extent_to_extent_list declaration to
cxl_device.h so it can be used in different files.
9. Updated ct3d_qmp_cxl_event_log_enc to include dynamic capacity event
log type.
10. Extract the functionality to delete extent from extent list to a helper
function.
11. Move the update of the bitmap which reflects which blocks are backed with
dc extents from the moment when a dc extent is offered to the moment when it
is accepted by the host.
12. Free dc_name after calling address_space_init to avoid memory leak when
returning early. (Nathan)
13. Add code to detect and reject QMP requests without any extents. (Jonathan)
14. Add code to detect and reject QMP requests where the extent len is 0.
15. Change the QMP interface and move the region-id out of extents and now
each command only takes care of extent add/release request in a single
region. (Jonathan)
16. Change the region bitmap length from decode_len to len.
17. Rename "dpa" to "offset" in the add/release dc extent qmp interface.
(Jonathan)
18. Block any dc extent release command if the exact extent is not already in
the extent list of the device.

The code is tested together with Ira's kernel DCD support:
https://github.com/weiny2/linux-kernel/tree/dcd-v3-2023-10-30

Cover letter from v2 is here:
https://lore.kernel.org/linux-cxl/20230724162313.34196-1-fan...@samsung.com/T/#m63039621087023691c9749a0af1212deb5549ddf

Last version (v2) is here:
https://lore.kernel.org/linux-cxl/20230725183939.2741025-1-fan...@samsung.com/

More DCD related discussions are here:
https://lore.kernel.org/linux-cxl/650cc29ab3f64_50d07294e7@iweiny-mobl.notmuch/



Fan Ni (9):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output
payload of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative
and mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for
type3 memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory
devices
  hw/mem/cxl_type3: Add host backend and address space handling for DC
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity
extents
  hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

 hw/cxl/cxl-mailbox-utils.c  | 469 +-
 hw/mem/cxl_type3.c  | 548 +---
 hw/mem/cxl_type3_stubs.c|  14 +
 include/hw/cxl/cxl_device.h |  64 -
 include/hw/cxl/cxl_events.h |  15 +
 qapi/cxl.json   |  60 +++-
 6 files changed, 1123 insertions(+), 47 deletions(-)

-- 
2.42.0




[PATCH v3 3/9] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 include/hw/cxl/cxl_device.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index f80dd6474f..707fd9fe7f 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -774,7 +774,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(&id->total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(&id->persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(&id->volatile_capacity,
@@ -1149,7 +1149,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(&in->dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 6c1ccda159..754c885cd1 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -762,7 +762,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(&ct3d->hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -785,7 +785,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(&ct3d->hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -1008,7 +1008,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1188,7 +1188,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 334c51fddb..de6469eef7 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -232,7 +232,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 bool is_dcd;
-- 
2.42.0




[PATCH v3 7/9] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Per CXL spec 3.0, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.8.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.8.9.4.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 271 
 hw/mem/cxl_type3.c  |   3 +-
 include/hw/cxl/cxl_device.h |   5 +-
 3 files changed, 277 insertions(+), 2 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 56f4aa237a..9f788b03b6 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -83,6 +83,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1356,6 +1358,269 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, int nr, int size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCDRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+CXLDCDRegion *region = &ct3d->dc.regions[0];
+int i;
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.0 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = &ct3d->dc.regions[i];
+if (dpa >= region->base) {
+/*Should we compare with decode_len or len of the region??*/
+if (dpa + len > region->base +
+region->decode_len * CXL_CAPACITY_MULTIPLIER)
+return NULL;
+return region;
+}
+}
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCDExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCDExtent *extent;
+
+extent = g_new0(CXLDCDExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+} else {
+memset(extent->tag, 0, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+/*
+ * CXL r3.0 Table 8-129: Add Dynamic Capacity Response Input Payload
+ * CXL r3.0 Table 8-131: Release Dynamic Capacity Input Payload
+ */
+typedef struct updated_dc_extent_list_in_pl {
+uint32_t num_entries_updated;
+uint8_t rsvd[4];
+/* CXL r3.0 Table 8-130: Updated Extent List */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED updated_dc_extent_list_in_pl;
+
+/*
+ * Check that each extent in the extent list to operate on is valid:
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+const updated_dc_extent_list_in_pl *in)
+{
+uint64_t min_block_size = UINT64_MAX;
+CXLDCDRegion *region = &ct3d->dc.regions[0];
+CXLDCDRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
+g_autofree unsigned long *blk_bitmap = NULL;
+uint64_t dpa, len;
+uint32_t i;
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = &ct3d->dc.regions[i];
+min_block_size = MIN(min_block_size, region->block_size);
+}
+
+blk_bitmap = bitmap_new((lastregion->len + lastregion->base -
+ ct3d->dc.regions[0].base) / min_block_size);
+
+for (i = 0; i < in->num_entries_updated; i++) {
+dpa = in->updated_entries[i].start_dpa;
+len = in->updated_entries[i].len;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return CXL_MBOX_INVALID_PA;
+}
+
+dpa -= ct3d->dc.regions[0].base;
+if (dpa % region->block_size || len % region->block_size) 

[PATCH v3 5/9] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Add a (file/memory backed) host backend; all the dynamic capacity regions
share a single, large enough host backend. Set up address space for
DC regions to support read/write operations to dynamic capacity for DCD.

With the change, the following support is added:
1. Add a new property to type3 device "nonvolatile-dc-memdev" to point to host
   memory backend for dynamic capacity. Currently, all dc regions share
   one host backend.
2. Add namespace for dynamic capacity for read/write support;
3. Create cdat entries for each dynamic capacity region;
4. Fix dvsec range registers to include DC regions.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  |  16 ++-
 hw/mem/cxl_type3.c  | 198 +---
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 179 insertions(+), 39 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 707fd9fe7f..1f512b3e6b 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -596,7 +596,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -610,7 +611,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -764,7 +766,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -805,9 +808,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1149,7 +1154,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(&in->dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 2d67d2015c..152a51306d 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -31,6 +31,7 @@
 #include "hw/pci/spdm.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -44,8 +45,9 @@ enum {
 };
 
 static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
- int dsmad_handle, MemoryRegion *mr,
- bool is_pmem, uint64_t dpa_base)
+ int dsmad_handle, uint64_t size,
+ bool is_pmem, bool is_dynamic,
+ uint64_t dpa_base)
 {
 g_autofree CDATDsmas *dsmas = NULL;
 g_autofree CDATDslbis *dslbis0 = NULL;
@@ -64,9 +66,10 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },
 .DSMADhandle = dsmad_handle,
-.flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
+.flags = (is_pmem ? CDAT_DSMAS_FLAG_NV : 0) |
+(is_dynamic ? CDAT_DSMAS_FLAG_DYNAMIC_CAP : 0),
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -150,7 +153,7 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  

[PATCH v3 2/9] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2023-11-07 Thread nifan . cxl
From: Fan Ni 

Per cxl spec 3.0, add dynamic capacity region representative based on
Table 8-126 and extend the cxl type3 device definition to include dc region
information. Also, based on info in 8.2.9.8.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: decode_len of a dc region is aligned to 256 * MiB and needs to be divided
by 256 * MiB before being returned to the host for the "Get Dynamic Capacity
Configuration" mailbox command.

Signed-off-by: Fan Ni 
---
 hw/cxl/cxl-mailbox-utils.c  | 80 +
 hw/mem/cxl_type3.c  |  6 +++
 include/hw/cxl/cxl_device.h | 17 
 3 files changed, 103 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 8eceedfa87..f80dd6474f 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -80,6 +80,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1210,6 +1212,74 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.0 section 8.2.9.8.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct get_dyn_cap_config_in_pl {
+uint8_t region_cnt;
+uint8_t start_region_id;
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_out_pl {
+uint8_t num_regions;
+uint8_t rsvd1[7];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED;
+
+struct get_dyn_cap_config_in_pl *in = (void *)payload_in;
+struct get_dyn_cap_config_out_pl *out = (void *)payload_out;
+uint16_t record_count = 0, i;
+uint16_t out_pl_len;
+uint8_t start_region_id = in->start_region_id;
+
+if (start_region_id >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_region_id,
+in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+memset(out, 0, out_pl_len);
+out->num_regions = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(&out->records[i].base,
+ct3d->dc.regions[start_region_id + i].base);
+stq_le_p(&out->records[i].decode_len,
+ct3d->dc.regions[start_region_id + i].decode_len /
+CXL_CAPACITY_MULTIPLIER);
+stq_le_p(&out->records[i].region_len,
+ct3d->dc.regions[start_region_id + i].len);
+stq_le_p(&out->records[i].block_size,
+ct3d->dc.regions[start_region_id + i].block_size);
+stl_le_p(&out->records[i].dsmadhandle,
+ct3d->dc.regions[start_region_id + i].dsmadhandle);
+out->records[i].flags = ct3d->dc.regions[start_region_id + i].flags;
+}
+
+*len_out = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1254,6 +1324,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_clear_poison, 72, 0 },
 };
 
+static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
+[DCD_CONFIG][GET_DC_CONFIG] = { "DCD_GET_DC_CONFIG",
+cmd_dcd_get_dyn_cap_config, 2, 0 },
+};
+
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
 [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 18 },
 [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS",
@@ -1465,7 +1540,12 @@ void cxl_initialize_mailbox_swcci(CXLCCI *cci, 
DeviceState *intf,
 
 void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max)
 {
+CXLType3Dev *ct3d = CXL_TYPE3(d);
+
 cxl_copy_cci_commands(cci, cxl_cmd_set);
+if (ct3d->dc.num_regions) {
+cxl_copy_cci_commands(cci, cxl_cmd_set_dcd);
+}
 cci->d = d;
 
 /* No separation for PCI MB as protocol handled in PCI device */
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 7b4d1ee774..6c1ccda159 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c