Re: [RESEND PATCH v6 8/8] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.
> > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h > > index 7e5ad65c1d..d589f78202 100644 > > --- a/include/hw/cxl/cxl_device.h > > +++ b/include/hw/cxl/cxl_device.h > > @@ -232,6 +232,14 @@ REG64(CXL_MEM_DEV_STS, 0) > > FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1) > > FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3) > > > > +typedef struct CXLError { > > +QTAILQ_ENTRY(CXLError) node; > > +int type; /* Error code as per FE definition */ > > +uint32_t header[32]; > Instead of using 32 here, would it be better to use > CXL_RAS_ERR_HEADER_NUM? Yes, that would be better. Please send a patch. > > +} CXLError;
Re: [RESEND PATCH v6 8/8] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.
On Tue, Mar 07, 2023 at 07:26:41PM +, Fan Ni wrote: > > +typedef struct CXLError { > > +QTAILQ_ENTRY(CXLError) node; > > +int type; /* Error code as per FE definition */ > > +uint32_t header[32]; > Instead of using 32 here, would it be better to use > CXL_RAS_ERR_HEADER_NUM? merged as is, fix on top pls. -- MST
Re: [RESEND PATCH v6 8/8] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.
On Thu, Mar 02, 2023 at 01:37:09PM +, Jonathan Cameron wrote: > CXL uses PCI AER Internal errors to signal to the host that an error has > occurred. The host can then read more detailed status from the CXL RAS > capability. > > For uncorrectable errors: support multiple injection in one operation > as this is needed to reliably test multiple header logging support in an > OS. The equivalent feature doesn't exist for correctable errors, so only > one error need be injected at a time. > > Note: > - Header content needs to be manually specified in a fashion that >matches the specification for what can be in the header for each >error type. > > Injection via QMP: > { "execute": "qmp_capabilities" } > ... > { "execute": "cxl-inject-uncorrectable-errors", > "arguments": { > "path": "/machine/peripheral/cxl-pmem0", > "errors": [ > { > "type": "cache-address-parity", > "header": [ 3, 4] > }, > { > "type": "cache-data-parity", > "header": > [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] > }, > { > "type": "internal", > "header": [ 1, 2, 4] > } > ] > }} > ... > { "execute": "cxl-inject-correctable-error", > "arguments": { > "path": "/machine/peripheral/cxl-pmem0", > "type": "physical" > } } > > Signed-off-by: Jonathan Cameron > --- Reviewed-by: Fan Ni One minor thing, see below in "typedef struct CXLError". > v6: (Thanks to Philippe Mathieu-Daudé) > - Add Since entries in cxl.json > - Add error prints in the stub functions so that if they are called without > CONFIG_CXL_MEM_DEVICE then we get a useful print rather than just silently > eating them. > > --- > hw/cxl/cxl-component-utils.c | 4 +- > hw/mem/cxl_type3.c | 281 + > hw/mem/cxl_type3_stubs.c | 17 ++ > hw/mem/meson.build | 2 + > include/hw/cxl/cxl_component.h | 26 +++ > include/hw/cxl/cxl_device.h| 11 ++ > qapi/cxl.json | 128 +++ > qapi/meson.build | 1 + > qapi/qapi-schema.json | 1 + > 9 files changed, 470 insertions(+), 1 deletion(-) > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c > index 737b4764b9..b665d4f565 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -142,16 +142,18 @@ static void ras_init_common(uint32_t *reg_state, > uint32_t *write_msk) > * be handled as RO. > */ > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0); > +stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff); > /* Bits 12-13 and 17-31 reserved in CXL 2.0 */ > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); > stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); > stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); > stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0); > +stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f); > stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f); > stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f); > /* CXL switches and devices must set */ > -stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x00); > +stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200); > } > > static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c > index 6cdd988d1d..abe60b362c 100644 > --- a/hw/mem/cxl_type3.c > +++ b/hw/mem/cxl_type3.c > @@ -1,6 +1,7 @@ > #include "qemu/osdep.h" > #include "qemu/units.h" > #include "qemu/error-report.h" > +#include "qapi/qapi-commands-cxl.h" > #include "hw/mem/memory-device.h" > #include "hw/mem/pc-dimm.h" > #include "hw/pci/pci.h" > @@ -323,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int > which) > ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); > } > > +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) > +{ > +switch (qmp_err) { > +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_BE_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC: > +return CXL_RAS_UNC_ERR_CACHE_DATA_ECC; > +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY: > +return CXL_RAS_UNC_ERR_MEM_DATA_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY: > +return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY: > +return CXL_RAS_UNC_ERR_MEM_BE_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC: > +return
Re: [RESEND PATCH v6 8/8] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.
On Thu, Mar 02, 2023 at 01:37:09PM +, Jonathan Cameron wrote: > CXL uses PCI AER Internal errors to signal to the host that an error has > occurred. The host can then read more detailed status from the CXL RAS > capability. > > For uncorrectable errors: support multiple injection in one operation > as this is needed to reliably test multiple header logging support in an > OS. The equivalent feature doesn't exist for correctable errors, so only > one error need be injected at a time. > > Note: > - Header content needs to be manually specified in a fashion that >matches the specification for what can be in the header for each >error type. > > Injection via QMP: > { "execute": "qmp_capabilities" } > ... > { "execute": "cxl-inject-uncorrectable-errors", > "arguments": { > "path": "/machine/peripheral/cxl-pmem0", > "errors": [ > { > "type": "cache-address-parity", > "header": [ 3, 4] > }, > { > "type": "cache-data-parity", > "header": > [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] > }, > { > "type": "internal", > "header": [ 1, 2, 4] > } > ] > }} > ... > { "execute": "cxl-inject-correctable-error", > "arguments": { > "path": "/machine/peripheral/cxl-pmem0", > "type": "physical" > } } > > Signed-off-by: Jonathan Cameron I will assume the silence of QAPI maintainers implies acceptance. > --- > v6: (Thanks to Philippe Mathieu-Daudé) > - Add Since entries in cxl.json > - Add error prints in the stub functions so that if they are called without > CONFIG_CXL_MEM_DEVICE then we get a useful print rather than just silently > eating them. > > --- > hw/cxl/cxl-component-utils.c | 4 +- > hw/mem/cxl_type3.c | 281 + > hw/mem/cxl_type3_stubs.c | 17 ++ > hw/mem/meson.build | 2 + > include/hw/cxl/cxl_component.h | 26 +++ > include/hw/cxl/cxl_device.h| 11 ++ > qapi/cxl.json | 128 +++ > qapi/meson.build | 1 + > qapi/qapi-schema.json | 1 + > 9 files changed, 470 insertions(+), 1 deletion(-) > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c > index 737b4764b9..b665d4f565 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -142,16 +142,18 @@ static void ras_init_common(uint32_t *reg_state, > uint32_t *write_msk) > * be handled as RO. > */ > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0); > +stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff); > /* Bits 12-13 and 17-31 reserved in CXL 2.0 */ > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); > stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff); > stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); > stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff); > stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0); > +stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f); > stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f); > stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f); > /* CXL switches and devices must set */ > -stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x00); > +stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200); > } > > static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk, > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c > index 6cdd988d1d..abe60b362c 100644 > --- a/hw/mem/cxl_type3.c > +++ b/hw/mem/cxl_type3.c > @@ -1,6 +1,7 @@ > #include "qemu/osdep.h" > #include "qemu/units.h" > #include "qemu/error-report.h" > +#include "qapi/qapi-commands-cxl.h" > #include "hw/mem/memory-device.h" > #include "hw/mem/pc-dimm.h" > #include "hw/pci/pci.h" > @@ -323,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int > which) > ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); > } > > +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) > +{ > +switch (qmp_err) { > +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY: > +return CXL_RAS_UNC_ERR_CACHE_BE_PARITY; > +case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC: > +return CXL_RAS_UNC_ERR_CACHE_DATA_ECC; > +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY: > +return CXL_RAS_UNC_ERR_MEM_DATA_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY: > +return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY: > +return CXL_RAS_UNC_ERR_MEM_BE_PARITY; > +case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC: > +return CXL_RAS_UNC_ERR_MEM_DATA_ECC; > +case