Re: [PATCH 11/17] hw/block/nvme: add remaining mandatory controller parameters

2020-07-02 Thread Klaus Jensen
On Jul  3 00:46, Dmitry Fomichev wrote:
> LGTM with one small nit (see below)...
> 
> Reviewed-by: Dmitry Fomichev 
> 
> On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add support for any remaining mandatory controller operating parameters
> > (features).
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme.c   | 39 +--
> >  hw/block/nvme.h   | 18 ++
> >  hw/block/trace-events |  2 ++
> >  include/block/nvme.h  |  7 +++
> >  4 files changed, 60 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index da13ca1ddb60..647f408854ae 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -1057,8 +1057,16 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> > NvmeCmd *cmd, NvmeRequest *req)
> >  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> >  uint32_t result;
> > +uint8_t fid = NVME_GETSETFEAT_FID(dw10);
> > +uint16_t iv;
> >  
> > -switch (dw10) {
> > +trace_pci_nvme_getfeat(nvme_cid(req), fid, dw11);
> > +
> > +if (!nvme_feature_support[fid]) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +switch (fid) {
> >  case NVME_TEMPERATURE_THRESHOLD:
> >  result = 0;
> >  
> > @@ -1089,14 +1097,27 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, 
> > NvmeCmd *cmd, NvmeRequest *req)
> >   ((n->params.max_ioqpairs - 1) << 16));
> >  trace_pci_nvme_getfeat_numq(result);
> >  break;
> > +case NVME_INTERRUPT_VECTOR_CONF:
> > +iv = dw11 & 0xffff;
> > +if (iv >= n->params.max_ioqpairs + 1) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +result = iv;
> > +if (iv == n->admin_cq.vector) {
> > +result |= NVME_INTVC_NOCOALESCING;
> > +}
> > +
> > +result = cpu_to_le32(result);
> > +break;
> >  case NVME_ASYNCHRONOUS_EVENT_CONF:
> >  result = cpu_to_le32(n->features.async_config);
> >  break;
> >  case NVME_TIMESTAMP:
> >  return nvme_get_feature_timestamp(n, cmd);
> >  default:
> > -trace_pci_nvme_err_invalid_getfeat(dw10);
> > -return NVME_INVALID_FIELD | NVME_DNR;
> > +result = cpu_to_le32(nvme_feature_default[fid]);
> > +break;
> >  }
> >  
> >  req->cqe.result = result;
> > @@ -1125,8 +1146,15 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, 
> > NvmeCmd *cmd, NvmeRequest *req)
> >  {
> >  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> > +uint8_t fid = NVME_GETSETFEAT_FID(dw10);
> >  
> > -switch (dw10) {
> > +trace_pci_nvme_setfeat(nvme_cid(req), fid, dw11);
> > +
> > +if (!nvme_feature_support[fid]) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +switch (fid) {
> >  case NVME_TEMPERATURE_THRESHOLD:
> >  if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> >  break;
> > @@ -1173,8 +1201,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> > *cmd, NvmeRequest *req)
> >  case NVME_TIMESTAMP:
> >  return nvme_set_feature_timestamp(n, cmd);
> >  default:
> > -trace_pci_nvme_err_invalid_setfeat(dw10);
> > -return NVME_INVALID_FIELD | NVME_DNR;
> > +return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
> 
> In spec, it is "Changeable", could as well add that 'e' here

Good catch, typo fixed.

> 
> >  }
> >  return NVME_SUCCESS;
> >  }
> > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > index 16a254d30b4e..d0763eb59e5d 100644
> > --- a/hw/block/nvme.h
> > +++ b/hw/block/nvme.h
> > @@ -90,6 +90,24 @@ typedef struct NvmeFeatureVal {
> >  uint32_tasync_config;
> >  } NvmeFeatureVal;
> >  
> > +static const uint32_t nvme_feature_default[0x100] = {
> > +[NVME_ARBITRATION]   = NVME_ARB_AB_NOLIMIT,
> > +};
> > +
> > +static const bool nvme_feature_support[0x100] = {
> > +[NVME_ARBITRATION]  = true,
> > +[NVME_POWER_MANAGEMENT] = true,
> > +[NVME_TEMPERATURE_THRESHOLD]= true,
> > +[NVME_ERROR_RECOVERY]   = true,
> > +[NVME_VOLATILE_WRITE_CACHE] = true,
> > +[NVME_NUMBER_OF_QUEUES] = true,
> > +[NVME_INTERRUPT_COALESCING] = true,
> > +[NVME_INTERRUPT_VECTOR_CONF]= true,
> > +[NVME_WRITE_ATOMICITY]  = true,
> > +[NVME_ASYNCHRONOUS_EVENT_CONF]  = true,
> > +[NVME_TIMESTAMP]= true,
> > +};
> > +
> >  typedef struct NvmeCtrl {
> >  PCIDeviceparent_obj;
> >  MemoryRegion iomem;
> > diff --git a/hw/block/trace-events b/hw/block/trace-events
> > index 091af16ca7d7..42e62f4649f8 100644
> > --- a/hw/block/trace-events
> > +++ b/hw/block/trace-events
> > @@ -46,6 +46,8 @@ pci_nvme_identify_ctrl(void) "identify controller"
> > 

Re: [PATCH 06/17] hw/block/nvme: add support for the get log page command

2020-07-02 Thread Klaus Jensen
On Jul  3 00:45, Dmitry Fomichev wrote:
> On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add support for the Get Log Page command and basic implementations of
> > the mandatory Error Information, SMART / Health Information and Firmware
> > Slot Information log pages.
> > 
> > In violation of the specification, the SMART / Health Information log
> > page does not persist information over the lifetime of the controller
> > because the device has no place to store such persistent state.
> > 
> > Note that the LPA field in the Identify Controller data structure
> > intentionally has bit 0 cleared because there is no namespace specific
> > information in the SMART / Health information log page.
> > 
> > Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> > Section 5.14 ("Get Log Page command").
> > 
> > Signed-off-by: Klaus Jensen 
> > Signed-off-by: Klaus Jensen 
> > Acked-by: Keith Busch 
> > ---
> >  hw/block/nvme.c   | 141 +-
> >  hw/block/nvme.h   |   2 +
> >  hw/block/trace-events |   2 +
> >  include/block/nvme.h  |   4 ++
> >  4 files changed, 148 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index f8e91a6965ed..fe5d052ab159 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -592,6 +592,141 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd 
> > *cmd)
> >  return NVME_SUCCESS;
> >  }
> >  
> > +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > buf_len,
> > +uint64_t off, NvmeRequest *req)
> > +{
> > +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +uint32_t nsid = le32_to_cpu(cmd->nsid);
> > +
> > +uint32_t trans_len;
> > +time_t current_ms;
> > +uint64_t units_read = 0, units_written = 0;
> > +uint64_t read_commands = 0, write_commands = 0;
> > +NvmeSmartLog smart;
> > +BlockAcctStats *s;
> > +
> > +if (nsid && nsid != 0xffffffff) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +s = blk_get_stats(n->conf.blk);
> > +
> > +units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> > +units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> > +read_commands = s->nr_ops[BLOCK_ACCT_READ];
> > +write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> > +
> > +if (off > sizeof(smart)) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +trans_len = MIN(sizeof(smart) - off, buf_len);
> > +
> > +memset(&smart, 0x0, sizeof(smart));
> > +
> > +smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> > +smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> > +smart.host_read_commands[0] = cpu_to_le64(read_commands);
> > +smart.host_write_commands[0] = cpu_to_le64(write_commands);
> > +
> > +smart.temperature[0] = n->temperature & 0xff;
> > +smart.temperature[1] = (n->temperature >> 8) & 0xff;
> 
> Why not change temperature[2] in NvmeSmartLog to uint16_t and use 
> cpu_to_le16() here?
> 

It's because of the weird alignment. But you are right and I changed it
to uint16_t and added the QEMU_PACKED attribute to the struct. It should
be there anyway.

> > +if ((n->temperature >= n->features.temp_thresh_hi) ||
> > +(n->temperature <= n->features.temp_thresh_low)) {
> > +smart.critical_warning |= NVME_SMART_TEMPERATURE;
> > +}
> > +
> > +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> > +smart.power_on_hours[0] =
> > +cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
> > +
> > +return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> > + prp2);
> > +}
> > +
> > +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > buf_len,
> > + uint64_t off, NvmeRequest *req)
> > +{
> > +uint32_t trans_len;
> > +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +NvmeFwSlotInfoLog fw_log = {
> > +.afi = 0x1,
> > +};
> > +
> > +strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> > +
> > +if (off > sizeof(fw_log)) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +trans_len = MIN(sizeof(fw_log) - off, buf_len);
> > +
> > +return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> > + prp2);
> > +}
> > +
> > +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t 
> > buf_len,
> > +uint64_t off, NvmeRequest *req)
> > +{
> > +uint32_t trans_len;
> > +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> > +NvmeErrorLog errlog;
> > +
> > +if (off > sizeof(errlog)) {
> > + 

Re: [PATCH 04/17] hw/block/nvme: add temperature threshold feature

2020-07-02 Thread Klaus Jensen
On Jul  3 00:44, Dmitry Fomichev wrote:
> On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > It might seem weird to implement this feature for an emulated device,
> > but it is mandatory to support and the feature is useful for testing
> > asynchronous event request support, which will be added in a later
> > patch.
> > 
> > Signed-off-by: Klaus Jensen 
> > Acked-by: Keith Busch 
> > Reviewed-by: Maxim Levitsky 
> > ---
> >  hw/block/nvme.c  | 48 
> >  hw/block/nvme.h  |  1 +
> >  include/block/nvme.h |  8 +++-
> >  3 files changed, 56 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index b7037a7d3504..5ca50646369e 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -59,6 +59,9 @@
> >  #define NVME_DB_SIZE  4
> >  #define NVME_CMB_BIR 2
> >  #define NVME_PMR_BIR 2
> > +#define NVME_TEMPERATURE 0x143
> > +#define NVME_TEMPERATURE_WARNING 0x157
> > +#define NVME_TEMPERATURE_CRITICAL 0x175
> >  
> >  #define NVME_GUEST_ERR(trace, fmt, ...) \
> >  do { \
> > @@ -827,9 +830,31 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl 
> > *n, NvmeCmd *cmd)
> >  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest 
> > *req)
> >  {
> >  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> > +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> >  uint32_t result;
> >  
> >  switch (dw10) {
> > +case NVME_TEMPERATURE_THRESHOLD:
> > +result = 0;
> > +
> > +/*
> > + * The controller only implements the Composite Temperature 
> > sensor, so
> > + * return 0 for all other sensors.
> > + */
> > +if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> > +break;
> > +}
> > +
> > +switch (NVME_TEMP_THSEL(dw11)) {
> > +case NVME_TEMP_THSEL_OVER:
> > +result = cpu_to_le16(n->features.temp_thresh_hi);
> > +break;
> > +case NVME_TEMP_THSEL_UNDER:
> > +result = cpu_to_le16(n->features.temp_thresh_low);
> > +break;
> > +}
> > +
> > +break;
> > 
> > >  case NVME_VOLATILE_WRITE_CACHE:
> >  result = blk_enable_write_cache(n->conf.blk);
> >  trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
> > @@ -874,6 +899,23 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> > *cmd, NvmeRequest *req)
> >  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> >  
> >  switch (dw10) {
> > +case NVME_TEMPERATURE_THRESHOLD:
> > +if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> > +break;
> > +}
> > +
> > +switch (NVME_TEMP_THSEL(dw11)) {
> > +case NVME_TEMP_THSEL_OVER:
> > +n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
> > +break;
> > +case NVME_TEMP_THSEL_UNDER:
> > +n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
> > +break;
> > +default:
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +break;
> >  case NVME_VOLATILE_WRITE_CACHE:
> >  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
> >  break;
> > @@ -1454,6 +1496,7 @@ static void nvme_init_state(NvmeCtrl *n)
> >  n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
> >  n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
> >  n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
> > +n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
> >  }
> >  
> >  static void nvme_init_blk(NvmeCtrl *n, Error **errp)
> > @@ -1611,6 +1654,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> > *pci_dev)
> >  id->acl = 3;
> >  id->frmw = 7 << 1;
> >  id->lpa = 1 << 0;
> > +
> > +/* recommended default value (~70 C) */
> > +id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> > +id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
> > +
> >  id->sqes = (0x6 << 4) | 0x6;
> >  id->cqes = (0x4 << 4) | 0x4;
> >  id->nn = cpu_to_le32(n->num_namespaces);
> > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > index 1bf5c80ed843..3acde10e1d2a 100644
> > --- a/hw/block/nvme.h
> > +++ b/hw/block/nvme.h
> > @@ -107,6 +107,7 @@ typedef struct NvmeCtrl {
> >  NvmeSQueue  admin_sq;
> >  NvmeCQueue  admin_cq;
> >  NvmeIdCtrl  id_ctrl;
> > +NvmeFeatureVal  features;
> >  } NvmeCtrl;
> >  
> >  /* calculate the number of LBAs that the namespace can accomodate */
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index 6d1fa6ff2228..bb651d0cbf5a 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -860,7 +860,13 @@ enum NvmeIdCtrlOncs {
> >  typedef struct NvmeFeatureVal {
> >  uint32_tarbitration;
> >  uint32_tpower_mgmt;
> > -uint32_ttemp_thresh;
> > +union {
> > +struct {
> > +

Re: [PATCH 01/17] hw/block/nvme: bump spec data structures to v1.3

2020-07-02 Thread Klaus Jensen
On Jul  3 00:44, Dmitry Fomichev wrote:
> On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add missing fields in the Identify Controller and Identify Namespace
> > data structures to bring them in line with NVMe v1.3.
> > 
> > This also adds data structures and defines for SGL support which
> > requires a couple of trivial changes to the nvme block driver as well.
> > 
> > Signed-off-by: Klaus Jensen 
> > Acked-by: Fam Zheng 
> > Reviewed-by: Maxim Levitsky 
> > ---
> >  block/nvme.c |  18 ++---
> >  hw/block/nvme.c  |  12 ++--
> >  include/block/nvme.h | 154 ++-
> >  3 files changed, 152 insertions(+), 32 deletions(-)
> > 
> > diff --git a/block/nvme.c b/block/nvme.c
> > index eb2f54dd9dc9..29e90557c428 100644
> > --- a/block/nvme.c
> > +++ b/block/nvme.c
> > @@ -446,7 +446,7 @@ static void nvme_identify(BlockDriverState *bs, int 
> > namespace, Error **errp)
> >  error_setg(errp, "Cannot map buffer for DMA");
> >  goto out;
> >  }
> > -cmd.prp1 = cpu_to_le64(iova);
> > +cmd.dptr.prp1 = cpu_to_le64(iova);
> >  
> >  if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
> >  error_setg(errp, "Failed to identify controller");
> > @@ -545,7 +545,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, 
> > Error **errp)
> >  }
> >  cmd = (NvmeCmd) {
> >  .opcode = NVME_ADM_CMD_CREATE_CQ,
> > -.prp1 = cpu_to_le64(q->cq.iova),
> > +.dptr.prp1 = cpu_to_le64(q->cq.iova),
> >  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
> >  .cdw11 = cpu_to_le32(0x3),
> >  };
> > @@ -556,7 +556,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, 
> > Error **errp)
> >  }
> >  cmd = (NvmeCmd) {
> >  .opcode = NVME_ADM_CMD_CREATE_SQ,
> > -.prp1 = cpu_to_le64(q->sq.iova),
> > +.dptr.prp1 = cpu_to_le64(q->sq.iova),
> >  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
> >  .cdw11 = cpu_to_le32(0x1 | (n << 16)),
> >  };
> > @@ -904,16 +904,16 @@ try_map:
> >  case 0:
> >  abort();
> >  case 1:
> > -cmd->prp1 = pagelist[0];
> > -cmd->prp2 = 0;
> > +cmd->dptr.prp1 = pagelist[0];
> > +cmd->dptr.prp2 = 0;
> >  break;
> >  case 2:
> > -cmd->prp1 = pagelist[0];
> > -cmd->prp2 = pagelist[1];
> > +cmd->dptr.prp1 = pagelist[0];
> > +cmd->dptr.prp2 = pagelist[1];
> >  break;
> >  default:
> > -cmd->prp1 = pagelist[0];
> > -cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
> > +cmd->dptr.prp1 = pagelist[0];
> > +cmd->dptr.prp2 = cpu_to_le64(req->prp_list_iova + 
> > sizeof(uint64_t));
> >  break;
> >  }
> >  trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries);
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 1aee042d4cb2..71b388aa0e20 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -397,8 +397,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> > NvmeCmd *cmd,
> >  NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> >  uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
> >  uint64_t slba = le64_to_cpu(rw->slba);
> > -uint64_t prp1 = le64_to_cpu(rw->prp1);
> > -uint64_t prp2 = le64_to_cpu(rw->prp2);
> > +uint64_t prp1 = le64_to_cpu(rw->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(rw->dptr.prp2);
> >  
> >  uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
> >  uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
> > @@ -795,8 +795,8 @@ static inline uint64_t nvme_get_timestamp(const 
> > NvmeCtrl *n)
> >  
> >  static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
> >  {
> > -uint64_t prp1 = le64_to_cpu(cmd->prp1);
> > -uint64_t prp2 = le64_to_cpu(cmd->prp2);
> > +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> >  
> >  uint64_t timestamp = nvme_get_timestamp(n);
> >  
> > @@ -834,8 +834,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, 
> > NvmeCmd *cmd)
> >  {
> >  uint16_t ret;
> >  uint64_t timestamp;
> > -uint64_t prp1 = le64_to_cpu(cmd->prp1);
> > -uint64_t prp2 = le64_to_cpu(cmd->prp2);
> > +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> > +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> >  
> >  ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
> >  sizeof(timestamp), prp1, prp2);
> > diff --git a/include/block/nvme.h b/include/block/nvme.h
> > index 1720ee1d5158..6d1fa6ff2228 100644
> > --- a/include/block/nvme.h
> > +++ b/include/block/nvme.h
> > @@ -377,15 +377,53 @@ enum NvmePmrmscMask {
> >  #define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
> >  (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
> >  
> > +enum NvmeSglDescriptorType {
> > +NVME_SGL_DESCR_TYPE_DATA_BLOCK  = 0x0,
> > +  

Re: [PATCH] softmmu/vl: Remove the check for colons in -accel parameters

2020-07-02 Thread Thomas Huth

On 18/06/2020 09.40, Thomas Huth wrote:

The new -accel option does not accept colons in the parameters anymore
(since it does not convert the parameters to -machine accel=... parameters
anymore). Thus we can now remove the check for colons in -accel:

$ qemu-system-x86_64 -accel kvm:tcg
qemu-system-x86_64: -accel kvm:tcg: invalid accelerator kvm:tcg

Signed-off-by: Thomas Huth 
---
  softmmu/vl.c | 5 -
  1 file changed, 5 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index f669c06ede..273acfcf6b 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -3485,11 +3485,6 @@ void qemu_init(int argc, char **argv, char **envp)
  g_slist_free(accel_list);
  exit(0);
  }
-if (optarg && strchr(optarg, ':')) {
-error_report("Don't use ':' with -accel, "
- "use -M accel=... for now instead");
-exit(1);
-}
  break;
  case QEMU_OPTION_usb:
  olist = qemu_find_opts("machine");



Ping?

 Thomas




Re: [PATCH] configure / util: Auto-detect the availability of openpty()

2020-07-02 Thread Thomas Huth

On 02/07/2020 23.33, Michele Denber wrote:

On 07/02/20 13:34, Thomas Huth wrote:


That's just the context, cut at three lines after the last change. 
Simply try to apply the patch with "patch -p1 -i ..." or "git am" in a 
separate git branch if you're using git. It should hopefully apply 
cleanly.


Looks like it partly worked:

root@hemlock:~/qemu-5.0.0# gpatch -p1 -i < configpatch.diff
gpatch: option requires an argument -- 'i'
gpatch: Try 'gpatch --help' for more information.
root@hemlock:~/qemu-5.0.0# gpatch -p1 < configpatch.diff
patching file configure
Hunk #1 succeeded at 5049 (offset -85 lines).
Hunk #2 succeeded at 7117 (offset -267 lines).
patching file util/qemu-openpty.c
Hunk #1 FAILED at 52.
Hunk #2 succeeded at 95 with fuzz 2 (offset 2 lines).
1 out of 2 hunks FAILED -- saving rejects to file util/qemu-openpty.c.rej
root@hemlock:~/qemu-5.0.0# cat util/qemu-openpty.c.rej
--- util/qemu-openpty.c
+++ util/qemu-openpty.c
@@ -52,7 +52,8 @@
  #endif

  #ifdef __sun__
-/* Once Solaris has openpty(), this is going to be removed. */
+
+#if !defined(HAVE_OPENPTY)
  static int openpty(int *amaster, int *aslave, char *name,
     struct termios *termp, struct winsize *winp)


Did you have a local modification to that file? If so, please revert it 
first before applying the patch (or add the "#if !defined(HAVE_OPENPTY)" 
now manually to see whether it works).


 Thanks,
  Thomas




[PATCH] hw/riscv: virt: Sort the SoC memmap table entries

2020-07-02 Thread Bin Meng
From: Bin Meng 

Adjust the PCIe memory maps to follow the order.

Signed-off-by: Bin Meng 
---

 hw/riscv/virt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 5ca49c5..aacfbda 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -57,14 +57,14 @@ static const struct MemmapEntry {
     [VIRT_TEST] =        {   0x100000,        0x1000 },
     [VIRT_RTC] =         {   0x101000,        0x1000 },
     [VIRT_CLINT] =       {  0x2000000,       0x10000 },
+    [VIRT_PCIE_PIO] =    {  0x3000000,       0x10000 },
     [VIRT_PLIC] =        {  0xc000000,     0x4000000 },
     [VIRT_UART0] =       { 0x10000000,         0x100 },
     [VIRT_VIRTIO] =      { 0x10001000,        0x1000 },
     [VIRT_FLASH] =       { 0x20000000,     0x4000000 },
-    [VIRT_DRAM] =        { 0x80000000,           0x0 },
-    [VIRT_PCIE_MMIO] =   { 0x40000000,    0x40000000 },
-    [VIRT_PCIE_PIO] =    { 0x03000000,    0x00010000 },
     [VIRT_PCIE_ECAM] =   { 0x30000000,    0x10000000 },
+    [VIRT_PCIE_MMIO] =   { 0x40000000,    0x40000000 },
+    [VIRT_DRAM] =        { 0x80000000,           0x0 },
 };
 
 #define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
-- 
2.7.4




[PATCH v3 3/7] roms/Makefile: Build the generic platform for RISC-V OpenSBI firmware

2020-07-02 Thread Bin Meng
From: Bin Meng 

The RISC-V generic platform is a flattened device tree (FDT) based
platform where all platform specific functionality is provided based
on FDT passed by previous booting stage. The support was added in
the upstream OpenSBI v0.8 release recently.

Update our Makefile to build the generic platform instead of building
virt and sifive_u separately for RISC-V OpenSBI firmware, and change
to use fw_dynamic type images as well.

Signed-off-by: Bin Meng 
Reviewed-by: Anup Patel 

---

Changes in v3:
- Change fw_jump to fw_dynamic in the make rules

Changes in v2:
- Copy the ELF images too in the make rules

 roms/Makefile | 32 ++--
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/roms/Makefile b/roms/Makefile
index f9acf39..5d9f15b 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -64,10 +64,8 @@ default help:
@echo "  u-boot.e500-- update u-boot.e500"
@echo "  u-boot.sam460  -- update u-boot.sam460"
@echo "  efi-- update UEFI (edk2) platform firmware"
-   @echo "  opensbi32-virt -- update OpenSBI for 32-bit virt machine"
-   @echo "  opensbi64-virt -- update OpenSBI for 64-bit virt machine"
-   @echo "  opensbi32-sifive_u -- update OpenSBI for 32-bit sifive_u 
machine"
-   @echo "  opensbi64-sifive_u -- update OpenSBI for 64-bit sifive_u 
machine"
+   @echo "  opensbi32-generic  -- update OpenSBI for 32-bit generic 
machine"
+   @echo "  opensbi64-generic  -- update OpenSBI for 64-bit generic 
machine"
@echo "  bios-microvm   -- update bios-microvm.bin (qboot)"
@echo "  clean  -- delete the files generated by the 
previous" \
  "build targets"
@@ -170,29 +168,19 @@ skiboot:
 efi: edk2-basetools
$(MAKE) -f Makefile.edk2
 
-opensbi32-virt:
+opensbi32-generic:
$(MAKE) -C opensbi \
CROSS_COMPILE=$(riscv32_cross_prefix) \
-   PLATFORM="qemu/virt"
-   cp opensbi/build/platform/qemu/virt/firmware/fw_jump.bin 
../pc-bios/opensbi-riscv32-virt-fw_jump.bin
+   PLATFORM="generic"
+   cp opensbi/build/platform/generic/firmware/fw_dynamic.bin 
../pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
+   cp opensbi/build/platform/generic/firmware/fw_dynamic.elf 
../pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
 
-opensbi64-virt:
+opensbi64-generic:
$(MAKE) -C opensbi \
CROSS_COMPILE=$(riscv64_cross_prefix) \
-   PLATFORM="qemu/virt"
-   cp opensbi/build/platform/qemu/virt/firmware/fw_jump.bin 
../pc-bios/opensbi-riscv64-virt-fw_jump.bin
-
-opensbi32-sifive_u:
-   $(MAKE) -C opensbi \
-   CROSS_COMPILE=$(riscv32_cross_prefix) \
-   PLATFORM="sifive/fu540"
-   cp opensbi/build/platform/sifive/fu540/firmware/fw_jump.bin 
../pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin
-
-opensbi64-sifive_u:
-   $(MAKE) -C opensbi \
-   CROSS_COMPILE=$(riscv64_cross_prefix) \
-   PLATFORM="sifive/fu540"
-   cp opensbi/build/platform/sifive/fu540/firmware/fw_jump.bin 
../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
+   PLATFORM="generic"
+   cp opensbi/build/platform/generic/firmware/fw_dynamic.bin 
../pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
+   cp opensbi/build/platform/generic/firmware/fw_dynamic.elf 
../pc-bios/opensbi-riscv64-generic-fw_dynamic.elf
 
 bios-microvm:
$(MAKE) -C qboot
-- 
2.7.4




[PATCH v3 2/7] roms/opensbi: Upgrade from v0.7 to v0.8

2020-07-02 Thread Bin Meng
From: Bin Meng 

Upgrade OpenSBI from v0.7 to v0.8.

The v0.8 release includes the following commits:

1bb00ab lib: No need to provide default PMP region using platform callbacks
a9eac67 include: sbi_platform: Combine reboot and shutdown into one callback
6585fab lib: utils: Add SiFive test device
4781545 platform: Add Nuclei UX600 platform
3a326af scripts: adapt binary archive script for Nuclei UX600
5bdf022 firmware: fw_base: Remove CSR_MTVEC update check
e6c1345 lib: utils/serial: Skip baudrate config if input frequency is zero
01a8c8e lib: utils: Improve fdt_parse_uart8250() API
0a0093b lib: utils: Add fdt_parse_uart8250_node() function
243b0d0 lib: utils: Remove redundant clint_ipi_sync() declaration
e3ad7c1 lib: utils: Rename fdt_parse_clint() to fdt_parse_compat_addr()
a39cd6f lib: utils: Add FDT match table based node lookup
dd33b9e lib: utils: Make fdt_get_node_addr_size() public function
66185b3 lib: utils: Add fdt_parse_sifive_uart_node() function
19e966b lib: utils: Add fdt_parse_hart_id() function
44dd7be lib: utils: Add fdt_parse_max_hart_id() API
f0eb503 lib: utils: Add fdt_parse_plic_node() function
1ac794c include: Add array_size() macro
8ff2b94 lib: utils: Add simple FDT timer framework
76f0f81 lib: utils: Add simple FDT ipi framework
75322a6 lib: utils: Add simple FDT irqchip framework
76a8940 lib: utils: Add simple FDT serial framework
7cc6fa4 lib: utils: Add simple FDT reset framework
4d06353 firmware: fw_base: Introduce optional fw_platform_init()
f1aa9e5 platform: Add generic FDT based platform support
1f21b99 lib: sbi: Print platform hart count at boot time
2ba7087 scripts: Add generic platform to create-binary-archive.sh
4f18c6e platform: generic: Add Sifive FU540 TLB flush range limit override
13717a8 platform: Remove qemu/virt directory
65c06b0 platform: Remove spike directory
d626037 docs: Add missing links in platform.md
7993ca2 include: sbi: Remove redundant page table related defines
5338679 lib: sbi_tlb: Fix remote TLB HFENCE VVMA implementation
dc38929 lib: sbi: Improve misa_string() implementation
433bac7 docs: platform/generic: Add details about stdout-path DT property
b4efa70 docs: platform/generic: Add details about IPI and timer expectations
dfd9dd6 docs: Add platform requirements document
c2286b6 docs: Fix ordering of pages in table of contents
7be75f5 docs: Don't use italic text in page title
63a513e lib: Rename unprivileged trap handler
aef9a60 lib: Add csr detect support
13ca20d lib: Create a separate math helper function file
79d0fad lib: utils: Update reserved memory fdt node even if PMP is not present
6a053f6 lib: Add support for hart specific features
b2df751 platform: Move platform features to hart
4938024 platform: fpga: Remove redundant platform specific features
ec0d2a7 lib: timer: Provide a hart based timer feature
1f235ec lib: Add platform features in boot time print
22c4334 lib: Add hart features in boot time print
36833ab lib: Optimize inline assembly for unprivilege access functions
38a4b54 firmware: Correct spelling mistakes
28b4052 lib: sbi: detect features before everything else in sbi_hart_init()
4984183 lib: sbi: Improve get_feature_str() implementation and usage
3aa1036 lib: sbi: Remove extra spaces from boot time prints
3a8fc81 lib: sbi: Print platform HART count just before boot HART id
63b0f5f include: sbi: Use scratch pointer as parmeter in HART feature APIs
2966510 lib: sbi: Few cosmetic improvements to HART feature detection
a38bea9 lib: sbi_hart: Detect number of supported PMP regions
89ba634 include: sbi: Add firmware extension constants
73d6ef3 lib: utils: Remove redundant parameters from PLIC init functions
446a9c6 lib: utils: Allow PLIC functions to be used for multiple PLICs
2c685c2 lib: utils: Extend fdt_find_match() Implementation
d30bb68 lib: utils/irqchip: Initialize all matching irqchip DT nodes
a9a9751 lib: utils: Allow CLINT functions to be used for multiple CLINTs
569dd64 lib: utils: Add fdt_parse_clint_node() function
6956e83 lib: utils/ipi: Initialize all matching ipi DT nodes
a63f05f lib: utils/timer: Initialize all matching timer DT nodes
30b6040 Makefile: Fix builtin DTB compilation for out-of-tree platforms
64f1408 firmware: fw_base: Make builtin DTB available to fw_platform_init()
4ce6b7a firmware: fw_base: Don't OR forced FW_OPTIONS
86ec534 firmware: Allow fw_platform_init() to return updated FDT location
c6c65ee Makefile: Preprocess builtin DTS
4e3876d Makefile: Add mechanism for platforms to have multiple builtin DTBs
72019ee platform: kendryte/k210: Use new mechanism of builtin DTB
51f0e4a firmware: Remove FW_PAYLOAD_FDT and related documentation
1b8c012 lib: Add RISC-V hypervisor v0.6.1 support
79bfd67 docs: Use doxygen config to mark the main page
106b888 docs: Remove redundant documentation about combined payload use case
9802906 platform: Add AE350 platform specific SBI handler
32f87e5 platform: Add AE350 cache control SBIs
e2c3f01 lib: Fix __sbi_hfence_gvma_vmid_gpa() and __sbi_hfence_vvma_asid_va()
6966ad0 

[PATCH v3 7/7] Makefile: Ship the generic platform bios images for RISC-V

2020-07-02 Thread Bin Meng
From: Bin Meng 

Update the install blob list to include the generic platform
fw_dynamic bios images.

Signed-off-by: Bin Meng 

---

Changes in v3:
- change fw_jump to fw_dynamic in the Makefile

Changes in v2:
- new patch: Makefile: Ship the generic platform bios images for RISC-V

 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index b1b8a5a..05e05bb 100644
--- a/Makefile
+++ b/Makefile
@@ -841,8 +841,8 @@ u-boot.e500 u-boot-sam460-20100605.bin \
 qemu_vga.ndrv \
 edk2-licenses.txt \
 hppa-firmware.img \
-opensbi-riscv32-sifive_u-fw_jump.bin opensbi-riscv32-virt-fw_jump.bin \
-opensbi-riscv64-sifive_u-fw_jump.bin opensbi-riscv64-virt-fw_jump.bin
+opensbi-riscv32-generic-fw_dynamic.bin opensbi-riscv32-generic-fw_dynamic.elf \
+opensbi-riscv64-generic-fw_dynamic.bin opensbi-riscv64-generic-fw_dynamic.elf
 
 
 DESCS=50-edk2-i386-secure.json 50-edk2-x86_64-secure.json \
-- 
2.7.4




[PATCH v3 1/7] configure: Create symbolic links for pc-bios/*.elf files

2020-07-02 Thread Bin Meng
From: Bin Meng 

Now that we need to ship the OpenSBI fw_dynamic.elf image for the
RISC-V Spike machine, we must create symbolic links for
pc-bios/*.elf files.

Signed-off-by: Bin Meng 
Reviewed-by: Alistair Francis 

---

(no changes since v2)

Changes in v2:
- new patch: configure: Create symbolic links for pc-bios/*.elf files

 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index 8a65240..75b855d 100755
--- a/configure
+++ b/configure
@@ -8501,6 +8501,7 @@ LINKS="$LINKS tests/qemu-iotests/check"
 LINKS="$LINKS python"
 for bios_file in \
 $source_path/pc-bios/*.bin \
+$source_path/pc-bios/*.elf \
 $source_path/pc-bios/*.lid \
 $source_path/pc-bios/*.rom \
 $source_path/pc-bios/*.dtb \
-- 
2.7.4




[PATCH v3 6/7] gitlab-ci/opensbi: Update GitLab CI to build generic platform

2020-07-02 Thread Bin Meng
From: Bin Meng 

This updates the GitLab CI opensbi job to build opensbi bios images
for the generic platform.

Signed-off-by: Bin Meng 
Reviewed-by: Anup Patel 

---

Changes in v3:
- Generate fw_dynamic images in the artifacts

Changes in v2:
- Include ELF images in the artifacts

 .gitlab-ci.d/opensbi.yml | 28 ++--
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/.gitlab-ci.d/opensbi.yml b/.gitlab-ci.d/opensbi.yml
index dd051c0..fd9eed4 100644
--- a/.gitlab-ci.d/opensbi.yml
+++ b/.gitlab-ci.d/opensbi.yml
@@ -34,18 +34,14 @@ build-opensbi:
when: always
  artifacts:
paths: # 'artifacts.zip' will contains the following files:
-   - pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin
-   - pc-bios/opensbi-riscv32-virt-fw_jump.bin
-   - pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
-   - pc-bios/opensbi-riscv64-virt-fw_jump.bin
-   - opensbi32-virt-stdout.log
-   - opensbi32-virt-stderr.log
-   - opensbi64-virt-stdout.log
-   - opensbi64-virt-stderr.log
-   - opensbi32-sifive_u-stdout.log
-   - opensbi32-sifive_u-stderr.log
-   - opensbi64-sifive_u-stdout.log
-   - opensbi64-sifive_u-stderr.log
+   - pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
+   - pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
+   - pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
+   - pc-bios/opensbi-riscv64-generic-fw_dynamic.elf
+   - opensbi32-generic-stdout.log
+   - opensbi32-generic-stderr.log
+   - opensbi64-generic-stdout.log
+   - opensbi64-generic-stderr.log
  image: $CI_REGISTRY_IMAGE:opensbi-cross-build
  variables:
GIT_DEPTH: 3
@@ -54,10 +50,6 @@ build-opensbi:
  - export JOBS=$(($(getconf _NPROCESSORS_ONLN) + 1))
  - echo "=== Using ${JOBS} simultaneous jobs ==="
  - make -j${JOBS} -C roms/opensbi clean
- - make -j${JOBS} -C roms opensbi32-virt 2>&1 1>opensbi32-virt-stdout.log | 
tee -a opensbi32-virt-stderr.log >&2
+ - make -j${JOBS} -C roms opensbi32-generic 2>&1 
1>opensbi32-generic-stdout.log | tee -a opensbi32-generic-stderr.log >&2
  - make -j${JOBS} -C roms/opensbi clean
- - make -j${JOBS} -C roms opensbi64-virt 2>&1 1>opensbi64-virt-stdout.log | 
tee -a opensbi64-virt-stderr.log >&2
- - make -j${JOBS} -C roms/opensbi clean
- - make -j${JOBS} -C roms opensbi32-sifive_u 2>&1 
1>opensbi32-sifive_u-stdout.log | tee -a opensbi32-sifive_u-stderr.log >&2
- - make -j${JOBS} -C roms/opensbi clean
- - make -j${JOBS} -C roms opensbi64-sifive_u 2>&1 
1>opensbi64-sifive_u-stdout.log | tee -a opensbi64-sifive_u-stderr.log >&2
+ - make -j${JOBS} -C roms opensbi64-generic 2>&1 
1>opensbi64-generic-stdout.log | tee -a opensbi64-generic-stderr.log >&2
-- 
2.7.4




[PATCH v3 0/7] riscv: Switch to use generic platform fw_dynamic type opensbi bios images

2020-07-02 Thread Bin Meng
From: Bin Meng 

The RISC-V generic platform is a flattened device tree (FDT) based
platform where all platform specific functionality is provided based
on FDT passed by previous booting stage. The support was added in
the upstream OpenSBI v0.8 release recently.

This series updates QEMU to switch to use generic platform of opensbi
bios images. With the recent fw_dynamic image support, let's replace
the fw_jump images with fw_dynamic ones too.

The patch emails do not contain binary bits, please grab all updates
at https://github.com/lbmeng/qemu.git bios branch.

This series is rebased on Atish's fw_dynamic series @
http://patchwork.ozlabs.org/project/qemu-devel/list/?series=186980

Changes in v3:
- Change fw_jump to fw_dynamic in the make rules
- Change to fw_dynamic.bin for virt & sifive_u
- Change to fw_dynamic.elf for Spike
- Generate fw_dynamic images in the artifacts
- change fw_jump to fw_dynamic in the Makefile

Changes in v2:
- new patch: configure: Create symbolic links for pc-bios/*.elf files
- Upgrade OpenSBI to v0.8 release
- Copy the ELF images too in the make rules
- Include ELF images in the artifacts
- new patch: Makefile: Ship the generic platform bios images for RISC-V

Bin Meng (7):
  configure: Create symbolic links for pc-bios/*.elf files
  roms/opensbi: Upgrade from v0.7 to v0.8
  roms/Makefile: Build the generic platform for RISC-V OpenSBI firmware
  hw/riscv: Use pre-built bios image of generic platform for virt &
sifive_u
  hw/riscv: spike: Change the default bios to use generic platform image
  gitlab-ci/opensbi: Update GitLab CI to build generic platform
  Makefile: Ship the generic platform bios images for RISC-V

 .gitlab-ci.d/opensbi.yml   |  28 --
 Makefile   |   4 ++--
 configure  |   1 +
 hw/riscv/sifive_u.c|   4 ++--
 hw/riscv/spike.c   |   9 +--
 hw/riscv/virt.c|   4 ++--
 pc-bios/opensbi-riscv32-generic-fw_dynamic.bin | Bin 0 -> 62144 bytes
 pc-bios/opensbi-riscv32-generic-fw_dynamic.elf | Bin 0 -> 558668 bytes
 pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin   | Bin 49520 -> 0 bytes
 pc-bios/opensbi-riscv32-virt-fw_jump.bin   | Bin 49504 -> 0 bytes
 pc-bios/opensbi-riscv64-generic-fw_dynamic.bin | Bin 0 -> 70792 bytes
 pc-bios/opensbi-riscv64-generic-fw_dynamic.elf | Bin 0 -> 620424 bytes
 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin   | Bin 57936 -> 0 bytes
 pc-bios/opensbi-riscv64-virt-fw_jump.bin   | Bin 57920 -> 0 bytes
 roms/Makefile  |  32 -
 roms/opensbi   |   2 +-
 16 files changed, 35 insertions(+), 49 deletions(-)
 create mode 100644 pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
 create mode 100644 pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
 delete mode 100644 pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin
 delete mode 100644 pc-bios/opensbi-riscv32-virt-fw_jump.bin
 create mode 100644 pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
 create mode 100644 pc-bios/opensbi-riscv64-generic-fw_dynamic.elf
 delete mode 100644 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
 delete mode 100644 pc-bios/opensbi-riscv64-virt-fw_jump.bin

-- 
2.7.4




[PATCH v3 4/7] hw/riscv: Use pre-built bios image of generic platform for virt & sifive_u

2020-07-02 Thread Bin Meng
From: Bin Meng 

Update virt and sifive_u machines to use the opensbi fw_dynamic bios
image built for the generic FDT platform.

Remove the out-of-date, no-longer-used bios images.

Signed-off-by: Bin Meng 
Reviewed-by: Anup Patel 
Reviewed-by: Alistair Francis 

---

Changes in v3:
- Change to fw_dynamic.bin for virt & sifive_u

 hw/riscv/sifive_u.c|   4 ++--
 hw/riscv/virt.c|   4 ++--
 pc-bios/opensbi-riscv32-generic-fw_dynamic.bin | Bin 0 -> 62144 bytes
 pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin   | Bin 49520 -> 0 bytes
 pc-bios/opensbi-riscv32-virt-fw_jump.bin   | Bin 49504 -> 0 bytes
 pc-bios/opensbi-riscv64-generic-fw_dynamic.bin | Bin 0 -> 70792 bytes
 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin   | Bin 57936 -> 0 bytes
 pc-bios/opensbi-riscv64-virt-fw_jump.bin   | Bin 57920 -> 0 bytes
 8 files changed, 4 insertions(+), 4 deletions(-)
 create mode 100644 pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
 delete mode 100644 pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin
 delete mode 100644 pc-bios/opensbi-riscv32-virt-fw_jump.bin
 create mode 100644 pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
 delete mode 100644 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
 delete mode 100644 pc-bios/opensbi-riscv64-virt-fw_jump.bin

diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index b434fd3..dc46f64 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -60,9 +60,9 @@
 #include 
 
 #if defined(TARGET_RISCV32)
-# define BIOS_FILENAME "opensbi-riscv32-sifive_u-fw_jump.bin"
+# define BIOS_FILENAME "opensbi-riscv32-generic-fw_dynamic.bin"
 #else
-# define BIOS_FILENAME "opensbi-riscv64-sifive_u-fw_jump.bin"
+# define BIOS_FILENAME "opensbi-riscv64-generic-fw_dynamic.bin"
 #endif
 
 static const struct MemmapEntry {
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 85e17fe..5ca49c5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -43,9 +43,9 @@
 #include "hw/pci-host/gpex.h"
 
 #if defined(TARGET_RISCV32)
-# define BIOS_FILENAME "opensbi-riscv32-virt-fw_jump.bin"
+# define BIOS_FILENAME "opensbi-riscv32-generic-fw_dynamic.bin"
 #else
-# define BIOS_FILENAME "opensbi-riscv64-virt-fw_jump.bin"
+# define BIOS_FILENAME "opensbi-riscv64-generic-fw_dynamic.bin"
 #endif
 
 static const struct MemmapEntry {
diff --git a/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin 
b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
new file mode 100644
index 000..23b4dfb
Binary files /dev/null and b/pc-bios/opensbi-riscv32-generic-fw_dynamic.bin 
differ
diff --git a/pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin 
b/pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin
deleted file mode 100644
index 3e0da54..000
Binary files a/pc-bios/opensbi-riscv32-sifive_u-fw_jump.bin and /dev/null differ
diff --git a/pc-bios/opensbi-riscv32-virt-fw_jump.bin 
b/pc-bios/opensbi-riscv32-virt-fw_jump.bin
deleted file mode 100644
index bc56ed6..000
Binary files a/pc-bios/opensbi-riscv32-virt-fw_jump.bin and /dev/null differ
diff --git a/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin 
b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
new file mode 100644
index 000..16c0cf4
Binary files /dev/null and b/pc-bios/opensbi-riscv64-generic-fw_dynamic.bin 
differ
diff --git a/pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin 
b/pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
deleted file mode 100644
index 1acee86..000
Binary files a/pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin and /dev/null differ
diff --git a/pc-bios/opensbi-riscv64-virt-fw_jump.bin 
b/pc-bios/opensbi-riscv64-virt-fw_jump.bin
deleted file mode 100644
index c62f2b4..000
Binary files a/pc-bios/opensbi-riscv64-virt-fw_jump.bin and /dev/null differ
-- 
2.7.4




[PATCH v3 5/7] hw/riscv: spike: Change the default bios to use generic platform image

2020-07-02 Thread Bin Meng
From: Bin Meng 

To stay in sync with other RISC-V machines, change the default bios to
use the generic platform fw_dynamic.elf image.

While we are here, add some comments to mention that using ELF files
for the Spike machine was intentional.

Signed-off-by: Bin Meng 
Reviewed-by: Anup Patel 
Reviewed-by: Alistair Francis 

---

Changes in v3:
- Change to fw_dynamic.elf for Spike

 hw/riscv/spike.c   |   9 +++--
 pc-bios/opensbi-riscv32-generic-fw_dynamic.elf | Bin 0 -> 558668 bytes
 pc-bios/opensbi-riscv64-generic-fw_dynamic.elf | Bin 0 -> 620424 bytes
 3 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
 create mode 100644 pc-bios/opensbi-riscv64-generic-fw_dynamic.elf

diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index b295a43..a187aa3 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -42,10 +42,15 @@
 #include "sysemu/qtest.h"
 #include "sysemu/sysemu.h"
 
+/*
+ * Not like other RISC-V machines that use plain binary bios images,
+ * keeping ELF files here was intentional because BIN files don't work
+ * for the Spike machine as HTIF emulation depends on ELF parsing.
+ */
 #if defined(TARGET_RISCV32)
-# define BIOS_FILENAME "opensbi-riscv32-spike-fw_jump.elf"
+# define BIOS_FILENAME "opensbi-riscv32-generic-fw_dynamic.elf"
 #else
-# define BIOS_FILENAME "opensbi-riscv64-spike-fw_jump.elf"
+# define BIOS_FILENAME "opensbi-riscv64-generic-fw_dynamic.elf"
 #endif
 
 static const struct MemmapEntry {
diff --git a/pc-bios/opensbi-riscv32-generic-fw_dynamic.elf 
b/pc-bios/opensbi-riscv32-generic-fw_dynamic.elf
new file mode 100644
index 000..eb9ebf5
Binary files /dev/null and b/pc-bios/opensbi-riscv32-generic-fw_dynamic.elf 
differ
diff --git a/pc-bios/opensbi-riscv64-generic-fw_dynamic.elf 
b/pc-bios/opensbi-riscv64-generic-fw_dynamic.elf
new file mode 100644
index 000..642a64e
Binary files /dev/null and b/pc-bios/opensbi-riscv64-generic-fw_dynamic.elf 
differ
-- 
2.7.4




Re: Question about virtio-net: implement RX RSS processing

2020-07-02 Thread Jason Wang



On 2020/7/3 上午1:07, Vincent Li wrote:

Hi,

I noticed the [PULL V2 02/33] virtio-net: implement RX RSS processing
https://marc.info/?l=qemu-devel=159248675403246=2 and cloned
https://github.com/jasowang/qemu.git tags/net-pull-request for testing the
RX RSS feature, but I am not clear how to test this feature and see if it
meets my needs.



Yuri may know more but I think the only driver that supports RSS is 
Windows driver currently.





I am running F-Stack Nginx applications
https://github.com/F-Stack/f-stack/tree/dev/app/nginx-1.16.1 in KVM guest,
F-Stack is FreeBSD TCP/IP stack ported to support DPDK, and F-Stack set
RSS mode as code below
https://github.com/F-Stack/f-stack/blob/dev/lib/ff_dpdk_if.c#L605

  /* Set RSS mode */
  uint64_t default_rss_hf = ETH_RSS_PROTO_MASK;
  port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
  port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf;
  if (dev_info.hash_key_size == 52) {
  port_conf.rx_adv_conf.rss_conf.rss_key = default_rsskey_52bytes;
  port_conf.rx_adv_conf.rss_conf.rss_key_len = 52;
  use_rsskey_52bytes = 1;
  } else {
  port_conf.rx_adv_conf.rss_conf.rss_key = default_rsskey_40bytes;
  port_conf.rx_adv_conf.rss_conf.rss_key_len = 40;
  }
  port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
  if (port_conf.rx_adv_conf.rss_conf.rss_hf != ETH_RSS_PROTO_MASK) {
  printf("Port %u modified RSS hash function based on hardware support,"
  "requested:%#"PRIx64" configured:%#"PRIx64"\n",
  port_id, default_rss_hf, port_conf.rx_adv_conf.rss_conf.rss_hf);
  }

But  DPDK virtio PMD does not support RSS as below commit shows:

commit 13b3137f3b7c8f866947a9b34e06a8aec0d084f7
Author: Dilshod Urazov
Date:   Wed Oct 9 13:32:07 2019 +0100

 net/virtio: reject unsupported Rx multi-queue modes
 
 This driver supports none of DCB, RSS or VMDQ modes, therefore must

 check and return error if configured incorrectly.
 
 Virtio can distribute Rx packets across multi-queue, but there is

 no controls (algorithm, redirection table, hash function) except
 number of Rx queues and ETH_MQ_RX_NONE is the best fit meaning
 no method is enforced on how to route packets to MQs.
 
 Fixes: c1f86306a026 ("virtio: add new driver")

 Cc: sta...@dpdk.org
 
 Signed-off-by: Dilshod Urazov

 Signed-off-by: Andrew Rybchenko
 Reviewed-by: Maxime Coquelin

diff --git a/drivers/net/virtio/virtio_ethdev.c
b/drivers/net/virtio/virtio_ethdev.c
index 0a2ed2e50..76bd40a3e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -2066,6 +2066,13 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 PMD_INIT_LOG(DEBUG, "configure");
 req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
  
+   if (rxmode->mq_mode != ETH_MQ_RX_NONE) {

+   PMD_DRV_LOG(ERR,
+   "Unsupported Rx multi queue mode %d",
+   rxmode->mq_mode);
+   return -EINVAL;
+   }
+
 if (dev->data->dev_conf.intr_conf.rxq) {
 ret = virtio_init_device(dev, hw->req_guest_features);
 if (ret < 0)



So the problem is that I can't run F-Stack applications in a KVM/Qemu guest
with multi queue/vCPU/RSS support. This problem seems to apply to any DPDK TCP
application that requires multi queue/RSS support in a KVM/Qemu guest; for
example, mTCP (https://github.com/mtcp-stack/mtcp), which I tested, has a
similar problem.

I am not clear on the picture of how everything works together for this
virtio-net RSS feature.

I have read following blogs

https://www.redhat.com/en/blog/introduction-virtio-networking-and-vhost-net
https://www.redhat.com/en/blog/how-vhost-user-came-being-virtio-networking-and-dpdk

Someone told me that in order for the DPDK front-end virtio PMD in the guest to
support RSS, the backend
also needs to support RSS, including vhost-net and vhost-user, and that it
should have nothing to do with this Qemu virtio-net RSS. Is that correct? If
so, I have the following questions:

1, What is the use case for this Qemu virtio-net RSS?



It's just RSS, like what other devices provide, for steering or balancing.



2, How to test the use case?



You need to use a Windows guest.



3, Are there any plans to improve vhost-net/vhost-user and the DPDK virtio PMD
to support RSS?



For vhost-net, Sameeh posted an eBPF-based solution RFC [1]; we need
someone to carry on the work. It doesn't require any extension to vhost-net,
thanks to the steering eBPF support in tuntap.


For vhost-user, we probably need to extend the vhost-user protocol first.

You're welcome to contribute patches.

[1] https://patchwork.kernel.org/cover/10581921/

Thanks




For 3,  I think this is important for KVM/Qemu/OVS-DPDK/Vhost-net environment 
for DPDK TCP/UDP applications.

Note I have no problem running F-Stack or mTCP applications in VMware ESXi
guest environment with multi queue/vCPU/RSS 

Re: How to build QEMU plugin on Windows with mingw

2020-07-02 Thread casmac
Hi Alex,
   Thanks for looking into this. In the last message, the compilation command
was a bit misleading. I am filling in more details.
If I keep "-Wl,-soname,$@" in the command, I get similar linking errors.
  
makefile:  
SONAMES := $(addsuffix .dll,$(addprefix lib,$(NAMES)))

QEMU_CFLAGS += -fPIC -fno-stack-protector -DBUILDING_DLL
QEMU_CFLAGS += -I$(SRC_PATH)/include/qemu

all: $(SONAMES)
lib%.dll: %.o
$(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS)

output:
x86_64-w64-mingw32-gcc -shared -Wl,-soname,libbb.dll -o libbb.dll bb.o
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `g_autoptr_cleanup_generic_gfree':
C:/msys64/mingw64/include/glib-2.0/glib/glib-autocleanups.h:28: undefined 
reference to `g_free'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `plugin_exit':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:27: undefined reference to 
`g_strdup_printf'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:29: undefined reference to 
`qemu_plugin_outs'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `vcpu_tb_trans':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:42: undefined reference to 
`qemu_plugin_tb_n_insns'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:45: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_inline'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:47: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_inline'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:50: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_cb'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `qemu_plugin_install':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:64: undefined reference to 
`qemu_plugin_register_vcpu_tb_trans_cb'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:65: undefined reference to 
`qemu_plugin_register_atexit_cb'
collect2.exe: error: ld returned 1 exit status

On Windows, I replaced $LDLIBS with $LIBS here, because $LDLIBS seems to be
undefined. This still yields linking errors, except for the glib ones.
 lib%.dll: %.o
   $(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LIBS)"

x86_64-w64-mingw32-gcc -shared -Wl,-soname,libbb.dll -o libbb.dll bb.o -ldl 
-LC:/msys64/mingw64/lib -lz -LC:/msys64/mingw64/lib -lgmodule-2.0 -pthread 
-lglib-2.0 -lintl -LC:/msys64/mingw64/lib -lgthread-2.0 -pthread -lglib-2.0 
-lintl -lwinmm -lws2_32 -liphlpapi
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `plugin_exit':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:29: undefined reference to 
`qemu_plugin_outs'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `vcpu_tb_trans':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:42: undefined reference to 
`qemu_plugin_tb_n_insns'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:45: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_inline'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:47: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_inline'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:50: undefined reference to 
`qemu_plugin_register_vcpu_tb_exec_cb'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 bb.o: in function `qemu_plugin_install':
C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:64: undefined reference to 
`qemu_plugin_register_vcpu_tb_trans_cb'
C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe:
 C:/QRS_Project/simdsp/qemu-4.2.0/tests/plugin/bb.c:65: undefined reference to 
`qemu_plugin_register_atexit_cb'

The libdl library (where dlopen() is defined) is not shipped with MSYS2
mingw. I compiled it from source (https://github.com/dlfcn-win32/dlfcn-win32)
and placed the generated libdl.dll, libdl.a, libdl.dll.a in
C:\msys64\mingw64\include.

Re: [PATCH v1 3/3] target/riscv: Regen floating point rounding mode in dynamic mode

2020-07-02 Thread Bin Meng
On Wed, Jul 1, 2020 at 4:23 AM Alistair Francis
 wrote:
>
> When a guest specifies that the rounding mode should be dynamic (0b111)
> then we want to re-calculate the rounding mode on each instruction. The
> gen_helper_set_rounding_mode() function will correctly check the
> rounding mode and handle a dynamic rounding, we just need to make sure
> it's always called if dynamic rounding is selected.
>
> Fixes: 1885350 ("RISCV dynamic rounding mode is not behaving correctly")

I can't find this commit id.

> Signed-off-by: Alistair Francis 
> ---
>  target/riscv/translate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>

Regards,
Bin



Re: [PATCH v4 4/4] RISC-V: Support 64 bit start address

2020-07-02 Thread Bin Meng
On Thu, Jul 2, 2020 at 2:39 AM Atish Patra  wrote:
>
> Even though the start address in ROM code is declared as a 64 bit address
> for RV64, it can't be used as upper bits are set to zero in ROM code.
>
> Update the ROM code correctly to reflect the 64bit value.
>
> Signed-off-by: Atish Patra 
> ---
>  hw/riscv/boot.c | 6 +-
>  hw/riscv/sifive_u.c | 6 +-
>  2 files changed, 10 insertions(+), 2 deletions(-)
>

Reviewed-by: Bin Meng 
Tested-by: Bin Meng 



Re: [PATCH 5/5] MAINTAINERS: Adjust MIPS maintainership (Add Huacai Chen & Jiaxun Yang)

2020-07-02 Thread chen huacai
Acked-by: Huacai Chen 

On Thu, Jul 2, 2020 at 10:23 PM Jiaxun Yang  wrote:
>
>
> 在 2020/7/2 下午9:37, Philippe Mathieu-Daudé 写道:
> > From: Aleksandar Markovic 
> >
> > Huacai Chen steps in as new energy [1].
> >
> > Aurelien Jarno comment [2]:
> >
> >It happens that I known Huacai Chen from the time he was
> >upstreaming the Loongson 3 support to the kernel, I have been
> >testing and reviewing his patches. I also know Jiaxun Yang from
> >the #debian-mips IRC channel. I know that they are both very
> >competent and have a good knowledge of the open source world.
> >I therefore agree that they are good additions to maintain and/or
> >review the MIPS part of QEMU.
> >
> > [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg718434.html
> > [2] https://www.mail-archive.com/qemu-devel@nongnu.org/msg718738.html
> >
> > Cc: Huacai Chen 
> > Cc: Jiaxun Yang 
> > Cc: Aurelien Jarno 
> > Signed-off-by: Aleksandar Markovic 
> > Message-Id: <20200701182559.28841-3-aleksandar.qemu.de...@gmail.com>
> > PMD: [Split patch in 3, added Aurelien comment]
> > Signed-off-by: Philippe Mathieu-Daudé 
>
> Acked-by: Jiaxun Yang 
>
> I can help with reviewing relevant patches .
>
> Thanks.
>
> > ---
> > Aurelien comment is probably worth an Acked-by tag.
> >
> > Cc: Yunqiang Su 
>
> + Yunqiang's Debian mailbox as he no longer works for MIPS/Wave but
> still working on Debian/MIPS.
>
> > Cc: Lichao Liu 
> > Cc: Thomas Bogendoerfer 
> > Cc: Aleksandar Rikalo 
> >
> >   MAINTAINERS | 4 
> >   1 file changed, 4 insertions(+)
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 64f54c553c..8155525077 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -213,6 +213,7 @@ F: disas/microblaze.c
> >
> >   MIPS TCG CPUs
> >   M: Aleksandar Markovic 
> > +R: Jiaxun Yang 
> >   R: Aleksandar Rikalo 
> >   S: Maintained
> >   F: target/mips/
> > @@ -375,6 +376,7 @@ S: Maintained
> >   F: target/arm/kvm.c
> >
> >   MIPS KVM CPUs
> > +M: Huacai Chen 
> >   M: Aleksandar Markovic 
> >   S: Odd Fixes
> >   F: target/mips/kvm.c
> > @@ -2672,6 +2674,8 @@ F: disas/i386.c
> >
> >   MIPS TCG target
> >   M: Aleksandar Markovic 
> > +R: Huacai Chen 
> > +R: Jiaxun Yang 
> >   R: Aleksandar Rikalo 
> >   S: Maintained
> >   F: tcg/mips/
>


-- 
Huacai Chen



Re: [PATCH v4 3/4] riscv: Add opensbi firmware dynamic support

2020-07-02 Thread Bin Meng
On Thu, Jul 2, 2020 at 2:39 AM Atish Patra  wrote:
>
> OpenSBI is the default firmware in Qemu and has various firmware loading
> options. Currently, qemu loader uses fw_jump which has a compile time
> pre-defined address where fdt & kernel image must reside. This puts a
> constraint on image size of the Linux kernel depending on the fdt location
> and available memory. However, fw_dynamic allows the loader to specify
> the next stage location (i.e. Linux kernel/U-Boot) in memory and other
> configurable boot options available in OpenSBI.
>
> Add support for OpenSBI dynamic firmware loading support. This doesn't
> break existing setup and fw_jump will continue to work as it is. Any
> other firmware will continue to work without any issues as long as it
> doesn't expect anything specific from loader in "a2" register.
>
> Signed-off-by: Atish Patra 
> Reviewed-by: Alistair Francis 
> ---
>  hw/riscv/boot.c | 42 +---
>  hw/riscv/sifive_u.c | 20 +---
>  hw/riscv/spike.c| 13 ++--
>  hw/riscv/virt.c | 12 +--
>  include/hw/riscv/boot.h |  5 ++-
>  include/hw/riscv/boot_opensbi.h | 58 +
>  6 files changed, 134 insertions(+), 16 deletions(-)
>  create mode 100644 include/hw/riscv/boot_opensbi.h
>

Reviewed-by: Bin Meng 
Tested-by: Bin Meng 



Re: [PATCH v4 2/4] RISC-V: Copy the fdt in dram instead of ROM

2020-07-02 Thread Bin Meng
On Thu, Jul 2, 2020 at 2:39 AM Atish Patra  wrote:
>
> Currently, the fdt is copied to the ROM after the reset vector. The firmware
> has to copy it to DRAM. Instead of this, directly copy the device tree to a
> pre-computed dram address. The device tree load address should be as far as
> possible from kernel and initrd images. That's why it is kept at the end of
> the DRAM or 4GB whichever is lesser.
>
> Signed-off-by: Atish Patra 
> Reviewed-by: Alistair Francis 
> ---
>  hw/riscv/boot.c | 53 +
>  hw/riscv/sifive_u.c | 28 ++
>  hw/riscv/spike.c|  7 +-
>  hw/riscv/virt.c |  7 +-
>  include/hw/riscv/boot.h |  4 +++-
>  5 files changed, 66 insertions(+), 33 deletions(-)
>

Reviewed-by: Bin Meng 
Tested-by: Bin Meng 



Re: [PATCH 16/17] hw/block/nvme: provide the mandatory subnqn field

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The SUBNQN field is mandatory in NVM Express 1.3.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 798f6f30e7da..9f0b9de73307 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -2131,6 +2131,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP |
> NVME_ONCS_FEATURES);
>  
> +pstrcpy((char *) id->subnqn, sizeof(id->subnqn), 
> "nqn.2019-08.org.qemu:");
> +pstrcat((char *) id->subnqn, sizeof(id->subnqn), n->params.serial);
> +
>  id->psd[0].mp = cpu_to_le16(0x9c4);
>  id->psd[0].enlat = cpu_to_le32(0x10);
>  id->psd[0].exlat = cpu_to_le32(0x4);


Re: [PATCH 15/17] hw/block/nvme: enforce valid queue creation sequence

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Support returning Command Sequence Error if Set Features on Number of
> Queues is called after queues have been created.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 12 
>  hw/block/nvme.h |  1 +
>  2 files changed, 13 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 8a816b558eeb..798f6f30e7da 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -911,6 +911,13 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
>  cq = g_malloc0(sizeof(*cq));
>  nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
>  NVME_CQ_FLAGS_IEN(qflags));
> +
> +/*
> + * It is only required to set qs_created when creating a completion 
> queue;
> + * creating a submission queue without a matching completion queue will
> + * fail.
> + */
> +n->qs_created = true;
>  return NVME_SUCCESS;
>  }
>  
> @@ -1298,6 +1305,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
>  break;
>  case NVME_NUMBER_OF_QUEUES:
> +if (n->qs_created) {
> +return NVME_CMD_SEQ_ERROR | NVME_DNR;
> +}
> +
>  /*
>   * NVMe v1.3, Section 5.21.1.7: 0x is not an allowed value for 
> NCQR
>   * and NSQR.
> @@ -1430,6 +1441,7 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
>  
>  n->aer_queued = 0;
>  n->outstanding_aers = 0;
> +n->qs_created = false;
>  
>  blk_flush(n->conf.blk);
>  n->bar.cc = 0;
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 34a29d96..54ec54f491bf 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -124,6 +124,7 @@ typedef struct NvmeCtrl {
>  BlockConfconf;
>  NvmeParams   params;
>  
> +boolqs_created;
>  uint32_tpage_size;
>  uint16_tpage_bits;
>  uint16_tmax_prp_ents;


Re: [PATCH 13/17] hw/block/nvme: make sure ncqr and nsqr is valid

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> 0xffff is not an allowed value for NCQR and NSQR in Set Features on
> Number of Queues.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a41665746d33..2279d8395aaa 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1257,6 +1257,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
>  break;
>  case NVME_NUMBER_OF_QUEUES:
> +/*
> + * NVMe v1.3, Section 5.21.1.7: 0x is not an allowed value for 
> NCQR
> + * and NSQR.
> + */
> +if ((dw11 & 0x) == 0x || ((dw11 >> 16) & 0x) == 0x) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
>  trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1,
>  ((dw11 >> 16) & 0x) + 1,
>  n->params.max_ioqpairs,


Re: [PATCH 12/17] hw/block/nvme: support the get/set features select and save fields

2020-07-02 Thread Dmitry Fomichev
On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Since the device does not have any persistent state storage, no
> features are "saveable" and setting the Save (SV) field in any Set
> Features command will result in a Feature Identifier Not Saveable status
> code.
> 
> Similarly, if the Select (SEL) field is set to request saved values, the
> device will (as it should) return the default values instead.
> 
> Since this also introduces "Supported Capabilities", the nsid field is
> now also checked for validity wrt. the feature being get/set'ed.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 87 +++
>  hw/block/nvme.h   |  8 
>  hw/block/trace-events |  4 +-
>  include/block/nvme.h  | 27 +-
>  4 files changed, 115 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 647f408854ae..a41665746d33 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1056,16 +1056,43 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  {
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint32_t nsid = le32_to_cpu(cmd->nsid);
>  uint32_t result;
>  uint8_t fid = NVME_GETSETFEAT_FID(dw10);
> +NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10);
>  uint16_t iv;
>  
> -trace_pci_nvme_getfeat(nvme_cid(req), fid, dw11);
> +trace_pci_nvme_getfeat(nvme_cid(req), fid, sel, dw11);
>  
>  if (!nvme_feature_support[fid]) {
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> +if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) {
> +if (!nsid || nsid > n->num_namespaces) {
> +/*
> + * The Reservation Notification Mask and Reservation Persistence
> + * features require a status code of Invalid Field in Command 
> when
> + * NSID is 0x. Since the device does not support those
> + * features we can always return Invalid Namespace or Format as 
> we
> + * should do for all other features.
> + */
> +return NVME_INVALID_NSID | NVME_DNR;
> +}
> +}
> +
> +switch (sel) {
> +case NVME_GETFEAT_SELECT_CURRENT:
> +break;
> +case NVME_GETFEAT_SELECT_SAVED:
> +/* no features are saveable by the controller; fallthrough */
> +case NVME_GETFEAT_SELECT_DEFAULT:
> +goto defaults;
> +case NVME_GETFEAT_SELECT_CAP:
> +result = cpu_to_le32(nvme_feature_cap[fid]);
> +goto out;
> +}
> +
>  switch (fid) {
>  case NVME_TEMPERATURE_THRESHOLD:
>  result = 0;
> @@ -1091,6 +1118,29 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  case NVME_VOLATILE_WRITE_CACHE:
>  result = cpu_to_le32(blk_enable_write_cache(n->conf.blk));
>  trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
> +break;
> +case NVME_ASYNCHRONOUS_EVENT_CONF:
> +result = cpu_to_le32(n->features.async_config);
> +break;
> +case NVME_TIMESTAMP:
> +return nvme_get_feature_timestamp(n, cmd);
> +default:
> +break;
> +}
> +
> +defaults:
> +switch (fid) {
> +case NVME_TEMPERATURE_THRESHOLD:
> +result = 0;

This will reset the high or low threshold value set earlier in this function.
You could do the following to avoid this -

@ -1163,7 +1163,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
*cmd, NvmeRequest *req)
 break;
 }
 
-break;
+goto out;
 case NVME_VOLATILE_WRITE_CACHE:
 result = cpu_to_le32(blk_enable_write_cache(n->conf.blk));
 trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");

> +
> +if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> +break;
> +}
> +
> +if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) {
> +result = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> +}
> +
>  break;
>  case NVME_NUMBER_OF_QUEUES:
>  result = cpu_to_le32((n->params.max_ioqpairs - 1) |
> @@ -1110,16 +1160,12 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  
>  result = cpu_to_le32(result);
>  break;
> -case NVME_ASYNCHRONOUS_EVENT_CONF:
> -result = cpu_to_le32(n->features.async_config);
> -break;
> -case NVME_TIMESTAMP:
> -return nvme_get_feature_timestamp(n, cmd);
>  default:
>  result = cpu_to_le32(nvme_feature_default[fid]);
>  break;
>  }
>  
> +out:
>  req->cqe.result = result;
>  return NVME_SUCCESS;
>  }
> @@ -1146,14 +1192,37 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  {
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint32_t 

Re: [PATCH 17/17] hw/block/nvme: bump supported version to v1.3

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Bump the supported NVM Express version to v1.3.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9f0b9de73307..fbe9b2d50895 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -57,6 +57,7 @@
>  #define NVME_MAX_IOQPAIRS 0x
>  #define NVME_REG_SIZE 0x1000
>  #define NVME_DB_SIZE  4
> +#define NVME_SPEC_VER 0x00010300
>  #define NVME_CMB_BIR 2
>  #define NVME_PMR_BIR 2
>  #define NVME_TEMPERATURE 0x143
> @@ -2103,6 +2104,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->ieee[0] = 0x00;
>  id->ieee[1] = 0x02;
>  id->ieee[2] = 0xb3;
> +id->ver = cpu_to_le32(NVME_SPEC_VER);
>  id->oacs = cpu_to_le16(0);
>  
>  /*
> @@ -2148,7 +2150,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  NVME_CAP_SET_CSS(n->bar.cap, 1);
>  NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
>  
> -n->bar.vs = 0x00010200;
> +n->bar.vs = NVME_SPEC_VER;
>  n->bar.intmc = n->bar.intms = 0;
>  }
>  


Re: [PATCH 14/17] hw/block/nvme: support identify namespace descriptor list

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Since we are not providing the NGUID or EUI64 fields, we must support
> the Namespace UUID. We do not have any way of storing a persistent
> unique identifier, so conjure up a UUID that is just the namespace id.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 41 +
>  hw/block/trace-events |  1 +
>  2 files changed, 42 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 2279d8395aaa..8a816b558eeb 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -972,6 +972,45 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
> NvmeIdentify *c)
>  return ret;
>  }
>  
> +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c)
> +{
> +uint32_t nsid = le32_to_cpu(c->nsid);
> +uint64_t prp1 = le64_to_cpu(c->prp1);
> +uint64_t prp2 = le64_to_cpu(c->prp2);
> +
> +uint8_t list[NVME_IDENTIFY_DATA_SIZE];
> +
> +struct data {
> +struct {
> +NvmeIdNsDescr hdr;
> +uint8_t v[16];
> +} uuid;
> +};
> +
> +struct data *ns_descrs = (struct data *)list;
> +
> +trace_pci_nvme_identify_ns_descr_list(nsid);
> +
> +if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
> +trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
> +return NVME_INVALID_NSID | NVME_DNR;
> +}
> +
> +memset(list, 0x0, sizeof(list));
> +
> +/*
> + * Because the NGUID and EUI64 fields are 0 in the Identify Namespace 
> data
> + * structure, a Namespace UUID (nidt = 0x3) must be reported in the
> + * Namespace Identification Descriptor. Add a very basic Namespace UUID
> + * here.
> + */
> +ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
> +ns_descrs->uuid.hdr.nidl = NVME_NIDT_UUID_LEN;
> +stl_be_p(_descrs->uuid.v, nsid);
> +
> +return nvme_dma_read_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2);
> +}
> +
>  static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  {
>  NvmeIdentify *c = (NvmeIdentify *)cmd;
> @@ -983,6 +1022,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  return nvme_identify_ctrl(n, c);
>  case NVME_ID_CNS_NS_ACTIVE_LIST:
>  return nvme_identify_nslist(n, c);
> +case NVME_ID_CNS_NS_DESCR_LIST:
> +return nvme_identify_ns_descr_list(n, c);
>  default:
>  trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
>  return NVME_INVALID_FIELD | NVME_DNR;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 4a4ef34071df..7b7303cab1dd 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -45,6 +45,7 @@ pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, 
> cqid=%"PRIu16""
>  pci_nvme_identify_ctrl(void) "identify controller"
>  pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
> +pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, 
> uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 
> 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
>  pci_nvme_getfeat(uint16_t cid, uint8_t fid, uint8_t sel, uint32_t cdw11) 
> "cid %"PRIu16" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32""
>  pci_nvme_setfeat(uint16_t cid, uint8_t fid, uint8_t save, uint32_t cdw11) 
> "cid %"PRIu16" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32""


[Bug 1869006] Re: PCIe cards passthrough to TCG guest works on 2GB of guest memory but fails on 4GB (vfio_dma_map invalid arg)

2020-07-02 Thread costinel
Alex, thanks for the quick answer, but sadly I still do not fully
understand the implications, even if I read the pdf paper on RH website
you mention, as well as the vendor advisory at
https://support.hpe.com/hpesc/public/docDisplay?docId=emr_na-c04781229

When you say "qemu has no support", do you actually mean "qemu people
are unable to help you if you break things by bypassing the in-place
restrictions", or "qemu is designed to not work when restrictions are
bypassed"?

Do I understand correctly that the BIOS can modify portions of the
system's usable RAM so that the vendor-specific software tools can read
those addresses, and if so, does this mean there is a risk of data
corruption if the RMRR restrictions are bypassed?

I have eventually managed to passthrough an nvidia card in the
microserver gen8 to a windows vm using patched kernel 5.3, along with
the vendor instructions to exclude the pcie slot aka the conrep solution
but for it to work it still needed the "rmrr patch" aka removing the
"return -EPERM" line below the "Device is ineligible [...]" in
drivers/iommu/intel-iommu.c


However applying the same modification to kernel 5.4 leads to the 
"VFIO_MAP_DMA: -22" error.

Is there another place in the kernel 5.4 source that must be modified to
bring back the v5.3 kernel behaviour? (i.e. I have a stable home Windows
VM with GPU passthrough despite all of this)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1869006

Title:
  PCIe cards passthrough to TCG guest works on 2GB of guest memory but
  fails on 4GB (vfio_dma_map invalid arg)

Status in QEMU:
  New

Bug description:
  During one meeting coworker asked "did someone tried to passthrough
  PCIe card to other arch guest?" and I decided to check it.

  Plugged SATA and USB3 controllers into spare slots on mainboard and
  started playing. On 1GB VM instance it worked (both cold- and hot-
  plugged). On 4GB one it did not:

  Błąd podczas uruchamiania domeny: internal error: process exited while 
connecting to monitor: 2020-03-25T13:43:39.107524Z qemu-system-aarch64: -device 
vfio-pci,host=0000:29:00.0,id=hostdev0,bus=pci.3,addr=0x0: VFIO_MAP_DMA: -22
  2020-03-25T13:43:39.107560Z qemu-system-aarch64: -device 
vfio-pci,host=0000:29:00.0,id=hostdev0,bus=pci.3,addr=0x0: vfio 0000:29:00.0: 
failed to setup container for group 28: memory listener initialization failed: 
Region mach-virt.ram: vfio_dma_map(0x563169753c80, 0x40000000, 0x100000000, 
0x7fb2a3e00000) = -22 (Invalid argument)

  Traceback (most recent call last):
File "/usr/share/virt-manager/virtManager/asyncjob.py", line 75, in 
cb_wrapper
  callback(asyncjob, *args, **kwargs)
File "/usr/share/virt-manager/virtManager/asyncjob.py", line 111, in tmpcb
  callback(*args, **kwargs)
File "/usr/share/virt-manager/virtManager/object/libvirtobject.py", line 
66, in newfn
  ret = fn(self, *args, **kwargs)
File "/usr/share/virt-manager/virtManager/object/domain.py", line 1279, in 
startup
  self._backend.create()
File "/usr/lib64/python3.8/site-packages/libvirt.py", line 1234, in create
  if ret == -1: raise libvirtError ('virDomainCreate() failed', dom=self)
  libvirt.libvirtError: internal error: process exited while connecting to 
monitor: 2020-03-25T13:43:39.107524Z qemu-system-aarch64: -device 
vfio-pci,host=0000:29:00.0,id=hostdev0,bus=pci.3,addr=0x0: VFIO_MAP_DMA: -22
  2020-03-25T13:43:39.107560Z qemu-system-aarch64: -device 
vfio-pci,host=0000:29:00.0,id=hostdev0,bus=pci.3,addr=0x0: vfio 0000:29:00.0: 
failed to setup container for group 28: memory listener initialization failed: 
Region mach-virt.ram: vfio_dma_map(0x563169753c80, 0x40000000, 0x100000000, 
0x7fb2a3e00000) = -22 (Invalid argument)

  
  I played with memory and 3054 MB is maximum value possible to boot VM with 
coldplugged host PCIe cards.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1869006/+subscriptions



Re: [PATCH 09/17] hw/block/nvme: flush write cache when disabled

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> If the write cache is disabled with a Set Features command, flush it if
> currently enabled.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 39e680a15c56..c2507d8836fd 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1153,6 +1153,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  
>  break;
>  case NVME_VOLATILE_WRITE_CACHE:
> +if (!(dw11 & 0x1) && blk_enable_write_cache(n->conf.blk)) {
> +blk_flush(n->conf.blk);
> +}
> +
>  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
>  break;
>  case NVME_NUMBER_OF_QUEUES:


Re: [PATCH 07/17] hw/block/nvme: add support for the asynchronous event request command

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for the Asynchronous Event Request command. Required for
> compliance with NVMe revision 1.3d. See NVM Express 1.3d, Section 5.2
> ("Asynchronous Event Request command").
> 
> Mostly imported from Keith's qemu-nvme tree. Modified with a max number
> of queued events (controllable with the aer_max_queued device
> parameter). The spec states that the controller *should* retain
> events, so we do best effort here.
> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c   | 180 --
>  hw/block/nvme.h   |  10 ++-
>  hw/block/trace-events |   9 +++
>  include/block/nvme.h  |   8 +-
>  4 files changed, 198 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index fe5d052ab159..39e680a15c56 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -342,6 +342,85 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, 
> NvmeRequest *req)
>  timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
>  }
>  
> +static void nvme_process_aers(void *opaque)
> +{
> +NvmeCtrl *n = opaque;
> +NvmeAsyncEvent *event, *next;
> +
> +trace_pci_nvme_process_aers(n->aer_queued);
> +
> +QTAILQ_FOREACH_SAFE(event, >aer_queue, entry, next) {
> +NvmeRequest *req;
> +NvmeAerResult *result;
> +
> +/* can't post cqe if there is nothing to complete */
> +if (!n->outstanding_aers) {
> +trace_pci_nvme_no_outstanding_aers();
> +break;
> +}
> +
> +/* ignore if masked (cqe posted, but event not cleared) */
> +if (n->aer_mask & (1 << event->result.event_type)) {
> +trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
> +continue;
> +}
> +
> +QTAILQ_REMOVE(>aer_queue, event, entry);
> +n->aer_queued--;
> +
> +n->aer_mask |= 1 << event->result.event_type;
> +n->outstanding_aers--;
> +
> +req = n->aer_reqs[n->outstanding_aers];
> +
> +result = (NvmeAerResult *) >cqe.result;
> +result->event_type = event->result.event_type;
> +result->event_info = event->result.event_info;
> +result->log_page = event->result.log_page;
> +g_free(event);
> +
> +req->status = NVME_SUCCESS;
> +
> +trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
> +result->log_page);
> +
> +nvme_enqueue_req_completion(>admin_cq, req);
> +}
> +}
> +
> +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
> +   uint8_t event_info, uint8_t log_page)
> +{
> +NvmeAsyncEvent *event;
> +
> +trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
> +
> +if (n->aer_queued == n->params.aer_max_queued) {
> +trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
> +return;
> +}
> +
> +event = g_new(NvmeAsyncEvent, 1);
> +event->result = (NvmeAerResult) {
> +.event_type = event_type,
> +.event_info = event_info,
> +.log_page   = log_page,
> +};
> +
> +QTAILQ_INSERT_TAIL(>aer_queue, event, entry);
> +n->aer_queued++;
> +
> +nvme_process_aers(n);
> +}
> +
> +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
> +{
> +n->aer_mask &= ~(1 << event_type);
> +if (!QTAILQ_EMPTY(>aer_queue)) {
> +nvme_process_aers(n);
> +}
> +}
> +
>  static void nvme_rw_cb(void *opaque, int ret)
>  {
>  NvmeRequest *req = opaque;
> @@ -592,8 +671,9 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
>  return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> -uint64_t off, NvmeRequest *req)
> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
> +uint32_t buf_len, uint64_t off,
> +NvmeRequest *req)
>  {
>  uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>  uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> @@ -642,6 +722,10 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd 
> *cmd, uint32_t buf_len,
>  smart.power_on_hours[0] =
>  cpu_to_le64current_ms - n->starttime_ms) / 1000) / 60) / 60);
>  
> +if (!rae) {
> +nvme_clear_events(n, NVME_AER_TYPE_SMART);
> +}
> +
>  return nvme_dma_read_prp(n, (uint8_t *)  + off, trans_len, prp1,
>   prp2);
>  }
> @@ -668,14 +752,19 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd 
> *cmd, uint32_t buf_len,
>   prp2);
>  }
>  
> -static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> -  

Re: [PATCH 06/17] hw/block/nvme: add support for the get log page command

2020-07-02 Thread Dmitry Fomichev
On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for the Get Log Page command and basic implementations of
> the mandatory Error Information, SMART / Health Information and Firmware
> Slot Information log pages.
> 
> In violation of the specification, the SMART / Health Information log
> page does not persist information over the lifetime of the controller
> because the device has no place to store such persistent state.
> 
> Note that the LPA field in the Identify Controller data structure
> intentionally has bit 0 cleared because there is no namespace specific
> information in the SMART / Health information log page.
> 
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.14 ("Get Log Page command").
> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> ---
>  hw/block/nvme.c   | 141 +-
>  hw/block/nvme.h   |   2 +
>  hw/block/trace-events |   2 +
>  include/block/nvme.h  |   4 ++
>  4 files changed, 148 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f8e91a6965ed..fe5d052ab159 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -592,6 +592,141 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd 
> *cmd)
>  return NVME_SUCCESS;
>  }
>  
> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +uint64_t off, NvmeRequest *req)
> +{
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +uint32_t nsid = le32_to_cpu(cmd->nsid);
> +
> +uint32_t trans_len;
> +time_t current_ms;
> +uint64_t units_read = 0, units_written = 0;
> +uint64_t read_commands = 0, write_commands = 0;
> +NvmeSmartLog smart;
> +BlockAcctStats *s;
> +
> +if (nsid && nsid != 0x) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +s = blk_get_stats(n->conf.blk);
> +
> +units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> +units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> +read_commands = s->nr_ops[BLOCK_ACCT_READ];
> +write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> +
> +if (off > sizeof(smart)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +trans_len = MIN(sizeof(smart) - off, buf_len);
> +
> +memset(, 0x0, sizeof(smart));
> +
> +smart.data_units_read[0] = cpu_to_le64(units_read / 1000);
> +smart.data_units_written[0] = cpu_to_le64(units_written / 1000);
> +smart.host_read_commands[0] = cpu_to_le64(read_commands);
> +smart.host_write_commands[0] = cpu_to_le64(write_commands);
> +
> +smart.temperature[0] = n->temperature & 0xff;
> +smart.temperature[1] = (n->temperature >> 8) & 0xff;

Why not change temperature[2] in NvmeSmartLog to uint16_t and use cpu_to_le16() 
here?

> +if ((n->temperature >= n->features.temp_thresh_hi) ||
> +(n->temperature <= n->features.temp_thresh_low)) {
> +smart.critical_warning |= NVME_SMART_TEMPERATURE;
> +}
> +
> +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
> +smart.power_on_hours[0] =
> +cpu_to_le64current_ms - n->starttime_ms) / 1000) / 60) / 60);
> +
> +return nvme_dma_read_prp(n, (uint8_t *)  + off, trans_len, prp1,
> + prp2);
> +}
> +
> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> + uint64_t off, NvmeRequest *req)
> +{
> +uint32_t trans_len;
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +NvmeFwSlotInfoLog fw_log = {
> +.afi = 0x1,
> +};
> +
> +strpadcpy((char *)_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> +
> +if (off > sizeof(fw_log)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +trans_len = MIN(sizeof(fw_log) - off, buf_len);
> +
> +return nvme_dma_read_prp(n, (uint8_t *) _log + off, trans_len, prp1,
> + prp2);
> +}
> +
> +static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +uint64_t off, NvmeRequest *req)
> +{
> +uint32_t trans_len;
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +NvmeErrorLog errlog;
> +
> +if (off > sizeof(errlog)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +memset(, 0x0, sizeof(errlog));
> +
> +trans_len = MIN(sizeof(errlog) - off, buf_len);
> +
> +return nvme_dma_read_prp(n, (uint8_t *), trans_len, prp1, prp2);
> +}
> +
> +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> +

Re: [PATCH 11/17] hw/block/nvme: add remaining mandatory controller parameters

2020-07-02 Thread Dmitry Fomichev
LGTM with one small nit (see below)...

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for any remaining mandatory controller operating parameters
> (features).
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 39 +--
>  hw/block/nvme.h   | 18 ++
>  hw/block/trace-events |  2 ++
>  include/block/nvme.h  |  7 +++
>  4 files changed, 60 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index da13ca1ddb60..647f408854ae 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1057,8 +1057,16 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>  uint32_t result;
> +uint8_t fid = NVME_GETSETFEAT_FID(dw10);
> +uint16_t iv;
>  
> -switch (dw10) {
> +trace_pci_nvme_getfeat(nvme_cid(req), fid, dw11);
> +
> +if (!nvme_feature_support[fid]) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +switch (fid) {
>  case NVME_TEMPERATURE_THRESHOLD:
>  result = 0;
>  
> @@ -1089,14 +1097,27 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>   ((n->params.max_ioqpairs - 1) << 16));
>  trace_pci_nvme_getfeat_numq(result);
>  break;
> +case NVME_INTERRUPT_VECTOR_CONF:
> +iv = dw11 & 0x;
> +if (iv >= n->params.max_ioqpairs + 1) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +result = iv;
> +if (iv == n->admin_cq.vector) {
> +result |= NVME_INTVC_NOCOALESCING;
> +}
> +
> +result = cpu_to_le32(result);
> +break;
>  case NVME_ASYNCHRONOUS_EVENT_CONF:
>  result = cpu_to_le32(n->features.async_config);
>  break;
>  case NVME_TIMESTAMP:
>  return nvme_get_feature_timestamp(n, cmd);
>  default:
> -trace_pci_nvme_err_invalid_getfeat(dw10);
> -return NVME_INVALID_FIELD | NVME_DNR;
> +result = cpu_to_le32(nvme_feature_default[fid]);
> +break;
>  }
>  
>  req->cqe.result = result;
> @@ -1125,8 +1146,15 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  {
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
> +uint8_t fid = NVME_GETSETFEAT_FID(dw10);
>  
> -switch (dw10) {
> +trace_pci_nvme_setfeat(nvme_cid(req), fid, dw11);
> +
> +if (!nvme_feature_support[fid]) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +switch (fid) {
>  case NVME_TEMPERATURE_THRESHOLD:
>  if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
>  break;
> @@ -1173,8 +1201,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  case NVME_TIMESTAMP:
>  return nvme_set_feature_timestamp(n, cmd);
>  default:
> -trace_pci_nvme_err_invalid_setfeat(dw10);
> -return NVME_INVALID_FIELD | NVME_DNR;
> +return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;

In spec, it is "Changeable", could as well add that 'e' here

>  }
>  return NVME_SUCCESS;
>  }
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 16a254d30b4e..d0763eb59e5d 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -90,6 +90,24 @@ typedef struct NvmeFeatureVal {
>  uint32_tasync_config;
>  } NvmeFeatureVal;
>  
> +static const uint32_t nvme_feature_default[0x100] = {
> +[NVME_ARBITRATION]   = NVME_ARB_AB_NOLIMIT,
> +};
> +
> +static const bool nvme_feature_support[0x100] = {
> +[NVME_ARBITRATION]  = true,
> +[NVME_POWER_MANAGEMENT] = true,
> +[NVME_TEMPERATURE_THRESHOLD]= true,
> +[NVME_ERROR_RECOVERY]   = true,
> +[NVME_VOLATILE_WRITE_CACHE] = true,
> +[NVME_NUMBER_OF_QUEUES] = true,
> +[NVME_INTERRUPT_COALESCING] = true,
> +[NVME_INTERRUPT_VECTOR_CONF]= true,
> +[NVME_WRITE_ATOMICITY]  = true,
> +[NVME_ASYNCHRONOUS_EVENT_CONF]  = true,
> +[NVME_TIMESTAMP]= true,
> +};
> +
>  typedef struct NvmeCtrl {
>  PCIDeviceparent_obj;
>  MemoryRegion iomem;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 091af16ca7d7..42e62f4649f8 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -46,6 +46,8 @@ pci_nvme_identify_ctrl(void) "identify controller"
>  pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, 
> uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 
> 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
> +pci_nvme_getfeat(uint16_t cid, 

Re: [PATCH 08/17] hw/block/nvme: move NvmeFeatureVal into hw/block/nvme.h

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The NvmeFeatureVal does not belong with the spec-related data structures
> in include/block/nvme.h that is shared between the block-level nvme
> driver and the emulated nvme device.
> 
> Move it into the nvme device specific header file as it is the only
> user of the structure. Also, remove the unused members.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.h  | 11 +++
>  include/block/nvme.h | 20 
>  2 files changed, 11 insertions(+), 20 deletions(-)
> 
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 1f64a0e94035..16a254d30b4e 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -79,6 +79,17 @@ static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
>  #define NVME(obj) \
>  OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
>  
> +typedef struct NvmeFeatureVal {
> +union {
> +struct {
> +uint16_t temp_thresh_hi;
> +uint16_t temp_thresh_low;
> +};
> +uint32_t temp_thresh;
> +};
> +uint32_tasync_config;
> +} NvmeFeatureVal;
> +
>  typedef struct NvmeCtrl {
>  PCIDeviceparent_obj;
>  MemoryRegion iomem;
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index e98584e38134..c9f232a70e98 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -865,26 +865,6 @@ enum NvmeIdCtrlLpa {
>  #define NVME_CTRL_SGLS_MPTR_SGL  (0x1 << 19)
>  #define NVME_CTRL_SGLS_ADDR_OFFSET   (0x1 << 20)
>  
> -typedef struct NvmeFeatureVal {
> -uint32_tarbitration;
> -uint32_tpower_mgmt;
> -union {
> -struct {
> -uint16_t temp_thresh_hi;
> -uint16_t temp_thresh_low;
> -};
> -uint32_t temp_thresh;
> -};
> -uint32_terr_rec;
> -uint32_tvolatile_wc;
> -uint32_tnum_queues;
> -uint32_tint_coalescing;
> -uint32_t*int_vector_config;
> -uint32_twrite_atomicity;
> -uint32_tasync_config;
> -uint32_tsw_prog_marker;
> -} NvmeFeatureVal;
> -
>  #define NVME_ARB_AB(arb)(arb & 0x7)
>  #define NVME_ARB_LPW(arb)   ((arb >> 8) & 0xff)
>  #define NVME_ARB_MPW(arb)   ((arb >> 16) & 0xff)


Re: [PATCH 04/17] hw/block/nvme: add temperature threshold feature

2020-07-02 Thread Dmitry Fomichev
On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> It might seem weird to implement this feature for an emulated device,
> but it is mandatory to support and the feature is useful for testing
> asynchronous event request support, which will be added in a later
> patch.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c  | 48 
>  hw/block/nvme.h  |  1 +
>  include/block/nvme.h |  8 +++-
>  3 files changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b7037a7d3504..5ca50646369e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -59,6 +59,9 @@
>  #define NVME_DB_SIZE  4
>  #define NVME_CMB_BIR 2
>  #define NVME_PMR_BIR 2
> +#define NVME_TEMPERATURE 0x143
> +#define NVME_TEMPERATURE_WARNING 0x157
> +#define NVME_TEMPERATURE_CRITICAL 0x175
>  
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>  do { \
> @@ -827,9 +830,31 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, 
> NvmeCmd *cmd)
>  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
> +uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>  uint32_t result;
>  
>  switch (dw10) {
> +case NVME_TEMPERATURE_THRESHOLD:
> +result = 0;
> +
> +/*
> + * The controller only implements the Composite Temperature sensor, 
> so
> + * return 0 for all other sensors.
> + */
> +if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> +break;
> +}
> +
> +switch (NVME_TEMP_THSEL(dw11)) {
> +case NVME_TEMP_THSEL_OVER:
> +result = cpu_to_le16(n->features.temp_thresh_hi);
> +break;
> +case NVME_TEMP_THSEL_UNDER:
> +result = cpu_to_le16(n->features.temp_thresh_low);
> +break;
> +}
> +
> +break;
> 
> >  case NVME_VOLATILE_WRITE_CACHE:
>  result = blk_enable_write_cache(n->conf.blk);
>  trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
> @@ -874,6 +899,23 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>  
>  switch (dw10) {
> +case NVME_TEMPERATURE_THRESHOLD:
> +if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) {
> +break;
> +}
> +
> +switch (NVME_TEMP_THSEL(dw11)) {
> +case NVME_TEMP_THSEL_OVER:
> +n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11);
> +break;
> +case NVME_TEMP_THSEL_UNDER:
> +n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11);
> +break;
> +default:
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +break;
>  case NVME_VOLATILE_WRITE_CACHE:
>  blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
>  break;
> @@ -1454,6 +1496,7 @@ static void nvme_init_state(NvmeCtrl *n)
>  n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
>  n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1);
>  n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1);
> +n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
>  }
>  
>  static void nvme_init_blk(NvmeCtrl *n, Error **errp)
> @@ -1611,6 +1654,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->acl = 3;
>  id->frmw = 7 << 1;
>  id->lpa = 1 << 0;
> +
> +/* recommended default value (~70 C) */
> +id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> +id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL);
> +
>  id->sqes = (0x6 << 4) | 0x6;
>  id->cqes = (0x4 << 4) | 0x4;
>  id->nn = cpu_to_le32(n->num_namespaces);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 1bf5c80ed843..3acde10e1d2a 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -107,6 +107,7 @@ typedef struct NvmeCtrl {
>  NvmeSQueue  admin_sq;
>  NvmeCQueue  admin_cq;
>  NvmeIdCtrl  id_ctrl;
> +NvmeFeatureVal  features;
>  } NvmeCtrl;
>  
>  /* calculate the number of LBAs that the namespace can accomodate */
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 6d1fa6ff2228..bb651d0cbf5a 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -860,7 +860,13 @@ enum NvmeIdCtrlOncs {
>  typedef struct NvmeFeatureVal {
>  uint32_tarbitration;
>  uint32_tpower_mgmt;
> -uint32_ttemp_thresh;
> +union {
> +struct {
> +uint16_t temp_thresh_hi;
> +uint16_t temp_thresh_low;
> +};
> +uint32_t temp_thresh;

temp_thresh seems unused, is this union really needed?

> +};
>  uint32_terr_rec;
>  uint32_tvolatile_wc;
>  uint32_tnum_queues;


Re: [PATCH 02/17] hw/block/nvme: additional tracing

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add various additional tracing and streamline nvme_identify_ns and
> nvme_identify_nslist (they do not need to repeat the command, it is
> already in the trace name).
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 19 +++
>  hw/block/nvme.h   | 14 ++
>  hw/block/trace-events | 13 +++--
>  3 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 71b388aa0e20..f5d9148f0936 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -331,6 +331,8 @@ static void nvme_post_cqes(void *opaque)
>  static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
>  {
>  assert(cq->cqid == req->sq->cqid);
> +trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid,
> +  req->status);
>  QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
>  QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
>  timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
> @@ -343,6 +345,8 @@ static void nvme_rw_cb(void *opaque, int ret)
>  NvmeCtrl *n = sq->ctrl;
>  NvmeCQueue *cq = n->cq[sq->cqid];
>  
> +trace_pci_nvme_rw_cb(nvme_cid(req));
> +
>  if (!ret) {
>  block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
>  req->status = NVME_SUCCESS;
> @@ -378,6 +382,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, 
> NvmeNamespace *ns, NvmeCmd *cmd,
>  uint64_t offset = slba << data_shift;
>  uint32_t count = nlb << data_shift;
>  
> +trace_pci_nvme_write_zeroes(nvme_cid(req), slba, nlb);
> +
>  if (unlikely(slba + nlb > ns->id_ns.nsze)) {
>  trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
>  return NVME_LBA_RANGE | NVME_DNR;
> @@ -445,6 +451,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  NvmeNamespace *ns;
>  uint32_t nsid = le32_to_cpu(cmd->nsid);
>  
> +trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
> +
>  if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
>  trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
>  return NVME_INVALID_NSID | NVME_DNR;
> @@ -876,6 +884,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  
>  static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
> +trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), cmd->opcode);
> +
>  switch (cmd->opcode) {
>  case NVME_ADM_CMD_DELETE_SQ:
>  return nvme_del_sq(n, cmd);
> @@ -1204,6 +1214,8 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr 
> addr, unsigned size)
>  uint8_t *ptr = (uint8_t *)&n->bar;
>  uint64_t val = 0;
>  
> +trace_pci_nvme_mmio_read(addr);
> +
>  if (unlikely(addr & (sizeof(uint32_t) - 1))) {
>  NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32,
> "MMIO read not 32-bit aligned,"
> @@ -1273,6 +1285,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, 
> int val)
>  return;
>  }
>  
> +trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head);
> +
>  start_sqs = nvme_cq_full(cq) ? 1 : 0;
>  cq->head = new_head;
>  if (start_sqs) {
> @@ -1311,6 +1325,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, 
> int val)
>  return;
>  }
>  
> +trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);
> +
>  sq->tail = new_tail;
>  timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
>  }
> @@ -1320,6 +1336,9 @@ static void nvme_mmio_write(void *opaque, hwaddr addr, 
> uint64_t data,
>  unsigned size)
>  {
>  NvmeCtrl *n = (NvmeCtrl *)opaque;
> +
> +trace_pci_nvme_mmio_write(addr, data);
> +
>  if (addr < sizeof(n->bar)) {
>  nvme_write_bar(n, addr, data, size);
>  } else if (addr >= 0x1000) {
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 1d30c0bca283..1bf5c80ed843 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -115,4 +115,18 @@ static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, 
> NvmeNamespace *ns)
>  return n->ns_size >> nvme_ns_lbads(ns);
>  }
>  
> +static inline uint16_t nvme_cid(NvmeRequest *req)
> +{
> +if (req) {
> +return le16_to_cpu(req->cqe.cid);
> +}
> +
> +return 0xffff;
> +}
> +
> +static inline uint16_t nvme_sqid(NvmeRequest *req)
> +{
> +return le16_to_cpu(req->sq->sqid);
> +}
> +
>  #endif /* HW_NVME_H */
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 958fcc5508d1..c40c0d2e4b28 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -33,19 +33,28 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ 
> vector %u"
>  pci_nvme_irq_pin(void) "pulsing IRQ pin"
>  pci_nvme_irq_masked(void) "IRQ is masked"

Re: [PATCH 10/17] hw/block/nvme: fix missing endian conversion

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Fix a missing cpu_to conversion.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index c2507d8836fd..da13ca1ddb60 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1081,7 +1081,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
> *cmd, NvmeRequest *req)
>  
>  break;
>  case NVME_VOLATILE_WRITE_CACHE:
> -result = blk_enable_write_cache(n->conf.blk);
> +result = cpu_to_le32(blk_enable_write_cache(n->conf.blk));
>  trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
>  break;
>  case NVME_NUMBER_OF_QUEUES:


Re: [PATCH 05/17] hw/block/nvme: mark fw slot 1 as read-only

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Mark firmware slot 1 as read-only and only support that slot.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c  | 3 ++-
>  include/block/nvme.h | 4 
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 5ca50646369e..f8e91a6965ed 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -62,6 +62,7 @@
>  #define NVME_TEMPERATURE 0x143
>  #define NVME_TEMPERATURE_WARNING 0x157
>  #define NVME_TEMPERATURE_CRITICAL 0x175
> +#define NVME_NUM_FW_SLOTS 1
>  
>  #define NVME_GUEST_ERR(trace, fmt, ...) \
>  do { \
> @@ -1652,7 +1653,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>   * inconsequential.
>   */
>  id->acl = 3;
> -id->frmw = 7 << 1;
> +id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
>  id->lpa = 1 << 0;
>  
>  /* recommended default value (~70 C) */
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index bb651d0cbf5a..003b15af9cd9 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -842,6 +842,10 @@ enum NvmeIdCtrlOncs {
>  NVME_ONCS_TIMESTAMP = 1 << 6,
>  };
>  
> +enum NvmeIdCtrlFrmw {
> +NVME_FRMW_SLOT1_RO = 1 << 0,
> +};
> +
>  #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
>  #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
>  #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)


Re: [PATCH 01/17] hw/block/nvme: bump spec data structures to v1.3

2020-07-02 Thread Dmitry Fomichev
On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add missing fields in the Identify Controller and Identify Namespace
> data structures to bring them in line with NVMe v1.3.
> 
> This also adds data structures and defines for SGL support which
> requires a couple of trivial changes to the nvme block driver as well.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Fam Zheng 
> Reviewed-by: Maxim Levitsky 
> ---
>  block/nvme.c |  18 ++---
>  hw/block/nvme.c  |  12 ++--
>  include/block/nvme.h | 154 ++-
>  3 files changed, 152 insertions(+), 32 deletions(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index eb2f54dd9dc9..29e90557c428 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -446,7 +446,7 @@ static void nvme_identify(BlockDriverState *bs, int 
> namespace, Error **errp)
>  error_setg(errp, "Cannot map buffer for DMA");
>  goto out;
>  }
> -cmd.prp1 = cpu_to_le64(iova);
> +cmd.dptr.prp1 = cpu_to_le64(iova);
>  
>  if (nvme_cmd_sync(bs, s->queues[0], &cmd)) {
>  error_setg(errp, "Failed to identify controller");
> @@ -545,7 +545,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
> **errp)
>  }
>  cmd = (NvmeCmd) {
>  .opcode = NVME_ADM_CMD_CREATE_CQ,
> -.prp1 = cpu_to_le64(q->cq.iova),
> +.dptr.prp1 = cpu_to_le64(q->cq.iova),
>  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xffff)),
>  .cdw11 = cpu_to_le32(0x3),
>  };
> @@ -556,7 +556,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error 
> **errp)
>  }
>  cmd = (NvmeCmd) {
>  .opcode = NVME_ADM_CMD_CREATE_SQ,
> -.prp1 = cpu_to_le64(q->sq.iova),
> +.dptr.prp1 = cpu_to_le64(q->sq.iova),
>  .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xffff)),
>  .cdw11 = cpu_to_le32(0x1 | (n << 16)),
>  };
> @@ -904,16 +904,16 @@ try_map:
>  case 0:
>  abort();
>  case 1:
> -cmd->prp1 = pagelist[0];
> -cmd->prp2 = 0;
> +cmd->dptr.prp1 = pagelist[0];
> +cmd->dptr.prp2 = 0;
>  break;
>  case 2:
> -cmd->prp1 = pagelist[0];
> -cmd->prp2 = pagelist[1];
> +cmd->dptr.prp1 = pagelist[0];
> +cmd->dptr.prp2 = pagelist[1];
>  break;
>  default:
> -cmd->prp1 = pagelist[0];
> -cmd->prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
> +cmd->dptr.prp1 = pagelist[0];
> +cmd->dptr.prp2 = cpu_to_le64(req->prp_list_iova + sizeof(uint64_t));
>  break;
>  }
>  trace_nvme_cmd_map_qiov(s, cmd, req, qiov, entries);
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 1aee042d4cb2..71b388aa0e20 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -397,8 +397,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
>  uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
>  uint64_t slba = le64_to_cpu(rw->slba);
> -uint64_t prp1 = le64_to_cpu(rw->prp1);
> -uint64_t prp2 = le64_to_cpu(rw->prp2);
> +uint64_t prp1 = le64_to_cpu(rw->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(rw->dptr.prp2);
>  
>  uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>  uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
> @@ -795,8 +795,8 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl 
> *n)
>  
>  static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
>  {
> -uint64_t prp1 = le64_to_cpu(cmd->prp1);
> -uint64_t prp2 = le64_to_cpu(cmd->prp2);
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
>  uint64_t timestamp = nvme_get_timestamp(n);
>  
> @@ -834,8 +834,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, 
> NvmeCmd *cmd)
>  {
>  uint16_t ret;
>  uint64_t timestamp;
> -uint64_t prp1 = le64_to_cpu(cmd->prp1);
> -uint64_t prp2 = le64_to_cpu(cmd->prp2);
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
>  ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
>  sizeof(timestamp), prp1, prp2);
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 1720ee1d5158..6d1fa6ff2228 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -377,15 +377,53 @@ enum NvmePmrmscMask {
>  #define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
>  (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
>  
> +enum NvmeSglDescriptorType {
> +NVME_SGL_DESCR_TYPE_DATA_BLOCK  = 0x0,
> +NVME_SGL_DESCR_TYPE_BIT_BUCKET  = 0x1,
> +NVME_SGL_DESCR_TYPE_SEGMENT = 0x2,
> +NVME_SGL_DESCR_TYPE_LAST_SEGMENT= 0x3,
> +NVME_SGL_DESCR_TYPE_KEYED_DATA_BLOCK= 0x4,
> +
> +NVME_SGL_DESCR_TYPE_VENDOR_SPECIFIC = 0xf,
> +};
> +
> +enum 

Re: [PATCH 03/17] hw/block/nvme: add support for the abort command

2020-07-02 Thread Dmitry Fomichev
Looks good,

Reviewed-by: Dmitry Fomichev 

On Mon, 2020-06-29 at 20:26 +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.1 ("Abort command").
> 
> The Abort command is a best effort command; for now, the device always
> fails to abort the given command.
> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 27 +++
>  1 file changed, 27 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f5d9148f0936..b7037a7d3504 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -761,6 +761,18 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>  }
>  }
>  
> +static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff;
> +
> +req->cqe.result = 1;
> +if (nvme_check_sqid(n, sqid)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +return NVME_SUCCESS;
> +}
> +
>  static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
>  {
>  trace_pci_nvme_setfeat_timestamp(ts);
> @@ -897,6 +909,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  return nvme_create_cq(n, cmd);
>  case NVME_ADM_CMD_IDENTIFY:
>  return nvme_identify(n, cmd);
> +case NVME_ADM_CMD_ABORT:
> +return nvme_abort(n, cmd, req);
>  case NVME_ADM_CMD_SET_FEATURES:
>  return nvme_set_feature(n, cmd, req);
>  case NVME_ADM_CMD_GET_FEATURES:
> @@ -1582,6 +1596,19 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->ieee[1] = 0x02;
>  id->ieee[2] = 0xb3;
>  id->oacs = cpu_to_le16(0);
> +
> +/*
> + * Because the controller always completes the Abort command immediately,
> + * there can never be more than one concurrently executing Abort command,
> + * so this value is never used for anything. Note that there can easily 
> be
> + * many Abort commands in the queues, but they are not considered
> + * "executing" until processed by nvme_abort.
> + *
> + * The specification recommends a value of 3 for Abort Command Limit 
> (four
> + * concurrently outstanding Abort commands), so lets use that though it 
> is
> + * inconsequential.
> + */
> +id->acl = 3;
>  id->frmw = 7 << 1;
>  id->lpa = 1 << 0;
>  id->sqes = (0x6 << 4) | 0x6;


Re: [PATCH v4 2/2] nvme: allow cmb and pmr to be enabled on same device

2020-07-02 Thread Andrzej Jakowski
On 7/2/20 10:51 AM, Klaus Jensen wrote:
> On Jul  2 08:07, Andrzej Jakowski wrote:
>> On 7/2/20 3:31 AM, Klaus Jensen wrote:
>>> Aight, an update here. This only happens when QEMU is run with a virtual
>>> IOMMU. Otherwise, the kernel is happy.
>>>
>>> With the vIOMMU, qemu also craps out a bit:
>>>
>>> qemu-system-x86_64: vtd_iova_to_slpte: detected slpte permission error 
>>> (iova=0xfd20, level=0x2, slpte=0x0, write=0)
>>> qemu-system-x86_64: vtd_iommu_translate: detected translation failure 
>>> (dev=03:00:00, iova=0xfd20)
>>>
>>> So I think we are back in QEMU land for the bug.
>>
>> Can you share command line for that?
>>
>>
> 
> qemu-system-x86_64 \
>   -nodefaults \
>   -display none \
>   -device intel-iommu,pt,intremap=on,device-iotlb=on \
>   -machine type=q35,accel=kvm,kernel_irqchip=split \
>   -cpu host \
>   -smp 4 \
>   -m 8G \
>   -nic user,model=virtio-net-pci,hostfwd=tcp::-:22 \
>   -device virtio-rng-pci \
>   -drive 
> id=boot,file=/home/kbj/work/src/vmctl/state/pmr/boot.qcow2,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap
>  \
>   -device pcie-root-port,id=pcie_root_port1,chassis=1,slot=0 \
>   -device x3130-upstream,id=pcie_upstream1,bus=pcie_root_port1 \
>   -device 
> xio3130-downstream,id=pcie_downstream1,bus=pcie_upstream1,chassis=1,slot=1 \
>   -drive 
> id=nvme0n1,file=/home/kbj/work/src/vmctl/state/pmr/nvme0n1.img,format=raw,if=none,discard=on,detect-zeroes=unmap
>  \
>   -object memory-backend-file,id=pmr,share=on,mem-path=pmr.bin,size=1M \
>   -device 
> nvme,id=nvme0,serial=deadbeef,bus=pcie_downstream1,drive=nvme0n1,msix_qsize=1,pmrdev=pmr,cmb_size_mb=2
>  \
>   -pidfile /home/kbj/work/src/vmctl/run/pmr/pidfile \
>   -kernel /home/kbj/work/src/kernel/linux/arch/x86_64/boot/bzImage \
>   -append root=/dev/vda1 console=ttyS0,115200 audit=0 nokaslr \
>   -virtfs 
> local,path=/home/kbj/work/src/kernel/linux,security_model=none,readonly,mount_tag=modules
>  \
>   -serial mon:stdio \
>   -trace pci_nvme*
> 
> 

I focused on reproducing this, and it looks to me like my patch doesn't
necessarily introduce a regression. I ran it with and without the patch, and in
both cases I get an error while registering the CMB. Here is the guest kernel log:

[   87.606482] nvme nvme0: pci function 0000:00:04.0
[   87.635577] dev=95b0a83b bar=2 size=134217728 offset=0
[   87.636593] nvme nvme0: failed to register the CMB ret=-95
[   87.643262] nvme nvme0: 12/0/0 default/read/poll queues

Any thoughts?



Re: [PATCH v9 30/34] qcow2: Add prealloc field to QCowL2Meta

2020-07-02 Thread Alberto Garcia
On Thu 02 Jul 2020 05:09:47 PM CEST, Max Reitz wrote:
>> Without a backing file, there is no read required - writing to an
>> unallocated subcluster within a preallocated cluster merely has to
>> provide zeros to the rest of the write.  And depending on whether we
>> can intelligently guarantee that the underlying protocol already
>> reads as zeroes when preallocated, we even have an optimization where
>> even that is not necessary.  We can still lump it in the "COW"
>> terminology, in that our write is more complex than merely writing in
>> place, but it isn't a true copy-on-write operation as there is
>> nothing to be copied.
>
> The term “COW” specifically in the qcow2 driver also refers to having
> to write zeroes to an area that isn’t written to by the guest as part
> of the process of having to allocate a (sub)cluster.

The question is valid: if the space for the clusters is allocated but
the subclusters are not marked as such then any partial write request
will need to fill the rest with zeroes (in practice handle_alloc_space()
can do that efficiently but that's another question).

If there is a backing file then there's no other alternative because we
do need to copy the data from the backing file.

If there is no backing file perhaps we could allocate all subclusters as
well. I suppose we can detect that scenario at that point in the code (I
haven't checked) and I don't know what would happen if one later
attaches a backing file at runtime using the command-line options.

But what I would argue is that I don't see the benefit of using extended
L2 entries on a preallocated image with no backing file: other than
having twice as much L2 metadata what would be the use? The point of
subclusters is that they make allocation more efficient, but if the
image is already fully allocated then they give you nothing.

Berto



Re: [PATCH v9 28/34] qcow2: Add subcluster support to qcow2_co_pwrite_zeroes()

2020-07-02 Thread Alberto Garcia
On Thu 02 Jul 2020 04:28:57 PM CEST, Max Reitz wrote:
>> +/* For full clusters use zero_in_l2_slice() instead */
>> +assert(nb_subclusters > 0 && nb_subclusters < 
>> s->subclusters_per_cluster);
>> +assert(sc + nb_subclusters <= s->subclusters_per_cluster);
>
> Maybe we should also assert that @offset is aligned to the subcluster
> size.

It doesn't hurt, but the only caller already guarantees that ...

>> @@ -4367,12 +4367,13 @@ static int coroutine_fn 
>> qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
>>  uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size);
>
> Can we instead align this to just subclusters?

I think so, good catch.

Berto



Re: [PATCH v3 01/19] .mailmap: Update Alexander Graf email address

2020-07-02 Thread Alexander Graf



> Am 02.07.2020 um 19:38 schrieb Philippe Mathieu-Daudé :
> 
> Update Alexander Graf email address to avoid emails bouncing.
> 
> Suggested-by: Alexander Graf 
> Signed-off-by: Philippe Mathieu-Daudé 

Thanks a lot!

Reviewed-by: Alexander Graf 

Alex

> ---
> .mailmap | 1 +
> 1 file changed, 1 insertion(+)
> 
> diff --git a/.mailmap b/.mailmap
> index 926cac6bb8..e46e6c7302 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -44,6 +44,7 @@ Aleksandar Markovic  
>  Aleksandar Markovic  
> Aleksandar Rikalo  
> Aleksandar Rikalo  
> +Alexander Graf  
> Anthony Liguori  Anthony Liguori 
> Filip Bozuta  
> Frederic Konrad  
> -- 
> 2.21.3
> 



Re: [PATCH v9 21/34] qcow2: Add subcluster support to qcow2_get_host_offset()

2020-07-02 Thread Alberto Garcia
On Thu 02 Jul 2020 02:46:27 PM CEST, Max Reitz wrote:
>> -/* must be allocated */
>> -assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
>> -   first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
>> +assert(*l2_index + nb_clusters <= s->l2_size);
>
> Not l2_slice_size?

Oh, indeed!

>> +} else if (check_offset) {
>
> My gcc (v10.1.1) appears to be a bit daft, and so doesn’t recognize
> that check_offset must always be initialized before this line is hit.

Yeah I noticed that patchew complained, I'll fix that.

Berto



Re: [PATCH v9 14/34] qcow2: Add QCow2SubclusterType and qcow2_get_subcluster_type()

2020-07-02 Thread Alberto Garcia
On Thu 02 Jul 2020 11:57:46 AM CEST, Max Reitz wrote:
>> The reason why we would want to check it is, of course, because that
>> bit does have a meaning in regular L2 entries.
>> 
>> But that bit is ignored in images with subclusters so the only reason
>> why we would check it is to report corruption, not because we need to
>> know its value.
>
> Sure.  But isn’t that the whole point of having
> QCOW2_SUBCLUSTER_INVALID in the first place?

At the moment we're only returning QCOW2_SUBCLUSTER_INVALID in cases
where there is no way to interpret the entry correctly: a) the
allocation and zero bits are set for the same subcluster, and b) the
allocation bit is set but the entry has no valid offset.

It doesn't mean that we cannot use _SUBCLUSTER_INVALID for cases like
the one we're discussing, but this one is different from the other two.

Berto



Re: [PATCH] tcg: Fix do_nonatomic_op_* vs signed operations

2020-07-02 Thread Alistair Francis
On Wed, Jul 1, 2020 at 10:00 AM Richard Henderson
 wrote:
>
> The smin/smax/umin/umax operations require the operands to be
> properly sign extended.  Do not drop the MO_SIGN bit from the
> load, and additionally extend the val input.
>
> Reported-by: LIU Zhiwei 
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Applied to riscv-to-apply.next

Alistair

> ---
>  tcg/tcg-op.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index e60b74fb82..4b8a473fad 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -3189,8 +3189,9 @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv 
> addr, TCGv_i32 val,
>
>  memop = tcg_canonicalize_memop(memop, 0, 0);
>
> -tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
> -gen(t2, t1, val);
> +tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
> +tcg_gen_ext_i32(t2, val, memop);
> +gen(t2, t1, t2);
>  tcg_gen_qemu_st_i32(t2, addr, idx, memop);
>
>  tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
> @@ -3232,8 +3233,9 @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv 
> addr, TCGv_i64 val,
>
>  memop = tcg_canonicalize_memop(memop, 1, 0);
>
> -tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
> -gen(t2, t1, val);
> +tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
> +tcg_gen_ext_i64(t2, val, memop);
> +gen(t2, t1, t2);
>  tcg_gen_qemu_st_i64(t2, addr, idx, memop);
>
>  tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
> --
> 2.25.1
>
>



Re: [PATCH] configure / util: Auto-detect the availability of openpty()

2020-07-02 Thread Michele Denber

On 07/02/20 13:34, Thomas Huth wrote:


That's just the context, cut at three lines after the last change. 
Simply try to apply the patch with "patch -p1 -i ..." or "git am" in a 
separate git branch if you're using git. It should hopefully apply 
cleanly.


Looks like it partly worked:

root@hemlock:~/qemu-5.0.0# gpatch -p1 -i < configpatch.diff
gpatch: option requires an argument -- 'i'
gpatch: Try 'gpatch --help' for more information.
root@hemlock:~/qemu-5.0.0# gpatch -p1 < configpatch.diff
patching file configure
Hunk #1 succeeded at 5049 (offset -85 lines).
Hunk #2 succeeded at 7117 (offset -267 lines).
patching file util/qemu-openpty.c
Hunk #1 FAILED at 52.
Hunk #2 succeeded at 95 with fuzz 2 (offset 2 lines).
1 out of 2 hunks FAILED -- saving rejects to file util/qemu-openpty.c.rej
root@hemlock:~/qemu-5.0.0# cat util/qemu-openpty.c.rej
--- util/qemu-openpty.c
+++ util/qemu-openpty.c
@@ -52,7 +52,8 @@
 #endif

 #ifdef __sun__
-/* Once Solaris has openpty(), this is going to be removed. */
+
+#if !defined(HAVE_OPENPTY)
 static int openpty(int *amaster, int *aslave, char *name,
struct termios *termp, struct winsize *winp)
 {
root@hemlock:~/qemu-5.0.0#


- Michele



Re: [PATCH v4 00/21] microvm: add acpi support

2020-07-02 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200702204859.9876-1-kra...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v4 00/21] microvm: add acpi support
Type: series
Message-id: 20200702204859.9876-1-kra...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20200702204859.9876-1-kra...@redhat.com -> 
patchew/20200702204859.9876-1-kra...@redhat.com
Switched to a new branch 'test'
1e173ac tests/acpi: update expected data files for microvm
7f4d75c tests/acpi: add microvm test
7933c7d tests/acpi: allow override blkdev
8246d8e tests/acpi: allow microvm test data updates.
09ff54c tests/acpi: clear bios-tables-test-allowed-diff.h
a6ac3a9 microvm: wire up hotplug
37fe132 x86: move cpu plug from pc to x86
18502ab x86: move acpi_dev from pc/microvm
e25eb4c x86: constify x86_machine_is_*_enabled
3144c4e microvm/acpi: disable virtio-mmio cmdline hack
488512b microvm/acpi: use seabios with acpi=on
4dce0b0 microvm/acpi: use GSI 16-23 for virtio
6c1e43a microvm/acpi: add acpi_dsdt_add_virtio() for x86
7e90f10 microvm/acpi: add minimal acpi support
91fb357 microvm: make virtio irq base runtime configurable
75d0e28 acpi: move acpi_dsdt_add_power_button() to ged
b1b3872 acpi: ged: add x86 device variant.
2f82b96 acpi: ged: add control regs
7b24784 seabios: add bios-microvm.bin binary
be06767 seabios: add microvm config, update build rules
30aa71d microvm: name qboot binary qboot.rom

=== OUTPUT BEGIN ===
1/21 Checking commit 30aa71d445e7 (microvm: name qboot binary qboot.rom)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#36: 
rename from pc-bios/bios-microvm.bin

total: 0 errors, 1 warnings, 35 lines checked

Patch 1/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
2/21 Checking commit be0676767e5e (seabios: add microvm config, update build 
rules)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#29: 
new file mode 100644

total: 0 errors, 1 warnings, 39 lines checked

Patch 2/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/21 Checking commit 7b247849b125 (seabios: add bios-microvm.bin binary)
4/21 Checking commit 2f82b96fc4fb (acpi: ged: add control regs)
5/21 Checking commit b1b387286c9d (acpi: ged: add x86 device variant.)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

total: 0 errors, 1 warnings, 51 lines checked

Patch 5/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
6/21 Checking commit 75d0e28f4afe (acpi: move acpi_dsdt_add_power_button() to 
ged)
7/21 Checking commit 91fb3578211e (microvm: make virtio irq base runtime 
configurable)
8/21 Checking commit 7e90f10773b4 (microvm/acpi: add minimal acpi support)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#34: 
new file mode 100644

WARNING: Block comments use a leading /* on a separate line
#39: FILE: hw/i386/acpi-microvm.c:1:
+/* Support for generating ACPI tables and passing them to Guests

WARNING: Block comments use a leading /* on a separate line
#150: FILE: hw/i386/acpi-microvm.c:112:
+table_offsets = g_array_new(false, true /* clear */,

WARNING: Block comments use a leading /* on a separate line
#154: FILE: hw/i386/acpi-microvm.c:116:
+ 64 /* Ensure FACS is aligned */,

WARNING: Block comments use a leading /* on a separate line
#155: FILE: hw/i386/acpi-microvm.c:117:
+ false /* high memory */);

total: 0 errors, 5 warnings, 297 lines checked

Patch 8/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
9/21 Checking commit 6c1e43a0b904 (microvm/acpi: add acpi_dsdt_add_virtio() for 
x86)
10/21 Checking commit 4dce0b04d88f (microvm/acpi: use GSI 16-23 for virtio)
11/21 Checking commit 488512b60cfb (microvm/acpi: use seabios with acpi=on)
12/21 Checking commit 3144c4ef8a34 (microvm/acpi: disable virtio-mmio cmdline 
hack)
13/21 Checking commit e25eb4c6c1ca (x86: constify x86_machine_is_*_enabled)
14/21 Checking commit 18502ab064f4 (x86: move acpi_dev from pc/microvm)
15/21 Checking commit 37fe132c446b (x86: move cpu plug from pc to x86)
ERROR: space required before the open parenthesis '('
#344: FILE: hw/i386/x86.c:187:
+if(!object_dynamic_cast(OBJECT(cpu), 

Re: nvme emulation merge process

2020-07-02 Thread Keith Busch
On Thu, Jul 02, 2020 at 01:29:26PM -0700, Andrzej Jakowski wrote:
> 
> Thx! Of course I am interested in helping and I think it is actually a great
> idea to have a couple of designated maintainers/reviewers as it would be easier
> for folks to receive feedback vs requesting it in a polling manner :)
> And please don't get me wrong -- I'm not complaining about anything -- I
> think it is just reality that everybody is stretched out in multiple
> directions
> struggling to allocate time for multiple things. Having many people will
> actually increase the likelihood of introducing high quality improvements.
> 
> Also, +1 on separate tree for nvme emulation.

Thanks for your help.

Klaus and I will be setting up an external tree for qemu-nvme
development (tentatively on git.infradead.org) and pull-request. I'm
just waiting for the server admin to upload our public keys. If I don't
hear back by Monday, I will use an alternate server in the interim.



Re: [PATCH v4 00/21] microvm: add acpi support

2020-07-02 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200702204859.9876-1-kra...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v4 00/21] microvm: add acpi support
Type: series
Message-id: 20200702204859.9876-1-kra...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

From https://github.com/patchew-project/qemu
 * [new tag] patchew/20200702204859.9876-1-kra...@redhat.com -> 
patchew/20200702204859.9876-1-kra...@redhat.com
Switched to a new branch 'test'
6d5eb94 tests/acpi: update expected data files for microvm
bc45791 tests/acpi: add microvm test
c4b1e12 tests/acpi: allow override blkdev
afb44f2 tests/acpi: allow microvm test data updates.
ef356bd tests/acpi: clear bios-tables-test-allowed-diff.h
b80aa70 microvm: wire up hotplug
8f80d76 x86: move cpu plug from pc to x86
e3d689d x86: move acpi_dev from pc/microvm
2ca513e x86: constify x86_machine_is_*_enabled
dff3286 microvm/acpi: disable virtio-mmio cmdline hack
ee2b30f microvm/acpi: use seabios with acpi=on
ec0fefa microvm/acpi: use GSI 16-23 for virtio
35a5595 microvm/acpi: add acpi_dsdt_add_virtio() for x86
ef52f38 microvm/acpi: add minimal acpi support
9c5c1a3 microvm: make virtio irq base runtime configurable
dfbba95 acpi: move acpi_dsdt_add_power_button() to ged
9350314 acpi: ged: add x86 device variant.
7e7cbaa acpi: ged: add control regs
0d6d2dd seabios: add bios-microvm.bin binary
f847401 seabios: add microvm config, update build rules
1aa4e76 microvm: name qboot binary qboot.rom

=== OUTPUT BEGIN ===
1/21 Checking commit 1aa4e767856e (microvm: name qboot binary qboot.rom)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#36: 
rename from pc-bios/bios-microvm.bin

total: 0 errors, 1 warnings, 35 lines checked

Patch 1/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
2/21 Checking commit f84740183b7c (seabios: add microvm config, update build 
rules)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#29: 
new file mode 100644

total: 0 errors, 1 warnings, 39 lines checked

Patch 2/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/21 Checking commit 0d6d2ddc3815 (seabios: add bios-microvm.bin binary)
4/21 Checking commit 7e7cbaa1cf2a (acpi: ged: add control regs)
5/21 Checking commit 9350314a8c62 (acpi: ged: add x86 device variant.)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

total: 0 errors, 1 warnings, 51 lines checked

Patch 5/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
6/21 Checking commit dfbba956d1bc (acpi: move acpi_dsdt_add_power_button() to 
ged)
7/21 Checking commit 9c5c1a34efc7 (microvm: make virtio irq base runtime 
configurable)
8/21 Checking commit ef52f38efb11 (microvm/acpi: add minimal acpi support)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#34: 
new file mode 100644

WARNING: Block comments use a leading /* on a separate line
#39: FILE: hw/i386/acpi-microvm.c:1:
+/* Support for generating ACPI tables and passing them to Guests

WARNING: Block comments use a leading /* on a separate line
#150: FILE: hw/i386/acpi-microvm.c:112:
+table_offsets = g_array_new(false, true /* clear */,

WARNING: Block comments use a leading /* on a separate line
#154: FILE: hw/i386/acpi-microvm.c:116:
+ 64 /* Ensure FACS is aligned */,

WARNING: Block comments use a leading /* on a separate line
#155: FILE: hw/i386/acpi-microvm.c:117:
+ false /* high memory */);

total: 0 errors, 5 warnings, 297 lines checked

Patch 8/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
9/21 Checking commit 35a55956c365 (microvm/acpi: add acpi_dsdt_add_virtio() for 
x86)
10/21 Checking commit ec0fefa9351d (microvm/acpi: use GSI 16-23 for virtio)
11/21 Checking commit ee2b30f1d829 (microvm/acpi: use seabios with acpi=on)
12/21 Checking commit dff3286bf4a4 (microvm/acpi: disable virtio-mmio cmdline 
hack)
13/21 Checking commit 2ca513ea5407 (x86: constify x86_machine_is_*_enabled)
14/21 Checking commit e3d689d93ee0 (x86: move acpi_dev from pc/microvm)
15/21 Checking commit 8f80d768dea4 (x86: move cpu plug from pc to x86)
ERROR: space required before the open parenthesis '('
#344: FILE: hw/i386/x86.c:187:
+if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {

WARNING: Block comments use a leading /* on 

[PATCH v4 17/21] tests/acpi: clear bios-tables-test-allowed-diff.h

2020-07-02 Thread Gerd Hoffmann
Leftover from acpi cleanup series.

Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 18 --
 1 file changed, 18 deletions(-)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index 8992f1f12b77..dfb8523c8bf4 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,19 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/pc/DSDT",
-"tests/data/acpi/pc/DSDT.acpihmat",
-"tests/data/acpi/pc/DSDT.bridge",
-"tests/data/acpi/pc/DSDT.cphp",
-"tests/data/acpi/pc/DSDT.dimmpxm",
-"tests/data/acpi/pc/DSDT.ipmikcs",
-"tests/data/acpi/pc/DSDT.memhp",
-"tests/data/acpi/pc/DSDT.numamem",
-"tests/data/acpi/q35/DSDT",
-"tests/data/acpi/q35/DSDT.acpihmat",
-"tests/data/acpi/q35/DSDT.bridge",
-"tests/data/acpi/q35/DSDT.cphp",
-"tests/data/acpi/q35/DSDT.dimmpxm",
-"tests/data/acpi/q35/DSDT.ipmibt",
-"tests/data/acpi/q35/DSDT.memhp",
-"tests/data/acpi/q35/DSDT.mmio64",
-"tests/data/acpi/q35/DSDT.numamem",
-"tests/data/acpi/q35/DSDT.tis",
-- 
2.18.4




[PATCH v4 10/21] microvm/acpi: use GSI 16-23 for virtio

2020-07-02 Thread Gerd Hoffmann
With ACPI enabled and IO-APIC being properly declared in the ACPI tables
we can use interrupt lines 16-23 for virtio and avoid shared interrupts.

With acpi disabled we continue to use lines 5-12.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Sergio Lopez 
Reviewed-by: Igor Mammedov 
---
 hw/i386/microvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 75eca7306b11..1bcd57a6fd80 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -125,7 +125,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 
 kvmclock_create();
 
-mms->virtio_irq_base = 5;
+mms->virtio_irq_base = x86_machine_is_acpi_enabled(x86ms) ? 16 : 5;
 for (i = 0; i < VIRTIO_NUM_TRANSPORTS; i++) {
 sysbus_create_simple("virtio-mmio",
  VIRTIO_MMIO_BASE + i * 512,
-- 
2.18.4




[PATCH v4 06/21] acpi: move acpi_dsdt_add_power_button() to ged

2020-07-02 Thread Gerd Hoffmann
Allow reuse for microvm.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Igor Mammedov 
---
 include/hw/acpi/generic_event_device.h | 1 +
 hw/acpi/generic_event_device.c | 8 
 hw/arm/virt-acpi-build.c   | 8 
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/hw/acpi/generic_event_device.h 
b/include/hw/acpi/generic_event_device.h
index 9687cb524258..ebed1ffe4231 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -113,5 +113,6 @@ typedef struct AcpiGedState {
 
 void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev,
uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base);
+void acpi_dsdt_add_power_button(Aml *scope);
 
 #endif
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 491df80a5cc7..6df400e1ee16 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -142,6 +142,14 @@ void build_ged_aml(Aml *table, const char *name, 
HotplugHandler *hotplug_dev,
 aml_append(table, dev);
 }
 
+void acpi_dsdt_add_power_button(Aml *scope)
+{
+Aml *dev = aml_device(ACPI_POWER_BUTTON_DEVICE);
+aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C0C")));
+aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+aml_append(scope, dev);
+}
+
 /* Memory read by the GED _EVT AML dynamic method */
 static uint64_t ged_evt_read(void *opaque, hwaddr addr, unsigned size)
 {
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 1384a2cf2ab4..852eb6484b0e 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -357,14 +357,6 @@ static void acpi_dsdt_add_gpio(Aml *scope, const 
MemMapEntry *gpio_memmap,
 aml_append(scope, dev);
 }
 
-static void acpi_dsdt_add_power_button(Aml *scope)
-{
-Aml *dev = aml_device(ACPI_POWER_BUTTON_DEVICE);
-aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C0C")));
-aml_append(dev, aml_name_decl("_UID", aml_int(0)));
-aml_append(scope, dev);
-}
-
 static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
 {
 PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
-- 
2.18.4




[PATCH v4 11/21] microvm/acpi: use seabios with acpi=on

2020-07-02 Thread Gerd Hoffmann
With acpi=off continue to use qboot.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/i386/microvm.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 1bcd57a6fd80..ac605d92b984 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -52,6 +52,7 @@
 #include "hw/xen/start_info.h"
 
 #define MICROVM_QBOOT_FILENAME "qboot.rom"
+#define MICROVM_BIOS_FILENAME  "bios-microvm.bin"
 
 static void microvm_set_rtc(MicrovmMachineState *mms, ISADevice *s)
 {
@@ -174,7 +175,9 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 }
 
 if (bios_name == NULL) {
-bios_name = MICROVM_QBOOT_FILENAME;
+bios_name = x86_machine_is_acpi_enabled(x86ms)
+? MICROVM_BIOS_FILENAME
+: MICROVM_QBOOT_FILENAME;
 }
 x86_bios_rom_init(get_system_memory(), true);
 }
-- 
2.18.4




[Bug 1886097] [NEW] Error in user-mode calculation of ELF program's brk

2020-07-02 Thread Langston
Public bug reported:

There's a discrepancy between the way QEMU user-mode and Linux calculate
the initial program break for statically-linked binaries. I have a
binary with the following segments:

  Program Headers:
Type   Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
EXIDX  0x065a14 0x00075a14 0x00075a14 0x00588 0x00588 R   0x4
PHDR   0x0a3000 0x000a3000 0x000a3000 0x00160 0x00160 R   0x1000
LOAD   0x0a3000 0x000a3000 0x000a3000 0x00160 0x00160 R   0x1000
LOAD   0x00 0x0001 0x0001 0x65fa0 0x65fa0 R E 0x1
LOAD   0x066b7c 0x00086b7c 0x00086b7c 0x02384 0x02384 RW  0x1
NOTE   0x000114 0x00010114 0x00010114 0x00044 0x00044 R   0x4
TLS0x066b7c 0x00086b7c 0x00086b7c 0x00010 0x00030 R   0x4
GNU_STACK  0x00 0x 0x 0x0 0x0 RW  0x8
GNU_RELRO  0x066b7c 0x00086b7c 0x00086b7c 0x00484 0x00484 R   0x1
LOAD   0x07e000 0x00089000 0x00089000 0x03ff4 0x03ff4 R E 0x1000
LOAD   0x098000 0x0003 0x0003 0x01000 0x01000 RW  0x1000

The call to set_brk in Linux's binfmt_elf.c receives these arguments:

  set_brk(0xa3160, 0xa3160, 1)
  
Whereas in QEMU, info->brk gets set to 0x88f00. When the binary is run in QEMU, 
it crashes on the second call to brk, whereas it runs fine on real ARM 
hardware. I think the trouble is that the program break is set to an address 
lower than the virtual address of a LOAD segment (the program headers, in this 
case).

I believe that this discrepancy arises because in QEMU, info->brk is
only incremented when the LOAD segment in question has PROT_WRITE. For
this binary, the LOAD segment with write permissions and the highest
virtual address is

  LOAD   0x066b7c 0x00086b7c 0x00086b7c 0x02384 0x02384 RW  0x1

which overlaps with the TLS segment:

TLS0x066b7c 0x00086b7c 0x00086b7c 0x00010 0x00030 R   0x4

However, the Linux kernel puts the program break after the loadable segment 
with the highest virtual address, regardless of flags. So I think the fix is 
for QEMU to do the same.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1886097

Title:
  Error in user-mode calculation of ELF program's brk

Status in QEMU:
  New

Bug description:
  There's a discrepancy between the way QEMU user-mode and Linux
  calculate the initial program break for statically-linked binaries. I
  have a binary with the following segments:

Program Headers:
  Type   Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  EXIDX  0x065a14 0x00075a14 0x00075a14 0x00588 0x00588 R   0x4
  PHDR   0x0a3000 0x000a3000 0x000a3000 0x00160 0x00160 R   0x1000
  LOAD   0x0a3000 0x000a3000 0x000a3000 0x00160 0x00160 R   0x1000
  LOAD   0x00 0x0001 0x0001 0x65fa0 0x65fa0 R E 0x1
  LOAD   0x066b7c 0x00086b7c 0x00086b7c 0x02384 0x02384 RW  0x1
  NOTE   0x000114 0x00010114 0x00010114 0x00044 0x00044 R   0x4
  TLS0x066b7c 0x00086b7c 0x00086b7c 0x00010 0x00030 R   0x4
  GNU_STACK  0x00 0x 0x 0x0 0x0 RW  0x8
  GNU_RELRO  0x066b7c 0x00086b7c 0x00086b7c 0x00484 0x00484 R   0x1
  LOAD   0x07e000 0x00089000 0x00089000 0x03ff4 0x03ff4 R E 0x1000
  LOAD   0x098000 0x0003 0x0003 0x01000 0x01000 RW  0x1000

  The call to set_brk in Linux's binfmt_elf.c receives these arguments:

set_brk(0xa3160, 0xa3160, 1)

  Whereas in QEMU, info->brk gets set to 0x88f00. When the binary is run in 
QEMU, it crashes on the second call to brk, whereas it runs fine on real ARM 
hardware. I think the trouble is that the program break is set to an address 
lower than the virtual address of a LOAD segment (the program headers, in this 
case).

  I believe that this discrepancy arises because in QEMU, info->brk is
  only incremented when the LOAD segment in question has PROT_WRITE. For
  this binary, the LOAD segment with write permissions and the highest
  virtual address is

LOAD   0x066b7c 0x00086b7c 0x00086b7c 0x02384 0x02384 RW  0x1
  
  which overlaps with the TLS segment:

  TLS0x066b7c 0x00086b7c 0x00086b7c 0x00010 0x00030 R   0x4
  
  However, the Linux kernel puts the program break after the loadable segment 
with the highest virtual address, regardless of flags. So I think the fix is 
for QEMU to do the same.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1886097/+subscriptions



[PATCH v4 05/21] acpi: ged: add x86 device variant.

2020-07-02 Thread Gerd Hoffmann
Set AcpiDeviceIfClass->madt_cpu,
otherwise identical to TYPE_ACPI_GED.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 include/hw/acpi/generic_event_device.h |  4 +++
 hw/i386/generic_event_device_x86.c | 36 ++
 hw/i386/Makefile.objs  |  1 +
 3 files changed, 41 insertions(+)
 create mode 100644 hw/i386/generic_event_device_x86.c

diff --git a/include/hw/acpi/generic_event_device.h 
b/include/hw/acpi/generic_event_device.h
index 474c92198080..9687cb524258 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -69,6 +69,10 @@
 #define ACPI_GED(obj) \
 OBJECT_CHECK(AcpiGedState, (obj), TYPE_ACPI_GED)
 
+#define TYPE_ACPI_GED_X86 "acpi-ged-x86"
+#define ACPI_GED_X86(obj) \
+OBJECT_CHECK(AcpiGedX86State, (obj), TYPE_ACPI_GED_X86)
+
 #define ACPI_GED_EVT_SEL_OFFSET0x0
 #define ACPI_GED_EVT_SEL_LEN   0x4
 
diff --git a/hw/i386/generic_event_device_x86.c 
b/hw/i386/generic_event_device_x86.c
new file mode 100644
index ..e26fb02a2ef6
--- /dev/null
+++ b/hw/i386/generic_event_device_x86.c
@@ -0,0 +1,36 @@
+/*
+ * x86 variant of the generic event device for hw reduced acpi
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/acpi/generic_event_device.h"
+#include "hw/i386/pc.h"
+
+static void acpi_ged_x86_class_init(ObjectClass *class, void *data)
+{
+AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class);
+
+adevc->madt_cpu = pc_madt_cpu_entry;
+}
+
+static const TypeInfo acpi_ged_x86_info = {
+.name  = TYPE_ACPI_GED_X86,
+.parent= TYPE_ACPI_GED,
+.class_init= acpi_ged_x86_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_HOTPLUG_HANDLER },
+{ TYPE_ACPI_DEVICE_IF },
+{ }
+}
+};
+
+static void acpi_ged_x86_register_types(void)
+{
+type_register_static(&acpi_ged_x86_info);
+}
+
+type_init(acpi_ged_x86_register_types)
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 6abc74551a72..622739305882 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -17,4 +17,5 @@ obj-$(CONFIG_PC) += port92.o
 
 obj-y += kvmvapic.o
 obj-$(CONFIG_ACPI) += acpi-common.o
+obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device_x86.o
 obj-$(CONFIG_PC) += acpi-build.o
-- 
2.18.4




[PATCH v4 15/21] x86: move cpu plug from pc to x86

2020-07-02 Thread Gerd Hoffmann
The cpu hotplug code handles the initialization of coldplugged cpus
too, so it is needed even in case cpu hotplug is not supported.

Move the code from pc to x86, so microvm can use it.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/i386/x86.h |   6 ++
 hw/i386/pc.c  | 234 ++
 hw/i386/x86.c | 222 +++
 3 files changed, 234 insertions(+), 228 deletions(-)

diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index de74c831c3ab..23c964471802 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -102,6 +102,12 @@ CpuInstanceProperties x86_cpu_index_to_props(MachineState 
*ms,
  unsigned cpu_index);
 int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
 const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
+CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
+void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
+void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
+  DeviceState *dev, Error **errp);
+void x86_cpu_plug(HotplugHandler *hotplug_dev,
+  DeviceState *dev, Error **errp);
 
 void x86_bios_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw);
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 98d29ead09b0..14036fcd4e3a 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -798,19 +798,6 @@ void pc_hot_add_cpu(MachineState *ms, const int64_t id, 
Error **errp)
 }
 }
 
-static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count)
-{
-if (cpus_count > 0xff) {
-/* If the number of CPUs can't be represented in 8 bits, the
- * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just
- * to make old BIOSes fail more predictably.
- */
-rtc_set_memory(rtc, 0x5f, 0);
-} else {
-rtc_set_memory(rtc, 0x5f, cpus_count - 1);
-}
-}
-
 static
 void pc_machine_done(Notifier *notifier, void *data)
 {
@@ -820,7 +807,7 @@ void pc_machine_done(Notifier *notifier, void *data)
 PCIBus *bus = pcms->bus;
 
 /* set the number of CPUs */
-rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
+x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
 
 if (bus) {
 int extra_hosts = 0;
@@ -1373,62 +1360,6 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev,
 error_propagate(errp, local_err);
 }
 
-static int pc_apic_cmp(const void *a, const void *b)
-{
-   CPUArchId *apic_a = (CPUArchId *)a;
-   CPUArchId *apic_b = (CPUArchId *)b;
-
-   return apic_a->arch_id - apic_b->arch_id;
-}
-
-/* returns pointer to CPUArchId descriptor that matches CPU's apic_id
- * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no
- * entry corresponding to CPU's apic_id returns NULL.
- */
-static CPUArchId *pc_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
-{
-CPUArchId apic_id, *found_cpu;
-
-apic_id.arch_id = id;
-found_cpu = bsearch(_id, ms->possible_cpus->cpus,
-ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus),
-pc_apic_cmp);
-if (found_cpu && idx) {
-*idx = found_cpu - ms->possible_cpus->cpus;
-}
-return found_cpu;
-}
-
-static void pc_cpu_plug(HotplugHandler *hotplug_dev,
-DeviceState *dev, Error **errp)
-{
-CPUArchId *found_cpu;
-Error *local_err = NULL;
-X86CPU *cpu = X86_CPU(dev);
-PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
-
-if (x86ms->acpi_dev) {
-hotplug_handler_plug(x86ms->acpi_dev, dev, _err);
-if (local_err) {
-goto out;
-}
-}
-
-/* increment the number of CPUs */
-x86ms->boot_cpus++;
-if (x86ms->rtc) {
-rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
-}
-if (x86ms->fw_cfg) {
-fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
-}
-
-found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
-found_cpu->cpu = OBJECT(dev);
-out:
-error_propagate(errp, local_err);
-}
 static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
  DeviceState *dev, Error **errp)
 {
@@ -1443,7 +1374,7 @@ static void pc_cpu_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 goto out;
 }
 
-pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, );
+x86_find_cpu_slot(MACHINE(pcms), cpu->apic_id, );
 assert(idx != -1);
 if (idx == 0) {
 error_setg(_err, "Boot CPU is unpluggable");
@@ -1475,172 +1406,19 @@ static void pc_cpu_unplug_cb(HotplugHandler 
*hotplug_dev,
 goto out;
 }
 
-found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
+found_cpu = x86_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL);
 found_cpu->cpu = NULL;
 qdev_unrealize(dev);
 
 /* decrement the number of CPUs */
 x86ms->boot_cpus--;
 /* Update the 

[PATCH v4 00/21] microvm: add acpi support

2020-07-02 Thread Gerd Hoffmann
I know that not supporting ACPI in microvm is intentional.  If you still
don't want ACPI this is perfectly fine, you can use the usual -no-acpi
switch to toggle ACPI support.

These are the advantages you are going to lose then:

  (1) virtio-mmio device discovery without command line hacks (tweaking
  the command line is a problem when not using direct kernel boot).
  (2) Better IO-APIC support, we can use IRQ lines 16-23.
  (3) ACPI power button (aka powerdown request) works.
  (4) machine poweroff (aka S5 state) works.

Together with the seabios patches for virtio-mmio support, this allows
booting standard Fedora images (cloud, coreos, workstation live) with the
microvm machine type.

git branch for testing (including updated seabios):
https://git.kraxel.org/cgit/qemu/log/?h=sirius/microvm

changes in v2:
  * some acpi cleanups are a separate patch series now.
  * switched to hw reduced acpi & generic event device.
  * misc fixes here and there.

changes in v3:
  * depends on "[PATCH v6 00/16] acpi: i386 tweaks" series.
  * renamed qboot to qboot.bin
  * updated seabios to master branch snapshot.
- this version boots fine with rtc=off
  * generic event device tweaks (Igor's comments).
  * make SMP work.
  * add RfC patches to turn off acpi by default for microvm.
  * misc fixes here and there.

changes in v4
  * rebase to latest master
- this also depends on the pending seabios update
  * drop some patches which got cherry-picked.
  * wire up cpu hotplug (also does coldplug cpu init).
  * add microvm acpi test case.
  * dropped RfC patches to turn off acpi by default for microvm.
  * misc fixes here and there.

take care,
  Gerd

Gerd Hoffmann (21):
  microvm: name qboot binary qboot.rom
  seabios: add microvm config, update build rules
  seabios: add bios-microvm.bin binary
  acpi: ged: add control regs
  acpi: ged: add x86 device variant.
  acpi: move acpi_dsdt_add_power_button() to ged
  microvm: make virtio irq base runtime configurable
  microvm/acpi: add minimal acpi support
  microvm/acpi: add acpi_dsdt_add_virtio() for x86
  microvm/acpi: use GSI 16-23 for virtio
  microvm/acpi: use seabios with acpi=on
  microvm/acpi: disable virtio-mmio cmdline hack
  x86: constify x86_machine_is_*_enabled
  x86: move acpi_dev from pc/microvm
  x86: move cpu plug from pc to x86
  microvm: wire up hotplug
  tests/acpi: clear bios-tables-test-allowed-diff.h
  tests/acpi: allow microvm test data updates.
  tests/acpi: allow override blkdev
  tests/acpi: add microvm test
  tests/acpi: update expected data files for microvm

 hw/i386/acpi-microvm.h  |   8 +
 include/hw/acpi/generic_event_device.h  |  12 +
 include/hw/i386/microvm.h   |  10 +-
 include/hw/i386/pc.h|   1 -
 include/hw/i386/x86.h   |  11 +-
 tests/qtest/bios-tables-test-allowed-diff.h |  18 --
 hw/acpi/generic_event_device.c  |  52 
 hw/arm/virt-acpi-build.c|   8 -
 hw/i386/acpi-build.c|   2 +-
 hw/i386/acpi-microvm.c  | 234 ++
 hw/i386/generic_event_device_x86.c  |  36 +++
 hw/i386/microvm.c   | 105 +++-
 hw/i386/pc.c| 260 ++--
 hw/i386/pc_piix.c   |   2 +-
 hw/i386/pc_q35.c|   2 +-
 hw/i386/x86.c   | 226 -
 tests/qtest/bios-tables-test.c  |  21 +-
 hw/i386/Kconfig |   1 +
 hw/i386/Makefile.objs   |   2 +
 pc-bios/bios-microvm.bin| Bin 65536 -> 131072 bytes
 pc-bios/qboot.rom   | Bin 0 -> 65536 bytes
 roms/Makefile   |  11 +-
 roms/config.seabios-microvm |  26 ++
 tests/data/acpi/microvm/APIC| Bin 0 -> 70 bytes
 tests/data/acpi/microvm/DSDT| Bin 0 -> 365 bytes
 tests/data/acpi/microvm/FACP| Bin 0 -> 268 bytes
 26 files changed, 761 insertions(+), 287 deletions(-)
 create mode 100644 hw/i386/acpi-microvm.h
 create mode 100644 hw/i386/acpi-microvm.c
 create mode 100644 hw/i386/generic_event_device_x86.c
 create mode 100644 pc-bios/qboot.rom
 create mode 100644 roms/config.seabios-microvm
 create mode 100644 tests/data/acpi/microvm/APIC
 create mode 100644 tests/data/acpi/microvm/DSDT
 create mode 100644 tests/data/acpi/microvm/FACP

-- 
2.18.4




[PATCH v4 20/21] tests/acpi: add microvm test

2020-07-02 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 24b715dce780..b5b98d5c0742 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -1007,6 +1007,20 @@ static void test_acpi_virt_tcg_memhp(void)
 
 }
 
+static void test_acpi_microvm_tcg(void)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = "microvm";
+data.required_struct_types = base_required_struct_types;
+data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types);
+data.blkdev = "virtio-blk-device";
+test_acpi_one(" -machine microvm,acpi=on,rtc=off",
+  &data);
+free_test_data(&data);
+}
+
 static void test_acpi_virt_tcg_numamem(void)
 {
 test_data data = {
@@ -1118,6 +1132,7 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
 qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat);
 qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat);
+qtest_add_func("acpi/microvm", test_acpi_microvm_tcg);
 } else if (strcmp(arch, "aarch64") == 0) {
 qtest_add_func("acpi/virt", test_acpi_virt_tcg);
 qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem);
-- 
2.18.4




[PATCH v4 12/21] microvm/acpi: disable virtio-mmio cmdline hack

2020-07-02 Thread Gerd Hoffmann
... in case we are using ACPI.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Igor Mammedov 
---
 hw/i386/microvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index ac605d92b984..fbda323f3b2a 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -343,7 +343,8 @@ static void microvm_machine_reset(MachineState *machine)
 CPUState *cs;
 X86CPU *cpu;
 
-if (machine->kernel_filename != NULL &&
+if (!x86_machine_is_acpi_enabled(X86_MACHINE(machine)) &&
+machine->kernel_filename != NULL &&
 mms->auto_kernel_cmdline && !mms->kernel_cmdline_fixed) {
 microvm_fix_kernel_cmdline(machine);
 mms->kernel_cmdline_fixed = true;
-- 
2.18.4




[PATCH v4 19/21] tests/acpi: allow override blkdev

2020-07-02 Thread Gerd Hoffmann
microvm needs virtio-blk instead of ide.

Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index c315156858f4..24b715dce780 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -72,6 +72,7 @@ typedef struct {
 const char *variant;
 const char *uefi_fl1;
 const char *uefi_fl2;
+const char *blkdev;
 const char *cd;
 const uint64_t ram_start;
 const uint64_t scan_len;
@@ -635,9 +636,10 @@ static void test_acpi_one(const char *params, test_data 
*data)
 args = g_strdup_printf("-machine %s,kernel-irqchip=off %s -accel tcg "
 "-net none -display none %s "
 "-drive id=hd0,if=none,file=%s,format=raw "
-"-device ide-hd,drive=hd0 ",
+"-device %s,drive=hd0 ",
  data->machine, data->tcg_only ? "" : "-accel kvm",
- params ? params : "", disk);
+ params ? params : "", disk,
+ data->blkdev ?: "ide-hd");
 }
 
 data->qts = qtest_init(args);
-- 
2.18.4




[PATCH v4 09/21] microvm/acpi: add acpi_dsdt_add_virtio() for x86

2020-07-02 Thread Gerd Hoffmann
Makes x86 linux kernel find virtio-mmio devices automatically.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Sergio Lopez 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-microvm.c | 52 ++
 1 file changed, 52 insertions(+)

diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c
index 7cd830e628af..20f297517457 100644
--- a/hw/i386/acpi-microvm.c
+++ b/hw/i386/acpi-microvm.c
@@ -21,6 +21,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qapi/error.h"
 
 #include "exec/memory.h"
@@ -32,10 +33,60 @@
 #include "hw/boards.h"
 #include "hw/i386/fw_cfg.h"
 #include "hw/i386/microvm.h"
+#include "hw/virtio/virtio-mmio.h"
 
 #include "acpi-common.h"
 #include "acpi-microvm.h"
 
+static void acpi_dsdt_add_virtio(Aml *scope,
+ MicrovmMachineState *mms)
+{
+gchar *separator;
+long int index;
+BusState *bus;
+BusChild *kid;
+
+bus = sysbus_get_default();
+QTAILQ_FOREACH(kid, &bus->children, sibling) {
+DeviceState *dev = kid->child;
+Object *obj = object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MMIO);
+
+if (obj) {
+VirtIOMMIOProxy *mmio = VIRTIO_MMIO(obj);
+VirtioBusState *mmio_virtio_bus = &mmio->bus;
+BusState *mmio_bus = &mmio_virtio_bus->parent_obj;
+
+if (QTAILQ_EMPTY(&mmio_bus->children)) {
+continue;
+}
+separator = g_strrstr(mmio_bus->name, ".");
+if (!separator) {
+continue;
+}
+if (qemu_strtol(separator + 1, NULL, 10, &index) != 0) {
+continue;
+}
+
+uint32_t irq = mms->virtio_irq_base + index;
+hwaddr base = VIRTIO_MMIO_BASE + index * 512;
+hwaddr size = 512;
+
+Aml *dev = aml_device("VR%02u", (unsigned)index);
+aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
+aml_append(dev, aml_name_decl("_UID", aml_int(index)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
+aml_append(crs,
+   aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+ AML_EXCLUSIVE, &irq, 1));
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+}
+}
+}
+
 static void
 build_dsdt_microvm(GArray *table_data, BIOSLinker *linker,
MicrovmMachineState *mms)
@@ -60,6 +111,7 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker,
 build_ged_aml(sb_scope, GED_DEVICE, HOTPLUG_HANDLER(mms->acpi_dev),
   GED_MMIO_IRQ, AML_SYSTEM_MEMORY, GED_MMIO_BASE);
 acpi_dsdt_add_power_button(sb_scope);
+acpi_dsdt_add_virtio(sb_scope, mms);
 aml_append(dsdt, sb_scope);
 
 scope = aml_scope("\\");
-- 
2.18.4




[PATCH v4 18/21] tests/acpi: allow microvm test data updates.

2020-07-02 Thread Gerd Hoffmann
Also add empty test data files.

Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test-allowed-diff.h | 3 +++
 tests/data/acpi/microvm/APIC| 0
 tests/data/acpi/microvm/DSDT| 0
 tests/data/acpi/microvm/FACP| 0
 4 files changed, 3 insertions(+)
 create mode 100644 tests/data/acpi/microvm/APIC
 create mode 100644 tests/data/acpi/microvm/DSDT
 create mode 100644 tests/data/acpi/microvm/FACP

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8bf4..97c3fa621b7f 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,4 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/microvm/APIC",
+"tests/data/acpi/microvm/DSDT",
+"tests/data/acpi/microvm/FACP",
diff --git a/tests/data/acpi/microvm/APIC b/tests/data/acpi/microvm/APIC
new file mode 100644
index ..e69de29bb2d1
diff --git a/tests/data/acpi/microvm/DSDT b/tests/data/acpi/microvm/DSDT
new file mode 100644
index ..e69de29bb2d1
diff --git a/tests/data/acpi/microvm/FACP b/tests/data/acpi/microvm/FACP
new file mode 100644
index ..e69de29bb2d1
-- 
2.18.4




[PATCH v4 13/21] x86: constify x86_machine_is_*_enabled

2020-07-02 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 include/hw/i386/x86.h | 4 ++--
 hw/i386/x86.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index b79f24e28545..a350ea3609f5 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -110,8 +110,8 @@ void x86_load_linux(X86MachineState *x86ms,
 bool pvh_enabled,
 bool linuxboot_dma_enabled);
 
-bool x86_machine_is_smm_enabled(X86MachineState *x86ms);
-bool x86_machine_is_acpi_enabled(X86MachineState *x86ms);
+bool x86_machine_is_smm_enabled(const X86MachineState *x86ms);
+bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms);
 
 /* Global System Interrupts */
 
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 34229b45c759..614723afbd9d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -846,7 +846,7 @@ void x86_bios_rom_init(MemoryRegion *rom_memory, bool 
isapc_ram_fw)
 bios);
 }
 
-bool x86_machine_is_smm_enabled(X86MachineState *x86ms)
+bool x86_machine_is_smm_enabled(const X86MachineState *x86ms)
 {
 bool smm_available = false;
 
@@ -888,7 +888,7 @@ static void x86_machine_set_smm(Object *obj, Visitor *v, 
const char *name,
 visit_type_OnOffAuto(v, name, >smm, errp);
 }
 
-bool x86_machine_is_acpi_enabled(X86MachineState *x86ms)
+bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms)
 {
 if (x86ms->acpi == ON_OFF_AUTO_OFF) {
 return false;
-- 
2.18.4




[PATCH v4 16/21] microvm: wire up hotplug

2020-07-02 Thread Gerd Hoffmann
The cpu hotplug code handles the initialization of coldplugged cpus
too, so it is needed even in case cpu hotplug is not supported.

Wire cpu hotplug up for microvm.
Without this we get a broken MADT table.

Signed-off-by: Gerd Hoffmann 
---
 hw/i386/microvm.c | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 3d8a66cfc3ac..a5b16b728f9f 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -320,6 +320,39 @@ static void microvm_fix_kernel_cmdline(MachineState 
*machine)
 g_free(cmdline);
 }
 
+static void microvm_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+   DeviceState *dev, Error **errp)
+{
+x86_cpu_pre_plug(hotplug_dev, dev, errp);
+}
+
+static void microvm_device_plug_cb(HotplugHandler *hotplug_dev,
+   DeviceState *dev, Error **errp)
+{
+x86_cpu_plug(hotplug_dev, dev, errp);
+}
+
+static void microvm_device_unplug_request_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+error_setg(errp, "unplug not supported by microvm");
+}
+
+static void microvm_device_unplug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+error_setg(errp, "unplug not supported by microvm");
+}
+
+static HotplugHandler *microvm_get_hotplug_handler(MachineState *machine,
+   DeviceState *dev)
+{
+if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+return HOTPLUG_HANDLER(machine);
+}
+return NULL;
+}
+
 static void microvm_machine_state_init(MachineState *machine)
 {
 MicrovmMachineState *mms = MICROVM_MACHINE(machine);
@@ -503,6 +536,7 @@ static void microvm_machine_initfn(Object *obj)
 static void microvm_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
+HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
 mc->init = microvm_machine_state_init;
 
@@ -523,6 +557,13 @@ static void microvm_class_init(ObjectClass *oc, void *data)
 /* Machine class handlers */
 mc->reset = microvm_machine_reset;
 
+/* hotplug (for cpu coldplug) */
+mc->get_hotplug_handler = microvm_get_hotplug_handler;
+hc->pre_plug = microvm_device_pre_plug_cb;
+hc->plug = microvm_device_plug_cb;
+hc->unplug_request = microvm_device_unplug_request_cb;
+hc->unplug = microvm_device_unplug_cb;
+
 object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto",
   microvm_machine_get_pic,
   microvm_machine_set_pic,
@@ -572,6 +613,7 @@ static const TypeInfo microvm_machine_info = {
 .class_size= sizeof(MicrovmMachineClass),
 .class_init= microvm_class_init,
 .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
  { }
 },
 };
-- 
2.18.4




[PATCH v4 02/21] seabios: add microvm config, update build rules

2020-07-02 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 roms/Makefile   |  5 -
 roms/config.seabios-microvm | 26 ++
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 roms/config.seabios-microvm

diff --git a/roms/Makefile b/roms/Makefile
index b185c880541c..1a9d14674464 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -72,9 +72,12 @@ default help:
@echo "  clean  -- delete the files generated by the 
previous" \
  "build targets"
 
-bios: build-seabios-config-seabios-128k build-seabios-config-seabios-256k
+bios: build-seabios-config-seabios-128k \
+   build-seabios-config-seabios-256k \
+   build-seabios-config-seabios-microvm
cp seabios/builds/seabios-128k/bios.bin ../pc-bios/bios.bin
cp seabios/builds/seabios-256k/bios.bin ../pc-bios/bios-256k.bin
+   cp seabios/builds/seabios-microvm/bios.bin ../pc-bios/bios-microvm.bin
 
 vgabios seavgabios: $(patsubst %,seavgabios-%,$(vgabios_variants))
 
diff --git a/roms/config.seabios-microvm b/roms/config.seabios-microvm
new file mode 100644
index ..a253e2edc6ec
--- /dev/null
+++ b/roms/config.seabios-microvm
@@ -0,0 +1,26 @@
+CONFIG_QEMU=y
+CONFIG_QEMU_HARDWARE=y
+CONFIG_PERMIT_UNALIGNED_PCIROM=y
+CONFIG_ROM_SIZE=128
+CONFIG_XEN=n
+CONFIG_BOOTSPLASH=n
+CONFIG_ATA=n
+CONFIG_AHCI=n
+CONFIG_SDCARD=n
+CONFIG_PVSCSI=n
+CONFIG_ESP_SCSI=n
+CONFIG_LSI_SCSI=n
+CONFIG_MEGASAS=n
+CONFIG_MPT_SCSI=n
+CONFIG_FLOPPY=n
+CONFIG_FLASH_FLOPPY=n
+CONFIG_NVME=n
+CONFIG_PS2PORT=n
+CONFIG_USB=n
+CONFIG_LPT=n
+CONFIG_RTC_TIMER=n
+CONFIG_USE_SMM=n
+CONFIG_PMTIMER=n
+CONFIG_TCGBIOS=n
+CONFIG_HARDWARE_IRQ=n
+CONFIG_ACPI_PARSE=y
-- 
2.18.4




[PATCH v4 14/21] x86: move acpi_dev from pc/microvm

2020-07-02 Thread Gerd Hoffmann
Both pc and microvm machine types have an acpi_dev field.
Move it to the common base type.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/i386/microvm.h |  1 -
 include/hw/i386/pc.h  |  1 -
 include/hw/i386/x86.h |  1 +
 hw/i386/acpi-build.c  |  2 +-
 hw/i386/acpi-microvm.c|  5 +++--
 hw/i386/microvm.c | 10 ++
 hw/i386/pc.c  | 34 +++---
 hw/i386/pc_piix.c |  2 +-
 hw/i386/pc_q35.c  |  2 +-
 9 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
index b6e0d4395af7..b8ec99aeb051 100644
--- a/include/hw/i386/microvm.h
+++ b/include/hw/i386/microvm.h
@@ -66,7 +66,6 @@ typedef struct {
 bool kernel_cmdline_fixed;
 Notifier machine_done;
 Notifier powerdown_req;
-AcpiDeviceIf *acpi_dev;
 } MicrovmMachineState;
 
 #define TYPE_MICROVM_MACHINE   MACHINE_TYPE_NAME("microvm")
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index dce1273c7dad..5e4126fe817d 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -29,7 +29,6 @@ struct PCMachineState {
 Notifier machine_done;
 
 /* Pointers to devices and objects: */
-HotplugHandler *acpi_dev;
 PCIBus *bus;
 I2CBus *smbus;
 PFlashCFI01 *flash[2];
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index a350ea3609f5..de74c831c3ab 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -50,6 +50,7 @@ typedef struct {
 FWCfgState *fw_cfg;
 qemu_irq *gsi;
 GMappedFile *initrd_mapped_file;
+HotplugHandler *acpi_dev;
 
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index b7bc2a35..c356cc71fe08 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2431,7 +2431,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
*machine)
 
 acpi_add_table(table_offsets, tables_blob);
 acpi_build_madt(tables_blob, tables->linker, x86ms,
-ACPI_DEVICE_IF(pcms->acpi_dev), true);
+ACPI_DEVICE_IF(x86ms->acpi_dev), true);
 
 vmgenid_dev = find_vmgenid_dev();
 if (vmgenid_dev) {
diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c
index 20f297517457..5856b37a06db 100644
--- a/hw/i386/acpi-microvm.c
+++ b/hw/i386/acpi-microvm.c
@@ -108,7 +108,7 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker,
 sb_scope = aml_scope("_SB");
 fw_cfg_add_acpi_dsdt(sb_scope, x86ms->fw_cfg);
 isa_build_aml(ISA_BUS(isabus), sb_scope);
-build_ged_aml(sb_scope, GED_DEVICE, HOTPLUG_HANDLER(mms->acpi_dev),
+build_ged_aml(sb_scope, GED_DEVICE, x86ms->acpi_dev,
   GED_MMIO_IRQ, AML_SYSTEM_MEMORY, GED_MMIO_BASE);
 acpi_dsdt_add_power_button(sb_scope);
 acpi_dsdt_add_virtio(sb_scope, mms);
@@ -135,6 +135,7 @@ static void acpi_build_microvm(AcpiBuildTables *tables,
MicrovmMachineState *mms)
 {
 MachineState *machine = MACHINE(mms);
+X86MachineState *x86ms = X86_MACHINE(mms);
 GArray *table_offsets;
 GArray *tables_blob = tables->table_data;
 unsigned dsdt, xsdt;
@@ -178,7 +179,7 @@ static void acpi_build_microvm(AcpiBuildTables *tables,
 
 acpi_add_table(table_offsets, tables_blob);
 acpi_build_madt(tables_blob, tables->linker, X86_MACHINE(machine),
-mms->acpi_dev, false);
+ACPI_DEVICE_IF(x86ms->acpi_dev), false);
 
 xsdt = tables_blob->len;
 build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL);
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index fbda323f3b2a..3d8a66cfc3ac 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -143,7 +143,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
x86ms->gsi[GED_MMIO_IRQ]);
 sysbus_realize(SYS_BUS_DEVICE(dev), _fatal);
-mms->acpi_dev = ACPI_DEVICE_IF(dev);
+x86ms->acpi_dev = HOTPLUG_HANDLER(dev);
 }
 
 if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) {
@@ -469,11 +469,13 @@ static void microvm_powerdown_req(Notifier *notifier, 
void *data)
 {
 MicrovmMachineState *mms = container_of(notifier, MicrovmMachineState,
 powerdown_req);
+X86MachineState *x86ms = X86_MACHINE(mms);
 
-if (mms->acpi_dev) {
-Object *obj = OBJECT(mms->acpi_dev);
+if (x86ms->acpi_dev) {
+Object *obj = OBJECT(x86ms->acpi_dev);
 AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
-adevc->send_event(mms->acpi_dev, ACPI_POWER_DOWN_STATUS);
+adevc->send_event(ACPI_DEVICE_IF(x86ms->acpi_dev),
+  ACPI_POWER_DOWN_STATUS);
 }
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4af9679d039b..98d29ead09b0 100644
--- a/hw/i386/pc.c
+++ 

[PATCH v4 03/21] seabios: add bios-microvm.bin binary

2020-07-02 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 pc-bios/bios-microvm.bin | Bin 0 -> 131072 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/bios-microvm.bin

diff --git a/pc-bios/bios-microvm.bin b/pc-bios/bios-microvm.bin
new file mode 100644
index 
..352c82692e11155c3e2c70d226c789a7c2b055f4
GIT binary patch
literal 131072
zcmeFadw5jU^~ZfCnIw}ia0VDKK#-`ThDwzvDiP3xNdUQMAY8)TR)1*(F9g(K-y)RR&%+g{RIrULenOL
z*2DgHXuckfVZ-wx#NYna)i@qj&~?0DlAZU1<7#K3CIv-T=?R6+hRsW53X}1W2nS?q=Eno!
zPH@dUJ`wmhrr|D0C*+HcmaFC2Vh|}@nARj0DKCZtLS%-3k>iucoO^rXtmT8^Z@<9rC=;r
z1KtF?z=yyD3DD2bw93_t<9jsiOEB!$n)YWf@Lo;(J=k&|a{Pd%EdvjM{lNDiW91=D
z`{kp^Ab1D-3)p{yt^iMh1KoV;7RZzXa-*Zhnw<10Q?$s?T1W)U*^zNpdM@nsp$5r!5Xj~90Cc@|7>&%h=TXP
zVc;IXJOGA(Vo(8=fP29+;3e=X*bY7cr$AONeFZKCSA+53cJM3k0C)nt3=RR;IgB4L
z5zGg7fk(lxbLkiG2G|GMK>B}?@1tIVl_%q1z&~`8f+yWj0b>KsA?)jQ_9he1{
zfe5(p0_p{3fV;tmAooH|y9~Su_JGtp>IcfeGvEVo#X#f++yItQXJ8>%1%3wxT#fDke*{N>ehp(AOav9625bZWTL>)>9*GVGpMaaZ@V*2&
z1bfCn_geHA7!oG0uE7`J!|~{g3Dg%%zYbiFt^|$X3oxXV@dcW}+n^V7=P+;=
zh=YNX=v(kA_y-tYrfE|^1-KI!-~q4}q)(yV;9l?wcnh?GjH%44;36;wdQVJMb)c38dWt
zpTM>AAJN~1s{OY#po4q5cFFD4e%@QCfEzQE~VX|6cpTv
z-UZ!$iLL|tz^|7v_kkC{9%)rG4S$T$SK$fZe0P-!M!UPQ($L6)5cZ7
zd(aGygFe5aF5r3aCU_ULg5%)IAiM`A=w6L31o})_zax?7(55}gKZ$?ceDcx1*5?fPzxRbb>JiLB{=K%%GuY3rlnzK|%Gh4yA+Q{XDimpU@uHgJ=%quJI}u+^WM-`HHXy6@0z
zUq+wC;8t7QQ8<#|!}*Ofo2QnA7usyE8_fmp`nru;ech?*ZN7AG?YKnz^OjU?=A34u
z$y|p86fU+U3brnE28QOY%ox*BXw%}Y#@5xY>RmoZ;6(1q)WZ0Y=8)8~TWE}cNNYPwQF(O9oycxi#46GQ19B2Zbt@hPrMhL3ny%{x)}2(*d4q_Akv1
z6@49=*p^y!sOQAi&>i0fPiRZhgP(qPozF2Y7<0$}ncp~q6}`474azQUZ6n#J$uwb(
zvC};7R3c$SL;YW@i~@aGBjWZnT=ciGy|Np?)>{{RfS`Gv~~u!?RasRqywA
z2^_z~msh>be{THTz{#KZd#*0G(Z`t*Y#+Ma?K1Ht$2SC*IyFDzLxhXQPbmwH$=2)3
zd)u}nvloQN+VdL=wl5s6*NwD=$ETzeytnA$z?}%S0z@4Ww|FL*FN#0x3Cp=No
z?q2ND>n1X|cuF_6TEfv>##ktKhO|aARj+HT+3!oYH5q&3aT#FY;*>=20O5g#8|eJ+
zrzK$~>D$vz?w%N)y`-CM$LZAZtl-Dw{FRk+&0zS5IB9_;=`BFjAi5-6g1TiMunX
zF`D)}D^iQ5E#>L2f9$3;Pci--Ds;dDN2v7cP+@C4qrpbT{9UtERH-98_CS7P{;uHh
z1HNvlMUI{mouO-UgR$KB;n~gMu?{i^>zy0l$s09a@kZT$-)<283J{RljtnI(@yu
z<++UOb5A1Duk(;65ATIX9gr+LVE+`PK6uC#UXUFwcamT}Bj1P`JCXGpcr1wa*|{j)
zXf-}CnhC0uSi8rV?6LE94Q~w8##8x+FBK}Y5xAuaF1R$BqKi{pn-7KRw!%>$4lj
z@{bxm__`dvF84Sx>4h${JkG*`P5y0g)^6oGFQ91)lTkWi+|p(N<+PrJ|CMfEX7`K
zo4?;ZunFxReo(Y~IQWJr>)3P15IhMzirYpj*uKOadI6`4hETot)l;E>gtpIK=x4n?
zd%YIG-&-h2_L|3$6f0n)3YZ!Dj1awETU$uan~z82MM*)h(Z2R63T&3h;Y7whCsM`?
z!{OchL}2Nans4B=N$aIYA|C#j2@*`3Y6avf^5!9eV0U~`VCfZ_Z<_JuJZ+w4rEo}=
zH@=s}X=PbY5UFgZQ+ZF#X^Iutp7LHrJU8AuT$tBU5vh&2n27QjY5+)u%Va>l
ztJm+1q+3Ot`zjt{SIFt{+w-@@Co*sKh~Nko29*$1+mxS(=j(Ob!V_)rIP=&(y)IS@
z4Q4&>gFt9^YBvDK>(-t30Et5*mH(e>?u?+O6${E9o_w6|a7By6JAIM=D7UO3a4>$;O=v_hi-GF-sUKHe@Wkh4;S_}${og1
zr%~!M<~T!B?dH4RBoguT&=v9R`TLa+*@$TI$wP{lgIw>
zeB)(YYK{K%Fd^|QZ~P;%*vQw}n&2S^V(Ru7Z$?uXFNvs(MlD(-71CPWXrT
zom$^|*@ZG#5J8;6T*L*^xXpis^rEe&20M?3LjlWR~oY3`}
zW)gJhr{?#-^qd%$4JN+USChGxW7P7iJU|wED3qeq)qq#uPj1DaXh#rrBfHl0NKB
ziHOoM9Wqg=1Yxh;3i{?mBGH*T%AlD(
z+^(-6%l*R4v<-xa?g*qnDdjTDSpGGCIS
zt;?S_-&<0%R(A*LWMfOrm@1*)(j^>$e(5lq!i`|95uzWvCs(_#4xE^=
zX}wV`raw2Q6qS5Zvsez%+B`g<5k
zj7in(jglmOrCztdmY--)qh*j@uU^!c7!Xp=X_BfZYmW42isZ0t@1yw%L_W>PH}15v
zyc07cyXvb+WyQIybgI5AojfTwbGv_J@I=qWBj9PX5;jxJ&)~L1b))~Hz%t#oxKC)A
zyWrHqvyBsU>|Z(}eHn4*Ya-poaqN?A32Y--LDIu6V~o9}&|!-fW{tewJEkI9*sUx#
zTId>CUg|YU-O<8qsbRD*M*^aSxrzy|o0FlO=gm|yZS0H;C5`z$a@FF!($?b5OfkB&
zcrQ$;Y7)I>j60jUB)&}~BEkg`!RL4p5saEY{Q?Fru(dCH_xs$knmKE2%gATaG%nqYEMV)~r_b!1yUGwE6jN?!mM44Q_7=@O|0`#(CByip={vh|u(mWM_^BRz
ztI#-7TWSjp@q~ICrK#Zsj+Vj9TPj84+J7^Z9peVU@MGPf#!4qndR4i4YMs78QmVTK+l>R|N_6>UQcbLS&$C%Wq+H$1}`dqAZM-4e@t$O4%Q|fXv}yPm1|rovc%e
zJr@+Fv=n;=*o=KD_-V|+qY~=B@O0Y0ccj%)+LNsynOBkvg*V%&8aQUFi(*%Zm@TT
zt}X6qZXq%_LZUSP5jEn}A~dwFnidqixnzutsZhgxyoCZ{)Yl~ZU5%Kr2`x~7trQ;+
zaWQZ!zCbqr#(U^N$tcb)5*@29P=KpN*Fl3vlhm{oTk$c9WBQBr=V&7o`vS^;j
z#O?N&*CWPyeLAYg9QrpbGMnESX&6h|-w3fj!2{nK*NA)F79bXiS9J(AcG~;
zkIa^OhijVYp}EmfIh13mD!hB)Lc95!IMeu6^JWS=%`tzM%EfoH@R$yz^5)V>m0@`s
z$6Ii-Q>MJWfs^`TSLA?Hy$y;pV|yTAI3GQgVlAy&}P`u6eAGgU1If?jBipE&0!r;^xGD0fqTr
zI*rE*i8k%7Fx+MsOrBUUM-*JVuU(1*o0ZywK}Lu%cIFozzKa
zTFcLvF724iLUp{yIls+qd}*!q>eX6r)z{=Rj+n1~NvbpZ^SB|9jq-92vzFi4{h+%j
zJwev^%fo1?@u3P?lBUR%x(W{Z`?oh^;MP9-I;tj2F=lyO@$2BItm9n+-|F+Bu|O6e
zLX4)k3uryoTEAjX_M!_X@t95HaxRh}>O85GH)m+PYPQ>)j=KesnmSblJM07Z_BleY
zyNy(F)!gjzcelam?!sxw(3cFs?-IU227D(ZhYIG>;Gy)z@b6XD5!~#N$sqPL6My@2
zGYxTc!5;Ue#V(*J%75Bix?WUpWDtMWh-cco>_{Rpoe}WUvhT%+>)Px?%nV|BMjjG5
zoJ1Bg%nXRpHq_X(z6s0C!y
zcqcGo!j0%$-)-yhd212*qlz)6H#r=TPNJpBXLTsdm;o
z=BJNKbH*{WNr?EFQ(;hlsa+y6?x`l51cDA(;m9)~4iI0Rup={Q;
z1s#l{+T;4tKp4B2#mD>zT5eLk(m9O33TZP>~SiDO?nPcH5%IX(y5Z$qilTVOcpLG~Zcx+r9IJ}%$N`xtGBGrj1rr-8OS!>D
zB_q{NgVQ!_M57xxqx(Naw_p#+4-qc*GeU09bS3Hc{2PDk{I-?x<>!UQwHAE2^5fvC
z)Rkx9QtufW_x0-TL(f{cgZY1=yI`7oVT-|->lfcp7V7_;(Au^%#n?W{XfpOGbJv_e
zrMJS^Qbke234{^Y>ucjkN!oLruOv#GEhZx
z1hq5%tY(=FGAkQ@zmLDsps;UxeK&25Y?mN9YTKk-8r0!ihG{4v#WvAZd5m^ZU4y6=
zKZAL-H{?OZbWts13l-}g*(`HU21b#2nEjymiik>Jr%q$};;dyD?kHIE)fmN*E=g
z+3iM+SNj5)2jFO!Bjn44obP}eEm;K_<($>&|uZIoy7+>~7ku2;#487&=PJrJT

[PATCH v4 07/21] microvm: make virtio irq base runtime configurable

2020-07-02 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Sergio Lopez 
---
 include/hw/i386/microvm.h |  2 +-
 hw/i386/microvm.c | 11 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
index fd34b78e0d2a..03e735723726 100644
--- a/include/hw/i386/microvm.h
+++ b/include/hw/i386/microvm.h
@@ -27,7 +27,6 @@
 
 /* Platform virtio definitions */
 #define VIRTIO_MMIO_BASE  0xfeb0
-#define VIRTIO_IRQ_BASE   5
 #define VIRTIO_NUM_TRANSPORTS 8
 #define VIRTIO_CMDLINE_MAXLEN 64
 
@@ -57,6 +56,7 @@ typedef struct {
 bool auto_kernel_cmdline;
 
 /* Machine state */
+uint32_t virtio_irq_base;
 bool kernel_cmdline_fixed;
 } MicrovmMachineState;
 
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 1300c396947b..ab6ee6c67b1a 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -121,10 +121,11 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 
 kvmclock_create();
 
+mms->virtio_irq_base = 5;
 for (i = 0; i < VIRTIO_NUM_TRANSPORTS; i++) {
 sysbus_create_simple("virtio-mmio",
  VIRTIO_MMIO_BASE + i * 512,
- x86ms->gsi[VIRTIO_IRQ_BASE + i]);
+ x86ms->gsi[mms->virtio_irq_base + i]);
 }
 
 /* Optional and legacy devices */
@@ -227,7 +228,7 @@ static void microvm_memory_init(MicrovmMachineState *mms)
 x86ms->ioapic_as = _space_memory;
 }
 
-static gchar *microvm_get_mmio_cmdline(gchar *name)
+static gchar *microvm_get_mmio_cmdline(gchar *name, uint32_t virtio_irq_base)
 {
 gchar *cmdline;
 gchar *separator;
@@ -247,7 +248,7 @@ static gchar *microvm_get_mmio_cmdline(gchar *name)
 ret = g_snprintf(cmdline, VIRTIO_CMDLINE_MAXLEN,
  " virtio_mmio.device=512@0x%lx:%ld",
  VIRTIO_MMIO_BASE + index * 512,
- VIRTIO_IRQ_BASE + index);
+ virtio_irq_base + index);
 if (ret < 0 || ret >= VIRTIO_CMDLINE_MAXLEN) {
 g_free(cmdline);
 return NULL;
@@ -259,6 +260,7 @@ static gchar *microvm_get_mmio_cmdline(gchar *name)
 static void microvm_fix_kernel_cmdline(MachineState *machine)
 {
 X86MachineState *x86ms = X86_MACHINE(machine);
+MicrovmMachineState *mms = MICROVM_MACHINE(machine);
 BusState *bus;
 BusChild *kid;
 char *cmdline;
@@ -282,7 +284,8 @@ static void microvm_fix_kernel_cmdline(MachineState 
*machine)
 BusState *mmio_bus = _virtio_bus->parent_obj;
 
 if (!QTAILQ_EMPTY(_bus->children)) {
-gchar *mmio_cmdline = microvm_get_mmio_cmdline(mmio_bus->name);
+gchar *mmio_cmdline = microvm_get_mmio_cmdline
+(mmio_bus->name, mms->virtio_irq_base);
 if (mmio_cmdline) {
 char *newcmd = g_strjoin(NULL, cmdline, mmio_cmdline, 
NULL);
 g_free(mmio_cmdline);
-- 
2.18.4




[PATCH v4 01/21] microvm: name qboot binary qboot.rom

2020-07-02 Thread Gerd Hoffmann
qboot isn't a bios and shouldn't be named that way.

Signed-off-by: Gerd Hoffmann 
---
 hw/i386/microvm.c   |   4 ++--
 pc-bios/{bios-microvm.bin => qboot.rom} | Bin
 roms/Makefile   |   6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)
 rename pc-bios/{bios-microvm.bin => qboot.rom} (100%)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 5e931975a06d..1300c396947b 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -47,7 +47,7 @@
 #include "kvm_i386.h"
 #include "hw/xen/start_info.h"
 
-#define MICROVM_BIOS_FILENAME "bios-microvm.bin"
+#define MICROVM_QBOOT_FILENAME "qboot.rom"
 
 static void microvm_set_rtc(MicrovmMachineState *mms, ISADevice *s)
 {
@@ -158,7 +158,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 }
 
 if (bios_name == NULL) {
-bios_name = MICROVM_BIOS_FILENAME;
+bios_name = MICROVM_QBOOT_FILENAME;
 }
 x86_bios_rom_init(get_system_memory(), true);
 }
diff --git a/pc-bios/bios-microvm.bin b/pc-bios/qboot.rom
similarity index 100%
rename from pc-bios/bios-microvm.bin
rename to pc-bios/qboot.rom
diff --git a/roms/Makefile b/roms/Makefile
index f9acf39954dc..b185c880541c 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -68,7 +68,7 @@ default help:
@echo "  opensbi64-virt -- update OpenSBI for 64-bit virt machine"
@echo "  opensbi32-sifive_u -- update OpenSBI for 32-bit sifive_u 
machine"
@echo "  opensbi64-sifive_u -- update OpenSBI for 64-bit sifive_u 
machine"
-   @echo "  bios-microvm   -- update bios-microvm.bin (qboot)"
+   @echo "  qboot  -- update qboot"
@echo "  clean  -- delete the files generated by the 
previous" \
  "build targets"
 
@@ -194,9 +194,9 @@ opensbi64-sifive_u:
PLATFORM="sifive/fu540"
cp opensbi/build/platform/sifive/fu540/firmware/fw_jump.bin 
../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin
 
-bios-microvm:
+qboot:
$(MAKE) -C qboot
-   cp qboot/bios.bin ../pc-bios/bios-microvm.bin
+   cp qboot/bios.bin ../pc-bios/qboot.rom
 
 clean:
rm -rf seabios/.config seabios/out seabios/builds
-- 
2.18.4




[PATCH v4 21/21] tests/acpi: update expected data files for microvm

2020-07-02 Thread Gerd Hoffmann
Also clear tests/qtest/bios-tables-test-allowed-diff.h

Signed-off-by: Gerd Hoffmann 
---
 tests/qtest/bios-tables-test-allowed-diff.h |   3 ---
 tests/data/acpi/microvm/APIC| Bin 0 -> 70 bytes
 tests/data/acpi/microvm/DSDT| Bin 0 -> 365 bytes
 tests/data/acpi/microvm/FACP| Bin 0 -> 268 bytes
 4 files changed, 3 deletions(-)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index 97c3fa621b7f..dfb8523c8bf4 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,4 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/microvm/APIC",
-"tests/data/acpi/microvm/DSDT",
-"tests/data/acpi/microvm/FACP",
diff --git a/tests/data/acpi/microvm/APIC b/tests/data/acpi/microvm/APIC
index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7472c7e830b6c7139720e93dd544d4441556661d
 100644
GIT binary patch
literal 70
zcmZ<^@N{-#U|?Xp?<65v<@85#a0y6k`O6f!H9Lf#JbFFwFr}2jnsGfW!{`1CcCj
H|A7JkC_j6YE5wz#n_gs%GoenLN`
zpW{k;$86@Fd(Irrxsi_M?+m~-VJ0WKAHL=?B917q*iIf_8y%W*_cDFd20*z#XX{ze8pGFMFpU84R0aBbPnKQWME*eb@F%i2v%^42yk`-iUEZfKx`0ARlp*^z`()4z{IrRAIMxM1_r8I
SLD0QG`x&72gWSRhWdi`;u?ztK

literal 0
HcmV?d1

-- 
2.18.4




[PATCH v4 08/21] microvm/acpi: add minimal acpi support

2020-07-02 Thread Gerd Hoffmann
$subject says all.  Can be controlled using -M microvm,acpi=on/off.

Signed-off-by: Gerd Hoffmann 
---
 hw/i386/acpi-microvm.h|   8 ++
 include/hw/i386/microvm.h |   9 ++
 hw/i386/acpi-microvm.c| 181 ++
 hw/i386/microvm.c |  40 +
 hw/i386/Kconfig   |   1 +
 hw/i386/Makefile.objs |   1 +
 6 files changed, 240 insertions(+)
 create mode 100644 hw/i386/acpi-microvm.h
 create mode 100644 hw/i386/acpi-microvm.c

diff --git a/hw/i386/acpi-microvm.h b/hw/i386/acpi-microvm.h
new file mode 100644
index ..dfe853690e15
--- /dev/null
+++ b/hw/i386/acpi-microvm.h
@@ -0,0 +1,8 @@
+#ifndef HW_I386_ACPI_MICROVM_H
+#define HW_I386_ACPI_MICROVM_H
+
+#include "hw/i386/microvm.h"
+
+void acpi_setup_microvm(MicrovmMachineState *mms);
+
+#endif
diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
index 03e735723726..b6e0d4395af7 100644
--- a/include/hw/i386/microvm.h
+++ b/include/hw/i386/microvm.h
@@ -24,12 +24,18 @@
 
 #include "hw/boards.h"
 #include "hw/i386/x86.h"
+#include "hw/acpi/acpi_dev_interface.h"
 
 /* Platform virtio definitions */
 #define VIRTIO_MMIO_BASE  0xfeb0
 #define VIRTIO_NUM_TRANSPORTS 8
 #define VIRTIO_CMDLINE_MAXLEN 64
 
+#define GED_MMIO_BASE 0xfea0
+#define GED_MMIO_BASE_MEMHP   (GED_MMIO_BASE + 0x100)
+#define GED_MMIO_BASE_REGS(GED_MMIO_BASE + 0x200)
+#define GED_MMIO_IRQ  9
+
 /* Machine type options */
 #define MICROVM_MACHINE_PIT "pit"
 #define MICROVM_MACHINE_PIC "pic"
@@ -58,6 +64,9 @@ typedef struct {
 /* Machine state */
 uint32_t virtio_irq_base;
 bool kernel_cmdline_fixed;
+Notifier machine_done;
+Notifier powerdown_req;
+AcpiDeviceIf *acpi_dev;
 } MicrovmMachineState;
 
 #define TYPE_MICROVM_MACHINE   MACHINE_TYPE_NAME("microvm")
diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c
new file mode 100644
index ..7cd830e628af
--- /dev/null
+++ b/hw/i386/acpi-microvm.c
@@ -0,0 +1,181 @@
+/* Support for generating ACPI tables and passing them to Guests
+ *
+ * Copyright (C) 2008-2010  Kevin O'Connor 
+ * Copyright (C) 2006 Fabrice Bellard
+ * Copyright (C) 2013 Red Hat Inc
+ *
+ * Author: Michael S. Tsirkin 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "exec/memory.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/generic_event_device.h"
+#include "hw/acpi/utils.h"
+#include "hw/boards.h"
+#include "hw/i386/fw_cfg.h"
+#include "hw/i386/microvm.h"
+
+#include "acpi-common.h"
+#include "acpi-microvm.h"
+
+static void
+build_dsdt_microvm(GArray *table_data, BIOSLinker *linker,
+   MicrovmMachineState *mms)
+{
+X86MachineState *x86ms = X86_MACHINE(mms);
+Aml *dsdt, *sb_scope, *scope, *pkg;
+bool ambiguous;
+Object *isabus;
+
+isabus = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous);
+assert(isabus);
+assert(!ambiguous);
+
+dsdt = init_aml_allocator();
+
+/* Reserve space for header */
+acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader));
+
+sb_scope = aml_scope("_SB");
+fw_cfg_add_acpi_dsdt(sb_scope, x86ms->fw_cfg);
+isa_build_aml(ISA_BUS(isabus), sb_scope);
+build_ged_aml(sb_scope, GED_DEVICE, HOTPLUG_HANDLER(mms->acpi_dev),
+  GED_MMIO_IRQ, AML_SYSTEM_MEMORY, GED_MMIO_BASE);
+acpi_dsdt_add_power_button(sb_scope);
+aml_append(dsdt, sb_scope);
+
+scope = aml_scope("\\");
+pkg = aml_package(4);
+aml_append(pkg, aml_int(5)); /* SLEEP_CONTROL_REG.SLP_TYP */
+aml_append(pkg, aml_int(0)); /* ignored */
+aml_append(pkg, aml_int(0)); /* reserved */
+aml_append(pkg, aml_int(0)); /* reserved */
+aml_append(scope, aml_name_decl("_S5", pkg));
+aml_append(dsdt, scope);
+
+/* copy AML table into ACPI tables blob and patch header there */
+g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
+build_header(linker, table_data,
+(void *)(table_data->data + table_data->len - dsdt->buf->len),
+"DSDT", dsdt->buf->len, 5, NULL, NULL);
+free_aml_allocator();
+}
+
+static void acpi_build_microvm(AcpiBuildTables *tables,
+   MicrovmMachineState *mms)
+{
+MachineState *machine = 

[PATCH v4 04/21] acpi: ged: add control regs

2020-07-02 Thread Gerd Hoffmann
Add control regs (sleep, reset) for hw-reduced acpi.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/acpi/generic_event_device.h |  7 
 hw/acpi/generic_event_device.c | 44 ++
 2 files changed, 51 insertions(+)

diff --git a/include/hw/acpi/generic_event_device.h 
b/include/hw/acpi/generic_event_device.h
index 90a9180db572..474c92198080 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -72,6 +72,12 @@
 #define ACPI_GED_EVT_SEL_OFFSET0x0
 #define ACPI_GED_EVT_SEL_LEN   0x4
 
+#define ACPI_GED_REG_SLEEP_CTL 0x00
+#define ACPI_GED_REG_SLEEP_STS 0x01
+#define ACPI_GED_REG_RESET 0x02
+#define   ACPI_GED_RESET_VALUE 0x42
+#define ACPI_GED_REG_COUNT 0x03
+
 #define GED_DEVICE  "GED"
 #define AML_GED_EVT_REG "EREG"
 #define AML_GED_EVT_SEL "ESEL"
@@ -87,6 +93,7 @@
 
 typedef struct GEDState {
 MemoryRegion evt;
+MemoryRegion regs;
 uint32_t sel;
 } GEDState;
 
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index b8abdefa1c77..491df80a5cc7 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -20,6 +20,7 @@
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "qemu/error-report.h"
+#include "sysemu/runstate.h"
 
 static const uint32_t ged_supported_events[] = {
 ACPI_GED_MEM_HOTPLUG_EVT,
@@ -176,6 +177,45 @@ static const MemoryRegionOps ged_evt_ops = {
 },
 };
 
+static uint64_t ged_regs_read(void *opaque, hwaddr addr, unsigned size)
+{
+return 0;
+}
+
+static void ged_regs_write(void *opaque, hwaddr addr, uint64_t data,
+   unsigned int size)
+{
+bool slp_en;
+int slp_typ;
+
+switch (addr) {
+case ACPI_GED_REG_SLEEP_CTL:
+slp_typ = (data >> 2) & 0x07;
+slp_en  = (data >> 5) & 0x01;
+if (slp_en && slp_typ == 5) {
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+return;
+case ACPI_GED_REG_SLEEP_STS:
+return;
+case ACPI_GED_REG_RESET:
+if (data == ACPI_GED_RESET_VALUE) {
+qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+return;
+}
+}
+
+static const MemoryRegionOps ged_regs_ops = {
+.read = ged_regs_read,
+.write = ged_regs_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+.valid = {
+.min_access_size = 1,
+.max_access_size = 1,
+},
+};
+
 static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
@@ -332,6 +372,10 @@ static void acpi_ged_initfn(Object *obj)
  sysbus_init_mmio(sbd, >container_memhp);
  acpi_memory_hotplug_init(>container_memhp, OBJECT(dev),
   >memhp_state, 0);
+
+memory_region_init_io(_st->regs, obj, _regs_ops, ged_st,
+  TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT);
+sysbus_init_mmio(sbd, _st->regs);
 }
 
 static void acpi_ged_class_init(ObjectClass *class, void *data)
-- 
2.18.4




[PATCH 0/1] Add Loongson 2F disassembler

2020-07-02 Thread Stefan Brankovic
This patch adds disassembler for Loongson 2F instruction set.

Stefan Brankovic (1):
  disas: mips: Add Loongson 2F disassembler

 MAINTAINERS |1 +
 configure   |1 +
 disas/Makefile.objs |1 +
 disas/loongson2f.cpp| 8134 +++
 disas/loongson2f.h  | 2542 
 include/disas/dis-asm.h |1 +
 include/exec/poison.h   |1 +
 target/mips/cpu.c   |4 +
 8 files changed, 10685 insertions(+)
 create mode 100644 disas/loongson2f.cpp
 create mode 100644 disas/loongson2f.h

-- 
2.17.1




[Bug 1873769] Re: SB16 audio playback freezes emulation in Windows 95 guest

2020-07-02 Thread Allan Peramaki
This is with GTK UI? Do you still have the same problem if you use Spice
and remote-viewer instead?

(GTK UI and Sound Blaster 16 emulation don't play well together. GTK UI
does screen updates only when the main event loop becomes idle, but it
never becomes idle when SB16 audio is playing due to the way
hw/dma/i8257 works. The combination of GTK UI screen updates + SB16 DMA
transfer additionally causes i8257_dma_run() getting called at a very
rapid rate.)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1873769

Title:
  SB16 audio playback freezes emulation in Windows 95 guest

Status in QEMU:
  New

Bug description:
  - QEMU 4.2.93 (v5.0.0-rc3) built from latest git master
  20038cd7a8412feeb49c01f6ede89e36c8995472 using MSYS2 on Windows 10 and
  launched on same Windows 10

  - Launched using "qemu-system-i386.exe -drive format=raw,file=hdd-
  2gb.img -soundhw pcspk,sb16 -m 16 -cpu pentium -vga std -cdrom
  Windows_95.iso -boot c"

  - I have attached video screen capture of the issue

  ---

  I decided to make my first ever QEMU build after encountering the
  dsound issues using the latest 4.2.0 binary from
  https://qemu.weilnetz.de/w64/. In my 5.0.0-rc3 build the sound
  playback is working correctly, however the whole Windows 95 UI freezes
  while sound is playing.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1873769/+subscriptions



Re: [PATCH] tests/acceptance: Add a test for the sun4u sparc64 machine

2020-07-02 Thread Mark Cave-Ayland
On 02/07/2020 15:10, Thomas Huth wrote:

> We can use the image from the advent calendar 2018 to test the sun4u
> machine. It's not using the "QEMU advent calendar" string, so we can
> not use the do_test_advcal_2018() from boot_linux_console.py, thus
> let's also put it into a separate file to also be able to add an
> entry to the MAINTAINERS file.
> 
> Signed-off-by: Thomas Huth 
> ---
>  MAINTAINERS   |  1 +
>  tests/acceptance/machine_sparc64_sun4u.py | 35 +++
>  2 files changed, 36 insertions(+)
>  create mode 100644 tests/acceptance/machine_sparc64_sun4u.py
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index dec252f38b..c26bc06e39 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1318,6 +1318,7 @@ F: include/hw/pci-host/sabre.h
>  F: hw/pci-bridge/simba.c
>  F: include/hw/pci-bridge/simba.h
>  F: pc-bios/openbios-sparc64
> +F: tests/acceptance/machine_sparc64_sun4u.py
>  
>  Sun4v
>  M: Artyom Tarasenko 
> diff --git a/tests/acceptance/machine_sparc64_sun4u.py 
> b/tests/acceptance/machine_sparc64_sun4u.py
> new file mode 100644
> index 00..2506eefe3f
> --- /dev/null
> +++ b/tests/acceptance/machine_sparc64_sun4u.py
> @@ -0,0 +1,35 @@
> +# Functional test that boots a Linux kernel and checks the console
> +#
> +# Copyright (c) 2020 Red Hat, Inc.
> +#
> +# Author:
> +#  Thomas Huth 
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later. See the COPYING file in the top-level directory.
> +
> +import os
> +
> +from avocado_qemu import Test
> +from avocado_qemu import wait_for_console_pattern
> +from avocado.utils import archive
> +
> +class Sun4uMachine(Test):
> +"""Boots the Linux kernel and checks that the console is operational"""
> +
> +timeout = 90
> +
> +def test_sparc64_sun4u(self):
> +"""
> +:avocado: tags=arch:sparc64
> +:avocado: tags=machine:sun4u
> +"""
> +tar_url = ('https://www.qemu-advent-calendar.org'
> +   '/2018/download/day23.tar.xz')
> +tar_hash = '142db83cd974ffadc4f75c8a5cad5bcc5722c240'
> +file_path = self.fetch_asset(tar_url, asset_hash=tar_hash)
> +archive.extract(file_path, self.workdir)
> +self.vm.set_console()
> +self.vm.add_args('-kernel', self.workdir + '/day23/vmlinux')
> +self.vm.launch()
> +wait_for_console_pattern(self, 'Starting logging: OK')

Thanks Thomas!

I've just tested this and the patch appears to work fine and passes here:

Fetching asset from
tests/acceptance/machine_sparc64_sun4u.py:Sun4uMachine.test_sparc64_sun4u
JOB ID : c719db83a6011ed88a485b177079d0add26f6f81
JOB LOG:
/home/build/src/qemu/git/qemu/tests/results/job-2020-07-02T21.25-c719db8/job.log
 (01/11) tests/acceptance/empty_cpu_model.py:EmptyCPUModel.test: CANCEL: No QEMU
binary defined or found in the build tree (0.00 s)
 (02/11) 
tests/acceptance/machine_sparc64_sun4u.py:Sun4uMachine.test_sparc64_sun4u:
PASS (18.74 s)
...
...


Tested-by: Mark Cave-Ayland 


ATB,

Mark.



Re: nvme emulation merge process

2020-07-02 Thread Andrzej Jakowski
On 7/1/20 6:57 AM, Philippe Mathieu-Daudé wrote:
> On 7/1/20 3:18 PM, Klaus Jensen wrote:
>> On Jul  1 12:34, Kevin Wolf wrote:
>>> Am 30.06.2020 um 22:36 hat Klaus Jensen geschrieben:
 On Jun 30 08:42, Keith Busch wrote:
> On Tue, Jun 30, 2020 at 04:09:46PM +0200, Philippe Mathieu-Daudé wrote:
>> What I see doable for the following days is:
>> - hw/block/nvme: Fix I/O BAR structure [3]
>> - hw/block/nvme: handle transient dma errors
>> - hw/block/nvme: bump to v1.3
>
>
> These look like sensible patches to rebase future work on, IMO. The 1.3
> updates had been prepared a while ago, at least.

 I think Philippe's "hw/block/nvme: Fix I/O BAR structure" series is a
 no-brainer. It just needs to get in asap.
>>>
>>> I think we need to talk about how nvme patches are supposed to get
>>> merged. I'm not familiar with the hardware nor the code, so the model
>>> was that I just blindly merge patches that Keith has reviewed/acked,
>>> just to spare him the work to prepare a pull request. But obviously, we
>>> started doing things this way when there was a lot less activity around
>>> the nvme emulation.
>>>
>>> If we find that this doesn't scale any more, maybe we need to change
>>> something.
>>
>> Honestly, I do not think the current model has worked very well for some
>> time; especially for larger series where I, for one, have felt that my
>> work was largely ignored due to a lack of designated reviewers. Things
>> only picked up when Beata, Maxim and Philippe started reviewing my
>> series - maybe out of pity or because I was bombing the list, I don't
>> know ;)
> 
> I have no interest in the NVMe device emulation, but one of the first
> things I noticed when I looked at the wiki at the time I wanted to send my
> first patch was the "Return the favor" paragraph:
> https://wiki.qemu.org/Contribute/SubmitAPatch#Return_the_favor
> 
>  "Peer review only works if everyone chips in a bit of review time.
>   If everyone submitted more patches than they reviewed, we would
>   have a patch backlog. A good goal is to try to review at least as
>   many patches from others as what you submit. Don't worry if you
>   don't know the code base as well as a maintainer; it's perfectly
>   fine to admit when your review is weak because you are unfamiliar
>   with the code."
> 
> So as some reviewed my patches, I try to return the favor to the
> community, in particular when I see someone is stuck waiting for
> review, and the patch topic is some area I can understand.
> 
> I don't see that as an "out of pity" reaction.
> 
> Note, it is true that bomb series scare reviewers. You learned it the
> bad way. But you can see, after resending the first part of your
> "bomb", even if it took 10 versions, the result is a great
> improvement!
> 
>> We've also seen good patches from Andrzej linger on the list for quite a
>> while, prompting a number of RESENDs. I only recently allocated more
>> time and upped my review game, but I hope that contributors feel that
>> stuff gets reviewed in a timely fashion by now.
>>
>> Please understand that this is in NO WAY a criticism of Keith who
>> already made it very clear to me that he did not have a lot of time to
>> review, but only ack the odd patch.
>>
>>> Depending on how much time Keith can spend on review in the
>>> near future and how much control he wants to keep over the development,
>>> I could imagine adding Klaus to MAINTAINERS, either as a co-maintainer
>>> or as a reviewer. Then I could rely on reviews/acks from either of you
>>> for merging series.
>>>
>>
>> I would be happy to step up (officially) to help maintain the device
>> with Keith and review on a daily basis, and my position can support
>> this.
> 
> Sounds good to me, but it is up to Keith Busch to accept.
> 
> It would be nice to have at least one developer from WDC listed as
> designated reviewer too.
> 
> Maxim is a candidate for designated reviewer but I think he doesn't
> have the time.
> 
> It would also be nice to have Andrzej Jakowski listed, if he is interested.

Thx! Of course I am interested in helping and I think it is actually a great
idea to have a couple of designated maintainers/reviewers as it would be easier
for folks to receive feedback vs requesting it in a polling manner :)
And please don't get me wrong -- I'm not complaining about anything -- I
think it is just the reality that everybody is stretched in multiple
directions, struggling to allocate time for multiple things. Having many people
will actually increase the likelihood of introducing high-quality improvements.

Also, +1 on separate tree for nvme emulation.

> 
>>
>>> Of course, the patches don't necessarily have to go through my tree
>>> either if this only serves to complicate things these days. If sending
>>> separate pull requests directly to Peter would make things easier, I
>>> certainly wouldn't object.
>>>
>>
>> I don't think there is any reason to by-pass your tree. I think the
>> volume would need to 

Re: [PATCH] linux-user/elfload: use MAP_FIXED in pgb_reserved_va

2020-07-02 Thread Richard Henderson
On 6/30/20 7:41 AM, Alex Bennée wrote:
> 
> Peter Maydell  writes:
> 
>> On Tue, 30 Jun 2020 at 11:36, Alex Bennée  wrote:
>>>
>>> Given we assert the requested address matches what we asked we should
>>> also make that clear in the mmap flags. Otherwise we see failures in
>>> the GitLab environment for some currently unknown but allowable
>>> reason.
>>
>> Adding MAP_FIXED will mean that instead of failing if there's
>> something else already at that address, the kernel will now
>> silently blow that away in favour of the new mapping. Is
>> that definitely what we want here ?
> 
> Hmm maybe not.

Definitely not.

> But hey I just noticed that we have MAP_FIXED_NOREPLACE
> (since Linux 4.17) which says:
> 
>This flag provides behavior that is similar  to  MAP_FIXED  with
>respect   to   the   addr   enforcement,  but  differs  in  that
>MAP_FIXED_NOREPLACE never clobbers a preexisting  mapped  range.
>If  the  requested range would collide with an existing mapping,
>then this call fails with  the  error  EEXIST.   This  flag  can
>therefore  be used as a way to atomically (with respect to other
>threads) attempt to map an address range: one thread  will  suc‐
>ceed; all others will report failure.
> 
>Note   that   older   kernels   which   do   not  recognize  the
>MAP_FIXED_NOREPLACE flag will typically (upon detecting a colli‐
>sion  with a preexisting mapping) fall back to a "non-MAP_FIXED"
>type of behavior: they will return an address that is  different
>from  the  requested  address.   Therefore,  backward-compatible
>software should check the returned address against the requested
>address.
> 
> So maybe that is what we should do?

Yes, that would be better, because those are the exact semantics that we want.
 Though it would be Really Nice to know what's up with gitlab...

> Now you've pointed that out I wonder if we need to fix
> pgb_find_hole_fallback as well?

Yes, that could benefit from MAP_FIXED_NOREPLACE.

I do think there's a way we could streamline the 32-on-64 case.  At present we
are groveling through /proc/self/maps, or mmaping+unmaping, and then mmaping.
Whereas we could just mmap once and be done -- it's the 32-on-32 case that
requires the song and dance.


r~



Re: [PATCH v2 0/2] tests/qht-bench: Adjust rate/threshold computation

2020-07-02 Thread Alex Bennée


Richard Henderson  writes:

> Supersedes: <20200620214551.447392-1-richard.hender...@linaro.org>
>
> Thanks for Emilio's review of v1.  I've split "seed" from "rate"
> as suggested, left the comparisons alone, and expanded the comment
> in do_threshold.

Queued to testing/next, thanks.

>
>
> r~
>
>
> Richard Henderson (2):
>   tests/qht-bench: Adjust testing rate by -1
>   tests/qht-bench: Adjust threshold computation
>
>  tests/qht-bench.c | 40 +++-
>  1 file changed, 31 insertions(+), 9 deletions(-)


-- 
Alex Bennée



Re: [PATCH v2 11/44] qemu-option: Replace opt_set() by cleaner opt_validate()

2020-07-02 Thread Vladimir Sementsov-Ogievskiy

02.07.2020 18:49, Markus Armbruster wrote:

opt_set() frees its argument @value on failure.  Slightly unclean;
functions ideally do nothing on failure.

To tidy this up, move opt_create() from opt_set() into its callers,
along with the cleanup.  Rename opt_set() to opt_validate(), noting
its similarity to qemu_opts_validate().  Drop redundant parameter
@opts; use opt->opts instead.

Signed-off-by: Markus Armbruster
Reviewed-by: Eric Blake


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir



[PATCH v2] SEV: QMP support for Inject-Launch-Secret

2020-07-02 Thread Tobin Feldman-Fitzthum
From: Tobin Feldman-Fitzthum 

AMD SEV allows a guest owner to inject a secret blob
into the memory of a virtual machine. The secret is
encrypted with the SEV Transport Encryption Key and
integrity is guaranteed with the Transport Integrity
Key. Although QEMU facilitates the injection of the
launch secret, it cannot access the secret.

Signed-off-by: Tobin Feldman-Fitzthum 
---
 include/monitor/monitor.h |  3 ++
 include/sysemu/sev.h  |  2 ++
 monitor/misc.c|  8 ++---
 qapi/misc-target.json | 18 +++
 target/i386/monitor.c |  9 ++
 target/i386/sev-stub.c|  5 +++
 target/i386/sev.c | 66 +++
 target/i386/sev_i386.h|  3 ++
 target/i386/trace-events  |  1 +
 9 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 1018d754a6..bf049c5b00 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -4,6 +4,7 @@
 #include "block/block.h"
 #include "qapi/qapi-types-misc.h"
 #include "qemu/readline.h"
+#include "include/exec/hwaddr.h"
 
 extern __thread Monitor *cur_mon;
 typedef struct MonitorHMP MonitorHMP;
@@ -36,6 +37,8 @@ void monitor_flush(Monitor *mon);
 int monitor_set_cpu(int cpu_index);
 int monitor_get_cpu_index(void);
 
+void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, uint64_t size, Error **errp);
+
 void monitor_read_command(MonitorHMP *mon, int show_prompt);
 int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func,
   void *opaque);
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 98c1ec8d38..b279b293e8 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -18,4 +18,6 @@
 
 void *sev_guest_init(const char *id);
 int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len);
+int sev_inject_launch_secret(const char *hdr, const char *secret,
+ uint64_t gpa);
 #endif
diff --git a/monitor/misc.c b/monitor/misc.c
index 89bb970b00..b9ec8ba410 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -674,10 +674,10 @@ static void hmp_physical_memory_dump(Monitor *mon, const 
QDict *qdict)
 memory_dump(mon, count, format, size, addr, 1);
 }
 
-static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
+void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, uint64_t size, Error **errp)
 {
 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
- addr, 1);
+ addr, size);
 
 if (!mrs.mr) {
 error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, 
addr);
@@ -701,7 +701,7 @@ static void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
 MemoryRegion *mr = NULL;
 void *ptr;
 
-ptr = gpa2hva(, addr, _err);
+ptr = gpa2hva(, addr, 1, _err);
 if (local_err) {
 error_report_err(local_err);
 return;
@@ -777,7 +777,7 @@ static void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
 void *ptr;
 uint64_t physaddr;
 
-ptr = gpa2hva(, addr, _err);
+ptr = gpa2hva(, addr, 1, _err);
 if (local_err) {
 error_report_err(local_err);
 return;
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index dee3b45930..d145f916b3 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -200,6 +200,24 @@
 { 'command': 'query-sev-capabilities', 'returns': 'SevCapability',
   'if': 'defined(TARGET_I386)' }
 
+##
+# @sev-inject-launch-secret:
+#
+# This command injects a secret blob into memory of SEV guest.
+#
+# @packet-header: the launch secret packet header encoded in base64
+#
+# @secret: the launch secret data to be injected encoded in base64
+#
+# @gpa: the guest physical address where secret will be injected.
+#
+# Since: 5.1
+#
+##
+{ 'command': 'sev-inject-launch-secret',
+  'data': { 'packet-header': 'str', 'secret': 'str', 'gpa': 'uint64' },
+  'if': 'defined(TARGET_I386)' }
+
 ##
 # @dump-skeys:
 #
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 27ebfa3ad2..42bcfe6dc0 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -736,3 +736,12 @@ SevCapability *qmp_query_sev_capabilities(Error **errp)
 
 return data;
 }
+
+void qmp_sev_inject_launch_secret(const char *packet_hdr,
+  const char *secret, uint64_t gpa,
+  Error **errp)
+{
+if (sev_inject_launch_secret(packet_hdr, secret, gpa) != 0) {
+error_setg(errp, "SEV inject secret failed");
+}
+}
diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index e5ee13309c..fed4588185 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -48,3 +48,8 @@ SevCapability *sev_get_capabilities(void)
 {
 return NULL;
 }
+int sev_inject_launch_secret(const char *hdr, const char *secret,
+ uint64_t gpa)
+{
+return 1;
+}
diff --git a/target/i386/sev.c 

Re: [PATCH 0/4] hw/arm/virt-acpi-build: Only expose flash on older machine types

2020-07-02 Thread Auger Eric
Hi Drew,

On 6/29/20 4:09 PM, Andrew Jones wrote:
> The flash device is exclusively for the host-controlled firmware, so
> we should not expose it to the OS. Exposing it risks the OS messing
> with it, which could break firmware runtime services and surprise the
> OS when all its changes disappear after reboot.
> 
> This change was suggested by Ard and Laszlo.
> 
> Patch 3/4 is the meat. The other patches deal with updating qtest.
> 
> Thanks,
> drew
> 
> Andrew Jones (4):
>   tests/acpi: remove stale allowed tables
>   tests/acpi: virt: allow DSDT acpi table changes
>   hw/arm/virt-acpi-build: Only expose flash on older machine types
>   tests/acpi: virt: update golden masters for DSDT

SERIES:
Reviewed-by: Eric Auger 

Thanks

Eric

> 
>  hw/arm/virt-acpi-build.c|   5 -
>  hw/arm/virt.c   |   3 +++
>  include/hw/arm/virt.h   |   1 +
>  tests/data/acpi/virt/DSDT   | Bin 5307 -> 5205 bytes
>  tests/data/acpi/virt/DSDT.memhp | Bin 6668 -> 6566 bytes
>  tests/data/acpi/virt/DSDT.numamem   | Bin 5307 -> 5205 bytes
>  tests/qtest/bios-tables-test-allowed-diff.h |  18 --
>  7 files changed, 8 insertions(+), 19 deletions(-)
> 




Re: [PATCH v4 2/2] target/m68k: consolidate physical translation offset into get_physical_address()

2020-07-02 Thread Laurent Vivier
Le 01/07/2020 à 22:15, Mark Cave-Ayland a écrit :
> Since all callers to get_physical_address() now apply the same page offset to
> the translation result, move the logic into get_physical_address() itself to
> avoid duplication.
> 
> Suggested-by: Philippe Mathieu-Daudé 
> Signed-off-by: Mark Cave-Ayland 
> ---
>  target/m68k/helper.c | 17 ++---
>  1 file changed, 6 insertions(+), 11 deletions(-)
> 
> diff --git a/target/m68k/helper.c b/target/m68k/helper.c
> index 631eab7774..3ff5765795 100644
> --- a/target/m68k/helper.c
> +++ b/target/m68k/helper.c
> @@ -643,7 +643,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr 
> *physical,
>  /* Transparent Translation Register bit */
>  env->mmu.mmusr = M68K_MMU_T_040 | M68K_MMU_R_040;
>  }
> -*physical = address & TARGET_PAGE_MASK;
> +*physical = address;
>  *page_size = TARGET_PAGE_SIZE;
>  return 0;
>  }
> @@ -771,7 +771,7 @@ static int get_physical_address(CPUM68KState *env, hwaddr 
> *physical,
>  }
>  *page_size = 1 << page_bits;
>  page_mask = ~(*page_size - 1);
> -*physical = next & page_mask;
> +*physical = (next & page_mask) + (address & (*page_size - 1));
>  
>  if (access_type & ACCESS_PTEST) {
>  env->mmu.mmusr |= next & M68K_MMU_SR_MASK_040;
> @@ -826,8 +826,6 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cs, vaddr 
> addr)
>  return -1;
>  }
>  
> -addr &= TARGET_PAGE_MASK;
> -phys_addr += addr & (page_size - 1);
>  return phys_addr;
>  }
>  
> @@ -891,10 +889,8 @@ bool m68k_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  ret = get_physical_address(>env, , ,
> address, access_type, _size);
>  if (likely(ret == 0)) {
> -address &= TARGET_PAGE_MASK;
> -physical += address & (page_size - 1);
> -tlb_set_page(cs, address, physical,
> - prot, mmu_idx, TARGET_PAGE_SIZE);
> +tlb_set_page(cs, address & TARGET_PAGE_MASK,
> + physical & TARGET_PAGE_MASK, prot, mmu_idx, page_size);
>  return true;
>  }
>  
> @@ -1383,9 +1379,8 @@ void HELPER(ptest)(CPUM68KState *env, uint32_t addr, 
> uint32_t is_read)
>  ret = get_physical_address(env, , , addr,
> access_type, _size);
>  if (ret == 0) {
> -addr &= TARGET_PAGE_MASK;
> -physical += addr & (page_size - 1);
> -tlb_set_page(env_cpu(env), addr, physical,
> +tlb_set_page(env_cpu(env), addr & TARGET_PAGE_MASK,
> + physical & TARGET_PAGE_MASK,
>   prot, access_type & ACCESS_SUPER ?
>   MMU_KERNEL_IDX : MMU_USER_IDX, page_size);
>  }
> 

Reviewed-by: Laurent Vivier 



Re: [PATCH] target/arm: Fix temp double-free in sve ldr/str

2020-07-02 Thread Philippe Mathieu-Daudé
On 7/2/20 7:56 PM, Richard Henderson wrote:
> The temp that gets assigned to clean_addr has been allocated with
> new_tmp_a64, which means that it will be freed at the end of the
> instruction.  Freeing it earlier leads to assertion failure.
> 
> The loop creates a complication, in which we allocate a new local
> temp, which does need freeing, and the final code path is shared
> between the loop and non-loop.
> 
> Fix this complication by adding new_tmp_a64_local so that the new
> local temp is freed at the end, and can be treated exactly like
> the non-loop path.
> 
> Fixes: bba87d0a0f4
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/translate-a64.h | 1 +
>  target/arm/translate-a64.c | 6 ++
>  target/arm/translate-sve.c | 8 ++--
>  3 files changed, 9 insertions(+), 6 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH 1/6] migration: improve error reporting of block driver state name

2020-07-02 Thread Dr. David Alan Gilbert
* Eric Blake (ebl...@redhat.com) wrote:
> On 7/2/20 12:57 PM, Daniel P. Berrangé wrote:
> > With blockdev, a BlockDriverState may not have an device name,
> 
> s/an/a/
> 
> > so using a node name is required as an alternative.
> > 
> > Signed-off-by: Daniel P. Berrangé 
> > ---
> >   migration/savevm.c | 12 ++--
> >   1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> 
> Reviewed-by: Eric Blake 

Why don't you send this one to trivial?

Dave

> 
> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.   +1-919-301-3226
> Virtualization:  qemu.org | libvirt.org
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v2 1/1] scripts/performance: Add dissect.py script

2020-07-02 Thread Ahmed Karaman
On Thu, Jul 2, 2020 at 5:45 PM Aleksandar Markovic
 wrote:
>
>
> A very good script! Hopefully there will be some script in the near future
> that will, for example, list all helpers used in the test program.
>
> Reviewed-by: Aleksandar Markovic 
>
>
Thanks Mr. Aleksandar. I will start working on it.

Best regards,
Ahmed Karaman



Re: [PATCH v5 19/21] virtio-mem: Add trace events

2020-07-02 Thread Dr. David Alan Gilbert
* David Hildenbrand (da...@redhat.com) wrote:
> Let's add some trace events that might come in handy later.
> 
> Cc: "Michael S. Tsirkin" 
> Cc: "Dr. David Alan Gilbert" 
> Signed-off-by: David Hildenbrand 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  hw/virtio/trace-events | 10 ++
>  hw/virtio/virtio-mem.c | 10 +-
>  2 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 6427a0047d..292fc15e29 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -74,3 +74,13 @@ virtio_iommu_get_domain(uint32_t domain_id) "Alloc 
> domain=%d"
>  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
>  virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t 
> sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
>  virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, 
> uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64
> +
> +# virtio-mem.c
> +virtio_mem_send_response(uint16_t type) "type=%" PRIu16
> +virtio_mem_plug_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" PRIx64 
> " nb_blocks=%" PRIu16
> +virtio_mem_unplug_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" 
> PRIx64 " nb_blocks=%" PRIu16
> +virtio_mem_unplugged_all(void) ""
> +virtio_mem_unplug_all_request(void) ""
> +virtio_mem_resized_usable_region(uint64_t old_size, uint64_t new_size) 
> "old_size=0x%" PRIx64 "new_size=0x%" PRIx64
> +virtio_mem_state_request(uint64_t addr, uint16_t nb_blocks) "addr=0x%" 
> PRIx64 " nb_blocks=%" PRIu16
> +virtio_mem_state_response(uint16_t state) "state=%" PRIu16
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 6ed5409669..fdd4dbb42c 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -30,6 +30,7 @@
>  #include "hw/boards.h"
>  #include "hw/qdev-properties.h"
>  #include "config-devices.h"
> +#include "trace.h"
>  
>  /*
>   * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
> @@ -100,6 +101,7 @@ static void virtio_mem_send_response(VirtIOMEM *vmem, 
> VirtQueueElement *elem,
>  VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
>  VirtQueue *vq = vmem->vq;
>  
> +trace_virtio_mem_send_response(le16_to_cpu(resp->type));
>  iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
>  
>  virtqueue_push(vq, elem, sizeof(*resp));
> @@ -195,6 +197,7 @@ static void virtio_mem_plug_request(VirtIOMEM *vmem, 
> VirtQueueElement *elem,
>  const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
>  uint16_t type;
>  
> +trace_virtio_mem_plug_request(gpa, nb_blocks);
>  type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
>  virtio_mem_send_response_simple(vmem, elem, type);
>  }
> @@ -206,6 +209,7 @@ static void virtio_mem_unplug_request(VirtIOMEM *vmem, 
> VirtQueueElement *elem,
>  const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
>  uint16_t type;
>  
> +trace_virtio_mem_unplug_request(gpa, nb_blocks);
>  type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
>  virtio_mem_send_response_simple(vmem, elem, type);
>  }
> @@ -225,6 +229,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM 
> *vmem,
>  return;
>  }
>  
> +trace_virtio_mem_resized_usable_region(vmem->usable_region_size, 
> newsize);
>  vmem->usable_region_size = newsize;
>  }
>  
> @@ -247,7 +252,7 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
>  vmem->size = 0;
>  notifier_list_notify(>size_change_notifiers, >size);
>  }
> -
> +trace_virtio_mem_unplugged_all();
>  virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
>  return 0;
>  }
> @@ -255,6 +260,7 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
>  static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
>VirtQueueElement *elem)
>  {
> +trace_virtio_mem_unplug_all_request();
>  if (virtio_mem_unplug_all(vmem)) {
>  virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
>  } else {
> @@ -272,6 +278,7 @@ static void virtio_mem_state_request(VirtIOMEM *vmem, 
> VirtQueueElement *elem,
>  .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
>  };
>  
> +trace_virtio_mem_state_request(gpa, nb_blocks);
>  if (!virtio_mem_valid_range(vmem, gpa, size)) {
>  virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
>  return;
> @@ -284,6 +291,7 @@ static void virtio_mem_state_request(VirtIOMEM *vmem, 
> VirtQueueElement *elem,
>  } else {
>  resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
>  }
> +trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
>  virtio_mem_send_response(vmem, elem, );
>  }
>  
> -- 
> 2.26.2
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH 0/6] migration: bring savevm/loadvm/delvm over to QMP

2020-07-02 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20200702175754.2211821-1-berra...@redhat.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

 (qemu) info snapshots
 No available block device supports snapshots
 (qemu) loadvm snap0
-Error: No block device supports snapshots
+Error: No block device can accept snapshots
 (qemu) quit
 
 
---
 Testing: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=file
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) savevm snap0
-Error: Device '' is writable but does not support snapshots
+Error: Device 'file' is writable but does not support snapshots
 (qemu) info snapshots
 No available block device supports snapshots
 (qemu) loadvm snap0
-Error: Device '' is writable but does not support snapshots
+Error: Device 'file' is writable but does not support snapshots
 (qemu) quit
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
---
Not run: 259
Failures: 267
Failed 1 of 119 iotests
make: *** [check-tests/check-block.sh] Error 1
make: *** Waiting for unfinished jobs
  TESTcheck-qtest-aarch64: tests/qtest/test-hmp
  TESTcheck-qtest-aarch64: tests/qtest/qos-test
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=80ecabfdc3b44cdabd915b427d791afe', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-vlubojb7/src/docker-src.2020-07-02-14.52.56.31106:/var/tmp/qemu:z,ro',
 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=80ecabfdc3b44cdabd915b427d791afe
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-vlubojb7/src'
make: *** [docker-run-test-quick@centos7] Error 2

real14m49.668s
user0m8.931s


The full log is available at
http://patchew.org/logs/20200702175754.2211821-1-berra...@redhat.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v2 03/44] qdev: Use returned bool to check for qdev_realize() etc. failure

2020-07-02 Thread Vladimir Sementsov-Ogievskiy

02.07.2020 18:49, Markus Armbruster wrote:

Convert

 foo(..., );
 if (err) {
 ...
 }

to

 if (!foo(..., )) {
 ...
 }

for qdev_realize(), qdev_realize_and_unref(), qbus_realize() and their
wrappers isa_realize_and_unref(), pci_realize_and_unref(),
sysbus_realize(), sysbus_realize_and_unref(), usb_realize_and_unref().
Coccinelle script:

 @@
 identifier fun = {isa_realize_and_unref, pci_realize_and_unref, 
qbus_realize, qdev_realize, qdev_realize_and_unref, sysbus_realize, 
sysbus_realize_and_unref, usb_realize_and_unref};
 expression list args, args2;
 typedef Error;
 Error *err;
 @@
 -fun(args, , args2);
 -if (err)
 +if (!fun(args, , args2))
  {
  ...
  }

Chokes on hw/arm/musicpal.c's lcd_refresh() with the unhelpful error
message "no position information".  Nothing to convert there; skipped.

Fails to convert hw/arm/armsse.c, because Coccinelle gets confused by
ARMSSE being used both as typedef and function-like macro there.
Converted manually.

A few line breaks tidied up manually.

Signed-off-by: Markus Armbruster



Sorry, I am reviewing this patch with the help of a script:
#!/usr/bin/env python3
# Review helper for mechanical "use the returned bool" conversions.
#
# Usage: script.py PATCH_FILE
#
# Reads the patch given as the first command-line argument and, for every
# '@'-delimited chunk, replaces each occurrence of the expected conversion
#
#     -    func(..., &err);
#     -    if (err) {            (or "if (err != NULL) {")
#     +    if (!func(..., &err)) {
#
# with the marker 'OK'.  Chunks that still contain added/removed lines after
# that substitution are printed (indented by three spaces) for manual review.
# Finally, the name of every function matched by the pattern is printed.

import sys
import re

with open(sys.argv[1]) as f:
    patch = f.read()

# func_call: the whole call expression, reused verbatim on the '+' line.
# func:      the called function's name, collected for the summary below.
# err:       the Error variable, which must be the one tested by the old 'if'.
regex = re.compile(r'^- *(?P<func_call>(?P<func>\w+)\(.*, &(?P<err>\w+)\));\n'
                   r'^- *if \((?P=err)( != NULL)?\) \{\n'
                   r'^\+ *if \(!(?P=func_call)\) \{$', flags=re.MULTILINE)

for chunk in re.split('^@', patch, flags=re.MULTILINE):
    filtered = regex.sub('OK', chunk)

    # Any remaining line starting with a single '+' or '-' is a change the
    # pattern did not account for; '+++' / '---' file headers are skipped.
    if re.search('^[+-][^+-]', filtered, flags=re.MULTILINE):
        print(re.sub('^', '   ', '@' + chunk, flags=re.MULTILINE))


funcs = set()

for m in regex.finditer(patch):
    funcs.add(m.group('func'))

print()
for func in funcs:
    print(func)



output:

   @@ -34,9 +34,7 @@ static void virtio_gpu_pci_base_realize(VirtIOPCIProxy 
*vpci_dev, Error **errp)
Error *local_error = NULL;

virtio_pci_force_virtio_1(vpci_dev);

   -qdev_realize(vdev, BUS(_dev->bus), _error);
   -
   -if (local_error) {
   +if (!qdev_realize(vdev, BUS(_dev->bus), _error)) {
error_propagate(errp, local_error);
return;
}
   diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
   index 67f409e106..0fc00fee1f 100644
   --- a/hw/display/virtio-vga.c
   +++ b/hw/display/virtio-vga.c
   
   @@ -444,15 +444,13 @@ static void realize_event_facility(DeviceState *dev, Error **errp)

SCLPEventFacility *event_facility = EVENT_FACILITY(dev);
Error *local_err = NULL;

   -qdev_realize(DEVICE(_facility->quiesce),

   - BUS(_facility->sbus), _err);
   -if (local_err) {
   +if (!qdev_realize(DEVICE(_facility->quiesce),
   +  BUS(_facility->sbus), _err)) {
error_propagate(errp, local_err);
return;
}
   -qdev_realize(DEVICE(_facility->cpu_hotplug),
   - BUS(_facility->sbus), _err);
   -if (local_err) {
   +if (!qdev_realize(DEVICE(_facility->cpu_hotplug),
   +  BUS(_facility->sbus), _err)) {
error_propagate(errp, local_err);
qdev_unrealize(DEVICE(_facility->quiesce));
return;
   diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
   index 142e52a8ff..0517901024 100644
   --- a/hw/s390x/s390-pci-bus.c
   +++ b/hw/s390x/s390-pci-bus.c
   


usb_realize_and_unref
sysbus_realize
sysbus_realize_and_unref
qdev_realize
qdev_realize_and_unref

===

So, the remaining non-matching chunks seem correct, and all found functions seem to 
have corresponding semantics:

Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir



Re: [PATCH 0/6] migration: bring savevm/loadvm/delvm over to QMP

2020-07-02 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20200702175754.2211821-1-berra...@redhat.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

 (qemu) info snapshots
 No available block device supports snapshots
 (qemu) loadvm snap0
-Error: No block device supports snapshots
+Error: No block device can accept snapshots
 (qemu) quit
 
 
---
 Testing: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=file
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) savevm snap0
-Error: Device '' is writable but does not support snapshots
+Error: Device 'file' is writable but does not support snapshots
 (qemu) info snapshots
 No available block device supports snapshots
 (qemu) loadvm snap0
-Error: Device '' is writable but does not support snapshots
+Error: Device 'file' is writable but does not support snapshots
 (qemu) quit
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
---
Not run: 259
Failures: 267
Failed 1 of 119 iotests
make: *** [check-tests/check-block.sh] Error 1
make: *** Waiting for unfinished jobs
  TEST    check-qtest-aarch64: tests/qtest/qos-test
Traceback (most recent call last):
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=d415a115078246e4ab99c8a4d27787e7', '-u', 
'1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-vi4bosi3/src/docker-src.2020-07-02-14.36.29.6528:/var/tmp/qemu:z,ro',
 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=d415a115078246e4ab99c8a4d27787e7
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-vi4bosi3/src'
make: *** [docker-run-test-quick@centos7] Error 2

real    16m29.279s
user    0m8.998s


The full log is available at
http://patchew.org/logs/20200702175754.2211821-1-berra...@redhat.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH 3/3] target/nios2: Use gen_io_start around wrctl instruction

2020-07-02 Thread Richard Henderson
On 6/29/20 9:05 AM, Wentong Wu wrote:
> wrctl instruction on nios2 target will cause checking cpu
> interrupt but tcg_handle_interrupt() will call cpu_abort()
> if the CPU gets an interrupt while it's not in 'can do IO'
> state, so add gen_io_start around wrctl instruction. Also
> at the same time, end the ongoing TB with DISAS_UPDATE.
> 
> Signed-off-by: Wentong Wu 
> ---
>  target/nios2/translate.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/target/nios2/translate.c b/target/nios2/translate.c
> index 83c10eb2..51347ada 100644
> --- a/target/nios2/translate.c
> +++ b/target/nios2/translate.c
> @@ -32,6 +32,7 @@
>  #include "exec/cpu_ldst.h"
>  #include "exec/translator.h"
>  #include "qemu/qemu-print.h"
> +#include "exec/gen-icount.h"
>  
>  /* is_jmp field values */
>  #define DISAS_JUMPDISAS_TARGET_0 /* only pc was modified dynamically */
> @@ -518,7 +519,11 @@ static void wrctl(DisasContext *dc, uint32_t code, 
> uint32_t flags)
>  /* If interrupts were enabled using WRCTL, trigger them. */
>  #if !defined(CONFIG_USER_ONLY)
>  if ((instr.imm5 + CR_BASE) == CR_STATUS) {
> +if (tb_cflags(dc->tb) & CF_USE_ICOUNT) {
> +gen_io_start();
> +}
>  gen_helper_check_interrupts(dc->cpu_env);
> +dc->is_jmp = DISAS_UPDATE;
>  }
>  #endif

This isn't right.  Not so much the gen_io_start portion, but the entire
existence of helper_check_interrupt.

The correct way to acknowledge interrupts after changing an interrupt mask bit
is to exit the TB back to the cpu main loop.
Which you are doing here with DISAS_UPDATE, so that part is fine.  (Although
you could merge that into the switch statement above.)

Looking at nios_pic_cpu_handler, there are two other bugs:

1) Get rid of env->irq_pending and use cpu_interrupt/cpu_reset_interrupt 
instead.

2) Do not check env->regs[CR_STATUS] & CR_STATUS_PIE.  That variable does not
belong to the pic and should not be checked there.  The check belongs in
nios2_cpu_exec_interrupt, and is in fact already there.


r~



Re: [PATCH v3 14/19] .mailmap: Display correct email address for Michael Tokarev

2020-07-02 Thread Michael Tokarev
02.07.2020 20:38, Philippe Mathieu-Daudé wrote:
> Fix Michael Tokarev email address in the following commits:
> 
>   $ git log --committer=mjt@gandalf --oneline

Heh. It was a misconfiguration on my part which I haven't noticed
for quite some time.  Dunno if it's worth the effort to fix that
now, since all the commits are old (in 2016), and all are trivial.

/mjt



Re: [PATCH 1/6] migration: improve error reporting of block driver state name

2020-07-02 Thread Eric Blake

On 7/2/20 12:57 PM, Daniel P. Berrangé wrote:

With blockdev, a BlockDriverState may not have an device name,


s/an/a/


so using a node name is required as an alternative.

Signed-off-by: Daniel P. Berrangé 
---
  migration/savevm.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)



Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 17/17] Replace uses of FROM_SSI_SLAVE() macro with QOM casts

2020-07-02 Thread Alistair Francis
On Sun, Jun 28, 2020 at 7:37 AM Peter Maydell  wrote:
>
> The FROM_SSI_SLAVE() macro predates QOM and is used as a typesafe way
> to cast from an SSISlave* to the instance struct of a subtype of
> TYPE_SSI_SLAVE.  Switch to using the QOM cast macros instead, which
> have the same effect (by writing the QOM macros if the types were
> previously missing them.)
>
> (The FROM_SSI_SLAVE() macro allows the SSISlave member of the
> subtype's struct to be anywhere as long as it is named "ssidev",
> whereas a QOM cast macro insists that it is the first thing in the
> subtype's struct.  This is true for all the types we convert here.)
>
> This removes all the uses of FROM_SSI_SLAVE() so we can delete the
> definition.
>
> Signed-off-by: Peter Maydell 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/hw/ssi/ssi.h |  2 --
>  hw/arm/z2.c  | 11 +++
>  hw/display/ads7846.c |  9 ++---
>  hw/display/ssd0323.c | 10 +++---
>  hw/sd/ssi-sd.c   |  4 ++--
>  5 files changed, 22 insertions(+), 14 deletions(-)
>
> diff --git a/include/hw/ssi/ssi.h b/include/hw/ssi/ssi.h
> index 5fd411f2e4e..eac168aa1db 100644
> --- a/include/hw/ssi/ssi.h
> +++ b/include/hw/ssi/ssi.h
> @@ -66,8 +66,6 @@ struct SSISlave {
>  bool cs;
>  };
>
> -#define FROM_SSI_SLAVE(type, dev) DO_UPCAST(type, ssidev, dev)
> -
>  extern const VMStateDescription vmstate_ssi_slave;
>
>  #define VMSTATE_SSI_SLAVE(_field, _state) {  \
> diff --git a/hw/arm/z2.c b/hw/arm/z2.c
> index a0f40959904..e1f22f58681 100644
> --- a/hw/arm/z2.c
> +++ b/hw/arm/z2.c
> @@ -111,9 +111,12 @@ typedef struct {
>  int pos;
>  } ZipitLCD;
>
> +#define TYPE_ZIPIT_LCD "zipit-lcd"
> +#define ZIPIT_LCD(obj) OBJECT_CHECK(ZipitLCD, (obj), TYPE_ZIPIT_LCD)
> +
>  static uint32_t zipit_lcd_transfer(SSISlave *dev, uint32_t value)
>  {
> -ZipitLCD *z = FROM_SSI_SLAVE(ZipitLCD, dev);
> +ZipitLCD *z = ZIPIT_LCD(dev);
>  uint16_t val;
>  if (z->selected) {
>  z->buf[z->pos] = value & 0xff;
> @@ -153,7 +156,7 @@ static void z2_lcd_cs(void *opaque, int line, int level)
>
>  static void zipit_lcd_realize(SSISlave *dev, Error **errp)
>  {
> -ZipitLCD *z = FROM_SSI_SLAVE(ZipitLCD, dev);
> +ZipitLCD *z = ZIPIT_LCD(dev);
>  z->selected = 0;
>  z->enabled = 0;
>  z->pos = 0;
> @@ -185,7 +188,7 @@ static void zipit_lcd_class_init(ObjectClass *klass, void 
> *data)
>  }
>
>  static const TypeInfo zipit_lcd_info = {
> -.name  = "zipit-lcd",
> +.name  = TYPE_ZIPIT_LCD,
>  .parent= TYPE_SSI_SLAVE,
>  .instance_size = sizeof(ZipitLCD),
>  .class_init= zipit_lcd_class_init,
> @@ -325,7 +328,7 @@ static void z2_init(MachineState *machine)
>
>  type_register_static(_lcd_info);
>  type_register_static(_info);
> -z2_lcd = ssi_create_slave(mpu->ssp[1], "zipit-lcd");
> +z2_lcd = ssi_create_slave(mpu->ssp[1], TYPE_ZIPIT_LCD);
>  bus = pxa2xx_i2c_bus(mpu->i2c[0]);
>  i2c_create_slave(bus, TYPE_AER915, 0x55);
>  wm = i2c_create_slave(bus, TYPE_WM8750, 0x1b);
> diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c
> index 9228b40b1af..56bf82fe079 100644
> --- a/hw/display/ads7846.c
> +++ b/hw/display/ads7846.c
> @@ -29,6 +29,9 @@ typedef struct {
>  int output;
>  } ADS7846State;
>
> +#define TYPE_ADS7846 "ads7846"
> +#define ADS7846(obj) OBJECT_CHECK(ADS7846State, (obj), TYPE_ADS7846)
> +
>  /* Control-byte bitfields */
>  #define CB_PD0 (1 << 0)
>  #define CB_PD1 (1 << 1)
> @@ -61,7 +64,7 @@ static void ads7846_int_update(ADS7846State *s)
>
>  static uint32_t ads7846_transfer(SSISlave *dev, uint32_t value)
>  {
> -ADS7846State *s = FROM_SSI_SLAVE(ADS7846State, dev);
> +ADS7846State *s = ADS7846(dev);
>
>  switch (s->cycle ++) {
>  case 0:
> @@ -139,7 +142,7 @@ static const VMStateDescription vmstate_ads7846 = {
>  static void ads7846_realize(SSISlave *d, Error **errp)
>  {
>  DeviceState *dev = DEVICE(d);
> -ADS7846State *s = FROM_SSI_SLAVE(ADS7846State, d);
> +ADS7846State *s = ADS7846(d);
>
>  qdev_init_gpio_out(dev, >interrupt, 1);
>
> @@ -166,7 +169,7 @@ static void ads7846_class_init(ObjectClass *klass, void 
> *data)
>  }
>
>  static const TypeInfo ads7846_info = {
> -.name  = "ads7846",
> +.name  = TYPE_ADS7846,
>  .parent= TYPE_SSI_SLAVE,
>  .instance_size = sizeof(ADS7846State),
>  .class_init= ads7846_class_init,
> diff --git a/hw/display/ssd0323.c b/hw/display/ssd0323.c
> index c3bdb18742c..32d27f008ae 100644
> --- a/hw/display/ssd0323.c
> +++ b/hw/display/ssd0323.c
> @@ -66,9 +66,13 @@ typedef struct {
>  uint8_t framebuffer[128 * 80 / 2];
>  } ssd0323_state;
>
> +#define TYPE_SSD0323 "ssd0323"
> +#define SSD0323(obj) OBJECT_CHECK(ssd0323_state, (obj), TYPE_SSD0323)
> +
> +
>  static uint32_t ssd0323_transfer(SSISlave *dev, uint32_t data)
>  {
> -ssd0323_state *s = FROM_SSI_SLAVE(ssd0323_state, dev);
> +

Re: [PATCH v2 37/44] error: Reduce unnecessary error propagation

2020-07-02 Thread Eric Blake

On 7/2/20 10:49 AM, Markus Armbruster wrote:

When all we do with an Error we receive into a local variable is
propagating to somewhere else, we can just as well receive it there
right away, even when we need to keep error_propagate() for other
error paths.

Signed-off-by: Markus Armbruster 
---



+++ b/block/replication.c
@@ -85,7 +85,6 @@ static int replication_open(BlockDriverState *bs, QDict 
*options,
  {
  int ret;
  BDRVReplicationState *s = bs->opaque;
-Error *local_err = NULL;
  QemuOpts *opts = NULL;
  const char *mode;
  const char *top_id;
@@ -99,7 +98,7 @@ static int replication_open(BlockDriverState *bs, QDict 
*options,
  
  ret = -EINVAL;

  opts = qemu_opts_create(_runtime_opts, NULL, 0, _abort);
-if (!qemu_opts_absorb_qdict(opts, options, &local_err)) {
+if (!qemu_opts_absorb_qdict(opts, options, errp)) {
  goto fail;
  }


Does this one belong in 36/44, given that removal of 'local_err' is 
evidence that no other error path needed it?


Either way, it belongs in the series, and the result of the two patches 
together is fine.


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 2/3] target/nios2: in line the semantics of DISAS_UPDATE with other targets

2020-07-02 Thread Richard Henderson
On 7/2/20 11:14 AM, Richard Henderson wrote:
> On 6/29/20 9:05 AM, Wentong Wu wrote:
>> In line the semantics of DISAS_UPDATE on nios2 target with other targets
>> which is to explicitly write the PC back into the cpu state before doing
>> a tcg_gen_exit_tb().
>>
>> Signed-off-by: Wentong Wu 
>> ---
>>  target/nios2/translate.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Or simply remove it as unused, now that you've replaced the existing users 
> with
> DISAS_NORETURN.

Nevermind, you're using it in the next patch.

Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 36/44] error: Eliminate error_propagate() manually

2020-07-02 Thread Eric Blake

On 7/2/20 10:49 AM, Markus Armbruster wrote:

When all we do with an Error we receive into a local variable is
propagating to somewhere else, we can just as well receive it there
right away.  The previous two commits did that for sufficiently simple
cases with Coccinelle.  Do it for several more manually.

Signed-off-by: Markus Armbruster 
---



+++ b/qdev-monitor.c
@@ -597,7 +597,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
  const char *driver, *path;
  DeviceState *dev = NULL;
  BusState *bus = NULL;
-Error *err = NULL;
  bool hide;
  
  driver = qemu_opt_get(opts, "driver");

@@ -652,15 +651,14 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
  dev = qdev_new(driver);
  
  /* Check whether the hotplug is allowed by the machine */

-if (qdev_hotplug && !qdev_hotplug_allowed(dev, &err)) {
+if (qdev_hotplug && !qdev_hotplug_allowed(dev, errp)) {
  /* Error must be set in the machine hook */
-assert(err);


That comment could be deleted now.

Either way,
Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 2/6] migration: introduce savevm, loadvm, delvm QMP commands

2020-07-02 Thread Daniel P . Berrangé
On Thu, Jul 02, 2020 at 01:12:52PM -0500, Eric Blake wrote:
> On 7/2/20 12:57 PM, Daniel P. Berrangé wrote:
> > savevm, loadvm and delvm are some of the few commands that have never
> > been converted to use QMP. The primary reason for this lack of
> > conversion is that they block execution of the thread for as long as
> > they run.
> > 
> > Despite this downside, however, libvirt and applications using libvirt
> > has used these commands for as long as QMP has existed, via the
> > "human-monitor-command" passthrough command. IOW, while it is clearly
> > desirable to be able to fix the blocking problem, this is not an
> > immediate obstacle to real world usage.
> > 
> > Meanwhile there is a need for other features which involve adding new
> > parameters to the commands. This is possible with HMP passthrough, but
> > it provides no reliable way for apps to introspect features, so using
> > QAPI modelling is highly desirable.
> > 
> > This patch thus introduces trival savevm, loadvm, delvm commands
> 
> trivial
> 
> > to QMP that are functionally identical to the HMP counterpart, including
> > the blocking problem.
> 
> Should we name them 'x-savevm', 'x-loadvm', 'x-delvm' to give ourselves room
> to change them when we DO solve the blocking issue?  Or will the solution of
> the blocking issue introduce new QMP commands, at which point we can add QMP
> deprecation markers on these commands to eventually retire them?

I was in two minds about this, so I'm open to arguments either way.

The primary goal is for libvirt to consume the APIs as soon as possible,
and generally libvirt doesn't want to do this if they are declared experimental
via an "x-" prefix. So that pushes me away from "x-".

If we don't have an "x-" prefix and want to make changes, we can add extra
parameters to trigger new behaviour in backwards compatible manner. Or we can
simply deprecate these commands, deleting them 2 releases later, while adding
completely new commands.

If we think the proposed design will definitely need incompatible changes in
a very short time frame though, that would push towards "x-".

So IMHO the right answer largely depends on whether there is a credible
strategy to implement the ideal non-blocking solution in a reasonable amount
of time. I can't justify spending much time on this myself, but I'm willing
to consider & try proposals for solving the blocking problem if they're not
too complex / invasive.

I just don't want to end up having a "x-savevm" command for another 10 years,
waiting for a perfect solution that never arrives because people always have
higher priority items, as apps are clearly able to accept the blocking problem
if the alternative is no snapshots at all.


> > +
> > +##
> > +# @savevm:
> > +#
> > +# Save a VM snapshot
> > +#
> > +# @tag: name of the snapshot to create. If it already
> > +# exists it will be replaced.
> > +#
> > +# Note that execution of the VM will be paused during the time
> > +# it takes to save the snapshot
> > +#
> > +# Returns: nothing
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "savevm",
> > +#  "data": {
> > +# "tag": "my-snap"
> > +#  }
> > +#}
> > +# <- { "return": { } }
> > +#
> > +# Since: 5.2
> 
> I guess you are NOT trying to make 5.1 soft freeze next week?

Correct. It is unrealistic to consider this for soft freeze.

I'd really love to have a solution in 5.2 though, even if it doesn't
solve all our problems. Something that can at least unblock apps that
want to use OVMF with internal snapshots today.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 6/6] migration: support picking vmstate disk in QMP snapshot commands

2020-07-02 Thread Eric Blake

On 7/2/20 12:57 PM, Daniel P. Berrangé wrote:

This wires up support for a new "vmstate" parameter to the QMP commands
for snapshots (savevm, loadvm). This parameter accepts block driver
state node name.

One use case for this would be a VM using OVMF firmware where the
variables store is the first snapshottable disk image. The vmstate
snapshot usually wants to be stored in the primary root disk of the
VM, not the firmeware varstore. Thus there needs to be a mechanism


firmware


to override the default choice of disk.

Signed-off-by: Daniel P. Berrangé 
---



+++ b/qapi/migration.json
@@ -1630,6 +1630,7 @@
  # @tag: name of the snapshot to create. If it already
  # exists it will be replaced.
  # @exclude: list of block device node names to exclude
+# @vmstate: block device node name to save vmstate to
  #
  # Note that execution of the VM will be paused during the time
  # it takes to save the snapshot
@@ -1641,6 +1642,7 @@
  # -> { "execute": "savevm",
  #  "data": {
  # "tag": "my-snap",
+# "vmstate": "disk0",
  # "exclude": ["pflash0-vars"]
  #  }
  #}
@@ -1650,6 +1652,7 @@
  ##
  { 'command': 'savevm',
'data': { 'tag': 'str',
+'*vmstate': 'str',
  '*exclude': ['str'] } }


During save, the list of block devices is obvious: everything that is 
not excluded.  But,


  
  ##

@@ -1659,6 +1662,7 @@
  #
  # @tag: name of the snapshot to load.
  # @exclude: list of block device node names to exclude
+# @vmstate: block device node name to load vmstate from
  #
  # Returns: nothing
  #
@@ -1667,6 +1671,7 @@
  # -> { "execute": "loadvm",
  #  "data": {
  # "tag": "my-snap",
+# "vmstate": "disk0",
  # "exclude": ["pflash0-vars"]
  #  }
  #}
@@ -1676,6 +1681,7 @@
  ##
  { 'command': 'loadvm',
'data': { 'tag': 'str',
+'*vmstate': 'str',
  '*exclude': ['str'] } }


...now that we support exclusion during saving, or even without 
exclusion but when the user has performed hotplug/unplug operations in 
the meantime from when the snapshot was created, isn't load better off 
listing all devices which SHOULD be restored, rather than excluding 
devices that should NOT be restored?  (After all, libvirt knows which 
disks existed at the time of the snapshot, which may be different than 
the set of disks that exist now even though we are throwing out the 
state now to go back to the state at the time of the snapshot)


Then there's the question of symmetry: if load needs an explicit list of 
blocks to load from (rather than the set of blocks that are currently 
associated with the machine), should save also take its list by positive 
inclusion rather than negative exclusion?


And why does delvm not need to specify which block is the vmstate? 
delvm is in the same boat as loadvm - the set of blocks involved at the 
time of the snapshot creation may be different than the set of blocks 
currently associated with the guest at the time you run load/delvm.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




  1   2   3   4   5   6   >