[PATCH v16 11/14] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)

2019-11-14 Thread Tao Xu
From: Liu Jingqi 

This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as hint for optimization.

Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

Changes in v16:
- Add more description for lb_length (Igor)
- Drop entry_list and calculate entries in this patch (Igor)

Changes in v13:
- Calculate the entries in a new patch.
---
 hw/acpi/hmat.c | 105 -
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 9ff79308a4..ed19ebed2f 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -25,8 +25,10 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/hmat.h"
+#include "qemu/error-report.h"
 
 /*
  * ACPI 6.3:
@@ -67,11 +69,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t 
flags,
 build_append_int_noprefix(table_data, 0, 8);
 }
 
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+  uint32_t num_initiator, uint32_t num_target,
+  uint32_t *initiator_list)
+{
+int i, index;
+HMAT_LB_Data *lb_data;
+uint16_t *entry_list;
+uint32_t base;
+/* Length in bytes for entire structure */
+uint32_t lb_length
+= 32 /* Table length upto and including Entry Base Unit */
++ 4 * num_initiator /* Initiator Proximity Domain List */
++ 4 * num_target /* Target Proximity Domain List */
++ 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */
+
+/* Type */
+build_append_int_noprefix(table_data, 1, 2);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Length */
+build_append_int_noprefix(table_data, lb_length, 4);
+/* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+assert(!(hmat_lb->hierarchy >> 4));
+build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1);
+/* Data Type */
+build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 2);
+/* Number of Initiator Proximity Domains (s) */
+build_append_int_noprefix(table_data, num_initiator, 4);
+/* Number of Target Proximity Domains (t) */
+build_append_int_noprefix(table_data, num_target, 4);
+/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);
+
+/* Entry Base Unit */
+if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) {
+/* Convert latency base from nanoseconds to picosecond */
+base = hmat_lb->base * 1000;
+} else {
+/* Convert bandwidth base from Byte to Megabyte */
+base = hmat_lb->base / MiB;
+}
+build_append_int_noprefix(table_data, base, 8);
+
+/* Initiator Proximity Domain List */
+for (i = 0; i < num_initiator; i++) {
+build_append_int_noprefix(table_data, initiator_list[i], 4);
+}
+
+/* Target Proximity Domain List */
+for (i = 0; i < num_target; i++) {
+build_append_int_noprefix(table_data, i, 4);
+}
+
+/* Latency or Bandwidth Entries */
+entry_list = g_malloc0(hmat_lb->list->len * sizeof(uint16_t));
+for (i = 0; i < hmat_lb->list->len; i++) {
+lb_data = _array_index(hmat_lb->list, HMAT_LB_Data, i);
+index = lb_data->initiator * num_target + lb_data->target;
+
+entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base);
+}
+
+for (i = 0; i < num_initiator * num_target; i++) {
+build_append_int_noprefix(table_data, entry_list[i], 2);
+}
+
+g_free(entry_list);
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
 {
 uint16_t flags;
-int i;
+uint32_t num_initiator = 0;
+uint32_t initiator_list[MAX_NODES];
+int i, hierarchy, type;
+HMAT_LB_Info *hmat_lb;
 
 for (i = 0; i < numa_state->num_nodes; i++) {
 flags = 0;
@@ -82,6 +162,29 @@ static void hmat_build_table_structs(GArray *table_data, 
NumaState *numa_state)
 
 build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i);
 }
+
+for (i = 0; i < numa_state->num_nodes; i++) {
+if (numa_state->nodes[i].has_cpu) {
+initiator_list[num_initiator++] = i;
+}
+}
+
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+for (hierarchy = HMAT_LB_MEM_MEMORY;
+ hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) {
+for (type = HMAT_LB_DATA_ACCESS_LATENCY;
+ type <= 

[PATCH v16 06/14] tests: Add test for QAPI builtin type time

2019-11-14 Thread Tao Xu
Add tests for time input such as zero, around limit of precision,
signed upper limit, actual upper limit, beyond limits, time suffixes,
and etc.

Signed-off-by: Tao Xu 
---

Changes in v16:
- Update the test cases

Changes in v14:
- Drop time unit picosecond (Eric)
---
 tests/test-keyval.c| 89 ++
 tests/test-qobject-input-visitor.c | 29 ++
 2 files changed, 118 insertions(+)

diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index fad941fcb8..39443f7e0c 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -457,6 +457,94 @@ static void test_keyval_visit_size(void)
 visit_free(v);
 }
 
+static void test_keyval_visit_time(void)
+{
+Error *err = NULL;
+Visitor *v;
+QDict *qdict;
+uint64_t time;
+
+/* Lower limit zero */
+qdict = keyval_parse("time1=0", NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , _abort);
+g_assert_cmpuint(time, ==, 0);
+visit_check_struct(v, _abort);
+visit_end_struct(v, NULL);
+visit_free(v);
+
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("time1=18446744073709551614,"
+ "time2=18446744073709551615",
+ NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , _abort);
+g_assert_cmphex(time, ==, 0xfffe);
+visit_type_time(v, "time2", , _abort);
+g_assert_cmphex(time, ==, 0x);
+visit_check_struct(v, _abort);
+visit_end_struct(v, NULL);
+visit_free(v);
+
+/* Beyond limits */
+qdict = keyval_parse("time1=-1,"
+ "time2=18446744073709551616", /* 2^64 */
+ NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , );
+error_free_or_abort();
+visit_type_time(v, "time2", , );
+error_free_or_abort();
+visit_end_struct(v, NULL);
+visit_free(v);
+
+/* Suffixes */
+qdict = keyval_parse("time1=2ns,time2=3.4us,time3=5ms,time4=600s",
+ NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , _abort);
+g_assert_cmpuint(time, ==, 2);
+visit_type_time(v, "time2", , _abort);
+g_assert_cmpuint(time, ==, 3400);
+visit_type_time(v, "time3", , _abort);
+g_assert_cmphex(time, ==, 5 * 1000 * 1000);
+visit_type_time(v, "time4", , _abort);
+g_assert_cmphex(time, ==, 600 * 10LL);
+visit_check_struct(v, _abort);
+visit_end_struct(v, NULL);
+visit_free(v);
+
+/* Beyond limit with suffix */
+qdict = keyval_parse("time1=1844674407370955s", NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , );
+error_free_or_abort();
+visit_end_struct(v, NULL);
+visit_free(v);
+
+/* Trailing crap */
+qdict = keyval_parse("time1=89ks,time2=ns", NULL, _abort);
+v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+qobject_unref(qdict);
+visit_start_struct(v, NULL, NULL, 0, _abort);
+visit_type_time(v, "time1", , );
+error_free_or_abort();
+visit_type_time(v, "time2", , );;
+error_free_or_abort();
+visit_end_struct(v, NULL);
+visit_free(v);
+}
+
 static void test_keyval_visit_dict(void)
 {
 Error *err = NULL;
@@ -645,6 +733,7 @@ int main(int argc, char *argv[])
 g_test_add_func("/keyval/visit/bool", test_keyval_visit_bool);
 g_test_add_func("/keyval/visit/number", test_keyval_visit_number);
 g_test_add_func("/keyval/visit/size", test_keyval_visit_size);
+g_test_add_func("/keyval/visit/time", test_keyval_visit_time);
 g_test_add_func("/keyval/visit/dict", test_keyval_visit_dict);
 g_test_add_func("/keyval/visit/list", test_keyval_visit_list);
 g_test_add_func("/keyval/visit/optional", test_keyval_visit_optional);
diff --git a/tests/test-qobject-input-visitor.c 
b/tests/test-qobject-input-visitor.c
index 6bacabf063..55138042b8 100644
--- a/tests/test-qobject-input-visitor.c
+++ b/tests/test-qobject-input-visitor.c
@@ -366,6 +366,31 @@ static void 
test_visitor_in_size_str_fail(TestInputVisitorData *data,
 error_free_or_abort();
 }
 
+static void test_visitor_in_time_str_keyval(TestInputVisitorData *data,
+const void *unused)
+{
+uint64_t res, value = 265 * 1000 * 1000;
+Visitor *v;
+
+v = visitor_input_test_init_full(data, true, 

[PATCH v16 05/14] qapi: Add builtin type time

2019-11-14 Thread Tao Xu
Add optional builtin type time, fallback is uint64. This type use
qemu_strtotime_ns() for pre-converting time suffix to numbers.

Signed-off-by: Tao Xu 
---

No changes in v16.

Changes in v14:
- Drop time unit picosecond (Eric)
---
 include/qapi/visitor-impl.h  |  4 
 include/qapi/visitor.h   |  8 
 qapi/opts-visitor.c  | 22 ++
 qapi/qapi-visit-core.c   | 12 
 qapi/qobject-input-visitor.c | 18 ++
 qapi/trace-events|  1 +
 scripts/qapi/schema.py   |  1 +
 7 files changed, 66 insertions(+)

diff --git a/include/qapi/visitor-impl.h b/include/qapi/visitor-impl.h
index 8ccb3b6c20..e0979563c7 100644
--- a/include/qapi/visitor-impl.h
+++ b/include/qapi/visitor-impl.h
@@ -88,6 +88,10 @@ struct Visitor
 void (*type_size)(Visitor *v, const char *name, uint64_t *obj,
   Error **errp);
 
+/* Optional; fallback is type_uint64() */
+void (*type_time)(Visitor *v, const char *name, uint64_t *obj,
+  Error **errp);
+
 /* Must be set */
 void (*type_bool)(Visitor *v, const char *name, bool *obj, Error **errp);
 
diff --git a/include/qapi/visitor.h b/include/qapi/visitor.h
index c5b23851a1..22242e706f 100644
--- a/include/qapi/visitor.h
+++ b/include/qapi/visitor.h
@@ -554,6 +554,14 @@ void visit_type_int64(Visitor *v, const char *name, 
int64_t *obj,
 void visit_type_size(Visitor *v, const char *name, uint64_t *obj,
  Error **errp);
 
+/*
+ * Visit a uint64_t value.
+ * Like visit_type_uint64(), except that some visitors may choose to
+ * recognize numbers with timeunit suffix, such as "ns", "us" "ms" and "s".
+ */
+void visit_type_time(Visitor *v, const char *name, uint64_t *obj,
+ Error **errp);
+
 /*
  * Visit a boolean value.
  *
diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c
index 5fe0276c1c..59b575f0fc 100644
--- a/qapi/opts-visitor.c
+++ b/qapi/opts-visitor.c
@@ -526,6 +526,27 @@ opts_type_size(Visitor *v, const char *name, uint64_t 
*obj, Error **errp)
 processed(ov, name);
 }
 
+static void
+opts_type_time(Visitor *v, const char *name, uint64_t *obj, Error **errp)
+{
+OptsVisitor *ov = to_ov(v);
+const QemuOpt *opt;
+int err;
+
+opt = lookup_scalar(ov, name, errp);
+if (!opt) {
+return;
+}
+
+err = qemu_strtotime_ns(opt->str ? opt->str : "", NULL, obj);
+if (err < 0) {
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE, opt->name,
+   "a time value");
+return;
+}
+
+processed(ov, name);
+}
 
 static void
 opts_optional(Visitor *v, const char *name, bool *present)
@@ -573,6 +594,7 @@ opts_visitor_new(const QemuOpts *opts)
 ov->visitor.type_int64  = _type_int64;
 ov->visitor.type_uint64 = _type_uint64;
 ov->visitor.type_size   = _type_size;
+ov->visitor.type_time   = _type_time;
 ov->visitor.type_bool   = _type_bool;
 ov->visitor.type_str= _type_str;
 
diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c
index 5365561b07..ac8896455c 100644
--- a/qapi/qapi-visit-core.c
+++ b/qapi/qapi-visit-core.c
@@ -277,6 +277,18 @@ void visit_type_size(Visitor *v, const char *name, 
uint64_t *obj,
 }
 }
 
+void visit_type_time(Visitor *v, const char *name, uint64_t *obj,
+ Error **errp)
+{
+assert(obj);
+trace_visit_type_time(v, name, obj);
+if (v->type_time) {
+v->type_time(v, name, obj, errp);
+} else {
+v->type_uint64(v, name, obj, errp);
+}
+}
+
 void visit_type_bool(Visitor *v, const char *name, bool *obj, Error **errp)
 {
 assert(obj);
diff --git a/qapi/qobject-input-visitor.c b/qapi/qobject-input-visitor.c
index 32236cbcb1..e476fe0d16 100644
--- a/qapi/qobject-input-visitor.c
+++ b/qapi/qobject-input-visitor.c
@@ -627,6 +627,23 @@ static void qobject_input_type_size_keyval(Visitor *v, 
const char *name,
 }
 }
 
+static void qobject_input_type_time_keyval(Visitor *v, const char *name,
+   uint64_t *obj, Error **errp)
+{
+QObjectInputVisitor *qiv = to_qiv(v);
+const char *str = qobject_input_get_keyval(qiv, name, errp);
+
+if (!str) {
+return;
+}
+
+if (qemu_strtotime_ns(str, NULL, obj) < 0) {
+/* TODO report -ERANGE more nicely */
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   full_name(qiv, name), "time");
+}
+}
+
 static void qobject_input_optional(Visitor *v, const char *name, bool *present)
 {
 QObjectInputVisitor *qiv = to_qiv(v);
@@ -708,6 +725,7 @@ Visitor *qobject_input_visitor_new_keyval(QObject *obj)
 v->visitor.type_any = qobject_input_type_any;
 v->visitor.type_null = qobject_input_type_null;
 v->visitor.type_size = qobject_input_type_size_keyval;
+v->visitor.type_time = qobject_input_type_time_keyval;
 v->keyval = true;
 
 return >visitor;
diff --git a/qapi/trace-events b/qapi/trace-events
index 

[PATCH v16 03/14] util/cutils: refactor do_strtosz() to support suffixes list

2019-11-14 Thread Tao Xu
Add do_strtomul() to convert string according to different suffixes.

Reviewed-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---

No changes in v16.

Changes in v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
---
 util/cutils.c | 72 ++-
 1 file changed, 42 insertions(+), 30 deletions(-)

diff --git a/util/cutils.c b/util/cutils.c
index d94a468954..ffef92338a 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -181,41 +181,37 @@ int fcntl_setfl(int fd, int flag)
 }
 #endif
 
-static int64_t suffix_mul(char suffix, int64_t unit)
+static int64_t suffix_mul(const char *suffixes[], int num_suffix,
+  const char *endptr, int *offset, int64_t unit)
 {
-switch (qemu_toupper(suffix)) {
-case 'B':
-return 1;
-case 'K':
-return unit;
-case 'M':
-return unit * unit;
-case 'G':
-return unit * unit * unit;
-case 'T':
-return unit * unit * unit * unit;
-case 'P':
-return unit * unit * unit * unit * unit;
-case 'E':
-return unit * unit * unit * unit * unit * unit;
+int i, suffix_len;
+int64_t mul = 1;
+
+for (i = 0; i < num_suffix; i++) {
+suffix_len = strlen(suffixes[i]);
+if (g_ascii_strncasecmp(suffixes[i], endptr, suffix_len) == 0) {
+*offset = suffix_len;
+return mul;
+}
+mul *= unit;
 }
+
 return -1;
 }
 
 /*
- * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
- * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
- * in *end, if not NULL. Return -ERANGE on overflow, and -EINVAL on
- * other error.
+ * Convert string according to different suffixes. End pointer will be returned
+ * in *end, if not NULL. Return -ERANGE on overflow, and -EINVAL on other 
error.
  */
-static int do_strtosz(const char *nptr, const char **end,
-  const char default_suffix, int64_t unit,
+static int do_strtomul(const char *nptr, const char **end,
+   const char *suffixes[], int num_suffix,
+   const char *default_suffix, int64_t unit,
   uint64_t *result)
 {
 int retval;
 const char *endptr;
-unsigned char c;
 int mul_required = 0;
+int offset = 0;
 long double val, mul, integral, fraction;
 
 retval = qemu_strtold_finite(nptr, , );
@@ -226,12 +222,12 @@ static int do_strtosz(const char *nptr, const char **end,
 if (fraction != 0) {
 mul_required = 1;
 }
-c = *endptr;
-mul = suffix_mul(c, unit);
+
+mul = suffix_mul(suffixes, num_suffix, endptr, , unit);
 if (mul >= 0) {
-endptr++;
+endptr += offset;
 } else {
-mul = suffix_mul(default_suffix, unit);
+mul = suffix_mul(suffixes, num_suffix, default_suffix, , unit);
 assert(mul >= 0);
 }
 if (mul == 1 && mul_required) {
@@ -256,19 +252,35 @@ out:
 return retval;
 }
 
+/*
+ * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
+ * in *end, if not NULL. Return -ERANGE on overflow, and -EINVAL on
+ * other error.
+ */
+static int do_strtosz(const char *nptr, const char **end,
+  const char *default_suffix, int64_t unit,
+  uint64_t *result)
+{
+static const char *suffixes[] = { "B", "K", "M", "G", "T", "P", "E" };
+
+return do_strtomul(nptr, end, suffixes, ARRAY_SIZE(suffixes),
+   default_suffix, unit, result);
+}
+
 int qemu_strtosz(const char *nptr, const char **end, uint64_t *result)
 {
-return do_strtosz(nptr, end, 'B', 1024, result);
+return do_strtosz(nptr, end, "B", 1024, result);
 }
 
 int qemu_strtosz_MiB(const char *nptr, const char **end, uint64_t *result)
 {
-return do_strtosz(nptr, end, 'M', 1024, result);
+return do_strtosz(nptr, end, "M", 1024, result);
 }
 
 int qemu_strtosz_metric(const char *nptr, const char **end, uint64_t *result)
 {
-return do_strtosz(nptr, end, 'B', 1000, result);
+return do_strtosz(nptr, end, "B", 1000, result);
 }
 
 /**
-- 
2.20.1




[PATCH v16 02/14] util/cutils: Use qemu_strtold_finite to parse size

2019-11-14 Thread Tao Xu
Support full 64bit precision, modify related test cases.

Signed-off-by: Tao Xu 
---

New patch in v16
---
 tests/test-cutils.c| 41 +---
 tests/test-keyval.c| 47 +++---
 tests/test-qemu-opts.c | 39 +++
 util/cutils.c  | 13 +---
 4 files changed, 24 insertions(+), 116 deletions(-)

diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 1aa8351520..465514b85f 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -1970,40 +1970,19 @@ static void test_qemu_strtosz_simple(void)
 g_assert_cmpint(err, ==, 0);
 g_assert_cmpint(res, ==, 12345);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
+/* Note: precision is 64 bits (UINT64_MAX) */
 
-str = "9007199254740991"; /* 2^53-1 */
+str = "18446744073709551614"; /* UINT64_MAX - 1 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x1f);
-g_assert(endptr == str + 16);
-
-str = "9007199254740992"; /* 2^53 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20);
-g_assert(endptr == str + 16);
-
-str = "9007199254740993"; /* 2^53+1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0x20); /* rounded to 53 bits */
-g_assert(endptr == str + 16);
-
-str = "18446744073709549568"; /* 0xf800 (53 msbs set) */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800);
+g_assert_cmpint(res, ==, 0xfffe);
 g_assert(endptr == str + 20);
 
-str = "18446744073709550591"; /* 0xfbff */
+str = "18446744073709551615"; /* UINT64_MAX */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, 0);
-g_assert_cmpint(res, ==, 0xf800); /* rounded to 53 bits */
+g_assert_cmpint(res, ==, 0x);
 g_assert(endptr == str + 20);
-
-/* 0x7e00..0x7fff get rounded to
- * 0x8000, thus -ERANGE; see test_qemu_strtosz_erange() */
 }
 
 static void test_qemu_strtosz_units(void)
@@ -2145,16 +2124,6 @@ static void test_qemu_strtosz_erange(void)
 g_assert_cmpint(err, ==, -ERANGE);
 g_assert(endptr == str + 2);
 
-str = "18446744073709550592"; /* 0xfc00 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
-
-str = "18446744073709551615"; /* 2^64-1 */
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert(endptr == str + 20);
-
 str = "18446744073709551616"; /* 2^64 */
 err = qemu_strtosz(str, , );
 g_assert_cmpint(err, ==, -ERANGE);
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 09b0ae3c68..fad941fcb8 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -383,59 +383,26 @@ static void test_keyval_visit_size(void)
 visit_end_struct(v, NULL);
 visit_free(v);
 
-/* Note: precision is 53 bits since we're parsing with strtod() */
+/* Note: precision is 64 bits (UINT64_MAX) */
 
-/* Around limit of precision: 2^53-1, 2^53, 2^53+1 */
-qdict = keyval_parse("sz1=9007199254740991,"
- "sz2=9007199254740992,"
- "sz3=9007199254740993",
+/* Around limit of precision: UINT64_MAX - 1, UINT64_MAX */
+qdict = keyval_parse("sz1=18446744073709551614,"
+ "sz2=18446744073709551615",
  NULL, _abort);
 v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
 qobject_unref(qdict);
 visit_start_struct(v, NULL, NULL, 0, _abort);
 visit_type_size(v, "sz1", , _abort);
-g_assert_cmphex(sz, ==, 0x1f);
+g_assert_cmphex(sz, ==, 0xfffe);
 visit_type_size(v, "sz2", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_type_size(v, "sz3", , _abort);
-g_assert_cmphex(sz, ==, 0x20);
-visit_check_struct(v, _abort);
-visit_end_struct(v, NULL);
-visit_free(v);
-
-/* Close to signed upper limit 0x7c00 (53 msbs set) */
-qdict = keyval_parse("sz1=9223372036854774784," /* 7c00 */
- "sz2=9223372036854775295", /* 7dff */
- NULL, _abort);
-v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
-qobject_unref(qdict);
-visit_start_struct(v, NULL, NULL, 0, _abort);
-visit_type_size(v, "sz1", , _abort);
-g_assert_cmphex(sz, ==, 0x7c00);
-visit_type_size(v, "sz2", , _abort);
-g_assert_cmphex(sz, ==, 0x7c00);
-visit_check_struct(v, _abort);
-visit_end_struct(v, NULL);
-visit_free(v);
-
-/* Close to actual upper limit 0xf800 (53 msbs set) */
-

[PATCH v16 04/14] util/cutils: Add qemu_strtotime_ns()

2019-11-14 Thread Tao Xu
To convert strings with time suffixes to numbers, support time unit are
"ns" for nanosecond, "us" for microsecond, "ms" for millisecond or "s"
for second. Add test for qemu_strtotime_ns, test the input of basic,
time suffixes, float, invaild, trailing and overflow.

Reviewed-by: Eduardo Habkost 
Signed-off-by: Tao Xu 
---

Changes in v16:
- Update the test because precision is 64 bits

Changes in v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
- use ARRAY_SIZE(suffixes) instead of hardcoding the
  suffixes number (Eduardo)
---
 include/qemu/cutils.h |   1 +
 tests/test-cutils.c   | 173 ++
 util/cutils.c |  14 
 3 files changed, 188 insertions(+)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 48cf9bf776..befa94f2d4 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -185,5 +185,6 @@ int uleb128_decode_small(const uint8_t *in, uint32_t *n);
  * *str1 is <, == or > than *str2.
  */
 int qemu_pstrcmp0(const char **str1, const char **str2);
+int qemu_strtotime_ns(const char *nptr, const char **end, uint64_t *result);
 
 #endif
diff --git a/tests/test-cutils.c b/tests/test-cutils.c
index 465514b85f..0ff1d816f1 100644
--- a/tests/test-cutils.c
+++ b/tests/test-cutils.c
@@ -2148,6 +2148,167 @@ static void test_qemu_strtosz_metric(void)
 g_assert(endptr == str + 6);
 }
 
+static void test_qemu_strtotime_ns_simple(void)
+{
+const char *str;
+const char *endptr;
+int err;
+uint64_t res = 0xbaadf00d;
+
+str = "0";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 0);
+g_assert(endptr == str + 1);
+
+str = "56789";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 56789);
+g_assert(endptr == str + 5);
+
+err = qemu_strtotime_ns(str, NULL, );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 56789);
+
+/* Note: precision is 64 bits (UINT64_MAX) */
+
+str = "18446744073709551614"; /* UINT64_MAX - 1 */
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 0xfffe);
+g_assert(endptr == str + 20);
+
+str = "18446744073709551615"; /* UINT64_MAX */
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 0x);
+g_assert(endptr == str + 20);
+}
+
+static void test_qemu_strtotime_ns_units(void)
+{
+const char *ns = "1ns";
+const char *us = "1us";
+const char *ms = "1ms";
+const char *s = "1s";
+int err;
+const char *endptr;
+uint64_t res = 0xbaadf00d;
+
+/* default time unit is ns */
+err = qemu_strtotime_ns(ns, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 1);
+g_assert(endptr == ns + 3);
+
+err = qemu_strtotime_ns(us, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 1000);
+g_assert(endptr == us + 3);
+
+err = qemu_strtotime_ns(ms, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 100);
+g_assert(endptr == ms + 3);
+
+err = qemu_strtotime_ns(s, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 10LL);
+g_assert(endptr == s + 2);
+}
+
+static void test_qemu_strtotime_ns_float(void)
+{
+const char *str = "56.789us";
+int err;
+const char *endptr;
+uint64_t res = 0xbaadf00d;
+
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 56.789 * 1000);
+g_assert(endptr == str + 8);
+}
+
+static void test_qemu_strtotime_ns_invalid(void)
+{
+const char *str;
+const char *endptr;
+int err;
+uint64_t res = 0xbaadf00d;
+
+str = "";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert(endptr == str);
+
+str = " \t ";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert(endptr == str);
+
+str = "crap";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert(endptr == str);
+
+str = "inf";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert(endptr == str);
+
+str = "NaN";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert(endptr == str);
+}
+
+static void test_qemu_strtotime_ns_trailing(void)
+{
+const char *str;
+const char *endptr;
+int err;
+uint64_t res = 0xbaadf00d;
+
+str = "123xxx";
+
+err = qemu_strtotime_ns(str, NULL, );
+g_assert_cmpint(err, ==, -EINVAL);
+
+str = "1msxxx";
+err = qemu_strtotime_ns(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpint(res, ==, 100);
+g_assert(endptr == str + 3);
+
+err = qemu_strtotime_ns(str, NULL, );
+g_assert_cmpint(err, ==, -EINVAL);
+}
+
+static void 

[PATCH v16 01/14] util/cutils: Add Add qemu_strtold and qemu_strtold_finite

2019-11-14 Thread Tao Xu
Work like qemu_strtod() and qemu_strtold_finite, except store long
double.

Signed-off-by: Tao Xu 
---

New patch in v16.
---
 include/qemu/cutils.h |  3 +++
 util/cutils.c | 48 ++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index b54c847e0f..48cf9bf776 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -146,6 +146,9 @@ int qemu_strtou64(const char *nptr, const char **endptr, 
int base,
   uint64_t *result);
 int qemu_strtod(const char *nptr, const char **endptr, double *result);
 int qemu_strtod_finite(const char *nptr, const char **endptr, double *result);
+int qemu_strtold(const char *nptr, const char **endptr, long double *result);
+int qemu_strtold_finite(const char *nptr, const char **endptr,
+long double *result);
 
 int parse_uint(const char *s, unsigned long long *value, char **endptr,
int base);
diff --git a/util/cutils.c b/util/cutils.c
index fd591cadf0..5db3b2add5 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -553,7 +553,7 @@ int qemu_strtou64(const char *nptr, const char **endptr, 
int base,
 
 /**
  * Convert string @nptr to a double.
-  *
+ *
  * This is a wrapper around strtod() that is harder to misuse.
  * Semantics of @nptr and @endptr match strtod() with differences
  * noted below.
@@ -616,6 +616,52 @@ int qemu_strtod_finite(const char *nptr, const char 
**endptr, double *result)
 return ret;
 }
 
+/*
+ * Convert string @nptr to a long double.
+ *
+ * Works like qemu_strtod(), except it stores long double.
+ */
+int qemu_strtold(const char *nptr, const char **endptr, long double *result)
+{
+char *ep;
+
+if (!nptr) {
+if (endptr) {
+*endptr = nptr;
+}
+return -EINVAL;
+}
+
+errno = 0;
+*result = strtold(nptr, );
+return check_strtox_error(nptr, ep, endptr, errno);
+}
+
+/*
+ * Convert string @nptr to a finite long double.
+ *
+ * Works like qemu_strtod_finite(), except it stores long double.
+ */
+int qemu_strtold_finite(const char *nptr, const char **endptr,
+long double *result)
+{
+long double tmp;
+int ret;
+
+ret = qemu_strtold(nptr, endptr, );
+if (!ret && !isfinite(tmp)) {
+if (endptr) {
+*endptr = nptr;
+}
+ret = -EINVAL;
+}
+
+if (ret != -EINVAL) {
+*result = tmp;
+}
+return ret;
+}
+
 /**
  * Searches for the first occurrence of 'c' in 's', and returns a pointer
  * to the trailing null byte if none was found.
-- 
2.20.1




[PATCH v16 00/14] Build ACPI Heterogeneous Memory Attribute Table (HMAT)

2019-11-14 Thread Tao Xu
This series of patches will build Heterogeneous Memory Attribute Table (HMAT)
according to the command line. The ACPI HMAT describes the memory attributes,
such as memory side cache attributes and bandwidth and latency details,
related to the Memory Proximity Domain.
The software is expected to use HMAT information as hint for optimization.

In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report
the platform's HMAT tables.

The V15 patches link:
https://patchwork.kernel.org/cover/11231971/

Changelog:
v16:
- Add and use qemu_strtold_finite to parse size, support full
  64bit precision, modify related test cases (Eduardo and Markus)
- Simplify struct HMAT_LB_Info and related code, unify latency
  and bandwidth (Igor)
- Add cross check with hmat_lb data (Igor)
- Fields in Cache Attributes are promoted to uint32_t before
  shifting (Igor)
- Add case for QMP build HMAT (Igor)
v15:
- Add a new patch to refactor do_strtosz() (Eduardo)
- Make tests without breaking CI (Michael)
v14:
- Reuse the codes of do_strtosz to build qemu_strtotime_ns
  (Eduardo)
- Squash patch v13 01/12 and 02/12 together (Daniel and Eduardo)
- Drop time unit picosecond (Eric)
- Use qemu ctz64 and clz64 instead of builtin function
v13:
- Modify some text description
- Drop "initiator_valid" field in struct NodeInfo
- Reuse Garray to store the raw bandwidth and bandwidth data
- Calculate common base unit using range bitmap
- Add a patch to alculate hmat latency and bandwidth entry list
- Drop the total_levels option and use readable cache size
- Remove the unnecessary head file
- Use decimal notation with appropriate suffix for cache size
v12:
- Fix a bug that a memory-only node without initiator setting
  doesn't report error. (reported by Danmei Wei)
- Fix a bug that if HMAT is enabled and without hmat-lb setting,
  QEMU will crash. (reported by Danmei Wei)
v11:
- Move numa option patches forward.
- Add num_initiator in Numa_state to record the number of
initiators.
- Simplify struct HMAT_LB_Info, use uint64_t array to store data.
- Drop hmat_get_base().
- Calculate base in build_hmat_lb().
v10:
- Add qemu_strtotime_ps() to convert strings with time suffixes
to numbers, and add some tests for it.
- Add qapi buildin type time, and add some tests for it.
- Add machine oprion properties "-machine hmat=on|off" for enabling
or disabling HMAT in QEMU.

Liu Jingqi (5):
  numa: Extend CLI to provide memory latency and bandwidth information
  numa: Extend CLI to provide memory side cache information
  hmat acpi: Build Memory Proximity Domain Attributes Structure(s)
  hmat acpi: Build System Locality Latency and Bandwidth Information
Structure(s)
  hmat acpi: Build Memory Side Cache Information Structure(s)

Tao Xu (9):
  util/cutils: Add Add qemu_strtold and qemu_strtold_finite
  util/cutils: Use qemu_strtold_finite to parse size
  util/cutils: refactor do_strtosz() to support suffixes list
  util/cutils: Add qemu_strtotime_ns()
  qapi: Add builtin type time
  tests: Add test for QAPI builtin type time
  numa: Extend CLI to provide initiator information for numa nodes
  tests/numa: Add case for QMP build HMAT
  tests/bios-tables-test: add test cases for ACPI HMAT

 hw/acpi/Kconfig   |   7 +-
 hw/acpi/Makefile.objs |   1 +
 hw/acpi/hmat.c| 269 +
 hw/acpi/hmat.h|  42 
 hw/core/machine.c |  64 ++
 hw/core/numa.c| 270 ++
 hw/i386/acpi-build.c  |   5 +
 include/qapi/visitor-impl.h   |   4 +
 include/qapi/visitor.h|   8 +
 include/qemu/cutils.h |   4 +
 include/sysemu/numa.h |  84 
 qapi/machine.json | 178 -
 qapi/opts-visitor.c   |  22 +++
 qapi/qapi-visit-core.c|  12 ++
 qapi/qobject-input-visitor.c  |  18 ++
 qapi/trace-events |   1 +
 qemu-options.hx   |  96 -
 scripts/qapi/schema.py|   1 +
 tests/bios-tables-test-allowed-diff.h |   8 +
 tests/bios-tables-test.c  |  44 +
 tests/data/acpi/pc/APIC.acpihmat  |   0
 tests/data/acpi/pc/DSDT.acpihmat  |   0
 tests/data/acpi/pc/HMAT.acpihmat  |   0
 tests/data/acpi/pc/SRAT.acpihmat  |   0
 tests/data/acpi/q35/APIC.acpihmat |   0
 tests/data/acpi/q35/DSDT.acpihmat |   0
 tests/data/acpi/q35/HMAT.acpihmat |   0
 tests/data/acpi/q35/SRAT.acpihmat |   0
 tests/numa-test.c |  51 +
 tests/test-cutils.c   | 214 
 tests/test-keyval.c   | 136 +
 tests/test-qemu-opts.c|  39 +---
 

Re: [PATCH] Modify tests to work with clang

2019-11-14 Thread Thomas Huth
On 15/11/2019 05.38, Taylor Simpson wrote:
> Signed-off-by: Taylor Simpson 
> ---
>  tests/tcg/multiarch/float_helpers.c | 13 -
>  tests/tcg/multiarch/linux-test.c|  2 +-
>  2 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/tcg/multiarch/float_helpers.c 
> b/tests/tcg/multiarch/float_helpers.c
> index 8ee7903..437247c 100644
> --- a/tests/tcg/multiarch/float_helpers.c
> +++ b/tests/tcg/multiarch/float_helpers.c
> @@ -26,6 +26,17 @@
>  
>  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
>  
> +/*
> +| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
> +| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h.
> +**/
> +#if defined(__GNUC__) && defined(__GNUC_MINOR__)
> +# define QEMU_GNUC_PREREQ(maj, min) \
> + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
> +#else
> +# define QEMU_GNUC_PREREQ(maj, min) 0
> +#endif
> +
>  /*
>   * Half Precision Numbers
>   *
> @@ -79,7 +90,7 @@ char *fmt_16(uint16_t num)
>  
>  #ifndef SNANF
>  /* Signaling NaN macros, if supported.  */
> -# if __GNUC_PREREQ(3, 3)
> +# if defined(__clang__) || QEMU_GNUC_PREREQ(3, 3)

IIRC clearly, Clang reports itself as GCC 4.2, so you certainly don't
need the defined(__clang__) here.
But additionally, we require at least GCC 4.8 to compile QEMU these days
(see the check in the configure script), so you can even remove this #if
statement completely, so that the following #defines are simply always used.

 Thomas




Re: [SeaBIOS] Re: [PATCH] ahci: zero-initialize port struct

2019-11-14 Thread Gerd Hoffmann
On Thu, Nov 14, 2019 at 11:08:59AM +0200, Sam Eiderman wrote:
> Do you want a single commit with the changes?

That is the idea, unless it is too messy.  I don't have v4 any more,
looks like I've deleted v4 instead of v3 while cleaning up my mail
folders, so I can't easily check.  Do you have v3 and v4 as git
branches somewhere?

> I am afraid it will be slightly unreadable when looking at file histories.
> The only commit that didn't change was:
> [SeaBIOS] [PATCH v4 2/5] boot: Reorder functions in boot.c

Hmm, looks like there have been more changes than I remember.
Maybe it's better to revert and re-apply the changed patches
then.

cheers,
  Gerd




Re: [PATCH] Implement backend program convention command for vhost-user-blk

2019-11-14 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20191115060925.12346-1-mic...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH] Implement backend program convention command for vhost-user-blk
Type: series
Message-id: 20191115060925.12346-1-mic...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
2a8f5a3 Implement backend program convention command for vhost-user-blk

=== OUTPUT BEGIN ===
ERROR: trailing whitespace
#44: FILE: contrib/vhost-user-blk/vhost-user-blk.c:600:
+{   $

ERROR: suspect code indent for conditional statements (8, 11)
#96: FILE: contrib/vhost-user-blk/vhost-user-blk.c:631:
+if (lsock < 0) {
+   exit(EXIT_FAILURE);

total: 2 errors, 0 warnings, 135 lines checked

Commit 2a8f5a38e630 (Implement backend program convention command for 
vhost-user-blk) has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191115060925.12346-1-mic...@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH] Implement backend program convention command for vhost-user-blk

2019-11-14 Thread Micky Yun Chan
From: michan 

Signed-off-by: Micky Yun Chan (michiboo) 
---
 contrib/vhost-user-blk/vhost-user-blk.c | 95 +++--
 1 file changed, 57 insertions(+), 38 deletions(-)

diff --git a/contrib/vhost-user-blk/vhost-user-blk.c 
b/contrib/vhost-user-blk/vhost-user-blk.c
index ae61034656..435ddc46b2 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -576,70 +576,89 @@ vub_new(char *blk_file)
 return vdev_blk;
 }
 
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_blk_file;
+static gboolean opt_print_caps;
+static gboolean opt_read_only;
+
+
+static GOptionEntry entries[] = {
+{ "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, _print_caps,
+  "Print capabilities", NULL },
+{ "fd", 'f', 0, G_OPTION_ARG_INT, _fdnum,
+  "Use inherited fd socket", "FDNUM" },
+{ "socket-path", 's', 0, G_OPTION_ARG_FILENAME, _socket_path,
+  "Use UNIX socket path", "PATH" },
+{"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, _blk_file,
+ "block device or file path", "PATH"},
+{ "read-only", 'r', 0, G_OPTION_ARG_NONE, _read_only,
+  "Enable read-only", NULL }
+};
+
 int main(int argc, char **argv)
-{
-int opt;
-char *unix_socket = NULL;
-char *blk_file = NULL;
-bool enable_ro = false;
+{   
 int lsock = -1, csock = -1;
 VubDev *vdev_blk = NULL;
+GError *error = NULL;
+GOptionContext *context;
 
-while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
-switch (opt) {
-case 'b':
-blk_file = g_strdup(optarg);
-break;
-case 's':
-unix_socket = g_strdup(optarg);
-break;
-case 'r':
-enable_ro = true;
-break;
-case 'h':
-default:
-printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
-   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
-return 0;
-}
+context = g_option_context_new(NULL);
+g_option_context_add_main_entries(context, entries, NULL);
+if (!g_option_context_parse(context, , , )) {
+g_printerr("Option parsing failed: %s\n", error->message);
+exit(EXIT_FAILURE);
+}
+if (opt_print_caps) {
+g_print("{\n");
+g_print("  \"type\": \"blk\",\n");
+g_print("  \"features\": [\n");
+g_print("\"blk-file\",\n");
+g_print("\"read-only\"\n");
+g_print("  ]\n");
+g_print("}\n");
+exit(EXIT_SUCCESS);
 }
 
-if (!unix_socket || !blk_file) {
+if (!opt_blk_file) {
 printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
" | -r Enable read-only ] | [ -h ]\n", argv[0]);
 return -1;
 }
 
-lsock = unix_sock_new(unix_socket);
-if (lsock < 0) {
-goto err;
+if (opt_socket_path) {
+lsock = unix_sock_new(opt_socket_path);
+if (lsock < 0) {
+   exit(EXIT_FAILURE);
+}
+} else {
+lsock = opt_fdnum;
 }
 
-csock = accept(lsock, (void *)0, (void *)0);
+csock = accept(lsock, NULL, NULL);
 if (csock < 0) {
-fprintf(stderr, "Accept error %s\n", strerror(errno));
-goto err;
+g_printerr("Accept error %s\n", strerror(errno));
+exit(EXIT_FAILURE);
 }
 
-vdev_blk = vub_new(blk_file);
+vdev_blk = vub_new(opt_blk_file);
 if (!vdev_blk) {
-goto err;
+exit(EXIT_FAILURE);
 }
-if (enable_ro) {
+if (opt_read_only) {
 vdev_blk->enable_ro = true;
 }
 
 if (!vug_init(_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
   vub_panic_cb, _iface)) {
-fprintf(stderr, "Failed to initialized libvhost-user-glib\n");
-goto err;
+g_printerr("Failed to initialized libvhost-user-glib\n");
+exit(EXIT_FAILURE);
 }
 
 g_main_loop_run(vdev_blk->loop);
+g_main_loop_unref(vdev_blk->loop);
 
 vug_deinit(_blk->parent);
-
-err:
 vub_free(vdev_blk);
 if (csock >= 0) {
 close(csock);
@@ -647,8 +666,8 @@ err:
 if (lsock >= 0) {
 close(lsock);
 }
-g_free(unix_socket);
-g_free(blk_file);
+g_free(opt_socket_path);
+g_free(opt_blk_file);
 
 return 0;
 }
-- 
2.21.0




Re: [PATCH] Modify tests to work with clang

2019-11-14 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/1573792691-398-1-git-send-email-tsimp...@quicinc.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH] Modify tests to work with clang
Type: series
Message-id: 1573792691-398-1-git-send-email-tsimp...@quicinc.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
40fa5c0 Modify tests to work with clang

=== OUTPUT BEGIN ===
WARNING: Block comments use a leading /* on a separate line
#18: FILE: tests/tcg/multiarch/float_helpers.c:29:
+/*

WARNING: Block comments use * on subsequent lines
#19: FILE: tests/tcg/multiarch/float_helpers.c:30:
+/*
+| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.

WARNING: Block comments use a trailing */ on a separate line
#21: FILE: tests/tcg/multiarch/float_helpers.c:32:
+**/

WARNING: architecture specific defines should be avoided
#22: FILE: tests/tcg/multiarch/float_helpers.c:33:
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)

WARNING: architecture specific defines should be avoided
#37: FILE: tests/tcg/multiarch/float_helpers.c:93:
+# if defined(__clang__) || QEMU_GNUC_PREREQ(3, 3)

ERROR: Use of volatile is usually wrong, please add a comment
#50: FILE: tests/tcg/multiarch/linux-test.c:488:
+*(volatile uint8_t *)0 = 0;

total: 1 errors, 5 warnings, 33 lines checked

Commit 40fa5c0d7a1b (Modify tests to work with clang) has style problems, 
please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/1573792691-398-1-git-send-email-tsimp...@quicinc.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

RE: [PATCH v9 Kernel 2/5] vfio iommu: Add ioctl defination to get dirty pages bitmap.

2019-11-14 Thread Tian, Kevin
> From: Alex Williamson
> Sent: Friday, November 15, 2019 11:22 AM
> 
> On Thu, 14 Nov 2019 21:40:35 -0500
> Yan Zhao  wrote:
> 
> > On Fri, Nov 15, 2019 at 05:06:25AM +0800, Alex Williamson wrote:
> > > On Fri, 15 Nov 2019 00:26:07 +0530
> > > Kirti Wankhede  wrote:
> > >
> > > > On 11/14/2019 1:37 AM, Alex Williamson wrote:
> > > > > On Thu, 14 Nov 2019 01:07:21 +0530
> > > > > Kirti Wankhede  wrote:
> > > > >
> > > > >> On 11/13/2019 4:00 AM, Alex Williamson wrote:
> > > > >>> On Tue, 12 Nov 2019 22:33:37 +0530
> > > > >>> Kirti Wankhede  wrote:
> > > > >>>
> > > >  All pages pinned by vendor driver through vfio_pin_pages API
> should be
> > > >  considered as dirty during migration. IOMMU container
> maintains a list of
> > > >  all such pinned pages. Added an ioctl defination to get bitmap of
> such
> > > > >>>
> > > > >>> definition
> > > > >>>
> > > >  pinned pages for requested IO virtual address range.
> > > > >>>
> > > > >>> Additionally, all mapped pages are considered dirty when
> physically
> > > > >>> mapped through to an IOMMU, modulo we discussed devices
> opting in to
> > > > >>> per page pinning to indicate finer granularity with a TBD
> mechanism to
> > > > >>> figure out if any non-opt-in devices remain.
> > > > >>>
> > > > >>
> > > > >> You mean, in case of device direct assignment (device pass
> through)?
> > > > >
> > > > > Yes, or IOMMU backed mdevs.  If vfio_dmas in the container are
> fully
> > > > > pinned and mapped, then the correct dirty page set is all mapped
> pages.
> > > > > We discussed using the vpfn list as a mechanism for vendor drivers
> to
> > > > > reduce their migration footprint, but we also discussed that we
> would
> > > > > need a way to determine that all participants in the container have
> > > > > explicitly pinned their working pages or else we must consider the
> > > > > entire potential working set as dirty.
> > > > >
> > > >
> > > > How can vendor driver tell this capability to iommu module? Any
> suggestions?
> > >
> > > I think it does so by pinning pages.  Is it acceptable that if the
> > > vendor driver pins any pages, then from that point forward we consider
> > > the IOMMU group dirty page scope to be limited to pinned pages?
> There
> > > are complications around non-singleton IOMMU groups, but I think
> we're
> > > already leaning towards that being a non-worthwhile problem to solve.
> > > So if we require that only singleton IOMMU groups can pin pages and
> we
> > > pass the IOMMU group as a parameter to
> > > vfio_iommu_driver_ops.pin_pages(), then the type1 backend can set a
> > > flag on its local vfio_group struct to indicate dirty page scope is
> > > limited to pinned pages.  We might want to keep a flag on the
> > > vfio_iommu struct to indicate if all of the vfio_groups for each
> > > vfio_domain in the vfio_iommu.domain_list dirty page scope limited to
> > > pinned pages as an optimization to avoid walking lists too often.  Then
> > > we could test if vfio_iommu.domain_list is not empty and this new flag
> > > does not limit the dirty page scope, then everything within each
> > > vfio_dma is considered dirty.
> > >
> >
> > hi Alex
> > could you help clarify whether my understandings below are right?
> > In future,
> > 1. for mdev and for passthrough device withoug hardware ability to track
> > dirty pages, the vendor driver has to explicitly call
> > vfio_pin_pages()/vfio_unpin_pages() + a flag to tell vfio its dirty page 
> > set.
> 
> For non-IOMMU backed mdevs without hardware dirty page tracking,
> there's no change to the vendor driver currently.  Pages pinned by the
> vendor driver are marked as dirty.

What about the vendor driver can figure out, in software means, which
pinned pages are actually dirty? In that case, would a separate mark_dirty
interface make more sense? Or introduce read/write flag to the pin_pages
interface similar to DMA API? Existing drivers always set both r/w flags but
just in case then a specific driver may set read-only or write-only...

> 
> For any IOMMU backed device, mdev or direct assignment, all mapped
> memory would be considered dirty unless there are explicit calls to pin
> pages on top of the IOMMU page pinning and mapping.  These would likely
> be enabled only when the device is in the _SAVING device_state.
> 
> > 2. for those devices with hardware ability to track dirty pages, will still
> > provide a callback to vendor driver to get dirty pages. (as for those
> devices,
> > it is hard to explicitly call vfio_pin_pages()/vfio_unpin_pages())
> >
> > 3. for devices relying on dirty bit info in physical IOMMU, there
> > will be a callback to physical IOMMU driver to get dirty page set from
> > vfio.
> 
> The proposal here does not cover exactly how these would be
> implemented, it only establishes the container as the point of user
> interaction with the dirty bitmap and hopefully allows us to maintain
> that interface regardless of whether we have dirty tracking at the
> device 

[PULL 4/4] riscv/virt: Increase flash size

2019-11-14 Thread Palmer Dabbelt
From: Alistair Francis 

Coreboot developers have requested that they have at least 32MB of flash
to load binaries. We currently have 32MB of flash, but it is split in
two to allow loading two flash binaries. Let's increase the flash size
from 32MB to 64MB to ensure we have a single region that is 32MB.

No QEMU release has include flash in the RISC-V virt machine, so this
isn't a breaking change.

Signed-off-by: Alistair Francis 
Signed-off-by: Palmer Dabbelt 
---
 hw/riscv/virt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index cc8f311e6bbb..23f340df193e 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -62,7 +62,7 @@ static const struct MemmapEntry {
 [VIRT_PLIC] ={  0xc00, 0x400 },
 [VIRT_UART0] =   { 0x1000, 0x100 },
 [VIRT_VIRTIO] =  { 0x10001000,0x1000 },
-[VIRT_FLASH] =   { 0x2000, 0x200 },
+[VIRT_FLASH] =   { 0x2000, 0x400 },
 [VIRT_DRAM] ={ 0x8000,   0x0 },
 [VIRT_PCIE_MMIO] =   { 0x4000,0x4000 },
 [VIRT_PCIE_PIO] ={ 0x0300,0x0001 },
-- 
2.21.0




[PULL 2/4] target/riscv: Remove atomic accesses to MIP CSR

2019-11-14 Thread Palmer Dabbelt
From: Alistair Francis 

Instead of relying on atomics to access the MIP register let's update
our helper function to instead just lock the IO mutex thread before
writing. This follows the same concept as used in PPC for handling
interrupts

Signed-off-by: Alistair Francis 
Reviewed-by: Richard Henderson 
Reviewed-by: Palmer Dabbelt 
Signed-off-by: Palmer Dabbelt 
Signed-off-by: Palmer Dabbelt 
---
 target/riscv/cpu.c|  5 ++--
 target/riscv/cpu.h|  9 
 target/riscv/cpu_helper.c | 48 +++
 target/riscv/csr.c|  2 +-
 4 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 3939963b71ab..d37861a4305b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -224,8 +224,7 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int 
flags)
 #ifndef CONFIG_USER_ONLY
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mhartid ", env->mhartid);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mstatus ", env->mstatus);
-qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mip ",
- (target_ulong)atomic_read(>mip));
+qemu_fprintf(f, " %s 0x%x\n", "mip ", env->mip);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mie ", env->mie);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "mideleg ", env->mideleg);
 qemu_fprintf(f, " %s " TARGET_FMT_lx "\n", "medeleg ", env->medeleg);
@@ -275,7 +274,7 @@ static bool riscv_cpu_has_work(CPUState *cs)
  * Definition of the WFI instruction requires it to ignore the privilege
  * mode and delegation registers, but respect individual enables
  */
-return (atomic_read(>mip) & env->mie) != 0;
+return (env->mip & env->mie) != 0;
 #else
 return true;
 #endif
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 8c64c68538d8..e59343e13c02 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -121,15 +121,6 @@ struct CPURISCVState {
 target_ulong mhartid;
 target_ulong mstatus;
 
-/*
- * CAUTION! Unlike the rest of this struct, mip is accessed asynchonously
- * by I/O threads. It should be read with atomic_read. It should be updated
- * using riscv_cpu_update_mip with the iothread mutex held. The iothread
- * mutex must be held because mip must be consistent with the CPU inturrept
- * state. riscv_cpu_update_mip calls cpu_interrupt or cpu_reset_interrupt
- * wuth the invariant that CPU_INTERRUPT_HARD is set iff mip is non-zero.
- * mip is 32-bits to allow atomic_read on 32-bit hosts.
- */
 uint32_t mip;
 uint32_t miclaim;
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f13131a51bde..767c8762ac42 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
@@ -38,7 +39,7 @@ static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 {
 target_ulong mstatus_mie = get_field(env->mstatus, MSTATUS_MIE);
 target_ulong mstatus_sie = get_field(env->mstatus, MSTATUS_SIE);
-target_ulong pending = atomic_read(>mip) & env->mie;
+target_ulong pending = env->mip & env->mie;
 target_ulong mie = env->priv < PRV_M || (env->priv == PRV_M && 
mstatus_mie);
 target_ulong sie = env->priv < PRV_S || (env->priv == PRV_S && 
mstatus_sie);
 target_ulong irqs = (pending & ~env->mideleg & -mie) |
@@ -92,42 +93,29 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint32_t 
interrupts)
 }
 }
 
-struct CpuAsyncInfo {
-uint32_t new_mip;
-};
-
-static void riscv_cpu_update_mip_irqs_async(CPUState *target_cpu_state,
-run_on_cpu_data data)
-{
-struct CpuAsyncInfo *info = (struct CpuAsyncInfo *) data.host_ptr;
-
-if (info->new_mip) {
-cpu_interrupt(target_cpu_state, CPU_INTERRUPT_HARD);
-} else {
-cpu_reset_interrupt(target_cpu_state, CPU_INTERRUPT_HARD);
-}
-
-g_free(info);
-}
-
 uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
 {
 CPURISCVState *env = >env;
 CPUState *cs = CPU(cpu);
-struct CpuAsyncInfo *info;
-uint32_t old, new, cmp = atomic_read(>mip);
+uint32_t old = env->mip;
+bool locked = false;
+
+if (!qemu_mutex_iothread_locked()) {
+locked = true;
+qemu_mutex_lock_iothread();
+}
 
-do {
-old = cmp;
-new = (old & ~mask) | (value & mask);
-cmp = atomic_cmpxchg(>mip, old, new);
-} while (old != cmp);
+env->mip = (env->mip & ~mask) | (value & mask);
 
-info = g_new(struct CpuAsyncInfo, 1);
-info->new_mip = new;
+if (env->mip) {
+cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+} else {
+cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+}
 
-async_run_on_cpu(cs, riscv_cpu_update_mip_irqs_async,
- 

[PULL 3/4] opensbi: Upgrade from v0.4 to v0.5

2019-11-14 Thread Palmer Dabbelt
From: Alistair Francis 

This release has:
Lot of critical fixes
Hypervisor extension support
SBI v0.2 base extension support
Debug prints support
Handle traps when doing unpriv load/store
Allow compiling without FP support
Use git describe to generate boot-time banner
Andes AE350 platform support

ShortLog:

Anup Patel (14):
  platform: sifive/fu540: Move FDT further up
  lib: Allow compiling without FP support
  lib: Introduce sbi_dprintf() API
  lib: Use sbi_dprintf() for invalid CSRs
  lib: Handle traps when doing unpriv load/store in get_insn()
  lib: Delegate supervisor ecall to HS-mode when H extension available
  lib: Extend sbi_hart_switch_mode() to support hypervisor extension
  lib: Extend sbi_trap_redirect() for hypervisor extension
  lib: Redirect WFI trapped from VS/VU mode to HS-mode
  include: Extend get_insn() to read instruction from VS/VU mode
  lib: Emulate HTIMEDELTA CSR for platforms not having TIME CSR
  Makefile: Minor fix in OPENSBI_VERSION_GIT
  lib: Fix coldboot race condition observed on emulators/simulators
  include: Bump-up version to 0.5

Atish Patra (16):
  lib: Provide an atomic exchange function unsigned long
  lib: Fix race conditions in tlb fifo access.
  platform: Remove the ipi_sync method from all platforms.
  lib: Fix timer for 32 bit
  lib: Support atomic swap instructions
  lib: Upgrade to full flush if size is at least threshold
  docs: Update the fu540 platform guide as per U-Boot documents.
  lib: Change tlb range flush threshold to 4k page instead of 1G
  lib: provide a platform specific tlb range flush threshold
  lib: Fix tlb flush range limit value
  Test: Move test payload related code out of interface header
  lib: Align error codes as per SBI specification.
  lib: Rename existing SBI implementation as 0.1.
  lib: Remove redundant variable assignment
  lib: Implement SBI v0.2
  lib: Provide a platform hook to implement vendor specific SBI extensions.

Bin Meng (6):
  platform: sifive: fu540: Use standard value string for cpu node status
  README: Document 32-bit / 64-bit images build
  treewide: Use conventional names for 32-bit and 64-bit
  platform: sifive: fu540: Expand FDT size before any patching
  firmware: Use macro instead of magic number for boot status
  docs: platform: Update descriptions for qemu/sifive_u support

Damien Le Moal (4):
  kendryte/k210: Use sifive UART driver
  kendryte/k210: remove sysctl code
  README: Update license information
  kendryte/k210: remove unused file

Georg Kotheimer (1):
  utils: Use cpu_to_fdt32() when writing to fdt

Jacob Garber (4):
  lib: Use bitwise & instead of boolean &&
  lib: Use correct type for return value
  lib: Prevent unintended sign extensions
  lib: Correct null pointer check

Lukas Auer (1):
  firmware: do not use relocated _boot_status before it is valid

Nylon Chen (3):
  firmware: Fix the loop condition of _wait_relocate_copy_done section
  platform: Add Andes AE350 initial support
  scripts: Add AE350 to platform list in the binary archive script

Palmer Dabbelt (1):
  Include `git describe` in OpenSBI

Zong Li (1):
  Write MSIP by using memory-mapped control register

Signed-off-by: Alistair Francis 
Reviewed-by: Palmer Dabbelt 
Signed-off-by: Palmer Dabbelt 
Signed-off-by: Palmer Dabbelt 
---
 pc-bios/opensbi-riscv32-virt-fw_jump.bin | Bin 36888 -> 40984 bytes
 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin | Bin 45064 -> 49160 bytes
 pc-bios/opensbi-riscv64-virt-fw_jump.bin | Bin 40968 -> 45064 bytes
 roms/opensbi |   2 +-
 4 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/pc-bios/opensbi-riscv32-virt-fw_jump.bin 
b/pc-bios/opensbi-riscv32-virt-fw_jump.bin
index 
f5bcaa56954c860d09acc0cb337c1666d88b6efb..6c5b7b89f676392b687d9835ac9fbdc34f3052cd
 100644
GIT binary patch
delta 14873
zcmcJ030M@@w(hQ~uBvWCX>DY1N=ri{#9%89(L`lxTF~GS(O?`Y5k1LOBQc35Cuy*#
zZW{;GEjA=3;snY$F~p7%eaC=EgP25#5z*YJL_{<>2~LP8$X@1-@EU755M*8
zu08zw-)j%G*4kxmKAl@eo6`nG+!CTtPdkkeUR@sH%OIp-m`+`OGIHmQ7kAP~|3?
z=on;#^`JiMtpS-s9DX0iOyPE7bNLkh|e`FUN)tBes4jeD|tCI3p;0Ay?C*$4i3o
zLnOSq6gl_HIr|^@dO?ZStJ$-w=)!)%XogD5(5(W{XrQV$ZSOy3*9gJ+DiVG;
zPA%Cl_Z{8hYVDQHca=mI2*!EP_%!v!;l{YG0ua7HqnwHH!wqS+FJR-Moj$43_G{
zd(4gK^Qn};e9j=s^{*lRawok(9{1|O@|eZJPsS`hPsZ$6ogDG$W;UAW$O{>mjEIh~
za0QDTnJfxdamZogP`C$=f(C}OiV)HquE1!1~De=POoNzYZ@
zy3i|KEDU%C9Z2Ve;lI)T!6l^FRBMM6TTMUdv^?s`H@wpN=;=;70`WiT!Sv68_!qh$
z^jK(`I>G=lTpm_x->zY$Jr65u*)1{5oYAe--YJw)u6|J>F_HZm7(;hJp;=0
z@@4|#SSCcphO2C}y#nZD#{3fxR>XFTY7NPE&=k^8k@y2eA7AI40JQ<1
zS%L8FUPzE=bSD4}^>N~@idZ^*3cjR>)jnIJC@O6uJ@iaeKx7)y8YqMZvmepQtwZAwPf@?2t?Qvd$aoHRLLZ;cpA4lq`D!)sF#KgdEqxI_LQWTnhhK2v5f+y
zQi!p6920Bf7}GvZG#$pBTzKfJ=X?K5>NlgB!K+F8T8wt>^W}IlUDQs$#AN
zJ`~U91E1gX!m2jb4IA;^k``#Foic98ErI@b@FC`mD%QB#KDI-GE}S7_+eq4sZVnvL

[PULL 1/4] remove unnecessary ifdef TARGET_RISCV64

2019-11-14 Thread Palmer Dabbelt
From: "hiroyuki.obinata" 

Signed-off-by: Hiroyuki Obinata 
Signed-off-by: Palmer Dabbelt 
---
 target/riscv/translate.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index b26533d4fd78..ab6a891dc381 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -64,12 +64,10 @@ static const int tcg_memop_lookup[8] = {
 [0] = MO_SB,
 [1] = MO_TESW,
 [2] = MO_TESL,
+[3] = MO_TEQ,
 [4] = MO_UB,
 [5] = MO_TEUW,
-#ifdef TARGET_RISCV64
-[3] = MO_TEQ,
 [6] = MO_TEUL,
-#endif
 };
 #endif
 
-- 
2.21.0




[PULL] RISC-V Fixes for 4.2-rc2

2019-11-14 Thread Palmer Dabbelt
The following changes since commit aa464db69b40b4b695be31085e6d2f1e90956c89:

  Update version for v4.2.0-rc1 release (2019-11-12 18:40:02 +)

are available in the Git repository at:

  g...@github.com:palmer-dabbelt/qemu.git tags/riscv-for-master-4.2-rc2

for you to fetch changes up to 6911fde41006b2afe3510755c4cff259ca56c1d9:

  riscv/virt: Increase flash size (2019-11-14 09:53:28 -0800)


RISC-V Fixes for 4.2-rc2

This contains a handful of patches that I'd like to target for 4.2:

* OpenSBI upgrade to 0.5
* Increase in the flash size of the virt board.
* A non-functional cleanup.
* A cleanup to our MIP handling that avoids atomics.

This passes "make check" and boots OpenEmbedded for me.


Alistair Francis (3):
  target/riscv: Remove atomic accesses to MIP CSR
  opensbi: Upgrade from v0.4 to v0.5
  riscv/virt: Increase flash size

hiroyuki.obinata (1):
  remove unnecessary ifdef TARGET_RISCV64

 hw/riscv/virt.c  |   2 +-
 pc-bios/opensbi-riscv32-virt-fw_jump.bin | Bin 36888 -> 40984 bytes
 pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin | Bin 45064 -> 49160 bytes
 pc-bios/opensbi-riscv64-virt-fw_jump.bin | Bin 40968 -> 45064 bytes
 roms/opensbi |   2 +-
 target/riscv/cpu.c   |   5 ++-
 target/riscv/cpu.h   |   9 -
 target/riscv/cpu_helper.c|  48 ++-
 target/riscv/csr.c   |   2 +-
 target/riscv/translate.c |   4 +--
 10 files changed, 24 insertions(+), 48 deletions(-)




[PATCH] Modify tests to work with clang

2019-11-14 Thread Taylor Simpson
Signed-off-by: Taylor Simpson 
---
 tests/tcg/multiarch/float_helpers.c | 13 -
 tests/tcg/multiarch/linux-test.c|  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/tests/tcg/multiarch/float_helpers.c 
b/tests/tcg/multiarch/float_helpers.c
index 8ee7903..437247c 100644
--- a/tests/tcg/multiarch/float_helpers.c
+++ b/tests/tcg/multiarch/float_helpers.c
@@ -26,6 +26,17 @@
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
+/*
+| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
+| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h.
+**/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define QEMU_GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define QEMU_GNUC_PREREQ(maj, min) 0
+#endif
+
 /*
  * Half Precision Numbers
  *
@@ -79,7 +90,7 @@ char *fmt_16(uint16_t num)
 
 #ifndef SNANF
 /* Signaling NaN macros, if supported.  */
-# if __GNUC_PREREQ(3, 3)
+# if defined(__clang__) || QEMU_GNUC_PREREQ(3, 3)
 #  define SNANF (__builtin_nansf (""))
 #  define SNAN (__builtin_nans (""))
 #  define SNANL (__builtin_nansl (""))
diff --git a/tests/tcg/multiarch/linux-test.c b/tests/tcg/multiarch/linux-test.c
index 673d7c8..edfc02c 100644
--- a/tests/tcg/multiarch/linux-test.c
+++ b/tests/tcg/multiarch/linux-test.c
@@ -485,7 +485,7 @@ static void test_signal(void)
 act.sa_flags = SA_SIGINFO;
 chk_error(sigaction(SIGSEGV, , NULL));
 if (setjmp(jmp_env) == 0) {
-*(uint8_t *)0 = 0;
+*(volatile uint8_t *)0 = 0;
 }
 
 act.sa_handler = SIG_DFL;
-- 
2.7.4




[RFC PATCH v1 7/8] vfio-ccw: Refactor ccw irq handler

2019-11-14 Thread Eric Farman
Make it easier to add new ones in the future.

Signed-off-by: Eric Farman 
---
 hw/vfio/ccw.c | 55 ---
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 2b1a83b94c..b16526d5de 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -334,22 +334,35 @@ read_err:
 css_inject_io_interrupt(sch);
 }
 
-static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
+static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, int irq,
+   Error **errp)
 {
 VFIODevice *vdev = >vdev;
 struct vfio_irq_info *irq_info;
 size_t argsz;
 int fd;
+EventNotifier *notifier;
+IOHandler *fd_read;
+
+switch (irq) {
+case VFIO_CCW_IO_IRQ_INDEX:
+notifier = >io_notifier;
+fd_read = vfio_ccw_io_notifier_handler;
+break;
+default:
+error_setg(errp, "vfio: Unsupported device irq(%d) fd: %m", irq);
+return;
+}
 
-if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
-error_setg(errp, "vfio: unexpected number of io irqs %u",
+if (vdev->num_irqs < irq + 1) {
+error_setg(errp, "vfio: unexpected number of irqs %u",
vdev->num_irqs);
 return;
 }
 
 argsz = sizeof(*irq_info);
 irq_info = g_malloc0(argsz);
-irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
+irq_info->index = irq;
 irq_info->argsz = argsz;
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
@@ -357,37 +370,47 @@ static void vfio_ccw_register_io_notifier(VFIOCCWDevice 
*vcdev, Error **errp)
 goto out_free_info;
 }
 
-if (event_notifier_init(>io_notifier, 0)) {
+if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
- "vfio: Unable to init event notifier for IO");
+ "vfio: Unable to init event notifier for irq (%d)", 
irq);
 goto out_free_info;
 }
 
-fd = event_notifier_get_fd(>io_notifier);
-qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev);
+fd = event_notifier_get_fd(notifier);
+qemu_set_fd_handler(fd, fd_read, NULL, vcdev);
 
-if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
+if (vfio_set_irq_signaling(vdev, irq, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
 qemu_set_fd_handler(fd, NULL, NULL, vcdev);
-event_notifier_cleanup(>io_notifier);
+event_notifier_cleanup(notifier);
 }
 
 out_free_info:
 g_free(irq_info);
 }
 
-static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
+static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, int irq)
 {
 Error *err = NULL;
+EventNotifier *notifier;
+
+switch (irq) {
+case VFIO_CCW_IO_IRQ_INDEX:
+notifier = >io_notifier;
+break;
+default:
+error_report("vfio: Unsupported device irq(%d) fd: %m", irq);
+return;
+}
 
-if (vfio_set_irq_signaling(>vdev, VFIO_CCW_IO_IRQ_INDEX, 0,
+if (vfio_set_irq_signaling(>vdev, irq, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, -1, )) {
 error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name);
 }
 
-qemu_set_fd_handler(event_notifier_get_fd(>io_notifier),
+qemu_set_fd_handler(event_notifier_get_fd(notifier),
 NULL, NULL, vcdev);
-event_notifier_cleanup(>io_notifier);
+event_notifier_cleanup(notifier);
 }
 
 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
@@ -590,7 +613,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
 goto out_region_err;
 }
 
-vfio_ccw_register_io_notifier(vcdev, );
+vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, );
 if (err) {
 goto out_notifier_err;
 }
@@ -619,7 +642,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error 
**errp)
 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
 VFIOGroup *group = vcdev->vdev.group;
 
-vfio_ccw_unregister_io_notifier(vcdev);
+vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
 vfio_ccw_put_region(vcdev);
 vfio_ccw_put_device(vcdev);
 vfio_put_group(group);
-- 
2.17.1




[RFC PATCH v1 5/8] vfio-ccw: Add support for the schib region

2019-11-14 Thread Eric Farman
From: Farhan Ali 

The schib region can be used to obtain the latest SCHIB from the host
passthrough subchannel. Since the guest SCHIB is virtualized,
we currently only update the path related information so that the
guest is aware of any path related changes when it issues the
'stsch' instruction.

Signed-off-by: Farhan Ali 
Signed-off-by: Eric Farman 
---

Notes:
v0->v1: [EF]
 - Change various incarnations of "update chp status" to
   "handle_store", to reflect the STSCH instruction that will
   drive this code
 - Remove temporary variable for casting/testing purposes in
   s390_ccw_store(), and add a block comment of WHY its there.
 - Add a few comments to vfio_ccw_handle_store()

 hw/s390x/css.c  |  8 --
 hw/s390x/s390-ccw.c | 20 +
 hw/vfio/ccw.c   | 57 +
 include/hw/s390x/css.h  |  3 +-
 include/hw/s390x/s390-ccw.h |  1 +
 target/s390x/ioinst.c   |  3 +-
 6 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 844caab408..6ee6a96d75 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1335,11 +1335,15 @@ static void copy_schib_to_guest(SCHIB *dest, const 
SCHIB *src)
 }
 }
 
-int css_do_stsch(SubchDev *sch, SCHIB *schib)
+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib)
 {
+int ret = IOINST_CC_EXPECTED;
+
+ret = s390_ccw_store(sch);
+
 /* Use current status. */
 copy_schib_to_guest(schib, >curr_status);
-return 0;
+return ret;
 }
 
 static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src)
diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c
index 0c5a5b60bd..be0b379338 100644
--- a/hw/s390x/s390-ccw.c
+++ b/hw/s390x/s390-ccw.c
@@ -51,6 +51,26 @@ int s390_ccw_clear(SubchDev *sch)
 return cdc->handle_clear(sch);
 }
 
+IOInstEnding s390_ccw_store(SubchDev *sch)
+{
+S390CCWDeviceClass *cdc = NULL;
+int ret = IOINST_CC_EXPECTED;
+
+/*
+ * This only applies to passthrough devices, so we can't unconditionally
+ * set this variable like we would for halt/clear.
+ */
+if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) {
+cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data);
+}
+
+if (cdc && cdc->handle_store) {
+ret = cdc->handle_store(sch);
+}
+
+return ret;
+}
+
 static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
   char *sysfsdev,
   Error **errp)
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 3e32bc1819..2ff223470f 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -41,6 +41,9 @@ struct VFIOCCWDevice {
 uint64_t async_cmd_region_size;
 uint64_t async_cmd_region_offset;
 struct ccw_cmd_region *async_cmd_region;
+uint64_t schib_region_size;
+uint64_t schib_region_offset;
+struct ccw_schib_region *schib_region;
 EventNotifier io_notifier;
 bool force_orb_pfch;
 bool warned_orb_pfch;
@@ -124,6 +127,45 @@ again:
 }
 }
 
+static IOInstEnding vfio_ccw_handle_store(SubchDev *sch)
+{
+S390CCWDevice *cdev = sch->driver_data;
+VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+SCHIB *schib = >curr_status;
+struct ccw_schib_region *region = vcdev->schib_region;
+SCHIB *s = (SCHIB *)region->schib_area;
+int ret;
+
+/* schib region not available so nothing else to do */
+if (!region) {
+return IOINST_CC_EXPECTED;
+}
+
+memset(region, 0, sizeof(*region));
+ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size,
+vcdev->schib_region_offset);
+
+if (ret == -1) {
+/* Assume device is damaged, regardless of errno */
+return IOINST_CC_NOT_OPERATIONAL;
+}
+
+/*
+ * Selectively copy path-related bits of the SCHIB,
+ * rather than copying the entire struct.
+ */
+schib->pmcw.pnom = s->pmcw.pnom;
+schib->pmcw.lpum = s->pmcw.lpum;
+schib->pmcw.pam = s->pmcw.pam;
+schib->pmcw.pom = s->pmcw.pom;
+
+if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) {
+schib->scsw.flags |= SCSW_FLAGS_MASK_PNO;
+}
+
+return IOINST_CC_EXPECTED;
+}
+
 static int vfio_ccw_handle_clear(SubchDev *sch)
 {
 S390CCWDevice *cdev = sch->driver_data;
@@ -395,10 +437,23 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, 
Error **errp)
 vcdev->async_cmd_region = g_malloc0(info->size);
 }
 
+ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
+   VFIO_REGION_SUBTYPE_CCW_SCHIB, );
+if (!ret) {
+vcdev->schib_region_size = info->size;
+if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) {
+error_setg(errp, "vfio: Unexpected size of the schib region");
+goto out_err;
+}
+vcdev->schib_region_offset = info->offset;
+vcdev->schib_region = g_malloc(info->size);
+}
+
 g_free(info);
 

[RFC PATCH v1 8/8] vfio-ccw: Add support for the CRW irq

2019-11-14 Thread Eric Farman
From: Farhan Ali 

The CRW irq will be used by vfio-ccw to notify the userspace
about any CRWs the userspace needs to handle. Let's add support
for it.

Signed-off-by: Farhan Ali 
Signed-off-by: Eric Farman 
---

Notes:
v0->v1: [EF]
 - Check vcdev->crw_region before registering the irq,
   in case host kernel does not have matching support
 - Split the refactoring changes to an earlier (new) patch
   (and don't remove the "num_irqs" check in the register
   routine, but adjust it to the check the input variable)
 - Don't revert the cool vfio_set_irq_signaling() stuff
 - Unregister CRW IRQ before IO IRQ in unrealize
 - s/crw1/crw0/

 hw/vfio/ccw.c | 45 +
 1 file changed, 45 insertions(+)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index b16526d5de..b3f4120118 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -48,6 +48,7 @@ struct VFIOCCWDevice {
 uint64_t crw_region_offset;
 struct ccw_crw_region *crw_region;
 EventNotifier io_notifier;
+EventNotifier crw_notifier;
 bool force_orb_pfch;
 bool warned_orb_pfch;
 };
@@ -259,6 +260,34 @@ static void vfio_ccw_reset(DeviceState *dev)
 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
 }
 
+static void vfio_ccw_crw_notifier_handler(void *opaque)
+{
+VFIOCCWDevice *vcdev = opaque;
+struct ccw_crw_region *region = vcdev->crw_region;
+CRW crw;
+int size;
+uint8_t erc;
+uint16_t rsid;
+
+if (!event_notifier_test_and_clear(>crw_notifier)) {
+return;
+}
+
+memset(region, 0, sizeof(*region));
+size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size,
+ vcdev->crw_region_offset);
+
+if (size == -1) {
+error_report("vfio-ccw: Read crw region failed with errno=%d", errno);
+return;
+}
+
+memcpy(, >crw0, sizeof(CRW));
+erc = crw.flags & 0x003f;
+rsid = crw.rsid;
+css_queue_crw(CRW_RSC_CHP, erc, 0, 0, rsid);
+}
+
 static void vfio_ccw_io_notifier_handler(void *opaque)
 {
 VFIOCCWDevice *vcdev = opaque;
@@ -349,6 +378,10 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev, int irq,
 notifier = >io_notifier;
 fd_read = vfio_ccw_io_notifier_handler;
 break;
+case VFIO_CCW_CRW_IRQ_INDEX:
+notifier = >crw_notifier;
+fd_read = vfio_ccw_crw_notifier_handler;
+break;
 default:
 error_setg(errp, "vfio: Unsupported device irq(%d) fd: %m", irq);
 return;
@@ -398,6 +431,9 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice 
*vcdev, int irq)
 case VFIO_CCW_IO_IRQ_INDEX:
 notifier = >io_notifier;
 break;
+case VFIO_CCW_CRW_IRQ_INDEX:
+notifier = >crw_notifier;
+break;
 default:
 error_report("vfio: Unsupported device irq(%d) fd: %m", irq);
 return;
@@ -618,6 +654,14 @@ static void vfio_ccw_realize(DeviceState *dev, Error 
**errp)
 goto out_notifier_err;
 }
 
+if (vcdev->crw_region) {
+vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, );
+if (err) {
+vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
+goto out_notifier_err;
+}
+}
+
 return;
 
 out_notifier_err:
@@ -642,6 +686,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error 
**errp)
 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
 VFIOGroup *group = vcdev->vdev.group;
 
+vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX);
 vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
 vfio_ccw_put_region(vcdev);
 vfio_ccw_put_device(vcdev);
-- 
2.17.1




[RFC PATCH v1 4/8] vfio-ccw: Refactor cleanup of regions

2019-11-14 Thread Eric Farman
While we're at it, add a g_free() for the async_cmd_region that
is the last thing currently created.  g_free() knows how to handle
NULL pointers, so this makes it easier to remember what cleanups
need to be performed when new regions are added.

Signed-off-by: Eric Farman 
---
 hw/vfio/ccw.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 0590a6f512..3e32bc1819 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -376,8 +376,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error 
**errp)
 vcdev->io_region_size = info->size;
 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
 error_setg(errp, "vfio: Unexpected size of the I/O region");
-g_free(info);
-return;
+goto out_err;
 }
 
 vcdev->io_region_offset = info->offset;
@@ -390,15 +389,20 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, 
Error **errp)
 vcdev->async_cmd_region_size = info->size;
 if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) {
 error_setg(errp, "vfio: Unexpected size of the async cmd region");
-g_free(vcdev->io_region);
-g_free(info);
-return;
+goto out_err;
 }
 vcdev->async_cmd_region_offset = info->offset;
 vcdev->async_cmd_region = g_malloc0(info->size);
 }
 
 g_free(info);
+return;
+
+out_err:
+g_free(vcdev->async_cmd_region);
+g_free(vcdev->io_region);
+g_free(info);
+return;
 }
 
 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
-- 
2.17.1




[RFC PATCH v1 1/8] vfio-ccw: Return IOINST_CC_NOT_OPERATIONAL for EIO

2019-11-14 Thread Eric Farman
From: Farhan Ali 

EIO is returned by vfio-ccw mediated device when the backing
host subchannel is not operational anymore. So return cc=3
back to the guest, rather than returning a unit check.
This way the guest can take appropriate action such as
issue an 'stsch'.

Signed-off-by: Farhan Ali 
---
 hw/vfio/ccw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 6863f6c69f..0919ddbeb8 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -114,6 +114,7 @@ again:
 return IOINST_CC_BUSY;
 case -ENODEV:
 case -EACCES:
+case -EIO:
 return IOINST_CC_NOT_OPERATIONAL;
 case -EFAULT:
 default:
-- 
2.17.1




[RFC PATCH v1 0/8] s390x/vfio-ccw: Channel Path Handling

2019-11-14 Thread Eric Farman
Here is a first pass at the channel-path handling code for vfio-ccw,
to take advantage of the corresponding kernel patches posted here:

https://lore.kernel.org/kvm/20191115025620.19593-1-far...@linux.ibm.com/

As with the KVM patches, these were originally written by Farhan Ali
this past summer, and my git notes log the changes I've made since
picking up this work.  There are two commits at the front of this
series that seem to be pre-reqs to the actual changes for this, which
is why the linux-headers update seems lost in the middle of this.

I've tried to be mindful of testing permutations of new/old kernel with
either new/old QEMU, and it seems to be in good shape.

Eric Farman (2):
  vfio-ccw: Refactor cleanup of regions
  vfio-ccw: Refactor ccw irq handler

Farhan Ali (6):
  vfio-ccw: Return IOINST_CC_NOT_OPERATIONAL for EIO
  vfio-ccw: Don't inject an I/O interrupt if the subchannel is not
enabled
  linux-headers: update
  vfio-ccw: Add support for the schib region
  vfio-ccw: Add support for the crw region
  vfio-ccw: Add support for the CRW irq

 hw/s390x/css.c |   8 +-
 hw/s390x/s390-ccw.c|  20 
 hw/vfio/ccw.c  | 195 +
 include/hw/s390x/css.h |   3 +-
 include/hw/s390x/s390-ccw.h|   1 +
 linux-headers/linux/vfio.h |   3 +
 linux-headers/linux/vfio_ccw.h |  10 ++
 target/s390x/ioinst.c  |   3 +-
 8 files changed, 217 insertions(+), 26 deletions(-)

-- 
2.17.1




[RFC PATCH v1 6/8] vfio-ccw: Add support for the crw region

2019-11-14 Thread Eric Farman
From: Farhan Ali 

The crw region can be used to obtain information about
Channel Report Words (CRW) from vfio-ccw driver.

Currently only channel path related CRWs are passed to
QEMU from vfio-ccw driver.

Signed-off-by: Farhan Ali 
Signed-off-by: Eric Farman 
---

Notes:
v0->v1: [EF]
 - Fixed copy/paste error in error message (s/schib/CRW)

 hw/vfio/ccw.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 2ff223470f..2b1a83b94c 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -44,6 +44,9 @@ struct VFIOCCWDevice {
 uint64_t schib_region_size;
 uint64_t schib_region_offset;
 struct ccw_schib_region *schib_region;
+uint64_t crw_region_size;
+uint64_t crw_region_offset;
+struct ccw_crw_region *crw_region;
 EventNotifier io_notifier;
 bool force_orb_pfch;
 bool warned_orb_pfch;
@@ -449,10 +452,24 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, 
Error **errp)
 vcdev->schib_region = g_malloc(info->size);
 }
 
+ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW,
+   VFIO_REGION_SUBTYPE_CCW_CRW, );
+
+if (!ret) {
+vcdev->crw_region_size = info->size;
+if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) {
+error_setg(errp, "vfio: Unexpected size of the CRW region");
+goto out_err;
+}
+vcdev->crw_region_offset = info->offset;
+vcdev->crw_region = g_malloc(info->size);
+}
+
 g_free(info);
 return;
 
 out_err:
+g_free(vcdev->crw_region);
 g_free(vcdev->schib_region);
 g_free(vcdev->async_cmd_region);
 g_free(vcdev->io_region);
@@ -462,6 +479,7 @@ out_err:
 
 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
 {
+g_free(vcdev->crw_region);
 g_free(vcdev->schib_region);
 g_free(vcdev->async_cmd_region);
 g_free(vcdev->io_region);
-- 
2.17.1




[RFC PATCH v1 3/8] linux-headers: update

2019-11-14 Thread Eric Farman
From: Farhan Ali 

Signed-off-by: Farhan Ali 
Signed-off-by: Eric Farman 
---

Notes:
v0->v1: [EF]
 - Run scripts/update-linux-headers.sh properly, but do not
   add resulting changes to linux-headers/asm-mips/

 linux-headers/linux/vfio.h |  3 +++
 linux-headers/linux/vfio_ccw.h | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index fb10370d29..9e227348b3 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -378,6 +378,8 @@ struct vfio_region_gfx_edid {
 
 /* sub-types for VFIO_REGION_TYPE_CCW */
 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD  (1)
+#define VFIO_REGION_SUBTYPE_CCW_SCHIB  (2)
+#define VFIO_REGION_SUBTYPE_CCW_CRW(3)
 
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
@@ -577,6 +579,7 @@ enum {
 
 enum {
VFIO_CCW_IO_IRQ_INDEX,
+   VFIO_CCW_CRW_IRQ_INDEX,
VFIO_CCW_NUM_IRQS
 };
 
diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h
index fcc3e69ef5..8654c9afca 100644
--- a/linux-headers/linux/vfio_ccw.h
+++ b/linux-headers/linux/vfio_ccw.h
@@ -34,4 +34,14 @@ struct ccw_cmd_region {
__u32 ret_code;
 } __attribute__((packed));
 
+struct ccw_schib_region {
+#define SCHIB_AREA_SIZE 52
+   __u8 schib_area[SCHIB_AREA_SIZE];
+} __attribute__((packed));
+
+struct ccw_crw_region {
+   __u32 crw0;
+   __u32 crw1;
+} __attribute__((packed));
+
 #endif
-- 
2.17.1




[RFC PATCH v1 2/8] vfio-ccw: Don't inject an I/O interrupt if the subchannel is not enabled

2019-11-14 Thread Eric Farman
From: Farhan Ali 

According to PoPs, when the SCHIB.PMCW bit 8 is 0 status presented by
the device is not made available to the program. So don't inject an
interrupt in the guest if the guest OS has not enabled the
subchannel.

Signed-off-by: Farhan Ali 
Signed-off-by: Eric Farman 
---

Notes:
v0->v1: [EF]
 - Update commit message, as the bit in question is bit 8 not 15

 hw/vfio/ccw.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 0919ddbeb8..0590a6f512 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -230,6 +230,11 @@ static void vfio_ccw_io_notifier_handler(void *opaque)
 return;
 }
 
+/* Virtual subchannel is not enabled */
+if (!(schib->pmcw.flags & PMCW_FLAGS_MASK_ENA)) {
+return;
+}
+
 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
  vcdev->io_region_offset);
 if (size == -1) {
-- 
2.17.1




Re: [PATCH v5] iotests: Test NBD client reconnection

2019-11-14 Thread Andrey Shinkevich



On 14/11/2019 18:09, Eric Blake wrote:
> On 11/11/19 9:39 PM, Andrey Shinkevich wrote:
>> The test for an NBD client. The NBD server is disconnected after the
>> client write request. The NBD client should reconnect and complete
>> the write operation.
>>
>> Suggested-by: Denis V. Lunev 
>> Suggested-by: Vladimir Sementsov-Ogievskiy 
>> Signed-off-by: Andrey Shinkevich 
>> Reviewed-by: Eric Blake 
>> Tested-by: Eric Blake 
>> ---
>> v5:  "" were replaced with '' in the test except the function comments.
>>  The 'quick' word was added to the 'qroup' file next to the test 
>> #277.
> 
> Queuing for 4.2-rc2 through my NBD tree.
> 
> 

Eric,
Thank you very much.
-- 
With the best regards,
Andrey Shinkevich




Re: [PATCH v9 Kernel 2/5] vfio iommu: Add ioctl defination to get dirty pages bitmap.

2019-11-14 Thread Alex Williamson
On Thu, 14 Nov 2019 21:40:35 -0500
Yan Zhao  wrote:

> On Fri, Nov 15, 2019 at 05:06:25AM +0800, Alex Williamson wrote:
> > On Fri, 15 Nov 2019 00:26:07 +0530
> > Kirti Wankhede  wrote:
> >   
> > > On 11/14/2019 1:37 AM, Alex Williamson wrote:  
> > > > On Thu, 14 Nov 2019 01:07:21 +0530
> > > > Kirti Wankhede  wrote:
> > > > 
> > > >> On 11/13/2019 4:00 AM, Alex Williamson wrote:
> > > >>> On Tue, 12 Nov 2019 22:33:37 +0530
> > > >>> Kirti Wankhede  wrote:
> > > >>>
> > >  All pages pinned by vendor driver through vfio_pin_pages API should 
> > >  be
> > >  considered as dirty during migration. IOMMU container maintains a 
> > >  list of
> > >  all such pinned pages. Added an ioctl defination to get bitmap of 
> > >  such
> > > >>>
> > > >>> definition
> > > >>>
> > >  pinned pages for requested IO virtual address range.
> > > >>>
> > > >>> Additionally, all mapped pages are considered dirty when physically
> > > >>> mapped through to an IOMMU, modulo we discussed devices opting in to
> > > >>> per page pinning to indicate finer granularity with a TBD mechanism to
> > > >>> figure out if any non-opt-in devices remain.
> > > >>>
> > > >>
> > > >> You mean, in case of device direct assignment (device pass through)?   
> > > >>  
> > > > 
> > > > Yes, or IOMMU backed mdevs.  If vfio_dmas in the container are fully
> > > > pinned and mapped, then the correct dirty page set is all mapped pages.
> > > > We discussed using the vpfn list as a mechanism for vendor drivers to
> > > > reduce their migration footprint, but we also discussed that we would
> > > > need a way to determine that all participants in the container have
> > > > explicitly pinned their working pages or else we must consider the
> > > > entire potential working set as dirty.
> > > > 
> > > 
> > > How can vendor driver tell this capability to iommu module? Any 
> > > suggestions?  
> > 
> > I think it does so by pinning pages.  Is it acceptable that if the
> > vendor driver pins any pages, then from that point forward we consider
> > the IOMMU group dirty page scope to be limited to pinned pages?  There
> > are complications around non-singleton IOMMU groups, but I think we're
> > already leaning towards that being a non-worthwhile problem to solve.
> > So if we require that only singleton IOMMU groups can pin pages and we
> > pass the IOMMU group as a parameter to
> > vfio_iommu_driver_ops.pin_pages(), then the type1 backend can set a
> > flag on its local vfio_group struct to indicate dirty page scope is
> > limited to pinned pages.  We might want to keep a flag on the
> > vfio_iommu struct to indicate if all of the vfio_groups for each
> > vfio_domain in the vfio_iommu.domain_list dirty page scope limited to
> > pinned pages as an optimization to avoid walking lists too often.  Then
> > we could test if vfio_iommu.domain_list is not empty and this new flag
> > does not limit the dirty page scope, then everything within each
> > vfio_dma is considered dirty.
> >  
> 
> hi Alex
> could you help clarify whether my understandings below are right?
> In future,
> 1. for mdev and for passthrough device withoug hardware ability to track
> dirty pages, the vendor driver has to explicitly call
> vfio_pin_pages()/vfio_unpin_pages() + a flag to tell vfio its dirty page set.

For non-IOMMU backed mdevs without hardware dirty page tracking,
there's no change to the vendor driver currently.  Pages pinned by the
vendor driver are marked as dirty.

For any IOMMU backed device, mdev or direct assignment, all mapped
memory would be considered dirty unless there are explicit calls to pin
pages on top of the IOMMU page pinning and mapping.  These would likely
be enabled only when the device is in the _SAVING device_state.

> 2. for those devices with hardware ability to track dirty pages, will still
> provide a callback to vendor driver to get dirty pages. (as for those devices,
> it is hard to explicitly call vfio_pin_pages()/vfio_unpin_pages())
>
> 3. for devices relying on dirty bit info in physical IOMMU, there
> will be a callback to physical IOMMU driver to get dirty page set from
> vfio.

The proposal here does not cover exactly how these would be
implemented, it only establishes the container as the point of user
interaction with the dirty bitmap and hopefully allows us to maintain
that interface regardless of whether we have dirty tracking at the
device or the system IOMMU.  Ideally devices with dirty tracking would
make use of page pinning and we'd extend the interface to allow vendor
drivers the ability to indicate the clean/dirty state of those pinned
pages.  For system IOMMU dirty page tracking, that potentially might
mean that we support IOMMU page faults and the container manages those
faults such that the container is the central record of dirty pages.
Until these interfaces are designed, we can only speculate, but the
goal is to design a user interface compatible with how 

Re: [PATCH v9 Kernel 2/5] vfio iommu: Add ioctl defination to get dirty pages bitmap.

2019-11-14 Thread Yan Zhao
On Fri, Nov 15, 2019 at 05:06:25AM +0800, Alex Williamson wrote:
> On Fri, 15 Nov 2019 00:26:07 +0530
> Kirti Wankhede  wrote:
> 
> > On 11/14/2019 1:37 AM, Alex Williamson wrote:
> > > On Thu, 14 Nov 2019 01:07:21 +0530
> > > Kirti Wankhede  wrote:
> > >   
> > >> On 11/13/2019 4:00 AM, Alex Williamson wrote:  
> > >>> On Tue, 12 Nov 2019 22:33:37 +0530
> > >>> Kirti Wankhede  wrote:
> > >>>  
> >  All pages pinned by vendor driver through vfio_pin_pages API should be
> >  considered as dirty during migration. IOMMU container maintains a list 
> >  of
> >  all such pinned pages. Added an ioctl defination to get bitmap of such 
> >   
> > >>>
> > >>> definition
> > >>>  
> >  pinned pages for requested IO virtual address range.  
> > >>>
> > >>> Additionally, all mapped pages are considered dirty when physically
> > >>> mapped through to an IOMMU, modulo we discussed devices opting in to
> > >>> per page pinning to indicate finer granularity with a TBD mechanism to
> > >>> figure out if any non-opt-in devices remain.
> > >>>  
> > >>
> > >> You mean, in case of device direct assignment (device pass through)?  
> > > 
> > > Yes, or IOMMU backed mdevs.  If vfio_dmas in the container are fully
> > > pinned and mapped, then the correct dirty page set is all mapped pages.
> > > We discussed using the vpfn list as a mechanism for vendor drivers to
> > > reduce their migration footprint, but we also discussed that we would
> > > need a way to determine that all participants in the container have
> > > explicitly pinned their working pages or else we must consider the
> > > entire potential working set as dirty.
> > >   
> > 
> > How can vendor driver tell this capability to iommu module? Any suggestions?
> 
> I think it does so by pinning pages.  Is it acceptable that if the
> vendor driver pins any pages, then from that point forward we consider
> the IOMMU group dirty page scope to be limited to pinned pages?  There
> are complications around non-singleton IOMMU groups, but I think we're
> already leaning towards that being a non-worthwhile problem to solve.
> So if we require that only singleton IOMMU groups can pin pages and we
> pass the IOMMU group as a parameter to
> vfio_iommu_driver_ops.pin_pages(), then the type1 backend can set a
> flag on its local vfio_group struct to indicate dirty page scope is
> limited to pinned pages.  We might want to keep a flag on the
> vfio_iommu struct to indicate if all of the vfio_groups for each
> vfio_domain in the vfio_iommu.domain_list dirty page scope limited to
> pinned pages as an optimization to avoid walking lists too often.  Then
> we could test if vfio_iommu.domain_list is not empty and this new flag
> does not limit the dirty page scope, then everything within each
> vfio_dma is considered dirty.
>

hi Alex
could you help clarify whether my understandings below are right?
In future,
1. for mdev and for passthrough device withoug hardware ability to track
dirty pages, the vendor driver has to explicitly call
vfio_pin_pages()/vfio_unpin_pages() + a flag to tell vfio its dirty page set.

2. for those devices with hardware ability to track dirty pages, will still
provide a callback to vendor driver to get dirty pages. (as for those devices,
it is hard to explicitly call vfio_pin_pages()/vfio_unpin_pages())

3. for devices relying on dirty bit info in physical IOMMU, there
will be a callback to physical IOMMU driver to get dirty page set from
vfio.

Thanks
Yan

> >  Signed-off-by: Kirti Wankhede 
> >  Reviewed-by: Neo Jia 
> >  ---
> > include/uapi/linux/vfio.h | 23 +++
> > 1 file changed, 23 insertions(+)
> > 
> >  diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >  index 35b09427ad9f..6fd3822aa610 100644
> >  --- a/include/uapi/linux/vfio.h
> >  +++ b/include/uapi/linux/vfio.h
> >  @@ -902,6 +902,29 @@ struct vfio_iommu_type1_dma_unmap {
> > #define VFIO_IOMMU_ENABLE   _IO(VFIO_TYPE, VFIO_BASE + 15)
> > #define VFIO_IOMMU_DISABLE  _IO(VFIO_TYPE, VFIO_BASE + 16)
> > 
> >  +/**
> >  + * VFIO_IOMMU_GET_DIRTY_BITMAP - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
> >  + * struct 
> >  vfio_iommu_type1_dirty_bitmap)
> >  + *
> >  + * IOCTL to get dirty pages bitmap for IOMMU container during 
> >  migration.
> >  + * Get dirty pages bitmap of given IO virtual addresses range using
> >  + * struct vfio_iommu_type1_dirty_bitmap. Caller sets argsz, which is 
> >  size of
> >  + * struct vfio_iommu_type1_dirty_bitmap. User should allocate memory 
> >  to get
> >  + * bitmap and should set size of allocated memory in bitmap_size 
> >  field.
> >  + * One bit is used to represent per page consecutively starting from 
> >  iova
> >  + * offset. Bit set indicates page at that offset from iova is dirty.
> >  + */
> >  +struct 

Re: virtio,iommu_platform=on

2019-11-14 Thread Alexey Kardashevskiy



On 14/11/2019 19:42, Michael S. Tsirkin wrote:
> On Thu, Nov 14, 2019 at 09:58:47AM +1100, Alexey Kardashevskiy wrote:
>>
>>
>> On 13/11/2019 21:00, Michael S. Tsirkin wrote:
>>> On Wed, Nov 13, 2019 at 03:44:28PM +1100, Alexey Kardashevskiy wrote:


 On 12/11/2019 18:08, Michael S. Tsirkin wrote:
> On Tue, Nov 12, 2019 at 02:53:49PM +1100, Alexey Kardashevskiy wrote:
>> Hi!
>>
>> I am enabling IOMMU for virtio in the pseries firmware (SLOF) and seeing
>> problems, one of them is SLOF does SCSI bus scan, then it stops the
>> virtio-scsi by clearing MMIO|IO|BUSMASTER from PCI_COMMAND (as SLOF
>> stopped using the devices) and when this happens, I see unassigned
>> memory access (see below) which happens because disabling busmaster
>> disables IOMMU and QEMU cannot access the rings to do some shutdown. And
>> when this happens, the device does not come back even if SLOF re-enables 
>> it.
>
> In fact clearing bus master should disable ring access even
> without the IOMMU.
> Once you do this you should not wait for rings to be processed,
> it is safe to assume they won't be touched again and just
> free up any buffers that have not been used.
>
> Why don't you see this without IOMMU?

 Because without IOMMU, virtio can always access rings, it does not need
 bus master address space for that.
>>>
>>> Right and that's a bug in virtio scsi. E.g. virtio net checks
>>> bus mastering before each access.
>>
>> You have to be specific - virtio scsi in the guest or in QEMU?
> 
> If a device accesses memory with bus master on, it's buggy.
> 
>>
>>> Which is all well and good, but we can't just break the world
>>> so I guess we first need to fix SLOF, and then add
>>> a compat property. And maybe keep it broken for
>>> legacy ...
>>>

> It's a bug I think, probably there to work around buggy guests.
>
> So pls fix this in SLOF and then hopefully we can drop the
> work arounds and have clearing bus master actually block DMA.


 Laszlo suggested writing 0 to the status but this does not seem helping,
 with both ioeventfd=true/false. It looks like setting/clearing busmaster
 bit confused memory region caches in QEMU's virtio. I am confused which
 direction to keep digging to, any suggestions? Thanks,

>>>
>>> to clarify you reset after setting bus master? right?
>>
>>
>> I was talking about clearing the bus master, and where I call that
>> virtio reset does not matter. Thanks,
>>
>>
> 
> so
> 
> bus master =0
> reset
> bus master =1
> 
> device does not recover?


No, it does not. With my recent changes, it seems that only virtio-block
cannot recover; virtio-net and virtio-scsi are fine. Thanks,


> 
> 
>>>
>>>

>
>> Hacking SLOF to not clear BUSMASTER makes virtio-scsi work but it is
>> hardly a right fix.
>>
>> Is this something expected? Thanks,
>>
>>
>> Here is the exact command line:
>>
>> /home/aik/pbuild/qemu-garrison2-ppc64/ppc64-softmmu/qemu-system-ppc64 \
>>
>> -nodefaults \
>>
>> -chardev stdio,id=STDIO0,signal=off,mux=on \
>>
>> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
>>
>> -mon id=MON0,chardev=STDIO0,mode=readline \
>>
>> -nographic \
>>
>> -vga none \
>>
>> -enable-kvm \
>> -m 2G \
>>
>> -device
>> virtio-scsi-pci,id=vscsi0,iommu_platform=on,disable-modern=off,disable-legacy=on
>> \
>> -drive id=DRIVE0,if=none,file=img/u1804-64le.qcow2,format=qcow2 \
>>
>> -device scsi-disk,id=scsi-disk0,drive=DRIVE0 \
>>
>> -snapshot \
>>
>> -smp 1 \
>>
>> -machine pseries \
>>
>> -L /home/aik/t/qemu-ppc64-bios/ \
>>
>> -trace events=qemu_trace_events \
>>
>> -d guest_errors \
>>
>> -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.ssh59518 \
>>
>> -mon chardev=SOCKET0,mode=control
>>
>>
>>
>> Here is the backtrace:
>>
>> Thread 5 "qemu-system-ppc" hit Breakpoint 8, unassigned_mem_accepts
>> (opaque=0x0, addr=0x5802, size=0x2, is_write=0x0, attrs=...) at /home/
>> aik/p/qemu/memory.c:1275
>> 1275return false;
>> #0  unassigned_mem_accepts (opaque=0x0, addr=0x5802, size=0x2,
>> is_write=0x0, attrs=...) at /home/aik/p/qemu/memory.c:1275
>> #1  0x100a8ac8 in memory_region_access_valid (mr=0x1105c230
>> , addr=0x5802, size=0x2, is_write=0x0, attrs=...) at
>> /home/aik/p/qemu/memory.c:1377
>> #2  0x100a8c88 in memory_region_dispatch_read (mr=0x1105c230
>> , addr=0x5802, pval=0x7550d410, op=MO_16,
>> attrs=...) at /home/aik/p/qemu/memory.c:1418
>> #3  0x1001cad4 in address_space_lduw_internal_cached_slow
>> (cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0,
>> endian=DEVICE_LITTLE_ENDIAN) at /home/aik/p/qemu/memory_ldst.inc.c:211
>> #4  

9p: requests efficiency

2019-11-14 Thread Christian Schoenebeck
I'm currently reading up on how client requests (T messages) are currently 
dispatched in general by 9pfs, to understand where potential inefficiencies 
are that I am encountering.

I mean 9pfs is pretty fast on raw I/O (read/write requests), provided that the 
message payload on guest side was chosen large enough (e.g. 
trans=virtio,version=9p2000.L,msize=4194304,...), where I already come close 
to my test disk's therotical maximum performance on read/write tests. But 
obviously these are huge 9p requests.

However when there are a large number of (i.e. small) 9p requests, no matter 
what the actual request type is, then I am encountering severe performance 
issues with 9pfs and I try to understand whether this could be improved with 
reasonable effort.

If I understand it correctly, each incoming request (T message) is dispatched 
to its own qemu coroutine queue. So individual requests should already be 
processed in parallel, right?

Best regards,
Christian Schoenebeck





RE: QEMU for Qualcomm Hexagon - KVM Forum talk and code available

2019-11-14 Thread Taylor Simpson
OK, I changed the code in github and will upstream it that way.



-Original Message-
From: Richard Henderson 
Sent: Wednesday, November 13, 2019 3:10 PM
To: Taylor Simpson ; Alex Bennée 
; qemu-devel@nongnu.org
Cc: Alessandro Di Federico ; ni...@rev.ng; Niccolò Izzo 
; Aleksandar Markovic 
Subject: Re: QEMU for Qualcomm Hexagon - KVM Forum talk and code available

-
CAUTION: This email originated from outside of the organization.
-

On 11/13/19 8:31 PM, Taylor Simpson wrote:
> [Taylor] Currently, I have the generator and the generated code sitting in 
> the source tree.  I'm flexible on this if the decision is to regenerate it 
> every time.

I would prefer to regenerate every time, and not store the generated code in 
the source tree at all.  It makes it a no-brainer to modify the source and not 
have to remember how to regenerate, because the rules are right there in the 
makefile.


r~


[Bug 1846427] Re: 4.1.0: qcow2 corruption on savevm/quit/loadvm cycle

2019-11-14 Thread Matti Hameister
I tried the ArchLinux package that includes three patches applied to
qemu 4.1 ( see
https://git.archlinux.org/svntogit/packages.git/commit/trunk/PKGBUILD?h=packages/qemu=e9707066408de26aa04f8d0ddebe5556aa87e662
). My Windows 10 qcow2 image got corrupted again after a short time of
use. Host filesystem is ext4.

"OFLAG_COPIED data cluster: l2_entry=382c7 refcount=1"

The Windows installation seems to be fine after repair. At least Windows
did not found anything wrong.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1846427

Title:
  4.1.0: qcow2 corruption on savevm/quit/loadvm cycle

Status in QEMU:
  Fix Committed

Bug description:
  I'm seeing massive corruption of qcow2 images with qemu 4.1.0 and git
  master as of 7f21573c822805a8e6be379d9bcf3ad9effef3dc after a few
  savevm/quit/loadvm cycles. I've narrowed it down to the following
  reproducer (further notes below):

  # qemu-img check debian.qcow2
  No errors were found on the image.
  251601/327680 = 76.78% allocated, 1.63% fragmented, 0.00% compressed clusters
  Image end offset: 18340446208
  # bin/qemu/bin/qemu-system-x86_64 -machine pc-q35-4.0.1,accel=kvm -m 4096 
-chardev stdio,id=charmonitor -mon chardev=charmonitor -drive 
file=debian.qcow2,id=d -S
  qemu-system-x86_64: warning: dbind: Couldn't register with accessibility bus: 
Did not receive a reply. Possible causes include: the remote application did 
not send a reply, the message bus security policy blocked the reply, the reply 
timeout expired, or the network connection was broken.
  QEMU 4.1.50 monitor - type 'help' for more information
  (qemu) loadvm foo
  (qemu) c
  (qemu) qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  quit
  [m@nargothrond:~] qemu-img check debian.qcow2
  Leaked cluster 85179 refcount=2 reference=1
  Leaked cluster 85180 refcount=2 reference=1
  ERROR cluster 266150 refcount=0 reference=2
  [...]
  ERROR OFLAG_COPIED data cluster: l2_entry=42284 refcount=1

  9493 errors were found on the image.
  Data may be corrupted, or further writes to the image may corrupt it.

  2 leaked clusters were found on the image.
  This means waste of disk space, but no harm to data.
  259266/327680 = 79.12% allocated, 1.67% fragmented, 0.00% compressed clusters
  Image end offset: 18340446208

  This is on a x86_64 Linux 5.3.1 Gentoo host with qemu-system-x86_64
  and accel=kvm. The compiler is gcc-9.2.0 with the rest of the system
  similarly current.

  Reproduced with qemu-4.1.0 from distribution package as well as
  vanilla git checkout of tag v4.1.0 and commit
  7f21573c822805a8e6be379d9bcf3ad9effef3dc (today's master). Does not
  happen with qemu compiled from vanilla checkout of tag v4.0.0. Build
  sequence:

  ./configure --prefix=$HOME/bin/qemu-bisect --target-list=x86_64-softmmu 
--disable-werror --disable-docs
  [...]
  CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g
  [...] (can provide full configure output if helpful)
  make -j8 install

  The kind of guest OS does not matter: seen with Debian testing 64bit,
  Windows 7 x86/x64 BIOS and Windows 7 x64 EFI.

  The virtual storage controller does not seem to matter: seen with
  VirtIO SCSI, emulated SCSI and emulated SATA AHCI.

  Caching modes (none, directsync, writeback), aio mode (threads,
  native) or discard (ignore, unmap) or detect-zeroes (off, unmap) does
  not influence occurence either.

  Having more RAM in the guest seems to increase odds of corruption:
  With 512MB to the Debian guest problem hardly occurs at all, with 4GB
  RAM it happens almost instantly.

  An automated reproducer works as follows:

  - the guest *does* mount its root fs and swap with option discard and
  my testing leaves me with the impression that file deletion rather
  than reading is causing the issue

  - foo is a snapshot of the running Debian VM which is already running
  command

  # while true ; do dd if=/dev/zero of=foo bs=10240k count=400 ; done

  to produce some I/O to the disk (4GB file with 4GB of RAM).

  - on the host a loop continuously resumes and saves the guest state
  and quits qemu inbetween:

  # while true ; do (echo loadvm foo ; echo c ; sleep 10 ; echo stop ;
  echo savevm foo ; echo quit ) | bin/qemu-bisect/bin/qemu-system-x86_64
  -machine pc-q35-3.1,accel=kvm -m 4096 -chardev stdio,id=charmonitor
  -mon chardev=charmonitor -drive file=debian.qcow2,id=d -S -display
  none ; done

  - quitting qemu inbetween saves and loads seems to be necessary for
  the problem to occur. Just continusouly in one session saving and
  loading guest state does not trigger it.

  - For me, after about 2 to 6 iterations of above loop the image is
  corrupted.

  - corruption manifests with other messages from qemu as well, e.g.:

  (qemu) loadvm foo
  Error: Device 'd' 

Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Keith Packard
Richard Henderson  writes:

> For semi-hosting, it seems even better if the semi-hosting syscall instruction
> is not "real", because you're explicitly requesting services from "unreal"
> hardware.  It should be specified to generate a SIGILL type of exception
> anywhere semi-hosting is not enabled.

In the QEMU case, yes, it's virtual hardware. However, the other common case
for semihosting is when doing hardware bringup using JTAG or other
remote debugging link -- having an instruction which safely traps to the
debugger is required to make semihosting usable there. Reading through
the history of the current RISC-V semihosting mechanism, there were many
designs considered and rejected because they wouldn't work in the JTAG
environment.

> With that in mind, it may be simpler to handle all of this not in the
> translator, but in the function that delivers the ebreak exception.  At that
> point one can arrange to read memory without raising additional
> exceptions.

I'll go explore and see if I can figure any of this out.

I'd still like to get the non-RISC-V SYS_READC patch landed someday :-)

-- 
-keith


signature.asc
Description: PGP signature


[ANNOUNCE] QEMU 4.1.1 Stable released

2019-11-14 Thread Michael Roth
Hi everyone,

I am pleased to announce that the QEMU v4.1.1 stable release is now
available:

You can grab the tarball from our download page here:

  https://www.qemu.org/download/#source

v4.1.1 is now tagged in the official qemu.git repository,
and the stable-4.1 branch has been updated accordingly:

  https://git.qemu.org/?p=qemu.git;a=shortlog;h=refs/heads/stable-4.1

This update contains a number of fixes relating qcow2 and XFS corruption, a
security fix for LSI SCSI emulation (CVE-2019-12068), and general fixes for
various other subsystems.

Please see the changelog for additional details and update accordingly.

Thank you to everyone involved!

CHANGELOG:

99c5874a9b: Update version for 4.1.1 release (Michael Roth)
e092a17d38: mirror: Keep mirror_top_bs drained after dropping permissions 
(Kevin Wolf)
088f1e8fd9: block/create: Do not abort if a block driver is not available 
(Philippe Mathieu-Daudé)
145b562990: vhost: Fix memory region section comparison (Dr. David Alan Gilbert)
42b6571357: memory: Provide an equality function for MemoryRegionSections (Dr. 
David Alan Gilbert)
c0aca9352d: memory: Align MemoryRegionSections fields (Dr. David Alan Gilbert)
54c130493c: tests: make filemonitor test more robust to event ordering (Daniel 
P. Berrangé)
3d018ff3bd: block: posix: Always allocate the first block (Nir Soffer)
f0d3fa265d: file-posix: Handle undetectable alignment (Nir Soffer)
7db05c8a73: block/file-posix: Let post-EOF fallocate serialize (Max Reitz)
d9b88f7e0d: block: Add bdrv_co_get_self_request() (Max Reitz)
590cff8230: block: Make wait/mark serialising requests public (Max Reitz)
2e2ad02f2c: block/io: refactor padding (Vladimir Sementsov-Ogievskiy)
b3b76fc643: util/iov: improve qemu_iovec_is_zero (Vladimir Sementsov-Ogievskiy)
cff024fe85: util/iov: introduce qemu_iovec_init_extended (Vladimir 
Sementsov-Ogievskiy)
40df4a1bf7: qcow2-bitmap: Fix uint64_t left-shift overflow (Tuguoyi)
b156178553: iotests: Add peek_file* functions (Max Reitz)
15f5e8c367: iotests: Add test for 4G+ compressed qcow2 write (Max Reitz)
405deba14f: qcow2: Fix QCOW2_COMPRESSED_SECTOR_MASK (Max Reitz)
01be50603b: virtio-blk: Cancel the pending BH when the dataplane is reset 
(Philippe Mathieu-Daudé)
051c9b3cbc: scsi: lsi: exit infinite loop while executing script 
(CVE-2019-12068) (Paolo Bonzini)
b387531323: target/xtensa: regenerate and re-import test_mmuhifi_c3 core (Max 
Filippov)
cdc6896659: target/arm: Allow reading flags from FPSCR for M-profile 
(Christophe Lyon)
c0b35d87de: hbitmap: handle set/reset with zero length (Vladimir 
Sementsov-Ogievskiy)
fcd7cba6ac: util/hbitmap: strict hbitmap_reset (Vladimir Sementsov-Ogievskiy)
aea18ef938: COLO-compare: Fix incorrect `if` logic (Fan Yang)
4887acf574: virtio-net: prevent offloads reset on migration (Mikhail 
Sennikovsky)
8010d3fce0: virtio: new post_load hook (Michael S. Tsirkin)
6705b9344f: ui: Fix hanging up Cocoa display on macOS 10.15 (Catalina) (Hikaru 
Nishida)
c0e2fbf124: mirror: Do not dereference invalid pointers (Max Reitz)
b077ac637d: iotests: Test large write request to qcow2 file (Max Reitz)
9e51c5306c: qcow2: Limit total allocation range to INT_MAX (Max Reitz)
aae0faa5d3: hw/core/loader: Fix possible crash in rom_copy() (Thomas Huth)
7b404cae7f: vhost-user: save features if the char dev is closed (Adrian Moreno)
d868d30db6: iotests: Test internal snapshots with -blockdev (Kevin Wolf)
7a8aa6c734: block/snapshot: Restrict set of snapshot nodes (Kevin Wolf)
331c08d300: s390: PCI: fix IOMMU region init (Matthew Rosato)
fc5afb1a92: roms/Makefile.edk2: don't pull in submodules when building from 
tarball (Michael Roth)
c5c9b1362d: make-release: pull in edk2 submodules so we can build it from 
tarballs (Michael Roth)
220816989c: hw/arm/boot.c: Set NSACR.{CP11,CP10} for NS kernel boots (Peter 
Maydell)
783e7eb52c: block/backup: fix backup_cow_with_offload for last cluster 
(Vladimir Sementsov-Ogievskiy)
e01ed1a1ae: block/backup: fix max_transfer handling for copy_range (Vladimir 
Sementsov-Ogievskiy)
416a692e51: qcow2: Fix corruption bug in qcow2_detect_metadata_preallocation() 
(Kevin Wolf)
e9bb3d942e: coroutine: Add qemu_co_mutex_assert_locked() (Kevin Wolf)
84f22c7285: block/qcow2: Fix corruption introduced by commit 8ac0f15f335 (Maxim 
Levitsky)
86b0f4022b: blockjob: update nodes head while removing all bdrv (Sergio Lopez)
2d86df1f78: curl: Handle success in multi_check_completion (Max Reitz)
18e1b71937: curl: Report only ready sockets (Max Reitz)
0888ddac8e: curl: Pass CURLSocket to curl_multi_do() (Max Reitz)
4be97ef966: curl: Check completion in curl_multi_do() (Max Reitz)
78ea94e389: curl: Keep *socket until the end of curl_sock_cb() (Max Reitz)
3648493495: curl: Keep pointer to the CURLState in CURLSocket (Max Reitz)
0694c489cd: block/nfs: tear down aio before nfs_close (Peter Lieven)
c9ffb12754: qcow2: Fix the calculation of the maximum L2 cache size (Alberto 
Garcia)
28a9a3558a: libvhost-user: fix SLAVE_SEND_FD handling (Johannes Berg)
9027d3fba6: 

[PATCH v3 4/4] tests: More iotest 223 improvements

2019-11-14 Thread Eric Blake
Run the core of the test twice, once without iothreads, and again
with, for more coverage of both setups.

Suggested-by: Nir Soffer 
Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/223 | 16 ++-
 tests/qemu-iotests/223.out | 85 +-
 2 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223
index b5a80e50bbc1..ea69cd4b8b5e 100755
--- a/tests/qemu-iotests/223
+++ b/tests/qemu-iotests/223
@@ -117,10 +117,19 @@ _send_qemu_cmd $QEMU_HANDLE 
'{"execute":"qmp_capabilities"}' "return"
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add",
   "arguments":{"driver":"qcow2", "node-name":"n",
 "file":{"driver":"file", "filename":"'"$TEST_IMG"'"}}}' "return"
-_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-blockdev-set-iothread",
-  "arguments":{"node-name":"n", "iothread":"io0"}}' "return"
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"block-dirty-bitmap-disable",
   "arguments":{"node":"n", "name":"b"}}' "return"
+
+for attempt in normal iothread; do
+
+echo
+echo "=== Set up NBD with $attempt access ==="
+echo
+if [ $attempt = iothread ]; then
+_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-blockdev-set-iothread",
+  "arguments":{"node-name":"n", "iothread":"io0"}}' "return"
+fi
+
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add",
   "arguments":{"device":"n"}}' "error" # Attempt add without server
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-start",
@@ -180,6 +189,9 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-remove",
   "arguments":{"name":"n2"}}' "error" # Attempt duplicate clean
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-stop"}' "return"
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-stop"}' "error" # Again
+
+done
+
 _send_qemu_cmd $QEMU_HANDLE '{"execute":"quit"}' "return"
 wait=yes _cleanup_qemu

diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index 861ddbd9e0a4..f17559880268 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -28,10 +28,91 @@ wrote 2097152/2097152 bytes at offset 2097152
 {"return": {}}
 {"execute":"blockdev-add", "arguments":{"driver":"IMGFMT", "node-name":"n", 
"file":{"driver":"file", "filename":"TEST_DIR/t.IMGFMT"}}}
 {"return": {}}
-{"execute":"x-blockdev-set-iothread", "arguments":{"node-name":"n", 
"iothread":"io0"}}
-{"return": {}}
 {"execute":"block-dirty-bitmap-disable", "arguments":{"node":"n", "name":"b"}}
 {"return": {}}
+
+=== Set up NBD with normal access ===
+
+{"execute":"nbd-server-add", "arguments":{"device":"n"}}
+{"error": {"class": "GenericError", "desc": "NBD server not running"}}
+{"execute":"nbd-server-start", "arguments":{"addr":{"type":"unix", 
"data":{"path":"SOCK_DIR/nbd"
+{"return": {}}
+{"execute":"nbd-server-start", "arguments":{"addr":{"type":"unix", 
"data":{"path":"SOCK_DIR/nbd1"
+{"error": {"class": "GenericError", "desc": "NBD server already running"}}
+exports available: 0
+{"execute":"nbd-server-add", "arguments":{"device":"n", "bitmap":"b"}}
+{"return": {}}
+{"execute":"nbd-server-add", "arguments":{"device":"nosuch"}}
+{"error": {"class": "GenericError", "desc": "Cannot find device=nosuch nor 
node_name=nosuch"}}
+{"execute":"nbd-server-add", "arguments":{"device":"n"}}
+{"error": {"class": "GenericError", "desc": "NBD server already has export 
named 'n'"}}
+{"execute":"nbd-server-add", "arguments":{"device":"n", "name":"n2", 
"bitmap":"b2"}}
+{"error": {"class": "GenericError", "desc": "Enabled bitmap 'b2' incompatible 
with readonly export"}}
+{"execute":"nbd-server-add", "arguments":{"device":"n", "name":"n2", 
"bitmap":"b3"}}
+{"error": {"class": "GenericError", "desc": "Bitmap 'b3' is not found"}}
+{"execute":"nbd-server-add", "arguments":{"device":"n", "name":"n2", 
"writable":true, "bitmap":"b2"}}
+{"return": {}}
+exports available: 2
+ export: 'n'
+  size:  4194304
+  flags: 0x58f ( readonly flush fua df multi cache )
+  min block: 1
+  opt block: 4096
+  max block: 33554432
+  available meta contexts: 2
+   base:allocation
+   qemu:dirty-bitmap:b
+ export: 'n2'
+  size:  4194304
+  flags: 0xced ( flush fua trim zeroes df cache fast-zero )
+  min block: 1
+  opt block: 4096
+  max block: 33554432
+  available meta contexts: 2
+   base:allocation
+   qemu:dirty-bitmap:b2
+
+=== Contrast normal status to large granularity dirty-bitmap ===
+
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2097152/2097152 bytes at offset 2097152
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, 
"offset": OFFSET},
+{ "start": 

[PATCH v3 2/4] iotests: Switch nbd tests to use Unix rather than TCP

2019-11-14 Thread Eric Blake
Up to now, all it took to cause a lot of iotest failures was to have a
background process such as 'nbdkit -p 10810 null' running, because we
hard-coded the TCP port.  Switching to a Unix socket eliminates this
contention.  We still have TCP coverage in test 233, and that test is
more careful to not pick a hard-coded port.

Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/common.filter | 6 --
 tests/qemu-iotests/common.rc | 8 
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index f870e00e4421..5367deea398e 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -127,7 +127,8 @@ _filter_img_create()
 -e "s#$TEST_DIR#TEST_DIR#g" \
 -e "s#$SOCK_DIR#SOCK_DIR#g" \
 -e "s#$IMGFMT#IMGFMT#g" \
--e 's#nbd:127.0.0.1:10810#TEST_DIR/t.IMGFMT#g' \
+-e 's#nbd:127.0.0.1:[0-9]\\+#TEST_DIR/t.IMGFMT#g' \
+-e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g' \
 -e "s# encryption=off##g" \
 -e "s# cluster_size=[0-9]\\+##g" \
 -e "s# table_size=[0-9]\\+##g" \
@@ -164,7 +165,8 @@ _filter_img_info()
 -e "s#$TEST_DIR#TEST_DIR#g" \
 -e "s#$SOCK_DIR#SOCK_DIR#g" \
 -e "s#$IMGFMT#IMGFMT#g" \
--e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \
+-e 's#nbd://127.0.0.1:[0-9]\\+$#TEST_DIR/t.IMGFMT#g' \
+-e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g' \
 -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \
 -e "/encrypted: yes/d" \
 -e "/cluster_size: [0-9]\\+/d" \
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index fa7bae24226a..f772dcb67322 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -217,7 +217,7 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then
 TEST_IMG="$DRIVER,file.filename=$TEST_DIR/t.$IMGFMT"
 elif [ "$IMGPROTO" = "nbd" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
-TEST_IMG="$DRIVER,file.driver=nbd,file.host=127.0.0.1,file.port=10810"
+
TEST_IMG="$DRIVER,file.driver=nbd,file.type=unix,file.path=$SOCKDIR/$IMGFMT"
 elif [ "$IMGPROTO" = "ssh" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 
TEST_IMG="$DRIVER,file.driver=ssh,file.host=127.0.0.1,file.path=$TEST_IMG_FILE"
@@ -233,7 +233,7 @@ else
 TEST_IMG=$TEST_DIR/t.$IMGFMT
 elif [ "$IMGPROTO" = "nbd" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
-TEST_IMG="nbd:127.0.0.1:10810"
+TEST_IMG="nbd+unix:///?socket=$SOCK_DIR/nbd"
 elif [ "$IMGPROTO" = "ssh" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 
REMOTE_TEST_DIR="ssh://\\($USER@\\)\\?127.0.0.1\\(:[0-9]\\+\\)\\?$TEST_DIR"
@@ -293,7 +293,7 @@ _stop_nbd_server()
 local QEMU_NBD_PID
 read QEMU_NBD_PID < "${QEMU_TEST_DIR}/qemu-nbd.pid"
 kill ${QEMU_NBD_PID}
-rm -f "${QEMU_TEST_DIR}/qemu-nbd.pid"
+rm -f "${QEMU_TEST_DIR}/qemu-nbd.pid" "$SOCK_DIR/nbd"
 fi
 }

@@ -353,7 +353,7 @@ _make_test_img()
 if [ $IMGPROTO = "nbd" ]; then
 # Pass a sufficiently high number to -e that should be enough for all
 # tests
-eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT -e 42 -x '' 
$TEST_IMG_FILE >/dev/null &"
+eval "$QEMU_NBD -v -t -k '$SOCK_DIR/nbd' -f $IMGFMT -e 42 -x '' 
$TEST_IMG_FILE >/dev/null &"
 sleep 1 # FIXME: qemu-nbd needs to be listening before we continue
 fi

-- 
2.21.0




[PATCH v3 1/4] iotests: Fix 173

2019-11-14 Thread Eric Blake
This test has been broken since 3.0.  It used TEST_IMG to influence
the name of a file created during _make_test_img, but commit 655ae6bb
changed things so that the wrong file name is being created, which
then caused _launch_qemu to fail.  In the meantime, the set of events
issued for the actions of the test has increased.

Why haven't we noticed the failure? Because the test rarely gets run:
'./check -qcow2 173' is insufficient (that defaults to using file protocol)
'./check -nfs 173' is insufficient (that defaults to using raw format)
so the test is only run with:
./check -qcow2 -nfs 173

Note that we already have a number of other problems with -nfs:
./check -nfs (fails 18/30)
./check -qcow2 -nfs (fails 45/76 after this patch, if exports does
not permit 'insecure')
and it's not on my priority list to fix those.  Rather, I found this
because of my next patch's work on tests using _send_qemu_cmd.

Fixes: 655ae6b
Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/173 | 4 ++--
 tests/qemu-iotests/173.out | 6 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/173 b/tests/qemu-iotests/173
index 9e2fa2e73cb9..29dcaa1960df 100755
--- a/tests/qemu-iotests/173
+++ b/tests/qemu-iotests/173
@@ -47,9 +47,9 @@ size=100M
 BASE_IMG="${TEST_DIR}/image.base"
 TOP_IMG="${TEST_DIR}/image.snp1"

-TEST_IMG="${BASE_IMG}" _make_test_img $size
+TEST_IMG_FILE="${BASE_IMG}" _make_test_img $size

-TEST_IMG="${TOP_IMG}" _make_test_img $size
+TEST_IMG_FILE="${TOP_IMG}" _make_test_img $size

 echo
 echo === Running QEMU, using block-stream to find backing image ===
diff --git a/tests/qemu-iotests/173.out b/tests/qemu-iotests/173.out
index f477a0099a32..e83d17ec2f64 100644
--- a/tests/qemu-iotests/173.out
+++ b/tests/qemu-iotests/173.out
@@ -7,6 +7,10 @@ Formatting 'TEST_DIR/image.snp1', fmt=IMGFMT size=104857600
 {"return": {}}
 {"return": {}}
 {"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk2"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk2"}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "disk2", "len": 104857600, "offset": 
104857600, "speed": 0, "type": "stream"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "disk2"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "disk2"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "disk2", "len": 0, "offset": 0, 
"speed": 0, "type": "stream"}}
 *** done
-- 
2.21.0




[PATCH v3 3/4] iotests: Include QMP input in .out files

2019-11-14 Thread Eric Blake
We generally include relevant HMP input in .out files, by virtue of
the fact that HMP echoes its input.  But QMP does not, so we have to
explicitly inject it in the output stream (appropriately filtered to
keep the tests passing), in order to make it easier to read .out files
to see what behavior is being tested (especially true where the output
file is a sequence of {'return': {}}).

Suggested-by: Max Reitz 
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/common.qemu |  9 
 tests/qemu-iotests/085.out | 26 ++
 tests/qemu-iotests/094.out |  4 ++
 tests/qemu-iotests/095.out |  2 +
 tests/qemu-iotests/109.out | 88 ++
 tests/qemu-iotests/117.out |  5 ++
 tests/qemu-iotests/127.out |  4 ++
 tests/qemu-iotests/140.out |  5 ++
 tests/qemu-iotests/141.out | 26 ++
 tests/qemu-iotests/143.out |  3 ++
 tests/qemu-iotests/144.out |  5 ++
 tests/qemu-iotests/153.out | 11 +
 tests/qemu-iotests/156.out | 11 +
 tests/qemu-iotests/161.out |  8 
 tests/qemu-iotests/173.out |  4 ++
 tests/qemu-iotests/182.out |  8 
 tests/qemu-iotests/183.out | 11 +
 tests/qemu-iotests/185.out | 18 +++
 tests/qemu-iotests/191.out |  8 
 tests/qemu-iotests/200.out |  1 +
 tests/qemu-iotests/223.out | 19 
 tests/qemu-iotests/229.out |  3 ++
 tests/qemu-iotests/249.out |  6 +++
 23 files changed, 285 insertions(+)

diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 8d2021a7eb0c..de680cf1c7c9 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -123,6 +123,9 @@ _timed_wait_for()
 # until either timeout, or a response.  If it is not set, or <=0,
 # then the command is only sent once.
 #
+# If neither $silent nor $mismatch_only is set, and $cmd begins with '{',
+# echo the command before sending it the first time.
+#
 # If $qemu_error_no_exit is set, then even if the expected response
 # is not seen, we will not exit.  $QEMU_STATUS[$1] will be set it -1 in
 # that case.
@@ -152,6 +155,12 @@ _send_qemu_cmd()
 shift $(($# - 2))
 fi

+# Display QMP being sent, but not HMP (since HMP already echoes its
+# input back to output); decide based on leading '{'
+if [ -z "$silent" ] && [ -z "$mismatch_only" ] &&
+[ "$cmd" != "${cmd#\{}" ]; then
+echo "${cmd}" | _filter_testdir | _filter_imgfmt
+fi
 while [ ${count} -gt 0 ]
 do
 echo "${cmd}" >&${QEMU_IN[${h}]}
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
index 2a5f256cd3ec..bb50227b8265 100644
--- a/tests/qemu-iotests/085.out
+++ b/tests/qemu-iotests/085.out
@@ -7,48 +7,61 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728

 === Sending capabilities ===

+{ 'execute': 'qmp_capabilities' }
 {"return": {}}

 === Create a single snapshot on virtio0 ===

+{ 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', 
'snapshot-file':'TEST_DIR/1-snapshot-v0.IMGFMT', 'format': 'IMGFMT' } }
 Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 
backing_file=TEST_DIR/t.qcow2.1 backing_fmt=qcow2 cluster_size=65536 
lazy_refcounts=off refcount_bits=16
 {"return": {}}

 === Invalid command - missing device and nodename ===

+{ 'execute': 'blockdev-snapshot-sync', 'arguments': { 
'snapshot-file':'TEST_DIR/1-snapshot-v0.IMGFMT', 'format': 'IMGFMT' } }
 {"error": {"class": "GenericError", "desc": "Cannot find device= nor 
node_name="}}

 === Invalid command - missing snapshot-file ===

+{ 'execute': 'blockdev-snapshot-sync', 'arguments': { 'device': 'virtio0', 
'format': 'IMGFMT' } }
 {"error": {"class": "GenericError", "desc": "Parameter 'snapshot-file' is 
missing"}}


 === Create several transactional group snapshots ===

+{ 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 
'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 
'TEST_DIR/2-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' 
: { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/2-snapshot-v1.IMGFMT' } } ] 
} }
 Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 size=134217728 
backing_file=TEST_DIR/1-snapshot-v0.qcow2 backing_fmt=qcow2 cluster_size=65536 
lazy_refcounts=off refcount_bits=16
 Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 size=134217728 
backing_file=TEST_DIR/t.qcow2.2 backing_fmt=qcow2 cluster_size=65536 
lazy_refcounts=off refcount_bits=16
 {"return": {}}
+{ 'execute': 'transaction', 'arguments': {'actions': [ { 'type': 
'blockdev-snapshot-sync', 'data' : { 'device': 'virtio0', 'snapshot-file': 
'TEST_DIR/3-snapshot-v0.IMGFMT' } }, { 'type': 'blockdev-snapshot-sync', 'data' 
: { 'device': 'virtio1', 'snapshot-file': 'TEST_DIR/3-snapshot-v1.IMGFMT' } } ] 
} }
 Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 size=134217728 
backing_file=TEST_DIR/2-snapshot-v0.qcow2 backing_fmt=qcow2 cluster_size=65536 

[PATCH v3 for-4.2 0/4] tests: More iotest 223 improvements

2019-11-14 Thread Eric Blake
Since v2:
- rebase to SOCK_DIR changes
- new patch 2 to avoid TCP port 10810 contention [Max]
- add imgfmt filtering [Max]

As this is limited to iotests, I think it is fair game for -rc2.

Eric Blake (4):
  iotests: Fix 173
  iotests: Switch nbd tests to use Unix rather than TCP
  iotests: Include QMP input in .out files
  tests: More iotest 223 improvements

 tests/qemu-iotests/common.filter |   6 +-
 tests/qemu-iotests/common.qemu   |   9 +++
 tests/qemu-iotests/common.rc |   8 +--
 tests/qemu-iotests/085.out   |  26 
 tests/qemu-iotests/094.out   |   4 ++
 tests/qemu-iotests/095.out   |   2 +
 tests/qemu-iotests/109.out   |  88 +++
 tests/qemu-iotests/117.out   |   5 ++
 tests/qemu-iotests/127.out   |   4 ++
 tests/qemu-iotests/140.out   |   5 ++
 tests/qemu-iotests/141.out   |  26 
 tests/qemu-iotests/143.out   |   3 +
 tests/qemu-iotests/144.out   |   5 ++
 tests/qemu-iotests/153.out   |  11 
 tests/qemu-iotests/156.out   |  11 
 tests/qemu-iotests/161.out   |   8 +++
 tests/qemu-iotests/173   |   4 +-
 tests/qemu-iotests/173.out   |  10 +++-
 tests/qemu-iotests/182.out   |   8 +++
 tests/qemu-iotests/183.out   |  11 
 tests/qemu-iotests/185.out   |  18 ++
 tests/qemu-iotests/191.out   |   8 +++
 tests/qemu-iotests/200.out   |   1 +
 tests/qemu-iotests/223   |  16 -
 tests/qemu-iotests/223.out   | 100 +++
 tests/qemu-iotests/229.out   |   3 +
 tests/qemu-iotests/249.out   |   6 ++
 27 files changed, 395 insertions(+), 11 deletions(-)

-- 
2.21.0




Re: [PATCH v2 1/3] hw/block/pflash: Remove dynamic field width from trace events

2019-11-14 Thread Philippe Mathieu-Daudé

Hi Eric,

On 11/8/19 4:56 PM, Eric Blake wrote:

On 11/8/19 8:40 AM, Philippe Mathieu-Daudé wrote:

Since not all trace backends support dynamic field width in
format (dtrace via stap does not), replace by a static field
width instead.

Reported-by: Eric Blake 
Buglink: https://bugs.launchpad.net/qemu/+bug/1844817
Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/block/pflash_cfi01.c | 8 
  hw/block/pflash_cfi02.c | 8 
  hw/block/trace-events   | 8 
  3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 566c0acb77..787d1196f2 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -276,7 +276,7 @@ static uint32_t pflash_data_read(PFlashCFI01 *pfl, 
hwaddr offset,

  DPRINTF("BUG in %s\n", __func__);
  abort();
  }
-    trace_pflash_data_read(offset, width << 1, ret);
+    trace_pflash_data_read(offset, width << 3, ret);


Umm, why is width changing?  That's not mentioned in the commit message.


Previously it was used to set the format width: [1, 2, 4] -> [2, 4, 8].

We usually log the width in byte (accessed at memory location) or bits 
(used by the bus). When using this device I'm custom to think in bus 
access width.


Regardless whichever format we prefer, a change is needed.



@@ -389,7 +389,7 @@ static uint32_t pflash_read(PFlashCFI01 *pfl, 
hwaddr offset,

  break;
  }
-    trace_pflash_io_read(offset, width, width << 1, ret, pfl->cmd, 
pfl->wcycle);
+    trace_pflash_io_read(offset, width << 3, ret, pfl->cmd, 
pfl->wcycle);


And even this one is odd.  Matching up to the trace messages:


-pflash_io_read(uint64_t offset, int width, int fmt_width, uint32_t 
value, uint8_t cmd, uint8_t wcycle) "offset:0x%04"PRIx64" width:%d 
value:0x%0*x cmd:0x%02x wcycle:%u"


+pflash_io_read(uint64_t offset, int width, uint32_t value, uint8_t 
cmd, uint8_t wcycle) "offset:0x%04"PRIx64" width:%d value:0x%04x 
cmd:0x%02x wcycle:%u"


you are changing from:

"%04"PRIx64" %d %0*x...", offset, width, width << 1, ret,...

(where width<<1, ret matches *x)

into

"%04"PRIx64" %d %04x...", offset, width << 3, ret,...

where you are now printing a different value for width.


Do you prefer using a "-bit" suffix? As

"offset:0x%04"PRIx64" width:%d-bit value:0x%04x cmd:0x%02x wcycle:%u"

I can also simply remove this information. Ideally I'd revert this patch 
once the we get this format parsable by the SystemTap backend.





Re: [PATCH v2 3/5] MAINTAINERS: Adjust maintainership for Malta board

2019-11-14 Thread Philippe Mathieu-Daudé

+Paul Burton

On 11/13/19 2:47 PM, Aleksandar Markovic wrote:

From: Aleksandar Markovic 

Change the maintainership for Malta board to improve its quality.

Acked-by: Aurelien Jarno 
Signed-off-by: Aleksandar Markovic 
---
  MAINTAINERS | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bf2144..6afec32 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -955,8 +955,9 @@ F: hw/display/jazz_led.c
  F: hw/dma/rc4030.c
  
  Malta

-M: Aurelien Jarno 

> +R: Aurelien Jarno 

Aurelien, do you want to stay co-maintainer with this one?
Else, thanks for staying listed as designated reviewer :)


-R: Aleksandar Rikalo 
+M: Philippe Mathieu-Daudé 


I'm happy to continue Aurelien work with this board :)
Similarly to the Fuloong board, I mostly use the Malta as a hobby, so 
I'll use my personal email.


Paul, do you mind being co-maintainer or at least listed as designated 
reviewer here?



+R: Hervé Poussineau 


I don't see commits from Hervé with this board, so he is probably not 
interested.



  S: Maintained


Until Paul is interested in co-maintenance, let's change this board 
status to 'Odd Fixes'.



  F: hw/mips/mips_malta.c
  F: hw/mips/gt64xxx_pci.c





Re: [PATCH v2 2/3] hw/mips/gt64xxx: Remove dynamic field width from trace events

2019-11-14 Thread Philippe Mathieu-Daudé

On 11/8/19 4:58 PM, Eric Blake wrote:

On 11/8/19 8:40 AM, Philippe Mathieu-Daudé wrote:

Since not all trace backends support dynamic field width in
format (dtrace via stap does not), replace by a static field
width instead.

Reported-by: Eric Blake 
Buglink: https://bugs.launchpad.net/qemu/+bug/1844817
Signed-off-by: Philippe Mathieu-Daudé 
---
v2: Do not update qemu_log_mask()
---
  hw/mips/gt64xxx_pci.c | 16 
  hw/mips/trace-events  |  4 ++--
  2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c
index 5cab9c1ee1..6743e7c929 100644
--- a/hw/mips/gt64xxx_pci.c
+++ b/hw/mips/gt64xxx_pci.c
@@ -642,19 +642,19 @@ static void gt64120_writel(void *opaque, hwaddr 
addr,

  /* not really implemented */
  s->regs[saddr] = ~(~(s->regs[saddr]) | ~(val & 0xfffe));
  s->regs[saddr] |= !!(s->regs[saddr] & 0xfffe);
-    trace_gt64120_write("INTRCAUSE", size << 1, val);
+    trace_gt64120_write("INTRCAUSE", size << 3, val);


Again, this isn't mentioned in the commit message.  Why are you changing 
parameter values?




+++ b/hw/mips/trace-events
@@ -1,4 +1,4 @@
  # gt64xxx.c
-gt64120_read(const char *regname, int width, uint64_t value) "gt64120 
read %s value:0x%0*" PRIx64
-gt64120_write(const char *regname, int width, uint64_t value) 
"gt64120 write %s value:0x%0*" PRIx64
+gt64120_read(const char *regname, int width, uint64_t value) "gt64120 
read %s width:%d value:0x%08" PRIx64
+gt64120_write(const char *regname, int width, uint64_t value) 
"gt64120 write %s width:%d value:0x%08" PRIx64


Huh, we were really broken - the old code (if passed to printf) would 
try to parse 4 parameters, even though it was only passed 3.  But it 
looks like you still need a v3.


Oops. I am surprise the compiler doesn't emit a warning here...




Re: [PATCH v9 Kernel 3/5] vfio iommu: Add ioctl defination to unmap IOVA and return dirty bitmap

2019-11-14 Thread Alex Williamson
On Fri, 15 Nov 2019 00:26:26 +0530
Kirti Wankhede  wrote:

> On 11/14/2019 1:52 AM, Alex Williamson wrote:
> > On Thu, 14 Nov 2019 01:22:39 +0530
> > Kirti Wankhede  wrote:
> >   
> >> On 11/13/2019 4:00 AM, Alex Williamson wrote:  
> >>> On Tue, 12 Nov 2019 22:33:38 +0530
> >>> Kirti Wankhede  wrote:
> >>>  
>  With vIOMMU, during pre-copy phase of migration, while CPUs are still
>  running, IO virtual address unmap can happen while device still keeping
>  reference of guest pfns. Those pages should be reported as dirty before
>  unmap, so that VFIO user space application can copy content of those 
>  pages
>  from source to destination.
> 
>  IOCTL defination added here add bitmap pointer, size and flag. If flag  
> >>>
> >>> definition, adds
> >>>  
>  VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is set and bitmap memory is 
>  allocated
>  and bitmap_size of set, then ioctl will create bitmap of pinned pages 
>  and  
> >>>
> >>> s/of/is/
> >>>  
>  then unmap those.
> 
>  Signed-off-by: Kirti Wankhede 
>  Reviewed-by: Neo Jia 
>  ---
> include/uapi/linux/vfio.h | 33 +
> 1 file changed, 33 insertions(+)
> 
>  diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>  index 6fd3822aa610..72fd297baf52 100644
>  --- a/include/uapi/linux/vfio.h
>  +++ b/include/uapi/linux/vfio.h
>  @@ -925,6 +925,39 @@ struct vfio_iommu_type1_dirty_bitmap {
> 
> #define VFIO_IOMMU_GET_DIRTY_BITMAP _IO(VFIO_TYPE, 
>  VFIO_BASE + 17)
> 
>  +/**
>  + * VFIO_IOMMU_UNMAP_DMA_GET_BITMAP - _IOWR(VFIO_TYPE, VFIO_BASE + 18,
>  + *struct 
>  vfio_iommu_type1_dma_unmap_bitmap)
>  + *
>  + * Unmap IO virtual addresses using the provided struct
>  + * vfio_iommu_type1_dma_unmap_bitmap.  Caller sets argsz.
>  + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get dirty 
>  bitmap
>  + * before unmapping IO virtual addresses. If this flag is not set, only 
>  IO
>  + * virtual address are unmapped without creating pinned pages bitmap, 
>  that
>  + * is, behave same as VFIO_IOMMU_UNMAP_DMA ioctl.
>  + * User should allocate memory to get bitmap and should set size of 
>  allocated
>  + * memory in bitmap_size field. One bit in bitmap is used to represent 
>  per page
>  + * consecutively starting from iova offset. Bit set indicates page at 
>  that
>  + * offset from iova is dirty.
>  + * The actual unmapped size is returned in the size field and bitmap of 
>  pages
>  + * in the range of unmapped size is returned in bitmap if flag
>  + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is set.
>  + *
>  + * No guarantee is made to the user that arbitrary unmaps of iova or 
>  size
>  + * different from those used in the original mapping call will succeed.
>  + */
>  +struct vfio_iommu_type1_dma_unmap_bitmap {
>  +__u32argsz;
>  +__u32flags;
>  +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
>  +__u64iova;/* IO virtual address 
>  */
>  +__u64size;/* Size of mapping 
>  (bytes) */
>  +__u64bitmap_size; /* in bytes */
>  +void __user *bitmap;  /* one bit per page */
>  +};
>  +
>  +#define VFIO_IOMMU_UNMAP_DMA_GET_BITMAP _IO(VFIO_TYPE, VFIO_BASE + 18)
>  +  
> >>>
> >>> Why not extend VFIO_IOMMU_UNMAP_DMA to support this rather than add an
> >>> ioctl that duplicates the functionality and extends it??  
> >>
> >> We do want old userspace applications to work with new kernel and
> >> vice-versa, right?
> >>
> >> If I try to change existing VFIO_IOMMU_UNMAP_DMA ioctl structure, say if
> >> add 'bitmap_size' and 'bitmap' after 'size', with below code in old
> >> kernel, old kernel & new userspace will work.
> >>
> >>   minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
> >>
> >>   if (copy_from_user(, (void __user *)arg, minsz))
> >>   return -EFAULT;
> >>
> >>   if (unmap.argsz < minsz || unmap.flags)
> >>   return -EINVAL;
> >>
> >>
> >> With new kernel it would change to:
> >>   minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, bitmap);  
> > 
> > No, the minimum structure size still ends at size, we interpret flags
> > and argsz to learn if the user understands those fields and optionally
> > include them.  Therefore old userspace on new kernel continues to work.
> >   
> >>   if (copy_from_user(, (void __user *)arg, minsz))
> >>   return -EFAULT;
> >>
> >>   if (unmap.argsz < minsz || unmap.flags)
> >>   return -EINVAL;
> >>
> >> Then old 

Re: [PATCH v9 Kernel 2/5] vfio iommu: Add ioctl defination to get dirty pages bitmap.

2019-11-14 Thread Alex Williamson
On Fri, 15 Nov 2019 00:26:07 +0530
Kirti Wankhede  wrote:

> On 11/14/2019 1:37 AM, Alex Williamson wrote:
> > On Thu, 14 Nov 2019 01:07:21 +0530
> > Kirti Wankhede  wrote:
> >   
> >> On 11/13/2019 4:00 AM, Alex Williamson wrote:  
> >>> On Tue, 12 Nov 2019 22:33:37 +0530
> >>> Kirti Wankhede  wrote:
> >>>  
>  All pages pinned by vendor driver through vfio_pin_pages API should be
>  considered as dirty during migration. IOMMU container maintains a list of
>  all such pinned pages. Added an ioctl defination to get bitmap of such  
> >>>
> >>> definition
> >>>  
>  pinned pages for requested IO virtual address range.  
> >>>
> >>> Additionally, all mapped pages are considered dirty when physically
> >>> mapped through to an IOMMU, modulo we discussed devices opting in to
> >>> per page pinning to indicate finer granularity with a TBD mechanism to
> >>> figure out if any non-opt-in devices remain.
> >>>  
> >>
> >> You mean, in case of device direct assignment (device pass through)?  
> > 
> > Yes, or IOMMU backed mdevs.  If vfio_dmas in the container are fully
> > pinned and mapped, then the correct dirty page set is all mapped pages.
> > We discussed using the vpfn list as a mechanism for vendor drivers to
> > reduce their migration footprint, but we also discussed that we would
> > need a way to determine that all participants in the container have
> > explicitly pinned their working pages or else we must consider the
> > entire potential working set as dirty.
> >   
> 
> How can vendor driver tell this capability to iommu module? Any suggestions?

I think it does so by pinning pages.  Is it acceptable that if the
vendor driver pins any pages, then from that point forward we consider
the IOMMU group dirty page scope to be limited to pinned pages?  There
are complications around non-singleton IOMMU groups, but I think we're
already leaning towards that being a non-worthwhile problem to solve.
So if we require that only singleton IOMMU groups can pin pages and we
pass the IOMMU group as a parameter to
vfio_iommu_driver_ops.pin_pages(), then the type1 backend can set a
flag on its local vfio_group struct to indicate dirty page scope is
limited to pinned pages.  We might want to keep a flag on the
vfio_iommu struct to indicate if all of the vfio_groups for each
vfio_domain in the vfio_iommu.domain_list dirty page scope limited to
pinned pages as an optimization to avoid walking lists too often.  Then
we could test if vfio_iommu.domain_list is not empty and this new flag
does not limit the dirty page scope, then everything within each
vfio_dma is considered dirty.
 
>  Signed-off-by: Kirti Wankhede 
>  Reviewed-by: Neo Jia 
>  ---
> include/uapi/linux/vfio.h | 23 +++
> 1 file changed, 23 insertions(+)
> 
>  diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>  index 35b09427ad9f..6fd3822aa610 100644
>  --- a/include/uapi/linux/vfio.h
>  +++ b/include/uapi/linux/vfio.h
>  @@ -902,6 +902,29 @@ struct vfio_iommu_type1_dma_unmap {
> #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
> #define VFIO_IOMMU_DISABLE_IO(VFIO_TYPE, VFIO_BASE + 16)
> 
>  +/**
>  + * VFIO_IOMMU_GET_DIRTY_BITMAP - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
>  + * struct 
>  vfio_iommu_type1_dirty_bitmap)
>  + *
>  + * IOCTL to get dirty pages bitmap for IOMMU container during migration.
>  + * Get dirty pages bitmap of given IO virtual addresses range using
>  + * struct vfio_iommu_type1_dirty_bitmap. Caller sets argsz, which is 
>  size of
>  + * struct vfio_iommu_type1_dirty_bitmap. User should allocate memory to 
>  get
>  + * bitmap and should set size of allocated memory in bitmap_size field.
>  + * One bit is used to represent per page consecutively starting from 
>  iova
>  + * offset. Bit set indicates page at that offset from iova is dirty.
>  + */
>  +struct vfio_iommu_type1_dirty_bitmap {
>  +__u32argsz;
>  +__u32flags;
>  +__u64iova;  /* IO virtual address */
>  +__u64size;  /* Size of iova range */
>  +__u64bitmap_size;   /* in bytes */  
> >>>
> >>> This seems redundant.  We can calculate the size of the bitmap based on
> >>> the iova size.
> >>> 
> >>
> >> But in kernel space, we need to validate the size of memory allocated by
> >> user instead of assuming user is always correct, right?  
> > 
> > What does it buy us for the user to tell us the size?  They could be
> > wrong, they could be malicious.  The argsz field on the ioctl is mostly
> > for the handshake that the user is competent, we should get faults from
> > the copy-user operation if it's incorrect.
> >  
> 
> It is to mainly fail safe.
> 
>  +void 

Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 20:52, Richard Henderson
 wrote:
> Yet another reason why I prefer any semi-hosting call to use an encoding that
> is otherwise reserved illegal.
>
> For this, you have to make up your mind: is it important to execute the
> instructions as specified by the ISA, or as specified by the semi-hosting 
> spec?
>
> In this case, semi-hosting defines an "entry nop" that begins the sequence, 
> and
> I think that we are well within our rights to ignore the validity of "insn1
> insn2 || other-insn".

Perhaps. I think you get the same issue with
  insn1 || insn2
vs
  insn1 || some-other-insn

though. (And the spec has wording that explicitly wants the
latter to be handled with the normal "I'm a hint instruction"
behaviour of insn1.)

-- PMM



Re: [PATCH] pl031: Expose RTCICR as proper WC register

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 20:45, Alexander Graf  wrote:
> On 14.11.19 15:42, Peter Maydell wrote:
> > Is that OK?
>
> It's much better. Will you just fix it up inline for me please? :)

Sure :-)

-- PMM



Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Richard Henderson
On 11/14/19 8:29 PM, Peter Maydell wrote:
> On Thu, 14 Nov 2019 at 19:18, Richard Henderson
>  wrote:
>>   - If the sequence crosses a page, then so be it.  Because of
>> step 1, this only happens when we *must* cross a page, and
>> will have recognized any paging exception anyway.
>> The generic parts of qemu will handle proper invalidation of
>> a TB that crosses a page boundary.
> 
> I'm not sure this would work. If you have
>   insn1 insn2 || other-insn
> (where || is the page boundary and page 2 is non-executable)
> then the required behaviour is "execute insn1 and insn2 with
> normal behaviour, then fault trying to read other-insn, with
> the fault address being that of other-insn".
> Whereas for
>   insn1 insn2 || insn3
> you want to treat it as a semihosting sequence. But you can't distinguish
> the two because trying to read the word in page 2 will cause us to
> generate a fault with the fault address being that of insn1. Or
> have I forgotten how the page-crossing handling works ?

Yet another reason why I prefer any semi-hosting call to use an encoding that
is otherwise reserved illegal.

For this, you have to make up your mind: is it important to execute the
instructions as specified by the ISA, or as specified by the semi-hosting spec?

In this case, semi-hosting defines an "entry nop" that begins the sequence, and
I think that we are well within our rights to ignore the validity of "insn1
insn2 || other-insn".


r~



Re: [PATCH] pl031: Expose RTCICR as proper WC register

2019-11-14 Thread Alexander Graf



On 14.11.19 15:42, Peter Maydell wrote:

On Tue, 12 Nov 2019 at 11:57, Peter Maydell  wrote:

On Tue, 12 Nov 2019 at 07:28, Alexander Graf  wrote:

I still think that being consistent with the actual PL031 spec is
preferable though. If any real world guest breaks because of this, we
can still revert this patch and document the exact breakage in the
comment instead.


Yeah, I agree; I'm essentially just gathering material
for the commit message here. (The gold standard would be
to go find some hardware with a real pl031 and prod it
to confirm behaviour, but that's more effort than really
seems justified to me.)


I propose to put this in for 4.2 with an updated commit message:

===begin===
 pl031: Expose RTCICR as proper WC register

 The current PL031 RTCICR register implementation always clears the
 IRQ pending status on a register write, regardless of the value the
 guest writes.

 To justify that behavior, it references the ARM926EJ-S Development
 Chip Reference Manual (DDI0287B) and indicates that said document
 states that any write clears the internal IRQ state.  It is indeed
 true that in section 11.1 this document says:

   "The interrupt is cleared by writing any data value to the
interrupt clear register RTCICR".

 However, later in section 11.2.2 it contradicts itself by saying:

   "Writing 1 to bit 0 of RTCICR clears the RTCINTR flag."

 The latter statement matches the PL031 TRM (DDI0224C), which says:

   "Writing 1 to bit position 0 clears the corresponding interrupt.
Writing 0 has no effect."

 Let's assume that the self-contradictory DDI0287B is in error, and
 follow the reference manual for the device itself, by making the
 register write-one-to-clear.
===endit===

Is that OK?


It's much better. Will you just fix it up inline for me please? :)


Thanks,

Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Ralf Herbrich
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879




Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 19:18, Richard Henderson
 wrote:
>   - If the sequence crosses a page, then so be it.  Because of
> step 1, this only happens when we *must* cross a page, and
> will have recognized any paging exception anyway.
> The generic parts of qemu will handle proper invalidation of
> a TB that crosses a page boundary.

I'm not sure this would work. If you have
  insn1 insn2 || other-insn
(where || is the page boundary and page 2 is non-executable)
then the required behaviour is "execute insn1 and insn2 with
normal behaviour, then fault trying to read other-insn, with
the fault address being that of other-insn".
Whereas for
  insn1 insn2 || insn3
you want to treat it as a semihosting sequence. But you can't distinguish
the two because trying to read the word in page 2 will cause us to
generate a fault with the fault address being that of insn1. Or
have I forgotten how the page-crossing handling works ?

thanks
-- PMM



Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 17:47, Peter Maydell  wrote:

> The ARM semihosting ABI also has a number of warts
> which are basically historical legacy. With a clean
> sheet you get to avoid some of them. (Notably you could
> skip the whole 'negotiate presence of extensions' business
> by just getting those parts right from the start

...better still, if you can define (a) a mandatory "return
version and feature bit info" call right from the start
and (b) the required behaviour for attempts to make
a semihosting call with an unknown/unimplemented
function number -- then you can avoid the nasty
kludge "magic behaviour by opening a magic filename"
we were stuck with specifying in arm semihosting 2.0.

thanks
-- PMM



Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Richard Henderson
On 11/14/19 5:14 PM, Peter Maydell wrote:
> On Fri, 25 Oct 2019 at 20:15, Keith Packard  wrote:
>> There seems to be convergence on a pretty simple interface which uses
>> ebreak surrounded by a couple of specific no-ops:
>>
>>   slli x0, x0, 0x1f
>>   ebreak
>>   srai x0, x0, 0x7
>>
>> There are implementations in rust and openocd, and I've got one for
>> picolibc. The risc-v semihosting code is sitting on a branch in my repo
>> on github:
>>
>> https://github.com/keith-packard/qemu/tree/riscv-semihost
> 
> I had an idle glance at this implementation, and this:
> 
>uint32_t pre = opcode_at(>base, ctx->base.pc_next - 4);
>uint32_t ebreak = opcode_at(>base, ctx->base.pc_next);
>uint32_t post = opcode_at(>base, ctx->base.pc_next + 4);
> 
> (where opcode_at() is a wrapper for cpu_ldl_code()) has
> some unfortunate side effects: if the previous instruction
> is in the previous MMU page, or the following instruction
> is in the next MMU page, you might incorrectly trigger
> an exception (where QEMU will just longjmp straight out of
> the cpu_ldl_code()) if that other page isn't actually mapped
> in the guest's page table. You need to be careful not to access
> code outside the page you're actually on unless you're really
> going to execute it and are OK with it faulting.
> 
> I'm not sure what the best way on QEMU to identify this kind
> of multiple-instruction-sequence is -- Richard might have an
> idea. One approach would be to always take an exception
> (or call a helper function) for the 'ebreak', and then
> distinguish semihosting from other kinds of ebreak by doing
> the load of the preceding/succeeding instructions at runtime
> rather than translate time.

It's irritating that this 3 insn sequence is already a thing, baked into other
sim/emulators.  Seems to me that it would have been easier to abscond with

ebreak + func3 field != 0.

For semi-hosting, it seems even better if the semi-hosting syscall instruction
is not "real", because you're explicitly requesting services from "unreal"
hardware.  It should be specified to generate a SIGILL type of exception
anywhere semi-hosting is not enabled.

That said, it is possible to do this in the translator, by considering this
sequence to be one 12-byte instruction.

You'd build in a recognizer into the decoder such that, when semi-hosting is
enabled, seeing the entry nop starts looking ahead:

  - If this is not the first insn in the TB, and there are
not 8 more bytes remaining on the page, terminate the
current TB.  This will re-start the machine in the next TB.

  - If the complete sequence is not found, then advance the pc
past the entry nop and let nature take its course wrt the
subsequent instructions.

  - If the sequence crosses a page, then so be it.  Because of
step 1, this only happens when we *must* cross a page, and
will have recognized any paging exception anyway.
The generic parts of qemu will handle proper invalidation of
a TB that crosses a page boundary.

  - This is kinda similar to the target/sh4 code to handle
generic user-space atomic sequences: c.f. decode_gusa().

  - This implementation does mean that you can't jump directly
to the ebreak insn and have the sequence be recognized.
Control must flow through the entry nop.
This is a bug or a feature depending on the reviewer.  ;-)

With that in mind, it may be simpler to handle all of this not in the
translator, but in the function that delivers the ebreak exception.  At that
point one can arrange to read memory without raising additional exceptions.


r~



Re: [PATCH v9 Kernel 3/5] vfio iommu: Add ioctl defination to unmap IOVA and return dirty bitmap

2019-11-14 Thread Kirti Wankhede




On 11/14/2019 1:52 AM, Alex Williamson wrote:

On Thu, 14 Nov 2019 01:22:39 +0530
Kirti Wankhede  wrote:


On 11/13/2019 4:00 AM, Alex Williamson wrote:

On Tue, 12 Nov 2019 22:33:38 +0530
Kirti Wankhede  wrote:
   

With vIOMMU, during pre-copy phase of migration, while CPUs are still
running, IO virtual address unmap can happen while device still keeping
reference of guest pfns. Those pages should be reported as dirty before
unmap, so that VFIO user space application can copy content of those pages
from source to destination.

IOCTL defination added here add bitmap pointer, size and flag. If flag


definition, adds
   

VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is set and bitmap memory is allocated
and bitmap_size of set, then ioctl will create bitmap of pinned pages and


s/of/is/
   

then unmap those.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
   include/uapi/linux/vfio.h | 33 +
   1 file changed, 33 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6fd3822aa610..72fd297baf52 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -925,6 +925,39 @@ struct vfio_iommu_type1_dirty_bitmap {
   
   #define VFIO_IOMMU_GET_DIRTY_BITMAP _IO(VFIO_TYPE, VFIO_BASE + 17)
   
+/**

+ * VFIO_IOMMU_UNMAP_DMA_GET_BITMAP - _IOWR(VFIO_TYPE, VFIO_BASE + 18,
+ *   struct vfio_iommu_type1_dma_unmap_bitmap)
+ *
+ * Unmap IO virtual addresses using the provided struct
+ * vfio_iommu_type1_dma_unmap_bitmap.  Caller sets argsz.
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get dirty bitmap
+ * before unmapping IO virtual addresses. If this flag is not set, only IO
+ * virtual address are unmapped without creating pinned pages bitmap, that
+ * is, behave same as VFIO_IOMMU_UNMAP_DMA ioctl.
+ * User should allocate memory to get bitmap and should set size of allocated
+ * memory in bitmap_size field. One bit in bitmap is used to represent per page
+ * consecutively starting from iova offset. Bit set indicates page at that
+ * offset from iova is dirty.
+ * The actual unmapped size is returned in the size field and bitmap of pages
+ * in the range of unmapped size is returned in bitmap if flag
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is set.
+ *
+ * No guarantee is made to the user that arbitrary unmaps of iova or size
+ * different from those used in the original mapping call will succeed.
+ */
+struct vfio_iommu_type1_dma_unmap_bitmap {
+   __u32argsz;
+   __u32flags;
+#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
+   __u64iova;/* IO virtual address */
+   __u64size;/* Size of mapping (bytes) */
+   __u64bitmap_size; /* in bytes */
+   void __user *bitmap;  /* one bit per page */
+};
+
+#define VFIO_IOMMU_UNMAP_DMA_GET_BITMAP _IO(VFIO_TYPE, VFIO_BASE + 18)
+


Why not extend VFIO_IOMMU_UNMAP_DMA to support this rather than add an
ioctl that duplicates the functionality and extends it??


We do want old userspace applications to work with new kernel and
vice-versa, right?

If I try to change existing VFIO_IOMMU_UNMAP_DMA ioctl structure, say if
add 'bitmap_size' and 'bitmap' after 'size', with below code in old
kernel, old kernel & new userspace will work.

  minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);

  if (copy_from_user(, (void __user *)arg, minsz))
  return -EFAULT;

  if (unmap.argsz < minsz || unmap.flags)
  return -EINVAL;


With new kernel it would change to:
  minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, bitmap);


No, the minimum structure size still ends at size, we interpret flags
and argsz to learn if the user understands those fields and optionally
include them.  Therefore old userspace on new kernel continues to work.


  if (copy_from_user(, (void __user *)arg, minsz))
  return -EFAULT;

  if (unmap.argsz < minsz || unmap.flags)
  return -EINVAL;

Then old userspace app will fail because unmap.argsz < minsz and might
be copy_from_user would cause seg fault because userspace sdk doesn't
contain new member variables.
We can't change the sequence to keep 'size' as last member, because then
new userspace app on old kernel will interpret it wrong.


If we have new userspace on old kernel, that userspace needs to be able
to learn that this feature exists (new flag in the
vfio_iommu_type1_info struct as suggested below) and only make use of it
when available.  This is why the old kernel checks argsz against minsz.
So long as the user passes something at least minsz in size, we have
compatibility.  The old kernel doesn't understand the GET_DIRTY_BITMAP
flag and will return an error if the user attempts to use it.  Thanks,



Ok. So then VFIO_IOMMU_UNMAP_DMA_GET_BITMAP 

Re: [PATCH v9 Kernel 2/5] vfio iommu: Add ioctl defination to get dirty pages bitmap.

2019-11-14 Thread Kirti Wankhede




On 11/14/2019 1:37 AM, Alex Williamson wrote:

On Thu, 14 Nov 2019 01:07:21 +0530
Kirti Wankhede  wrote:


On 11/13/2019 4:00 AM, Alex Williamson wrote:

On Tue, 12 Nov 2019 22:33:37 +0530
Kirti Wankhede  wrote:
   

All pages pinned by vendor driver through vfio_pin_pages API should be
considered as dirty during migration. IOMMU container maintains a list of
all such pinned pages. Added an ioctl defination to get bitmap of such


definition
   

pinned pages for requested IO virtual address range.


Additionally, all mapped pages are considered dirty when physically
mapped through to an IOMMU, modulo we discussed devices opting in to
per page pinning to indicate finer granularity with a TBD mechanism to
figure out if any non-opt-in devices remain.
   


You mean, in case of device direct assignment (device pass through)?


Yes, or IOMMU backed mdevs.  If vfio_dmas in the container are fully
pinned and mapped, then the correct dirty page set is all mapped pages.
We discussed using the vpfn list as a mechanism for vendor drivers to
reduce their migration footprint, but we also discussed that we would
need a way to determine that all participants in the container have
explicitly pinned their working pages or else we must consider the
entire potential working set as dirty.



How can vendor driver tell this capability to iommu module? Any suggestions?


Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
   include/uapi/linux/vfio.h | 23 +++
   1 file changed, 23 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 35b09427ad9f..6fd3822aa610 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -902,6 +902,29 @@ struct vfio_iommu_type1_dma_unmap {
   #define VFIO_IOMMU_ENABLE_IO(VFIO_TYPE, VFIO_BASE + 15)
   #define VFIO_IOMMU_DISABLE   _IO(VFIO_TYPE, VFIO_BASE + 16)
   
+/**

+ * VFIO_IOMMU_GET_DIRTY_BITMAP - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
+ * struct vfio_iommu_type1_dirty_bitmap)
+ *
+ * IOCTL to get dirty pages bitmap for IOMMU container during migration.
+ * Get dirty pages bitmap of given IO virtual addresses range using
+ * struct vfio_iommu_type1_dirty_bitmap. Caller sets argsz, which is size of
+ * struct vfio_iommu_type1_dirty_bitmap. User should allocate memory to get
+ * bitmap and should set size of allocated memory in bitmap_size field.
+ * One bit is used to represent per page consecutively starting from iova
+ * offset. Bit set indicates page at that offset from iova is dirty.
+ */
+struct vfio_iommu_type1_dirty_bitmap {
+   __u32argsz;
+   __u32flags;
+   __u64iova;  /* IO virtual address */
+   __u64size;  /* Size of iova range */
+   __u64bitmap_size;   /* in bytes */


This seems redundant.  We can calculate the size of the bitmap based on
the iova size.
  


But in kernel space, we need to validate the size of memory allocated by
user instead of assuming user is always correct, right?


What does it buy us for the user to tell us the size?  They could be
wrong, they could be malicious.  The argsz field on the ioctl is mostly
for the handshake that the user is competent, we should get faults from
the copy-user operation if it's incorrect.



It is to mainly fail safe.


+   void __user *bitmap;/* one bit per page */


Should we define that as a __u64* to (a) help with the size
calculation, and (b) assure that we can use 8-byte ops on it?

However, who defines page size?  Is it necessarily the processor page
size?  A physical IOMMU may support page sizes other than the CPU page
size.  It might be more important to indicate the expected page size
than the bitmap size.  Thanks,
  


I see in QEMU and in vfio_iommu_type1 module, page sizes considered for
mapping are CPU page size, 4K. Do we still need to have such argument?


That assumption exists for backwards compatibility prior to supporting
the iova_pgsizes field in vfio_iommu_type1_info.  AFAIK the current
interface has no page size assumptions and we should not add any.


So userspace has iova_pgsizes information, which can be input to this 
ioctl. Bitmap should be considering smallest page size. Does that makes 
sense?



Thanks,
Kirti


Thanks,

Alex





Re: [PATCH v9 Kernel 1/5] vfio: KABI for migration interface for device state

2019-11-14 Thread Kirti Wankhede




+ * Vendor driver should decide whether to partition data section and how to
+ * partition the data section. Vendor driver should return data_offset
+ * accordingly.
+ *
+ * Sequence to be followed for _SAVING|_RUNNING device state or pre-copy phase
+ * and for _SAVING device state or stop-and-copy phase:
+ * a. read pending_bytes. If pending_bytes > 0, go through below steps.
+ * b. read data_offset, indicates kernel driver to write data to staging 
buffer.
+ *Kernel driver should return this read operation only after writing data 
to
+ *staging buffer is done.

May I know under what condition this data_offset will be changed per
each iteration from a-f ?



Its upto vendor driver, if vendor driver maintains multiple partitions
in data section.


So, do you mean each time before doing b (reading data_offset), step a
(reading pending_bytes) is mandatory, otherwise the vendor driver does
not know which data_offset is?


pending_bytes will only tell bytes remaining to transfer from vendor 
driver. On read operation on data_offset, vendor driver should decide 
what to send depending on where he is making data available to userspace.



Then, any lock to wrap step a and b to ensure atomic?


With current QEMU implementation, where migration is single thread, 
there is not need of lock yet. But when we add multi-threaded support 
may be in future then locks will be required in userspace side.


Thanks,
Kirti



Re: [PATCH v2 00/10] Further bitmaps improvements

2019-11-14 Thread Vladimir Sementsov-Ogievskiy
14.11.2019 21:47, Eric Blake wrote:
> On 10/22/19 7:58 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Hi!
>>
>> The main feature here is improvement of _next_dirty_area API, which I'm
>> going to use then for backup / block-copy.
>>
>> v2:
>> 01: just use INT64_MAX instead of adding new constant
>> 08: add separate function nbd_extent_array_convert_to_be and converted
>>  state of NBDExtentArray, to make these things explicit, and avoid
>>  extra memdup.
>> 09: Save part of comment for bitmap_to_extents(), add Eric's r-b
> 
> Is any of this series a bug fix important to get into -rc2?

Nothing

> Or is it safe to defer to the 5.0 timeframe?

Yes, no doubts.

> 
>>
>> Vladimir Sementsov-Ogievskiy (10):
>>    hbitmap: assert that we don't create bitmap larger than INT64_MAX
>>    hbitmap: move hbitmap_iter_next_word to hbitmap.c
>>    hbitmap: unpublish hbitmap_iter_skip_words
>>    hbitmap: drop meta bitmaps as they are unused
>>    block/dirty-bitmap: switch _next_dirty_area and _next_zero to int64_t
>>    block/dirty-bitmap: add _next_dirty API
>>    block/dirty-bitmap: improve _next_dirty_area API
>>    nbd/server: introduce NBDExtentArray
>>    nbd/server: use bdrv_dirty_bitmap_next_dirty_area
>>    block/qcow2-bitmap: use bdrv_dirty_bitmap_next_dirty
>>
> 


-- 
Best regards,
Vladimir


Re: [PATCH v9 Kernel 1/5] vfio: KABI for migration interface for device state

2019-11-14 Thread Kirti Wankhede




On 11/14/2019 2:10 AM, Alex Williamson wrote:

On Thu, 14 Nov 2019 01:47:04 +0530
Kirti Wankhede  wrote:


On 11/14/2019 1:18 AM, Alex Williamson wrote:

On Thu, 14 Nov 2019 00:59:52 +0530
Kirti Wankhede  wrote:
   

On 11/13/2019 11:57 PM, Alex Williamson wrote:

On Wed, 13 Nov 2019 11:24:17 +0100
Cornelia Huck  wrote:
  

On Tue, 12 Nov 2019 15:30:05 -0700
Alex Williamson  wrote:
 

On Tue, 12 Nov 2019 22:33:36 +0530
Kirti Wankhede  wrote:
 

- Defined MIGRATION region type and sub-type.
- Used 3 bits to define VFIO device states.
   Bit 0 => _RUNNING
   Bit 1 => _SAVING
   Bit 2 => _RESUMING
   Combination of these bits defines VFIO device's state during migration
   _RUNNING => Normal VFIO device running state. When its reset, it
indicates _STOPPED state. when device is changed to
_STOPPED, driver should stop device before write()
returns.
   _SAVING | _RUNNING => vCPUs are running, VFIO device is running but
 start saving state of device i.e. pre-copy state
   _SAVING  => vCPUs are stopped, VFIO device should be stopped, and


s/should/must/
 

   save device state,i.e. stop-n-copy state
   _RESUMING => VFIO device resuming state.
   _SAVING | _RESUMING and _RUNNING | _RESUMING => Invalid states


A table might be useful here and in the uapi header to indicate valid
states:


I like that.
 


| _RESUMING | _SAVING | _RUNNING | Description
+---+-+--+--
| 0 |0| 0| Stopped, not saving or resuming (a)
+---+-+--+--
| 0 |0| 1| Running, default state
+---+-+--+--
| 0 |1| 0| Stopped, migration interface in save mode
+---+-+--+--
| 0 |1| 1| Running, save mode interface, iterative
+---+-+--+--
| 1 |0| 0| Stopped, migration resume interface active
+---+-+--+--
| 1 |0| 1| Invalid (b)
+---+-+--+--
| 1 |1| 0| Invalid (c)
+---+-+--+--
| 1 |1| 1| Invalid (d)

I think we need to consider whether we define (a) as generally
available, for instance we might want to use it for diagnostics or a
fatal error condition outside of migration.

Are there hidden assumptions between state transitions here or are
there specific next possible state diagrams that we need to include as
well?


Some kind of state-change diagram might be useful in addition to the
textual description anyway. Let me try, just to make sure I understand
this correctly:
 


During User application initialization, there is one more state change:

0) 0/0/0  stop to running -> 0/0/1


0/0/0 cannot be the initial state of the device, that would imply that
a device supporting this migration interface breaks backwards
compatibility with all existing vfio userspace code and that code needs
to learn to set the device running as part of its initialization.
That's absolutely unacceptable.  The initial device state must be 0/0/1.
   


There isn't any device state for all existing vfio userspace code right
now. So default its assumed to be always running.


Exactly, there is no representation of device state, therefore it's
assumed to be running, therefore when adding a representation of device
state it must default to running.


With migration support, device states are explicitly getting added. For
example, in case of QEMU, while device is getting initialized, i.e. from
vfio_realize(), device_state is set to 0/0/0, but not required to convey
it to vendor driver.


But we have a 0/0/0 state, why would we intentionally keep an internal
state that's inconsistent with the device?


Then with vfio_vmstate_change() notifier, device
state is changed to 0/0/1 when VM/vCPU are transitioned to running, at
this moment device state is conveyed to vendor driver. So vendor driver
doesn't see 0/0/0 state.


But the running state is the state of the device, not the VM or the
vCPU.  Sure we might want to stop the device if the VM/vCPU state is
stopped, but we must accept that the device is running when it's opened
and we shouldn't intentionally maintain inconsistent state.
  

While resuming, for userspace, for example QEMU, device state change is
from 0/0/0 to 1/0/0, vendor driver see 1/0/0 after device basic
initialization is done.


I don't see why this matters, all device_state transitions are written
directly to the 

Re: [PATCH v2 00/10] Further bitmaps improvements

2019-11-14 Thread Eric Blake

On 10/22/19 7:58 AM, Vladimir Sementsov-Ogievskiy wrote:

Hi!

The main feature here is improvement of _next_dirty_area API, which I'm
going to use then for backup / block-copy.

v2:
01: just use INT64_MAX instead of adding new constant
08: add separate function nbd_extent_array_convert_to_be and converted
 state of NBDExtentArray, to make these things explicit, and avoid
 extra memdup.
09: Save part of comment for bitmap_to_extents(), add Eric's r-b


Is any of this series a bug fix important to get into -rc2?  Or is it 
safe to defer to the 5.0 timeframe?




Vladimir Sementsov-Ogievskiy (10):
   hbitmap: assert that we don't create bitmap larger than INT64_MAX
   hbitmap: move hbitmap_iter_next_word to hbitmap.c
   hbitmap: unpublish hbitmap_iter_skip_words
   hbitmap: drop meta bitmaps as they are unused
   block/dirty-bitmap: switch _next_dirty_area and _next_zero to int64_t
   block/dirty-bitmap: add _next_dirty API
   block/dirty-bitmap: improve _next_dirty_area API
   nbd/server: introduce NBDExtentArray
   nbd/server: use bdrv_dirty_bitmap_next_dirty_area
   block/qcow2-bitmap: use bdrv_dirty_bitmap_next_dirty



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 18:05, Keith Packard  wrote:
>
> Peter Maydell  writes:
>
> > I had an idle glance at this implementation, and this:
> >
> >uint32_t pre = opcode_at(>base, ctx->base.pc_next - 4);
> >uint32_t ebreak = opcode_at(>base, ctx->base.pc_next);
> >uint32_t post = opcode_at(>base, ctx->base.pc_next + 4);
> >
> > (where opcode_at() is a wrapper for cpu_ldl_code()) has
> > some unfortunate side effects: if the previous instruction
> > is in the previous MMU page, or the following instruction
> > is in the next MMU page, you might incorrectly trigger
> > an exception (where QEMU will just longjmp straight out of
> > the cpu_ldl_code()) if that other page isn't actually mapped
> > in the guest's page table. You need to be careful not to access
> > code outside the page you're actually on unless you're really
> > going to execute it and are OK with it faulting.
>
> I can't even find the implementation of cpu_ldl_code; the qemu source
> code is somewhat obscure in this area. But, longjmp'ing out of the
> middle of that seems like a bad idea.

It's the way QEMU works -- generally load/store operations
that work on virtual addresses are expected to only return
in the success case; on failure they longjmp out to cause
the guest exception. (Load/stores on physical addresses
generally return a memory transaction status for the caller
to check and handle.) I agree that within the translation code
it's a bit weird and it might be nicer for the translate.c
code to explicitly handle failures to load an insn, but it
would be a bit of an upheaval to try to rewrite it at this point.

cpu_ldl_code() is provided by include/exec/cpu_ldst.h, incidentally
(via preprocessor macros and repeated inclusion of some template
.h files, which is why a grep for the function name misses it).

> > Does your semihosting spec expect to have the semihosting
> > call work if the sequence crosses a page boundary, the
> > code is being executed by a userspace process, and one of
> > the two pages has been paged out by the OS ?
>
> You've seen the entirety of the RISC-V semihosting spec already.  For
> now, perhaps we should limit RISC-V semihosting support to devices
> without paging support and await a more complete spec.
>
> As you suggest, disallowing the sequence from crossing a page boundary
> would be a simple fix, but that would require wording changes to the
> spec.

Yeah, I'm implicitly suggesting that a bit more thought/revision
of the spec might not be a bad idea. Things that are
effectively supposed to be treated like a single instruction
but which can cross cacheline or page boundaries turn out
to be a fertile source of implementation bugs. (The arm
translate.c code has to be quite careful about handling
32-bit Thumb insns that cross pages. The x86 translate.c
code is less careful and may well be buggy in this area.)

thanks
-- PMM



Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Keith Packard
Peter Maydell  writes:

> I had an idle glance at this implementation, and this:
>
>uint32_t pre = opcode_at(>base, ctx->base.pc_next - 4);
>uint32_t ebreak = opcode_at(>base, ctx->base.pc_next);
>uint32_t post = opcode_at(>base, ctx->base.pc_next + 4);
>
> (where opcode_at() is a wrapper for cpu_ldl_code()) has
> some unfortunate side effects: if the previous instruction
> is in the previous MMU page, or the following instruction
> is in the next MMU page, you might incorrectly trigger
> an exception (where QEMU will just longjmp straight out of
> the cpu_ldl_code()) if that other page isn't actually mapped
> in the guest's page table. You need to be careful not to access
> code outside the page you're actually on unless you're really
> going to execute it and are OK with it faulting.

I can't even find the implementation of cpu_ldl_code; the qemu source
code is somewhat obscure in this area. But, longjmp'ing out of the
middle of that seems like a bad idea.

> Does your semihosting spec expect to have the semihosting
> call work if the sequence crosses a page boundary, the
> code is being executed by a userspace process, and one of
> the two pages has been paged out by the OS ?

You've seen the entirety of the RISC-V semihosting spec already.  For
now, perhaps we should limit RISC-V semihosting support to devices
without paging support and await a more complete spec.

As you suggest, disallowing the sequence from crossing a page boundary
would be a simple fix, but that would require wording changes to the
spec.

-- 
-keith


signature.asc
Description: PGP signature


Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Thu, 14 Nov 2019 at 17:39, Keith Packard  wrote:
>
> Peter Maydell  writes:
>
> > That defines the instruction sequence used to make a semihosting
> > call, but not the specification of what the calls are:
> >  * what call numbers perform which functions
> >  * how arguments are passed to the call (registers? parameter
> >blocks in memory? other?)
> >  * the semantics of each function supported (number of arguments,
> >behaviour, error handling)
> >
> > That's really what I had in mind by the overall semihosting spec.
>
> There isn't anything more provided by the RISC-V foundation at this
> point. I'm not sure what else they *should* provide as the goal is to
> match the ARM design, which does specify all of the above.

This isn't obvious if you just say "semihosting".
QEMU currently provides 'semihosting' ABIs for:
 * arm
 * lm32
 * m68k
 * mips
 * nios2
 * xtensa

m68k and nios2 provide basically the same set of calls,
but all the other architectures differe from each other.
"Semihosting" just means "the concept of an ABI from guest
to the debugger or emulator", not a particular ABI.

The ARM semihosting ABI also has a number of warts
which are basically historical legacy. With a clean
sheet you get to avoid some of them. (Notably you could
skip the whole 'negotiate presence of extensions' business
by just getting those parts right from the start. Actually
specifying errno values would also be sensible and is
something the ARM spec sadly does not do and can't really
at this point with multiple implementations in play.)

thanks
-- PMM



Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Keith Packard
Alistair Francis  writes:

> This sounds like something that the platform spec should contain.

I'm frankly happy with it specifying the semantics by reference to the
ARM docs -- that way we can easily share existing code without concern
about subtle semantic differences.

The only thing that would be useful is to clarify the low-level ABI
details about argument passing, but given that the ARM semihosting spec
passes arguments in the standard registers, extrapolating that to RISC-V
is not exactly difficult.

> +Atish +Palmer who are in charge of that.
>
> @Keith maybe you should contact the platform spec group and ask them
> to look into this?

I can certainly ask to have the argument passing details clarified; I
doubt that group would be interested in adopting the whole semihosting
spec and publishing it separately.

-- 
-keith


signature.asc
Description: PGP signature


Re: [PATCH v1] s390x: kvm-unit-tests: a PONG device for Sub Channels tests

2019-11-14 Thread Pierre Morel



On 2019-11-14 14:19, Cornelia Huck wrote:

On Thu, 14 Nov 2019 14:02:35 +0100
Halil Pasic  wrote:


On Thu, 14 Nov 2019 11:38:23 +0100
Cornelia Huck  wrote:


On Wed, 13 Nov 2019 20:02:33 +0100
Pierre Morel  wrote:


...snip...

We made some different design decisions, while aiming essentially for the
same. Maybe it's due to different scope, maybe not. For instance one
can't test IDA with PONG, I guess.

Now that I saw this again, I also recall the discussion of comparing it
with the "testdev" for pci/isa. Anybody knows if these are used by
kvm-unit-tests?


Only by X.

Regards,

Pierre


--
Pierre Morel
IBM Lab Boeblingen




Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Keith Packard
Peter Maydell  writes:

> That defines the instruction sequence used to make a semihosting
> call, but not the specification of what the calls are:
>  * what call numbers perform which functions
>  * how arguments are passed to the call (registers? parameter
>blocks in memory? other?)
>  * the semantics of each function supported (number of arguments,
>behaviour, error handling)
>
> That's really what I had in mind by the overall semihosting spec.

There isn't anything more provided by the RISC-V foundation at this
point. I'm not sure what else they *should* provide as the goal is to
match the ARM design, which does specify all of the above.

-- 
-keith


signature.asc
Description: PGP signature


Re: [PATCH v1] s390x: kvm-unit-tests: a PONG device for Sub Channels tests

2019-11-14 Thread Pierre Morel



On 2019-11-14 14:02, Halil Pasic wrote:

On Thu, 14 Nov 2019 11:38:23 +0100
Cornelia Huck  wrote:


On Wed, 13 Nov 2019 20:02:33 +0100
Pierre Morel  wrote:

Minor nit for $SUBJECT: this isn't a kvm-unit-tests patch, that's just
one consumer :)

And subchannel is one word in s390-speak.


OK, surely better for grep




[..]


Some questions regarding this device and its intended usage:

- What are you trying to test? Basic ccw processing, or something more
   specific? Is there any way you can use the kvm-unit-test
   infrastructure to test basic processing with an existing device?

I'm also curious about the big picture (what is in scope and what out
of scope). Your design should be evaluated in the light of intended
usage.

BTW have you had a look at this abandoned patch-set of mine:

https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg04220.html



No, but now yes.

Yes, it is something similar, I can remember as you did it.




We made some different design decisions, while aiming essentially for the
same. Maybe it's due to different scope, maybe not. For instance one
can't test IDA with PONG, I guess.


No not now.


Regards,

Pierre


--
Pierre Morel
IBM Lab Boeblingen




Re: [QEMU-SECURITY] ide: fix assertion in ide_dma_cb() to prevent qemu DoS from quest

2019-11-14 Thread Alexander Popov
On 07.11.2019 01:05, Alexander Popov wrote:
> On 06.11.2019 15:05, Michael S. Tsirkin wrote:
>> Do you want to cook up a patch like this then?
> 
> Yes, I will take this task and return with a patch.
> 
> Thanks!

I've just sent the v2 of the patch.
Looking forward to your feedback.

Best regards,
Alexander



[PATCH v2 1/1] ide: check DMA transfer size in ide_dma_cb() to prevent qemu DoS from quests

2019-11-14 Thread Alexander Popov
The commit a718978ed58a from July 2015 introduced the assertion which
implies that the size of successful DMA transfers handled in ide_dma_cb()
should be multiple of 512 (the size of a sector). But guest systems can
initiate DMA transfers that don't fit this requirement.

PoC for Linux that uses SCSI_IOCTL_SEND_COMMAND to perform such an ATA
command and crash qemu:

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define CMD_SIZE 2048

struct scsi_ioctl_cmd_6 {
unsigned int inlen;
unsigned int outlen;
unsigned char cmd[6];
unsigned char data[];
};

int main(void)
{
intptr_t fd = 0;
struct scsi_ioctl_cmd_6 *cmd = NULL;

cmd = malloc(CMD_SIZE);
if (!cmd) {
perror("[-] malloc");
return 1;
}

memset(cmd, 0, CMD_SIZE);
cmd->inlen = 1337;
cmd->cmd[0] = READ_6;

fd = open("/dev/sg0", O_RDONLY);
if (fd == -1) {
perror("[-] opening sg");
return 1;
}

printf("[+] sg0 is opened\n");

printf("[.] qemu should break here:\n");
fflush(stdout);
ioctl(fd, SCSI_IOCTL_SEND_COMMAND, cmd);
printf("[-] qemu didn't break\n");

free(cmd);

return 1;
}

For fixing that let's check the number of bytes prepared for the transfer
by the prepare_buf() handler. If it is not a multiple of 512 then end
the DMA transfer with an error.

That also fixes the I/O stall in guests after a DMA transfer request
for less than the size of a sector.

Signed-off-by: Alexander Popov 
---
 hw/ide/core.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 754ff4dc34..85aac614f0 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -849,6 +849,7 @@ static void ide_dma_cb(void *opaque, int ret)
 int64_t sector_num;
 uint64_t offset;
 bool stay_active = false;
+int32_t prepared = 0;
 
 if (ret == -EINVAL) {
 ide_dma_error(s);
@@ -892,12 +893,10 @@ static void ide_dma_cb(void *opaque, int ret)
 n = s->nsector;
 s->io_buffer_index = 0;
 s->io_buffer_size = n * 512;
-if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) {
-/* The PRDs were too short. Reset the Active bit, but don't raise an
- * interrupt. */
-s->status = READY_STAT | SEEK_STAT;
-dma_buf_commit(s, 0);
-goto eot;
+prepared = s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size);
+if (prepared % 512) {
+ide_dma_error(s);
+return;
 }
 
 trace_ide_dma_cb(s, sector_num, n, IDE_DMA_CMD_str(s->dma_cmd));
-- 
2.21.0




Re: Convert VMDK to RAW

2019-11-14 Thread Max Reitz
On 14.11.19 17:12, janine.schnei...@fau.de wrote:
> Ladies and Gentlemen,
> 
>  
> 
> I am a PhD student at the Friedrich-Alexander-University
> Erlangen-Nuremberg in Bavaria, Germany and am currently working on a
> forensic reconstruction tool. The tool can be used to analyze physical
> and virtual hard disks and to reconstruct files. I would now like to
> extend the tool so that it is able to analyze VMDK files and convert
> them to raw. Unfortunately I have not been able to understand how to
> correctly unpack and assemble VMDK containers. Since qemu is able to
> convert VMDK to raw, I wanted to ask you if you could explain to me how
> to put the grains together?

Hi,

I’m not quite sure what you mean by a “VMDK container”.  VMDK disk
images can consist of multiple files that are linked together by a
descriptor file.  In theory all you need to do is tell qemu-img to
convert that descriptor file into a raw image.  For example:

(Sorry, I don’t know much about VMware, so all I can do is use qemu
tools to demonstrate)

$ qemu-img create -f vmdk -o subformat=twoGbMaxExtentSparse foo.vmdk 4G
Formatting 'foo.vmdk', fmt=vmdk size=4294967296 compat6=off
hwversion=undefined subformat=twoGbMaxExtentSparse
$ ls
foo-s001.vmdk  foo-s002.vmdk  foo.vmdk
$

In this example, foo.vmdk is the descriptor file and it points to the
other two (data) files:

$ cat foo.vmdk
# Disk DescriptorFile
version=1
CID=6d8d65ed
parentCID=
createType="twoGbMaxExtentSparse"

# Extent description
RW 4194304 SPARSE "foo-s001.vmdk"
RW 4194304 SPARSE "foo-s002.vmdk"

# The Disk Data Base
#DDB

ddb.virtualHWVersion = "4"
ddb.geometry.cylinders = "8322"
ddb.geometry.heads = "16"
ddb.geometry.sectors = "63"
ddb.adapterType = "ide"
$


So to convert this VMDK disk image to a raw image, you’d simply do this:

$ qemu-img convert -f vmdk -O raw -p foo.vmdk foo.img
(100.00/100%)
$

Max



signature.asc
Description: OpenPGP digital signature


Re: [PATCH for-4.2 v2 3/3] block/file-posix: Let post-EOF fallocate serialize

2019-11-14 Thread Max Reitz
On 14.11.19 18:15, Max Reitz wrote:
> On 14.11.19 17:27, Christoph Hellwig wrote:
>> On Fri, Nov 01, 2019 at 04:25:10PM +0100, Max Reitz wrote:
>>> The XFS kernel driver has a bug that may cause data corruption for qcow2
>>> images as of qemu commit c8bb23cbdbe32f.  We can work around it by
>>> treating post-EOF fallocates as serializing up until infinity (INT64_MAX
>>> in practice).
>>
>> This has been fixed in the kernel a while ago.  I don't think it makes
>> sense to work around it in qemu.
> 
> Has it?  It was my understanding that this is fixed by
> https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git/commit/?h=for-next=249bd9087a5264d2b8a974081870e2e27671b4dcwhich

Sorry, broke the link.  Let me try again:

https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git/commit/?h=for-next=249bd9087a5264d2b8a974081870e2e27671b4dc

Max

> has been merged only very recently and is on track to be part of Linux
> 5.5, as far as I understand.
> 
> Max
> 




signature.asc
Description: OpenPGP digital signature


Re: [PATCH RFC] virtio-pci: disable vring processing when bus-mastering is disabled

2019-11-14 Thread Michael Roth
Quoting Michael S. Tsirkin (2019-11-14 03:12:00)
> On Thu, Nov 14, 2019 at 04:10:36AM -0500, Michael S. Tsirkin wrote:
> > On Wed, Nov 13, 2019 at 07:07:36PM -0600, Michael Roth wrote:
> > > Quoting Michael S. Tsirkin (2019-11-13 04:09:02)
> > > > On Tue, Nov 12, 2019 at 11:43:01PM -0600, Michael Roth wrote:
> > > > > Currently the SLOF firmware for pseries guests will disable/re-enable
> > > > > a PCI device multiple times via IO/MEM/MASTER bits of PCI_COMMAND
> > > > > register after the initial probe/feature negotiation, as it tends to
> > > > > work with a single device at a time at various stages like probing
> > > > > and running block/network bootloaders without doing a full reset
> > > > > in-between.
> > > > > 
> > > > > In QEMU, when PCI_COMMAND_MASTER is disabled we disable the
> > > > > corresponding IOMMU memory region, so DMA accesses (including to vring
> > > > > fields like idx/flags) will no longer undergo the necessary
> > > > > translation. Normally we wouldn't expect this to happen since it would
> > > > > be misbehavior on the driver side to continue driving DMA requests.
> > > > > 
> > > > > However, in the case of pseries, with iommu_platform=on, we trigger 
> > > > > the
> > > > > following sequence when tearing down the virtio-blk dataplane 
> > > > > ioeventfd
> > > > > in response to the guest unsetting PCI_COMMAND_MASTER:
> > > > > 
> > > > >   #2  0x55922651 in virtqueue_map_desc 
> > > > > (vdev=vdev@entry=0x56dbcfb0, 
> > > > > p_num_sg=p_num_sg@entry=0x7fffe657e1a8, 
> > > > > addr=addr@entry=0x7fffe657e240, iov=iov@entry=0x7fffe6580240, 
> > > > > max_num_sg=max_num_sg@entry=1024, is_write=is_write@entry=false, 
> > > > > pa=0, sz=0)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:757
> > > > >   #3  0x55922a89 in virtqueue_pop 
> > > > > (vq=vq@entry=0x56dc8660, sz=sz@entry=184)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:950
> > > > >   #4  0x558d3eca in virtio_blk_get_request 
> > > > > (vq=0x56dc8660, s=0x56dbcfb0)
> > > > >   at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:255
> > > > >   #5  0x558d3eca in virtio_blk_handle_vq (s=0x56dbcfb0, 
> > > > > vq=0x56dc8660)
> > > > >   at /home/mdroth/w/qemu.git/hw/block/virtio-blk.c:776
> > > > >   #6  0x5591dd66 in virtio_queue_notify_aio_vq 
> > > > > (vq=vq@entry=0x56dc8660)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1550
> > > > >   #7  0x5591ecef in virtio_queue_notify_aio_vq 
> > > > > (vq=0x56dc8660)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:1546
> > > > >   #8  0x5591ecef in virtio_queue_host_notifier_aio_poll 
> > > > > (opaque=0x56dc86c8)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio.c:2527
> > > > >   #9  0x55d02164 in run_poll_handlers_once 
> > > > > (ctx=ctx@entry=0x5688bfc0, timeout=timeout@entry=0x7fffe65844a8)
> > > > >   at /home/mdroth/w/qemu.git/util/aio-posix.c:520
> > > > >   #10 0x55d02d1b in try_poll_mode (timeout=0x7fffe65844a8, 
> > > > > ctx=0x5688bfc0)
> > > > >   at /home/mdroth/w/qemu.git/util/aio-posix.c:607
> > > > >   #11 0x55d02d1b in aio_poll (ctx=ctx@entry=0x5688bfc0, 
> > > > > blocking=blocking@entry=true)
> > > > >   at /home/mdroth/w/qemu.git/util/aio-posix.c:639
> > > > >   #12 0x55d0004d in aio_wait_bh_oneshot (ctx=0x5688bfc0, 
> > > > > cb=cb@entry=0x558d5130 , 
> > > > > opaque=opaque@entry=0x56de86f0)
> > > > >   at /home/mdroth/w/qemu.git/util/aio-wait.c:71
> > > > >   #13 0x558d59bf in virtio_blk_data_plane_stop 
> > > > > (vdev=)
> > > > >   at /home/mdroth/w/qemu.git/hw/block/dataplane/virtio-blk.c:288
> > > > >   #14 0x55b906a1 in virtio_bus_stop_ioeventfd 
> > > > > (bus=bus@entry=0x56dbcf38)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:245
> > > > >   #15 0x55b90dbb in virtio_bus_stop_ioeventfd 
> > > > > (bus=bus@entry=0x56dbcf38)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio-bus.c:237
> > > > >   #16 0x55b92a8e in virtio_pci_stop_ioeventfd 
> > > > > (proxy=0x56db4e40)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:292
> > > > >   #17 0x55b92a8e in virtio_write_config 
> > > > > (pci_dev=0x56db4e40, address=, val=1048832, 
> > > > > len=)
> > > > >   at /home/mdroth/w/qemu.git/hw/virtio/virtio-pci.c:613
> > > > > 
> > > > > I.e. the calling code is only scheduling a one-shot BH for
> > > > > virtio_blk_data_plane_stop_bh, but somehow we end up trying to process
> > > > > an additional virtqueue entry before we get there. This is likely due
> > > > > to the following check in virtio_queue_host_notifier_aio_poll:
> > > > > 
> > > > >   static bool virtio_queue_host_notifier_aio_poll(void *opaque)
> > > > >   {
> > > > >   EventNotifier *n = opaque;
> > > > >   VirtQueue *vq = container_of(n, 

Re: [PATCH for-4.2 v2 3/3] block/file-posix: Let post-EOF fallocate serialize

2019-11-14 Thread Max Reitz
On 14.11.19 17:27, Christoph Hellwig wrote:
> On Fri, Nov 01, 2019 at 04:25:10PM +0100, Max Reitz wrote:
>> The XFS kernel driver has a bug that may cause data corruption for qcow2
>> images as of qemu commit c8bb23cbdbe32f.  We can work around it by
>> treating post-EOF fallocates as serializing up until infinity (INT64_MAX
>> in practice).
> 
> This has been fixed in the kernel a while ago.  I don't think it makes
> sense to work around it in qemu.

Has it?  It was my understanding that this is fixed by
https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git/commit/?h=for-next=249bd9087a5264d2b8a974081870e2e27671b4dcwhich
has been merged only very recently and is on track to be part of Linux
5.5, as far as I understand.

Max



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v1] s390x: kvm-unit-tests: a PONG device for Sub Channels tests

2019-11-14 Thread Pierre Morel



On 2019-11-14 13:33, Thomas Huth wrote:

On 14/11/2019 11.38, Cornelia Huck wrote:

On Wed, 13 Nov 2019 20:02:33 +0100
Pierre Morel  wrote:

Minor nit for $SUBJECT: this isn't a kvm-unit-tests patch, that's just
one consumer :)


The PONG device accept two commands: PONG_READ and PONG_WRITE
which allow to read from and write to an internal buffer of
1024 bytes.

The QEMU device is named ccw-pong.

Signed-off-by: Pierre Morel 
---
  hw/s390x/Makefile.objs  |   1 +
  hw/s390x/ccw-pong.c | 186 
  include/hw/s390x/pong.h |  47 
  3 files changed, 234 insertions(+)
  create mode 100644 hw/s390x/ccw-pong.c
  create mode 100644 include/hw/s390x/pong.h

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index ee91152..3a83438 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -32,6 +32,7 @@ obj-$(CONFIG_KVM) += tod-kvm.o
  obj-$(CONFIG_KVM) += s390-skeys-kvm.o
  obj-$(CONFIG_KVM) += s390-stattrib-kvm.o s390-mchk.o
  obj-y += s390-ccw.o
+obj-y += ccw-pong.o

Not sure if unconditionally introducing a test device is a good idea.

This definitely needs a CONFIG switch (which can be "y" by default, but
still we should provide a possibility to disable it)


yes, clearly





  obj-y += ap-device.o
  obj-y += ap-bridge.o
  obj-y += s390-sei.o
diff --git a/hw/s390x/ccw-pong.c b/hw/s390x/ccw-pong.c
new file mode 100644
index 000..e7439d5
--- /dev/null
+++ b/hw/s390x/ccw-pong.c
@@ -0,0 +1,186 @@
+/*
+ * CCW PING-PONG

Please add a short description here what this device is all about.



yes





+ * Copyright 2019 IBM Corp.
+ * Author(s): Pierre Morel 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "cpu.h"
+#include "exec/address-spaces.h"
+#include "hw/s390x/css.h"
+#include "hw/s390x/css-bridge.h"
+#include "hw/qdev-properties.h"
+#include "hw/s390x/pong.h"
+
+#define PONG_BUF_SIZE 0x1000
+static char buf[PONG_BUF_SIZE] = "Hello world\n";
+
+static inline int pong_rw(CCW1 *ccw, char *p, int len, bool dir)
+{
+int ret;
+
+ret = address_space_rw(_space_memory, ccw->cda,
+   MEMTXATTRS_UNSPECIFIED,
+   (unsigned char *)buf, len, dir);
+
+return (ret == MEMTX_OK) ? -EIO : 0;

If return code was OK, then you return an EIO error? That looks weird?


Totally weird. it is of course the oposite.

This explain the comment from Connie on the unit check.





+}
+
+/* Handle READ ccw commands from guest */

...snip...

+
+static Property pong_ccw_properties[] = {
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pong_ccw_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->props = pong_ccw_properties;

As long as there are no properties, I think you can simply drop that line.

  Thomas


Yes, right.


Thanks for the comments,

Regards,

Pierre


--
Pierre Morel
IBM Lab Boeblingen




Convert VMDK to RAW

2019-11-14 Thread janine.schneider
Ladies and Gentlemen,

 

I am a PhD student at the Friedrich-Alexander-University Erlangen-Nuremberg
in Bavaria, Germany and am currently working on a forensic reconstruction
tool. The tool can be used to analyze physical and virtual hard disks and to
reconstruct files. I would now like to extend the tool so that it is able to
analyze VMDK files and convert them to raw. Unfortunately I have not been
able to understand how to correctly unpack and assemble VMDK containers.
Since qemu is able to convert VMDK to raw, I wanted to ask you if you could
explain to me how to put the grains together?

 

Many thanks and greetings

Janine Schneider



Re: [PATCH v1] s390x: kvm-unit-tests: a PONG device for Sub Channels tests

2019-11-14 Thread Pierre Morel



On 2019-11-14 11:38, Cornelia Huck wrote:

On Wed, 13 Nov 2019 20:02:33 +0100
Pierre Morel  wrote:

Minor nit for $SUBJECT: this isn't a kvm-unit-tests patch, that's just
one consumer :)



yes, right.





The PONG device accept two commands: PONG_READ and PONG_WRITE
which allow to read from and write to an internal buffer of
1024 bytes.

The QEMU device is named ccw-pong.

Signed-off-by: Pierre Morel 
---
  hw/s390x/Makefile.objs  |   1 +
  hw/s390x/ccw-pong.c | 186 
  include/hw/s390x/pong.h |  47 
  3 files changed, 234 insertions(+)
  create mode 100644 hw/s390x/ccw-pong.c
  create mode 100644 include/hw/s390x/pong.h

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index ee91152..3a83438 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -32,6 +32,7 @@ obj-$(CONFIG_KVM) += tod-kvm.o
  obj-$(CONFIG_KVM) += s390-skeys-kvm.o
  obj-$(CONFIG_KVM) += s390-stattrib-kvm.o s390-mchk.o
  obj-y += s390-ccw.o
+obj-y += ccw-pong.o

Not sure if unconditionally introducing a test device is a good idea.



sure not.





  obj-y += ap-device.o
  obj-y += ap-bridge.o
  obj-y += s390-sei.o
diff --git a/hw/s390x/ccw-pong.c b/hw/s390x/ccw-pong.c
new file mode 100644
index 000..e7439d5
--- /dev/null
+++ b/hw/s390x/ccw-pong.c
@@ -0,0 +1,186 @@
+/*
+ * CCW PING-PONG
+ *
+ * Copyright 2019 IBM Corp.
+ * Author(s): Pierre Morel 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+#include "cpu.h"
+#include "exec/address-spaces.h"
+#include "hw/s390x/css.h"
+#include "hw/s390x/css-bridge.h"
+#include "hw/qdev-properties.h"
+#include "hw/s390x/pong.h"
+
+#define PONG_BUF_SIZE 0x1000
+static char buf[PONG_BUF_SIZE] = "Hello world\n";
+
+static inline int pong_rw(CCW1 *ccw, char *p, int len, bool dir)
+{
+int ret;
+
+ret = address_space_rw(_space_memory, ccw->cda,
+   MEMTXATTRS_UNSPECIFIED,
+   (unsigned char *)buf, len, dir);
+
+return (ret == MEMTX_OK) ? -EIO : 0;
+}
+
+/* Handle READ ccw commands from guest */
+static int handle_payload_read(CcwPONGDevice *dev, CCW1 *ccw)
+{
+CcwDevice *ccw_dev = CCW_DEVICE(dev);
+int len;
+
+if (!ccw->cda) {
+return -EFAULT;
+}
+
+if (ccw->count > PONG_BUF_SIZE) {
+len = PONG_BUF_SIZE;
+ccw_dev->sch->curr_status.scsw.count = ccw->count - PONG_BUF_SIZE;
+} else {
+len = ccw->count;
+ccw_dev->sch->curr_status.scsw.count = 0;
+}
+
+return pong_rw(ccw, buf, len, 1);
+}
+
+/*
+ * Handle WRITE ccw commands to write data to client
+ * The SCSW count is set to the number of bytes not transfered.
+ */
+static int handle_payload_write(CcwPONGDevice *dev, CCW1 *ccw)
+{
+CcwDevice *ccw_dev = CCW_DEVICE(dev);
+int len;
+
+if (!ccw->cda) {
+ccw_dev->sch->curr_status.scsw.count = ccw->count;
+return -EFAULT;
+}
+
+if (ccw->count > PONG_BUF_SIZE) {
+len = PONG_BUF_SIZE;
+ccw_dev->sch->curr_status.scsw.count = ccw->count - PONG_BUF_SIZE;
+} else {
+len = ccw->count;
+ccw_dev->sch->curr_status.scsw.count = 0;
+}
+
+return pong_rw(ccw, buf, len, 0);

Can you please use the dstream infrastructure for read/write handling?

You also seem to miss some basic checks e.g. for short reads/writes
with and without SLI set.


OK,





+}
+
+static int pong_ccw_cb(SubchDev *sch, CCW1 ccw)
+{
+int rc = 0;
+CcwPONGDevice *dev = sch->driver_data;
+
+switch (ccw.cmd_code) {
+case PONG_WRITE:
+rc = handle_payload_write(dev, );
+break;
+case PONG_READ:
+rc = handle_payload_read(dev, );
+break;
+default:
+rc = -ENOSYS;
+break;
+}
+
+if (rc == -EIO) {
+/* I/O error, specific devices generate specific conditions */
+SCHIB *schib = >curr_status;
+
+sch->curr_status.scsw.dstat = SCSW_DSTAT_UNIT_CHECK;
+sch->sense_data[0] = 0x40;/* intervention-req */

This is really odd. If it succeeds, you generate a unit check with
intervention required? Confused. At the very least, this requires some
description as to how this device is supposed to interact with the
driver.



The unit check is only done in case of I/O error.

I thought it was right, since it should never happen.

Yes it needs some documentation.





+schib->scsw.ctrl &= ~SCSW_ACTL_START_PEND;
+schib->scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
+schib->scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+   SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+}
+return rc;
+}
+
+static void pong_ccw_realize(DeviceState *ds, Error **errp)
+{
+uint16_t chpid;
+CcwPONGDevice *dev = CCW_PONG(ds);
+CcwDevice *cdev = 

Re: [RFC PATCH v2 20/26] qcow2: Update L2 bitmap in qcow2_alloc_cluster_link_l2()

2019-11-14 Thread Alberto Garcia
On Tue 05 Nov 2019 12:43:16 PM CET, Max Reitz wrote:

> Speaking of handle_copied(); both elements of Qcow2COWRegion are of
> type unsigned.  handle_copied() doesn’t look like it takes any
> precautions to limit the range to even UINT_MAX (and it should
> probably limit it to INT_MAX).

Or rather, both handle_copied() and handle_alloc() should probably limit
it to BDRV_REQUEST_MAX_BYTES.

Berto



Re: [PATCH for-4.2 v2 3/3] block/file-posix: Let post-EOF fallocate serialize

2019-11-14 Thread Christoph Hellwig
On Fri, Nov 01, 2019 at 04:25:10PM +0100, Max Reitz wrote:
> The XFS kernel driver has a bug that may cause data corruption for qcow2
> images as of qemu commit c8bb23cbdbe32f.  We can work around it by
> treating post-EOF fallocates as serializing up until infinity (INT64_MAX
> in practice).

This has been fixed in the kernel a while ago.  I don't think it makes
sense to work around it in qemu.



Re: [RFC PATCH v2 16/26] qcow2: Add subcluster support to discard_in_l2_slice()

2019-11-14 Thread Max Reitz
On 14.11.19 16:33, Alberto Garcia wrote:
> On Mon 04 Nov 2019 04:07:35 PM CET, Max Reitz wrote:
>>>  /* First remove L2 entries */
>>>  qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
>>> -if (!full_discard && s->qcow_version >= 3) {
>>> +if (has_subclusters(s)) {
>>> +set_l2_entry(s, l2_slice, l2_index + i, 0);
>>> +set_l2_bitmap(s, l2_slice, l2_index + i,
>>> +  full_discard ? 0 : QCOW_L2_BITMAP_ALL_ZEROES);
>>
>> But only for !full_discard, right?
> 
> Hence the conditional operator in my patch, maybe you didn't see it?

Oops, yep, I was just hung up on the if conditional.

Max



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Peter Maydell
On Fri, 25 Oct 2019 at 20:15, Keith Packard  wrote:
> There seems to be convergence on a pretty simple interface which uses
> ebreak surrounded by a couple of specific no-ops:
>
>   slli x0, x0, 0x1f
>   ebreak
>   srai x0, x0, 0x7
>
> There are implementations in rust and openocd, and I've got one for
> picolibc. The risc-v semihosting code is sitting on a branch in my repo
> on github:
>
> https://github.com/keith-packard/qemu/tree/riscv-semihost

I had an idle glance at this implementation, and this:

   uint32_t pre = opcode_at(>base, ctx->base.pc_next - 4);
   uint32_t ebreak = opcode_at(>base, ctx->base.pc_next);
   uint32_t post = opcode_at(>base, ctx->base.pc_next + 4);

(where opcode_at() is a wrapper for cpu_ldl_code()) has
some unfortunate side effects: if the previous instruction
is in the previous MMU page, or the following instruction
is in the next MMU page, you might incorrectly trigger
an exception (where QEMU will just longjmp straight out of
the cpu_ldl_code()) if that other page isn't actually mapped
in the guest's page table. You need to be careful not to access
code outside the page you're actually on unless you're really
going to execute it and are OK with it faulting.

I'm not sure what the best way on QEMU to identify this kind
of multiple-instruction-sequence is -- Richard might have an
idea. One approach would be to always take an exception
(or call a helper function) for the 'ebreak', and then
distinguish semihosting from other kinds of ebreak by doing
the load of the preceding/succeeding instructions at runtime
rather than translate time.

Does your semihosting spec expect to have the semihosting
call work if the sequence crosses a page boundary, the
code is being executed by a userspace process, and one of
the two pages has been paged out by the OS ?

thanks
-- PMM



Re: [Qemu-devel] [PATCH v4 03/14] qapi: Introduce default values for struct members

2019-11-14 Thread Markus Armbruster
Less than thorough review, because I expect the necessary rebase will
require a bit of rewriting here and there.

Max Reitz  writes:

> With this change, it is possible to give default values for struct
> members, as follows:
>
>   What you had to do so far:
>
> # @member: Some description, defaults to 42.
> { 'struct': 'Foo',
>   'data': { '*member': 'int' } }
>
>   What you can do now:
>
> { 'struct': 'Foo',
>   'data': { '*member': { 'type': 'int', 'default': 42 } }
>
> On the C side, this change would remove Foo.has_member, because
> Foo.member is always valid now.  The input visitor deals with setting
> it.  (Naturally, this means that such defaults are useful only for input
> parameters.)
>
> At least three things are left unimplemented:
>
> First, support for alternate data types.  This is because supporting
> them would mean having to allocate the object in the input visitor, and
> then potentially going through multiple levels of nested types.  In any
> case, it would have been difficult and I do not think there is need for
> such support at this point.

I don't mind restricting the 'default' feature to uses we actually have,
at least initially.

I'm afraid I don't fully understand the difficulties you describe, but I
guess that's okay.

> Second, support for null.  The most important reason for this is that
> introspection already uses "'default': null" for "no default, but this
> field is optional".  The second reason is that without support for
> alternate data types, there is not really a point in supporting null.

Also, commit 9d55380b5a "qapi: Remove null from schema language" :)

> Third, full support for default lists.  This has a similar reason to the
> lack of support for alternate data types: Allocating a default list is
> not trivial -- unless the list is empty, which is exactly what we have
> support for.

Your commit message says "for struct members".  What about union
members?  Cases:

* Flat union 'base' members: 'base' is a a struct, possibly implicit.
  Do defaults work in implicit bases, like BlockdevOption's?

* Flat union branch members: these are always struct types, so there's
  nothing for me to ask.  I think.

* Simple union branch members: these are each wrapped in an implicit
  struct type.  Do defaults work?  I'd be totally fine with "nope, not
  implemented, not going to implement it" here.

> Signed-off-by: Max Reitz 
> ---
>  qapi/introspect.json   |   9 +-
>  scripts/qapi/commands.py   |   2 +-
>  scripts/qapi/common.py | 167 +++--
>  scripts/qapi/doc.py|  20 -
>  scripts/qapi/introspect.py |   2 +-
>  scripts/qapi/types.py  |   2 +-
>  scripts/qapi/visit.py  |  38 -
>  7 files changed, 217 insertions(+), 23 deletions(-)

Missing: docs/devel/qapi-code-gen.txt update.

>
> diff --git a/qapi/introspect.json b/qapi/introspect.json
> index 1843c1cb17..db703135f9 100644
> --- a/qapi/introspect.json
> +++ b/qapi/introspect.json
> @@ -198,11 +198,10 @@
>  #
>  # @default: default when used as command parameter.
>  #   If absent, the parameter is mandatory.
> -#   If present, the value must be null.  The parameter is
> -#   optional, and behavior when it's missing is not specified
> -#   here.
> -#   Future extension: if present and non-null, the parameter
> -#   is optional, and defaults to this value.
> +#   If present and null, the parameter is optional, and
> +#   behavior when it's missing is not specified here.
> +#   If present and non-null, the parameter is optional, and
> +#   defaults to this value.
>  #
>  # Since: 2.5
>  ##
> diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
> index b929e07be4..6c407cd4ba 100644
> --- a/scripts/qapi/commands.py
> +++ b/scripts/qapi/commands.py
> @@ -35,7 +35,7 @@ def gen_call(name, arg_type, boxed, ret_type):
>  elif arg_type:
>  assert not arg_type.variants
>  for memb in arg_type.members:
> -if memb.optional:
> +if memb.optional and memb.default is None:
>  argstr += 'arg.has_%s, ' % c_name(memb.name)
>  argstr += 'arg.%s, ' % c_name(memb.name)
>  
> diff --git a/scripts/qapi/common.py b/scripts/qapi/common.py
> index c6754a5856..8c57d0c67a 100644
> --- a/scripts/qapi/common.py
> +++ b/scripts/qapi/common.py
> @@ -14,6 +14,7 @@
>  from __future__ import print_function
>  from contextlib import contextmanager
>  import errno
> +import math
>  import os
>  import re
>  import string
> @@ -800,6 +801,136 @@ def check_if(expr, info):
>  check_if_str(ifcond, info)
>  
>  
> +def check_value_str(info, value):
> +return 'g_strdup(%s)' % to_c_string(value) if type(value) is str else 
> False

What's wrong with isinstance(value, str)?

I'm a happy user of ternaries myself, but this one results in a rather
long line.  Easier to read, I think:

   if isinstance(value, 

Re: [RFC 0/4] POC: Generating realistic block errors

2019-11-14 Thread Tony Asleson
On 9/20/19 12:28 PM, Tony Asleson wrote:
> On 9/20/19 4:22 AM, Stefan Hajnoczi wrote:
>> blkdebug is purely at the QEMU block layer level.  It is not aware of
>> storage controller-specific error information or features.  If you want
>> to inject NVMe- or SCSI-specific errors that make no sense in QEMU's
>> block layer, then trying to do it in blkdebug becomes a layering
>> violation.  This justifies adding a new error injection feature directly
>> into AHCI, virtio-scsi, NVMe, etc devices.
> 
> Good discussion point...
> 
> In my opening use case for this POC I'm generically trying to create an
> unrecoverable media error for a specific sector.  For each of the
> different device types it's different on how that error is conveyed and
> the associated data in transfer.
> 

I would like to get some additional clarification on this point.  Should
I be investing more time integrating my proposed functionality into
blkdebug or other?

Sorry for the long response time, got sidetracked with other stuff.

Thanks,
Tony




Re: [PATCH] Semihost SYS_READC implementation (v4)

2019-11-14 Thread Alistair Francis
On Mon, Nov 11, 2019 at 6:51 AM Peter Maydell  wrote:
>
> On Tue, 5 Nov 2019 at 05:10, Keith Packard  wrote:
> >
> > Peter Maydell  writes:
> >
> > > I'm going to push for somebody actually writing out a
> > > document and putting it somewhere that we can point to
> > > and say "that's the authoritative spec", please...
> > > it doesn't have to be a big formal thing, but I do
> > > think you want it written down, because the whole point
> > > is for multiple implementations and users to interoperate.
> >
> > That happened in June -- I was just looking at the wrong version of the
> > spec. In the current version, which can be found here:
> >
> > https://riscv.org/specifications/
> >
> >The RISC-V Instruction Set Manual
> >Volume I: Unprivileged ISA
> > Document Version 20190608-Base-Ratified
> >
> > Section 2.8 says:
> >
> > Another use of EBREAK is to support “semihosting”, where the
> > execution environment includes a debugger that can provide
> > services over an alternate system call interface built around
> > the EBREAK instruction.  Because the RISC-V base ISA does not
> > provide more than one EBREAK instruction, RISC-V semihosting
> > uses a special sequence of instructions to distinguish a
> > semihosting EBREAK from a debugger inserted EBREAK.
> >
> > slli x0, x0, 0x1f   # Entry NOP
> > ebreak  # Break to debugger
> > srai x0, x0, 7  # NOP encoding the semihosting call 
> > number 7
> >
> > Note that these three instructions must be 32-bit-wide
> > instructions, i.e., they mustn’t be among the compressed 16-bit
> > instructions described in Chapter 16.
> >
> > The shift NOP instructions are still considered available for
> > use as HINTS.
> >
> > Semihosting is a form of service call and would be more
> > naturally encoded as an ECALL using an existing ABI, but this
> > would require the debugger to be able to intercept ECALLs, which
> > is a newer addition to the debug standard.  We intend to move
> > over to using ECALLs with a standard ABI, in which case,
> > semihosting can share a service ABI with an existing standard.
> >
> > We note that ARM processors have also moved to using SVC instead
> > of BKPT for semihosting calls in newer designs.
>
> That defines the instruction sequence used to make a semihosting
> call, but not the specification of what the calls are:
>  * what call numbers perform which functions
>  * how arguments are passed to the call (registers? parameter
>blocks in memory? other?)
>  * the semantics of each function supported (number of arguments,
>behaviour, error handling)
>
> That's really what I had in mind by the overall semihosting spec.

This sounds like something that the platform spec should contain.

+Atish +Palmer who are in charge of that.

@Keith maybe you should contact the platform spec group and ask them
to look into this?

Alistair

>
> PS: the parenthetical about ARM semihosting at the bottom  of
> the text you quote is wrong, incidentally. The traditional insn
> for semihosting on A-profile devices has always been SWI/SVC; it
> is BKPT only on M-profile devices; and the latest revision of the
> semihosting spec recommends the HLT instruction for both A- and M-.
>
> thanks
> -- PMM
>



Re: [RFC PATCH v2 18/26] qcow2: Add subcluster support to expand_zero_clusters_in_l1()

2019-11-14 Thread Alberto Garcia
On Tue 05 Nov 2019 12:05:02 PM CET, Max Reitz wrote:
>> @@ -2102,6 +2103,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
>> *bs, uint64_t *l1_table,
>>  } else {
>>  set_l2_entry(s, l2_slice, j, offset);
>>  }
>> +set_l2_bitmap(s, l2_slice, j, QCOW_L2_BITMAP_ALL_ALLOC);
>>  l2_dirty = true;
>>  }
>
> Technically this isn’t needed because this function is only called
> when downgrading v3 to v2 images (which can’t have subclusters), but
> of course it won’t hurt.

Right, but we need to change the function anyway to either do this or
assert that there are no subclusters. I prefer to do this because it's
trivial but I won't oppose if someone prefers the alternative.

Berto



Re: [PATCH] virtio: fix IO request length in virtio SCSI/block #PSBM-78839

2019-11-14 Thread Denis Plotnikov



On 13.11.2019 16:18, Michael S. Tsirkin wrote:
> On Wed, Nov 13, 2019 at 12:38:48PM +, Denis Plotnikov wrote:
>>
>> On 06.11.2019 15:03, Michael S. Tsirkin wrote:
>>> On Thu, Oct 24, 2019 at 11:34:34AM +, Denis Lunev wrote:
 On 10/24/19 12:28 AM, Michael S. Tsirkin wrote:
> On Fri, Oct 18, 2019 at 02:55:47PM +0300, Denis Plotnikov wrote:
>> From: "Denis V. Lunev" 
>>
>> Linux guests submit IO requests no longer than PAGE_SIZE * max_seg
>> field reported by SCSI controler. Thus typical sequential read with
>> 1 MB size results in the following pattern of the IO from the guest:
>> 8,16   115754 2.766095122  2071  D   R 2095104 + 1008 [dd]
>> 8,16   115755 2.766108785  2071  D   R 2096112 + 1008 [dd]
>> 8,16   115756 2.766113486  2071  D   R 2097120 + 32 [dd]
>> 8,16   115757 2.767668961 0  C   R 2095104 + 1008 [0]
>> 8,16   115758 2.768534315 0  C   R 2096112 + 1008 [0]
>> 8,16   115759 2.768539782 0  C   R 2097120 + 32 [0]
>> The IO was generated by
>> dd if=/dev/sda of=/dev/null bs=1024 iflag=direct
>>
>> This effectively means that on rotational disks we will observe 3 IOPS
>> for each 2 MBs processed. This definitely negatively affects both
>> guest and host IO performance.
>>
>> The cure is relatively simple - we should report lengthy scatter-gather
>> ability of the SCSI controller. Fortunately the situation here is very
>> good. VirtIO transport layer can accomodate 1024 items in one request
>> while we are using only 128. This situation is present since almost
>> very beginning. 2 items are dedicated for request metadata thus we
>> should publish VIRTQUEUE_MAX_SIZE - 2 as max_seg.
>>
>> The following pattern is observed after the patch:
>> 8,16   1 9921 2.662721340  2063  D   R 2095104 + 1024 [dd]
>> 8,16   1 9922 2.662737585  2063  D   R 2096128 + 1024 [dd]
>> 8,16   1 9923 2.665188167 0  C   R 2095104 + 1024 [0]
>> 8,16   1 9924 2.665198777 0  C   R 2096128 + 1024 [0]
>> which is much better.
>>
>> The dark side of this patch is that we are tweaking guest visible
>> parameter, though this should be relatively safe as above transport
>> layer support is present in QEMU/host Linux for a very long time.
>> The patch adds configurable property for VirtIO SCSI with a new default
>> and hardcode option for VirtBlock which does not provide good
>> configurable framework.
>>
>> Unfortunately the commit can not be applied as is. For the real cure we
>> need guest to be fixed to accomodate that queue length, which is done
>> only in the latest 4.14 kernel. Thus we are going to expose the property
>> and tweak it on machine type level.
>>
>> The problem with the old kernels is that they have
>> max_segments <= virtqueue_size restriction which cause the guest
>> crashing in the case of violation.
> This isn't just in the guests: virtio spec also seems to imply this,
> or at least be vague on this point.
>
> So I think it'll need a feature bit.
> Doing that in a safe way will also allow being compatible with old guests.
>
> The only downside is it's a bit more work as we need to
> spec this out and add guest support.
>
>> To fix the case described above in the old kernels we can increase
>> virtqueue_size to 256 and max_segments to 254. The pitfall here is
>> that seabios allows the virtqueue_size-s < 128, however, the seabios
>> patch extending that value to 256 is pending.
> And the fix here is just to limit large vq size to virtio 1.0.
> In that mode it's fine I think:
>
>
>  /* check if the queue is available */
>  if (vp->use_modern) {
>  num = vp_read(>common, virtio_pci_common_cfg, queue_size);
>  if (num > MAX_QUEUE_NUM) {
>  vp_write(>common, virtio_pci_common_cfg, queue_size,
>   MAX_QUEUE_NUM);
>  num = vp_read(>common, virtio_pci_common_cfg, 
> queue_size);
>  }
>  } else {
>  num = vp_read(>legacy, virtio_pci_legacy, queue_num);
>  }
 you mean to put the code like this into virtio_pci_realize() inside QEMU?

 If no, can you pls clarify which component should be touched.

 Den
>>> I mean:
>>>- add an API to change the default queue size
>>>- add a validate features callback, in there check and for modern
>>>  flag set in features increase the queue size
>>>
>>> maybe all this is too much work, we could block this
>>> for transitional devices, but your patch does not do it,
>>> you need to check that legacy is enabled not that modern
>>> is not disabled.
>> To develop the idea of how to adjust queue size further I'd 

Re: [RFC PATCH v2 16/26] qcow2: Add subcluster support to discard_in_l2_slice()

2019-11-14 Thread Alberto Garcia
On Mon 04 Nov 2019 04:07:35 PM CET, Max Reitz wrote:
>>  /* First remove L2 entries */
>>  qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
>> -if (!full_discard && s->qcow_version >= 3) {
>> +if (has_subclusters(s)) {
>> +set_l2_entry(s, l2_slice, l2_index + i, 0);
>> +set_l2_bitmap(s, l2_slice, l2_index + i,
>> +  full_discard ? 0 : QCOW_L2_BITMAP_ALL_ZEROES);
>
> But only for !full_discard, right?

Hence the conditional operator in my patch, maybe you didn't see it?

Berto



Re: [RFC PATCH v2 15/26] qcow2: Add subcluster support to zero_in_l2_slice()

2019-11-14 Thread Alberto Garcia
On Mon 04 Nov 2019 04:10:58 PM CET, Max Reitz wrote:
>>>  qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
>>>  if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
>>> -set_l2_entry(s, l2_slice, l2_index + i, QCOW_OFLAG_ZERO);
>>>  qcow2_free_any_clusters(bs, old_offset, 1, 
>>> QCOW2_DISCARD_REQUEST);
>> 
>> It feels wrong to me to free the cluster before updating the L2
>> entry.
>
> (Although it’s pre-existing, as set_l2_entry() is just an in-cache
> operation anyway :-/)

Yes, I think that if you want to do it afterwards you need to add
another loop after the qcow2_cache_put() call.

Berto



Re: [PATCH] MAINTAINERS: add more bitmap-related to Dirty Bitmaps section

2019-11-14 Thread Eric Blake

On 10/26/19 11:56 AM, Vladimir Sementsov-Ogievskiy wrote:

Let's add bitmaps persistence qcow2 feature and postcopy bitmaps
migration to Dirty Bitmaps section.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  MAINTAINERS | 2 ++
  1 file changed, 2 insertions(+)



I see no reason why this can't go in -rc2, so I'll queue it through my 
NBD tree.



diff --git a/MAINTAINERS b/MAINTAINERS
index 556ce0bfe3..51f31b4011 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1829,6 +1829,8 @@ F: util/hbitmap.c
  F: block/dirty-bitmap.c
  F: include/qemu/hbitmap.h
  F: include/block/dirty-bitmap.h
+F: qcow2-bitmap.c
+F: migration/block-dirty-bitmap.c
  F: tests/test-hbitmap.c
  F: docs/interop/bitmaps.rst
  T: git https://github.com/jnsnow/qemu.git bitmaps



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Braille device (chardev/baum.c) is unable to detect the TTY correctly and does not act on graphic console connect/disconnect

2019-11-14 Thread Teemu Kuusisto
Hello,

As a blind developer I would be very happy to use QEMU's baum chardev for a 
braille display. Unfortunately, this device fails to detect the tty in which 
the spice client is running. I would like to improve this device but I don't 
yet know how to achieve a better solution.

The current code calls qemu_console_get_window_id() to get the tty. This 
function returns zero, which causes the code to skip even the default behavior 
of brlapi's brlapi__enterTtyMode() (including checcking some env variables such 
as CONTROLVT). Furthermore, window id sounds like something different 
than a tty number, maybe a number of X display? The code does not currently 
consider the fact that the lifetime of the connected graphical consoles is 
not the same as the lifetime of the VM.

I hardcoded a tty number to the sources of QEMU version 3.0.0 to gladly verify 
that the device is working besides these problems.

So, I have a handful of open questions to start with: Is it possible to get 
callbacks for connect and disconnect of a graphical console (like spice and 
vnc)? How? Is it further possible to get any information of the connected 
client to determine the tty, and possibly sub-windows too (see 
brlapi__enterTtyModeWithPath), in which the client is running? Such events 
should lead to calls of brlapi__EnterTtyMode() and brlapi__leaveTtyMode().

To get this device working properly would be a life changing lifehack for me so 
I highly appreciate any help!
It would allow the use of Linux side by side with Windows and ChromiumOS, which 
is a big leap in this rather poorly accessible world.
If this is successful, I'm looking forward to spreading the word and helping 
others too!

With kind regards and informally on behalf of the Finnish braille authority,
-- Teemu Kuusisto



Re: [EXTERNAL]Re: [PATCH v2 2/5] MAINTAINERS: Adjust maintainership for Fulong 2E board

2019-11-14 Thread chen huacai
Hi, all,

On Thu, Nov 14, 2019 at 8:34 PM Aleksandar Markovic
 wrote:
>
> Hi, Philippe,
>
> > From: Philippe Mathieu-Daudé 
> >
> > Hi Aleksandar,
> >
> > On 11/13/19 2:47 PM, Aleksandar Markovic wrote:
> > > From: Aleksandar Markovic 
> > >
> > > Change the maintainership for Fulong 2E board to improve its quality.
> >
> > IIRC you told me once this board is named Fuloong, and its CPU is a
> ‎> Loongson, both with 2x 'o' :) I have a patch renaming the various
> > occurrences.
> >
>
> I still think that the oficial name is "Fuloong 2E", however, it is
> shortened to "Fulong 2E" quite often in communication, and, it seems,
> sometimes even in various docs/app notes etc.
>
> Can perhaps Huacai Chen enlighten us regarding the right spelling?
The right spelling is Fuloong.

>
> > > Signed-off-by: Aleksandar Markovic 
> > > ---
> > >   MAINTAINERS | 7 ---
> > >   1 file changed, 4 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index fd9ba32..3bf2144 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -976,9 +976,10 @@ S: Maintained
> > >   F: hw/mips/mips_r4k.c
> > >
> > >   Fulong 2E
> > > -M: Aleksandar Markovic 
> > > -R: Aleksandar Rikalo 
> > > -S: Odd Fixes
> > > +M: Philippe Mathieu-Daudé 
> >
> > I am happy to keep the Fuloong working, but this will be on my personal
> > (hobbyist) time. This area is not a priority for my employer, so I'll
> > use my personal email: f4...@amsat.org.
> >
>
> OK.
>
> > The original author is active on the Linux kernel, let ask him if he'd
> > be OK to keep an eye on his work.
> >
> > Huacai, would you agree to be listed as a reviewer on the Fuloong
> > related QEMU files? You don't have to worry about the generic QEMU code
> > plate (like keeping API up to date) I'll manage that, but I'd like to
> > have you listed to assert the hardware is properly modeled.
> >
> > You would appear as:
> > R: Huacai Chen 
> >
>
> That is a great idea!
>
> Please, Chen, get involved, you would be very welcomed, there is a place
> for you in QEMU community!
I'm sorry that I'm busy now, but I think I will do something in QEMU
in the next year.

>
>
> > > +R: Hervé Poussineau 
> >
> > I don't think Hervé is interested by this board, he has not modified the
> > code.
> >
> > > +R: Aleksandar Markovic 
> > > +S: Maintained
> >
> > Let keep it as "Odd Fixes" :)
> >
>
> OK.
>
> >Odd Fixes:   It has a maintainer but they don't have
> > time to do much other than throw the odd
> > patch in.
> >
> > >   F: hw/mips/mips_fulong2e.c
> > >   F: hw/isa/vt82c686.c
> > >   F: hw/pci-host/bonito.c
> > >
> >
> > So the patch would be:
> >
> > -- 8< --
> >   Fulong 2E
> > -M: Aleksandar Markovic 
> > -R: Aleksandar Rikalo 
> > +M: Philippe Mathieu-Daudé 
> > +R: Aleksandar Markovic 
> > +R: Huacai Chen 
> >   S: Odd Fixes
> >   F: hw/mips/mips_fulong2e.c
> >   F: hw/isa/vt82c686.c
>
> Plus possible s/Fulong 2E/Fuloong 2E/
>
> > ---
> >
> > But let's wait to see what Huacai Chen thinks of it, before posting it.
> >
> > Thanks for taking care of those changes!
> >
> > Phil.
> >
> >
>
> Thank you!
>
> Aleksandar
Thanks,

Huacai



[PATCH v2 for 5.0 5/6] linux-user: Add support for get/set RTC PLL correction using ioctls

2019-11-14 Thread Filip Bozuta
This patch implements functionalities of following ioctls:

RTC_PLL_GET - Get PLL correction

Read the PLL correction for RTCs that support PLL. The PLL correction
is returned in the following structure:

struct rtc_pll_info {
int pll_ctrl;/* placeholder for fancier control */
int pll_value;   /* get/set correction value */
int pll_max; /* max +ve (faster) adjustment value */
int pll_min; /* max -ve (slower) adjustment value */
int pll_posmult; /* factor for +ve correction */
int pll_negmult; /* factor for -ve correction */
long pll_clock;  /* base PLL frequency */
};

A pointer to this structure should be passed as the third
ioctl's argument.

RTC_PLL_SET - Set PLL correction

Sets the PLL correction for RTCs that support PLL. The PLL correction
that is set is specified by the rtc_pll_info structure pointed to by
the third ioctl's' argument.

Implementation notes:

All ioctls in this patch have pointer to a structure rtc_pll_info
as their third argument. All elements of this structure are of
type 'int', except the last one that is of type 'long'. That is
the reason why a separate target structure (target_rtc_pll_info)
is defined in linux-user/syscall_defs. The rest of the
implementation is straightforward.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h|  2 ++
 linux-user/syscall_defs.h  | 14 ++
 linux-user/syscall_types.h |  9 +
 3 files changed, 25 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index e4d89c2..a8dd235 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -87,6 +87,8 @@
  IOCTL(RTC_EPOCH_SET, IOC_W, MK_PTR(TYPE_ULONG))
  IOCTL(RTC_WKALM_RD, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm)))
  IOCTL(RTC_WKALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm)))
+ IOCTL(RTC_PLL_GET, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info)))
+ IOCTL(RTC_PLL_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info)))
 
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 37504a2..8370f41 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -763,6 +763,16 @@ struct target_pollfd {
 #define TARGET_KDSETLED0x4B32  /* set led state [lights, not flags] */
 #define TARGET_KDSIGACCEPT 0x4B4E
 
+struct target_rtc_pll_info {
+int pll_ctrl;
+int pll_value;
+int pll_max;
+int pll_min;
+int pll_posmult;
+int pll_negmult;
+abi_long pll_clock;
+};
+
 /* real time clock ioctls */
 #define TARGET_RTC_AIE_ON   TARGET_IO('p', 0x01)
 #define TARGET_RTC_AIE_OFF  TARGET_IO('p', 0x02)
@@ -782,6 +792,10 @@ struct target_pollfd {
 #define TARGET_RTC_EPOCH_SETTARGET_IOW('p', 0x0e, abi_ulong)
 #define TARGET_RTC_WKALM_RD TARGET_IOR('p', 0x10, struct rtc_wkalrm)
 #define TARGET_RTC_WKALM_SETTARGET_IOW('p', 0x0f, struct rtc_wkalrm)
+#define TARGET_RTC_PLL_GET  TARGET_IOR('p', 0x11,  
\
+   struct target_rtc_pll_info)
+#define TARGET_RTC_PLL_SET  TARGET_IOW('p', 0x12,  
\
+   struct target_rtc_pll_info)
 
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h
index 820bc8e..4027272 100644
--- a/linux-user/syscall_types.h
+++ b/linux-user/syscall_types.h
@@ -271,6 +271,15 @@ STRUCT(rtc_wkalrm,
TYPE_CHAR, /* pending */
MK_STRUCT(STRUCT_rtc_time)) /* time */
 
+STRUCT(rtc_pll_info,
+   TYPE_INT, /* pll_ctrl */
+   TYPE_INT, /* pll_value */
+   TYPE_INT, /* pll_max */
+   TYPE_INT, /* pll_min */
+   TYPE_INT, /* pll_posmult */
+   TYPE_INT, /* pll_negmult */
+   TYPE_LONG) /* pll_clock */
+
 STRUCT(blkpg_ioctl_arg,
TYPE_INT, /* op */
TYPE_INT, /* flags */
-- 
2.7.4




[PATCH v2 for 5.0 1/6] linux-user: Add support for enable/disable RTC features using ioctls

2019-11-14 Thread Filip Bozuta
This patch implements functionalities of following ioctls:

RTC_AIE_ON, RTC_AIE_OFF - Alarm interrupt enable on/off

Enable or disable the alarm interrupt, for RTCs that support
alarms.  The third ioctl's argument is ignored.

RTC_UIE_ON, RTC_UIE_OFF - Update interrupt enable on/off

Enable or disable the interrupt on every clock update, for
RTCs that support this once-per-second interrupt.  The third
ioctl's argument is ignored.

RTC_PIE_ON, RTC_PIE_OFF - Periodic interrupt enable on/off

Enable or disable the periodic interrupt, for RTCs that sup‐
port these periodic interrupts.  The third ioctl's argument
is ignored.  Only a privileged process (i.e., one having the
CAP_SYS_RESOURCE capability) can enable the periodic interrupt
if the frequency is currently set above the value specified in
/proc/sys/dev/rtc/max-user-freq.

RTC_WIE_ON, RTC_WIE_OFF - Watchdog interrupt enable on/off

Enable or disable the Watchdog interrupt, for RTCs that sup-
port this Watchdog interrupt. The third ioctl's argument is
ignored.

Implementation notes:

Since all of involved ioctls have NULL as their third argument,
their implementation was straightforward.

The line '#include ' was added to recognize
preprocessor definitions for these ioctls.  This needs to be
done only once in this series of commits.  Also, the content
of this file (with respect to ioctl definitions) remained
unchanged for a long time, therefore there is no need to
worry about supporting older Linux kernel version.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h   |  9 +
 linux-user/syscall.c  |  1 +
 linux-user/syscall_defs.h | 10 ++
 3 files changed, 20 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index c6b9d6a..97741c7 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -69,6 +69,15 @@
  IOCTL(KDSETLED, 0, TYPE_INT)
  IOCTL_SPECIAL(KDSIGACCEPT, 0, do_ioctl_kdsigaccept, TYPE_INT)
 
+ IOCTL(RTC_AIE_ON, 0, TYPE_NULL)
+ IOCTL(RTC_AIE_OFF, 0, TYPE_NULL)
+ IOCTL(RTC_UIE_ON, 0, TYPE_NULL)
+ IOCTL(RTC_UIE_OFF, 0, TYPE_NULL)
+ IOCTL(RTC_PIE_ON, 0, TYPE_NULL)
+ IOCTL(RTC_PIE_OFF, 0, TYPE_NULL)
+ IOCTL(RTC_WIE_ON, 0, TYPE_NULL)
+ IOCTL(RTC_WIE_OFF, 0, TYPE_NULL)
+
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
  IOCTL(BLKRRPART, 0, TYPE_NULL)
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ce399a5..74c3c08 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -107,6 +107,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "linux_loop.h"
 #include "uname.h"
 
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 98c2119..f91579a 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -763,6 +763,16 @@ struct target_pollfd {
 #define TARGET_KDSETLED0x4B32  /* set led state [lights, not flags] */
 #define TARGET_KDSIGACCEPT 0x4B4E
 
+/* real time clock ioctls */
+#define TARGET_RTC_AIE_ON   TARGET_IO('p', 0x01)
+#define TARGET_RTC_AIE_OFF  TARGET_IO('p', 0x02)
+#define TARGET_RTC_UIE_ON   TARGET_IO('p', 0x03)
+#define TARGET_RTC_UIE_OFF  TARGET_IO('p', 0x04)
+#define TARGET_RTC_PIE_ON   TARGET_IO('p', 0x05)
+#define TARGET_RTC_PIE_OFF  TARGET_IO('p', 0x06)
+#define TARGET_RTC_WIE_ON   TARGET_IO('p', 0x0f)
+#define TARGET_RTC_WIE_OFF  TARGET_IO('p', 0x10)
+
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
 #define TARGET_FIOGETOWN   TARGET_IOR('f', 123, int)
-- 
2.7.4




[PATCH v2 for 5.0 6/6] linux-user: Add support for read/clear RTC voltage low detector using ioctls

2019-11-14 Thread Filip Bozuta
RTC_VL_READ - Read voltage low detection information

Read the voltage low for RTCs that support voltage low.
The third ioctl's' argument points to an int in which
the voltage low is returned.

RTC_VL_CLR - Clear voltage low information

Clear the information about voltage low for RTCs that
support voltage low. The third ioctl(2) argument is
ignored.

Implementation notes:

Since one ioctl has a pointer to 'int' as its third agrument,
and another ioctl has NULL as its third argument, their
implementation was straightforward.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h   | 2 ++
 linux-user/syscall_defs.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index a8dd235..371c25e 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -89,6 +89,8 @@
  IOCTL(RTC_WKALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm)))
  IOCTL(RTC_PLL_GET, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info)))
  IOCTL(RTC_PLL_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_pll_info)))
+ IOCTL(RTC_VL_READ, IOC_R, MK_PTR(TYPE_INT))
+ IOCTL(RTC_VL_CLR, 0, TYPE_NULL)
 
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 8370f41..af4f366 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -796,6 +796,8 @@ struct target_rtc_pll_info {
struct target_rtc_pll_info)
 #define TARGET_RTC_PLL_SET  TARGET_IOW('p', 0x12,  
\
struct target_rtc_pll_info)
+#define TARGET_RTC_VL_READ  TARGET_IOR('p', 0x13, int)
+#define TARGET_RTC_VL_CLR   TARGET_IO('p', 0x14)
 
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
-- 
2.7.4




[PATCH v2 for 5.0 2/6] linux-user: Add support for read/set RTC time and alarm using ioctls

2019-11-14 Thread Filip Bozuta
This patch implements functionalities of following ioctls:

RTC_RD_TIME - Read RTC time

Returns this RTC's time in the following structure:

struct rtc_time {
int tm_sec;
int tm_min;
int tm_hour;
int tm_mday;
int tm_mon;
int tm_year;
int tm_wday; /* unused */
int tm_yday; /* unused */
int tm_isdst;/* unused */
};

The fields in this structure have the same meaning and ranges
as for the tm structure described in gmtime man page.  A pointer
to this structure should be passed as the third ioctl's argument.

RTC_SET_TIME - Set RTC time

Sets this RTC's time to the time specified by the rtc_time
structure pointed to by the third ioctl's argument.  To set
the RTC's time the process must be privileged (i.e., have the
CAP_SYS_TIME capability).

RTC_ALM_READ, RTC_ALM_SET - Read/Set alarm time

Read and set the alarm time, for RTCs that support alarms.
The alarm interrupt must be separately enabled or disabled
using the RTC_AIE_ON, RTC_AIE_OFF requests.  The third
ioctl's argument is a pointer to an rtc_time structure.  Only
the tm_sec, tm_min, and tm_hour fields of this structure are
used.

Implementation notes:

All ioctls in this patch have pointer to a structure rtc_time
as their third argument. That is the reason why corresponding
definition is added in linux-user/syscall_types.h. Since all
elements of this structure are of type 'int', the rest of the
implementation is straightforward.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h|  4 
 linux-user/syscall_defs.h  |  4 
 linux-user/syscall_types.h | 11 +++
 3 files changed, 19 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index 97741c7..f472794 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -77,6 +77,10 @@
  IOCTL(RTC_PIE_OFF, 0, TYPE_NULL)
  IOCTL(RTC_WIE_ON, 0, TYPE_NULL)
  IOCTL(RTC_WIE_OFF, 0, TYPE_NULL)
+ IOCTL(RTC_ALM_READ, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
+ IOCTL(RTC_ALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
+ IOCTL(RTC_RD_TIME, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
+ IOCTL(RTC_SET_TIME, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
 
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index f91579a..f0bf09d 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -772,6 +772,10 @@ struct target_pollfd {
 #define TARGET_RTC_PIE_OFF  TARGET_IO('p', 0x06)
 #define TARGET_RTC_WIE_ON   TARGET_IO('p', 0x0f)
 #define TARGET_RTC_WIE_OFF  TARGET_IO('p', 0x10)
+#define TARGET_RTC_ALM_READ TARGET_IOR('p', 0x08, struct rtc_time)
+#define TARGET_RTC_ALM_SET  TARGET_IOW('p', 0x07, struct rtc_time)
+#define TARGET_RTC_RD_TIME  TARGET_IOR('p', 0x09, struct rtc_time)
+#define TARGET_RTC_SET_TIME TARGET_IOW('p', 0x0a, struct rtc_time)
 
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h
index 4e36983..a35072a 100644
--- a/linux-user/syscall_types.h
+++ b/linux-user/syscall_types.h
@@ -255,6 +255,17 @@ STRUCT(blkpg_partition,
MK_ARRAY(TYPE_CHAR, BLKPG_DEVNAMELTH), /* devname */
MK_ARRAY(TYPE_CHAR, BLKPG_VOLNAMELTH)) /* volname */
 
+STRUCT(rtc_time,
+   TYPE_INT, /* tm_sec */
+   TYPE_INT, /* tm_min */
+   TYPE_INT, /* tm_hour */
+   TYPE_INT, /* tm_mday */
+   TYPE_INT, /* tm_mon */
+   TYPE_INT, /* tm_year */
+   TYPE_INT, /* tm_wday */
+   TYPE_INT, /* tm_yday */
+   TYPE_INT) /* tm_isdst */
+
 STRUCT(blkpg_ioctl_arg,
TYPE_INT, /* op */
TYPE_INT, /* flags */
-- 
2.7.4




[PATCH v2 for 5.0 0/6] linux-user: Add support for real time clock ioctls

2019-11-14 Thread Filip Bozuta
Add ioctls for all RTC features that are currently supported in linux kernel.

This series covers following iocts:

* RTC_AIE_ON
* RTC_AIE_OFF
* RTC_UIE_ON
* RTC_UIE_OFF
* RTC_PIE_ON
* RTC_PIE_OFF
* RTC_WIE_ON
* RTC_WIE_OFF
* RTC_ALM_SET
* RTC_ALM_READ
* RTC_RD_TIME
* RTC_SET_TIME
* RTC_IRQP_READ
* RTC_IRQP_SET
* RTC_EPOCH_READ
* RTC_EPOCH_SET
* RTC_WKALM_SET
* RTC_WKALM_RD
* RTC_PLL_GET
* RTC_PLL_SET
* RTC_VL_READ
* RTC_VL_CLR

The functionalities of individual ioctls were described in this series
patch commit messages.

Testing method:

Mini test programs were written for each ioctl. Those programs were
compiled (sometimes using cross-compilers) for the following 
architectures:

* Intel 64-bit (little endian)
* Power pc 32-bit (big endian)
* Power pc 64-bit (big endian)

The corresponding native programs were executed without using 
QEMU on following hosts:

* Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz
*.7447A, (ppc32 host)

All applicable compiled programs were in turn executed through QEMU
and the results obtained were the same ones gotten for native
execution.

Example of a test program:

For ioctl RTC_RD_TIME we have used the following test program:

#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define ERROR -1

int main()
{

int fd = open("/dev/rtc", O_RDWR | O_NONBLOCK);

if(fd == ERROR)
{
perror("open");
return -1;
}

struct rtc_time cur_time;

if(ioctl(fd, RTC_RD_TIME, _time) < 0)
{
perror("ioctl");
return -1;
}

printf("Second: %d, Minute: %d, Hour: %d, Day: %d, Month: %d, Year: 
%d,",
cur_time.tm_sec, cur_time.tm_min, cur_time.tm_hour, 
cur_time.tm_mday, cur_time.tm_mon, cur_time.tm_year);

return 0;
}



Filip Bozuta (6):
  linux-user: Add support for enable/disable RTC features using ioctls
  linux-user: Add support for read/set RTC time and alarm using ioctls
  linux-user: Add support for read/set RTC periodic interrupt and epoch
using ioctls
  linux-user: Add support for get/set RTC wakeup alarm using ioctls
  linux-user: Add support for get/set RTC PLL correction using ioctls
  linux-user: Add support for read/clear RTC voltage low detector using
ioctls

 linux-user/ioctls.h| 23 +++
 linux-user/syscall.c   |  1 +
 linux-user/syscall_defs.h  | 36 
 linux-user/syscall_types.h | 25 +
 4 files changed, 85 insertions(+)

-- 
2.7.4




[PATCH v2 for 5.0 3/6] linux-user: Add support for read/set RTC periodic interrupt and epoch using ioctls

2019-11-14 Thread Filip Bozuta
This patch implements functionalities of following ioctls:

RTC_IRQP_READ, RTC_IRQP_SET - Read/Set IRQ rate

Read and set the frequency for periodic interrupts, for RTCs
that support periodic interrupts.  The periodic interrupt must
be separately enabled or disabled using the RTC_PIE_ON,
RTC_PIE_OFF requests.  The third ioctl's argument is an
unsigned long * or an unsigned long, respectively.  The value
is the frequency in interrupts per second.  The set of allow‐
able frequencies is the multiples of two in the range 2 to
8192.  Only a privileged process (i.e., one having the
CAP_SYS_RESOURCE capability) can set frequencies above the
value specified in /proc/sys/dev/rtc/max-user-freq.  (This
file contains the value 64 by default.)

RTC_EPOCH_READ, RTC_EPOCH_SET - Read/Set epoch

Many RTCs encode the year in an 8-bit register which is either
interpreted as an 8-bit binary number or as a BCD number.  In
both cases, the number is interpreted relative to this RTC's
Epoch.  The RTC's Epoch is initialized to 1900 on most systems
but on Alpha and MIPS it might also be initialized to 1952,
1980, or 2000, depending on the value of an RTC register for
the year.  With some RTCs, these operations can be used to
read or to set the RTC's Epoch, respectively.  The third
ioctl's argument is an unsigned long * or an unsigned long,
respectively, and the value returned (or assigned) is the
Epoch.  To set the RTC's Epoch the process must be privileged
(i.e., have the CAP_SYS_TIME capability).

Implementation notes:

All ioctls in this patch have a pointer to 'ulong' as their
third argument. That is the reason why corresponding parts
of added code in linux-user/syscall_defs.h contain special
handling related to 'ulong' type: they use 'abi_ulong' type
to make sure that ioctl's code is calculated correctly for
both 32-bit and 64-bit targets. Also, 'MK_PTR(TYPE_ULONG)'
is used for the similar reason in linux-user/ioctls.h.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h   | 4 
 linux-user/syscall_defs.h | 4 
 2 files changed, 8 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index f472794..fa2fe7f 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -81,6 +81,10 @@
  IOCTL(RTC_ALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
  IOCTL(RTC_RD_TIME, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
  IOCTL(RTC_SET_TIME, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_time)))
+ IOCTL(RTC_IRQP_READ, IOC_R, MK_PTR(TYPE_ULONG))
+ IOCTL(RTC_IRQP_SET, IOC_W, MK_PTR(TYPE_ULONG))
+ IOCTL(RTC_EPOCH_READ, IOC_R, MK_PTR(TYPE_ULONG))
+ IOCTL(RTC_EPOCH_SET, IOC_W, MK_PTR(TYPE_ULONG))
 
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index f0bf09d..bbfa935 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -776,6 +776,10 @@ struct target_pollfd {
 #define TARGET_RTC_ALM_SET  TARGET_IOW('p', 0x07, struct rtc_time)
 #define TARGET_RTC_RD_TIME  TARGET_IOR('p', 0x09, struct rtc_time)
 #define TARGET_RTC_SET_TIME TARGET_IOW('p', 0x0a, struct rtc_time)
+#define TARGET_RTC_IRQP_READTARGET_IOR('p', 0x0b, abi_ulong)
+#define TARGET_RTC_IRQP_SET TARGET_IOW('p', 0x0c, abi_ulong)
+#define TARGET_RTC_EPOCH_READ   TARGET_IOR('p', 0x0d, abi_ulong)
+#define TARGET_RTC_EPOCH_SETTARGET_IOW('p', 0x0e, abi_ulong)
 
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
-- 
2.7.4




[PATCH v2 for 5.0 4/6] linux-user: Add support for get/set RTC wakeup alarm using ioctls

2019-11-14 Thread Filip Bozuta
This patch implements functionalities of following ioctls:

RTC_WKALM_SET, RTC_WKALM_GET - Get/Set wakeup alarm

Some RTCs support a more powerful alarm interface, using these
ioctls to read or write the RTC's alarm time (respectively)
with this structure:

struct rtc_wkalrm {
unsigned char enabled;
unsigned char pending;
struct rtc_time time;
};

The enabled flag is used to enable or disable the alarm inter‐
rupt, or to read its current status; when using these calls,
RTC_AIE_ON and RTC_AIE_OFF are not used.  The pending flag is
used by RTC_WKALM_RD to report a pending interrupt (so it's
mostly useless on Linux, except when talking to the RTC man‐
aged by EFI firmware).  The time field is as used with
RTC_ALM_READ and RTC_ALM_SET except that the tm_mday, tm_mon,
and tm_year fields are also valid.  A pointer to this struc‐
ture should be passed as the third ioctl's argument.

Implementation notes:

All ioctls in this patch have pointer to a structure rtc_wkalrm
as their third argument. That is the reason why corresponding
definition is added in linux-user/syscall_types.h. Since all
elements of this structure are either of type 'unsigned char'
or 'struct rtc_time' (that was covered in one of previous
patches), the rest of the implementation is straightforward.

Signed-off-by: Filip Bozuta 
---
 linux-user/ioctls.h| 2 ++
 linux-user/syscall_defs.h  | 2 ++
 linux-user/syscall_types.h | 5 +
 3 files changed, 9 insertions(+)

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index fa2fe7f..e4d89c2 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -85,6 +85,8 @@
  IOCTL(RTC_IRQP_SET, IOC_W, MK_PTR(TYPE_ULONG))
  IOCTL(RTC_EPOCH_READ, IOC_R, MK_PTR(TYPE_ULONG))
  IOCTL(RTC_EPOCH_SET, IOC_W, MK_PTR(TYPE_ULONG))
+ IOCTL(RTC_WKALM_RD, IOC_R, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm)))
+ IOCTL(RTC_WKALM_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_rtc_wkalrm)))
 
  IOCTL(BLKROSET, IOC_W, MK_PTR(TYPE_INT))
  IOCTL(BLKROGET, IOC_R, MK_PTR(TYPE_INT))
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index bbfa935..37504a2 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -780,6 +780,8 @@ struct target_pollfd {
 #define TARGET_RTC_IRQP_SET TARGET_IOW('p', 0x0c, abi_ulong)
 #define TARGET_RTC_EPOCH_READ   TARGET_IOR('p', 0x0d, abi_ulong)
 #define TARGET_RTC_EPOCH_SETTARGET_IOW('p', 0x0e, abi_ulong)
+#define TARGET_RTC_WKALM_RD TARGET_IOR('p', 0x10, struct rtc_wkalrm)
+#define TARGET_RTC_WKALM_SETTARGET_IOW('p', 0x0f, struct rtc_wkalrm)
 
 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) ||
\
defined(TARGET_XTENSA)
diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h
index a35072a..820bc8e 100644
--- a/linux-user/syscall_types.h
+++ b/linux-user/syscall_types.h
@@ -266,6 +266,11 @@ STRUCT(rtc_time,
TYPE_INT, /* tm_yday */
TYPE_INT) /* tm_isdst */
 
+STRUCT(rtc_wkalrm,
+   TYPE_CHAR, /* enabled */
+   TYPE_CHAR, /* pending */
+   MK_STRUCT(STRUCT_rtc_time)) /* time */
+
 STRUCT(blkpg_ioctl_arg,
TYPE_INT, /* op */
TYPE_INT, /* flags */
-- 
2.7.4




Re: [PATCH v5] iotests: Test NBD client reconnection

2019-11-14 Thread Eric Blake

On 11/11/19 9:39 PM, Andrey Shinkevich wrote:

The test for an NBD client. The NBD server is disconnected after the
client write request. The NBD client should reconnect and complete
the write operation.

Suggested-by: Denis V. Lunev 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Tested-by: Eric Blake 
---
v5:  "" were replaced with '' in the test except the function comments.
 The 'quick' word was added to the 'qroup' file next to the test #277.


Queuing for 4.2-rc2 through my NBD tree.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v5 00/20] Add virtual device fuzzing support

2019-11-14 Thread Alexander Bulekov

On 11/14/19 5:55 AM, Darren Kenny wrote:

Hi Alexander,

A quick comment on the fact that you omitted any Reviewed-by's that
you have received so far.

Was that intentional?

No - I'll find a way to add them.
sorry about that
-Alex


Thanks,

Darren.

On Wed, Nov 13, 2019 at 10:50:41PM +, Oleinik, Alexander wrote:

This series adds a framework for coverage-guided fuzzing of
virtual-devices. Fuzzing targets are based on qtest and can make use of
the libqos abstractions.

V5:
* misc fixes addressing V4 comments
* cleanup in-process handlers/globals in libqtest.c
* small fixes to fork-based fuzzing and support for multiple workers
* changes to the virtio-net fuzzer to kick after each vq add

V4:
* add/transfer license headers to new files
* restructure the added QTestClientTransportOps struct
* restructure the FuzzTarget struct and fuzzer skeleton
* fork-based fuzzer now directly mmaps shm over the coverage bitmaps
* fixes to i440 and virtio-net fuzz targets
* undo the changes to qtest_memwrite
* possible to build /fuzz and /all in the same build-dir
* misc fixes to address V3 comments

V3:
* rebased onto v4.1.0+
* add the fuzzer as a new build-target type in the build-system
* add indirection to qtest client/server communication functions
* remove ramfile and snapshot-based fuzzing support
* add i440fx fuzz-target as a reference for developers.
* add linker-script to assist with fork-based fuzzer

V2:
* split off changes to qos virtio-net and qtest server to other patches
* move vl:main initialization into new func: qemu_init
* moved useful functions from qos-test.c to a separate object
* use struct of function pointers for add_fuzz_target(), instead of
  arguments
* move ramfile to migration/qemu-file
* rewrite fork-based fuzzer pending patch to libfuzzer
* pass check-patch

Alexander Bulekov (20):
 softmmu: split off vl.c:main() into main.c
 libqos: Rename i2c_send and i2c_recv
 fuzz: Add FUZZ_TARGET module type
 qtest: add qtest_server_send abstraction
 libqtest: Add a layer of abstraciton to send/recv
 module: check module wasn't already initialized
 qtest: add in-process incoming command handler
 tests: provide test variables to other targets
 libqos: split qos-test and libqos makefile vars
 libqos: move useful qos-test funcs to qos_external
 libqtest: make bufwrite rely on the TransportOps
 libqtest: add in-process qtest.c tx/rx handlers
 fuzz: add configure flag --enable-fuzzing
 fuzz: Add target/fuzz makefile rules
 fuzz: add fuzzer skeleton
 fuzz: add support for fork-based fuzzing.
 fuzz: add support for qos-assisted fuzz targets
 fuzz: add i440fx fuzz targets
 fuzz: add virtio-net fuzz target
 fuzz: add documentation to docs/devel/

Makefile |  16 ++-
Makefile.objs    |   4 +
Makefile.target  |  18 ++-
configure    |  39 ++
docs/devel/fuzzing.txt   | 119 ++
exec.c   |  12 +-
include/qemu/module.h    |   4 +-
include/sysemu/qtest.h   |   4 +
include/sysemu/sysemu.h  |   4 +
main.c   |  53 
qtest.c  |  31 -
tests/Makefile.include   |  75 +--
tests/fuzz/Makefile.include  |  11 ++
tests/fuzz/fork_fuzz.c   |  55 +
tests/fuzz/fork_fuzz.h   |  23 
tests/fuzz/fork_fuzz.ld  |  37 ++
tests/fuzz/fuzz.c    | 179 +++
tests/fuzz/fuzz.h    |  94 ++
tests/fuzz/i440fx_fuzz.c | 176 ++
tests/fuzz/qos_fuzz.c    | 232 +++
tests/fuzz/qos_fuzz.h    |  33 +
tests/fuzz/virtio_net_fuzz.c | 100 +++
tests/libqos/i2c.c   |  10 +-
tests/libqos/i2c.h   |   4 +-
tests/libqos/qos_external.c  | 168 +
tests/libqos/qos_external.h  |  28 +
tests/libqtest.c | 108 ++--
tests/libqtest.h |   4 +
tests/pca9552-test.c |  10 +-
tests/qos-test.c | 140 +
util/module.c    |   7 ++
vl.c |  38 ++
32 files changed, 1607 insertions(+), 229 deletions(-)
create mode 100644 docs/devel/fuzzing.txt
create mode 100644 main.c
create mode 100644 tests/fuzz/Makefile.include
create mode 100644 tests/fuzz/fork_fuzz.c
create mode 100644 tests/fuzz/fork_fuzz.h
create mode 100644 tests/fuzz/fork_fuzz.ld
create mode 100644 tests/fuzz/fuzz.c
create mode 100644 tests/fuzz/fuzz.h
create mode 100644 tests/fuzz/i440fx_fuzz.c
create mode 100644 tests/fuzz/qos_fuzz.c
create mode 100644 tests/fuzz/qos_fuzz.h
create mode 100644 tests/fuzz/virtio_net_fuzz.c
create mode 100644 tests/libqos/qos_external.c
create mode 100644 tests/libqos/qos_external.h

--
2.23.0





--
===
I recently changed my last name from Oleinik to Bulekov
===


  1   2   >