This is nasty.
https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
When preprocessed, we get:
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 0a83f237259f..6b8d3660240a 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
unsigned int cpu)
write_lock_irqsave(&prv->lock, flags);
rqd_ins = &prv->rql;
+
+#if 0
list_for_each_entry ( rqd, &prv->rql, rql )
+#else
+ for ( (rqd) = ({
+ typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+ ((&prv->rql)->next);
+ (typeof(*(rqd)) *)
+ ((char *)__mptr -
+ __builtin_offsetof(typeof(*(rqd)),rql) );
+ });
+ &(rqd)->rql != // <-- problem expression
+ (&prv->rql);
+ (rqd) = ({
+ typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+ ((rqd)->rql.next);
+ (typeof(*(rqd)) *)
+ ((char *)__mptr -
+ __builtin_offsetof(typeof(*(rqd)),rql) );
+ })
+ )
+#endif
{
/* Remember first unused queue index. */
if ( !rqi_unused && rqd->id > rqi )
The alignment of csched2_runqueue_data is 8, while that of csched2_private is 4.
prv's list_head for rql is at +28 (+0x1c), and when the iterator reaches
the list head, list_for_each_entry() performs a buggily-typed
container_of(), treating a csched2_private as if it were a
csched2_runqueue_data.
It functions because the result is only used in an address equality check,
but the mistyped, misaligned pointer is why UBSAN objects.
This seems to fix the issue:
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 6b8d3660240a..ab938942d75f 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -537,7 +537,8 @@ struct csched2_private {
unsigned int ratelimit_us; /* Rate limiting for this
scheduler */
unsigned int active_queues; /* Number of active
runqueues */
- struct list_head rql; /* List of
runqueues */
+ struct list_head rql /* List of
runqueues */
+ __aligned(alignof(struct csched2_runqueue_data));
cpumask_t initialized; /* CPUs part of this
scheduler */
struct list_head sdom; /* List of domains (for debug
key) */
but it's obviously not a viable fix. I can't help feeling that the bug
is really in the list macros.
~Andrew