In the case that 'extra' isn't a multiple of uint32_t, the calculation rounds
the number of bytes up, causing later logic to read unrelated bytes beyond the
end of the object.

Also, asserting that the object is within TRACE_EXTRA_MAX, but truncating it
in release builds is rude.  Instead, reject any out-of-spec records, leaving
enough of a message to identify the faulty caller.

There is one buggy race record, TRC_RTDS_BUDGET_BURN.  As it must remain
__packed (as cur_budget is misaligned), change bool has_extratime to uint32_t
to compensate.

The new printk() can also be hit by HVMOP_xentrace, although no over-read is
possible.  This has no business being a hypercall in the first place, as it
can't be used outside of custom local debugging, so extend the uint32_t
requirement to HVMOP_xentrace too.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: George Dunlap <george.dun...@eu.citrix.com>
CC: Ian Jackson <i...@xenproject.org>
CC: Jan Beulich <jbeul...@suse.com>
CC: Stefano Stabellini <sstabell...@kernel.org>
CC: Wei Liu <w...@xen.org>
CC: Julien Grall <jul...@xen.org>
CC: Dario Faggioli <dfaggi...@suse.com>
CC: Meng Xu <men...@cis.upenn.edu>

v2:
 * Adjust HVMOP_xentrace to avoid hitting the printk()
 * Fix TRC_RTDS_BUDGET_BURN
 * Adjust wording in __trace_var()
---
 xen/arch/x86/hvm/hvm.c |  5 +++--
 xen/common/sched/rt.c  | 24 +++++++++++++-----------
 xen/common/trace.c     | 21 ++++++++++-----------
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7b48a1b925bb..09cf6330ad26 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -5063,8 +5063,9 @@ long do_hvm_op(unsigned long op, 
XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( copy_from_guest(&tr, arg, 1 ) )
             return -EFAULT;
 
-        if ( tr.extra_bytes > sizeof(tr.extra)
-             || (tr.event & ~((1u<<TRC_SUBCLS_SHIFT)-1)) )
+        if ( tr.extra_bytes % sizeof(uint32_t) ||
+             tr.extra_bytes > sizeof(tr.extra) ||
+             tr.event >> TRC_SUBCLS_SHIFT )
             return -EINVAL;
 
         /* Cycles will be taken at the vmexit and vmenter */
diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c
index c24cd2ac3200..c58edca0de84 100644
--- a/xen/common/sched/rt.c
+++ b/xen/common/sched/rt.c
@@ -968,18 +968,20 @@ burn_budget(const struct scheduler *ops, struct rt_unit 
*svc, s_time_t now)
     /* TRACE */
     {
         struct __packed {
-            unsigned unit:16, dom:16;
+            uint16_t unit, dom;
             uint64_t cur_budget;
-            int delta;
-            unsigned priority_level;
-            bool has_extratime;
-        } d;
-        d.dom = svc->unit->domain->domain_id;
-        d.unit = svc->unit->unit_id;
-        d.cur_budget = (uint64_t) svc->cur_budget;
-        d.delta = delta;
-        d.priority_level = svc->priority_level;
-        d.has_extratime = svc->flags & RTDS_extratime;
+            uint32_t delta;
+            uint32_t priority_level;
+            uint32_t has_extratime;
+        } d = {
+            .unit            = svc->unit->unit_id,
+            .dom             = svc->unit->domain->domain_id,
+            .cur_budget      = svc->cur_budget,
+            .delta           = delta,
+            .priority_level  = svc->priority_level,
+            .has_extratime   = !!(svc->flags & RTDS_extratime),
+        };
+
         trace_var(TRC_RTDS_BUDGET_BURN, 1,
                   sizeof(d),
                   (unsigned char *) &d);
diff --git a/xen/common/trace.c b/xen/common/trace.c
index a2a389a1c7c3..4297ff505fb9 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -686,22 +686,21 @@ void __trace_var(u32 event, bool_t cycles, unsigned int 
extra,
     unsigned long flags;
     u32 bytes_to_tail, bytes_to_wrap;
     unsigned int rec_size, total_size;
-    unsigned int extra_word;
     bool_t started_below_highwater;
 
     if( !tb_init_done )
         return;
 
-    /* Convert byte count into word count, rounding up */
-    extra_word = (extra / sizeof(u32));
-    if ( (extra % sizeof(u32)) != 0 )
-        extra_word++;
-    
-    ASSERT(extra_word <= TRACE_EXTRA_MAX);
-    extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX);
-
-    /* Round size up to nearest word */
-    extra = extra_word * sizeof(u32);
+    /*
+     * extra data needs to be an exact multiple of uint32_t to prevent the
+     * later logic over-reading the object.  Reject out-of-spec records.  Any
+     * failure here is an error in the caller.
+     */
+    if ( extra % sizeof(uint32_t) ||
+         extra / sizeof(uint32_t) > TRACE_EXTRA_MAX )
+        return printk_once(XENLOG_WARNING
+                           "Trace event %#x bad size %u, discarding\n",
+                           event, extra);
 
     if ( (tb_event_mask & event) == 0 )
         return;
-- 
2.11.0


Reply via email to