Implement the gettimexattrs64 and getcrosststampattrs callbacks in the
ptp_vmclock driver to provide clock quality attributes through the new
PTP_SYS_OFFSET_EXTENDED_ATTRS and PTP_SYS_OFFSET_PRECISE_ATTRS ioctls.

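The ptp_vmclock device exposes:
- error_bound: time_maxerror_nanosec plus the counter frequency error
  (counter_period_maxerror_rate_frac_sec) accumulated over the elapsed
  counter ticks, saturated to the 32-bit range of the UAPI field
- clock_status: Mapped from the device's clock_status field
- timescale: Determined from time_type (UTC, TAI, monotonic, etc.); UTC
  is reported as TAI because the returned time has already been
  TAI-adjusted
- counter_id / counter_value: the counter type and the cycle value
  sampled for the timestamp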

The legacy ioctls return -EINVAL when clock_status is UNRELIABLE since
they have no way to communicate clock state to userspace. The attrs
ioctls have a status field for this purpose, so they treat UNRELIABLE
as success and let userspace check the status field.

To avoid a race where the hypervisor could update clock_status between
the timestamp call and the UNRELIABLE check, the clock state is captured
inside the seq_count loop for a consistent snapshot with the timestamp.

Signed-off-by: Arthur Kiyanovski <[email protected]>
---
 drivers/ptp/ptp_vmclock.c | 209 +++++++++++++++++++++++++++++++++++---
 1 file changed, 193 insertions(+), 16 deletions(-)

diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c
index 8b630eb..350625e 100644
--- a/drivers/ptp/ptp_vmclock.c
+++ b/drivers/ptp/ptp_vmclock.c
@@ -53,6 +53,17 @@ struct vmclock_state {
        char *name;
 };
 
+/**
+ * struct vmclock_crosststamp_ctx - context for get_device_system_crosststamp()
+ * @st: vmclock device state
+ * @attrs: PTP clock attributes output, or NULL if not wanted; filled inside
+ *         the seq_count loop for a consistent snapshot with the timestamp
+ */
+struct vmclock_crosststamp_ctx {
+       struct vmclock_state *st;
+       struct ptp_clock_attributes *attrs;
+};
+
 #define VMCLOCK_MAX_WAIT ms_to_ktime(100)
 
 /* Require at least the flags field to be present. All else can be optional. */
@@ -95,14 +106,123 @@ static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
        return false;
 }
 
+static uint8_t vmclock_get_ptp_timescale(uint8_t vmclock_time_type)
+{
+       switch (vmclock_time_type) {
+       case VMCLOCK_TIME_UTC:
+               return PTP_TIMESCALE_UTC;
+       case VMCLOCK_TIME_TAI:
+               return PTP_TIMESCALE_TAI;
+       case VMCLOCK_TIME_MONOTONIC:
+               return PTP_TIMESCALE_MONOTONIC;
+       default:        /* any other time_type value */
+               return PTP_TIMESCALE_UNKNOWN;
+       }
+}
+
+static uint8_t vmclock_get_ptp_status(uint8_t vmclock_status)
+{
+       switch (vmclock_status) {
+       case VMCLOCK_STATUS_UNKNOWN:
+               return PTP_CLOCK_STATUS_UNKNOWN;
+       case VMCLOCK_STATUS_INITIALIZING:
+               return PTP_CLOCK_STATUS_INITIALIZING;
+       case VMCLOCK_STATUS_SYNCHRONIZED:
+               return PTP_CLOCK_STATUS_SYNCED;
+       case VMCLOCK_STATUS_FREERUNNING:
+               return PTP_CLOCK_STATUS_FREE_RUNNING;
+       case VMCLOCK_STATUS_UNRELIABLE:
+               return PTP_CLOCK_STATUS_UNRELIABLE;
+       default:        /* any other status value is reported as unknown */
+               return PTP_CLOCK_STATUS_UNKNOWN;
+       }
+}
+
+static void vmclock_populate_ptp_attributes(struct vmclock_state *st,
+                                           struct ptp_clock_attributes *att,
+                                           uint64_t delta,
+                                           uint64_t cycle)
+{
+       uint64_t maxerror_ns = UINT_MAX;
+
+       if (!att)
+               return;
+
+       /* error_bound stays at UINT_MAX unless the hypervisor flags the
+        * base error as valid.
+        */
+       if (VMCLOCK_FIELD_PRESENT(st->clk, time_maxerror_nanosec) &&
+           (le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_TIME_MAXERROR_VALID)) {
+               maxerror_ns = le64_to_cpu(st->clk->time_maxerror_nanosec);
+
+               /* If frequency error is also valid, accumulate it over the
+                * delta, saturating wherever the u64 nanosecond sum could wrap.
+                */
+               if (VMCLOCK_FIELD_PRESENT(st->clk, counter_period_maxerror_rate_frac_sec) &&
+                   (le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_PERIOD_MAXERROR_VALID)) {
+                       uint64_t maxerror_rate, err_hi, err_frac, growth_ns;
+
+                       if (st->clk->counter_period_shift >= 128) {
+                               maxerror_ns = U64_MAX;
+                               goto saturate;
+                       }
+
+                       maxerror_rate = le64_to_cpu(st->clk->counter_period_maxerror_rate_frac_sec);
+                       err_frac = mul_u64_u64_shr_add_u64(&err_hi, delta,
+                                                          maxerror_rate,
+                                                          st->clk->counter_period_shift,
+                                                          0);
+                       /* >= keeps headroom for the sub-second term below */
+                       if (err_hi >= U64_MAX / NSEC_PER_SEC) {
+                               maxerror_ns = U64_MAX;
+                               goto saturate;
+                       }
+
+                       growth_ns = (err_hi * NSEC_PER_SEC) +
+                                   mul_u64_u64_shr(err_frac, NSEC_PER_SEC, 64);
+
+                       /* Saturate rather than wrap when adding the base error */
+                       if (U64_MAX - growth_ns < maxerror_ns)
+                               maxerror_ns = U64_MAX;
+                       else
+                               maxerror_ns += growth_ns;
+               }
+       }
+
+saturate:
+       /* PTP UAPI error_bound is 32-bit nanoseconds */
+       att->error_bound = (maxerror_ns > UINT_MAX) ?
+               UINT_MAX : (uint32_t)maxerror_ns;
+       att->timescale = vmclock_get_ptp_timescale(st->clk->time_type);
+       /* tai_adjust() already converted UTC to TAI before we're called */
+       if (st->clk->time_type == VMCLOCK_TIME_UTC)
+               att->timescale = PTP_TIMESCALE_TAI;
+       att->status = vmclock_get_ptp_status(st->clk->clock_status);
+
+       att->counter_value = cycle;
+       switch (st->cs_id) {
+       case CSID_X86_TSC:
+               att->counter_id = PTP_COUNTER_X86_TSC;
+               break;
+       case CSID_ARM_ARCH_COUNTER:
+               att->counter_id = PTP_COUNTER_ARM_ARCH;
+               break;
+       default:
+               att->counter_id = PTP_COUNTER_UNKNOWN;
+               break;
+       }
+}
+
 static int vmclock_get_crosststamp(struct vmclock_state *st,
                                   struct ptp_system_timestamp *sts,
                                   struct system_counterval_t *system_counter,
-                                  struct timespec64 *tspec)
+                                  struct timespec64 *tspec,
+                                  struct ptp_clock_attributes *attrs)
 {
        ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
        struct system_time_snapshot systime_snapshot;
        uint64_t cycle, delta, seq, frac_sec;
+       uint8_t clock_status = VMCLOCK_STATUS_UNKNOWN;
 
 #ifdef CONFIG_X86
        /*
@@ -122,9 +242,6 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
                 */
                virt_rmb();
 
-               if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
-                       return -EINVAL;
-
                /*
                 * When invoked for gettimex64(), fill in the pre/post system
                 * times. The simple case is when system time is based on the
@@ -163,6 +280,18 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
                if (!tai_adjust(st->clk, &tspec->tv_sec))
                        return -EINVAL;
 
+               /*
+                * Capture clock state inside the seq_count loop for a
+                * consistent snapshot with the timestamp. The attrs path
+                * reports it to userspace via the status field; the legacy
+                * path saves it for the UNRELIABLE check after the loop.
+                */
+               if (attrs)
+                       vmclock_populate_ptp_attributes(st, attrs, delta,
+                                                       cycle);
+               else
+                       clock_status = st->clk->clock_status;
+
                /*
                 * This pairs with a write barrier in the hypervisor
                 * which populates this structure.
@@ -186,6 +315,17 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
                        sts->post_ts = sts->pre_ts;
        }
 
+       /*
+        * If attrs is set, attributes were already populated inside the
+        * seq_count loop. Return success even for UNRELIABLE — the attrs
+        * ioctl can report the status to userspace.
+        */
+       if (attrs)
+               return 0;
+
+       if (clock_status == VMCLOCK_STATUS_UNRELIABLE)
+               return -EINVAL;
+
        return 0;
 }
 
@@ -198,7 +338,8 @@ static int vmclock_get_crosststamp(struct vmclock_state *st,
 static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st,
                                            struct ptp_system_timestamp *sts,
                                             struct system_counterval_t *system_counter,
-                                           struct timespec64 *tspec)
+                                           struct timespec64 *tspec,
+                                           struct ptp_clock_attributes *attrs)
 {
        struct pvclock_vcpu_time_info *pvti = this_cpu_pvti();
        unsigned int pvti_ver;
@@ -209,7 +350,8 @@ static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st,
        do {
                pvti_ver = pvclock_read_begin(pvti);
 
-               ret = vmclock_get_crosststamp(st, sts, system_counter, tspec);
+               ret = vmclock_get_crosststamp(st, sts, system_counter, tspec,
+                                            attrs);
                if (ret)
                        break;
 
@@ -238,17 +380,19 @@ static int ptp_vmclock_get_time_fn(ktime_t *device_time,
                                   struct system_counterval_t *system_counter,
                                   void *ctx)
 {
-       struct vmclock_state *st = ctx;
+       struct vmclock_crosststamp_ctx *vctx = ctx;
+       struct vmclock_state *st = vctx->st;
        struct timespec64 tspec;
        int ret;
 
 #ifdef SUPPORT_KVMCLOCK
        if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
                ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
-                                                      &tspec);
+                                                      &tspec, vctx->attrs);
        else
 #endif
-               ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);
+               ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec,
+                                            vctx->attrs);
 
        if (!ret)
                *device_time = timespec64_to_ktime(tspec);
@@ -256,12 +400,11 @@ static int ptp_vmclock_get_time_fn(ktime_t *device_time,
        return ret;
 }
 
-static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
-                                     struct system_device_crosststamp *xtstamp)
+static int ptp_vmclock_do_getcrosststamp(struct vmclock_crosststamp_ctx *vctx,
+                                        struct system_device_crosststamp *xtstamp)
 {
-       struct vmclock_state *st = container_of(ptp, struct vmclock_state,
-                                               ptp_clock_info);
-       int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st,
+       struct vmclock_state *st = vctx->st;
+       int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, vctx,
                                                NULL, xtstamp);
 #ifdef SUPPORT_KVMCLOCK
        /*
@@ -278,13 +421,23 @@ static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
                    systime_snapshot.cs_id == CSID_X86_KVM_CLK) {
                        WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id);
                        ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn,
-                                                           st, NULL, xtstamp);
+                                                           vctx, NULL, xtstamp);
                }
        }
 #endif
        return ret;
 }
 
+static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
+                                     struct system_device_crosststamp *xtstamp)
+{
+       struct vmclock_state *st = container_of(ptp, struct vmclock_state,
+                                               ptp_clock_info);
+       struct vmclock_crosststamp_ctx vctx = { .st = st };
+       /* .attrs is left NULL: legacy path, no attributes are populated */
+       return ptp_vmclock_do_getcrosststamp(&vctx, xtstamp);
+}
+
 /*
  * PTP clock operations
  */
@@ -311,7 +464,29 @@ static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *t
        struct vmclock_state *st = container_of(ptp, struct vmclock_state,
                                                ptp_clock_info);
 
-       return vmclock_get_crosststamp(st, sts, NULL, ts);
+       return vmclock_get_crosststamp(st, sts, NULL, ts, NULL);
+}
+
+static int ptp_vmclock_gettimexattrs(struct ptp_clock_info *ptp,
+                                    struct timespec64 *ts,
+                                    struct ptp_system_timestamp *sts,
+                                    struct ptp_clock_attributes *att)
+{
+       struct vmclock_state *st = container_of(ptp, struct vmclock_state,
+                                               ptp_clock_info);
+       /* No system_counter output here; att is passed as the attrs argument */
+       return vmclock_get_crosststamp(st, sts, NULL, ts, att);
+}
+
+static int ptp_vmclock_getcrosststampattrs(struct ptp_clock_info *ptp,
+                                          struct system_device_crosststamp *xtstamp,
+                                          struct ptp_clock_attributes *att)
+{
+       struct vmclock_state *st = container_of(ptp, struct vmclock_state,
+                                               ptp_clock_info);
+       struct vmclock_crosststamp_ctx vctx = { .st = st, .attrs = att };
+       /* att travels in the callback context handed to the crosststamp helper */
+       return ptp_vmclock_do_getcrosststamp(&vctx, xtstamp);
 }
 
 static int ptp_vmclock_enable(struct ptp_clock_info *ptp,
@@ -329,9 +504,11 @@ static const struct ptp_clock_info ptp_vmclock_info = {
        .adjfine        = ptp_vmclock_adjfine,
        .adjtime        = ptp_vmclock_adjtime,
        .gettimex64     = ptp_vmclock_gettimex,
+       .gettimexattrs64 = ptp_vmclock_gettimexattrs,
        .settime64      = ptp_vmclock_settime,
        .enable         = ptp_vmclock_enable,
        .getcrosststamp = ptp_vmclock_getcrosststamp,
+       .getcrosststampattrs = ptp_vmclock_getcrosststampattrs,
 };
 
 static struct ptp_clock *vmclock_ptp_register(struct device *dev,
-- 
2.47.3


Reply via email to