Module: xenomai-3
Branch: next
Commit: 8caf3444a6158f677c4297ff689a7247ed34a53b
URL:    
http://git.xenomai.org/?p=xenomai-3.git;a=commit;h=8caf3444a6158f677c4297ff689a7247ed34a53b

Author: Philippe Gerum <r...@xenomai.org>
Date:   Tue Sep 30 12:08:11 2014 +0200

drivers/autotune: change calibration method

Apply three incremental filters for finding the best gravity value
among the set of data produced by calibration runs. Each run is
obtained by increasing the reference gravity value by steps of 500 ns,
up to 20 us (i.e. 40 steps max).

The gravity picked by the autotuner is the one which produced the set
of samples with the best (smallest) standard deviation, chosen among
the runs with the lowest minimum latency observed, themselves taken
from the runs with the smallest weighted mean latency.

---

 kernel/drivers/autotune/autotune.c |  327 ++++++++++++++++++++++--------------
 1 file changed, 205 insertions(+), 122 deletions(-)

diff --git a/kernel/drivers/autotune/autotune.c 
b/kernel/drivers/autotune/autotune.c
index 0b4204a..61649f5 100644
--- a/kernel/drivers/autotune/autotune.c
+++ b/kernel/drivers/autotune/autotune.c
@@ -19,6 +19,8 @@
  */
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/sort.h>
 #include <cobalt/kernel/arith.h>
 #include <rtdm/driver.h>
 #include <rtdm/autotune.h>
@@ -27,25 +29,38 @@
  * Auto-tuning services for the Cobalt core clock.  This driver is
  * always built statically into the kernel when enabled.
  */
-#define AUTOTUNE_STEPS  60
-#define ONE_SECOND     1000000000UL
-#define H2G2_FACTOR(g) ((g) * 4 / 5)   /* 42 would be too pessimistic */
+#define SAMPLING_TIME  500000000UL
+#define LOG_TIMESPAN   20000U  /* ns */
+#define BUCKET_TIMESPAN        500U    /* ns */
+#define NR_BUCKETS     (LOG_TIMESPAN / BUCKET_TIMESPAN)
+#define WARMUP_STEPS   3
+#define AUTOTUNE_STEPS  NR_BUCKETS
+
+struct tuning_score {
+       unsigned int pmean;
+       unsigned int stddev;
+       unsigned int minlat;
+       unsigned int gravity;
+       int step;
+};
 
 struct tuner_state {
        xnticks_t ideal;
        xnticks_t step;
-       xnsticks_t min_lat;
-       xnsticks_t max_lat;
-       xnsticks_t sum_lat;
-       unsigned long cur_samples;
-       unsigned long max_samples;
+       int min_lat;
+       int max_lat;
+       unsigned int pow_sum_avg;
+       unsigned int mean;
+       unsigned int cur_samples;
+       unsigned int max_samples;
+       unsigned int log[NR_BUCKETS];
 };
 
 struct gravity_tuner {
        const char *name;
-       unsigned long (*get_gravity)(struct gravity_tuner *tuner);
-       void (*set_gravity)(struct gravity_tuner *tuner, unsigned long gravity);
-       void (*adjust_gravity)(struct gravity_tuner *tuner, long adjust);
+       unsigned int (*get_gravity)(struct gravity_tuner *tuner);
+       void (*set_gravity)(struct gravity_tuner *tuner, unsigned int gravity);
+       unsigned int (*adjust_gravity)(struct gravity_tuner *tuner, int adjust);
        int (*init_tuner)(struct gravity_tuner *tuner);
        int (*start_tuner)(struct gravity_tuner *tuner, xnticks_t start_time,
                           xnticks_t interval);
@@ -53,6 +68,9 @@ struct gravity_tuner {
        struct tuner_state state;
        rtdm_event_t done;
        int status;
+       int quiet;
+       struct tuning_score scores[AUTOTUNE_STEPS];
+       int nscores;
 };
 
 struct irq_gravity_tuner {
@@ -100,20 +118,27 @@ static inline void done_sampling(struct gravity_tuner 
*tuner,
 static int add_sample(struct gravity_tuner *tuner, xnticks_t timestamp)
 {
        struct tuner_state *state;
-       xnsticks_t delta;
+       unsigned int b, n;
+       int delta;
 
        state = &tuner->state;
 
-       delta = (xnsticks_t)(timestamp - state->ideal);
+       delta = (int)(timestamp - state->ideal);
        if (delta < state->min_lat)
                state->min_lat = delta;
        if (delta > state->max_lat)
                state->max_lat = delta;
 
-       state->sum_lat += delta;
        state->ideal += state->step;
+       n = ++state->cur_samples;
+
+       b = (delta < 0 ? -delta : delta) / BUCKET_TIMESPAN;
+       state->log[b < NR_BUCKETS ? b : NR_BUCKETS - 1]++;
+       /* Build running mean and power sum average for stddev. */
+       state->mean += (delta - state->mean) / n;
+       state->pow_sum_avg += (delta * delta - state->pow_sum_avg) / n;
 
-       if (++state->cur_samples >= state->max_samples) {
+       if (n >= state->max_samples) {
                done_sampling(tuner, 0);
                return 1;       /* Finished. */
        }
@@ -157,19 +182,19 @@ static void destroy_irq_tuner(struct gravity_tuner *tuner)
        destroy_tuner(tuner);
 }
 
-static unsigned long get_irq_gravity(struct gravity_tuner *tuner)
+static unsigned int get_irq_gravity(struct gravity_tuner *tuner)
 {
        return nkclock.gravity.irq;
 }
 
-static void set_irq_gravity(struct gravity_tuner *tuner, unsigned long gravity)
+static void set_irq_gravity(struct gravity_tuner *tuner, unsigned int gravity)
 {
        nkclock.gravity.irq = gravity;
 }
 
-static void adjust_irq_gravity(struct gravity_tuner *tuner, long adjust)
+static unsigned int adjust_irq_gravity(struct gravity_tuner *tuner, int adjust)
 {
-       nkclock.gravity.irq += adjust;
+       return nkclock.gravity.irq += adjust;
 }
 
 static int start_irq_tuner(struct gravity_tuner *tuner,
@@ -216,8 +241,8 @@ void task_handler(void *arg)
 
                for (;;) {
                        ret = rtdm_task_wait_period();
-                       if (ret)
-                               break;
+                       if (ret && ret != -ETIMEDOUT)
+                               goto out;
 
                        now = xnclock_read_raw(&nkclock);
                        if (add_sample(&k_tuner->tuner, now)) {
@@ -226,7 +251,7 @@ void task_handler(void *arg)
                        }
                }
        }
-
+out:
        done_sampling(&k_tuner->tuner, ret);
        rtdm_task_destroy(&k_tuner->task);
 }
@@ -253,19 +278,19 @@ static void destroy_kthread_tuner(struct gravity_tuner 
*tuner)
        rtdm_event_destroy(&k_tuner->barrier);
 }
 
-static unsigned long get_kthread_gravity(struct gravity_tuner *tuner)
+static unsigned int get_kthread_gravity(struct gravity_tuner *tuner)
 {
        return nkclock.gravity.kernel;
 }
 
-static void set_kthread_gravity(struct gravity_tuner *tuner, unsigned long 
gravity)
+static void set_kthread_gravity(struct gravity_tuner *tuner, unsigned int 
gravity)
 {
        nkclock.gravity.kernel = gravity;
 }
 
-static void adjust_kthread_gravity(struct gravity_tuner *tuner, long adjust)
+static unsigned int adjust_kthread_gravity(struct gravity_tuner *tuner, int 
adjust)
 {
-       nkclock.gravity.kernel += adjust;
+       return nkclock.gravity.kernel += adjust;
 }
 
 static int start_kthread_tuner(struct gravity_tuner *tuner,
@@ -328,19 +353,19 @@ static void destroy_uthread_tuner(struct gravity_tuner 
*tuner)
        rtdm_event_destroy(&u_tuner->pulse);
 }
 
-static unsigned long get_uthread_gravity(struct gravity_tuner *tuner)
+static unsigned int get_uthread_gravity(struct gravity_tuner *tuner)
 {
        return nkclock.gravity.user;
 }
 
-static void set_uthread_gravity(struct gravity_tuner *tuner, unsigned long 
gravity)
+static void set_uthread_gravity(struct gravity_tuner *tuner, unsigned int 
gravity)
 {
        nkclock.gravity.user = gravity;
 }
 
-static void adjust_uthread_gravity(struct gravity_tuner *tuner, long adjust)
+static unsigned int adjust_uthread_gravity(struct gravity_tuner *tuner, int 
adjust)
 {
-       nkclock.gravity.user += adjust;
+       return nkclock.gravity.user += adjust;
 }
 
 static int start_uthread_tuner(struct gravity_tuner *tuner,
@@ -385,35 +410,124 @@ struct uthread_gravity_tuner uthread_tuner = {
        },
 };
 
-static int tune_gravity(struct gravity_tuner *tuner, int period, int quiet)
+static inline void build_score(struct gravity_tuner *tuner, int step)
 {
-       unsigned long old_gravity, gravity;
-       struct tuner_state *state;
-       int ret, step, wedge;
-       xnsticks_t minlat;
-       long adjust;
+       struct tuner_state *state = &tuner->state;
+       unsigned int sum, variance, n, b;
+
+       for (b = sum = n = 0; b < NR_BUCKETS; b++) {
+               sum += (b * BUCKET_TIMESPAN + BUCKET_TIMESPAN / 2) * 
state->log[b];
+               n += state->log[b];
+       }
+
+       tuner->scores[step].pmean = sum / n;
+       variance = (state->pow_sum_avg * n - n *
+                   state->mean * state->mean) / (n - 1);
+       tuner->scores[step].stddev = int_sqrt(variance);
+       tuner->scores[step].minlat = state->min_lat;
+       tuner->scores[step].gravity = tuner->get_gravity(tuner);
+       tuner->scores[step].step = step;
+       tuner->nscores++;
+}
+
+#if XENO_DEBUG(COBALT)
+#define progress(__tuner, __fmt, __args...)                            \
+       do {                                                            \
+               if (!(__tuner)->quiet)                                  \
+                       printk(XENO_INFO "autotune(%s) " __fmt "\n",    \
+                              (__tuner)->name, ##__args);              \
+       } while (0)
+#else
+#define progress(__tuner, __fmt, __args...)
+#endif
+
+static int cmp_score_mean(const void *c, const void *r)
+{
+       const struct tuning_score *sc = c, *sr = r;
+       return (int)(sc->pmean - sr->pmean);
+}
+
+static int cmp_score_minlat(const void *c, const void *r)
+{
+       const struct tuning_score *sc = c, *sr = r;
+       return (int)(sc->minlat - sr->minlat);
+}
+
+static int cmp_score_stddev(const void *c, const void *r)
+{
+       const struct tuning_score *sc = c, *sr = r;
+       return (int)(sc->stddev - sr->stddev);
+}
+
+static int filter_mean(struct gravity_tuner *tuner)
+{
+       int n;
+
+       sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score),
+            cmp_score_mean, NULL);
+
+       for (n = 1; n < tuner->nscores; n++) {
+               if (tuner->scores[n].pmean >
+                   tuner->scores[0].pmean + tuner->scores[0].pmean / 10)
+                       break;
+       }
+
+       return n;
+}
+
+static int filter_minlat(struct gravity_tuner *tuner)
+{
+       int n;
+
+       sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score),
+            cmp_score_minlat, NULL);
+
+       for (n = 1; n < tuner->nscores; n++) {
+               if (tuner->scores[n].minlat >
+                   tuner->scores[0].minlat + tuner->scores[0].minlat / 10)
+                       break;
+       }
+
+       return n;
+}
+
+static int filter_stddev(struct gravity_tuner *tuner)
+{
+       sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score),
+            cmp_score_stddev, NULL);
+
+       return 1;
+}
+
+static inline void filter_score(struct gravity_tuner *tuner,
+                               int (*filter)(struct gravity_tuner *tuner))
+{
+       tuner->nscores = filter(tuner);
+}
+
+static int tune_gravity(struct gravity_tuner *tuner, int period)
+{
+       struct tuner_state *state = &tuner->state;
+       unsigned int orig_gravity, gravity_limit;
+       int ret, step, adjust;
 
-       state = &tuner->state;
        state->step = xnclock_ns_to_ticks(&nkclock, period);
-       state->max_samples = ONE_SECOND / (period ?: 1);
-       minlat = xnclock_ns_to_ticks(&nkclock, ONE_SECOND);
-       old_gravity = tuner->get_gravity(tuner);
+       state->max_samples = SAMPLING_TIME / (period ?: 1);
+       orig_gravity = tuner->get_gravity(tuner);
        tuner->set_gravity(tuner, 0);
-       gravity = 0;
-       wedge = 0;
-
-       /*
-        * The tuning process is basic: we run a latency test for one
-        * second, increasing the clock gravity value by 2/3rd until
-        * we reach the wedge value or cause early shots, whichever
-        * comes first.
-        */
-       for (step = 1; step <= AUTOTUNE_STEPS; step++) {
+       tuner->nscores = 0;
+       adjust = xnclock_ns_to_ticks(&nkclock, BUCKET_TIMESPAN);
+       gravity_limit = AUTOTUNE_STEPS * adjust;
+       progress(tuner, "warming up...");
+
+       for (step = 0; step < WARMUP_STEPS + AUTOTUNE_STEPS; step++) {
                state->ideal = xnclock_read_raw(&nkclock) + state->step * 3;
-               state->min_lat = xnclock_ns_to_ticks(&nkclock, ONE_SECOND);
+               state->min_lat = xnclock_ns_to_ticks(&nkclock, SAMPLING_TIME);
                state->max_lat = 0;
-               state->sum_lat = 0;
+               state->mean = 0;
+               state->pow_sum_avg = 0;
                state->cur_samples = 0;
+               memset(state->log, 0, sizeof(state->log));
 
                ret = tuner->start_tuner(tuner,
                                         xnclock_ticks_to_ns(&nkclock, 
state->ideal),
@@ -430,84 +544,55 @@ static int tune_gravity(struct gravity_tuner *tuner, int 
period, int quiet)
                if (ret)
                        goto fail;
 
-               if (state->min_lat <= 0) {
-                       if (!quiet)
-                               printk(XENO_WARN
-                              "auto-tuning[%s]: early shot by %Ld ns"
-                              ": disabling gravity\n",
-                              tuner->name,
-                              xnclock_ticks_to_ns(&nkclock, state->min_lat));
-                       gravity = 0;
-                       minlat = 0;
-                       goto done;
+               if (step < WARMUP_STEPS) {
+                       if (step == WARMUP_STEPS - 1 && state->min_lat >= 0)
+                               gravity_limit = state->min_lat;
+                       continue;
                }
 
-               /*
-                * If we detect worse latencies with smaller gravity
-                * values across consecutive tests, we assume the
-                * former is our wedge value.  Retry and confirm it 5
-                * times before stopping.
-                */
-               if (state->min_lat > minlat) {
-#ifdef CONFIG_XENO_OPT_DEBUG_NUCLEUS
-                       if (!quiet)
-                               printk(XENO_INFO "autotune[%s]: "
-                                      "at wedge (min_ns %Ld => %Ld), "
-                                      "gravity reset to %Ld ns\n",
+               if (state->min_lat < 0) {
+                       if (tuner->get_gravity(tuner) == 0) {
+                               printk(XENO_WARN
+                                      "autotune(%s) failed with early shot 
(%Ld ns)\n",
                                       tuner->name,
-                                      xnclock_ticks_to_ns(&nkclock, minlat),
-                                      xnclock_ticks_to_ns(&nkclock, 
state->min_lat),
-                                      xnclock_ticks_to_ns(&nkclock, gravity));
-#endif
-                       if (++wedge >= 5)
-                               goto done;
-                       tuner->set_gravity(tuner, gravity);
-                       continue;
+                                      xnclock_ticks_to_ns(&nkclock, 
state->min_lat));
+                               ret = -EAGAIN;
+                               goto fail;
+                       }
+                       break;
                }
 
+               if (((step - WARMUP_STEPS) % 5) == 0)
+                       progress(tuner, "calibrating... (slice %d)",
+                                (step - WARMUP_STEPS) / 5 + 1);
+
+               build_score(tuner, step - WARMUP_STEPS);
+
                /*
-                * We seem to have a margin for compensating even
-                * more, increase the gravity value by a 3rd for next
-                * round.
+                * Anticipating more than the minimum latency detected
+                * at warmup would make no sense: cap the gravity we
+                * may try.
                 */
-               minlat = state->min_lat;
-               adjust = (long)xnarch_llimd(minlat, 2, 3);
-               if (adjust == 0)
-                       goto done;
-
-               gravity = tuner->get_gravity(tuner);
-               tuner->adjust_gravity(tuner, adjust);
-#ifdef CONFIG_XENO_OPT_DEBUG_NUCLEUS
-               if (!quiet)
-                       printk(XENO_INFO "autotune[%s]: min=%Ld | max=%Ld |"
-                              "avg=%Ld | gravity(%lut + adj=%ldt)\n",
-                              tuner->name,
-                              xnclock_ticks_to_ns(&nkclock, minlat),
-                              xnclock_ticks_to_ns(&nkclock, state->max_lat),
-                              xnclock_ticks_to_ns(&nkclock,
-                                  xnarch_llimd(state->sum_lat, 1,
-                                               (state->cur_samples ?: 1))),
-                      gravity, adjust);
-#endif
+               if (tuner->adjust_gravity(tuner, adjust) > gravity_limit)
+                       break;
        }
 
-       printk(XENO_ERR "could not auto-tune (%s) after %ds\n",
-              tuner->name, AUTOTUNE_STEPS);
-
-       return -EINVAL;
-done:
-       tuner->set_gravity(tuner, H2G2_FACTOR(gravity));
-
-       if (!quiet)
+       filter_score(tuner, filter_mean);
+       filter_score(tuner, filter_minlat);
+       filter_score(tuner, filter_stddev);
+       tuner->set_gravity(tuner, tuner->scores[0].gravity);
+       if (!tuner->quiet)
                printk(XENO_INFO
-                      "auto-tuning[%s]: gravity_ns=%Ld, min_ns=%Ld\n",
+                      "autotune(%s) mean=%Ld, stddev=%Lu, gravity=%Lu (at step 
%d)\n",
                       tuner->name,
-                      xnclock_ticks_to_ns(&nkclock, tuner->get_gravity(tuner)),
-                      step > 1 ? xnclock_ticks_to_ns(&nkclock, minlat) : 0);
+                      xnclock_ticks_to_ns(&nkclock, tuner->scores[0].pmean),
+                      xnclock_ticks_to_ns(&nkclock, tuner->scores[0].stddev),
+                      xnclock_ticks_to_ns(&nkclock, tuner->scores[0].gravity),
+                      tuner->scores[0].step);
 
        return 0;
 fail:
-       tuner->set_gravity(tuner, old_gravity);
+       tuner->set_gravity(tuner, orig_gravity);
 
        return ret;
 }
@@ -563,8 +648,7 @@ static int autotune_ioctl_nrt(struct rtdm_fd *fd, unsigned 
int request, void *ar
        context->setup = setup;
 
        if (!setup.quiet)
-               printk(XENO_INFO "auto-tuning core clock gravity, %s\n",
-                      tuner->name);
+               printk(XENO_INFO "autotune(%s) started\n", tuner->name);
 
        return ret;
 }
@@ -574,7 +658,7 @@ static int autotune_ioctl_rt(struct rtdm_fd *fd, unsigned 
int request, void *arg
        struct autotune_context *context;
        struct gravity_tuner *tuner;
        nanosecs_abs_t timestamp;
-       unsigned long gravity;
+       unsigned int gravity;
        int ret;
 
        context = rtdm_fd_to_private(fd);
@@ -584,9 +668,8 @@ static int autotune_ioctl_rt(struct rtdm_fd *fd, unsigned 
int request, void *arg
 
        switch (request) {
        case AUTOTUNE_RTIOC_RUN:
-               ret = tune_gravity(tuner,
-                                  context->setup.period,
-                                  context->setup.quiet);
+               tuner->quiet = context->setup.quiet;
+               ret = tune_gravity(tuner, context->setup.period);
                if (ret)
                        break;
                gravity = xnclock_ticks_to_ns(&nkclock,


_______________________________________________
Xenomai-git mailing list
Xenomai-git@xenomai.org
http://www.xenomai.org/mailman/listinfo/xenomai-git

Reply via email to