Thanks!

Merged to master at 5fa9f64d90ae..5aa00200baf7 (from, to]

You can see the entire diff with 'git diff' or at
https://github.com/brho/akaros/compare/5fa9f64d90ae...5aa00200baf7


(p.s. rebuild busybox if you want the date command)


On 2016-04-28 at 17:11 "'Michael Davidson' via Akaros"
<[email protected]> wrote:
> Add the date command to busybox
> 
> Use scaled integer arithmetic for TSC cycles to nanoseconds
> conversions.
> 
> Add inline conversion functions from nanoseconds to timespec and
> timeval and update existing tsc2timespec() interface to return its
> result.
> 
> Add a read_persistent_clock() interface to get the initial date and
> time when the system is initialized. (Not yet implemented)
> 
> Add very rudimentary timekeeping data to keep track of both initial
> walltime in nsecs and initial TSC value.
> 
> Use epoch_nsec() to implement gettimeofday() system call.
> 
> Note: these changes should not cause any differences in system
> behavior.
> 
> Signed-off-by: Michael Davidson <[email protected]>
> ---
>  kern/arch/riscv/time.c              |   5 ++
>  kern/arch/x86/kclock.c              |   7 +-
>  kern/drivers/dev/kprof.c            |   4 +-
>  kern/include/time.h                 |  68 ++++++++++++++----
>  kern/src/init.c                     |   2 +-
>  kern/src/syscall.c                  |  29 ++------
>  kern/src/time.c                     | 138
> +++++++++++++++---------------------
> kern/src/vfs.c                      |  46 ++++++------
> tools/apps/busybox/defconfig-1.17.3 |   2 +- 9 files changed, 156
> insertions(+), 145 deletions(-)
> 
> diff --git a/kern/arch/riscv/time.c b/kern/arch/riscv/time.c
> index cd5c403..c03b80e 100644
> --- a/kern/arch/riscv/time.c
> +++ b/kern/arch/riscv/time.c
> @@ -57,3 +57,8 @@ udelay(uint64_t usec)
>       }
>       else panic("udelay() was called before timer_init(),
> moron!"); }
> +
> +uint64_t read_persistent_clock(void)
> +{
> +     return 1242129600 * 1000000000UL; /* nanwan's birthday */
> +}
> diff --git a/kern/arch/x86/kclock.c b/kern/arch/x86/kclock.c
> index 6f42dce..f5d3ae3 100644
> --- a/kern/arch/x86/kclock.c
> +++ b/kern/arch/x86/kclock.c
> @@ -9,7 +9,6 @@
>  
>  #include <kclock.h>
>  
> -
>  unsigned
>  mc146818_read(unsigned reg)
>  {
> @@ -24,5 +23,7 @@ mc146818_write(unsigned reg, unsigned datum)
>       outb(IO_RTC+1, datum);
>  }
>  
> -
> -
> +uint64_t read_persistent_clock(void)
> +{
> +     return 1242129600 * 1000000000UL; /* nanwan's birthday */
> +}
> diff --git a/kern/drivers/dev/kprof.c b/kern/drivers/dev/kprof.c
> index cb777db..0d66793 100644
> --- a/kern/drivers/dev/kprof.c
> +++ b/kern/drivers/dev/kprof.c
> @@ -348,7 +348,7 @@ static long mpstat_read(void *va, long n, int64_t
> off) cpu_total += pcpui->state_ticks[j];
>               cpu_total = MAX(cpu_total, 1);  /* for the
> divide later */ for (int j = 0; j < NR_CPU_STATES; j++) {
> -                     tsc2timespec(pcpui->state_ticks[j], &ts);
> +                     ts = tsc2timespec(pcpui->state_ticks[j]);
>                       len += snprintf(buf + len, bufsz - len,
> "%10d.%06d (%3d%%)%s", ts.tv_sec, ts.tv_nsec / 1000,
>                                       MIN((pcpui->state_ticks[j] *
> 100) / cpu_total, 100), @@ -625,7 +625,7 @@ void trace_vprintk(bool
> btrace, const char *fmt, va_list args) if (!atomic_cas(&tpb->in_use,
> 0, 1)) return;
>       if (likely(__proc_global_info.tsc_freq))
> -             tsc2timespec(read_tsc(), &ts_now);
> +             ts_now = tsc2timespec(read_tsc());
>       snprintf(hdr, sizeof(hdr), "[%lu.%09lu]:cpu%d: ",
> ts_now.tv_sec, ts_now.tv_nsec, core_id_early());
>  
> diff --git a/kern/include/time.h b/kern/include/time.h
> index 909742c..ee88e30 100644
> --- a/kern/include/time.h
> +++ b/kern/include/time.h
> @@ -5,22 +5,66 @@
>  #include <arch/time.h>
>  #include <ros/procinfo.h>
>  
> -void train_timing();
> +/* Conversion factors */
> +#define NSEC_PER_SEC 1000000000L
> +#define NSEC_PER_MSEC        1000000L
> +#define NSEC_PER_USEC        1000L
> +
> +void time_init(void);
>  void udelay(uint64_t usec);  /* done in arch-specific files */
> -uint64_t tsc2sec(uint64_t tsc_time);
> -uint64_t tsc2msec(uint64_t tsc_time);
> -uint64_t tsc2usec(uint64_t tsc_time);
> +
> +uint64_t read_persistent_clock(void);        /* arch-specific */
> +
>  uint64_t tsc2nsec(uint64_t tsc_time);
> -uint64_t sec2tsc(uint64_t sec);
> -uint64_t msec2tsc(uint64_t msec);
> -uint64_t usec2tsc(uint64_t usec);
> +static inline uint64_t tsc2usec(uint64_t tsc_time)
> +{
> +     return tsc2nsec(tsc_time) / NSEC_PER_USEC;
> +}
> +static inline uint64_t tsc2msec(uint64_t tsc_time)
> +{
> +     return tsc2nsec(tsc_time) / NSEC_PER_MSEC;
> +}
> +static inline uint64_t tsc2sec(uint64_t tsc_time)
> +{
> +     return tsc2nsec(tsc_time) / NSEC_PER_SEC;
> +}
> +
>  uint64_t nsec2tsc(uint64_t nsec);
> -uint64_t epoch_tsc(void);
> -uint64_t epoch_sec(void);
> -uint64_t epoch_msec(void);
> -uint64_t epoch_usec(void);
> +static inline uint64_t usec2tsc(uint64_t usec)
> +{
> +     return nsec2tsc(usec * NSEC_PER_USEC);
> +}
> +static inline uint64_t msec2tsc(uint64_t msec)
> +{
> +     return nsec2tsc(msec * NSEC_PER_MSEC);
> +}
> +static inline uint64_t sec2tsc(uint64_t sec)
> +{
> +     return nsec2tsc(sec * NSEC_PER_SEC);
> +}
> +
>  uint64_t epoch_nsec(void);
> -void tsc2timespec(uint64_t tsc_time, struct timespec *ts);
> +
> +static inline struct timespec nsec2timespec(uint64_t ns)
> +{
> +     return (struct timespec) {
> +             .tv_sec = ns / NSEC_PER_SEC,
> +             .tv_nsec = ns % NSEC_PER_SEC
> +     };
> +}
> +
> +static inline struct timeval nsec2timeval(uint64_t ns)
> +{
> +     return (struct timeval) {
> +             .tv_sec = ns / NSEC_PER_SEC,
> +             .tv_usec = (ns % NSEC_PER_SEC) / NSEC_PER_USEC
> +     };
> +}
> +
> +static inline struct timespec tsc2timespec(uint64_t tsc_time)
> +{
> +     return nsec2timespec(tsc2nsec(tsc_time));
> +}
>  
>  /* Just takes a time measurement.  Meant to be paired with
> stop_timing.  Use
>   * this if you don't want to muck with overheads or subtraction. */
> diff --git a/kern/src/init.c b/kern/src/init.c
> index e75a80d..6f21ec6 100644
> --- a/kern/src/init.c
> +++ b/kern/src/init.c
> @@ -158,7 +158,7 @@ void kernel_init(multiboot_info_t *mboot_info)
>       timer_init();
>       vfs_init();
>       devfs_init();
> -     train_timing();
> +     time_init();
>       kb_buf_init(&cons_buf);
>       arch_init();
>       block_init();
> diff --git a/kern/src/syscall.c b/kern/src/syscall.c
> index 852ad4e..dd26908 100644
> --- a/kern/src/syscall.c
> +++ b/kern/src/syscall.c
> @@ -47,11 +47,9 @@ static size_t systrace_fill_pretty_buf(struct
> systrace_record *trace, bool entry)
>  {
>       size_t len = 0;
> -     struct timespec ts_start;
> -     struct timespec ts_end;
> +     struct timespec ts_start =
> tsc2timespec(trace->start_timestamp);
> +     struct timespec ts_end = tsc2timespec(trace->end_timestamp);
>  
> -     tsc2timespec(trace->start_timestamp, &ts_start);
> -     tsc2timespec(trace->end_timestamp, &ts_end);
>       /* Slightly different formats between entry and exit.  Entry
> has retval set
>        * to ---, and begins with E.  Exit begins with X. */
>       if (entry) {
> @@ -488,7 +486,7 @@ static int sys_nanosleep(struct proc *p,
>        * years, which should be sufficiently long enough to ensure
> we don't
>        * overflow). */
>       if (waserror()) {
> -             tsc2timespec(read_tsc() - tsc, &krem);
> +             krem = tsc2timespec(read_tsc() - tsc);
>               if (rem && memcpy_to_user(p, rem, &krem,
> sizeof(struct timespec))) set_errno(EFAULT);
>               poperror();
> @@ -1988,26 +1986,9 @@ intreg_t sys_rmdir(struct proc *p, const char
> *path, size_t path_l) 
>  intreg_t sys_gettimeofday(struct proc *p, int *buf)
>  {
> -     static spinlock_t gtod_lock = SPINLOCK_INITIALIZER;
> -     static int t0 = 0;
> +     struct timeval tv = nsec2timeval(epoch_nsec());
>  
> -     spin_lock(&gtod_lock);
> -     if(t0 == 0)
> -
> -#if (defined CONFIG_APPSERVER)
> -     t0 = ufe(time,0,0,0,0);
> -#else
> -     // Nanwan's birthday, bitches!!
> -     t0 = 1242129600;
> -#endif
> -     spin_unlock(&gtod_lock);
> -
> -     long long dt = read_tsc();
> -     /* TODO: This probably wants its own function, using a
> struct timeval */
> -     long kbuf[2] = {t0+dt/__proc_global_info.tsc_freq,
> -
> (dt%__proc_global_info.tsc_freq)*1000000/__proc_global_info.tsc_freq};
> -
> -     return memcpy_to_user_errno(p,buf,kbuf,sizeof(kbuf));
> +     return memcpy_to_user_errno(p, buf, &tv, sizeof(tv));
>  }
>  
>  intreg_t sys_tcgetattr(struct proc *p, int fd, void *termios_p)
> diff --git a/kern/src/time.c b/kern/src/time.c
> index a18578c..250e680 100644
> --- a/kern/src/time.c
> +++ b/kern/src/time.c
> @@ -12,7 +12,7 @@
>   * attainable by using the TSC (or whatever timing source).
>   *
>   * For more detailed TSC measurements, use test_rdtsc() in
> k/a/i/rdtsc_test.c */ -void train_timing() 
> +static void train_timing(void)
>  {
>       uint64_t min_overhead = UINT64_MAX;
>       uint64_t max_overhead = 0;
> @@ -50,107 +50,87 @@ void timer_interrupt(struct hw_trapframe *hw_tf,
> void *data) __trigger_tchain(&per_cpu_info[core_id()].tchain, hw_tf);
>  }
>  
> -/* We can overflow/wraparound when we multiply up, but we have to
> divide last,
> - * or else we lose precision.  If we're too big and will overflow,
> we'll
> - * sacrifice precision for correctness, and degrade to the next
> lower level
> - * (losing 3 digits worth).  The recursive case shouldn't overflow,
> since it
> - * called something that scaled down the tsc_time by more than 1000.
> */ -uint64_t tsc2sec(uint64_t tsc_time)
> +/*
> + * We use scaled integer arithmetic for converting between TSC clock
> cycles
> + * and nanoseconds. In each case we use a fixed shift value of 32
> which
> + * gives a very high degree of accuracy.
> + *
> + * The actual scaling calculations rely on being able to use the 128 bit
> + * product of two unsigned 64 bit numbers as an intermediate result
> + * in the calculation. Fortunately, on x86_64 at least, gcc's 128 bit
> + * support is sufficiently good that it generates optimal code for
> this
> + * calculation without the need to write any assembler.
> + */
> +static inline uint64_t mult_shift_64(uint64_t a, uint64_t b, uint8_t
> shift) {
> -     return tsc_time / __proc_global_info.tsc_freq;
> +     return ((unsigned __int128)a * b) >> shift;
>  }
>  
> -uint64_t tsc2msec(uint64_t tsc_time)
> -{
> -     if (mult_will_overflow_u64(tsc_time, 1000))
> -             return tsc2sec(tsc_time) * 1000;
> -     else
> -             return (tsc_time * 1000) /
> __proc_global_info.tsc_freq; -}
> +static uint64_t cycles_to_nsec_mult;
> +static uint64_t nsec_to_cycles_mult;
>  
> -uint64_t tsc2usec(uint64_t tsc_time)
> -{
> -     if (mult_will_overflow_u64(tsc_time, 1000000))
> -             return tsc2msec(tsc_time) * 1000;
> -     else
> -             return (tsc_time * 1000000) /
> __proc_global_info.tsc_freq; -}
> +#define CYCLES_TO_NSEC_SHIFT 32
> +#define NSEC_TO_CYCLES_SHIFT 32
>  
> -uint64_t tsc2nsec(uint64_t tsc_time)
> +static void cycles_to_nsec_init(uint64_t tsc_freq_hz)
>  {
> -     if (mult_will_overflow_u64(tsc_time, 1000000000))
> -             return tsc2usec(tsc_time) * 1000;
> -     else
> -             return (tsc_time * 1000000000) /
> __proc_global_info.tsc_freq;
> +     cycles_to_nsec_mult = (NSEC_PER_SEC <<
> CYCLES_TO_NSEC_SHIFT) / tsc_freq_hz; }
>  
> -uint64_t sec2tsc(uint64_t sec)
> +static void nsec_to_cycles_init(uint64_t tsc_freq_hz)
>  {
> -     if (mult_will_overflow_u64(sec,
> __proc_global_info.tsc_freq)) {
> -             /* in this case, we simply can't express the number
> of ticks */
> -             warn("Wraparound in sec2tsc(), rounding up");
> -             return (uint64_t)(-1);
> -     } else {
> -             return sec * __proc_global_info.tsc_freq;
> -     }
> -}
> +     uint64_t divisor = NSEC_PER_SEC;
>  
> -uint64_t msec2tsc(uint64_t msec)
> -{
> -     if (mult_will_overflow_u64(msec,
> __proc_global_info.tsc_freq))
> -             return sec2tsc(msec / 1000);
> -     else
> -             return (msec * __proc_global_info.tsc_freq) / 1000;
> +     /*
> +      * In the unlikely event that the TSC frequency is greater
> +      * than (1 << 32) we have to lose a little precision to
> +      * avoid overflow in the calculation of the multiplier.
> +      */
> +     while (tsc_freq_hz >= ((uint64_t)1 << NSEC_TO_CYCLES_SHIFT))
> {
> +             tsc_freq_hz >>= 1;
> +             divisor >>= 1;
> +     }
> +     nsec_to_cycles_mult = (tsc_freq_hz <<
> NSEC_TO_CYCLES_SHIFT) / divisor; }
>  
> -uint64_t usec2tsc(uint64_t usec)
> +uint64_t tsc2nsec(uint64_t tsc_time)
>  {
> -     if (mult_will_overflow_u64(usec,
> __proc_global_info.tsc_freq))
> -             return msec2tsc(usec / 1000);
> -     else
> -             return (usec * __proc_global_info.tsc_freq) /
> 1000000;
> +     return mult_shift_64(tsc_time, cycles_to_nsec_mult,
> CYCLES_TO_NSEC_SHIFT); }
>  
>  uint64_t nsec2tsc(uint64_t nsec)
>  {
> -     if (mult_will_overflow_u64(nsec,
> __proc_global_info.tsc_freq))
> -             return usec2tsc(nsec / 1000);
> -     else
> -             return (nsec * __proc_global_info.tsc_freq) /
> 1000000000;
> +     return mult_shift_64(nsec, nsec_to_cycles_mult,
> NSEC_TO_CYCLES_SHIFT); }
>  
> -/* TODO: figure out what epoch time TSC == 0 is and store that as
> boot_tsc */ -static uint64_t boot_sec = 1242129600; /* nanwan's
> birthday */ -
> -uint64_t epoch_tsc(void)
> -{
> -     return read_tsc() + sec2tsc(boot_sec);
> -}
> -
> -uint64_t epoch_sec(void)
> -{
> -     return tsc2sec(epoch_tsc());
> -}
> -
> -uint64_t epoch_msec(void)
> +/*
> + * Rudimentary timekeeping implementation.
> + *
> + * Nothing here yet apart from the base walltime and TSC cycle values
> + * at system init time.
> + */
> +static struct {
> +     uint64_t        walltime_ns_last;
> +     uint64_t        tsc_cycles_last;
> +} timekeeping;
> +
> +
> +/*
> + * Return nanoseconds since the UNIX epoch, 1st January, 1970.
> + */
> +uint64_t epoch_nsec(void)
>  {
> -     return tsc2msec(epoch_tsc());
> +     uint64_t cycles = read_tsc() - timekeeping.tsc_cycles_last;
> +     return timekeeping.walltime_ns_last + tsc2nsec(cycles);
>  }
>  
> -uint64_t epoch_usec(void)
> +void time_init(void)
>  {
> -     return tsc2usec(epoch_tsc());
> -}
> +     train_timing();
>  
> -uint64_t epoch_nsec(void)
> -{
> -     return tsc2nsec(epoch_tsc());
> -}
> +     timekeeping.walltime_ns_last = read_persistent_clock();
> +     timekeeping.tsc_cycles_last  = read_tsc();
>  
> -void tsc2timespec(uint64_t tsc_time, struct timespec *ts)
> -{
> -     ts->tv_sec = tsc2sec(tsc_time);
> -     /* subtract off everything but the remainder */
> -     tsc_time -= sec2tsc(ts->tv_sec);
> -     ts->tv_nsec = tsc2nsec(tsc_time);
> +     cycles_to_nsec_init(__proc_global_info.tsc_freq);
> +     nsec_to_cycles_init(__proc_global_info.tsc_freq);
>  }
> diff --git a/kern/src/vfs.c b/kern/src/vfs.c
> index dac0468..a4dae4a 100644
> --- a/kern/src/vfs.c
> +++ b/kern/src/vfs.c
> @@ -1023,7 +1023,7 @@ void load_inode(struct dentry *dentry, unsigned
> long ino)
>   * note we don't pass this an nd, like Linux does... */
>  static struct inode *create_inode(struct dentry *dentry, int mode)
>  {
> -     uint64_t now = epoch_sec();
> +     struct timespec now = nsec2timespec(epoch_nsec());
>       /* note it is the i_ino that uniquely identifies a file in
> the specific
>        * filesystem.  there's a diff between creating an inode
> (even for an in-use
>        * ino) and then filling it in, and vs creating a brand new
> one. @@ -1036,12 +1036,12 @@ static struct inode *create_inode(struct
> dentry *dentry, int mode) inode->i_nlink = 1;
>       inode->i_size = 0;
>       inode->i_blocks = 0;
> -     inode->i_atime.tv_sec = now;
> -     inode->i_ctime.tv_sec = now;
> -     inode->i_mtime.tv_sec = now;
> -     inode->i_atime.tv_nsec = 0;
> -     inode->i_ctime.tv_nsec = 0;
> -     inode->i_mtime.tv_nsec = 0;
> +     inode->i_atime.tv_sec = now.tv_sec;
> +     inode->i_ctime.tv_sec = now.tv_sec;
> +     inode->i_mtime.tv_sec = now.tv_sec;
> +     inode->i_atime.tv_nsec = now.tv_nsec;
> +     inode->i_ctime.tv_nsec = now.tv_nsec;
> +     inode->i_mtime.tv_nsec = now.tv_nsec;
>       inode->i_bdev = inode->i_sb->s_bdev;
>       /* when we have notions of users, do something here: */
>       inode->i_uid = 0;
> @@ -2057,7 +2057,7 @@ int do_rename(char *old_path, char *new_path)
>       struct dentry *old_d, *new_d, *unlink_d;
>       int error;
>       int retval = 0;
> -     uint64_t now;
> +     struct timespec now;
>  
>       nd_o->intent = LOOKUP_ACCESS; /* maybe, might need another
> type */ 
> @@ -2186,15 +2186,15 @@ int do_rename(char *old_path, char *new_path)
>       dcache_put(old_dir_d->d_sb, old_d);
>  
>       /* TODO could have a helper for this, but it's going away
> soon */
> -     now = epoch_sec();
> -     old_dir_i->i_ctime.tv_sec = now;
> -     old_dir_i->i_mtime.tv_sec = now;
> -     old_dir_i->i_ctime.tv_nsec = 0;
> -     old_dir_i->i_mtime.tv_nsec = 0;
> -     new_dir_i->i_ctime.tv_sec = now;
> -     new_dir_i->i_mtime.tv_sec = now;
> -     new_dir_i->i_ctime.tv_nsec = 0;
> -     new_dir_i->i_mtime.tv_nsec = 0;
> +     now = nsec2timespec(epoch_nsec());
> +     old_dir_i->i_ctime.tv_sec = now.tv_sec;
> +     old_dir_i->i_mtime.tv_sec = now.tv_sec;
> +     old_dir_i->i_ctime.tv_nsec = now.tv_nsec;
> +     old_dir_i->i_mtime.tv_nsec = now.tv_nsec;
> +     new_dir_i->i_ctime.tv_sec = now.tv_sec;
> +     new_dir_i->i_mtime.tv_sec = now.tv_sec;
> +     new_dir_i->i_ctime.tv_nsec = now.tv_nsec;
> +     new_dir_i->i_mtime.tv_nsec = now.tv_nsec;
>  
>       /* fall-through */
>  out_paths_and_refs:
> @@ -2211,7 +2211,7 @@ out_old_path:
>  int do_truncate(struct inode *inode, off64_t len)
>  {
>       off64_t old_len;
> -     uint64_t now;
> +     struct timespec now;
>       if (len < 0) {
>               set_errno(EINVAL);
>               return -1;
> @@ -2236,11 +2236,11 @@ int do_truncate(struct inode *inode, off64_t
> len) pm_remove_contig(inode->i_mapping, old_len >> PGSHIFT,
>                                (len >> PGSHIFT) - (old_len >>
> PGSHIFT)); }
> -     now = epoch_sec();
> -     inode->i_ctime.tv_sec = now;
> -     inode->i_mtime.tv_sec = now;
> -     inode->i_ctime.tv_nsec = 0;
> -     inode->i_mtime.tv_nsec = 0;
> +     now = nsec2timespec(epoch_nsec());
> +     inode->i_ctime.tv_sec = now.tv_sec;
> +     inode->i_mtime.tv_sec = now.tv_sec;
> +     inode->i_ctime.tv_nsec = now.tv_nsec;
> +     inode->i_mtime.tv_nsec = now.tv_nsec;
>       return 0;
>  }
>  
> diff --git a/tools/apps/busybox/defconfig-1.17.3
> b/tools/apps/busybox/defconfig-1.17.3 index e9577b1..d1c59c6 100644
> --- a/tools/apps/busybox/defconfig-1.17.3
> +++ b/tools/apps/busybox/defconfig-1.17.3
> @@ -163,7 +163,7 @@ CONFIG_FEATURE_CPIO_P=y
>  #
>  # CONFIG_BASENAME is not set
>  CONFIG_CAT=y
> -# CONFIG_DATE is not set
> +CONFIG_DATE=y
>  # CONFIG_FEATURE_DATE_ISOFMT is not set
>  # CONFIG_FEATURE_DATE_NANO is not set
>  # CONFIG_FEATURE_DATE_COMPAT is not set

-- 
You received this message because you are subscribed to the Google Groups 
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to