Thanks! Merged to master at 5fa9f64d90ae..5aa00200baf7 (from, to]
You can see the entire diff with 'git diff' or at https://github.com/brho/akaros/compare/5fa9f64d90ae...5aa00200baf7 (p.s. rebuild busybox if you want the date command) On 2016-04-28 at 17:11 "'Michael Davidson' via Akaros" <[email protected]> wrote: > Add the date command to busybox > > Use scaled integer arithmetic for TSC cycles to nanoseconds > conversions. > > Add inline conversion functions from nanoseconds to timespec and > timeval and update existing tsc2timespec() interface to return its > result. > > Add a get_persistent_clock() interface to get the initial date and > time when the system is initialized. (Not yet implemented) > > Add very rudimentary timekeeping data to keep track of both initial > walltime in nsecs and initial TSC value. > > Use epoch_nsec() to implement gettimeofday() system call. > > Note: these changes should not cause any differences in system > behavior. Signed-off-by: Michael Davidson <[email protected]> > --- > kern/arch/riscv/time.c | 5 ++ > kern/arch/x86/kclock.c | 7 +- > kern/drivers/dev/kprof.c | 4 +- > kern/include/time.h | 68 ++++++++++++++---- > kern/src/init.c | 2 +- > kern/src/syscall.c | 29 ++------ > kern/src/time.c | 138 > +++++++++++++++--------------------- > kern/src/vfs.c | 46 ++++++------ > tools/apps/busybox/defconfig-1.17.3 | 2 +- 9 files changed, 156 > insertions(+), 145 deletions(-) > > diff --git a/kern/arch/riscv/time.c b/kern/arch/riscv/time.c > index cd5c403..c03b80e 100644 > --- a/kern/arch/riscv/time.c > +++ b/kern/arch/riscv/time.c > @@ -57,3 +57,8 @@ udelay(uint64_t usec) > } > else panic("udelay() was called before timer_init(), > moron!"); } > + > +uint64_t read_persistent_clock(void) > +{ > + return 1242129600 * 1000000000UL; /* nanwan's birthday */ > +} > diff --git a/kern/arch/x86/kclock.c b/kern/arch/x86/kclock.c > index 6f42dce..f5d3ae3 100644 > --- a/kern/arch/x86/kclock.c > +++ b/kern/arch/x86/kclock.c > @@ -9,7 +9,6 @@ > > #include <kclock.h> > > - > unsigned > mc146818_read(unsigned reg) > { 
> @@ -24,5 +23,7 @@ mc146818_write(unsigned reg, unsigned datum) > outb(IO_RTC+1, datum); > } > > - > - > +uint64_t read_persistent_clock(void) > +{ > + return 1242129600 * 1000000000UL; /* nanwan's birthday */ > +} > diff --git a/kern/drivers/dev/kprof.c b/kern/drivers/dev/kprof.c > index cb777db..0d66793 100644 > --- a/kern/drivers/dev/kprof.c > +++ b/kern/drivers/dev/kprof.c > @@ -348,7 +348,7 @@ static long mpstat_read(void *va, long n, int64_t > off) cpu_total += pcpui->state_ticks[j]; > cpu_total = MAX(cpu_total, 1); /* for the > divide later */ for (int j = 0; j < NR_CPU_STATES; j++) { > - tsc2timespec(pcpui->state_ticks[j], &ts); > + ts = tsc2timespec(pcpui->state_ticks[j]); > len += snprintf(buf + len, bufsz - len, > "%10d.%06d (%3d%%)%s", ts.tv_sec, ts.tv_nsec / 1000, > MIN((pcpui->state_ticks[j] * > 100) / cpu_total, 100), @@ -625,7 +625,7 @@ void trace_vprintk(bool > btrace, const char *fmt, va_list args) if (!atomic_cas(&tpb->in_use, > 0, 1)) return; > if (likely(__proc_global_info.tsc_freq)) > - tsc2timespec(read_tsc(), &ts_now); > + ts_now = tsc2timespec(read_tsc()); > snprintf(hdr, sizeof(hdr), "[%lu.%09lu]:cpu%d: ", > ts_now.tv_sec, ts_now.tv_nsec, core_id_early()); > > diff --git a/kern/include/time.h b/kern/include/time.h > index 909742c..ee88e30 100644 > --- a/kern/include/time.h > +++ b/kern/include/time.h > @@ -5,22 +5,66 @@ > #include <arch/time.h> > #include <ros/procinfo.h> > > -void train_timing(); > +/* Conversion factors */ > +#define NSEC_PER_SEC 1000000000L > +#define NSEC_PER_MSEC 1000000L > +#define NSEC_PER_USEC 1000L > + > +void time_init(void); > void udelay(uint64_t usec); /* done in arch-specific files */ > -uint64_t tsc2sec(uint64_t tsc_time); > -uint64_t tsc2msec(uint64_t tsc_time); > -uint64_t tsc2usec(uint64_t tsc_time); > + > +uint64_t read_persistent_clock(void); /* arch-specific */ > + > uint64_t tsc2nsec(uint64_t tsc_time); > -uint64_t sec2tsc(uint64_t sec); > -uint64_t msec2tsc(uint64_t msec); > -uint64_t 
usec2tsc(uint64_t usec); > +static inline uint64_t tsc2usec(uint64_t tsc_time) > +{ > + return tsc2nsec(tsc_time) / NSEC_PER_USEC; > +} > +static inline uint64_t tsc2msec(uint64_t tsc_time) > +{ > + return tsc2nsec(tsc_time) / NSEC_PER_MSEC; > +} > +static inline uint64_t tsc2sec(uint64_t tsc_time) > +{ > + return tsc2nsec(tsc_time) / NSEC_PER_SEC; > +} > + > uint64_t nsec2tsc(uint64_t nsec); > -uint64_t epoch_tsc(void); > -uint64_t epoch_sec(void); > -uint64_t epoch_msec(void); > -uint64_t epoch_usec(void); > +static inline uint64_t usec2tsc(uint64_t usec) > +{ > + return nsec2tsc(usec * NSEC_PER_USEC); > +} > +static inline uint64_t msec2tsc(uint64_t msec) > +{ > + return nsec2tsc(msec * NSEC_PER_MSEC); > +} > +static inline uint64_t sec2tsc(uint64_t sec) > +{ > + return nsec2tsc(sec * NSEC_PER_SEC); > +} > + > uint64_t epoch_nsec(void); > -void tsc2timespec(uint64_t tsc_time, struct timespec *ts); > + > +static inline struct timespec nsec2timespec(uint64_t ns) > +{ > + return (struct timespec) { > + .tv_sec = ns / NSEC_PER_SEC, > + .tv_nsec = ns % NSEC_PER_SEC > + }; > +} > + > +static inline struct timeval nsec2timeval(uint64_t ns) > +{ > + return (struct timeval) { > + .tv_sec = ns / NSEC_PER_SEC, > + .tv_usec = (ns % NSEC_PER_SEC) / NSEC_PER_USEC > + }; > +} > + > +static inline struct timespec tsc2timespec(uint64_t tsc_time) > +{ > + return nsec2timespec(tsc2nsec(tsc_time)); > +} > > /* Just takes a time measurement. Meant to be paired with > stop_timing. Use > * this if you don't want to muck with overheads or subtraction. 
*/ > diff --git a/kern/src/init.c b/kern/src/init.c > index e75a80d..6f21ec6 100644 > --- a/kern/src/init.c > +++ b/kern/src/init.c > @@ -158,7 +158,7 @@ void kernel_init(multiboot_info_t *mboot_info) > timer_init(); > vfs_init(); > devfs_init(); > - train_timing(); > + time_init(); > kb_buf_init(&cons_buf); > arch_init(); > block_init(); > diff --git a/kern/src/syscall.c b/kern/src/syscall.c > index 852ad4e..dd26908 100644 > --- a/kern/src/syscall.c > +++ b/kern/src/syscall.c > @@ -47,11 +47,9 @@ static size_t systrace_fill_pretty_buf(struct > systrace_record *trace, bool entry) > { > size_t len = 0; > - struct timespec ts_start; > - struct timespec ts_end; > + struct timespec ts_start = > tsc2timespec(trace->start_timestamp); > + struct timespec ts_end = tsc2timespec(trace->end_timestamp); > > - tsc2timespec(trace->start_timestamp, &ts_start); > - tsc2timespec(trace->end_timestamp, &ts_end); > /* Slightly different formats between entry and exit. Entry > has retval set > * to ---, and begins with E. Exit begins with X. */ > if (entry) { > @@ -488,7 +486,7 @@ static int sys_nanosleep(struct proc *p, > * years, which should be sufficiently long enough to ensure > we don't > * overflow). */ > if (waserror()) { > - tsc2timespec(read_tsc() - tsc, &krem); > + krem = tsc2timespec(read_tsc() - tsc); > if (rem && memcpy_to_user(p, rem, &krem, > sizeof(struct timespec))) set_errno(EFAULT); > poperror(); > @@ -1988,26 +1986,9 @@ intreg_t sys_rmdir(struct proc *p, const char > *path, size_t path_l) > intreg_t sys_gettimeofday(struct proc *p, int *buf) > { > - static spinlock_t gtod_lock = SPINLOCK_INITIALIZER; > - static int t0 = 0; > + struct timeval tv = nsec2timeval(epoch_nsec()); > > - spin_lock(&gtod_lock); > - if(t0 == 0) > - > -#if (defined CONFIG_APPSERVER) > - t0 = ufe(time,0,0,0,0); > -#else > - // Nanwan's birthday, bitches!! 
> - t0 = 1242129600; > -#endif > - spin_unlock(&gtod_lock); > - > - long long dt = read_tsc(); > - /* TODO: This probably wants its own function, using a > struct timeval */ > - long kbuf[2] = {t0+dt/__proc_global_info.tsc_freq, > - > (dt%__proc_global_info.tsc_freq)*1000000/__proc_global_info.tsc_freq}; > - > - return memcpy_to_user_errno(p,buf,kbuf,sizeof(kbuf)); > + return memcpy_to_user_errno(p, buf, &tv, sizeof(tv)); > } > > intreg_t sys_tcgetattr(struct proc *p, int fd, void *termios_p) > diff --git a/kern/src/time.c b/kern/src/time.c > index a18578c..250e680 100644 > --- a/kern/src/time.c > +++ b/kern/src/time.c > @@ -12,7 +12,7 @@ > * attainable by using the TSC (or whatever timing source). > * > * For more detailed TSC measurements, use test_rdtsc() in > k/a/i/rdtsc_test.c */ -void train_timing() > +static void train_timing(void) > { > uint64_t min_overhead = UINT64_MAX; > uint64_t max_overhead = 0; > @@ -50,107 +50,87 @@ void timer_interrupt(struct hw_trapframe *hw_tf, > void *data) __trigger_tchain(&per_cpu_info[core_id()].tchain, hw_tf); > } > > -/* We can overflow/wraparound when we multiply up, but we have to > divide last, > - * or else we lose precision. If we're too big and will overflow, > we'll > - * sacrifice precision for correctness, and degrade to the next > lower level > - * (losing 3 digits worth). The recursive case shouldn't overflow, > since it > - * called something that scaled down the tsc_time by more than 1000. > */ -uint64_t tsc2sec(uint64_t tsc_time) > +/* > + * We use scaled integer arithmetic for converting between TSC clock > cycles > + * and nanoseconds. In each case we use a fixed shift value of 32 > which > + * gives a very high degree of accuracy. > + * > + * The actual scaling calculations rely on being able use the 128 bit > + * product of two unsigned 64 bit numbers as an intermediate result > + * in the calculation. 
Fortunately, on x86_64 at least, gcc's 128 bit > + * support is sufficiently good that it generates optimal code for > this > + * calculation without the need to write any assembler. > + */ > +static inline uint64_t mult_shift_64(uint64_t a, uint64_t b, uint8_t > shift) { > - return tsc_time / __proc_global_info.tsc_freq; > + return ((unsigned __int128)a * b) >> shift; > } > > -uint64_t tsc2msec(uint64_t tsc_time) > -{ > - if (mult_will_overflow_u64(tsc_time, 1000)) > - return tsc2sec(tsc_time) * 1000; > - else > - return (tsc_time * 1000) / > __proc_global_info.tsc_freq; -} > +static uint64_t cycles_to_nsec_mult; > +static uint64_t nsec_to_cycles_mult; > > -uint64_t tsc2usec(uint64_t tsc_time) > -{ > - if (mult_will_overflow_u64(tsc_time, 1000000)) > - return tsc2msec(tsc_time) * 1000; > - else > - return (tsc_time * 1000000) / > __proc_global_info.tsc_freq; -} > +#define CYCLES_TO_NSEC_SHIFT 32 > +#define NSEC_TO_CYCLES_SHIFT 32 > > -uint64_t tsc2nsec(uint64_t tsc_time) > +static void cycles_to_nsec_init(uint64_t tsc_freq_hz) > { > - if (mult_will_overflow_u64(tsc_time, 1000000000)) > - return tsc2usec(tsc_time) * 1000; > - else > - return (tsc_time * 1000000000) / > __proc_global_info.tsc_freq; > + cycles_to_nsec_mult = (NSEC_PER_SEC << > CYCLES_TO_NSEC_SHIFT) / tsc_freq_hz; } > > -uint64_t sec2tsc(uint64_t sec) > +static void nsec_to_cycles_init(uint64_t tsc_freq_hz) > { > - if (mult_will_overflow_u64(sec, > __proc_global_info.tsc_freq)) { > - /* in this case, we simply can't express the number > of ticks */ > - warn("Wraparound in sec2tsc(), rounding up"); > - return (uint64_t)(-1); > - } else { > - return sec * __proc_global_info.tsc_freq; > - } > -} > + uint64_t divisor = NSEC_PER_SEC; > > -uint64_t msec2tsc(uint64_t msec) > -{ > - if (mult_will_overflow_u64(msec, > __proc_global_info.tsc_freq)) > - return sec2tsc(msec / 1000); > - else > - return (msec * __proc_global_info.tsc_freq) / 1000; > + /* > + * In the unlikely event that the TSC frequency is 
greater > + * that (1 << 32) we have to lose a little precision to > + * avoid overflow in the calculation of the multiplier. > + */ > + while (tsc_freq_hz >= ((uint64_t)1 << NSEC_TO_CYCLES_SHIFT)) > { > + tsc_freq_hz >>= 1; > + divisor >>= 1; > + } > + nsec_to_cycles_mult = (tsc_freq_hz << > NSEC_TO_CYCLES_SHIFT) / divisor; } > > -uint64_t usec2tsc(uint64_t usec) > +uint64_t tsc2nsec(uint64_t tsc_time) > { > - if (mult_will_overflow_u64(usec, > __proc_global_info.tsc_freq)) > - return msec2tsc(usec / 1000); > - else > - return (usec * __proc_global_info.tsc_freq) / > 1000000; > + return mult_shift_64(tsc_time, cycles_to_nsec_mult, > CYCLES_TO_NSEC_SHIFT); } > > uint64_t nsec2tsc(uint64_t nsec) > { > - if (mult_will_overflow_u64(nsec, > __proc_global_info.tsc_freq)) > - return usec2tsc(nsec / 1000); > - else > - return (nsec * __proc_global_info.tsc_freq) / > 1000000000; > + return mult_shift_64(nsec, nsec_to_cycles_mult, > NSEC_TO_CYCLES_SHIFT); } > > -/* TODO: figure out what epoch time TSC == 0 is and store that as > boot_tsc */ -static uint64_t boot_sec = 1242129600; /* nanwan's > birthday */ - > -uint64_t epoch_tsc(void) > -{ > - return read_tsc() + sec2tsc(boot_sec); > -} > - > -uint64_t epoch_sec(void) > -{ > - return tsc2sec(epoch_tsc()); > -} > - > -uint64_t epoch_msec(void) > +/* > + * Rudimentary timekeeping implementation. > + * > + * Nothing here yet apart from the base walltime and TSC cycle values > + * at system init time. > + */ > +static struct { > + uint64_t walltime_ns_last; > + uint64_t tsc_cycles_last; > +} timekeeping; > + > + > +/* > + * Return nanoseconds since the UNIX epoch, 1st January, 1970. 
> + */ > +uint64_t epoch_nsec(void) > { > - return tsc2msec(epoch_tsc()); > + uint64_t cycles = read_tsc() - timekeeping.tsc_cycles_last; > + return timekeeping.walltime_ns_last + tsc2nsec(cycles); > } > > -uint64_t epoch_usec(void) > +void time_init(void) > { > - return tsc2usec(epoch_tsc()); > -} > + train_timing(); > > -uint64_t epoch_nsec(void) > -{ > - return tsc2nsec(epoch_tsc()); > -} > + timekeeping.walltime_ns_last = read_persistent_clock(); > + timekeeping.tsc_cycles_last = read_tsc(); > > -void tsc2timespec(uint64_t tsc_time, struct timespec *ts) > -{ > - ts->tv_sec = tsc2sec(tsc_time); > - /* subtract off everything but the remainder */ > - tsc_time -= sec2tsc(ts->tv_sec); > - ts->tv_nsec = tsc2nsec(tsc_time); > + cycles_to_nsec_init(__proc_global_info.tsc_freq); > + nsec_to_cycles_init(__proc_global_info.tsc_freq); > } > diff --git a/kern/src/vfs.c b/kern/src/vfs.c > index dac0468..a4dae4a 100644 > --- a/kern/src/vfs.c > +++ b/kern/src/vfs.c > @@ -1023,7 +1023,7 @@ void load_inode(struct dentry *dentry, unsigned > long ino) > * note we don't pass this an nd, like Linux does... */ > static struct inode *create_inode(struct dentry *dentry, int mode) > { > - uint64_t now = epoch_sec(); > + struct timespec now = nsec2timespec(epoch_nsec()); > /* note it is the i_ino that uniquely identifies a file in > the specific > * filesystem. there's a diff between creating an inode > (even for an in-use > * ino) and then filling it in, and vs creating a brand new > one. 
@@ -1036,12 +1036,12 @@ static struct inode *create_inode(struct > dentry *dentry, int mode) inode->i_nlink = 1; > inode->i_size = 0; > inode->i_blocks = 0; > - inode->i_atime.tv_sec = now; > - inode->i_ctime.tv_sec = now; > - inode->i_mtime.tv_sec = now; > - inode->i_atime.tv_nsec = 0; > - inode->i_ctime.tv_nsec = 0; > - inode->i_mtime.tv_nsec = 0; > + inode->i_atime.tv_sec = now.tv_sec; > + inode->i_ctime.tv_sec = now.tv_sec; > + inode->i_mtime.tv_sec = now.tv_sec; > + inode->i_atime.tv_nsec = now.tv_nsec; > + inode->i_ctime.tv_nsec = now.tv_nsec; > + inode->i_mtime.tv_nsec = now.tv_nsec; > inode->i_bdev = inode->i_sb->s_bdev; > /* when we have notions of users, do something here: */ > inode->i_uid = 0; > @@ -2057,7 +2057,7 @@ int do_rename(char *old_path, char *new_path) > struct dentry *old_d, *new_d, *unlink_d; > int error; > int retval = 0; > - uint64_t now; > + struct timespec now; > > nd_o->intent = LOOKUP_ACCESS; /* maybe, might need another > type */ > @@ -2186,15 +2186,15 @@ int do_rename(char *old_path, char *new_path) > dcache_put(old_dir_d->d_sb, old_d); > > /* TODO could have a helper for this, but it's going away > soon */ > - now = epoch_sec(); > - old_dir_i->i_ctime.tv_sec = now; > - old_dir_i->i_mtime.tv_sec = now; > - old_dir_i->i_ctime.tv_nsec = 0; > - old_dir_i->i_mtime.tv_nsec = 0; > - new_dir_i->i_ctime.tv_sec = now; > - new_dir_i->i_mtime.tv_sec = now; > - new_dir_i->i_ctime.tv_nsec = 0; > - new_dir_i->i_mtime.tv_nsec = 0; > + now = nsec2timespec(epoch_nsec()); > + old_dir_i->i_ctime.tv_sec = now.tv_sec; > + old_dir_i->i_mtime.tv_sec = now.tv_sec; > + old_dir_i->i_ctime.tv_nsec = now.tv_nsec; > + old_dir_i->i_mtime.tv_nsec = now.tv_nsec; > + new_dir_i->i_ctime.tv_sec = now.tv_sec; > + new_dir_i->i_mtime.tv_sec = now.tv_sec; > + new_dir_i->i_ctime.tv_nsec = now.tv_nsec; > + new_dir_i->i_mtime.tv_nsec = now.tv_nsec; > > /* fall-through */ > out_paths_and_refs: > @@ -2211,7 +2211,7 @@ out_old_path: > int do_truncate(struct inode *inode, 
off64_t len) > { > off64_t old_len; > - uint64_t now; > + struct timespec now; > if (len < 0) { > set_errno(EINVAL); > return -1; > @@ -2236,11 +2236,11 @@ int do_truncate(struct inode *inode, off64_t > len) pm_remove_contig(inode->i_mapping, old_len >> PGSHIFT, > (len >> PGSHIFT) - (old_len >> > PGSHIFT)); } > - now = epoch_sec(); > - inode->i_ctime.tv_sec = now; > - inode->i_mtime.tv_sec = now; > - inode->i_ctime.tv_nsec = 0; > - inode->i_mtime.tv_nsec = 0; > + now = nsec2timespec(epoch_nsec()); > + inode->i_ctime.tv_sec = now.tv_sec; > + inode->i_mtime.tv_sec = now.tv_sec; > + inode->i_ctime.tv_nsec = now.tv_nsec; > + inode->i_mtime.tv_nsec = now.tv_nsec; > return 0; > } > > diff --git a/tools/apps/busybox/defconfig-1.17.3 > b/tools/apps/busybox/defconfig-1.17.3 index e9577b1..d1c59c6 100644 > --- a/tools/apps/busybox/defconfig-1.17.3 > +++ b/tools/apps/busybox/defconfig-1.17.3 > @@ -163,7 +163,7 @@ CONFIG_FEATURE_CPIO_P=y > # > # CONFIG_BASENAME is not set > CONFIG_CAT=y > -# CONFIG_DATE is not set > +CONFIG_DATE=y > # CONFIG_FEATURE_DATE_ISOFMT is not set > # CONFIG_FEATURE_DATE_NANO is not set > # CONFIG_FEATURE_DATE_COMPAT is not set -- You received this message because you are subscribed to the Google Groups "Akaros" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. For more options, visit https://groups.google.com/d/optout.
