userland clock_gettime proof of concept

classic Classic list List threaded Threaded
203 messages Options
123456 ... 11
Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> Date: Fri, 29 May 2020 17:51:50 +0300
> From: Paul Irofti <[hidden email]>
>
> On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 29 May 2020 13:45:37 +0100
> > > From: Stuart Henderson <[hidden email]>
> > >
> > > On 2020/05/29 13:50, Paul Irofti wrote:
> > > > +struct __timekeep {
> > > > + uint32_t major; /* version major number */
> > > > + uint32_t minor; /* version minor number */
> > > > +
> > > > + u_int64_t th_scale;
> > > > + unsigned int th_offset_count;
> > > > + struct bintime th_offset;
> > > > + struct bintime th_naptime;
> > > > + struct bintime th_boottime;
> > > > + volatile unsigned int th_generation;
> > > > +
> > > > + unsigned int tc_user;
> > > > + unsigned int tc_counter_mask;
> > > > +};
> > >
> > > Ah good, you got rid of u_int, that was causing problems with port builds.
> >
> > That in itself is a problem.  This means <time.h> is the wrong place
> > for this struct.  We need to find a better place for this.
> >
> > Since this is now closely linked to the timecounter stuff
> > <sys/timetc.h> would be an obvious place.  Now that file has:
> >
> > #ifndef _KERNEL
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > you could change that into
> >
> > #if !defined(_KERNEL) && !defined(_LIBC)
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > and make sure you #define _LIBC brefore uncluding this file where it
> > is needed.  As few places as possible obviously.
>
> Done. Also includes claudio@'s observation.

What are your plans to deal with the potential "skew" between the TSCs
on different processors?  We can probably tolerate a small skew
without having to worry about it un userland as long as the skew is
smaller than the time it takes to do a context switch.  If you want to
handle the skew in userland, you need to export the skews somewhere on
the timekeep page and we'd need to use rdtscp to read the TSC and
associate it with the right skew.

A few more notes below.
 

> diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
> index e995309ed71..caa4452a3d9 100644
> --- lib/libc/arch/amd64/gen/Makefile.inc
> +++ lib/libc/arch/amd64/gen/Makefile.inc
> @@ -2,6 +2,6 @@
>  
>  SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
>   sigsetjmp.S
> -SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
> +SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
>  SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
>   fpsetround.S fpsetsticky.S
> diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
> new file mode 100644
> index 00000000000..b14c862c61a
> --- /dev/null
> +++ lib/libc/arch/amd64/gen/rdtsc.c
> @@ -0,0 +1,26 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +
> +uint64_t
> +tc_get_timecount_md(void)
> +{
> + uint32_t hi, lo;
> + asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
> + return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> +}
> diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
> index cd056c85719..2b25d49f32a 100644
> --- lib/libc/asr/asr.c
> +++ lib/libc/asr/asr.c
> @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
>   struct timespec pollstart, pollend, elapsed;
>   int r;
>  
> - if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
>   return -1;
>  
>   while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
> - if (clock_gettime(CLOCK_MONOTONIC, &pollend))
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
>   return -1;
>   timespecsub(&pollend, &pollstart, &elapsed);
>   timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
> @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
>   asr->a_rtime = 0;
>   }
>  
> - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>   return;
>  
>   if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
> diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
> index 82de8fa33b7..02fd3013cc1 100644
> --- lib/libc/crypt/bcrypt.c
> +++ lib/libc/crypt/bcrypt.c
> @@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
>   char buf[_PASSWORD_LEN];
>   int duration;
>  
> - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
> + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
>   bcrypt_newhash("testpassword", r, buf, sizeof(buf));
> - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
> + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
>  
>   duration = after.tv_sec - before.tv_sec;
>   duration *= 1000000;
> diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
> index 270f54aada5..1ff97c12b7b 100644
> --- lib/libc/dlfcn/init.c
> +++ lib/libc/dlfcn/init.c
> @@ -20,6 +20,7 @@
>  
>  #include <sys/types.h>
>  #include <sys/syscall.h>
> +#include <sys/timetc.h> /* timekeep */
>  
>  #ifndef PIC
>  #include <sys/mman.h>
> @@ -45,8 +46,9 @@
>  /* XXX should be in an include file shared with csu */
>  char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
>  
> -/* provide definition for this */
> +/* provide definition for these */

s/definition/definitions/

>  int _pagesize = 0;
> +void *_timekeep = NULL;
>  
>  /*
>   * In dynamicly linked binaries environ and __progname are overriden by
> @@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
>  
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
> +#if defined(__amd64)
> +uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> +#else
> +uint64_t (*const tc_get_timecount)(void) = NULL;
> +#endif
> +

This works fine as long as each architecture provides a single
possible timecounter that can be used from userland.  For now I think
that is good enough.  The non-amd64 architectures where we could
implement this on all have a single usable clock I think.

>  void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
>  void
> @@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
>   phnum = aux->au_v;
>   break;
>  #endif /* !PIC */
> + case AUX_openbsd_timekeep:
> + if (tc_get_timecount)
> + _timekeep = (void *)aux->au_v;
> + break;
>   }
>   }
>  
> diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
> index 1286a96fe40..32f86eda50f 100644
> --- lib/libc/gen/auth_subr.c
> +++ lib/libc/gen/auth_subr.c
> @@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
>  
>   if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
>   if (as->now.tv_sec == 0)
> - gettimeofday(&as->now, NULL);
> + WRAP(gettimeofday)(&as->now, NULL);
>   if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
>   as->state &= ~AUTH_ALLOW;
>   as->state |= AUTH_EXPIRED;
> @@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
>  
>   if (as->pwd && (quad_t)as->pwd->pw_change) {
>   if (as->now.tv_sec == 0)
> - gettimeofday(&as->now, NULL);
> + WRAP(gettimeofday)(&as->now, NULL);
>   if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
>   as->state &= ~AUTH_ALLOW;
>   as->state |= AUTH_PWEXPIRED;
> diff --git lib/libc/gen/time.c lib/libc/gen/time.c
> index 3bbd0d733d1..b3ce9a800f1 100644
> --- lib/libc/gen/time.c
> +++ lib/libc/gen/time.c
> @@ -36,7 +36,7 @@ time(time_t *t)
>  {
>   struct timeval tt;
>  
> - if (gettimeofday(&tt, NULL) == -1)
> + if (WRAP(gettimeofday)(&tt, NULL) == -1)
>   return (-1);
>   if (t)
>   *t = (time_t)tt.tv_sec;
> diff --git lib/libc/gen/times.c lib/libc/gen/times.c
> index 02e4dd44b5c..36841810d1b 100644
> --- lib/libc/gen/times.c
> +++ lib/libc/gen/times.c
> @@ -52,7 +52,7 @@ times(struct tms *tp)
>   return ((clock_t)-1);
>   tp->tms_cutime = CONVTCK(ru.ru_utime);
>   tp->tms_cstime = CONVTCK(ru.ru_stime);
> - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>   return ((clock_t)-1);
>   return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
>  }
> diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
> index 520a5954025..845cbe80356 100644
> --- lib/libc/gen/timespec_get.c
> +++ lib/libc/gen/timespec_get.c
> @@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
>  {
>   switch (base) {
>   case TIME_UTC:
> - if (clock_gettime(CLOCK_REALTIME, ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
>   return 0;
>   break;
>   default:
> diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
> index ed112320fa2..df717021cab 100644
> --- lib/libc/hidden/sys/time.h
> +++ lib/libc/hidden/sys/time.h
> @@ -24,7 +24,7 @@ PROTO_NORMAL(adjfreq);
>  PROTO_NORMAL(adjtime);
>  PROTO_NORMAL(futimes);
>  PROTO_NORMAL(getitimer);
> -PROTO_NORMAL(gettimeofday);
> +PROTO_WRAP(gettimeofday);
>  PROTO_NORMAL(setitimer);
>  PROTO_NORMAL(settimeofday);
>  PROTO_NORMAL(utimes);
> diff --git lib/libc/hidden/sys/timetc.h lib/libc/hidden/sys/timetc.h
> new file mode 100644
> index 00000000000..08d505e4214
> --- /dev/null
> +++ lib/libc/hidden/sys/timetc.h
> @@ -0,0 +1,39 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _LIBC_SYS_TIMETC_H_
> +#define _LIBC_SYS_TIMETC_H_
> +
> +#define _LIBC
> +#include <sys/types.h>
> +#include <sys/time.h>
> +
> +#include_next <sys/timetc.h>
> +
> +__BEGIN_HIDDEN_DECLS
> +extern void *_timekeep;
> +
> +extern uint64_t (*const tc_get_timecount)(void);
> +uint64_t tc_get_timecount_md(void);
> +
> +void _microtime(struct timeval *tvp, struct __timekeep *tk);
> +void _nanotime(struct timespec *tsp, struct __timekeep *tk);
> +void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
> +void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
> +__END_HIDDEN_DECLS
> +
> +#endif /* !_LIBC_SYS_TIMETC_H_ */
> diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
> index 18c49f8fcb9..d8e1e0caf64 100644
> --- lib/libc/hidden/time.h
> +++ lib/libc/hidden/time.h
> @@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
>  PROTO_STD_DEPRECATED(clock);
>  PROTO_DEPRECATED(clock_getcpuclockid);
>  PROTO_NORMAL(clock_getres);
> -PROTO_NORMAL(clock_gettime);
> +PROTO_WRAP(clock_gettime);
>  PROTO_NORMAL(clock_settime);
>  PROTO_STD_DEPRECATED(ctime);
>  PROTO_DEPRECATED(ctime_r);
> diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
> index 763e420bb88..9babb28470a 100644
> --- lib/libc/net/res_random.c
> +++ lib/libc/net/res_random.c
> @@ -219,7 +219,7 @@ res_initid(void)
>   if (ru_prf != NULL)
>   arc4random_buf(ru_prf, sizeof(*ru_prf));
>  
> - clock_gettime(CLOCK_MONOTONIC, &ts);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>   ru_reseed = ts.tv_sec + RU_OUT;
>   ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
>  }
> @@ -232,7 +232,7 @@ __res_randomid(void)
>   u_int r;
>   static void *randomid_mutex;
>  
> - clock_gettime(CLOCK_MONOTONIC, &ts);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>   pid = getpid();
>  
>   _MUTEX_LOCK(&randomid_mutex);
> diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
> index 402d98cede4..917a6d42b8a 100644
> --- lib/libc/rpc/auth_unix.c
> +++ lib/libc/rpc/auth_unix.c
> @@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
>   /*
>   * fill in param struct from the given params
>   */
> - (void)gettimeofday(&now,  NULL);
> + (void)WRAP(gettimeofday)(&now,  NULL);
>   aup.aup_time = now.tv_sec;
>   aup.aup_machname = machname;
>   aup.aup_uid = uid;
> @@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
>   goto done;
>  
>   /* update the time and serialize in place */
> - (void)gettimeofday(&now, NULL);
> + (void)WRAP(gettimeofday)(&now, NULL);
>   aup.aup_time = now.tv_sec;
>   xdrs.x_op = XDR_ENCODE;
>   XDR_SETPOS(&xdrs, 0);
> diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
> index 8e6ef515b0e..927b4bf2028 100644
> --- lib/libc/rpc/clnt_tcp.c
> +++ lib/libc/rpc/clnt_tcp.c
> @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
>   pfd[0].events = POLLIN;
>   TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
>   delta = wait;
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   for (;;) {
>   r = ppoll(pfd, 1, &delta, NULL);
>   save_errno = errno;
>  
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&start, &after, &duration);
>   timespecsub(&wait, &duration, &delta);
>   if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
> index 68d01674410..92e1d5c350d 100644
> --- lib/libc/rpc/clnt_udp.c
> +++ lib/libc/rpc/clnt_udp.c
> @@ -265,7 +265,7 @@ send_again:
>   reply_msg.acpted_rply.ar_results.where = resultsp;
>   reply_msg.acpted_rply.ar_results.proc = xresults;
>  
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   for (;;) {
>   switch (ppoll(pfd, 1, &wait, NULL)) {
>   case 0:
> @@ -283,7 +283,7 @@ send_again:
>   /* FALLTHROUGH */
>   case -1:
>   if (errno == EINTR) {
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&after, &start, &duration);
>   timespecadd(&time_waited, &duration, &time_waited);
>   if (timespeccmp(&time_waited, &timeout, <))
> diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
> index f9d7a70938f..6c99db84359 100644
> --- lib/libc/rpc/svc_tcp.c
> +++ lib/libc/rpc/svc_tcp.c
> @@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>   * A timeout is fatal for the connection.
>   */
>   delta = wait_per_try;
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   pfd[0].fd = sock;
>   pfd[0].events = POLLIN;
>   do {
> @@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>   case -1:
>   if (errno != EINTR)
>   goto fatal_err;
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&after, &start, &duration);
>   timespecsub(&wait_per_try, &duration, &delta);
>   if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/shlib_version lib/libc/shlib_version
> index 06f98b01084..5fb0770494f 100644
> --- lib/libc/shlib_version
> +++ lib/libc/shlib_version
> @@ -1,4 +1,4 @@
>  major=96
> -minor=0
> +minor=1
>  # note: If changes were made to include/thread_private.h or if system calls
>  # were added/changed then librthread/shlib_version must also be updated.
> diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
> index 34769576ced..d57418d81bf 100644
> --- lib/libc/sys/Makefile.inc
> +++ lib/libc/sys/Makefile.inc
> @@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
>  
>  # glue to offer userland wrappers for some syscalls
>  SRCS+= posix_madvise.c pthread_sigmask.c \
> - w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
> + w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
> + w_clock_gettime.c w_gettimeofday.c microtime.c
>  
>  # glue for compat with old syscall interfaces.
>  SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
> @@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
>  ASM= __semctl.o __syscall.o __thrsigdivert.o \
>   access.o acct.o adjfreq.o adjtime.o \
>   bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
> - clock_getres.o clock_gettime.o clock_settime.o \
> + clock_getres.o clock_settime.o \
>   dup.o dup2.o dup3.o \
>   execve.o \
>   faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
> @@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
>   getgroups.o getitimer.o getpeername.o getpgid.o \
>   getpriority.o getresgid.o getresuid.o \
>   getrlimit.o getrusage.o getsid.o getsockname.o \
> - getsockopt.o gettimeofday.o ioctl.o \
> + getsockopt.o ioctl.o \
>   kevent.o kill.o kqueue.o ktrace.o lchown.o \
>   link.o linkat.o listen.o lstat.o madvise.o \
>   minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
> @@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
>  SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
>  DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
>  
> -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
> +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
> + clock_gettime.o gettimeofday.o
>  PHIDDEN=${HIDDEN:.o=.po}
>  SHIDDEN=${HIDDEN:.o=.so}
>  DHIDDEN=${HIDDEN:.o=.do}
> diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
> new file mode 100644
> index 00000000000..344ef44af22
> --- /dev/null
> +++ lib/libc/sys/microtime.c
> @@ -0,0 +1,157 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/atomic.h>
> +#include <sys/timetc.h>
> +
> +#include <time.h>
> +
> +/*
> + * Return the difference between the timehands' counter value now and what
> + * was when we copied it to the timehands' offset_count.
> + */
> +static __inline u_int

s/__inline/inline/

> +tc_delta(struct __timekeep *tk)
> +{
> + return ((tc_get_timecount() - tk->th_offset_count) &
> +    tk->tc_counter_mask);
> +}
> +
> +static inline void
> +bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
> +{
> + ct->sec = bt->sec;
> + if (bt->frac > bt->frac + x)
> + ct->sec++;
> + ct->frac = bt->frac + x;
> +}
> +
> +static inline void
> +BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
> +{
> + ts->tv_sec = bt->sec;
> + ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
> +}
> +
> +static inline void
> +BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
> +{
> + tv->tv_sec = bt->sec;
> + tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
> +}
> +
> +static void
> +binuptime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + *bt = tk->th_offset;
> + bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static inline void
> +bintimeadd(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> + dt->sec = bt->sec + ct->sec;
> + if (bt->frac > bt->frac + ct->frac)
> + dt->sec++;
> + dt->frac = bt->frac + ct->frac;
> +}
> +
> +static inline void
> +bintimesub(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> + dt->sec = bt->sec - ct->sec;
> + if (bt->frac < bt->frac - ct->frac)
> + dt->sec--;
> + dt->frac = bt->frac - ct->frac;
> +}
> +
> +static void
> +binruntime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
> + bintimesub(bt, &tk->th_naptime, bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static void
> +bintime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + *bt = tk->th_offset;
> + bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> + bintimeadd(bt, &tk->th_boottime, bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +void
> +_microtime(struct timeval *tvp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + bintime(&bt, tk);
> + BINTIME_TO_TIMEVAL(&bt, tvp);
> +}
> +
> +void
> +_nanotime(struct timespec *tsp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + bintime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> +
> +void
> +_nanoruntime(struct timespec *ts, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + binruntime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, ts);
> +}
> +
> +
> +void
> +_nanouptime(struct timespec *tsp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + binuptime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
> new file mode 100644
> index 00000000000..d14406fe408
> --- /dev/null
> +++ lib/libc/sys/w_clock_gettime.c
> @@ -0,0 +1,46 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/timetc.h>
> +
> +#include <time.h>
> +
> +int
> +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
> +{
> + struct __timekeep *timekeep = _timekeep;
> +
> + if (timekeep == NULL || timekeep->tc_user == 0)
> + return clock_gettime(clock_id, tp);
> +
> + switch (clock_id) {
> + case CLOCK_REALTIME:
> + _nanotime(tp, timekeep);
> + break;
> + case CLOCK_UPTIME:
> + _nanoruntime(tp, timekeep);
> + break;
> + case CLOCK_MONOTONIC:
> + case CLOCK_BOOTTIME:
> + _nanouptime(tp, timekeep);
> + break;
> + default:
> + return clock_gettime(clock_id, tp);
> + }
> + return 0;
> +}
> +DEF_WRAP(clock_gettime);
> diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
> new file mode 100644
> index 00000000000..88276935675
> --- /dev/null
> +++ lib/libc/sys/w_gettimeofday.c
> @@ -0,0 +1,37 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Robert Nagy <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/timetc.h>
> +
> +int
> +WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
> +{
> + struct __timekeep *timekeep = _timekeep;
> + static struct timezone zerotz = { 0, 0 };
> +
> + if (timekeep == NULL || timekeep->tc_user == 0)
> + return gettimeofday(tp, tzp);
> +
> + if (tp)
> + _microtime(tp, timekeep);
> +
> + if (tzp)
> + tzp = &zerotz;
> +
> + return 0;
> +}
> +DEF_WRAP(gettimeofday);
> diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
> index 788890add89..df2239438d2 100644
> --- lib/libc/thread/synch.h
> +++ lib/libc/thread/synch.h
> @@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
>   if (abs == NULL)
>   return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
>  
> - if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
> + if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
>   return (EINVAL);
>  
>   rel.tv_sec = abs->tv_sec - rel.tv_sec;
> diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
> index 3f5f2c5b42b..6eaf8b107c6 100644
> --- sys/arch/alpha/alpha/clock.c
> +++ sys/arch/alpha/alpha/clock.c
> @@ -64,7 +64,7 @@ int clk_irq = 0;
>  
>  u_int rpcc_get_timecount(struct timecounter *);
>  struct timecounter rpcc_timecounter = {
> - rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
> + rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
>  };
>  
>  extern todr_chip_handle_t todr_handle;
> diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
> index 7a1dcb4ad75..3db93d88dec 100644
> --- sys/arch/amd64/amd64/tsc.c
> +++ sys/arch/amd64/amd64/tsc.c
> @@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
>  #endif
>  
>  struct timecounter tsc_timecounter = {
> - tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
> + tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
>  };
>  
>  uint64_t
> diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
> index 613f7ee0e0f..00da0c6a8d0 100644
> --- sys/arch/amd64/isa/clock.c
> +++ sys/arch/amd64/isa/clock.c
> @@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
>  u_int i8254_simple_get_timecount(struct timecounter *tc);
>  
>  static struct timecounter i8254_timecounter = {
> - i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
> + i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
>  };
>  
>  int clockintr(void *);
> diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
> index 29394141ad5..6b7c6db862f 100644
> --- sys/arch/arm64/dev/agtimer.c
> +++ sys/arch/arm64/dev/agtimer.c
> @@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
>  u_int agtimer_get_timecount(struct timecounter *);
>  
>  static struct timecounter agtimer_timecounter = {
> - agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
> + agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
>  };
>  
>  struct agtimer_pcpu_softc {
> diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
> index 7605845d5e2..061542d532f 100644
> --- sys/arch/armv7/omap/gptimer.c
> +++ sys/arch/armv7/omap/gptimer.c
> @@ -117,7 +117,7 @@ int gptimer_irq = 0;
>  u_int gptimer_get_timecount(struct timecounter *);
>  
>  static struct timecounter gptimer_timecounter = {
> - gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
> + gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
>  };
>  
>  volatile u_int32_t nexttickevent;
> diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
> index 14a243c78d0..41028f9a602 100644
> --- sys/arch/armv7/sunxi/sxitimer.c
> +++ sys/arch/armv7/sunxi/sxitimer.c
> @@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
>  u_int sxitimer_get_timecount(struct timecounter *);
>  
>  static struct timecounter sxitimer_timecounter = {
> - sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
> + sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
>  };
>  
>  bus_space_tag_t sxitimer_iot;
> diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
> index 4c594ab5ec7..8cce6c3a893 100644
> --- sys/arch/hppa/dev/clock.c
> +++ sys/arch/hppa/dev/clock.c
> @@ -47,7 +47,7 @@ int cpu_hardclock(void *);
>  u_int itmr_get_timecount(struct timecounter *);
>  
>  struct timecounter itmr_timecounter = {
> - itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
> + itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
>  };
>  
>  extern todr_chip_handle_t todr_handle;
> diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
> index 09a6db983f2..dd74bd425ad 100644
> --- sys/arch/i386/isa/clock.c
> +++ sys/arch/i386/isa/clock.c
> @@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
>  u_int i8254_simple_get_timecount(struct timecounter *tc);
>  
>  static struct timecounter i8254_timecounter = {
> - i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
> + i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
>  };
>  struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
>  u_long rtclock_tval;
> diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
> index 9d9f061eef9..bb8e4c7f9ae 100644
> --- sys/arch/i386/pci/geodesc.c
> +++ sys/arch/i386/pci/geodesc.c
> @@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
>   0xffffffff, /* counter_mask */
>   27000000, /* frequency */
>   "GEOTSC", /* name */
> - 2000 /* quality */
> + 2000, /* quality */
> + NULL, /* private bits */
> + 0 /* expose to user */
>  };
>  
>  int
> diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
> index 8b8aa4ac430..a6f324e66f3 100644
> --- sys/arch/i386/pci/gscpm.c
> +++ sys/arch/i386/pci/gscpm.c
> @@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
>   0xffffff, /* counter_mask */
>   3579545, /* frequency */
>   "GSCPM", /* name */
> - 1000 /* quality */
> + 1000, /* quality */
> + NULL, /* private bits */
> + 0 /* expose to user */
>  };
>  
>  struct cfattach gscpm_ca = {
> diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
> index 6abf1627de2..90814d2dba0 100644
> --- sys/arch/i386/pci/ichpcib.c
> +++ sys/arch/i386/pci/ichpcib.c
> @@ -64,6 +64,8 @@ struct timecounter ichpcib_timecounter = {
>   3579545, /* frequency */
>   "ICHPM", /* name */
>   1000 /* quality */
> + NULL, /* private bits */
> + 0 /* expose to user */
>  };
>  
>  struct cfattach ichpcib_ca = {
> diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
> index ac3f1db6ccd..61da18ebff7 100644
> --- sys/arch/loongson/loongson/generic3a_machdep.c
> +++ sys/arch/loongson/loongson/generic3a_machdep.c
> @@ -99,6 +99,8 @@ struct timecounter rs780e_timecounter = {
>   .tc_frequency = HPET_FREQ,
>   .tc_name = "hpet",
>   .tc_quality = 100
> + .tc_priv = NULL,
> + .tc_user = 0,
>  };
>  
>  /* Firmware entry points */
> diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
> index a04120987e0..6580a4a46bf 100644
> --- sys/arch/luna88k/luna88k/clock.c
> +++ sys/arch/luna88k/luna88k/clock.c
> @@ -112,7 +112,9 @@ struct timecounter clock_tc = {
>   .tc_counter_mask = 0xffffffff,
>   .tc_frequency = 0, /* will be filled in */
>   .tc_name = "clock",
> - .tc_quality = 0
> + .tc_quality = 0,
> + .tc_priv = NULL,
> + .tc_user = 0,
>  };
>  
>  /*
> diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
> index 4a44a92cfc0..8c3ad620be8 100644
> --- sys/arch/macppc/macppc/clock.c
> +++ sys/arch/macppc/macppc/clock.c
> @@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
>  static int32_t ticks_per_intr;
>  
>  static struct timecounter tb_timecounter = {
> - tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
> + tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
>  };
>  
>  /* calibrate the timecounter frequency for the listed models */
> diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
> index d4a42ed5acc..5c4dbadb5bb 100644
> --- sys/arch/mips64/mips64/mips64_machdep.c
> +++ sys/arch/mips64/mips64/mips64_machdep.c
> @@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
>   0xffffffff, /* counter_mask */
>   0, /* frequency */
>   "CP0", /* name */
> - 0 /* quality */
> + 0, /* quality */
> + NULL, /* private bits */
> + 0, /* expose to user */
>  };
>  
>  u_int
> diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
> index 604cb3be3ab..9d4d8564d5c 100644
> --- sys/arch/octeon/octeon/machdep.c
> +++ sys/arch/octeon/octeon/machdep.c
> @@ -152,8 +152,9 @@ struct timecounter ioclock_timecounter = {
>   .tc_name = "ioclock",
>   .tc_quality = 0, /* ioclock can be overridden
>   * by cp0 counter */
> - .tc_priv = 0 /* clock register,
> + .tc_priv = 0, /* clock register,
>   * determined at runtime */
> + .tc_user = 0, /* expose to user */
>  };
>  
>  static int
> diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
> index ba7fa558b96..7b3fa05ddd5 100644
> --- sys/arch/sgi/sgi/ip27_machdep.c
> +++ sys/arch/sgi/sgi/ip27_machdep.c
> @@ -111,7 +111,29 @@ struct timecounter ip27_hub_timecounter = {
>   .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
>   .tc_frequency = 1250000,
>   .tc_name = "hubrt",
> - .tc_quality = 100
> + .tc_quality = 100,
> + .tc_priv = 0,
> + .tc_user = 0,
> +};
> +
> +static int
> +atoi(const char *s)
> +{
> + int n, neg;
> +
> + n = 0;
> + neg = 0;
> +
> + while (*s == '-') {
> + s++;
> + neg = !neg;
> + }
> +
> + while (*s != '\0') {
> + if (*s < '0' || *s > '9')
> + break;
> +
> + n = (10 * n) + (*s - '0');
>  };
>  
>  volatile uint64_t ip27_spinup_a0;
> diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
> index 56b29915c70..827775512ac 100644
> --- sys/arch/sgi/xbow/xheart.c
> +++ sys/arch/sgi/xbow/xheart.c
> @@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
>   .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
>   .tc_frequency = 12500000,
>   .tc_name = "heart",
> - .tc_quality = 100
> + .tc_quality = 100,
> + .tc_priv = NULL,
> + .tc_user = 0,
>  };
>  
>  extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
> diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
> index e24f804dff6..1a7a1afa8c2 100644
> --- sys/arch/sparc64/dev/psycho.c
> +++ sys/arch/sparc64/dev/psycho.c
> @@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
>  u_int stick_get_timecount(struct timecounter *);
>  
>  struct timecounter stick_timecounter = {
> - stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
> + stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
>  };
>  
>  /*
> diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
> index fd5e8a9c15b..5c2e47d386b 100644
> --- sys/arch/sparc64/sparc64/clock.c
> +++ sys/arch/sparc64/sparc64/clock.c
> @@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
>  u_int tick_get_timecount(struct timecounter *);
>  
>  struct timecounter tick_timecounter = {
> - tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
> + tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
>  };
>  
>  u_int sys_tick_get_timecount(struct timecounter *);
>  
>  struct timecounter sys_tick_timecounter = {
> - sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
> + sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
>  };
>  
>  /*
> diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
> index d0ee72cec9b..13177a909da 100644
> --- sys/dev/acpi/acpihpet.c
> +++ sys/dev/acpi/acpihpet.c
> @@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
>   0xffffffff, /* counter_mask (32 bits) */
>   0, /* frequency */
>   0, /* name */
> - 1000 /* quality */
> + 1000, /* quality */
> + NULL, /* private bits */
> + 0, /* expose to user */
>  };
>  
>  #define HPET_TIMERS 3
> diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
> index cdc8c99a17a..89b5a397e47 100644
> --- sys/dev/acpi/acpitimer.c
> +++ sys/dev/acpi/acpitimer.c
> @@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
>   0x00ffffff, /* counter_mask (24 bits) */
>   ACPI_FREQUENCY, /* frequency */
>   0, /* name */
> - 1000 /* quality */
> + 1000, /* quality */
> + NULL, /* private bits */
> + 0, /* expose to user */
>  };
>  
>  struct acpitimer_softc {
> diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
> index 3abe03de722..017611e4b31 100644
> --- sys/dev/pci/amdpm.c
> +++ sys/dev/pci/amdpm.c
> @@ -83,7 +83,9 @@ static struct timecounter amdpm_timecounter = {
>   0xffffff, /* counter_mask */
>   AMDPM_FREQUENCY, /* frequency */
>   "AMDPM", /* name */
> - 1000 /* quality */
> + 1000, /* quality */
> + NULL, /* private bits */
> + 0, /* expose to user */
>  };
>  
>  #define AMDPM_CONFREG 0x40
> diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
> index db806eedf80..ce33cd175e6 100644
> --- sys/dev/pci/viapm.c
> +++ sys/dev/pci/viapm.c
> @@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
>   0xffffff, /* counter_mask */
>   VIAPM_FREQUENCY, /* frequency */
>   "VIAPM", /* name */
> - 1000 /* quality */
> + 1000, /* quality */
> + NULL, /* private bits */
> + 0, /* expose to user */
>  };
>  
>  struct timeout viapm_timeout;
> diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
> index 3ab2ae22831..8e326cf2502 100644
> --- sys/dev/pv/hyperv.c
> +++ sys/dev/pv/hyperv.c
> @@ -143,7 +143,7 @@ struct {
>  };
>  
>  struct timecounter hv_timecounter = {
> - hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
> + hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
>  };
>  
>  struct cfdriver hyperv_cd = {
> diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
> index 6b242f7448d..b80e4d2a484 100644
> --- sys/dev/pv/pvclock.c
> +++ sys/dev/pv/pvclock.c
> @@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
>  };
>  
>  struct timecounter pvclock_timecounter = {
> - pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
> + pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
>  };
>  
>  int
> diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
> index 9b5b8eb3acf..59bc923a6fb 100644
> --- sys/kern/exec_elf.c
> +++ sys/kern/exec_elf.c
> @@ -124,7 +124,7 @@ extern char *syscallnames[];
>  /*
>   * How many entries are in the AuxInfo array we pass to the process?
>   */
> -#define ELF_AUX_ENTRIES 8
> +#define ELF_AUX_ENTRIES 9
>  
>  /*
>   * This is the OpenBSD ELF emul
> @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
>   a->au_v = ap->arg_entry;
>   a++;
>  
> + a->au_id = AUX_openbsd_timekeep;
> + a->au_v = p->p_p->ps_timekeep;
> + a++;
> +
>   a->au_id = AUX_null;
>   a->au_v = 0;
>   a++;
> diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
> index 20480c2fc28..8fbcb0c43ec 100644
> --- sys/kern/kern_exec.c
> +++ sys/kern/kern_exec.c
> @@ -64,6 +64,11 @@
>  #include <uvm/uvm_extern.h>
>  #include <machine/tcb.h>
>  
> +#include <sys/timetc.h>
> +
> +struct uvm_object *timekeep_object;
> +struct __timekeep* timekeep;
> +
>  void unveil_destroy(struct process *ps);
>  
>  const struct kmem_va_mode kv_exec = {
> @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
>   */
>  int exec_sigcode_map(struct process *, struct emul *);
>  
> +/*
> + * Map the shared timekeep page.
> + */
> +int exec_timekeep_map(struct process *);
> +
>  /*
>   * If non-zero, stackgap_random specifies the upper limit of the random gap size
>   * added to the fixed stack position. Must be n^2.
> @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
>   /* map the process's signal trampoline code */
>   if (exec_sigcode_map(pr, pack.ep_emul))
>   goto free_pack_abort;
> + /* map the process's timekeep page */
> + if (exec_timekeep_map(pr))
> + goto free_pack_abort;
>  
>  #ifdef __HAVE_EXEC_MD_MAP
>   /* perform md specific mappings that process might need */
> @@ -863,3 +876,41 @@ exec_sigcode_map(struct process *pr, struct emul *e)
>  
>   return (0);
>  }
> +
> +int
> +exec_timekeep_map(struct process *pr)
> +{
> + size_t timekeep_sz = sizeof(struct __timekeep);
> +
> + /*
> + * Similar to the sigcode object, except that there is a single timekeep
> + * object, and not one per emulation.
> + */
> + if (timekeep_object == NULL) {
> + vaddr_t va;
> +
> + timekeep_object = uao_create(timekeep_sz, 0);
> + uao_reference(timekeep_object);
> +
> + if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
> +    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
> +    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
> + uao_detach(timekeep_object);
> + return (ENOMEM);
> + }
> +
> + timekeep = (struct __timekeep *)va;
> + timekeep->major = 0;
> + timekeep->minor = 0;
> + }
> +
> + uao_reference(timekeep_object);
> + if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
> +    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
> +    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
> + uao_detach(timekeep_object);
> + return (ENOMEM);
> + }
> +
> + return (0);
> +}
> diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
> index 4b9eedf50b9..a3465d3dafc 100644
> --- sys/kern/kern_tc.c
> +++ sys/kern/kern_tc.c
> @@ -64,7 +64,7 @@ dummy_get_timecount(struct timecounter *tc)
>  }
>  
>  static struct timecounter dummy_timecounter = {
> - dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
> + dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
>  };
>  
>  /*
> @@ -480,6 +480,36 @@ tc_setclock(const struct timespec *ts)
>  #endif
>  }
>  
> +void
> +tc_update_timekeep(void)
> +{
> + static struct timecounter *last_tc = NULL;
> +

No blank line here please.

> + struct timehands *th;
> + u_int gen;
> +
> + if (timekeep == NULL)
> + return;
> +
> + th = timehands;
> + gen = th->th_generation;

This gen variable is no longer used.

You need to set timekeep->th_generation to zero here to indicate that
we're in the process of updating the timekeep struct.

> + membar_producer();
> + timekeep->th_scale = th->th_scale;
> + timekeep->th_offset_count = th->th_offset_count;
> + timekeep->th_offset = th->th_offset;
> + timekeep->th_naptime = th->th_naptime;
> + timekeep->th_boottime = th->th_boottime;
> + timekeep->th_generation = th->th_generation;

And you need to move this assignment to...

> + if (last_tc != th->th_counter) {
> + timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
> + timekeep->tc_user = th->th_counter->tc_user;
> + last_tc = th->th_counter;
> + }
> + membar_producer();

...here.

> +
> + return;
> +}
> +
>  /*
>   * Initialize the next struct timehands in the ring and make
>   * it the active timehands.  Along the way we might switch to a different
> @@ -632,6 +662,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
>   time_uptime = th->th_offset.sec;
>   membar_producer();
>   timehands = th;
> +
> + tc_update_timekeep();
>  }
>  
>  /* Report or change the active timecounter hardware. */
> diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
> index a40e0510273..f55b75f1e84 100644
> --- sys/sys/exec_elf.h
> +++ sys/sys/exec_elf.h
> @@ -691,7 +691,8 @@ enum AuxID {
>   AUX_sun_uid = 2000, /* euid */
>   AUX_sun_ruid = 2001, /* ruid */
>   AUX_sun_gid = 2002, /* egid */
> - AUX_sun_rgid = 2003 /* rgid */
> + AUX_sun_rgid = 2003, /* rgid */
> + AUX_openbsd_timekeep = 2004, /* userland clock_gettime */

I'd be happier if you used 4000 instead of 2004.

>  };
>  
>  struct elf_args {
> diff --git sys/sys/proc.h sys/sys/proc.h
> index 357c0c0d52c..93a79a220db 100644
> --- sys/sys/proc.h
> +++ sys/sys/proc.h
> @@ -248,6 +248,8 @@ struct process {
>   u_int ps_rtableid; /* Process routing table/domain. */
>   char ps_nice; /* Process "nice" value. */
>  
> + vaddr_t ps_timekeep; /* User pointer to timekeep */
> +

Can you move this one just after ps_strings?  Seems logical to keep
all the user pointers together in the struct.

>   struct uprof { /* profile arguments */
>   caddr_t pr_base; /* buffer base */
>   size_t  pr_size; /* buffer size */
> diff --git sys/sys/time.h sys/sys/time.h
> index e758a64ce07..3882bac6c55 100644
> --- sys/sys/time.h
> +++ sys/sys/time.h
> @@ -163,15 +163,15 @@ struct clockinfo {
>  };
>  #endif /* __BSD_VISIBLE */
>  
> -#if defined(_KERNEL) || defined(_STANDALONE)
> -#include <sys/_time.h>
> -
>  /* Time expressed as seconds and fractions of a second + operations on it. */
>  struct bintime {
>   time_t sec;
>   uint64_t frac;
>  };
>  
> +#if defined(_KERNEL) || defined(_STANDALONE)
> +#include <sys/_time.h>
> +
>  #define bintimecmp(btp, ctp, cmp) \
>   ((btp)->sec == (ctp)->sec ? \
>      (btp)->frac cmp (ctp)->frac : \

Nope, do *not* chacnge what you expose to userland.  If necessary, use
the same _LIBC trick as I suggested for <sys/timetc.h>.


> diff --git sys/sys/timetc.h sys/sys/timetc.h
> index ce81c3475a0..3dff89a3f47 100644
> --- sys/sys/timetc.h
> +++ sys/sys/timetc.h
> @@ -24,7 +24,7 @@
>  #ifndef _SYS_TIMETC_H_
>  #define _SYS_TIMETC_H_
>  
> -#ifndef _KERNEL
> +#if !defined(_KERNEL) && !defined(_LIBC)
>  #error "no user-serviceable parts inside"
>  #endif
>  
> @@ -80,6 +80,8 @@ struct timecounter {
>   */
>   void *tc_priv; /* [I] */
>   /* Pointer to the timecounter's private parts. */
> + char tc_user; /* [I] */
> + /* Expose this timecounter to userland.  Set in softc. */

The "Set in softc" bit of that comment makes no sense.  char is the
wrong type for this.  I'd suggest making this an int.  If we ever want
to support multiple userlands timecounters, we can change this one
into an MD ID of some sorts where all the different clocks get a
non-zero ID.

>   SLIST_ENTRY(timecounter) tc_next; /* [I] */
>   /* Pointer to the next timecounter. */
>   int64_t tc_freq_adj; /* [tw] */
> @@ -88,11 +90,29 @@ struct timecounter {
>   /* Precision of the counter.  Computed in tc_init(). */
>  };
>  
> +struct __timekeep {

I don't think there is any need for the double underscore here now
that this is firmly not exposed to generic userland.

> + uint32_t major; /* version major number */
> + uint32_t minor; /* version minor number */
> +
> + uint64_t th_scale;
> + unsigned int th_offset_count;
> + struct bintime th_offset;
> + struct bintime th_naptime;
> + struct bintime th_boottime;
> + volatile unsigned int th_generation;
> +
> + unsigned int tc_user;
> + unsigned int tc_counter_mask;
> +};
> +

Please give all the members a uniform tk_ prefix.  And now that this
is in <sys/timetc.h> please use the same types here as in struct
timehands/timecounter.

>  struct rwlock;
>  extern struct rwlock tc_lock;
>  
>  extern struct timecounter *timecounter;
>  
> +extern struct uvm_object *timekeep_object;
> +extern struct __timekeep *timekeep;
> +
>  u_int64_t tc_getfrequency(void);
>  u_int64_t tc_getprecision(void);
>  void tc_init(struct timecounter *tc);
>

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by Robert Nagy
> Date: Sat, 30 May 2020 10:49:07 +0200
> From: Robert Nagy <[hidden email]>
>
> On 30/05/20 10:40 +0200, Mark Kettenis wrote:
> > > Date: Sat, 30 May 2020 10:32:15 +0200
> > > From: Robert Nagy <[hidden email]>
> > >
> > > On 29/05/20 17:51 +0300, Paul Irofti wrote:
> > > > On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > > > > > Date: Fri, 29 May 2020 13:45:37 +0100
> > > > > > From: Stuart Henderson <[hidden email]>
> > > > > >
> > > > > > On 2020/05/29 13:50, Paul Irofti wrote:
> > > > > > > +struct __timekeep {
> > > > > > > + uint32_t major; /* version major number */
> > > > > > > + uint32_t minor; /* version minor number */
> > > > > > > +
> > > > > > > + u_int64_t th_scale;
> > > > > > > + unsigned int th_offset_count;
> > > > > > > + struct bintime th_offset;
> > > > > > > + struct bintime th_naptime;
> > > > > > > + struct bintime th_boottime;
> > > > > > > + volatile unsigned int th_generation;
> > > > > > > +
> > > > > > > + unsigned int tc_user;
> > > > > > > + unsigned int tc_counter_mask;
> > > > > > > +};
> > > > > >
> > > > > > Ah good, you got rid of u_int, that was causing problems with port builds.
> > > > >
> > > > > That in itself is a problem.  This means <time.h> is the wrong place
> > > > > for this struct.  We need to find a better place for this.
> > > > >
> > > > > Since this is now closely linked to the timecounter stuff
> > > > > <sys/timetc.h> would be an obvious place.  Now that file has:
> > > > >
> > > > > #ifndef _KERNEL
> > > > > #error "no user-serviceable parts inside"
> > > > > #endif
> > > > >
> > > > > you could change that into
> > > > >
> > > > > #if !defined(_KERNEL) && !defined(_LIBC)
> > > > > #error "no user-serviceable parts inside"
> > > > > #endif
> > > > >
> > > > > and make sure you #define _LIBC brefore uncluding this file where it
> > > > > is needed.  As few places as possible obviously.
> > > >
> > > > Done. Also includes claudio@'s observation.
> > >
> > > I think if there are no more header changes, this should be commited to
> > > have wider testing. We are also just after tree unlock so it feels like
> > > the right time, and since there is no library bump we can easily revert
> > > if there is a need for that.
> >
> > Not ready yet.
> >
> > I also would like to see at least one non-amd64 platform supported
> > before we settle on this approach.
>
>
> Which one would you prefer? arm64?

yes, arm64 would be good; I can probably give it a go later this weekend

Paul, do you have some sort of regression test for this stuff?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-30 12:40, Mark Kettenis wrote:

>> Date: Sat, 30 May 2020 10:49:07 +0200
>> From: Robert Nagy <[hidden email]>
>>
>> On 30/05/20 10:40 +0200, Mark Kettenis wrote:
>>>> Date: Sat, 30 May 2020 10:32:15 +0200
>>>> From: Robert Nagy <[hidden email]>
>>>>
>>>> On 29/05/20 17:51 +0300, Paul Irofti wrote:
>>>>> On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
>>>>>>> Date: Fri, 29 May 2020 13:45:37 +0100
>>>>>>> From: Stuart Henderson <[hidden email]>
>>>>>>>
>>>>>>> On 2020/05/29 13:50, Paul Irofti wrote:
>>>>>>>> +struct __timekeep {
>>>>>>>> + uint32_t major; /* version major number */
>>>>>>>> + uint32_t minor; /* version minor number */
>>>>>>>> +
>>>>>>>> + u_int64_t th_scale;
>>>>>>>> + unsigned int th_offset_count;
>>>>>>>> + struct bintime th_offset;
>>>>>>>> + struct bintime th_naptime;
>>>>>>>> + struct bintime th_boottime;
>>>>>>>> + volatile unsigned int th_generation;
>>>>>>>> +
>>>>>>>> + unsigned int tc_user;
>>>>>>>> + unsigned int tc_counter_mask;
>>>>>>>> +};
>>>>>>>
>>>>>>> Ah good, you got rid of u_int, that was causing problems with port builds.
>>>>>>
>>>>>> That in itself is a problem.  This means <time.h> is the wrong place
>>>>>> for this struct.  We need to find a better place for this.
>>>>>>
>>>>>> Since this is now closely linked to the timecounter stuff
>>>>>> <sys/timetc.h> would be an obvious place.  Now that file has:
>>>>>>
>>>>>> #ifndef _KERNEL
>>>>>> #error "no user-serviceable parts inside"
>>>>>> #endif
>>>>>>
>>>>>> you could change that into
>>>>>>
>>>>>> #if !defined(_KERNEL) && !defined(_LIBC)
>>>>>> #error "no user-serviceable parts inside"
>>>>>> #endif
>>>>>>
>>>>>> and make sure you #define _LIBC brefore uncluding this file where it
>>>>>> is needed.  As few places as possible obviously.
>>>>>
>>>>> Done. Also includes claudio@'s observation.
>>>>
>>>> I think if there are no more header changes, this should be commited to
>>>> have wider testing. We are also just after tree unlock so it feels like
>>>> the right time, and since there is no library bump we can easily revert
>>>> if there is a need for that.
>>>
>>> Not ready yet.
>>>
>>> I also would like to see at least one non-amd64 platform supported
>>> before we settle on this approach.
>>
>>
>> Which one would you prefer? arm64?
>
> yes, arm64 would be good; I can probably give it a go later this weekend

I was thinking we could have a common name for the MD (arch) files. In
my diff it is rdtsc.c, but I think we can switch to have all the arches
have a file named usertc.c. What do you think?

   arch/amd64/gen/rdtsc.c -> arch/amd64/gen/usertc.c

> Paul, do you have some sort of regression test for this stuff?

If you use the minor bump you can switch between libc's easily and
that's what I do now. My main regress test is Firefox.

I also have a few hand written smoke tests that I wrote in the beginning
with which I test with when I do major changes. I placed them on
cvs:~pirofti/timekeep/.

Another batch that I run is the posixtestsuite (that is available as a
package now). Example:
/usr/local/libexec/posixtestsuite/conformance/interfaces/clock_gettime/1-1.test

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> From: Paul Irofti <[hidden email]>
> Date: Sat, 30 May 2020 13:53:18 +0300
>
> On 2020-05-30 12:40, Mark Kettenis wrote:
> >> Date: Sat, 30 May 2020 10:49:07 +0200
> >> From: Robert Nagy <[hidden email]>
> >>
> >> On 30/05/20 10:40 +0200, Mark Kettenis wrote:
> >>>> Date: Sat, 30 May 2020 10:32:15 +0200
> >>>> From: Robert Nagy <[hidden email]>
> >>>>
> >>>> On 29/05/20 17:51 +0300, Paul Irofti wrote:
> >>>>> On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> >>>>>>> Date: Fri, 29 May 2020 13:45:37 +0100
> >>>>>>> From: Stuart Henderson <[hidden email]>
> >>>>>>>
> >>>>>>> On 2020/05/29 13:50, Paul Irofti wrote:
> >>>>>>>> +struct __timekeep {
> >>>>>>>> + uint32_t major; /* version major number */
> >>>>>>>> + uint32_t minor; /* version minor number */
> >>>>>>>> +
> >>>>>>>> + u_int64_t th_scale;
> >>>>>>>> + unsigned int th_offset_count;
> >>>>>>>> + struct bintime th_offset;
> >>>>>>>> + struct bintime th_naptime;
> >>>>>>>> + struct bintime th_boottime;
> >>>>>>>> + volatile unsigned int th_generation;
> >>>>>>>> +
> >>>>>>>> + unsigned int tc_user;
> >>>>>>>> + unsigned int tc_counter_mask;
> >>>>>>>> +};
> >>>>>>>
> >>>>>>> Ah good, you got rid of u_int, that was causing problems with port builds.
> >>>>>>
> >>>>>> That in itself is a problem.  This means <time.h> is the wrong place
> >>>>>> for this struct.  We need to find a better place for this.
> >>>>>>
> >>>>>> Since this is now closely linked to the timecounter stuff
> >>>>>> <sys/timetc.h> would be an obvious place.  Now that file has:
> >>>>>>
> >>>>>> #ifndef _KERNEL
> >>>>>> #error "no user-serviceable parts inside"
> >>>>>> #endif
> >>>>>>
> >>>>>> you could change that into
> >>>>>>
> >>>>>> #if !defined(_KERNEL) && !defined(_LIBC)
> >>>>>> #error "no user-serviceable parts inside"
> >>>>>> #endif
> >>>>>>
> >>>>>> and make sure you #define _LIBC brefore uncluding this file where it
> >>>>>> is needed.  As few places as possible obviously.
> >>>>>
> >>>>> Done. Also includes claudio@'s observation.
> >>>>
> >>>> I think if there are no more header changes, this should be commited to
> >>>> have wider testing. We are also just after tree unlock so it feels like
> >>>> the right time, and since there is no library bump we can easily revert
> >>>> if there is a need for that.
> >>>
> >>> Not ready yet.
> >>>
> >>> I also would like to see at least one non-amd64 platform supported
> >>> before we settle on this approach.
> >>
> >>
> >> Which one would you prefer? arm64?
> >
> > yes, arm64 would be good; I can probably give it a go later this weekend
>
> I was thinking we could have a common name for the MD (arch) files. In
> my diff it is rdtsc.c, but I think we can switch to have all the arches
> have a file named usertc.c. What do you think?
>
>    arch/amd64/gen/rdtsc.c -> arch/amd64/gen/usertc.c

Yes, that would be better.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On 2020-05-30 12:30, Mark Kettenis wrote:

>> Date: Fri, 29 May 2020 17:51:50 +0300
>> From: Paul Irofti <[hidden email]>
>>
>> On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
>>>> Date: Fri, 29 May 2020 13:45:37 +0100
>>>> From: Stuart Henderson <[hidden email]>
>>>>
>>>> On 2020/05/29 13:50, Paul Irofti wrote:
>>>>> +struct __timekeep {
>>>>> + uint32_t major; /* version major number */
>>>>> + uint32_t minor; /* version minor number */
>>>>> +
>>>>> + u_int64_t th_scale;
>>>>> + unsigned int th_offset_count;
>>>>> + struct bintime th_offset;
>>>>> + struct bintime th_naptime;
>>>>> + struct bintime th_boottime;
>>>>> + volatile unsigned int th_generation;
>>>>> +
>>>>> + unsigned int tc_user;
>>>>> + unsigned int tc_counter_mask;
>>>>> +};
>>>>
>>>> Ah good, you got rid of u_int, that was causing problems with port builds.
>>>
>>> That in itself is a problem.  This means <time.h> is the wrong place
>>> for this struct.  We need to find a better place for this.
>>>
>>> Since this is now closely linked to the timecounter stuff
>>> <sys/timetc.h> would be an obvious place.  Now that file has:
>>>
>>> #ifndef _KERNEL
>>> #error "no user-serviceable parts inside"
>>> #endif
>>>
>>> you could change that into
>>>
>>> #if !defined(_KERNEL) && !defined(_LIBC)
>>> #error "no user-serviceable parts inside"
>>> #endif
>>>
>>> and make sure you #define _LIBC brefore uncluding this file where it
>>> is needed.  As few places as possible obviously.
>>
>> Done. Also includes claudio@'s observation.
>
> What are your plans to deal with the potential "skew" between the TSCs
> on different processors?  We can probably tolerate a small skew
> without having to worry about it un userland as long as the skew is
> smaller than the time it takes to do a context switch.  If you want to
> handle the skew in userland, you need to export the skews somewhere on
> the timekeep page and we'd need to use rdtscp to read the TSC and
> associate it with the right skew.

The results I got from last years work on fixing TSC and adding per CPU
skew, indicated that the skew has small values (two digit numbers
usually). So indeed this does not seem an issue for userland.

Exposing the skews to the user is easy. The hard bit is figuring out on
which CPU you are to pick the proper skew without doing a system call.
If you do a syscall then all of this is for nothing :)

One option is to use a hard-thresholding strategy as you describe.

if (timekeep->maxskew > TK_MAXSKEW_THRESHOLD)
   return clock_gettime();

Another is to add support in libc to figure out on what CPU it is
running. I don't have a plan for that yet. You mention associating the
right skew for the RDTSCP call, do you have an example of how to do that?

I will also probably add support for HPET clocks (if this diff goes in)
as some machines do not have a proper, invariant, TSC (like solene@'s)
and, perhaps, others might want to switch for other reasons.

> A few more notes below.

I will fix these later and come back with a diff. Thank you for the review!

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
In reply to this post by Robert Nagy
Robert Nagy <[hidden email]> wrote:

> > I also would like to see at least one non-amd64 platform supported
> > before we settle on this approach.
>
>
> Which one would you prefer? arm64?

As many as possible.

If a design issue is encountered, cranking the major on this *might not
be easy*.  If the mechanism changes, the code may need to support TWO
methods to complete the cross-over.

That's a bunch of BS which can be avoided writing support for maximum
number of diverse platforms FIRST.


Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
In reply to this post by Paul Irofti-4
Paul Irofti <[hidden email]> wrote:

> Exposing the skews to the user is easy. The hard bit is figuring out
> on which CPU you are to pick the proper skew without doing a system
> call. If you do a syscall then all of this is for nothing :)

That can't work right.  When you figure out which cpu you are on, you
context switch and now it is untrue.

If other systems are trying to handle this, it should be looked at how
they try to handle it.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
> A few more notes below.

I addressed all the comments. Here is the updated diff. This includes
the rename to usertc that I suggested.

The libc bump is there because it helps me switch more easily between
versions. A lot of our developers tested and reported no issues with
eluding the bump. So we can remove it in the final step if you think
that's OK.

Paul


diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..c80f5cf671a 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c usertc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 00000000000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..860ae2b8698 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include <sys/types.h>
 #include <sys/syscall.h>
+#include <sys/timetc.h> /* timekeep */
 
 #ifndef PIC
 #include <sys/mman.h>
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definitions for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..df717021cab 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -24,7 +24,7 @@ PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/sys/timetc.h lib/libc/hidden/sys/timetc.h
new file mode 100644
index 00000000000..bf9e8228f95
--- /dev/null
+++ lib/libc/hidden/sys/timetc.h
@@ -0,0 +1,39 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _LIBC_SYS_TIMETC_H_
+#define _LIBC_SYS_TIMETC_H_
+
+#define _LIBC
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include_next <sys/timetc.h>
+
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+
+extern uint64_t (*const tc_get_timecount)(void);
+uint64_t tc_get_timecount_md(void);
+
+void _microtime(struct timeval *tvp, struct timekeep *tk);
+void _nanotime(struct timespec *tsp, struct timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct timekeep *tk);
+__END_HIDDEN_DECLS
+
+#endif /* !_LIBC_SYS_TIMETC_H_ */
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..d8e1e0caf64 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..0451b860759
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,157 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/timetc.h>
+
+#include <time.h>
+
+/*
+ * Return the difference between the timehands' counter value now and what
+ * was when we copied it to the timehands' offset_count.
+ */
+static inline u_int
+tc_delta(struct timekeep *tk)
+{
+ return ((tc_get_timecount() - tk->tk_offset_count) &
+    tk->tk_counter_mask);
+}
+
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+ ct->sec = bt->sec;
+ if (bt->frac > bt->frac + x)
+ ct->sec++;
+ ct->frac = bt->frac + x;
+}
+
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+ ts->tv_sec = bt->sec;
+ ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+ tv->tv_sec = bt->sec;
+ tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static void
+binuptime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ *bt = tk->tk_offset;
+ bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec + ct->sec;
+ if (bt->frac > bt->frac + ct->frac)
+ dt->sec++;
+ dt->frac = bt->frac + ct->frac;
+}
+
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec - ct->sec;
+ if (bt->frac < bt->frac - ct->frac)
+ dt->sec--;
+ dt->frac = bt->frac - ct->frac;
+}
+
+static void
+binruntime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ bintimeaddfrac(&tk->tk_offset, tk->tk_scale * tc_delta(tk), bt);
+ bintimesub(bt, &tk->tk_naptime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+static void
+bintime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ *bt = tk->tk_offset;
+ bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+ bintimeadd(bt, &tk->tk_boottime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+void
+_microtime(struct timeval *tvp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMEVAL(&bt, tvp);
+}
+
+void
+_nanotime(struct timespec *tsp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
+
+void
+_nanoruntime(struct timespec *ts, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ binruntime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, ts);
+}
+
+
+void
+_nanouptime(struct timespec *tsp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ binuptime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..ff72c9fb64d
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+#include <time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct timekeep *timekeep = _timekeep;
+
+ if (timekeep == NULL || timekeep->tk_user == 0)
+ return clock_gettime(clock_id, tp);
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ _nanotime(tp, timekeep);
+ break;
+ case CLOCK_UPTIME:
+ _nanoruntime(tp, timekeep);
+ break;
+ case CLOCK_MONOTONIC:
+ case CLOCK_BOOTTIME:
+ _nanouptime(tp, timekeep);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..0b198190485
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+ struct timekeep *timekeep = _timekeep;
+ static struct timezone zerotz = { 0, 0 };
+
+ if (timekeep == NULL || timekeep->tk_user == 0)
+ return gettimeofday(tp, tzp);
+
+ if (tp)
+ _microtime(tp, timekeep);
+
+ if (tzp)
+ tzp = &zerotz;
+
+ return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
index 3f5f2c5b42b..6eaf8b107c6 100644
--- sys/arch/alpha/alpha/clock.c
+++ sys/arch/alpha/alpha/clock.c
@@ -64,7 +64,7 @@ int clk_irq = 0;
 
 u_int rpcc_get_timecount(struct timecounter *);
 struct timecounter rpcc_timecounter = {
- rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
+ rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..3db93d88dec 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
 #endif
 
 struct timecounter tsc_timecounter = {
- tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
+ tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
 };
 
 uint64_t
diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
index 613f7ee0e0f..00da0c6a8d0 100644
--- sys/arch/amd64/isa/clock.c
+++ sys/arch/amd64/isa/clock.c
@@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 
 int clockintr(void *);
diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
index 29394141ad5..6b7c6db862f 100644
--- sys/arch/arm64/dev/agtimer.c
+++ sys/arch/arm64/dev/agtimer.c
@@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
 u_int agtimer_get_timecount(struct timecounter *);
 
 static struct timecounter agtimer_timecounter = {
- agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
+ agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
 };
 
 struct agtimer_pcpu_softc {
diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
index 7605845d5e2..061542d532f 100644
--- sys/arch/armv7/omap/gptimer.c
+++ sys/arch/armv7/omap/gptimer.c
@@ -117,7 +117,7 @@ int gptimer_irq = 0;
 u_int gptimer_get_timecount(struct timecounter *);
 
 static struct timecounter gptimer_timecounter = {
- gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
+ gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
 };
 
 volatile u_int32_t nexttickevent;
diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
index 14a243c78d0..41028f9a602 100644
--- sys/arch/armv7/sunxi/sxitimer.c
+++ sys/arch/armv7/sunxi/sxitimer.c
@@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
 u_int sxitimer_get_timecount(struct timecounter *);
 
 static struct timecounter sxitimer_timecounter = {
- sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
+ sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
 };
 
 bus_space_tag_t sxitimer_iot;
diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
index 4c594ab5ec7..8cce6c3a893 100644
--- sys/arch/hppa/dev/clock.c
+++ sys/arch/hppa/dev/clock.c
@@ -47,7 +47,7 @@ int cpu_hardclock(void *);
 u_int itmr_get_timecount(struct timecounter *);
 
 struct timecounter itmr_timecounter = {
- itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
+ itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
index 09a6db983f2..dd74bd425ad 100644
--- sys/arch/i386/isa/clock.c
+++ sys/arch/i386/isa/clock.c
@@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
 u_long rtclock_tval;
diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
index 9d9f061eef9..bb8e4c7f9ae 100644
--- sys/arch/i386/pci/geodesc.c
+++ sys/arch/i386/pci/geodesc.c
@@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
  0xffffffff, /* counter_mask */
  27000000, /* frequency */
  "GEOTSC", /* name */
- 2000 /* quality */
+ 2000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 int
diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
index 8b8aa4ac430..a6f324e66f3 100644
--- sys/arch/i386/pci/gscpm.c
+++ sys/arch/i386/pci/gscpm.c
@@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "GSCPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach gscpm_ca = {
diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
index 6abf1627de2..90814d2dba0 100644
--- sys/arch/i386/pci/ichpcib.c
+++ sys/arch/i386/pci/ichpcib.c
@@ -64,6 +64,8 @@ struct timecounter ichpcib_timecounter = {
  3579545, /* frequency */
  "ICHPM", /* name */
  1000 /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach ichpcib_ca = {
diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
index ac3f1db6ccd..61da18ebff7 100644
--- sys/arch/loongson/loongson/generic3a_machdep.c
+++ sys/arch/loongson/loongson/generic3a_machdep.c
@@ -99,6 +99,8 @@ struct timecounter rs780e_timecounter = {
  .tc_frequency = HPET_FREQ,
  .tc_name = "hpet",
  .tc_quality = 100
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /* Firmware entry points */
diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
index a04120987e0..6580a4a46bf 100644
--- sys/arch/luna88k/luna88k/clock.c
+++ sys/arch/luna88k/luna88k/clock.c
@@ -112,7 +112,9 @@ struct timecounter clock_tc = {
  .tc_counter_mask = 0xffffffff,
  .tc_frequency = 0, /* will be filled in */
  .tc_name = "clock",
- .tc_quality = 0
+ .tc_quality = 0,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /*
diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
index 4a44a92cfc0..8c3ad620be8 100644
--- sys/arch/macppc/macppc/clock.c
+++ sys/arch/macppc/macppc/clock.c
@@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
 };
 
 /* calibrate the timecounter frequency for the listed models */
diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
index d4a42ed5acc..5c4dbadb5bb 100644
--- sys/arch/mips64/mips64/mips64_machdep.c
+++ sys/arch/mips64/mips64/mips64_machdep.c
@@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
  0xffffffff, /* counter_mask */
  0, /* frequency */
  "CP0", /* name */
- 0 /* quality */
+ 0, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 u_int
diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
index 604cb3be3ab..9d4d8564d5c 100644
--- sys/arch/octeon/octeon/machdep.c
+++ sys/arch/octeon/octeon/machdep.c
@@ -152,8 +152,9 @@ struct timecounter ioclock_timecounter = {
  .tc_name = "ioclock",
  .tc_quality = 0, /* ioclock can be overridden
  * by cp0 counter */
- .tc_priv = 0 /* clock register,
+ .tc_priv = 0, /* clock register,
  * determined at runtime */
+ .tc_user = 0, /* expose to user */
 };
 
 static int
diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
index ba7fa558b96..7b3fa05ddd5 100644
--- sys/arch/sgi/sgi/ip27_machdep.c
+++ sys/arch/sgi/sgi/ip27_machdep.c
@@ -111,7 +111,29 @@ struct timecounter ip27_hub_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
  .tc_frequency = 1250000,
  .tc_name = "hubrt",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = 0,
+ .tc_user = 0,
+};
+
+static int
+atoi(const char *s)
+{
+ int n, neg;
+
+ n = 0;
+ neg = 0;
+
+ while (*s == '-') {
+ s++;
+ neg = !neg;
+ }
+
+ while (*s != '\0') {
+ if (*s < '0' || *s > '9')
+ break;
+
+ n = (10 * n) + (*s - '0');
 };
 
 volatile uint64_t ip27_spinup_a0;
diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
index 56b29915c70..827775512ac 100644
--- sys/arch/sgi/xbow/xheart.c
+++ sys/arch/sgi/xbow/xheart.c
@@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
  .tc_frequency = 12500000,
  .tc_name = "heart",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
index e24f804dff6..1a7a1afa8c2 100644
--- sys/arch/sparc64/dev/psycho.c
+++ sys/arch/sparc64/dev/psycho.c
@@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
 u_int stick_get_timecount(struct timecounter *);
 
 struct timecounter stick_timecounter = {
- stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
+ stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
 };
 
 /*
diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
index fd5e8a9c15b..5c2e47d386b 100644
--- sys/arch/sparc64/sparc64/clock.c
+++ sys/arch/sparc64/sparc64/clock.c
@@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
 u_int tick_get_timecount(struct timecounter *);
 
 struct timecounter tick_timecounter = {
- tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
+ tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
 };
 
 u_int sys_tick_get_timecount(struct timecounter *);
 
 struct timecounter sys_tick_timecounter = {
- sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
+ sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
 };
 
 /*
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index d0ee72cec9b..13177a909da 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
  0xffffffff, /* counter_mask (32 bits) */
  0, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define HPET_TIMERS 3
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index cdc8c99a17a..89b5a397e47 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
  0x00ffffff, /* counter_mask (24 bits) */
  ACPI_FREQUENCY, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct acpitimer_softc {
diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
index 3abe03de722..017611e4b31 100644
--- sys/dev/pci/amdpm.c
+++ sys/dev/pci/amdpm.c
@@ -83,7 +83,9 @@ static struct timecounter amdpm_timecounter = {
  0xffffff, /* counter_mask */
  AMDPM_FREQUENCY, /* frequency */
  "AMDPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define AMDPM_CONFREG 0x40
diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
index db806eedf80..ce33cd175e6 100644
--- sys/dev/pci/viapm.c
+++ sys/dev/pci/viapm.c
@@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
  0xffffff, /* counter_mask */
  VIAPM_FREQUENCY, /* frequency */
  "VIAPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct timeout viapm_timeout;
diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
index 3ab2ae22831..8e326cf2502 100644
--- sys/dev/pv/hyperv.c
+++ sys/dev/pv/hyperv.c
@@ -143,7 +143,7 @@ struct {
 };
 
 struct timecounter hv_timecounter = {
- hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
+ hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
 };
 
 struct cfdriver hyperv_cd = {
diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
index 6b242f7448d..b80e4d2a484 100644
--- sys/dev/pv/pvclock.c
+++ sys/dev/pv/pvclock.c
@@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
 };
 
 struct timecounter pvclock_timecounter = {
- pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
+ pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
 };
 
 int
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..8869f1fb89f 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/timetc.h>
+
+struct uvm_object *timekeep_object;
+struct timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,41 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct timekeep *)va;
+ timekeep->tk_major = 0;
+ timekeep->tk_minor = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..bb0f6134403 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -64,7 +64,7 @@ dummy_get_timecount(struct timecounter *tc)
 }
 
 static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
+ dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
 };
 
 /*
@@ -480,6 +480,34 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_update_timekeep(void)
+{
+ static struct timecounter *last_tc = NULL;
+ struct timehands *th;
+
+ if (timekeep == NULL)
+ return;
+
+ th = timehands;
+ timekeep->tk_generation = 0;
+ membar_producer();
+ timekeep->tk_scale = th->th_scale;
+ timekeep->tk_offset_count = th->th_offset_count;
+ timekeep->tk_offset = th->th_offset;
+ timekeep->tk_naptime = th->th_naptime;
+ timekeep->tk_boottime = th->th_boottime;
+ if (last_tc != th->th_counter) {
+ timekeep->tk_counter_mask = th->th_counter->tc_counter_mask;
+ timekeep->tk_user = th->th_counter->tc_user;
+ last_tc = th->th_counter;
+ }
+ membar_producer();
+ timekeep->tk_generation = th->th_generation;
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +660,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..3084ed595a6 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 4000, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..c6d54572bdd 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -242,6 +242,7 @@ struct process {
  char ps_comm[MAXCOMLEN+1];
 
  vaddr_t ps_strings; /* User pointers to argv/env */
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
  vaddr_t ps_sigcode; /* User pointer to the signal code */
  vaddr_t ps_sigcoderet; /* User pointer to sigreturn retPC */
  u_long ps_sigcookie;
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..bcd3acd034d 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,7 +163,7 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
+#if defined(_KERNEL) || defined(_STANDALONE) || defined (_LIBC)
 #include <sys/_time.h>
 
 /* Time expressed as seconds and fractions of a second + operations on it. */
@@ -171,6 +171,9 @@ struct bintime {
  time_t sec;
  uint64_t frac;
 };
+#endif
+
+#if defined(_KERNEL) || defined(_STANDALONE)
 
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
diff --git sys/sys/timetc.h sys/sys/timetc.h
index ce81c3475a0..22658b00da2 100644
--- sys/sys/timetc.h
+++ sys/sys/timetc.h
@@ -24,7 +24,7 @@
 #ifndef _SYS_TIMETC_H_
 #define _SYS_TIMETC_H_
 
-#ifndef _KERNEL
+#if !defined(_KERNEL) && !defined(_LIBC)
 #error "no user-serviceable parts inside"
 #endif
 
@@ -80,6 +80,8 @@ struct timecounter {
  */
  void *tc_priv; /* [I] */
  /* Pointer to the timecounter's private parts. */
+ int tc_user; /* [I] */
+ /* Expose this timecounter to userland. */
  SLIST_ENTRY(timecounter) tc_next; /* [I] */
  /* Pointer to the next timecounter. */
  int64_t tc_freq_adj; /* [tw] */
@@ -88,11 +90,31 @@ struct timecounter {
  /* Precision of the counter.  Computed in tc_init(). */
 };
 
+struct timekeep {
+ uint32_t tk_major; /* version major number */
+ uint32_t tk_minor; /* version minor number */
+
+ /* timehands members */
+ uint64_t tk_scale;
+ u_int tk_offset_count;
+ struct bintime tk_offset;
+ struct bintime tk_naptime;
+ struct bintime tk_boottime;
+ volatile u_int tk_generation;
+
+ /* timecounter members */
+ int tk_user;
+ u_int tk_counter_mask;
+};
+
 struct rwlock;
 extern struct rwlock tc_lock;
 
 extern struct timecounter *timecounter;
 
+extern struct uvm_object *timekeep_object;
+extern struct timekeep *timekeep;
+
 u_int64_t tc_getfrequency(void);
 u_int64_t tc_getprecision(void);
 void tc_init(struct timecounter *tc);

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> > A few more notes below.
>
> I addressed all the comments. Here is the updated diff. This includes
> the rename to usertc that I suggested.

I want to see support for quite a few more architectures, especially
those which are very different, because changing format of the shared
page later will be very painful.

> The libc bump is there because it helps me switch more easily between
> versions.

That is bogus.  Minors are used for visible ABI additions, majors are
used for ABI deletions or API changes visible as ABI.  Please don't
argue for a vague extension of the rules again.


In essence this introduction requires no major or minor crank becuase
it just starts selecting a different backend which is newly supplied.
But as soon as the back-end is changed, the version number will barely
help, since code which can't match it has to revert to the non-optimized
path.

I don't believe you can shortcut this by supporting 1 architecture and
casting a prayer it's going to be fine.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Additional question:

What happens during suspend/resume, over over a hibernate.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Theo de Raadt-2
On Sat, May 30, 2020 at 09:59:41AM -0600, Theo de Raadt wrote:

> Paul Irofti <[hidden email]> wrote:
>
> > > A few more notes below.
> >
> > I addressed all the comments. Here is the updated diff. This includes
> > the rename to usertc that I suggested.
>
> I want to see support for quite a few more architectures, especially
> those which are very different, because changing format of the shared
> page later will be very painful.

Sure. Your call. This last diff will help that as discussed with
kettenis@. We now have usertc.c which should be the only place that
needs to be touched by each arch. Let's see. I am currently looking at
doing this on an octeon or a loongson. Kettenis said he will do arm64.

> > The libc bump is there because it helps me switch more easily between
> > versions.
>
> That is bogus.  Minors are used for visible ABI additions, majors are
> used for ABI deletions or API changes visible as ABI.  Please don't
> argue for a vague extension of the rules again.

I do not know what you are taking about here. I am not looking at any
extension of the rules, nor was I in the past. The whole issue of
bumping I leave it up to you and whoever understands these rules. Some
developers said this is not required, including kettenis@, and this is
why I justified the bump in my diff. That and it might also help others
quickly test the diff.

> In essence this introduction requires no major or minor crank becuase
> it just starts selecting a different backend which is newly supplied.
> But as soon as the back-end is changed, the version number will barely
> help, since code which can't match it has to revert to the non-optimized
> path.
>
> I don't believe you can shortcut this by supporting 1 architecture and
> casting a prayer it's going to be fine.

I am not trying to shortcut anything. I am in no rush for anything.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> > > The libc bump is there because it helps me switch more easily between
> > > versions.
> >
> > That is bogus.  Minors are used for visible ABI additions, majors are
> > used for ABI deletions or API changes visible as ABI.  Please don't
> > argue for a vague extension of the rules again.
>
> I do not know what you are taking about here. I am not looking at any
> extension of the rules, nor was I in the past. The whole issue of
> bumping I leave it up to you and whoever understands these rules. Some
> developers said this is not required, including kettenis@, and this is
> why I justified the bump in my diff. That and it might also help others
> quickly test the diff.

Repeatedly you were told this wasn't needed, but you kept shipping diffs
which do it.  And now there are developers who have a future-numbered libc
on their system, which doesn't do future things.

It is not justifiable.

It does NOT help people quickly test the diff, as such an approach
requires making assumptions which are more complicated then the diff.
This is not the purpose of major and minor numbers!

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On Sat, May 30, 2020 at 10:11:10AM -0600, Theo de Raadt wrote:

> Paul Irofti <[hidden email]> wrote:
>
> > > > The libc bump is there because it helps me switch more easily between
> > > > versions.
> > >
> > > That is bogus.  Minors are used for visible ABI additions, majors are
> > > used for ABI deletions or API changes visible as ABI.  Please don't
> > > argue for a vague extension of the rules again.
> >
> > I do not know what you are taking about here. I am not looking at any
> > extension of the rules, nor was I in the past. The whole issue of
> > bumping I leave it up to you and whoever understands these rules. Some
> > developers said this is not required, including kettenis@, and this is
> > why I justified the bump in my diff. That and it might also help others
> > quickly test the diff.
>
> Repeatedly you were told this wasn't needed, but you kept shipping diffs
> which do it.  And now there are developers who have a future-numbered libc
> on their system, which doesn't do future things.
>
> It is not justifiable.
>
> It does NOT help people quickly test the diff, as such an approach
> requires making assumptions which are more complicated then the diff.
> This is not the purpose of major and minor numbers!

Oh, I see. You are correct. My appologies for that. I did not fully
understand the consequences. I will send out a new diff w/o the bump.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Paul Irofti-4
Here is an updated diff with no libc bump.  Please use this one for
further testing.

diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..c80f5cf671a 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c usertc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 00000000000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..860ae2b8698 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include <sys/types.h>
 #include <sys/syscall.h>
+#include <sys/timetc.h> /* timekeep */
 
 #ifndef PIC
 #include <sys/mman.h>
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definitions for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..df717021cab 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -24,7 +24,7 @@ PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/sys/timetc.h lib/libc/hidden/sys/timetc.h
new file mode 100644
index 00000000000..bf9e8228f95
--- /dev/null
+++ lib/libc/hidden/sys/timetc.h
@@ -0,0 +1,39 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _LIBC_SYS_TIMETC_H_
+#define _LIBC_SYS_TIMETC_H_
+
+#define _LIBC
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include_next <sys/timetc.h>
+
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+
+extern uint64_t (*const tc_get_timecount)(void);
+uint64_t tc_get_timecount_md(void);
+
+void _microtime(struct timeval *tvp, struct timekeep *tk);
+void _nanotime(struct timespec *tsp, struct timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct timekeep *tk);
+__END_HIDDEN_DECLS
+
+#endif /* !_LIBC_SYS_TIMETC_H_ */
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..d8e1e0caf64 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..0451b860759
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,157 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/timetc.h>
+
+#include <time.h>
+
+/*
+ * Return the difference between the timehands' counter value now and what
+ * was when we copied it to the timehands' offset_count.
+ */
+static inline u_int
+tc_delta(struct timekeep *tk)
+{
+ return ((tc_get_timecount() - tk->tk_offset_count) &
+    tk->tk_counter_mask);
+}
+
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+ ct->sec = bt->sec;
+ if (bt->frac > bt->frac + x)
+ ct->sec++;
+ ct->frac = bt->frac + x;
+}
+
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+ ts->tv_sec = bt->sec;
+ ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+ tv->tv_sec = bt->sec;
+ tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static void
+binuptime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ *bt = tk->tk_offset;
+ bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec + ct->sec;
+ if (bt->frac > bt->frac + ct->frac)
+ dt->sec++;
+ dt->frac = bt->frac + ct->frac;
+}
+
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec - ct->sec;
+ if (bt->frac < bt->frac - ct->frac)
+ dt->sec--;
+ dt->frac = bt->frac - ct->frac;
+}
+
+static void
+binruntime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ bintimeaddfrac(&tk->tk_offset, tk->tk_scale * tc_delta(tk), bt);
+ bintimesub(bt, &tk->tk_naptime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+static void
+bintime(struct bintime *bt, struct timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->tk_generation;
+ membar_consumer();
+ *bt = tk->tk_offset;
+ bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+ bintimeadd(bt, &tk->tk_boottime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->tk_generation);
+}
+
+void
+_microtime(struct timeval *tvp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMEVAL(&bt, tvp);
+}
+
+void
+_nanotime(struct timespec *tsp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
+
+void
+_nanoruntime(struct timespec *ts, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ binruntime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, ts);
+}
+
+
+void
+_nanouptime(struct timespec *tsp, struct timekeep *tk)
+{
+ struct bintime bt;
+
+ binuptime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..ff72c9fb64d
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+#include <time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct timekeep *timekeep = _timekeep;
+
+ if (timekeep == NULL || timekeep->tk_user == 0)
+ return clock_gettime(clock_id, tp);
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ _nanotime(tp, timekeep);
+ break;
+ case CLOCK_UPTIME:
+ _nanoruntime(tp, timekeep);
+ break;
+ case CLOCK_MONOTONIC:
+ case CLOCK_BOOTTIME:
+ _nanouptime(tp, timekeep);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..0b198190485
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+ struct timekeep *timekeep = _timekeep;
+ static struct timezone zerotz = { 0, 0 };
+
+ if (timekeep == NULL || timekeep->tk_user == 0)
+ return gettimeofday(tp, tzp);
+
+ if (tp)
+ _microtime(tp, timekeep);
+
+ if (tzp)
+ tzp = &zerotz;
+
+ return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
index 3f5f2c5b42b..6eaf8b107c6 100644
--- sys/arch/alpha/alpha/clock.c
+++ sys/arch/alpha/alpha/clock.c
@@ -64,7 +64,7 @@ int clk_irq = 0;
 
 u_int rpcc_get_timecount(struct timecounter *);
 struct timecounter rpcc_timecounter = {
- rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
+ rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..3db93d88dec 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
 #endif
 
 struct timecounter tsc_timecounter = {
- tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
+ tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
 };
 
 uint64_t
diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
index 613f7ee0e0f..00da0c6a8d0 100644
--- sys/arch/amd64/isa/clock.c
+++ sys/arch/amd64/isa/clock.c
@@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 
 int clockintr(void *);
diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
index 29394141ad5..6b7c6db862f 100644
--- sys/arch/arm64/dev/agtimer.c
+++ sys/arch/arm64/dev/agtimer.c
@@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
 u_int agtimer_get_timecount(struct timecounter *);
 
 static struct timecounter agtimer_timecounter = {
- agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
+ agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
 };
 
 struct agtimer_pcpu_softc {
diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
index 7605845d5e2..061542d532f 100644
--- sys/arch/armv7/omap/gptimer.c
+++ sys/arch/armv7/omap/gptimer.c
@@ -117,7 +117,7 @@ int gptimer_irq = 0;
 u_int gptimer_get_timecount(struct timecounter *);
 
 static struct timecounter gptimer_timecounter = {
- gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
+ gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
 };
 
 volatile u_int32_t nexttickevent;
diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
index 14a243c78d0..41028f9a602 100644
--- sys/arch/armv7/sunxi/sxitimer.c
+++ sys/arch/armv7/sunxi/sxitimer.c
@@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
 u_int sxitimer_get_timecount(struct timecounter *);
 
 static struct timecounter sxitimer_timecounter = {
- sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
+ sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
 };
 
 bus_space_tag_t sxitimer_iot;
diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
index 4c594ab5ec7..8cce6c3a893 100644
--- sys/arch/hppa/dev/clock.c
+++ sys/arch/hppa/dev/clock.c
@@ -47,7 +47,7 @@ int cpu_hardclock(void *);
 u_int itmr_get_timecount(struct timecounter *);
 
 struct timecounter itmr_timecounter = {
- itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
+ itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
index 09a6db983f2..dd74bd425ad 100644
--- sys/arch/i386/isa/clock.c
+++ sys/arch/i386/isa/clock.c
@@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
 u_long rtclock_tval;
diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
index 9d9f061eef9..bb8e4c7f9ae 100644
--- sys/arch/i386/pci/geodesc.c
+++ sys/arch/i386/pci/geodesc.c
@@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
  0xffffffff, /* counter_mask */
  27000000, /* frequency */
  "GEOTSC", /* name */
- 2000 /* quality */
+ 2000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 int
diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
index 8b8aa4ac430..a6f324e66f3 100644
--- sys/arch/i386/pci/gscpm.c
+++ sys/arch/i386/pci/gscpm.c
@@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "GSCPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach gscpm_ca = {
diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
index 6abf1627de2..90814d2dba0 100644
--- sys/arch/i386/pci/ichpcib.c
+++ sys/arch/i386/pci/ichpcib.c
@@ -64,6 +64,8 @@ struct timecounter ichpcib_timecounter = {
  3579545, /* frequency */
  "ICHPM", /* name */
  1000 /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach ichpcib_ca = {
diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
index ac3f1db6ccd..61da18ebff7 100644
--- sys/arch/loongson/loongson/generic3a_machdep.c
+++ sys/arch/loongson/loongson/generic3a_machdep.c
@@ -99,6 +99,8 @@ struct timecounter rs780e_timecounter = {
  .tc_frequency = HPET_FREQ,
  .tc_name = "hpet",
  .tc_quality = 100
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /* Firmware entry points */
diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
index a04120987e0..6580a4a46bf 100644
--- sys/arch/luna88k/luna88k/clock.c
+++ sys/arch/luna88k/luna88k/clock.c
@@ -112,7 +112,9 @@ struct timecounter clock_tc = {
  .tc_counter_mask = 0xffffffff,
  .tc_frequency = 0, /* will be filled in */
  .tc_name = "clock",
- .tc_quality = 0
+ .tc_quality = 0,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /*
diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
index 4a44a92cfc0..8c3ad620be8 100644
--- sys/arch/macppc/macppc/clock.c
+++ sys/arch/macppc/macppc/clock.c
@@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
 };
 
 /* calibrate the timecounter frequency for the listed models */
diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
index d4a42ed5acc..5c4dbadb5bb 100644
--- sys/arch/mips64/mips64/mips64_machdep.c
+++ sys/arch/mips64/mips64/mips64_machdep.c
@@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
  0xffffffff, /* counter_mask */
  0, /* frequency */
  "CP0", /* name */
- 0 /* quality */
+ 0, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 u_int
diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
index 604cb3be3ab..9d4d8564d5c 100644
--- sys/arch/octeon/octeon/machdep.c
+++ sys/arch/octeon/octeon/machdep.c
@@ -152,8 +152,9 @@ struct timecounter ioclock_timecounter = {
  .tc_name = "ioclock",
  .tc_quality = 0, /* ioclock can be overridden
  * by cp0 counter */
- .tc_priv = 0 /* clock register,
+ .tc_priv = 0, /* clock register,
  * determined at runtime */
+ .tc_user = 0, /* expose to user */
 };
 
 static int
diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
index ba7fa558b96..7b3fa05ddd5 100644
--- sys/arch/sgi/sgi/ip27_machdep.c
+++ sys/arch/sgi/sgi/ip27_machdep.c
@@ -111,7 +111,29 @@ struct timecounter ip27_hub_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
  .tc_frequency = 1250000,
  .tc_name = "hubrt",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = 0,
+ .tc_user = 0,
+};
+
+static int
+atoi(const char *s)
+{
+ int n, neg;
+
+ n = 0;
+ neg = 0;
+
+ while (*s == '-') {
+ s++;
+ neg = !neg;
+ }
+
+ while (*s != '\0') {
+ if (*s < '0' || *s > '9')
+ break;
+
+ n = (10 * n) + (*s - '0');
 };
 
 volatile uint64_t ip27_spinup_a0;
diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
index 56b29915c70..827775512ac 100644
--- sys/arch/sgi/xbow/xheart.c
+++ sys/arch/sgi/xbow/xheart.c
@@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
  .tc_frequency = 12500000,
  .tc_name = "heart",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
index e24f804dff6..1a7a1afa8c2 100644
--- sys/arch/sparc64/dev/psycho.c
+++ sys/arch/sparc64/dev/psycho.c
@@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
 u_int stick_get_timecount(struct timecounter *);
 
 struct timecounter stick_timecounter = {
- stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
+ stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
 };
 
 /*
diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
index fd5e8a9c15b..5c2e47d386b 100644
--- sys/arch/sparc64/sparc64/clock.c
+++ sys/arch/sparc64/sparc64/clock.c
@@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
 u_int tick_get_timecount(struct timecounter *);
 
 struct timecounter tick_timecounter = {
- tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
+ tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
 };
 
 u_int sys_tick_get_timecount(struct timecounter *);
 
 struct timecounter sys_tick_timecounter = {
- sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
+ sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
 };
 
 /*
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index d0ee72cec9b..13177a909da 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
  0xffffffff, /* counter_mask (32 bits) */
  0, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define HPET_TIMERS 3
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index cdc8c99a17a..89b5a397e47 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
  0x00ffffff, /* counter_mask (24 bits) */
  ACPI_FREQUENCY, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct acpitimer_softc {
diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
index 3abe03de722..017611e4b31 100644
--- sys/dev/pci/amdpm.c
+++ sys/dev/pci/amdpm.c
@@ -83,7 +83,9 @@ static struct timecounter amdpm_timecounter = {
  0xffffff, /* counter_mask */
  AMDPM_FREQUENCY, /* frequency */
  "AMDPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define AMDPM_CONFREG 0x40
diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
index db806eedf80..ce33cd175e6 100644
--- sys/dev/pci/viapm.c
+++ sys/dev/pci/viapm.c
@@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
  0xffffff, /* counter_mask */
  VIAPM_FREQUENCY, /* frequency */
  "VIAPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct timeout viapm_timeout;
diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
index 3ab2ae22831..8e326cf2502 100644
--- sys/dev/pv/hyperv.c
+++ sys/dev/pv/hyperv.c
@@ -143,7 +143,7 @@ struct {
 };
 
 struct timecounter hv_timecounter = {
- hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
+ hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
 };
 
 struct cfdriver hyperv_cd = {
diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
index 6b242f7448d..b80e4d2a484 100644
--- sys/dev/pv/pvclock.c
+++ sys/dev/pv/pvclock.c
@@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
 };
 
 struct timecounter pvclock_timecounter = {
- pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
+ pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
 };
 
 int
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..8869f1fb89f 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/timetc.h>
+
+struct uvm_object *timekeep_object;
+struct timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,41 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct timekeep *)va;
+ timekeep->tk_major = 0;
+ timekeep->tk_minor = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..bb0f6134403 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -64,7 +64,7 @@ dummy_get_timecount(struct timecounter *tc)
 }
 
 static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
+ dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
 };
 
 /*
@@ -480,6 +480,34 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_update_timekeep(void)
+{
+ static struct timecounter *last_tc = NULL;
+ struct timehands *th;
+
+ if (timekeep == NULL)
+ return;
+
+ th = timehands;
+ timekeep->tk_generation = 0;
+ membar_producer();
+ timekeep->tk_scale = th->th_scale;
+ timekeep->tk_offset_count = th->th_offset_count;
+ timekeep->tk_offset = th->th_offset;
+ timekeep->tk_naptime = th->th_naptime;
+ timekeep->tk_boottime = th->th_boottime;
+ if (last_tc != th->th_counter) {
+ timekeep->tk_counter_mask = th->th_counter->tc_counter_mask;
+ timekeep->tk_user = th->th_counter->tc_user;
+ last_tc = th->th_counter;
+ }
+ membar_producer();
+ timekeep->tk_generation = th->th_generation;
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +660,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..3084ed595a6 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 4000, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..c6d54572bdd 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -242,6 +242,7 @@ struct process {
  char ps_comm[MAXCOMLEN+1];
 
  vaddr_t ps_strings; /* User pointers to argv/env */
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
  vaddr_t ps_sigcode; /* User pointer to the signal code */
  vaddr_t ps_sigcoderet; /* User pointer to sigreturn retPC */
  u_long ps_sigcookie;
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..bcd3acd034d 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,7 +163,7 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
+#if defined(_KERNEL) || defined(_STANDALONE) || defined (_LIBC)
 #include <sys/_time.h>
 
 /* Time expressed as seconds and fractions of a second + operations on it. */
@@ -171,6 +171,9 @@ struct bintime {
  time_t sec;
  uint64_t frac;
 };
+#endif
+
+#if defined(_KERNEL) || defined(_STANDALONE)
 
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
diff --git sys/sys/timetc.h sys/sys/timetc.h
index ce81c3475a0..22658b00da2 100644
--- sys/sys/timetc.h
+++ sys/sys/timetc.h
@@ -24,7 +24,7 @@
 #ifndef _SYS_TIMETC_H_
 #define _SYS_TIMETC_H_
 
-#ifndef _KERNEL
+#if !defined(_KERNEL) && !defined(_LIBC)
 #error "no user-serviceable parts inside"
 #endif
 
@@ -80,6 +80,8 @@ struct timecounter {
  */
  void *tc_priv; /* [I] */
  /* Pointer to the timecounter's private parts. */
+ int tc_user; /* [I] */
+ /* Expose this timecounter to userland. */
  SLIST_ENTRY(timecounter) tc_next; /* [I] */
  /* Pointer to the next timecounter. */
  int64_t tc_freq_adj; /* [tw] */
@@ -88,11 +90,31 @@ struct timecounter {
  /* Precision of the counter.  Computed in tc_init(). */
 };
 
+struct timekeep {
+ uint32_t tk_major; /* version major number */
+ uint32_t tk_minor; /* version minor number */
+
+ /* timehands members */
+ uint64_t tk_scale;
+ u_int tk_offset_count;
+ struct bintime tk_offset;
+ struct bintime tk_naptime;
+ struct bintime tk_boottime;
+ volatile u_int tk_generation;
+
+ /* timecounter members */
+ int tk_user;
+ u_int tk_counter_mask;
+};
+
 struct rwlock;
 extern struct rwlock tc_lock;
 
 extern struct timecounter *timecounter;
 
+extern struct uvm_object *timekeep_object;
+extern struct timekeep *timekeep;
+
 u_int64_t tc_getfrequency(void);
 u_int64_t tc_getprecision(void);
 void tc_init(struct timecounter *tc);

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

George Koehler-2
On Sat, 30 May 2020 19:21:30 +0300
Paul Irofti <[hidden email]> wrote:

> Here is an updated diff with no libc bump.  Please use this one for
> further testing.

Your diff does amd64.
Here is a diff to add macppc.  Apply after your diff.

I have only tested clock_gettime(2) with CLOCK_REALTIME,
by doing loops in Ruby like, $ ruby27 -e '10000.times{p Time.now}'
The time increased steadily, and ktrace showed only a few system calls
to clock_gettime(2).

I copied ppc_mftb() from /sys/arch/powerpc/powerpc/cpu.h and renamed
it to tc_get_timecount_md(), because #include <machine/cpu.h> doesn't
provide ppc_mftb() if not _KERNEL.  It would be better to edit the
kernel headers to give ppc_mftb() if _LIBC, but I haven't done that.

I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
not sure, but one might move the list of arches to dlfcn/Makefile.inc
and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
might drop the tc_get_timecount function pointer and just always call
the function #ifdef TIMEKEEP.

PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
timebase and a "common page" from the kernel.  Their common page also
had executable code for gettimeofday, memcpy, pthread_self, and a few
other functions.  --George

Index: lib/libc/arch/powerpc/gen/Makefile.inc
===================================================================
RCS file: /cvs/src/lib/libc/arch/powerpc/gen/Makefile.inc,v
retrieving revision 1.14
diff -u -p -r1.14 Makefile.inc
--- lib/libc/arch/powerpc/gen/Makefile.inc 12 Apr 2012 16:14:09 -0000 1.14
+++ lib/libc/arch/powerpc/gen/Makefile.inc 31 May 2020 03:20:58 -0000
@@ -3,3 +3,4 @@ SRCS+= fabs.c
 SRCS+= fpgetmask.c fpsetmask.c
 SRCS+= fpgetround.c fpsetround.c
 SRCS+= fpgetsticky.c fpsetsticky.c
+SRCS+= usertc.c
--- /dev/null Sat May 30 23:21:20 2020
+++ lib/libc/arch/powerpc/gen/usertc.c Sat May 30 19:37:59 2020
@@ -0,0 +1,44 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (C) 1995, 1996 Wolfgang Solfrank.
+ * Copyright (C) 1995, 1996 TooLs GmbH.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ u_long scratch;
+ u_int64_t tb;
+
+ __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;"
+    " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch));
+ return tb;
+}
--- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
+++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
@@ -70,7 +70,7 @@
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
-#if defined(__amd64)
+#if defined(__amd64__) || defined(__powerpc__)
 uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
 #else
 uint64_t (*const tc_get_timecount)(void) = NULL;
--- sys/arch/macppc/macppc/clock.c.before Sat May 30 23:28:00 2020
+++ sys/arch/macppc/macppc/clock.c Sat May 30 20:35:47 2020
@@ -57,7 +57,7 @@
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 1
 };
 
 /* calibrate the timecounter frequency for the listed models */

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
> timebase and a "common page" from the kernel.  Their common page also
> had executable code for gettimeofday, memcpy, pthread_self, and a few
> other functions.

We are desperately avoiding the model where such code is exported.
It becomes a target.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-31 07:28, Theo de Raadt wrote:
>> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
>> timebase and a "common page" from the kernel.  Their common page also
>> had executable code for gettimeofday, memcpy, pthread_self, and a few
>> other functions.
>
> We are desperately avoiding the model where such code is exported.
> It becomes a target.

Indeed.

Are we settled on timekeep as a name? Do you want to rename it to
something else? Make it more generic?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by George Koehler-2
On Sun, May 31, 2020 at 12:25:00AM -0400, George Koehler wrote:
> On Sat, 30 May 2020 19:21:30 +0300
> Paul Irofti <[hidden email]> wrote:
>
> > Here is an updated diff with no libc bump.  Please use this one for
> > further testing.
>
> Your diff does amd64.
> Here is a diff to add macppc.  Apply after your diff.

Cool! Thanks for doing this!

> I have only tested clock_gettime(2) with CLOCK_REALTIME,
> by doing loops in Ruby like, $ ruby27 -e '10000.times{p Time.now}'
> The time increased steadily, and ktrace showed only a few system calls
> to clock_gettime(2).

I am attaching a diff that includes minimal regression tests for this.
You can also try testing with real programs such as Firefox.

> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> might drop the tc_get_timecount function pointer and just always call
> the function #ifdef TIMEKEEP.

That could work. First we have to decide on a name. Or maybe we already
have, I don't know.

> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
> timebase and a "common page" from the kernel.  Their common page also
> had executable code for gettimeofday, memcpy, pthread_self, and a few
> other functions.  --George

That's a no-no for security reasons. The diff looks good. Please try it
with more tests and real programs and report back.


diff --git regress/lib/libc/timekeep/Makefile regress/lib/libc/timekeep/Makefile
new file mode 100644
index 00000000000..a7f3080290d
--- /dev/null
+++ regress/lib/libc/timekeep/Makefile
@@ -0,0 +1,5 @@
+# $OpenBSD$
+
+PROGS= test_clock_gettime test_time_skew test_gettimeofday
+
+.include <bsd.regress.mk>
diff --git regress/lib/libc/timekeep/test_clock_gettime.c regress/lib/libc/timekeep/test_clock_gettime.c
new file mode 100644
index 00000000000..859ec368215
--- /dev/null
+++ regress/lib/libc/timekeep/test_clock_gettime.c
@@ -0,0 +1,43 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+
+void
+check()
+{
+ struct timespec tp = {0};
+
+ ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &tp));
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp));
+ ASSERT_EQ(0, clock_gettime(CLOCK_BOOTTIME, &tp));
+ ASSERT_EQ(0, clock_gettime(CLOCK_UPTIME, &tp));
+
+
+ ASSERT_EQ(0, clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tp));
+ ASSERT_EQ(0, clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp));
+
+}
+
+int main()
+{
+ check();
+ return 0;
+}
diff --git regress/lib/libc/timekeep/test_gettimeofday.c regress/lib/libc/timekeep/test_gettimeofday.c
new file mode 100644
index 00000000000..ea90a1be7e0
--- /dev/null
+++ regress/lib/libc/timekeep/test_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <sys/time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+
+void
+check()
+{
+ struct timeval tv = {0};
+ struct timezone tzp;
+
+ ASSERT_EQ(0, gettimeofday(&tv, NULL));
+ ASSERT_EQ(0, gettimeofday(&tv, &tzp));
+}
+
+int main()
+{
+ check();
+ return 0;
+}
diff --git regress/lib/libc/timekeep/test_time_skew.c regress/lib/libc/timekeep/test_time_skew.c
new file mode 100644
index 00000000000..dfa9481c091
--- /dev/null
+++ regress/lib/libc/timekeep/test_time_skew.c
@@ -0,0 +1,55 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/time.h>
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+#define ASSERT_NE(a, b) assert((a) != (b))
+
+void
+check()
+{
+         struct timespec tp1, tp2, tout;
+
+         tout.tv_sec = 0;
+         tout.tv_nsec = 100000;
+
+         ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp1));
+
+         nanosleep(&tout, NULL);
+
+         ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp2));
+
+         /* tp1 should never be larger than tp2 */
+         ASSERT_NE(1, timespeccmp(&tp1, &tp2, >));
+}
+
+int
+main(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; i++)
+ check();
+
+ return 0;
+}

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by George Koehler-2
> Date: Sun, 31 May 2020 00:25:00 -0400
> From: George Koehler <[hidden email]>
>
> On Sat, 30 May 2020 19:21:30 +0300
> Paul Irofti <[hidden email]> wrote:
>
> > Here is an updated diff with no libc bump.  Please use this one for
> > further testing.
>
> Your diff does amd64.
> Here is a diff to add macppc.  Apply after your diff.
>
> I have only tested clock_gettime(2) with CLOCK_REALTIME,
> by doing loops in Ruby like, $ ruby27 -e '10000.times{p Time.now}'
> The time increased steadily, and ktrace showed only a few system calls
> to clock_gettime(2).
>
> I copied ppc_mftb() from /sys/arch/powerpc/powerpc/cpu.h and renamed
> it to tc_get_timecount_md(), because #include <machine/cpu.h> doesn't
> provide ppc_mftb() if not _KERNEL.  It would be better to edit the
> kernel headers to give ppc_mftb() if _LIBC, but I haven't done that.

I think copying the code is fine.  It's only two lines of inline
assembly and I don't think it will ever change (despite what we just
went through with the clang switch) ;).

> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> might drop the tc_get_timecount function pointer and just always call
> the function #ifdef TIMEKEEP.

Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
catching that.  The benefit of the TIMEKEEP define would be that we
can eliminate the fallback code completely on architectures that don't
implement this functionality.

> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
> timebase and a "common page" from the kernel.  Their common page also
> had executable code for gettimeofday, memcpy, pthread_self, and a few
> other functions.  --George
>
> Index: lib/libc/arch/powerpc/gen/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/arch/powerpc/gen/Makefile.inc,v
> retrieving revision 1.14
> diff -u -p -r1.14 Makefile.inc
> --- lib/libc/arch/powerpc/gen/Makefile.inc 12 Apr 2012 16:14:09 -0000 1.14
> +++ lib/libc/arch/powerpc/gen/Makefile.inc 31 May 2020 03:20:58 -0000
> @@ -3,3 +3,4 @@ SRCS+= fabs.c
>  SRCS+= fpgetmask.c fpsetmask.c
>  SRCS+= fpgetround.c fpsetround.c
>  SRCS+= fpgetsticky.c fpsetsticky.c
> +SRCS+= usertc.c
> --- /dev/null Sat May 30 23:21:20 2020
> +++ lib/libc/arch/powerpc/gen/usertc.c Sat May 30 19:37:59 2020
> @@ -0,0 +1,44 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (C) 1995, 1996 Wolfgang Solfrank.
> + * Copyright (C) 1995, 1996 TooLs GmbH.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. All advertising materials mentioning features or use of this software
> + *    must display the following acknowledgement:
> + * This product includes software developed by TooLs GmbH.
> + * 4. The name of TooLs GmbH may not be used to endorse or promote products
> + *    derived from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
> + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
> + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
> + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/types.h>
> +
> +uint64_t
> +tc_get_timecount_md(void)
> +{
> + u_long scratch;
> + u_int64_t tb;
> +
> + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;"
> +    " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch));
> + return tb;
> +}
> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> @@ -70,7 +70,7 @@
>  
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
> -#if defined(__amd64)
> +#if defined(__amd64__) || defined(__powerpc__)
>  uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>  #else
>  uint64_t (*const tc_get_timecount)(void) = NULL;
> --- sys/arch/macppc/macppc/clock.c.before Sat May 30 23:28:00 2020
> +++ sys/arch/macppc/macppc/clock.c Sat May 30 20:35:47 2020
> @@ -57,7 +57,7 @@
>  static int32_t ticks_per_intr;
>  
>  static struct timecounter tb_timecounter = {
> - tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
> + tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 1
>  };
>  
>  /* calibrate the timecounter frequency for the listed models */
>

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by George Koehler-2
> Date: Sun, 31 May 2020 00:25:00 -0400
> From: George Koehler <[hidden email]>
>
> On Sat, 30 May 2020 19:21:30 +0300
> Paul Irofti <[hidden email]> wrote:
>
> > Here is an updated diff with no libc bump.  Please use this one for
> > further testing.
>
> Your diff does amd64.
> Here is a diff to add macppc.  Apply after your diff.
>
> I have only tested clock_gettime(2) with CLOCK_REALTIME,
> by doing loops in Ruby like, $ ruby27 -e '10000.times{p Time.now}'
> The time increased steadily, and ktrace showed only a few system calls
> to clock_gettime(2).
>
> I copied ppc_mftb() from /sys/arch/powerpc/powerpc/cpu.h and renamed
> it to tc_get_timecount_md(), because #include <machine/cpu.h> doesn't
> provide ppc_mftb() if not _KERNEL.  It would be better to edit the
> kernel headers to give ppc_mftb() if _LIBC, but I haven't done that.
>
> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> might drop the tc_get_timecount function pointer and just always call
> the function #ifdef TIMEKEEP.
>
> PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's
> timebase and a "common page" from the kernel.  Their common page also
> had executable code for gettimeofday, memcpy, pthread_self, and a few
> other functions.  --George

Oh, and on the diff itself:

> Index: lib/libc/arch/powerpc/gen/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/arch/powerpc/gen/Makefile.inc,v
> retrieving revision 1.14
> diff -u -p -r1.14 Makefile.inc
> --- lib/libc/arch/powerpc/gen/Makefile.inc 12 Apr 2012 16:14:09 -0000 1.14
> +++ lib/libc/arch/powerpc/gen/Makefile.inc 31 May 2020 03:20:58 -0000
> @@ -3,3 +3,4 @@ SRCS+= fabs.c
>  SRCS+= fpgetmask.c fpsetmask.c
>  SRCS+= fpgetround.c fpsetround.c
>  SRCS+= fpgetsticky.c fpsetsticky.c
> +SRCS+= usertc.c
> --- /dev/null Sat May 30 23:21:20 2020
> +++ lib/libc/arch/powerpc/gen/usertc.c Sat May 30 19:37:59 2020
> @@ -0,0 +1,44 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (C) 1995, 1996 Wolfgang Solfrank.
> + * Copyright (C) 1995, 1996 TooLs GmbH.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + * 3. All advertising materials mentioning features or use of this software
> + *    must display the following acknowledgement:
> + * This product includes software developed by TooLs GmbH.
> + * 4. The name of TooLs GmbH may not be used to endorse or promote products
> + *    derived from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
> + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
> + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
> + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/types.h>
> +
> +uint64_t
> +tc_get_timecount_md(void)
> +{
> + u_long scratch;
> + u_int64_t tb;
> +
> + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;"
> +    " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch));
> + return tb;
> +}

Would be good not to mux uint64_t and u_int64_t in the same bit of
code.  I'd use uint64_t and replace u_long with uint32_t.


> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> @@ -70,7 +70,7 @@
>  
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
> -#if defined(__amd64)
> +#if defined(__amd64__) || defined(__powerpc__)
>  uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>  #else
>  uint64_t (*const tc_get_timecount)(void) = NULL;
> --- sys/arch/macppc/macppc/clock.c.before Sat May 30 23:28:00 2020
> +++ sys/arch/macppc/macppc/clock.c Sat May 30 20:35:47 2020
> @@ -57,7 +57,7 @@
>  static int32_t ticks_per_intr;
>  
>  static struct timecounter tb_timecounter = {
> - tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
> + tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 1
>  };
>  
>  /* calibrate the timecounter frequency for the listed models */
>

123456 ... 11