userland clock_gettime proof of concept

classic Classic list List threaded Threaded
203 messages Options
12345 ... 11
Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
> Discussions.
>
>   - /sbin/init init_main.c!start_init() map page? (deraadt@)
>     -> that is not the problem, the page should be mapped even there
>        by the sys_execve() call

Robert found the proper solution to this: move the find_timekeep bits in
_libc_preinit!

This helps with a lot of things:

  - removes the need for the find_timekeep() function
  - removes the nasty ELF exports
  - shrinks the diff

Good job, Robert!

What's left is the TSC discussion and the bikeshedding bits.

Paul


diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..70b70eb3ea0 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -30,6 +30,7 @@
 #include <link.h>
 #include <stdlib.h> /* atexit */
 #include <string.h>
+#include <time.h> /* timekeep */
 #include <unistd.h>
 
 #include "init.h"
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -105,6 +107,9 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..1137dbcd44f 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -24,12 +24,16 @@
 extern PROTO_NORMAL(tzname);
 #endif
 
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(asctime);
 PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d0b5dd1bdcd 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} clock_gettime.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..7c2883c31fd
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,64 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+#include <err.h>
+
+#include <sys/time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct __timekeep *timekeep;
+ unsigned int seq;
+
+ if (_timekeep == NULL)
+ return clock_gettime(clock_id, tp);
+ timekeep = _timekeep;
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ do {
+ seq = timekeep->seq;
+ *tp = timekeep->tp_realtime;
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ case CLOCK_UPTIME:
+ do {
+ seq = timekeep->seq;
+ *tp = timekeep->tp_uptime;
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ case CLOCK_MONOTONIC:
+ do {
+ seq = timekeep->seq;
+ *tp = timekeep->tp_monotonic;
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ case CLOCK_BOOTTIME:
+ do {
+ seq = timekeep->seq;
+ *tp = timekeep->tp_boottime;
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..15bf4db6fbd 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/time.h>
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct __timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct __timekeep *)va;
+ timekeep->major = 0;
+ timekeep->minor = 0;
+
+ timekeep->seq = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..9c67cb738de 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -35,6 +35,7 @@
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <dev/rndvar.h>
+#include <sys/time.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -480,6 +481,29 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_clock_gettime(void)
+{
+ if (timekeep == NULL)
+ return;
+
+ atomic_inc_int(&timekeep->seq);
+
+ /* CLOCK_REALTIME */
+ nanotime(&timekeep->tp_realtime);
+
+ /* CLOCK_UPTIME */
+ nanoruntime(&timekeep->tp_uptime);
+
+ /* CLOCK_MONOTONIC */
+ nanouptime(&timekeep->tp_monotonic);
+
+ /* CLOCK_BOOTTIME */
+ timekeep->tp_boottime = timekeep->tp_monotonic;
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +656,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_clock_gettime();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..f55b75f1e84 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 2004, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..93a79a220db 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -248,6 +248,8 @@ struct process {
  u_int ps_rtableid; /* Process routing table/domain. */
  char ps_nice; /* Process "nice" value. */
 
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
+
  struct uprof { /* profile arguments */
  caddr_t pr_base; /* buffer base */
  size_t  pr_size; /* buffer size */
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..be762be15e4 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,6 +163,17 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
+struct __timekeep {
+ uint8_t major; /* version major number */
+ uint8_t minor; /* version minor number */
+
+ volatile unsigned int seq; /* synchronization */
+ struct timespec tp_realtime; /* CLOCK_REALTIME */
+ struct timespec tp_uptime; /* CLOCK_UPTIME */
+ struct timespec tp_monotonic; /* CLOCK_MONOTONIC */
+ struct timespec tp_boottime; /* CLOCK_BOOTTIME */
+};
+
 #if defined(_KERNEL) || defined(_STANDALONE)
 #include <sys/_time.h>
 
@@ -396,6 +407,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
  return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
 }
 
+extern struct uvm_object *timekeep_object;
+extern struct __timekeep *timekeep;
 #else /* !_KERNEL */
 #include <time.h>
 

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Paul Irofti-4
Hi,

Here is a new iteration of the diff which includes support for MD high
resolution clocks. Currently only implements TSC on amd64. If the
MD function is not defined, it fallsback to the syscall.

There is the question of the skew fix, but that will be addressed in a
separate kernel diff that will not affect the current diff at all.

I could not find a way to find on which processor the process is running
on from userland without going through a syscall. If there is one please
let me know. It would make things easier.

In the meantime I have also gotten positive feedback from various
testers that run this on their main machine.

Anyway, I think we can decide on the struct name and the auxiliary
vector ID and consider this done.

Thoughts?

Paul

diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 00000000000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..c5921851203 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -30,6 +30,7 @@
 #include <link.h>
 #include <stdlib.h> /* atexit */
 #include <string.h>
+#include <time.h> /* timekeep */
 #include <unistd.h>
 
 #include "init.h"
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..a5b20eec27a 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -20,11 +20,18 @@
 
 #include_next <sys/time.h>
 
+__BEGIN_HIDDEN_DECLS
+void _microtime(struct timeval *tvp, struct __timekeep *tk);
+void _nanotime(struct timespec *tsp, struct __timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..44bd8e7c6e3 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -18,18 +18,26 @@
 #ifndef _LIBC_TIME_H_
 #define _LIBC_TIME_H_
 
+#include <sys/types.h>
+
 #include_next <time.h>
 
 #if 0
 extern PROTO_NORMAL(tzname);
 #endif
 
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+extern uint64_t (*const tc_get_timecount)(void);
+uint64_t tc_get_timecount_md(void);
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(asctime);
 PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..6b7b65762e7
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,157 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/time.h>
+
+#include <time.h>
+
+/*
+ * Return the difference between the timehands' counter value now and what
+ * was when we copied it to the timehands' offset_count.
+ */
+static __inline u_int
+tc_delta(struct __timekeep *tk)
+{
+ return ((tc_get_timecount() - tk->th_offset_count) &
+    tk->tc_counter_mask);
+}
+
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+ ct->sec = bt->sec;
+ if (bt->frac > bt->frac + x)
+ ct->sec++;
+ ct->frac = bt->frac + x;
+}
+
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+ ts->tv_sec = bt->sec;
+ ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+ tv->tv_sec = bt->sec;
+ tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static void
+binuptime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec + ct->sec;
+ if (bt->frac > bt->frac + ct->frac)
+ dt->sec++;
+ dt->frac = bt->frac + ct->frac;
+}
+
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec - ct->sec;
+ if (bt->frac < bt->frac - ct->frac)
+ dt->sec--;
+ dt->frac = bt->frac - ct->frac;
+}
+
+static void
+binruntime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
+ bintimesub(bt, &tk->th_naptime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static void
+bintime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ bintimeadd(bt, &tk->th_boottime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+void
+_microtime(struct timeval *tvp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMEVAL(&bt, tvp);
+}
+
+void
+_nanotime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
+
+void
+_nanoruntime(struct timespec *ts, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binruntime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, ts);
+}
+
+
+void
+_nanouptime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binuptime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..27c504fc285
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,59 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+#include <err.h>
+
+#include <sys/time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct __timekeep *timekeep;
+ unsigned int seq;
+
+ if (_timekeep == NULL)
+ return clock_gettime(clock_id, tp);
+ timekeep = _timekeep;
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ do {
+ seq = timekeep->seq;
+ _nanotime(tp, timekeep);
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ case CLOCK_UPTIME:
+ do {
+ seq = timekeep->seq;
+ _nanoruntime(tp, timekeep);
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ case CLOCK_MONOTONIC:
+ case CLOCK_BOOTTIME:
+ do {
+ seq = timekeep->seq;
+ _nanouptime(tp, timekeep);
+ } while (seq == 0 || seq != timekeep->seq);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..3cf77e96e37
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,42 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/time.h>
+
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+ struct __timekeep *timekeep = _timekeep;
+ static struct timezone zerotz = { 0, 0 };
+ unsigned int seq;
+
+ if (timekeep == NULL)
+ return gettimeofday(tp, tzp);
+
+ if (tp) {
+ do {
+ seq = timekeep->seq;
+ _microtime(tp, timekeep);
+ } while (seq == 0 || seq != timekeep->seq);
+ }
+
+ if (tzp)
+ tzp = &zerotz;
+
+ return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..15bf4db6fbd 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/time.h>
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct __timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct __timekeep *)va;
+ timekeep->major = 0;
+ timekeep->minor = 0;
+
+ timekeep->seq = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..1a6db0102c3 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -35,6 +35,7 @@
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <dev/rndvar.h>
+#include <sys/time.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -480,6 +481,34 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_update_timekeep(void)
+{
+ struct timehands *th;
+ u_int gen;
+
+ if (timekeep == NULL)
+ return;
+
+ atomic_inc_int(&timekeep->seq);
+
+ do {
+ th = timehands;
+ gen = th->th_generation;
+ membar_consumer();
+ timekeep->th_scale = th->th_scale;
+ timekeep->th_offset_count = th->th_offset_count;
+ timekeep->th_offset = th->th_offset;
+ timekeep->th_naptime = th->th_naptime;
+ timekeep->th_boottime = th->th_boottime;
+ timekeep->th_generation = th->th_generation;
+ timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
+ membar_consumer();
+ } while (gen == 0 || gen != th->th_generation);
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +661,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..f55b75f1e84 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 2004, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..93a79a220db 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -248,6 +248,8 @@ struct process {
  u_int ps_rtableid; /* Process routing table/domain. */
  char ps_nice; /* Process "nice" value. */
 
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
+
  struct uprof { /* profile arguments */
  caddr_t pr_base; /* buffer base */
  size_t  pr_size; /* buffer size */
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..0b48f65f3f1 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,15 +163,30 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
-#include <sys/_time.h>
-
 /* Time expressed as seconds and fractions of a second + operations on it. */
 struct bintime {
  time_t sec;
  uint64_t frac;
 };
 
+struct __timekeep {
+ uint8_t major; /* version major number */
+ uint8_t minor; /* version minor number */
+
+ volatile unsigned int seq; /* synchronization */
+
+ u_int64_t th_scale; /* [w] */
+ u_int th_offset_count; /* [w] */
+ struct bintime th_offset; /* [w] */
+ struct bintime th_naptime; /* [w] */
+ struct bintime th_boottime; /* [tw] */
+ volatile u_int th_generation; /* [w] */
+ u_int tc_counter_mask; /* [I] */
+};
+
+#if defined(_KERNEL) || defined(_STANDALONE)
+#include <sys/_time.h>
+
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
     (btp)->frac cmp (ctp)->frac : \
@@ -396,6 +411,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
  return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
 }
 
+extern struct uvm_object *timekeep_object;
+extern struct __timekeep *timekeep;
 #else /* !_KERNEL */
 #include <time.h>
 

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Stuart Henderson
I'm running it here.

On 2020/05/28 17:44, Paul Irofti wrote:

> diff --git lib/libc/shlib_version lib/libc/shlib_version
> index 06f98b01084..5fb0770494f 100644
> --- lib/libc/shlib_version
> +++ lib/libc/shlib_version
> @@ -1,4 +1,4 @@
>  major=96
> -minor=0
> +minor=1
>  # note: If changes were made to include/thread_private.h or if system calls
>  # were added/changed then librthread/shlib_version must also be updated.

Is the bump actually needed? Symbols.list is untouched so there's no change to
the exported symbols.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
> Is the bump actually needed? Symbols.list is untouched so there's no change to
> the exported symbols.

I am not sure if WRAP does not require it. Probably not. Otherwise as
the diff stands now (always falling back to the syscall if timekeep is
missing), I tend to agree with your statement :)

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by Paul Irofti-4
> Date: Thu, 28 May 2020 17:44:31 +0300
> From: Paul Irofti <[hidden email]>
>
> Hi,
>
> Here is a new iteration of the diff which includes support for MD high
> resolution clocks. Currently only implements TSC on amd64. If the
> MD function is not defined, it fallsback to the syscall.
>
> There is the question of the skew fix, but that will be addressed in a
> separate kernel diff that will not affect the current diff at all.
>
> I could not find a way to find on which processor the process is running
> on from userland without going through a syscall. If there is one please
> let me know. It would make things easier.
>
> In the meantime I have also gotten positive feedback from various
> testers that run this on their main machine.
>
> Anyway, I think we can decide on the struct name and the auxiliary
> vector ID and consider this done.
>
> Thoughts?

This is getting us somewhere.

Still some issues though (besides the skew thing you already mention).

1. The synchronization mechanism is broken.  The seq member needs to
   be set to 0 while updating the struct and only set to the "next"
   value after completing the update of the full struct.  You need to
   be careful to avoid 0, otherwise the application will spin for a
   full timeslice while seq overflows into 0.

   However, since you now export the timehands generation, I'd really
   drop seq and use the timehands generation for synchronization.  It
   makes no sense to have both.

2. Since tc_update_timekeep() is called from tc_windup() it doesn't
   need to do the synchronization dance.

3. Like tc_windup, tc_update_timekeep() needs to have some
    membar_procer() calls in it instead of membar_consumer() calls.

4. There is no need to update th_counter_mask on every update.

5. What if the TSC is not available as a usable timecounter?  In that
   case libc should fall back on the system call.  But we need a way
   to communicate what the timecounter is and detect when we switch
   timecounters.  Maybe adding a timecounter ID to the page will help
   here.  But then MD code in libc will have to check the ID and
   dispatch to the right timecounter read function.

6. The major and minor fields probably should bbe uint32_t or maybe
    uint16_t.  You're not saving any space by making them uint8_t.

>
> Paul
>
> diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
> index e995309ed71..caa4452a3d9 100644
> --- lib/libc/arch/amd64/gen/Makefile.inc
> +++ lib/libc/arch/amd64/gen/Makefile.inc
> @@ -2,6 +2,6 @@
>  
>  SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
>   sigsetjmp.S
> -SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
> +SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
>  SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
>   fpsetround.S fpsetsticky.S
> diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
> new file mode 100644
> index 00000000000..b14c862c61a
> --- /dev/null
> +++ lib/libc/arch/amd64/gen/rdtsc.c
> @@ -0,0 +1,26 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +
> +uint64_t
> +tc_get_timecount_md(void)
> +{
> + uint32_t hi, lo;
> + asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
> + return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> +}
> diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
> index cd056c85719..2b25d49f32a 100644
> --- lib/libc/asr/asr.c
> +++ lib/libc/asr/asr.c
> @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
>   struct timespec pollstart, pollend, elapsed;
>   int r;
>  
> - if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
>   return -1;
>  
>   while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
> - if (clock_gettime(CLOCK_MONOTONIC, &pollend))
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
>   return -1;
>   timespecsub(&pollend, &pollstart, &elapsed);
>   timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
> @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
>   asr->a_rtime = 0;
>   }
>  
> - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>   return;
>  
>   if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
> diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
> index 82de8fa33b7..02fd3013cc1 100644
> --- lib/libc/crypt/bcrypt.c
> +++ lib/libc/crypt/bcrypt.c
> @@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
>   char buf[_PASSWORD_LEN];
>   int duration;
>  
> - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
> + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
>   bcrypt_newhash("testpassword", r, buf, sizeof(buf));
> - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
> + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
>  
>   duration = after.tv_sec - before.tv_sec;
>   duration *= 1000000;
> diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
> index 270f54aada5..c5921851203 100644
> --- lib/libc/dlfcn/init.c
> +++ lib/libc/dlfcn/init.c
> @@ -30,6 +30,7 @@
>  #include <link.h>
>  #include <stdlib.h> /* atexit */
>  #include <string.h>
> +#include <time.h> /* timekeep */
>  #include <unistd.h>
>  
>  #include "init.h"
> @@ -45,8 +46,9 @@
>  /* XXX should be in an include file shared with csu */
>  char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
>  
> -/* provide definition for this */
> +/* provide definition for these */
>  int _pagesize = 0;
> +void *_timekeep = NULL;
>  
>  /*
>   * In dynamicly linked binaries environ and __progname are overriden by
> @@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
>  
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
> +#if defined(__amd64)
> +uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> +#else
> +uint64_t (*const tc_get_timecount)(void) = NULL;
> +#endif
> +
>  
>  void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
>  void
> @@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
>   phnum = aux->au_v;
>   break;
>  #endif /* !PIC */
> + case AUX_openbsd_timekeep:
> + if (tc_get_timecount)
> + _timekeep = (void *)aux->au_v;
> + break;
>   }
>   }
>  
> diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
> index 1286a96fe40..32f86eda50f 100644
> --- lib/libc/gen/auth_subr.c
> +++ lib/libc/gen/auth_subr.c
> @@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
>  
>   if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
>   if (as->now.tv_sec == 0)
> - gettimeofday(&as->now, NULL);
> + WRAP(gettimeofday)(&as->now, NULL);
>   if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
>   as->state &= ~AUTH_ALLOW;
>   as->state |= AUTH_EXPIRED;
> @@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
>  
>   if (as->pwd && (quad_t)as->pwd->pw_change) {
>   if (as->now.tv_sec == 0)
> - gettimeofday(&as->now, NULL);
> + WRAP(gettimeofday)(&as->now, NULL);
>   if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
>   as->state &= ~AUTH_ALLOW;
>   as->state |= AUTH_PWEXPIRED;
> diff --git lib/libc/gen/time.c lib/libc/gen/time.c
> index 3bbd0d733d1..b3ce9a800f1 100644
> --- lib/libc/gen/time.c
> +++ lib/libc/gen/time.c
> @@ -36,7 +36,7 @@ time(time_t *t)
>  {
>   struct timeval tt;
>  
> - if (gettimeofday(&tt, NULL) == -1)
> + if (WRAP(gettimeofday)(&tt, NULL) == -1)
>   return (-1);
>   if (t)
>   *t = (time_t)tt.tv_sec;
> diff --git lib/libc/gen/times.c lib/libc/gen/times.c
> index 02e4dd44b5c..36841810d1b 100644
> --- lib/libc/gen/times.c
> +++ lib/libc/gen/times.c
> @@ -52,7 +52,7 @@ times(struct tms *tp)
>   return ((clock_t)-1);
>   tp->tms_cutime = CONVTCK(ru.ru_utime);
>   tp->tms_cstime = CONVTCK(ru.ru_stime);
> - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>   return ((clock_t)-1);
>   return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
>  }
> diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
> index 520a5954025..845cbe80356 100644
> --- lib/libc/gen/timespec_get.c
> +++ lib/libc/gen/timespec_get.c
> @@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
>  {
>   switch (base) {
>   case TIME_UTC:
> - if (clock_gettime(CLOCK_REALTIME, ts) == -1)
> + if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
>   return 0;
>   break;
>   default:
> diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
> index ed112320fa2..a5b20eec27a 100644
> --- lib/libc/hidden/sys/time.h
> +++ lib/libc/hidden/sys/time.h
> @@ -20,11 +20,18 @@
>  
>  #include_next <sys/time.h>
>  
> +__BEGIN_HIDDEN_DECLS
> +void _microtime(struct timeval *tvp, struct __timekeep *tk);
> +void _nanotime(struct timespec *tsp, struct __timekeep *tk);
> +void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
> +void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
> +__END_HIDDEN_DECLS
> +
>  PROTO_NORMAL(adjfreq);
>  PROTO_NORMAL(adjtime);
>  PROTO_NORMAL(futimes);
>  PROTO_NORMAL(getitimer);
> -PROTO_NORMAL(gettimeofday);
> +PROTO_WRAP(gettimeofday);
>  PROTO_NORMAL(setitimer);
>  PROTO_NORMAL(settimeofday);
>  PROTO_NORMAL(utimes);
> diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
> index 18c49f8fcb9..44bd8e7c6e3 100644
> --- lib/libc/hidden/time.h
> +++ lib/libc/hidden/time.h
> @@ -18,18 +18,26 @@
>  #ifndef _LIBC_TIME_H_
>  #define _LIBC_TIME_H_
>  
> +#include <sys/types.h>
> +
>  #include_next <time.h>
>  
>  #if 0
>  extern PROTO_NORMAL(tzname);
>  #endif
>  
> +__BEGIN_HIDDEN_DECLS
> +extern void *_timekeep;
> +extern uint64_t (*const tc_get_timecount)(void);
> +uint64_t tc_get_timecount_md(void);
> +__END_HIDDEN_DECLS
> +
>  PROTO_NORMAL(asctime);
>  PROTO_NORMAL(asctime_r);
>  PROTO_STD_DEPRECATED(clock);
>  PROTO_DEPRECATED(clock_getcpuclockid);
>  PROTO_NORMAL(clock_getres);
> -PROTO_NORMAL(clock_gettime);
> +PROTO_WRAP(clock_gettime);
>  PROTO_NORMAL(clock_settime);
>  PROTO_STD_DEPRECATED(ctime);
>  PROTO_DEPRECATED(ctime_r);
> diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
> index 763e420bb88..9babb28470a 100644
> --- lib/libc/net/res_random.c
> +++ lib/libc/net/res_random.c
> @@ -219,7 +219,7 @@ res_initid(void)
>   if (ru_prf != NULL)
>   arc4random_buf(ru_prf, sizeof(*ru_prf));
>  
> - clock_gettime(CLOCK_MONOTONIC, &ts);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>   ru_reseed = ts.tv_sec + RU_OUT;
>   ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
>  }
> @@ -232,7 +232,7 @@ __res_randomid(void)
>   u_int r;
>   static void *randomid_mutex;
>  
> - clock_gettime(CLOCK_MONOTONIC, &ts);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>   pid = getpid();
>  
>   _MUTEX_LOCK(&randomid_mutex);
> diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
> index 402d98cede4..917a6d42b8a 100644
> --- lib/libc/rpc/auth_unix.c
> +++ lib/libc/rpc/auth_unix.c
> @@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
>   /*
>   * fill in param struct from the given params
>   */
> - (void)gettimeofday(&now,  NULL);
> + (void)WRAP(gettimeofday)(&now,  NULL);
>   aup.aup_time = now.tv_sec;
>   aup.aup_machname = machname;
>   aup.aup_uid = uid;
> @@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
>   goto done;
>  
>   /* update the time and serialize in place */
> - (void)gettimeofday(&now, NULL);
> + (void)WRAP(gettimeofday)(&now, NULL);
>   aup.aup_time = now.tv_sec;
>   xdrs.x_op = XDR_ENCODE;
>   XDR_SETPOS(&xdrs, 0);
> diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
> index 8e6ef515b0e..927b4bf2028 100644
> --- lib/libc/rpc/clnt_tcp.c
> +++ lib/libc/rpc/clnt_tcp.c
> @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
>   pfd[0].events = POLLIN;
>   TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
>   delta = wait;
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   for (;;) {
>   r = ppoll(pfd, 1, &delta, NULL);
>   save_errno = errno;
>  
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&start, &after, &duration);
>   timespecsub(&wait, &duration, &delta);
>   if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
> index 68d01674410..92e1d5c350d 100644
> --- lib/libc/rpc/clnt_udp.c
> +++ lib/libc/rpc/clnt_udp.c
> @@ -265,7 +265,7 @@ send_again:
>   reply_msg.acpted_rply.ar_results.where = resultsp;
>   reply_msg.acpted_rply.ar_results.proc = xresults;
>  
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   for (;;) {
>   switch (ppoll(pfd, 1, &wait, NULL)) {
>   case 0:
> @@ -283,7 +283,7 @@ send_again:
>   /* FALLTHROUGH */
>   case -1:
>   if (errno == EINTR) {
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&after, &start, &duration);
>   timespecadd(&time_waited, &duration, &time_waited);
>   if (timespeccmp(&time_waited, &timeout, <))
> diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
> index f9d7a70938f..6c99db84359 100644
> --- lib/libc/rpc/svc_tcp.c
> +++ lib/libc/rpc/svc_tcp.c
> @@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>   * A timeout is fatal for the connection.
>   */
>   delta = wait_per_try;
> - clock_gettime(CLOCK_MONOTONIC, &start);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>   pfd[0].fd = sock;
>   pfd[0].events = POLLIN;
>   do {
> @@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>   case -1:
>   if (errno != EINTR)
>   goto fatal_err;
> - clock_gettime(CLOCK_MONOTONIC, &after);
> + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>   timespecsub(&after, &start, &duration);
>   timespecsub(&wait_per_try, &duration, &delta);
>   if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/shlib_version lib/libc/shlib_version
> index 06f98b01084..5fb0770494f 100644
> --- lib/libc/shlib_version
> +++ lib/libc/shlib_version
> @@ -1,4 +1,4 @@
>  major=96
> -minor=0
> +minor=1
>  # note: If changes were made to include/thread_private.h or if system calls
>  # were added/changed then librthread/shlib_version must also be updated.
> diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
> index 34769576ced..d57418d81bf 100644
> --- lib/libc/sys/Makefile.inc
> +++ lib/libc/sys/Makefile.inc
> @@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
>  
>  # glue to offer userland wrappers for some syscalls
>  SRCS+= posix_madvise.c pthread_sigmask.c \
> - w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
> + w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
> + w_clock_gettime.c w_gettimeofday.c microtime.c
>  
>  # glue for compat with old syscall interfaces.
>  SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
> @@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
>  ASM= __semctl.o __syscall.o __thrsigdivert.o \
>   access.o acct.o adjfreq.o adjtime.o \
>   bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
> - clock_getres.o clock_gettime.o clock_settime.o \
> + clock_getres.o clock_settime.o \
>   dup.o dup2.o dup3.o \
>   execve.o \
>   faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
> @@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
>   getgroups.o getitimer.o getpeername.o getpgid.o \
>   getpriority.o getresgid.o getresuid.o \
>   getrlimit.o getrusage.o getsid.o getsockname.o \
> - getsockopt.o gettimeofday.o ioctl.o \
> + getsockopt.o ioctl.o \
>   kevent.o kill.o kqueue.o ktrace.o lchown.o \
>   link.o linkat.o listen.o lstat.o madvise.o \
>   minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
> @@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
>  SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
>  DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
>  
> -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
> +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
> + clock_gettime.o gettimeofday.o
>  PHIDDEN=${HIDDEN:.o=.po}
>  SHIDDEN=${HIDDEN:.o=.so}
>  DHIDDEN=${HIDDEN:.o=.do}
> diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
> new file mode 100644
> index 00000000000..6b7b65762e7
> --- /dev/null
> +++ lib/libc/sys/microtime.c
> @@ -0,0 +1,157 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/atomic.h>
> +#include <sys/time.h>
> +
> +#include <time.h>
> +
> +/*
> + * Return the difference between the timehands' counter value now and what
> + * was when we copied it to the timehands' offset_count.
> + */
> +static __inline u_int
> +tc_delta(struct __timekeep *tk)
> +{
> + return ((tc_get_timecount() - tk->th_offset_count) &
> +    tk->tc_counter_mask);
> +}
> +
> +static inline void
> +bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
> +{
> + ct->sec = bt->sec;
> + if (bt->frac > bt->frac + x)
> + ct->sec++;
> + ct->frac = bt->frac + x;
> +}
> +
> +static inline void
> +BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
> +{
> + ts->tv_sec = bt->sec;
> + ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
> +}
> +
> +static inline void
> +BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
> +{
> + tv->tv_sec = bt->sec;
> + tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
> +}
> +
> +static void
> +binuptime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + *bt = tk->th_offset;
> + bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static inline void
> +bintimeadd(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> + dt->sec = bt->sec + ct->sec;
> + if (bt->frac > bt->frac + ct->frac)
> + dt->sec++;
> + dt->frac = bt->frac + ct->frac;
> +}
> +
> +static inline void
> +bintimesub(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> + dt->sec = bt->sec - ct->sec;
> + if (bt->frac < bt->frac - ct->frac)
> + dt->sec--;
> + dt->frac = bt->frac - ct->frac;
> +}
> +
> +static void
> +binruntime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
> + bintimesub(bt, &tk->th_naptime, bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static void
> +bintime(struct bintime *bt, struct __timekeep *tk)
> +{
> + u_int gen;
> +
> + do {
> + gen = tk->th_generation;
> + membar_consumer();
> + *bt = tk->th_offset;
> + bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> + bintimeadd(bt, &tk->th_boottime, bt);
> + membar_consumer();
> + } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +void
> +_microtime(struct timeval *tvp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + bintime(&bt, tk);
> + BINTIME_TO_TIMEVAL(&bt, tvp);
> +}
> +
> +void
> +_nanotime(struct timespec *tsp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + bintime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> +
> +void
> +_nanoruntime(struct timespec *ts, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + binruntime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, ts);
> +}
> +
> +
> +void
> +_nanouptime(struct timespec *tsp, struct __timekeep *tk)
> +{
> + struct bintime bt;
> +
> + binuptime(&bt, tk);
> + BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
> new file mode 100644
> index 00000000000..27c504fc285
> --- /dev/null
> +++ lib/libc/sys/w_clock_gettime.c
> @@ -0,0 +1,59 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <stdlib.h>
> +#include <time.h>
> +#include <err.h>
> +
> +#include <sys/time.h>
> +
> +int
> +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
> +{
> + struct __timekeep *timekeep;
> + unsigned int seq;
> +
> + if (_timekeep == NULL)
> + return clock_gettime(clock_id, tp);
> + timekeep = _timekeep;
> +
> + switch (clock_id) {
> + case CLOCK_REALTIME:
> + do {
> + seq = timekeep->seq;
> + _nanotime(tp, timekeep);
> + } while (seq == 0 || seq != timekeep->seq);
> + break;
> + case CLOCK_UPTIME:
> + do {
> + seq = timekeep->seq;
> + _nanoruntime(tp, timekeep);
> + } while (seq == 0 || seq != timekeep->seq);
> + break;
> + case CLOCK_MONOTONIC:
> + case CLOCK_BOOTTIME:
> + do {
> + seq = timekeep->seq;
> + _nanouptime(tp, timekeep);
> + } while (seq == 0 || seq != timekeep->seq);
> + break;
> + default:
> + return clock_gettime(clock_id, tp);
> + }
> + return 0;
> +}
> +DEF_WRAP(clock_gettime);
> diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
> new file mode 100644
> index 00000000000..3cf77e96e37
> --- /dev/null
> +++ lib/libc/sys/w_gettimeofday.c
> @@ -0,0 +1,42 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Robert Nagy <[hidden email]>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/time.h>
> +
> +int
> +WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
> +{
> + struct __timekeep *timekeep = _timekeep;
> + static struct timezone zerotz = { 0, 0 };
> + unsigned int seq;
> +
> + if (timekeep == NULL)
> + return gettimeofday(tp, tzp);
> +
> + if (tp) {
> + do {
> + seq = timekeep->seq;
> + _microtime(tp, timekeep);
> + } while (seq == 0 || seq != timekeep->seq);
> + }
> +
> + if (tzp)
> + tzp = &zerotz;
> +
> + return 0;
> +}
> +DEF_WRAP(gettimeofday);
> diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
> index 788890add89..df2239438d2 100644
> --- lib/libc/thread/synch.h
> +++ lib/libc/thread/synch.h
> @@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
>   if (abs == NULL)
>   return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
>  
> - if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
> + if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
>   return (EINVAL);
>  
>   rel.tv_sec = abs->tv_sec - rel.tv_sec;
> diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
> index 9b5b8eb3acf..59bc923a6fb 100644
> --- sys/kern/exec_elf.c
> +++ sys/kern/exec_elf.c
> @@ -124,7 +124,7 @@ extern char *syscallnames[];
>  /*
>   * How many entries are in the AuxInfo array we pass to the process?
>   */
> -#define ELF_AUX_ENTRIES 8
> +#define ELF_AUX_ENTRIES 9
>  
>  /*
>   * This is the OpenBSD ELF emul
> @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
>   a->au_v = ap->arg_entry;
>   a++;
>  
> + a->au_id = AUX_openbsd_timekeep;
> + a->au_v = p->p_p->ps_timekeep;
> + a++;
> +
>   a->au_id = AUX_null;
>   a->au_v = 0;
>   a++;
> diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
> index 20480c2fc28..15bf4db6fbd 100644
> --- sys/kern/kern_exec.c
> +++ sys/kern/kern_exec.c
> @@ -64,6 +64,11 @@
>  #include <uvm/uvm_extern.h>
>  #include <machine/tcb.h>
>  
> +#include <sys/time.h>
> +
> +struct uvm_object *timekeep_object;
> +struct __timekeep* timekeep;
> +
>  void unveil_destroy(struct process *ps);
>  
>  const struct kmem_va_mode kv_exec = {
> @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
>   */
>  int exec_sigcode_map(struct process *, struct emul *);
>  
> +/*
> + * Map the shared timekeep page.
> + */
> +int exec_timekeep_map(struct process *);
> +
>  /*
>   * If non-zero, stackgap_random specifies the upper limit of the random gap size
>   * added to the fixed stack position. Must be n^2.
> @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
>   /* map the process's signal trampoline code */
>   if (exec_sigcode_map(pr, pack.ep_emul))
>   goto free_pack_abort;
> + /* map the process's timekeep page */
> + if (exec_timekeep_map(pr))
> + goto free_pack_abort;
>  
>  #ifdef __HAVE_EXEC_MD_MAP
>   /* perform md specific mappings that process might need */
> @@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
>  
>   return (0);
>  }
> +
> +int
> +exec_timekeep_map(struct process *pr)
> +{
> + size_t timekeep_sz = sizeof(struct __timekeep);
> +
> + /*
> + * Similar to the sigcode object, except that there is a single timekeep
> + * object, and not one per emulation.
> + */
> + if (timekeep_object == NULL) {
> + vaddr_t va;
> +
> + timekeep_object = uao_create(timekeep_sz, 0);
> + uao_reference(timekeep_object);
> +
> + if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
> +    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
> +    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
> + uao_detach(timekeep_object);
> + return (ENOMEM);
> + }
> +
> + timekeep = (struct __timekeep *)va;
> + timekeep->major = 0;
> + timekeep->minor = 0;
> +
> + timekeep->seq = 0;
> + }
> +
> + uao_reference(timekeep_object);
> + if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
> +    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
> +    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
> + uao_detach(timekeep_object);
> + return (ENOMEM);
> + }
> +
> + return (0);
> +}
> diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
> index 4b9eedf50b9..1a6db0102c3 100644
> --- sys/kern/kern_tc.c
> +++ sys/kern/kern_tc.c
> @@ -35,6 +35,7 @@
>  #include <sys/queue.h>
>  #include <sys/malloc.h>
>  #include <dev/rndvar.h>
> +#include <sys/time.h>
>  
>  /*
>   * A large step happens on boot.  This constant detects such steps.
> @@ -480,6 +481,34 @@ tc_setclock(const struct timespec *ts)
>  #endif
>  }
>  
> +void
> +tc_update_timekeep(void)
> +{
> + struct timehands *th;
> + u_int gen;
> +
> + if (timekeep == NULL)
> + return;
> +
> + atomic_inc_int(&timekeep->seq);
> +
> + do {
> + th = timehands;
> + gen = th->th_generation;
> + membar_consumer();
> + timekeep->th_scale = th->th_scale;
> + timekeep->th_offset_count = th->th_offset_count;
> + timekeep->th_offset = th->th_offset;
> + timekeep->th_naptime = th->th_naptime;
> + timekeep->th_boottime = th->th_boottime;
> + timekeep->th_generation = th->th_generation;
> + timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
> + membar_consumer();
> + } while (gen == 0 || gen != th->th_generation);
> +
> + return;
> +}
> +
>  /*
>   * Initialize the next struct timehands in the ring and make
>   * it the active timehands.  Along the way we might switch to a different
> @@ -632,6 +661,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
>   time_uptime = th->th_offset.sec;
>   membar_producer();
>   timehands = th;
> +
> + tc_update_timekeep();
>  }
>  
>  /* Report or change the active timecounter hardware. */
> diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
> index a40e0510273..f55b75f1e84 100644
> --- sys/sys/exec_elf.h
> +++ sys/sys/exec_elf.h
> @@ -691,7 +691,8 @@ enum AuxID {
>   AUX_sun_uid = 2000, /* euid */
>   AUX_sun_ruid = 2001, /* ruid */
>   AUX_sun_gid = 2002, /* egid */
> - AUX_sun_rgid = 2003 /* rgid */
> + AUX_sun_rgid = 2003, /* rgid */
> + AUX_openbsd_timekeep = 2004, /* userland clock_gettime */
>  };
>  
>  struct elf_args {
> diff --git sys/sys/proc.h sys/sys/proc.h
> index 357c0c0d52c..93a79a220db 100644
> --- sys/sys/proc.h
> +++ sys/sys/proc.h
> @@ -248,6 +248,8 @@ struct process {
>   u_int ps_rtableid; /* Process routing table/domain. */
>   char ps_nice; /* Process "nice" value. */
>  
> + vaddr_t ps_timekeep; /* User pointer to timekeep */
> +
>   struct uprof { /* profile arguments */
>   caddr_t pr_base; /* buffer base */
>   size_t  pr_size; /* buffer size */
> diff --git sys/sys/time.h sys/sys/time.h
> index e758a64ce07..0b48f65f3f1 100644
> --- sys/sys/time.h
> +++ sys/sys/time.h
> @@ -163,15 +163,30 @@ struct clockinfo {
>  };
>  #endif /* __BSD_VISIBLE */
>  
> -#if defined(_KERNEL) || defined(_STANDALONE)
> -#include <sys/_time.h>
> -
>  /* Time expressed as seconds and fractions of a second + operations on it. */
>  struct bintime {
>   time_t sec;
>   uint64_t frac;
>  };
>  
> +struct __timekeep {
> + uint8_t major; /* version major number */
> + uint8_t minor; /* version minor number */
> +
> + volatile unsigned int seq; /* synchronization */
> +
> + u_int64_t th_scale; /* [w] */
> + u_int th_offset_count; /* [w] */
> + struct bintime th_offset; /* [w] */
> + struct bintime th_naptime; /* [w] */
> + struct bintime th_boottime; /* [tw] */
> + volatile u_int th_generation; /* [w] */
> + u_int tc_counter_mask; /* [I] */
> +};
> +
> +#if defined(_KERNEL) || defined(_STANDALONE)
> +#include <sys/_time.h>
> +
>  #define bintimecmp(btp, ctp, cmp) \
>   ((btp)->sec == (ctp)->sec ? \
>      (btp)->frac cmp (ctp)->frac : \
> @@ -396,6 +411,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
>   return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
>  }
>  
> +extern struct uvm_object *timekeep_object;
> +extern struct __timekeep *timekeep;
>  #else /* !_KERNEL */
>  #include <time.h>
>  
>
>

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
> 5. What if the TSC is not available as a usable timecounter?  In that
>    case libc should fall back on the system call.  But we need a way
>    to communicate what the timecounter is and detect when we switch
>    timecounters.  Maybe adding a timecounter ID to the page will help
>    here.  But then MD code in libc will have to check the ID and
>    dispatch to the right timecounter read function.

I fixed 1--4 and 6, but with 5 the solutions I found are a bit
convoluted and involve string passing and parsing if we are to pass this
information to libc.

Would it be acceptable to add a memember to struct timecounter that
states whether the clock is libc ready or not? This means that when you
add support for a new clock in libc you also have to touch the kernel to
set that bit...

On the other hand the code would be clean and safe:

if (timekeep == NULL || timekeep->tc_supported)
        clock_gettime();

/* rest of wrapper function */

What do you think?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On Thu, May 28, 2020 at 10:27:03PM +0300, Paul Irofti wrote:

> > 5. What if the TSC is not available as a usable timecounter?  In that
> >    case libc should fall back on the system call.  But we need a way
> >    to communicate what the timecounter is and detect when we switch
> >    timecounters.  Maybe adding a timecounter ID to the page will help
> >    here.  But then MD code in libc will have to check the ID and
> >    dispatch to the right timecounter read function.
>
> I fixed 1--4 and 6, but with 5 the solutions I found are a bit
> convoluted and involve string passing and parsing if we are to pass this
> information to libc.
>
> Would it be acceptable to add a memember to struct timecounter that
> states whether the clock is libc ready or not? This means that when you
> add support for a new clock in libc you also have to touch the kernel to
> set that bit...
>
> On the other hand the code would be clean and safe:

if (timekeep == NULL || !timekeep->tc_supported)
  clock_gettime();

that's what I meant, of course...

The tc_supported bit would be set in the kernel when the timecounter is
changed. I have those bits inside tc_update_timekeep() already for the
tc_counter_mask.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Marc Espie-2
In reply to this post by Paul Irofti-4
On Thu, May 28, 2020 at 05:44:31PM +0300, Paul Irofti wrote:

> Hi,
>
> Here is a new iteration of the diff which includes support for MD high
> resolution clocks. Currently only implements TSC on amd64. If the
> MD function is not defined, it fallsback to the syscall.
>
> There is the question of the skew fix, but that will be addressed in a
> separate kernel diff that will not affect the current diff at all.
>
> I could not find a way to find on which processor the process is running
> on from userland without going through a syscall. If there is one please
> let me know. It would make things easier.
>
> In the meantime I have also gotten positive feedback from various
> testers that run this on their main machine.
>
> Anyway, I think we can decide on the struct name and the auxiliary
> vector ID and consider this done.
>
> Thoughts?
>
> Paul

This appears to work just fine here on my desktop.

cpu0: AMD A10-5700 APU with Radeon(tm) HD Graphics, 3417.45 MHz, 15-10-01
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,XOP,SKINIT,WDT,FMA4,TCE,NODEID,TBM,TOPEXT,CPCTR,ITSC,BMI1

(other bits of dmesg available on demand)

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On Thu, May 28, 2020 at 07:43:55PM +0200, Mark Kettenis wrote:

> > Date: Thu, 28 May 2020 17:44:31 +0300
> > From: Paul Irofti <[hidden email]>
> >
> > Hi,
> >
> > Here is a new iteration of the diff which includes support for MD high
> > resolution clocks. Currently only implements TSC on amd64. If the
> > MD function is not defined, it fallsback to the syscall.
> >
> > There is the question of the skew fix, but that will be addressed in a
> > separate kernel diff that will not affect the current diff at all.
> >
> > I could not find a way to find on which processor the process is running
> > on from userland without going through a syscall. If there is one please
> > let me know. It would make things easier.
> >
> > In the meantime I have also gotten positive feedback from various
> > testers that run this on their main machine.
> >
> > Anyway, I think we can decide on the struct name and the auxiliary
> > vector ID and consider this done.
> >
> > Thoughts?
>
> This is getting us somewhere.
>
> Still some issues though (besides the skew thing you already mention).
>
> 1. The synchronization mechanism is broken.  The seq member needs to
>    be set to 0 while updating the struct and only set to the "next"
>    value after completing the update of the full struct.  You need to
>    be careful to avoid 0, otherwise the application will spin for a
>    full timeslice while seq overflows into 0.
>
>    However, since you now export the timehands generation, I'd really
>    drop seq and use the timehands generation for synchronization.  It
>    makes no sense to have both.
>
> 2. Since tc_update_timekeep() is called from tc_windup() it doesn't
>    need to do the synchronization dance.
>
> 3. Like tc_windup, tc_update_timekeep() needs to have some
>     membar_procer() calls in it instead of membar_consumer() calls.
>
> 4. There is no need to update th_counter_mask on every update.
>
> 5. What if the TSC is not available as a usable timecounter?  In that
>    case libc should fall back on the system call.  But we need a way
>    to communicate what the timecounter is and detect when we switch
>    timecounters.  Maybe adding a timecounter ID to the page will help
>    here.  But then MD code in libc will have to check the ID and
>    dispatch to the right timecounter read function.
>
> 6. The major and minor fields probably should bbe uint32_t or maybe
>     uint16_t.  You're not saving any space by making them uint8_t.

Here is a new diff that addresses the issues stated above. I went with
adding a new field in timecounter. This can be used as an ID further on
and also turned into a sysctl if needed.


diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 00000000000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..c5921851203 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -30,6 +30,7 @@
 #include <link.h>
 #include <stdlib.h> /* atexit */
 #include <string.h>
+#include <time.h> /* timekeep */
 #include <unistd.h>
 
 #include "init.h"
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..a5b20eec27a 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -20,11 +20,18 @@
 
 #include_next <sys/time.h>
 
+__BEGIN_HIDDEN_DECLS
+void _microtime(struct timeval *tvp, struct __timekeep *tk);
+void _nanotime(struct timespec *tsp, struct __timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..44bd8e7c6e3 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -18,18 +18,26 @@
 #ifndef _LIBC_TIME_H_
 #define _LIBC_TIME_H_
 
+#include <sys/types.h>
+
 #include_next <time.h>
 
 #if 0
 extern PROTO_NORMAL(tzname);
 #endif
 
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+extern uint64_t (*const tc_get_timecount)(void);
+uint64_t tc_get_timecount_md(void);
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(asctime);
 PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..6b7b65762e7
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,157 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/time.h>
+
+#include <time.h>
+
+/*
+ * Return the difference between the timehands' counter value now and what
+ * was when we copied it to the timehands' offset_count.
+ */
+static __inline u_int
+tc_delta(struct __timekeep *tk)
+{
+ return ((tc_get_timecount() - tk->th_offset_count) &
+    tk->tc_counter_mask);
+}
+
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+ ct->sec = bt->sec;
+ if (bt->frac > bt->frac + x)
+ ct->sec++;
+ ct->frac = bt->frac + x;
+}
+
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+ ts->tv_sec = bt->sec;
+ ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+ tv->tv_sec = bt->sec;
+ tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static void
+binuptime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec + ct->sec;
+ if (bt->frac > bt->frac + ct->frac)
+ dt->sec++;
+ dt->frac = bt->frac + ct->frac;
+}
+
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec - ct->sec;
+ if (bt->frac < bt->frac - ct->frac)
+ dt->sec--;
+ dt->frac = bt->frac - ct->frac;
+}
+
+static void
+binruntime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
+ bintimesub(bt, &tk->th_naptime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static void
+bintime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ bintimeadd(bt, &tk->th_boottime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+void
+_microtime(struct timeval *tvp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMEVAL(&bt, tvp);
+}
+
+void
+_nanotime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
+
+void
+_nanoruntime(struct timespec *ts, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binruntime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, ts);
+}
+
+
+void
+_nanouptime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binuptime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..4559c6b087d
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,48 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+#include <err.h>
+
+#include <sys/time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct __timekeep *timekeep = _timekeep;
+
+ if (timekeep == NULL || timekeep->tc_user == 0)
+ return clock_gettime(clock_id, tp);
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ _nanotime(tp, timekeep);
+ break;
+ case CLOCK_UPTIME:
+ _nanoruntime(tp, timekeep);
+ break;
+ case CLOCK_MONOTONIC:
+ case CLOCK_BOOTTIME:
+ _nanouptime(tp, timekeep);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..4897945d3ec
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/time.h>
+
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+ struct __timekeep *timekeep = _timekeep;
+ static struct timezone zerotz = { 0, 0 };
+
+ if (timekeep == NULL || timekeep->tc_user == 0)
+ return gettimeofday(tp, tzp);
+
+ if (tp)
+ _microtime(tp, timekeep);
+
+ if (tzp)
+ tzp = &zerotz;
+
+ return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
index 3f5f2c5b42b..6eaf8b107c6 100644
--- sys/arch/alpha/alpha/clock.c
+++ sys/arch/alpha/alpha/clock.c
@@ -64,7 +64,7 @@ int clk_irq = 0;
 
 u_int rpcc_get_timecount(struct timecounter *);
 struct timecounter rpcc_timecounter = {
- rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
+ rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..3db93d88dec 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
 #endif
 
 struct timecounter tsc_timecounter = {
- tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
+ tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
 };
 
 uint64_t
diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
index 613f7ee0e0f..00da0c6a8d0 100644
--- sys/arch/amd64/isa/clock.c
+++ sys/arch/amd64/isa/clock.c
@@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 
 int clockintr(void *);
diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
index 29394141ad5..6b7c6db862f 100644
--- sys/arch/arm64/dev/agtimer.c
+++ sys/arch/arm64/dev/agtimer.c
@@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
 u_int agtimer_get_timecount(struct timecounter *);
 
 static struct timecounter agtimer_timecounter = {
- agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
+ agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
 };
 
 struct agtimer_pcpu_softc {
diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
index 7605845d5e2..061542d532f 100644
--- sys/arch/armv7/omap/gptimer.c
+++ sys/arch/armv7/omap/gptimer.c
@@ -117,7 +117,7 @@ int gptimer_irq = 0;
 u_int gptimer_get_timecount(struct timecounter *);
 
 static struct timecounter gptimer_timecounter = {
- gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
+ gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
 };
 
 volatile u_int32_t nexttickevent;
diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
index 14a243c78d0..41028f9a602 100644
--- sys/arch/armv7/sunxi/sxitimer.c
+++ sys/arch/armv7/sunxi/sxitimer.c
@@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
 u_int sxitimer_get_timecount(struct timecounter *);
 
 static struct timecounter sxitimer_timecounter = {
- sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
+ sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
 };
 
 bus_space_tag_t sxitimer_iot;
diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
index 4c594ab5ec7..8cce6c3a893 100644
--- sys/arch/hppa/dev/clock.c
+++ sys/arch/hppa/dev/clock.c
@@ -47,7 +47,7 @@ int cpu_hardclock(void *);
 u_int itmr_get_timecount(struct timecounter *);
 
 struct timecounter itmr_timecounter = {
- itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
+ itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
index 09a6db983f2..dd74bd425ad 100644
--- sys/arch/i386/isa/clock.c
+++ sys/arch/i386/isa/clock.c
@@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
 u_long rtclock_tval;
diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
index 9d9f061eef9..bb8e4c7f9ae 100644
--- sys/arch/i386/pci/geodesc.c
+++ sys/arch/i386/pci/geodesc.c
@@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
  0xffffffff, /* counter_mask */
  27000000, /* frequency */
  "GEOTSC", /* name */
- 2000 /* quality */
+ 2000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 int
diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
index 8b8aa4ac430..a6f324e66f3 100644
--- sys/arch/i386/pci/gscpm.c
+++ sys/arch/i386/pci/gscpm.c
@@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "GSCPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach gscpm_ca = {
diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
index 6abf1627de2..90814d2dba0 100644
--- sys/arch/i386/pci/ichpcib.c
+++ sys/arch/i386/pci/ichpcib.c
@@ -64,6 +64,8 @@ struct timecounter ichpcib_timecounter = {
  3579545, /* frequency */
  "ICHPM", /* name */
  1000 /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach ichpcib_ca = {
diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
index ac3f1db6ccd..61da18ebff7 100644
--- sys/arch/loongson/loongson/generic3a_machdep.c
+++ sys/arch/loongson/loongson/generic3a_machdep.c
@@ -99,6 +99,8 @@ struct timecounter rs780e_timecounter = {
  .tc_frequency = HPET_FREQ,
  .tc_name = "hpet",
  .tc_quality = 100
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /* Firmware entry points */
diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
index a04120987e0..6580a4a46bf 100644
--- sys/arch/luna88k/luna88k/clock.c
+++ sys/arch/luna88k/luna88k/clock.c
@@ -112,7 +112,9 @@ struct timecounter clock_tc = {
  .tc_counter_mask = 0xffffffff,
  .tc_frequency = 0, /* will be filled in */
  .tc_name = "clock",
- .tc_quality = 0
+ .tc_quality = 0,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /*
diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
index 4a44a92cfc0..8c3ad620be8 100644
--- sys/arch/macppc/macppc/clock.c
+++ sys/arch/macppc/macppc/clock.c
@@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
 };
 
 /* calibrate the timecounter frequency for the listed models */
diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
index d4a42ed5acc..5c4dbadb5bb 100644
--- sys/arch/mips64/mips64/mips64_machdep.c
+++ sys/arch/mips64/mips64/mips64_machdep.c
@@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
  0xffffffff, /* counter_mask */
  0, /* frequency */
  "CP0", /* name */
- 0 /* quality */
+ 0, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 u_int
diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
index 604cb3be3ab..9d4d8564d5c 100644
--- sys/arch/octeon/octeon/machdep.c
+++ sys/arch/octeon/octeon/machdep.c
@@ -152,8 +152,9 @@ struct timecounter ioclock_timecounter = {
  .tc_name = "ioclock",
  .tc_quality = 0, /* ioclock can be overridden
  * by cp0 counter */
- .tc_priv = 0 /* clock register,
+ .tc_priv = 0, /* clock register,
  * determined at runtime */
+ .tc_user = 0, /* expose to user */
 };
 
 static int
diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
index ba7fa558b96..7b3fa05ddd5 100644
--- sys/arch/sgi/sgi/ip27_machdep.c
+++ sys/arch/sgi/sgi/ip27_machdep.c
@@ -111,7 +111,29 @@ struct timecounter ip27_hub_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
  .tc_frequency = 1250000,
  .tc_name = "hubrt",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = 0,
+ .tc_user = 0,
+};
+
+static int
+atoi(const char *s)
+{
+ int n, neg;
+
+ n = 0;
+ neg = 0;
+
+ while (*s == '-') {
+ s++;
+ neg = !neg;
+ }
+
+ while (*s != '\0') {
+ if (*s < '0' || *s > '9')
+ break;
+
+ n = (10 * n) + (*s - '0');
 };
 
 volatile uint64_t ip27_spinup_a0;
diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
index 56b29915c70..827775512ac 100644
--- sys/arch/sgi/xbow/xheart.c
+++ sys/arch/sgi/xbow/xheart.c
@@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
  .tc_frequency = 12500000,
  .tc_name = "heart",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
index e24f804dff6..1a7a1afa8c2 100644
--- sys/arch/sparc64/dev/psycho.c
+++ sys/arch/sparc64/dev/psycho.c
@@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
 u_int stick_get_timecount(struct timecounter *);
 
 struct timecounter stick_timecounter = {
- stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
+ stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
 };
 
 /*
diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
index fd5e8a9c15b..5c2e47d386b 100644
--- sys/arch/sparc64/sparc64/clock.c
+++ sys/arch/sparc64/sparc64/clock.c
@@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
 u_int tick_get_timecount(struct timecounter *);
 
 struct timecounter tick_timecounter = {
- tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
+ tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
 };
 
 u_int sys_tick_get_timecount(struct timecounter *);
 
 struct timecounter sys_tick_timecounter = {
- sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
+ sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
 };
 
 /*
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index d0ee72cec9b..13177a909da 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
  0xffffffff, /* counter_mask (32 bits) */
  0, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define HPET_TIMERS 3
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index cdc8c99a17a..89b5a397e47 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
  0x00ffffff, /* counter_mask (24 bits) */
  ACPI_FREQUENCY, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct acpitimer_softc {
diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
index 3abe03de722..017611e4b31 100644
--- sys/dev/pci/amdpm.c
+++ sys/dev/pci/amdpm.c
@@ -83,7 +83,9 @@ static struct timecounter amdpm_timecounter = {
  0xffffff, /* counter_mask */
  AMDPM_FREQUENCY, /* frequency */
  "AMDPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define AMDPM_CONFREG 0x40
diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
index db806eedf80..ce33cd175e6 100644
--- sys/dev/pci/viapm.c
+++ sys/dev/pci/viapm.c
@@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
  0xffffff, /* counter_mask */
  VIAPM_FREQUENCY, /* frequency */
  "VIAPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct timeout viapm_timeout;
diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
index 3ab2ae22831..8e326cf2502 100644
--- sys/dev/pv/hyperv.c
+++ sys/dev/pv/hyperv.c
@@ -143,7 +143,7 @@ struct {
 };
 
 struct timecounter hv_timecounter = {
- hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
+ hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
 };
 
 struct cfdriver hyperv_cd = {
diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
index 6b242f7448d..b80e4d2a484 100644
--- sys/dev/pv/pvclock.c
+++ sys/dev/pv/pvclock.c
@@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
 };
 
 struct timecounter pvclock_timecounter = {
- pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
+ pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
 };
 
 int
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..0eb316ff233 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/time.h>
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,41 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct __timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct __timekeep *)va;
+ timekeep->major = 0;
+ timekeep->minor = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..2a92937b7a0 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -35,6 +35,7 @@
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <dev/rndvar.h>
+#include <sys/time.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -64,7 +65,7 @@ dummy_get_timecount(struct timecounter *tc)
 }
 
 static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
+ dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
 };
 
 /*
@@ -480,6 +481,36 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_update_timekeep(void)
+{
+ static struct timecounter *last_tc = NULL;
+
+ struct timehands *th;
+ u_int gen;
+
+ if (timekeep == NULL)
+ return;
+
+ th = timehands;
+ gen = th->th_generation;
+ membar_producer();
+ timekeep->th_scale = th->th_scale;
+ timekeep->th_offset_count = th->th_offset_count;
+ timekeep->th_offset = th->th_offset;
+ timekeep->th_naptime = th->th_naptime;
+ timekeep->th_boottime = th->th_boottime;
+ timekeep->th_generation = th->th_generation;
+ if (last_tc != th->th_counter) {
+ timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
+ timekeep->tc_user = th->th_counter->tc_user;
+ last_tc = th->th_counter;
+ }
+ membar_producer();
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +663,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..f55b75f1e84 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 2004, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..93a79a220db 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -248,6 +248,8 @@ struct process {
  u_int ps_rtableid; /* Process routing table/domain. */
  char ps_nice; /* Process "nice" value. */
 
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
+
  struct uprof { /* profile arguments */
  caddr_t pr_base; /* buffer base */
  size_t  pr_size; /* buffer size */
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..b38e8d4834e 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,15 +163,30 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
-#include <sys/_time.h>
-
 /* Time expressed as seconds and fractions of a second + operations on it. */
 struct bintime {
  time_t sec;
  uint64_t frac;
 };
 
+struct __timekeep {
+ uint32_t major; /* version major number */
+ uint32_t minor; /* version minor number */
+
+ u_int64_t th_scale;
+ unsigned int th_offset_count;
+ struct bintime th_offset;
+ struct bintime th_naptime;
+ struct bintime th_boottime;
+ volatile unsigned int th_generation;
+
+ unsigned int tc_user;
+ unsigned int tc_counter_mask;
+};
+
+#if defined(_KERNEL) || defined(_STANDALONE)
+#include <sys/_time.h>
+
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
     (btp)->frac cmp (ctp)->frac : \
@@ -396,6 +411,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
  return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
 }
 
+extern struct uvm_object *timekeep_object;
+extern struct __timekeep *timekeep;
 #else /* !_KERNEL */
 #include <time.h>
 
diff --git sys/sys/timetc.h sys/sys/timetc.h
index ce81c3475a0..5160f5e738b 100644
--- sys/sys/timetc.h
+++ sys/sys/timetc.h
@@ -80,6 +80,8 @@ struct timecounter {
  */
  void *tc_priv; /* [I] */
  /* Pointer to the timecounter's private parts. */
+ char tc_user; /* [I] */
+ /* Expose this timecounter to userland.  Set in softc. */
  SLIST_ENTRY(timecounter) tc_next; /* [I] */
  /* Pointer to the next timecounter. */
  int64_t tc_freq_adj; /* [tw] */

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Stuart Henderson
On 2020/05/29 13:50, Paul Irofti wrote:

> +struct __timekeep {
> + uint32_t major; /* version major number */
> + uint32_t minor; /* version minor number */
> +
> + u_int64_t th_scale;
> + unsigned int th_offset_count;
> + struct bintime th_offset;
> + struct bintime th_naptime;
> + struct bintime th_boottime;
> + volatile unsigned int th_generation;
> +
> + unsigned int tc_user;
> + unsigned int tc_counter_mask;
> +};

Ah good, you got rid of u_int, that was causing problems with port builds.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-29 15:45, Stuart Henderson wrote:

> On 2020/05/29 13:50, Paul Irofti wrote:
>> +struct __timekeep {
>> + uint32_t major; /* version major number */
>> + uint32_t minor; /* version minor number */
>> +
>> + u_int64_t th_scale;
>> + unsigned int th_offset_count;
>> + struct bintime th_offset;
>> + struct bintime th_naptime;
>> + struct bintime th_boottime;
>> + volatile unsigned int th_generation;
>> +
>> + unsigned int tc_user;
>> + unsigned int tc_counter_mask;
>> +};
>
> Ah good, you got rid of u_int, that was causing problems with port builds.

Yeah, I got a few reports about that :) Such a stupid type anyway...

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by Stuart Henderson
> Date: Fri, 29 May 2020 13:45:37 +0100
> From: Stuart Henderson <[hidden email]>
>
> On 2020/05/29 13:50, Paul Irofti wrote:
> > +struct __timekeep {
> > + uint32_t major; /* version major number */
> > + uint32_t minor; /* version minor number */
> > +
> > + u_int64_t th_scale;
> > + unsigned int th_offset_count;
> > + struct bintime th_offset;
> > + struct bintime th_naptime;
> > + struct bintime th_boottime;
> > + volatile unsigned int th_generation;
> > +
> > + unsigned int tc_user;
> > + unsigned int tc_counter_mask;
> > +};
>
> Ah good, you got rid of u_int, that was causing problems with port builds.

That in itself is a problem.  This means <time.h> is the wrong place
for this struct.  We need to find a better place for this.

Since this is now closely linked to the timecounter stuff
<sys/timetc.h> would be an obvious place.  Now that file has:

#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif

you could change that into

#if !defined(_KERNEL) && !defined(_LIBC)
#error "no user-serviceable parts inside"
#endif

and make sure you #define _LIBC brefore uncluding this file where it
is needed.  As few places as possible obviously.


Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-29 16:00, Mark Kettenis wrote:

>> Date: Fri, 29 May 2020 13:45:37 +0100
>> From: Stuart Henderson <[hidden email]>
>>
>> On 2020/05/29 13:50, Paul Irofti wrote:
>>> +struct __timekeep {
>>> + uint32_t major; /* version major number */
>>> + uint32_t minor; /* version minor number */
>>> +
>>> + u_int64_t th_scale;
>>> + unsigned int th_offset_count;
>>> + struct bintime th_offset;
>>> + struct bintime th_naptime;
>>> + struct bintime th_boottime;
>>> + volatile unsigned int th_generation;
>>> +
>>> + unsigned int tc_user;
>>> + unsigned int tc_counter_mask;
>>> +};
>>
>> Ah good, you got rid of u_int, that was causing problems with port builds.
>
> That in itself is a problem.  This means <time.h> is the wrong place
> for this struct.  We need to find a better place for this.
>
> Since this is now closely linked to the timecounter stuff
> <sys/timetc.h> would be an obvious place.  Now that file has:
>
> #ifndef _KERNEL
> #error "no user-serviceable parts inside"
> #endif
>
> you could change that into
>
> #if !defined(_KERNEL) && !defined(_LIBC)
> #error "no user-serviceable parts inside"
> #endif
>
> and make sure you #define _LIBC brefore uncluding this file where it
> is needed.  As few places as possible obviously.

Hmmm... so this would make it libc bound. I don't see anything wrong
with it, but is it what we want?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> On 2020-05-29 16:00, Mark Kettenis wrote:
> >> Date: Fri, 29 May 2020 13:45:37 +0100
> >> From: Stuart Henderson <[hidden email]>
> >>
> >> On 2020/05/29 13:50, Paul Irofti wrote:
> >>> +struct __timekeep {
> >>> + uint32_t major; /* version major number */
> >>> + uint32_t minor; /* version minor number */
> >>> +
> >>> + u_int64_t th_scale;
> >>> + unsigned int th_offset_count;
> >>> + struct bintime th_offset;
> >>> + struct bintime th_naptime;
> >>> + struct bintime th_boottime;
> >>> + volatile unsigned int th_generation;
> >>> +
> >>> + unsigned int tc_user;
> >>> + unsigned int tc_counter_mask;
> >>> +};
> >>
> >> Ah good, you got rid of u_int, that was causing problems with port builds.
> >
> > That in itself is a problem.  This means <time.h> is the wrong place
> > for this struct.  We need to find a better place for this.
> >
> > Since this is now closely linked to the timecounter stuff
> > <sys/timetc.h> would be an obvious place.  Now that file has:
> >
> > #ifndef _KERNEL
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > you could change that into
> >
> > #if !defined(_KERNEL) && !defined(_LIBC)
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > and make sure you #define _LIBC brefore uncluding this file where it
> > is needed.  As few places as possible obviously.
>
> Hmmm... so this would make it libc bound. I don't see anything wrong
> with it, but is it what we want?

what a strange comment

in our world, libc (and subjuncts crt0 and ld.so) are THE ONLY valid
interfaces to the kernel

Another place to put it is in elf, since it is tied to a new elf marker.

Tremendous caution is required, these include files must not reference
each other in a way that requires excessive #include which will break
public software

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> From: "Theo de Raadt" <[hidden email]>
> Date: Fri, 29 May 2020 07:26:50 -0600
>
> Paul Irofti <[hidden email]> wrote:
>
> > On 2020-05-29 16:00, Mark Kettenis wrote:
> > >> Date: Fri, 29 May 2020 13:45:37 +0100
> > >> From: Stuart Henderson <[hidden email]>
> > >>
> > >> On 2020/05/29 13:50, Paul Irofti wrote:
> > >>> +struct __timekeep {
> > >>> + uint32_t major; /* version major number */
> > >>> + uint32_t minor; /* version minor number */
> > >>> +
> > >>> + u_int64_t th_scale;
> > >>> + unsigned int th_offset_count;
> > >>> + struct bintime th_offset;
> > >>> + struct bintime th_naptime;
> > >>> + struct bintime th_boottime;
> > >>> + volatile unsigned int th_generation;
> > >>> +
> > >>> + unsigned int tc_user;
> > >>> + unsigned int tc_counter_mask;
> > >>> +};
> > >>
> > >> Ah good, you got rid of u_int, that was causing problems with port builds.
> > >
> > > That in itself is a problem.  This means <time.h> is the wrong place
> > > for this struct.  We need to find a better place for this.
> > >
> > > Since this is now closely linked to the timecounter stuff
> > > <sys/timetc.h> would be an obvious place.  Now that file has:
> > >
> > > #ifndef _KERNEL
> > > #error "no user-serviceable parts inside"
> > > #endif
> > >
> > > you could change that into
> > >
> > > #if !defined(_KERNEL) && !defined(_LIBC)
> > > #error "no user-serviceable parts inside"
> > > #endif
> > >
> > > and make sure you #define _LIBC brefore uncluding this file where it
> > > is needed.  As few places as possible obviously.
> >
> > Hmmm... so this would make it libc bound. I don't see anything wrong
> > with it, but is it what we want?
>
> what a strange comment
>
> in our world, libc (and subjuncts crt0 and ld.so) are THE ONLY valid
> interfaces to the kernel
>
> Another place to put it is in elf, since it is tied to a new elf marker.

The timekeep page isn't really tied to ELF; it's just used as a
mechanism to communicate the address of the page.  Paul could have
chosen a different mechanism such as sysctl (see vm.psstrings).

> Tremendous caution is required, these include files must not reference
> each other in a way that requires excessive #include which will break
> public software

Right, that is why <sys/timetc.h> is safe.  It currently produces an
error if you include it from userland.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Claudio Jeker
In reply to this post by Stuart Henderson
On Fri, May 29, 2020 at 01:45:37PM +0100, Stuart Henderson wrote:

> On 2020/05/29 13:50, Paul Irofti wrote:
> > +struct __timekeep {
> > + uint32_t major; /* version major number */
> > + uint32_t minor; /* version minor number */
> > +
> > + u_int64_t th_scale;
> > + unsigned int th_offset_count;
> > + struct bintime th_offset;
> > + struct bintime th_naptime;
> > + struct bintime th_boottime;
> > + volatile unsigned int th_generation;
> > +
> > + unsigned int tc_user;
> > + unsigned int tc_counter_mask;
> > +};
>
> Ah good, you got rid of u_int, that was causing problems with port builds.

Probably the u_int64_t should also be changed to uint64_t. At least I
think code should not mix u_intXY_t and uintXY_t.

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:

> > Date: Fri, 29 May 2020 13:45:37 +0100
> > From: Stuart Henderson <[hidden email]>
> >
> > On 2020/05/29 13:50, Paul Irofti wrote:
> > > +struct __timekeep {
> > > + uint32_t major; /* version major number */
> > > + uint32_t minor; /* version minor number */
> > > +
> > > + u_int64_t th_scale;
> > > + unsigned int th_offset_count;
> > > + struct bintime th_offset;
> > > + struct bintime th_naptime;
> > > + struct bintime th_boottime;
> > > + volatile unsigned int th_generation;
> > > +
> > > + unsigned int tc_user;
> > > + unsigned int tc_counter_mask;
> > > +};
> >
> > Ah good, you got rid of u_int, that was causing problems with port builds.
>
> That in itself is a problem.  This means <time.h> is the wrong place
> for this struct.  We need to find a better place for this.
>
> Since this is now closely linked to the timecounter stuff
> <sys/timetc.h> would be an obvious place.  Now that file has:
>
> #ifndef _KERNEL
> #error "no user-serviceable parts inside"
> #endif
>
> you could change that into
>
> #if !defined(_KERNEL) && !defined(_LIBC)
> #error "no user-serviceable parts inside"
> #endif
>
> and make sure you #define _LIBC brefore uncluding this file where it
> is needed.  As few places as possible obviously.

Done. Also includes claudio@'s observation.


diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..caa4452a3d9 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,6 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
new file mode 100644
index 00000000000..b14c862c61a
--- /dev/null
+++ lib/libc/arch/amd64/gen/rdtsc.c
@@ -0,0 +1,26 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+uint64_t
+tc_get_timecount_md(void)
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..1ff97c12b7b 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include <sys/types.h>
 #include <sys/syscall.h>
+#include <sys/timetc.h> /* timekeep */
 
 #ifndef PIC
 #include <sys/mman.h>
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
 
 /* provide definitions for these */
 const dl_cb *_dl_cb __relro = NULL;
+#if defined(__amd64)
+uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
+#else
+uint64_t (*const tc_get_timecount)(void) = NULL;
+#endif
+
 
 void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
 void
@@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..df717021cab 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -24,7 +24,7 @@ PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/sys/timetc.h lib/libc/hidden/sys/timetc.h
new file mode 100644
index 00000000000..08d505e4214
--- /dev/null
+++ lib/libc/hidden/sys/timetc.h
@@ -0,0 +1,39 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _LIBC_SYS_TIMETC_H_
+#define _LIBC_SYS_TIMETC_H_
+
+#define _LIBC
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include_next <sys/timetc.h>
+
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+
+extern uint64_t (*const tc_get_timecount)(void);
+uint64_t tc_get_timecount_md(void);
+
+void _microtime(struct timeval *tvp, struct __timekeep *tk);
+void _nanotime(struct timespec *tsp, struct __timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
+__END_HIDDEN_DECLS
+
+#endif /* !_LIBC_SYS_TIMETC_H_ */
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..d8e1e0caf64 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..344ef44af22
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,157 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/timetc.h>
+
+#include <time.h>
+
+/*
+ * Return the difference between the timehands' counter value now and what
+ * was when we copied it to the timehands' offset_count.
+ */
+static __inline u_int
+tc_delta(struct __timekeep *tk)
+{
+ return ((tc_get_timecount() - tk->th_offset_count) &
+    tk->tc_counter_mask);
+}
+
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+ ct->sec = bt->sec;
+ if (bt->frac > bt->frac + x)
+ ct->sec++;
+ ct->frac = bt->frac + x;
+}
+
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+ ts->tv_sec = bt->sec;
+ ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+ tv->tv_sec = bt->sec;
+ tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
+}
+
+static void
+binuptime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec + ct->sec;
+ if (bt->frac > bt->frac + ct->frac)
+ dt->sec++;
+ dt->frac = bt->frac + ct->frac;
+}
+
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+ dt->sec = bt->sec - ct->sec;
+ if (bt->frac < bt->frac - ct->frac)
+ dt->sec--;
+ dt->frac = bt->frac - ct->frac;
+}
+
+static void
+binruntime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
+ bintimesub(bt, &tk->th_naptime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+static void
+bintime(struct bintime *bt, struct __timekeep *tk)
+{
+ u_int gen;
+
+ do {
+ gen = tk->th_generation;
+ membar_consumer();
+ *bt = tk->th_offset;
+ bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
+ bintimeadd(bt, &tk->th_boottime, bt);
+ membar_consumer();
+ } while (gen == 0 || gen != tk->th_generation);
+}
+
+void
+_microtime(struct timeval *tvp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMEVAL(&bt, tvp);
+}
+
+void
+_nanotime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ bintime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
+
+void
+_nanoruntime(struct timespec *ts, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binruntime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, ts);
+}
+
+
+void
+_nanouptime(struct timespec *tsp, struct __timekeep *tk)
+{
+ struct bintime bt;
+
+ binuptime(&bt, tk);
+ BINTIME_TO_TIMESPEC(&bt, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..d14406fe408
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+#include <time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+ struct __timekeep *timekeep = _timekeep;
+
+ if (timekeep == NULL || timekeep->tc_user == 0)
+ return clock_gettime(clock_id, tp);
+
+ switch (clock_id) {
+ case CLOCK_REALTIME:
+ _nanotime(tp, timekeep);
+ break;
+ case CLOCK_UPTIME:
+ _nanoruntime(tp, timekeep);
+ break;
+ case CLOCK_MONOTONIC:
+ case CLOCK_BOOTTIME:
+ _nanouptime(tp, timekeep);
+ break;
+ default:
+ return clock_gettime(clock_id, tp);
+ }
+ return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..88276935675
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,37 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+ struct __timekeep *timekeep = _timekeep;
+ static struct timezone zerotz = { 0, 0 };
+
+ if (timekeep == NULL || timekeep->tc_user == 0)
+ return gettimeofday(tp, tzp);
+
+ if (tp)
+ _microtime(tp, timekeep);
+
+ if (tzp)
+ tzp = &zerotz;
+
+ return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
index 3f5f2c5b42b..6eaf8b107c6 100644
--- sys/arch/alpha/alpha/clock.c
+++ sys/arch/alpha/alpha/clock.c
@@ -64,7 +64,7 @@ int clk_irq = 0;
 
 u_int rpcc_get_timecount(struct timecounter *);
 struct timecounter rpcc_timecounter = {
- rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
+ rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..3db93d88dec 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
 #endif
 
 struct timecounter tsc_timecounter = {
- tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
+ tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
 };
 
 uint64_t
diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
index 613f7ee0e0f..00da0c6a8d0 100644
--- sys/arch/amd64/isa/clock.c
+++ sys/arch/amd64/isa/clock.c
@@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 
 int clockintr(void *);
diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
index 29394141ad5..6b7c6db862f 100644
--- sys/arch/arm64/dev/agtimer.c
+++ sys/arch/arm64/dev/agtimer.c
@@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
 u_int agtimer_get_timecount(struct timecounter *);
 
 static struct timecounter agtimer_timecounter = {
- agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
+ agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
 };
 
 struct agtimer_pcpu_softc {
diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
index 7605845d5e2..061542d532f 100644
--- sys/arch/armv7/omap/gptimer.c
+++ sys/arch/armv7/omap/gptimer.c
@@ -117,7 +117,7 @@ int gptimer_irq = 0;
 u_int gptimer_get_timecount(struct timecounter *);
 
 static struct timecounter gptimer_timecounter = {
- gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
+ gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
 };
 
 volatile u_int32_t nexttickevent;
diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
index 14a243c78d0..41028f9a602 100644
--- sys/arch/armv7/sunxi/sxitimer.c
+++ sys/arch/armv7/sunxi/sxitimer.c
@@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
 u_int sxitimer_get_timecount(struct timecounter *);
 
 static struct timecounter sxitimer_timecounter = {
- sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
+ sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
 };
 
 bus_space_tag_t sxitimer_iot;
diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
index 4c594ab5ec7..8cce6c3a893 100644
--- sys/arch/hppa/dev/clock.c
+++ sys/arch/hppa/dev/clock.c
@@ -47,7 +47,7 @@ int cpu_hardclock(void *);
 u_int itmr_get_timecount(struct timecounter *);
 
 struct timecounter itmr_timecounter = {
- itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
+ itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
index 09a6db983f2..dd74bd425ad 100644
--- sys/arch/i386/isa/clock.c
+++ sys/arch/i386/isa/clock.c
@@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
 u_long rtclock_tval;
diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
index 9d9f061eef9..bb8e4c7f9ae 100644
--- sys/arch/i386/pci/geodesc.c
+++ sys/arch/i386/pci/geodesc.c
@@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
  0xffffffff, /* counter_mask */
  27000000, /* frequency */
  "GEOTSC", /* name */
- 2000 /* quality */
+ 2000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 int
diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
index 8b8aa4ac430..a6f324e66f3 100644
--- sys/arch/i386/pci/gscpm.c
+++ sys/arch/i386/pci/gscpm.c
@@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "GSCPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach gscpm_ca = {
diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
index 6abf1627de2..90814d2dba0 100644
--- sys/arch/i386/pci/ichpcib.c
+++ sys/arch/i386/pci/ichpcib.c
@@ -64,6 +64,8 @@ struct timecounter ichpcib_timecounter = {
  3579545, /* frequency */
  "ICHPM", /* name */
  1000 /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach ichpcib_ca = {
diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
index ac3f1db6ccd..61da18ebff7 100644
--- sys/arch/loongson/loongson/generic3a_machdep.c
+++ sys/arch/loongson/loongson/generic3a_machdep.c
@@ -99,6 +99,8 @@ struct timecounter rs780e_timecounter = {
  .tc_frequency = HPET_FREQ,
  .tc_name = "hpet",
  .tc_quality = 100
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /* Firmware entry points */
diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
index a04120987e0..6580a4a46bf 100644
--- sys/arch/luna88k/luna88k/clock.c
+++ sys/arch/luna88k/luna88k/clock.c
@@ -112,7 +112,9 @@ struct timecounter clock_tc = {
  .tc_counter_mask = 0xffffffff,
  .tc_frequency = 0, /* will be filled in */
  .tc_name = "clock",
- .tc_quality = 0
+ .tc_quality = 0,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /*
diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
index 4a44a92cfc0..8c3ad620be8 100644
--- sys/arch/macppc/macppc/clock.c
+++ sys/arch/macppc/macppc/clock.c
@@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
 };
 
 /* calibrate the timecounter frequency for the listed models */
diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
index d4a42ed5acc..5c4dbadb5bb 100644
--- sys/arch/mips64/mips64/mips64_machdep.c
+++ sys/arch/mips64/mips64/mips64_machdep.c
@@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
  0xffffffff, /* counter_mask */
  0, /* frequency */
  "CP0", /* name */
- 0 /* quality */
+ 0, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 u_int
diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
index 604cb3be3ab..9d4d8564d5c 100644
--- sys/arch/octeon/octeon/machdep.c
+++ sys/arch/octeon/octeon/machdep.c
@@ -152,8 +152,9 @@ struct timecounter ioclock_timecounter = {
  .tc_name = "ioclock",
  .tc_quality = 0, /* ioclock can be overridden
  * by cp0 counter */
- .tc_priv = 0 /* clock register,
+ .tc_priv = 0, /* clock register,
  * determined at runtime */
+ .tc_user = 0, /* expose to user */
 };
 
 static int
diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
index ba7fa558b96..7b3fa05ddd5 100644
--- sys/arch/sgi/sgi/ip27_machdep.c
+++ sys/arch/sgi/sgi/ip27_machdep.c
@@ -111,7 +111,29 @@ struct timecounter ip27_hub_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
  .tc_frequency = 1250000,
  .tc_name = "hubrt",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = 0,
+ .tc_user = 0,
+};
+
+static int
+atoi(const char *s)
+{
+ int n, neg;
+
+ n = 0;
+ neg = 0;
+
+ while (*s == '-') {
+ s++;
+ neg = !neg;
+ }
+
+ while (*s != '\0') {
+ if (*s < '0' || *s > '9')
+ break;
+
+ n = (10 * n) + (*s - '0');
 };
 
 volatile uint64_t ip27_spinup_a0;
diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
index 56b29915c70..827775512ac 100644
--- sys/arch/sgi/xbow/xheart.c
+++ sys/arch/sgi/xbow/xheart.c
@@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
  .tc_frequency = 12500000,
  .tc_name = "heart",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
index e24f804dff6..1a7a1afa8c2 100644
--- sys/arch/sparc64/dev/psycho.c
+++ sys/arch/sparc64/dev/psycho.c
@@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
 u_int stick_get_timecount(struct timecounter *);
 
 struct timecounter stick_timecounter = {
- stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
+ stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
 };
 
 /*
diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
index fd5e8a9c15b..5c2e47d386b 100644
--- sys/arch/sparc64/sparc64/clock.c
+++ sys/arch/sparc64/sparc64/clock.c
@@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
 u_int tick_get_timecount(struct timecounter *);
 
 struct timecounter tick_timecounter = {
- tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
+ tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
 };
 
 u_int sys_tick_get_timecount(struct timecounter *);
 
 struct timecounter sys_tick_timecounter = {
- sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
+ sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
 };
 
 /*
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index d0ee72cec9b..13177a909da 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
  0xffffffff, /* counter_mask (32 bits) */
  0, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define HPET_TIMERS 3
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index cdc8c99a17a..89b5a397e47 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
  0x00ffffff, /* counter_mask (24 bits) */
  ACPI_FREQUENCY, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct acpitimer_softc {
diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
index 3abe03de722..017611e4b31 100644
--- sys/dev/pci/amdpm.c
+++ sys/dev/pci/amdpm.c
@@ -83,7 +83,9 @@ static struct timecounter amdpm_timecounter = {
  0xffffff, /* counter_mask */
  AMDPM_FREQUENCY, /* frequency */
  "AMDPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define AMDPM_CONFREG 0x40
diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
index db806eedf80..ce33cd175e6 100644
--- sys/dev/pci/viapm.c
+++ sys/dev/pci/viapm.c
@@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
  0xffffff, /* counter_mask */
  VIAPM_FREQUENCY, /* frequency */
  "VIAPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct timeout viapm_timeout;
diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
index 3ab2ae22831..8e326cf2502 100644
--- sys/dev/pv/hyperv.c
+++ sys/dev/pv/hyperv.c
@@ -143,7 +143,7 @@ struct {
 };
 
 struct timecounter hv_timecounter = {
- hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
+ hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
 };
 
 struct cfdriver hyperv_cd = {
diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
index 6b242f7448d..b80e4d2a484 100644
--- sys/dev/pv/pvclock.c
+++ sys/dev/pv/pvclock.c
@@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
 };
 
 struct timecounter pvclock_timecounter = {
- pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
+ pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
 };
 
 int
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..8fbcb0c43ec 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/timetc.h>
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,41 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+ size_t timekeep_sz = sizeof(struct __timekeep);
+
+ /*
+ * Similar to the sigcode object, except that there is a single timekeep
+ * object, and not one per emulation.
+ */
+ if (timekeep_object == NULL) {
+ vaddr_t va;
+
+ timekeep_object = uao_create(timekeep_sz, 0);
+ uao_reference(timekeep_object);
+
+ if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object,
+    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ timekeep = (struct __timekeep *)va;
+ timekeep->major = 0;
+ timekeep->minor = 0;
+ }
+
+ uao_reference(timekeep_object);
+ if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz),
+    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+ uao_detach(timekeep_object);
+ return (ENOMEM);
+ }
+
+ return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..a3465d3dafc 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -64,7 +64,7 @@ dummy_get_timecount(struct timecounter *tc)
 }
 
 static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
+ dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
 };
 
 /*
@@ -480,6 +480,36 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_update_timekeep(void)
+{
+ static struct timecounter *last_tc = NULL;
+
+ struct timehands *th;
+ u_int gen;
+
+ if (timekeep == NULL)
+ return;
+
+ th = timehands;
+ gen = th->th_generation;
+ membar_producer();
+ timekeep->th_scale = th->th_scale;
+ timekeep->th_offset_count = th->th_offset_count;
+ timekeep->th_offset = th->th_offset;
+ timekeep->th_naptime = th->th_naptime;
+ timekeep->th_boottime = th->th_boottime;
+ timekeep->th_generation = th->th_generation;
+ if (last_tc != th->th_counter) {
+ timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
+ timekeep->tc_user = th->th_counter->tc_user;
+ last_tc = th->th_counter;
+ }
+ membar_producer();
+
+ return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +662,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..f55b75f1e84 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 2004, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..93a79a220db 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -248,6 +248,8 @@ struct process {
  u_int ps_rtableid; /* Process routing table/domain. */
  char ps_nice; /* Process "nice" value. */
 
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
+
  struct uprof { /* profile arguments */
  caddr_t pr_base; /* buffer base */
  size_t  pr_size; /* buffer size */
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..3882bac6c55 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,15 +163,15 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
-#include <sys/_time.h>
-
 /* Time expressed as seconds and fractions of a second + operations on it. */
 struct bintime {
  time_t sec;
  uint64_t frac;
 };
 
+#if defined(_KERNEL) || defined(_STANDALONE)
+#include <sys/_time.h>
+
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
     (btp)->frac cmp (ctp)->frac : \
diff --git sys/sys/timetc.h sys/sys/timetc.h
index ce81c3475a0..3dff89a3f47 100644
--- sys/sys/timetc.h
+++ sys/sys/timetc.h
@@ -24,7 +24,7 @@
 #ifndef _SYS_TIMETC_H_
 #define _SYS_TIMETC_H_
 
-#ifndef _KERNEL
+#if !defined(_KERNEL) && !defined(_LIBC)
 #error "no user-serviceable parts inside"
 #endif
 
@@ -80,6 +80,8 @@ struct timecounter {
  */
  void *tc_priv; /* [I] */
  /* Pointer to the timecounter's private parts. */
+ char tc_user; /* [I] */
+ /* Expose this timecounter to userland.  Set in softc. */
  SLIST_ENTRY(timecounter) tc_next; /* [I] */
  /* Pointer to the next timecounter. */
  int64_t tc_freq_adj; /* [tw] */
@@ -88,11 +90,29 @@ struct timecounter {
  /* Precision of the counter.  Computed in tc_init(). */
 };
 
+struct __timekeep {
+ uint32_t major; /* version major number */
+ uint32_t minor; /* version minor number */
+
+ uint64_t th_scale;
+ unsigned int th_offset_count;
+ struct bintime th_offset;
+ struct bintime th_naptime;
+ struct bintime th_boottime;
+ volatile unsigned int th_generation;
+
+ unsigned int tc_user;
+ unsigned int tc_counter_mask;
+};
+
 struct rwlock;
 extern struct rwlock tc_lock;
 
 extern struct timecounter *timecounter;
 
+extern struct uvm_object *timekeep_object;
+extern struct __timekeep *timekeep;
+
 u_int64_t tc_getfrequency(void);
 u_int64_t tc_getprecision(void);
 void tc_init(struct timecounter *tc);

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Robert Nagy
On 29/05/20 17:51 +0300, Paul Irofti wrote:

> On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 29 May 2020 13:45:37 +0100
> > > From: Stuart Henderson <[hidden email]>
> > >
> > > On 2020/05/29 13:50, Paul Irofti wrote:
> > > > +struct __timekeep {
> > > > + uint32_t major; /* version major number */
> > > > + uint32_t minor; /* version minor number */
> > > > +
> > > > + u_int64_t th_scale;
> > > > + unsigned int th_offset_count;
> > > > + struct bintime th_offset;
> > > > + struct bintime th_naptime;
> > > > + struct bintime th_boottime;
> > > > + volatile unsigned int th_generation;
> > > > +
> > > > + unsigned int tc_user;
> > > > + unsigned int tc_counter_mask;
> > > > +};
> > >
> > > Ah good, you got rid of u_int, that was causing problems with port builds.
> >
> > That in itself is a problem.  This means <time.h> is the wrong place
> > for this struct.  We need to find a better place for this.
> >
> > Since this is now closely linked to the timecounter stuff
> > <sys/timetc.h> would be an obvious place.  Now that file has:
> >
> > #ifndef _KERNEL
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > you could change that into
> >
> > #if !defined(_KERNEL) && !defined(_LIBC)
> > #error "no user-serviceable parts inside"
> > #endif
> >
> > and make sure you #define _LIBC brefore uncluding this file where it
> > is needed.  As few places as possible obviously.
>
> Done. Also includes claudio@'s observation.

I think if there are no more header changes, this should be commited to
have wider testing. We are also just after tree unlock so it feels like
the right time, and since there is no library bump we can easily revert
if there is a need for that.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> Date: Sat, 30 May 2020 10:32:15 +0200
> From: Robert Nagy <[hidden email]>
>
> On 29/05/20 17:51 +0300, Paul Irofti wrote:
> > On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > > > Date: Fri, 29 May 2020 13:45:37 +0100
> > > > From: Stuart Henderson <[hidden email]>
> > > >
> > > > On 2020/05/29 13:50, Paul Irofti wrote:
> > > > > +struct __timekeep {
> > > > > + uint32_t major; /* version major number */
> > > > > + uint32_t minor; /* version minor number */
> > > > > +
> > > > > + u_int64_t th_scale;
> > > > > + unsigned int th_offset_count;
> > > > > + struct bintime th_offset;
> > > > > + struct bintime th_naptime;
> > > > > + struct bintime th_boottime;
> > > > > + volatile unsigned int th_generation;
> > > > > +
> > > > > + unsigned int tc_user;
> > > > > + unsigned int tc_counter_mask;
> > > > > +};
> > > >
> > > > Ah good, you got rid of u_int, that was causing problems with port builds.
> > >
> > > That in itself is a problem.  This means <time.h> is the wrong place
> > > for this struct.  We need to find a better place for this.
> > >
> > > Since this is now closely linked to the timecounter stuff
> > > <sys/timetc.h> would be an obvious place.  Now that file has:
> > >
> > > #ifndef _KERNEL
> > > #error "no user-serviceable parts inside"
> > > #endif
> > >
> > > you could change that into
> > >
> > > #if !defined(_KERNEL) && !defined(_LIBC)
> > > #error "no user-serviceable parts inside"
> > > #endif
> > >
> > > and make sure you #define _LIBC brefore uncluding this file where it
> > > is needed.  As few places as possible obviously.
> >
> > Done. Also includes claudio@'s observation.
>
> I think if there are no more header changes, this should be commited to
> have wider testing. We are also just after tree unlock so it feels like
> the right time, and since there is no library bump we can easily revert
> if there is a need for that.

Not ready yet.

I also would like to see at least one non-amd64 platform supported
before we settle on this approach.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Robert Nagy
On 30/05/20 10:40 +0200, Mark Kettenis wrote:

> > Date: Sat, 30 May 2020 10:32:15 +0200
> > From: Robert Nagy <[hidden email]>
> >
> > On 29/05/20 17:51 +0300, Paul Irofti wrote:
> > > On Fri, May 29, 2020 at 03:00:50PM +0200, Mark Kettenis wrote:
> > > > > Date: Fri, 29 May 2020 13:45:37 +0100
> > > > > From: Stuart Henderson <[hidden email]>
> > > > >
> > > > > On 2020/05/29 13:50, Paul Irofti wrote:
> > > > > > +struct __timekeep {
> > > > > > + uint32_t major; /* version major number */
> > > > > > + uint32_t minor; /* version minor number */
> > > > > > +
> > > > > > + u_int64_t th_scale;
> > > > > > + unsigned int th_offset_count;
> > > > > > + struct bintime th_offset;
> > > > > > + struct bintime th_naptime;
> > > > > > + struct bintime th_boottime;
> > > > > > + volatile unsigned int th_generation;
> > > > > > +
> > > > > > + unsigned int tc_user;
> > > > > > + unsigned int tc_counter_mask;
> > > > > > +};
> > > > >
> > > > > Ah good, you got rid of u_int, that was causing problems with port builds.
> > > >
> > > > That in itself is a problem.  This means <time.h> is the wrong place
> > > > for this struct.  We need to find a better place for this.
> > > >
> > > > Since this is now closely linked to the timecounter stuff
> > > > <sys/timetc.h> would be an obvious place.  Now that file has:
> > > >
> > > > #ifndef _KERNEL
> > > > #error "no user-serviceable parts inside"
> > > > #endif
> > > >
> > > > you could change that into
> > > >
> > > > #if !defined(_KERNEL) && !defined(_LIBC)
> > > > #error "no user-serviceable parts inside"
> > > > #endif
> > > >
> > > > and make sure you #define _LIBC brefore uncluding this file where it
> > > > is needed.  As few places as possible obviously.
> > >
> > > Done. Also includes claudio@'s observation.
> >
> > I think if there are no more header changes, this should be commited to
> > have wider testing. We are also just after tree unlock so it feels like
> > the right time, and since there is no library bump we can easily revert
> > if there is a need for that.
>
> Not ready yet.
>
> I also would like to see at least one non-amd64 platform supported
> before we settle on this approach.


Which one would you prefer? arm64?

12345 ... 11