userland clock_gettime proof of concept

classic Classic list List threaded Threaded
181 messages Options
1234567 ... 10
Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Mark Kettenis <[hidden email]> wrote:

> > I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> > not sure, but one might move the list of arches to dlfcn/Makefile.inc
> > and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> > might drop the tc_get_timecount function pointer and just always call
> > the function #ifdef TIMEKEEP.
>
> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
> catching that.  The benefit of the TIMEKEEP define would be that we
> can eliminate the fallback code completely on architectures that don't
> implement this functionality.

...

> > --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> > +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> > @@ -70,7 +70,7 @@
> >  
> >  /* provide definitions for these */
> >  const dl_cb *_dl_cb __relro = NULL;
> > -#if defined(__amd64)
> > +#if defined(__amd64__) || defined(__powerpc__)
> >  uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >  #else
> >  uint64_t (*const tc_get_timecount)(void) = NULL;

1) I think adding _md to the name is superfluous.  There will never
   be an MI version, so tc_get_timecount() is enough.

2) I hope we can get away from #ifdef __arch__.
   Maybe this can be split into architectures which
      a) have a function called tc_get_timecount()
   or
      b) tc_get_timecount is #define'd to NULL, though I don't
         know which MD include file to do that in

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-31 18:25, Theo de Raadt wrote:

> Mark Kettenis <[hidden email]> wrote:
>
>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
>>> might drop the tc_get_timecount function pointer and just always call
>>> the function #ifdef TIMEKEEP.
>>
>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
>> catching that.  The benefit of the TIMEKEEP define would be that we
>> can eliminate the fallback code completely on architectures that don't
>> implement this functionality.
>
> ...

Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc, which I
now see is commented out...

>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
>>> @@ -70,7 +70,7 @@
>>>  
>>>   /* provide definitions for these */
>>>   const dl_cb *_dl_cb __relro = NULL;
>>> -#if defined(__amd64)
>>> +#if defined(__amd64__) || defined(__powerpc__)
>>>   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>>>   #else
>>>   uint64_t (*const tc_get_timecount)(void) = NULL;
>
> 1) I think adding _md to the name is superflous.  There will never
>     be a MI version, so tc_get_timecount() is enough.

What about pvclock(4)?

> 2) I hope we can get away from #ifdef __ arch__.
>     Maybe this can be split into architectures which
>        a) have a function called tc_get_timecount()
>     or
>        b) tc_get_timecount is #define'd to NULL, though I don't
>           know which MD include file to do that in

If we go with something like this or with something like -DTIMEKEEP, how
do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
Split them in MD headers? But then we end up in the same place. Sort of.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which
> I see now it is commented out...
>
> >>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> >>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> >>> @@ -70,7 +70,7 @@
> >>>     /* provide definitions for these */
> >>>   const dl_cb *_dl_cb __relro = NULL;
> >>> -#if defined(__amd64)
> >>> +#if defined(__amd64__) || defined(__powerpc__)
> >>>   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >>>   #else
> >>>   uint64_t (*const tc_get_timecount)(void) = NULL;
> >
> > 1) I think adding _md to the name is superflous.  There will never
> >     be a MI version, so tc_get_timecount() is enough.
>
> What about pvclock(4)?

What about it?  Is it MI?

> > 2) I hope we can get away from #ifdef __ arch__.
> >     Maybe this can be split into architectures which
> >        a) have a function called tc_get_timecount()
> >     or
> >        b) tc_get_timecount is #define'd to NULL, though I don't
> >           know which MD include file to do that in
>
> If we go with something like this or with something like -DTIMEKEEP,
> how do we handle the different PROTO_WRAP vs. PROTO_NORMAL
> declarations? Split them in MD headers? But then we end up in the same
> place. Sort of.

Sorry, you lost me here.  But go ahead, continue with your plan, which will
result in a 6-line chunk which will look something like this:

#if defined || defined || defined || defined || defined || defined ||
 defined || defined || defined || defined || defined || defined ||
 defined || defined


Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-05-31 19:17, Theo de Raadt wrote:

> Paul Irofti <[hidden email]> wrote:
>
>> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which
>> I see now it is commented out...
>>
>>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
>>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
>>>>> @@ -70,7 +70,7 @@
>>>>>      /* provide definitions for these */
>>>>>    const dl_cb *_dl_cb __relro = NULL;
>>>>> -#if defined(__amd64)
>>>>> +#if defined(__amd64__) || defined(__powerpc__)
>>>>>    uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>>>>>    #else
>>>>>    uint64_t (*const tc_get_timecount)(void) = NULL;
>>>
>>> 1) I think adding _md to the name is superflous.  There will never
>>>      be a MI version, so tc_get_timecount() is enough.
>>
>> What about pvclock(4)?
>
> What about it?  Is it MI?

It is used by two architectures. There is also glxpcib. Of course we can
have a copy of each in arch/*/usertc.c

>
>>> 2) I hope we can get away from #ifdef __ arch__.
>>>      Maybe this can be split into architectures which
>>>         a) have a function called tc_get_timecount()
>>>      or
>>>         b) tc_get_timecount is #define'd to NULL, though I don't
>>>            know which MD include file to do that in
>>
>> If we go with something like this or with something like -DTIMEKEEP,
>> how do we handle the different PROTO_WRAP vs. PROTO_NORMAL
>> declarations? Split them in MD headers? But then we end up in the same
>> place. Sort of.
>
> Sorry you lost me here.  

I was talking about kettenis@'s idea that this will help us lose the
wrapper on architectures that do not support user clock_gettime.

In lib/libc/hidden/time.h we have PROTO_WRAP(clock_gettime); instead of
PROTO_NORMAL(clock_gettime) and all the libc calls to clock_gettime()
are now WRAP(clock_gettime).

So we will have to keep the wrapper even for architectures that just
fall back to the system call.

The only way not to do that is to add a different macro? Or to create MD
files for wrap and non-wrap architectures.

 > [aggressive defined-defined comment]

I am more than happy to lose the ifdef maze as you know I always have
been in such situations.

The reason I put that there is because I could not figure out another
way to do that nicely.

My diff has already split this into arch's that have tc_get_timecount()
and those that do not; your point a).

We need to do point b): identify a proper include file.

Paul

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> On 2020-05-31 19:17, Theo de Raadt wrote:
> > Paul Irofti <[hidden email]> wrote:
> >
> >> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which
> >> I see now it is commented out...
> >>
> >>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> >>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> >>>>> @@ -70,7 +70,7 @@
> >>>>>      /* provide definitions for these */
> >>>>>    const dl_cb *_dl_cb __relro = NULL;
> >>>>> -#if defined(__amd64)
> >>>>> +#if defined(__amd64__) || defined(__powerpc__)
> >>>>>    uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >>>>>    #else
> >>>>>    uint64_t (*const tc_get_timecount)(void) = NULL;
> >>>
> >>> 1) I think adding _md to the name is superflous.  There will never
> >>>      be a MI version, so tc_get_timecount() is enough.
> >>
> >> What about pvclock(4)?
> >
> > What about it?  Is it MI?
>
> It is used by two architectures. There is also glxpcib. Of course we
> can have a copy of each in arch/*/usertc.c

You plan to flip between supporting functions on the fly?

How do you know to flip?

It sounds insane, and like designing for a problem which doesn't exist.

Obviously on an architecture there must be *one function* that does
the job, using whatever it finds available.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
In reply to this post by Paul Irofti-4
> From: Paul Irofti <[hidden email]>
> Date: Sun, 31 May 2020 19:12:54 +0300
>
> On 2020-05-31 18:25, Theo de Raadt wrote:
> > Mark Kettenis <[hidden email]> wrote:
> >
> >>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> >>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> >>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> >>> might drop the tc_get_timecount function pointer and just always call
> >>> the function #ifdef TIMEKEEP.
> >>
> >> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
> >> catching that.  The benefit of the TIMEKEEP define would be that we
> >> can eliminate the fallback code completely on architectures that don't
> >> implement this functionality.
> >
> > ...
>
> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
> see now it is commented out...
>
> >>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> >>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> >>> @@ -70,7 +70,7 @@
> >>>  
> >>>   /* provide definitions for these */
> >>>   const dl_cb *_dl_cb __relro = NULL;
> >>> -#if defined(__amd64)
> >>> +#if defined(__amd64__) || defined(__powerpc__)
> >>>   uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >>>   #else
> >>>   uint64_t (*const tc_get_timecount)(void) = NULL;
> >
> > 1) I think adding _md to the name is superflous.  There will never
> >     be a MI version, so tc_get_timecount() is enough.
>
> What about pvclock(4)?

What about it?  Seems to me what you're really thinking of here is how
to support more than just one timecounter for a specific architecture.
Your function pointer is not really going to help in that case.
You'll need to dispatch to the right function based on some sort of
machine-specific clock ID.

Oh and BTW, I don't think you're ever going to support pvclock(4).
Take a look at the code and think how you would do all that magic in
userland...

> > 2) I hope we can get away from #ifdef __ arch__.
> >     Maybe this can be split into architectures which
> >        a) have a function called tc_get_timecount()
> >     or
> >        b) tc_get_timecount is #define'd to NULL, though I don't
> >           know which MD include file to do that in
>
> If we go with something like this or with something like -DTIMEKEEP, how
> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
> Split them in MD headers? But then we end up in the same place. Sort of.

Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distinguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function *on all architectures* that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, or
when called with a clockid that isn't supported, that function would
simply return NULL.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Mark Kettenis <[hidden email]> wrote:

> On sparc64 we need to support both tick_timecounter and
> sys_tick_timecounter.  So we need some sort of clockid value to
> distnguish between those two.  I already suggested to use the tc_user
> field of the timecounter for that.  0 means that a timecounter is not
> usable in userland, a (small) positive integer means a specific
> timecounter type.  The code in libc will need to know whether a
> particular timecounter type can be supported.  My proposal would be to
> implement a function *on all architecture* that takes the clockid as
> an argument and returns a pointer to the function that implements
> support for that timecounter.  On architectures without support, ir
> when called with a clockid that isn't supported, that function would
> simply return NULL.

I agree.

The alternative being tried here is to do it all at link-time.  I don't
think that is flexible enough to cover all the architectures.
Determining this at startup, and following a pointer is the natural
approach.

(There has been some pressure to get this in before it covers all the
architectures and this kind of discussion is why I think such a
premature "and then we'll fix it in the tree" procedure is wrong).

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On 2020-05-31 20:46, Mark Kettenis wrote:

>> From: Paul Irofti <[hidden email]>
>> Date: Sun, 31 May 2020 19:12:54 +0300
>>
>> On 2020-05-31 18:25, Theo de Raadt wrote:
>>> Mark Kettenis <[hidden email]> wrote:
>>>
>>>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
>>>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
>>>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
>>>>> might drop the tc_get_timecount function pointer and just always call
>>>>> the function #ifdef TIMEKEEP.
>>>>
>>>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
>>>> catching that.  The benefit of the TIMEKEEP define would be that we
>>>> can eliminate the fallback code completely on architectures that don't
>>>> implement this functionality.
>>>
>>> ...
>>
>> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
>> see now it is commented out...
>>
>>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
>>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
>>>>> @@ -70,7 +70,7 @@
>>>>>    
>>>>>    /* provide definitions for these */
>>>>>    const dl_cb *_dl_cb __relro = NULL;
>>>>> -#if defined(__amd64)
>>>>> +#if defined(__amd64__) || defined(__powerpc__)
>>>>>    uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>>>>>    #else
>>>>>    uint64_t (*const tc_get_timecount)(void) = NULL;
>>>
>>> 1) I think adding _md to the name is superflous.  There will never
>>>      be a MI version, so tc_get_timecount() is enough.
>>
>> What about pvclock(4)?
>
> What about it?  Seems to me what you're really thinking of here is how
> to support more than just one timecounter for a specific architecture.
> Your function pointer is not really going to help in that case.
> You'll need to dispatch to the right function based on some sort of
> machine-specific clock ID.
>
> Oh and BTW, I don't think you're ever going to support pvclock(4).
> Take a look at the code and think how you would do all that magic in
> userland...
>
>>> 2) I hope we can get away from #ifdef __ arch__.
>>>      Maybe this can be split into architectures which
>>>         a) have a function called tc_get_timecount()
>>>      or
>>>         b) tc_get_timecount is #define'd to NULL, though I don't
>>>            know which MD include file to do that in
>>
>> If we go with something like this or with something like -DTIMEKEEP, how
>> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
>> Split them in MD headers? But then we end up in the same place. Sort of.
>
> Forget about all that for a moment.  Here is an alternative suggestion:
>
> On sparc64 we need to support both tick_timecounter and
> sys_tick_timecounter.  So we need some sort of clockid value to
> distnguish between those two.  I already suggested to use the tc_user
> field of the timecounter for that.  0 means that a timecounter is not
> usable in userland, a (small) positive integer means a specific
> timecounter type.  The code in libc will need to know whether a
> particular timecounter type can be supported.  My proposal would be to
> implement a function *on all architecture* that takes the clockid as
> an argument and returns a pointer to the function that implements
> support for that timecounter.  On architectures without support, ir
> when called with a clockid that isn't supported, that function would
> simply return NULL.

Sure. All architectures will register their clocks with a unique ID in
timetc.h, right? And then we do clockfun[clockid]() in libc, right?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Paul Irofti <[hidden email]> wrote:

> > Forget about all that for a moment.  Here is an alternative suggestion:
> >
> > On sparc64 we need to support both tick_timecounter and
> > sys_tick_timecounter.  So we need some sort of clockid value to
> > distnguish between those two.  I already suggested to use the tc_user
> > field of the timecounter for that.  0 means that a timecounter is not
> > usable in userland, a (small) positive integer means a specific
> > timecounter type.  The code in libc will need to know whether a
> > particular timecounter type can be supported.  My proposal would be to
> > implement a function *on all architecture* that takes the clockid as
> > an argument and returns a pointer to the function that implements
> > support for that timecounter.  On architectures without support, ir
> > when called with a clockid that isn't supported, that function would
> > simply return NULL.
>
> Sure. All architectures will register their clocks with a unique ID in
> timetc.h, right? And then we do clockfun[clockid]() in libc, right?

No, don't do that on every call -- instead, get the function pointer once.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Theo de Raadt-2
On 2020-05-31 20:51, Theo de Raadt wrote:
> (There has been some pressure to get this in before it covers all the
> architectures and this kind of discussion is why I think such a
> premature "and then we'll fix it in the tree" procedure is wrong).

Again, I hope not from me. I am in no rush with this diff nor do I want
to put any pressure to get this in. I am quite happy that you feel this
is a good thing and I am also happy that you are helping me get this in
proper shape.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Christian Weisgerber
In reply to this post by Theo de Raadt-2
On 2020-05-31, "Theo de Raadt" <[hidden email]> wrote:

>> > particular timecounter type can be supported.  My proposal would be to
>> > implement a function *on all architecture* that takes the clockid as
>> > an argument and returns a pointer to the function that implements
>> > support for that timecounter.  On architectures without support, ir
>> > when called with a clockid that isn't supported, that function would
>> > simply return NULL.
>>
>> Sure. All architectures will register their clocks with a unique ID in
>> timetc.h, right? And then we do clockfun[clockid]() in libc, right?
>
> No, don't do that on every call -- instead, get the function pointer once.

Then you still need to check on every call whether the clockid has
changed (because the kern.timecounter.hardware sysctl was changed)
and refetch the function pointer in that case.

--
Christian "naddy" Weisgerber                          [hidden email]

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Theo de Raadt-2
Christian Weisgerber <[hidden email]> wrote:

> On 2020-05-31, "Theo de Raadt" <[hidden email]> wrote:
>
> >> > particular timecounter type can be supported.  My proposal would be to
> >> > implement a function *on all architecture* that takes the clockid as
> >> > an argument and returns a pointer to the function that implements
> >> > support for that timecounter.  On architectures without support, ir
> >> > when called with a clockid that isn't supported, that function would
> >> > simply return NULL.
> >>
> >> Sure. All architectures will register their clocks with a unique ID in
> >> timetc.h, right? And then we do clockfun[clockid]() in libc, right?
> >
> > No, don't do that on every call -- instead, get the function pointer once.
>
> Then you still need to check on every call whether the clockid has
> changed (because the kern.timecounter.hardware sysctl was changed)
> and refetch the function pointer in that case.

Then really, we should remove that sysctl support.

Because otherwise I don't see how it can work.  Aren't there deadlock
or spinning conditions?  Or at minimum, situations where time won't flow
linearly?

All of that tweaking is junk working around bugs and incomplete software.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Christian Weisgerber
Theo de Raadt:

> > Then you still need to check on every call whether the clockid has
> > changed (because the kern.timecounter.hardware sysctl was changed)
> > and refetch the function pointer in that case.
>
> Then really, we should remove that sysctl support.
>
> Because otherwise I don't see how it can work.  Aren't there deadlock
> or spinning conditions?  Or at minimum, situtions where time won't flow
> linearly.

The patch as-is works fine when kern.timecounter.hardware is toggled
back and forth between tsc with userland gettime support and, say,
acpihpet0 without.  tk_user marks whether the currently selected
timecounter has userland support, the wrapper checks that and falls
back to the system call.

Reading the code, I don't see a reason why this shouldn't work fine,
and experimentally it does work.  Changing kern.timecounter.hardware
is transparent.

kettenis@'s suggestion of extending this from on/off to
clockid1/clockid2/.../off makes sense to me.  Most architectures
will likely only have one timecounter for which userland support
is possible, but sparc64 has indeed two (tick, stick).

--
Christian "naddy" Weisgerber                          [hidden email]

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On 2020-05-31 20:46, Mark Kettenis wrote:

>> From: Paul Irofti <[hidden email]>
>> Date: Sun, 31 May 2020 19:12:54 +0300
>>
>> On 2020-05-31 18:25, Theo de Raadt wrote:
>>> Mark Kettenis <[hidden email]> wrote:
>>>
>>>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
>>>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
>>>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
>>>>> might drop the tc_get_timecount function pointer and just always call
>>>>> the function #ifdef TIMEKEEP.
>>>>
>>>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
>>>> catching that.  The benefit of the TIMEKEEP define would be that we
>>>> can eliminate the fallback code completely on architectures that don't
>>>> implement this functionality.
>>>
>>> ...
>>
>> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
>> see now it is commented out...
>>
>>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
>>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
>>>>> @@ -70,7 +70,7 @@
>>>>>    
>>>>>    /* provide definitions for these */
>>>>>    const dl_cb *_dl_cb __relro = NULL;
>>>>> -#if defined(__amd64)
>>>>> +#if defined(__amd64__) || defined(__powerpc__)
>>>>>    uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>>>>>    #else
>>>>>    uint64_t (*const tc_get_timecount)(void) = NULL;
>>>
>>> 1) I think adding _md to the name is superflous.  There will never
>>>      be a MI version, so tc_get_timecount() is enough.
>>
>> What about pvclock(4)?
>
> What about it?  Seems to me what you're really thinking of here is how
> to support more than just one timecounter for a specific architecture.
> Your function pointer is not really going to help in that case.
> You'll need to dispatch to the right function based on some sort of
> machine-specific clock ID.
>
> Oh and BTW, I don't think you're ever going to support pvclock(4).
> Take a look at the code and think how you would do all that magic in
> userland...
>
>>> 2) I hope we can get away from #ifdef __ arch__.
>>>      Maybe this can be split into architectures which
>>>         a) have a function called tc_get_timecount()
>>>      or
>>>         b) tc_get_timecount is #define'd to NULL, though I don't
>>>            know which MD include file to do that in
>>
>> If we go with something like this or with something like -DTIMEKEEP, how
>> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
>> Split them in MD headers? But then we end up in the same place. Sort of.
>
> Forget about all that for a moment.  Here is an alternative suggestion:
>
> On sparc64 we need to support both tick_timecounter and
> sys_tick_timecounter.  So we need some sort of clockid value to
> distnguish between those two.  I already suggested to use the tc_user
> field of the timecounter for that.  0 means that a timecounter is not
> usable in userland, a (small) positive integer means a specific
> timecounter type.  The code in libc will need to know whether a
> particular timecounter type can be supported.  My proposal would be to
> implement a function *on all architecture* that takes the clockid as
> an argument and returns a pointer to the function that implements
> support for that timecounter.  On architectures without support, ir
> when called with a clockid that isn't supported, that function would
> simply return NULL.
>


What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be
NULL or the prototype of a function? (I know SYS.h is a bit of a
stretch; if not, we can create a separate header usertc.h?) And then we
use tc_user to be an ID for architectures such as sparc64 that have more
than one clock, and inside libc/*/gen/usertc.c we check which one it is and
call a local static function based on it?

Would that be OK?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> From: Paul Irofti <[hidden email]>
> Date: Tue, 2 Jun 2020 16:23:30 +0300
>
> On 2020-05-31 20:46, Mark Kettenis wrote:
> >> From: Paul Irofti <[hidden email]>
> >> Date: Sun, 31 May 2020 19:12:54 +0300
> >>
> >> On 2020-05-31 18:25, Theo de Raadt wrote:
> >>> Mark Kettenis <[hidden email]> wrote:
> >>>
> >>>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> >>>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> >>>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> >>>>> might drop the tc_get_timecount function pointer and just always call
> >>>>> the function #ifdef TIMEKEEP.
> >>>>
> >>>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
> >>>> catching that.  The benefit of the TIMEKEEP define would be that we
> >>>> can eliminate the fallback code completely on architectures that don't
> >>>> implement this functionality.
> >>>
> >>> ...
> >>
> >> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
> >> see now it is commented out...
> >>
> >>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> >>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> >>>>> @@ -70,7 +70,7 @@
> >>>>>    
> >>>>>    /* provide definitions for these */
> >>>>>    const dl_cb *_dl_cb __relro = NULL;
> >>>>> -#if defined(__amd64)
> >>>>> +#if defined(__amd64__) || defined(__powerpc__)
> >>>>>    uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >>>>>    #else
> >>>>>    uint64_t (*const tc_get_timecount)(void) = NULL;
> >>>
> >>> 1) I think adding _md to the name is superflous.  There will never
> >>>      be a MI version, so tc_get_timecount() is enough.
> >>
> >> What about pvclock(4)?
> >
> > What about it?  Seems to me what you're really thinking of here is how
> > to support more than just one timecounter for a specific architecture.
> > Your function pointer is not really going to help in that case.
> > You'll need to dispatch to the right function based on some sort of
> > machine-specific clock ID.
> >
> > Oh and BTW, I don't think you're ever going to support pvclock(4).
> > Take a look at the code and think how you would do all that magic in
> > userland...
> >
> >>> 2) I hope we can get away from #ifdef __ arch__.
> >>>      Maybe this can be split into architectures which
> >>>         a) have a function called tc_get_timecount()
> >>>      or
> >>>         b) tc_get_timecount is #define'd to NULL, though I don't
> >>>            know which MD include file to do that in
> >>
> >> If we go with something like this or with something like -DTIMEKEEP, how
> >> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
> >> Split them in MD headers? But then we end up in the same place. Sort of.
> >
> > Forget about all that for a moment.  Here is an alternative suggestion:
> >
> > On sparc64 we need to support both tick_timecounter and
> > sys_tick_timecounter.  So we need some sort of clockid value to
> > distnguish between those two.  I already suggested to use the tc_user
> > field of the timecounter for that.  0 means that a timecounter is not
> > usable in userland, a (small) positive integer means a specific
> > timecounter type.  The code in libc will need to know whether a
> > particular timecounter type can be supported.  My proposal would be to
> > implement a function *on all architecture* that takes the clockid as
> > an argument and returns a pointer to the function that implements
> > support for that timecounter.  On architectures without support, ir
> > when called with a clockid that isn't supported, that function would
> > simply return NULL.
> >
>
>
> What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be
> NULL or the prototype of a function. (I know SYS.c is a bit of a
> stretch, if not we can create a separate header usertc.h?) And then we
> use tc_user to be an ID for architectures such as sparc64 that have more
> than one clock and inside libc/*/gen/usertc.c we check which is it and
> call a local static function based on it?
>
> Would that be OK?

How are you going to support multiple timecounters on an architecture?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On 2020-06-02 16:29, Mark Kettenis wrote:

>> From: Paul Irofti <[hidden email]>
>> Date: Tue, 2 Jun 2020 16:23:30 +0300
>>
>> On 2020-05-31 20:46, Mark Kettenis wrote:
>>>> From: Paul Irofti <[hidden email]>
>>>> Date: Sun, 31 May 2020 19:12:54 +0300
>>>>
>>>> On 2020-05-31 18:25, Theo de Raadt wrote:
>>>>> Mark Kettenis <[hidden email]> wrote:
>>>>>
>>>>>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
>>>>>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
>>>>>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
>>>>>>> might drop the tc_get_timecount function pointer and just always call
>>>>>>> the function #ifdef TIMEKEEP.
>>>>>>
>>>>>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
>>>>>> catching that.  The benefit of the TIMEKEEP define would be that we
>>>>>> can eliminate the fallback code completely on architectures that don't
>>>>>> implement this functionality.
>>>>>
>>>>> ...
>>>>
>>>> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
>>>> see now it is commented out...
>>>>
>>>>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
>>>>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>    
>>>>>>>     /* provide definitions for these */
>>>>>>>     const dl_cb *_dl_cb __relro = NULL;
>>>>>>> -#if defined(__amd64)
>>>>>>> +#if defined(__amd64__) || defined(__powerpc__)
>>>>>>>     uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
>>>>>>>     #else
>>>>>>>     uint64_t (*const tc_get_timecount)(void) = NULL;
>>>>>
>>>>> 1) I think adding _md to the name is superflous.  There will never
>>>>>       be a MI version, so tc_get_timecount() is enough.
>>>>
>>>> What about pvclock(4)?
>>>
>>> What about it?  Seems to me what you're really thinking of here is how
>>> to support more than just one timecounter for a specific architecture.
>>> Your function pointer is not really going to help in that case.
>>> You'll need to dispatch to the right function based on some sort of
>>> machine-specific clock ID.
>>>
>>> Oh and BTW, I don't think you're ever going to support pvclock(4).
>>> Take a look at the code and think how you would do all that magic in
>>> userland...
>>>
>>>>> 2) I hope we can get away from #ifdef __ arch__.
>>>>>       Maybe this can be split into architectures which
>>>>>          a) have a function called tc_get_timecount()
>>>>>       or
>>>>>          b) tc_get_timecount is #define'd to NULL, though I don't
>>>>>             know which MD include file to do that in
>>>>
>>>> If we go with something like this or with something like -DTIMEKEEP, how
>>>> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
>>>> Split them in MD headers? But then we end up in the same place. Sort of.
>>>
>>> Forget about all that for a moment.  Here is an alternative suggestion:
>>>
>>> On sparc64 we need to support both tick_timecounter and
>>> sys_tick_timecounter.  So we need some sort of clockid value to
>>> distnguish between those two.  I already suggested to use the tc_user
>>> field of the timecounter for that.  0 means that a timecounter is not
>>> usable in userland, a (small) positive integer means a specific
>>> timecounter type.  The code in libc will need to know whether a
>>> particular timecounter type can be supported.  My proposal would be to
>>> implement a function *on all architecture* that takes the clockid as
>>> an argument and returns a pointer to the function that implements
>>> support for that timecounter.  On architectures without support, ir
>>> when called with a clockid that isn't supported, that function would
>>> simply return NULL.
>>>
>>
>>
>> What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be
>> NULL or the prototype of a function. (I know SYS.c is a bit of a
>> stretch, if not we can create a separate header usertc.h?) And then we
>> use tc_user to be an ID for architectures such as sparc64 that have more
>> than one clock and inside libc/*/gen/usertc.c we check which is it and
>> call a local static function based on it?
>>
>> Would that be OK?
>
> How are you going to support multiple timecounters on an architecture?

Let's say tsc sets tc_user=1 and acpihpet sets tc_user=2. Then in
libc/arch/amd64/gen/usertc.c I do:

static uint64_t
rdtsc()
{
         uint32_t hi, lo;
         asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
         return ((uint64_t)lo)|(((uint64_t)hi)<<32);
}

static uint64_t
acpihpet()
{
         return rdtsc(); /* JUST TO COMPILE */
}

static uint64_t (*get_tc[])(void) =
{
         rdtsc,
         acpihpet,
};

uint64_t
tc_get_timecount_md(struct timekeep *tk)
{
         return (*get_tc[tk->tk_user])();
}

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Mark Kettenis
> Cc: [hidden email], [hidden email], [hidden email]
> From: Paul Irofti <[hidden email]>
> Date: Tue, 2 Jun 2020 16:45:16 +0300
>
> On 2020-06-02 16:29, Mark Kettenis wrote:
> >> From: Paul Irofti <[hidden email]>
> >> Date: Tue, 2 Jun 2020 16:23:30 +0300
> >>
> >> On 2020-05-31 20:46, Mark Kettenis wrote:
> >>>> From: Paul Irofti <[hidden email]>
> >>>> Date: Sun, 31 May 2020 19:12:54 +0300
> >>>>
> >>>> On 2020-05-31 18:25, Theo de Raadt wrote:
> >>>>> Mark Kettenis <[hidden email]> wrote:
> >>>>>
> >>>>>>> I changed __amd64 to __amd64__ because I didn't find __powerpc.  I'm
> >>>>>>> not sure, but one might move the list of arches to dlfcn/Makefile.inc
> >>>>>>> and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX.  One
> >>>>>>> might drop the tc_get_timecount function pointer and just always call
> >>>>>>> the function #ifdef TIMEKEEP.
> >>>>>>
> >>>>>> Yes, we prefer the __xxx__ variants in OpenBSD code; thanks for
> >>>>>> catching that.  The benefit of the TIMEKEEP define would be that we
> >>>>>> can eliminate the fallback code completely on architectures that don't
> >>>>>> implement this functionality.
> >>>>>
> >>>>> ...
> >>>>
> >>>> Yeah, I just followed the dlfcn/dlfcn_stubs.c example from libc. Which I
> >>>> see now it is commented out...
> >>>>
> >>>>>>> --- lib/libc/dlfcn/init.c.before Sat May 30 23:26:35 2020
> >>>>>>> +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020
> >>>>>>> @@ -70,7 +70,7 @@
> >>>>>>>    
> >>>>>>>     /* provide definitions for these */
> >>>>>>>     const dl_cb *_dl_cb __relro = NULL;
> >>>>>>> -#if defined(__amd64)
> >>>>>>> +#if defined(__amd64__) || defined(__powerpc__)
> >>>>>>>     uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> >>>>>>>     #else
> >>>>>>>     uint64_t (*const tc_get_timecount)(void) = NULL;
> >>>>>
> >>>>> 1) I think adding _md to the name is superflous.  There will never
> >>>>>       be a MI version, so tc_get_timecount() is enough.
> >>>>
> >>>> What about pvclock(4)?
> >>>
> >>> What about it?  Seems to me what you're really thinking of here is how
> >>> to support more than just one timecounter for a specific architecture.
> >>> Your function pointer is not really going to help in that case.
> >>> You'll need to dispatch to the right function based on some sort of
> >>> machine-specific clock ID.
> >>>
> >>> Oh and BTW, I don't think you're ever going to support pvclock(4).
> >>> Take a look at the code and think how you would do all that magic in
> >>> userland...
> >>>
> >>>>> 2) I hope we can get away from #ifdef __ arch__.
> >>>>>       Maybe this can be split into architectures which
> >>>>>          a) have a function called tc_get_timecount()
> >>>>>       or
> >>>>>          b) tc_get_timecount is #define'd to NULL, though I don't
> >>>>>             know which MD include file to do that in
> >>>>
> >>>> If we go with something like this or with something like -DTIMEKEEP, how
> >>>> do we handle the different PROTO_WRAP vs. PROTO_NORMAL declarations?
> >>>> Split them in MD headers? But then we end up in the same place. Sort of.
> >>>
> >>> Forget about all that for a moment.  Here is an alternative suggestion:
> >>>
> >>> On sparc64 we need to support both tick_timecounter and
> >>> sys_tick_timecounter.  So we need some sort of clockid value to
> >>> distnguish between those two.  I already suggested to use the tc_user
> >>> field of the timecounter for that.  0 means that a timecounter is not
> >>> usable in userland, a (small) positive integer means a specific
> >>> timecounter type.  The code in libc will need to know whether a
> >>> particular timecounter type can be supported.  My proposal would be to
> >>> implement a function *on all architecture* that takes the clockid as
> >>> an argument and returns a pointer to the function that implements
> >>> support for that timecounter.  On architectures without support, ir
> >>> when called with a clockid that isn't supported, that function would
> >>> simply return NULL.
> >>>
> >>
> >>
> >> What if we declare in libc/arch/*/SYS.h tc_get_timecount to either be
> >> NULL or the prototype of a function. (I know SYS.c is a bit of a
> >> stretch, if not we can create a separate header usertc.h?) And then we
> >> use tc_user to be an ID for architectures such as sparc64 that have more
> >> than one clock and inside libc/*/gen/usertc.c we check which is it and
> >> call a local static function based on it?
> >>
> >> Would that be OK?
> >
> > How are you going to support multiple timecounters on an architecture?
>
> Let's say tsc sets tc_user=1 and acpihpet sets tc_user=2. Then in
> libc/arch/amd64/gen/usertc.c I do:
>
> static uint64_t
> rdtsc()
> {
>          uint32_t hi, lo;
>          asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
>          return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> }
>
> static uint64_t
> acpihpet()
> {
>          return rdtsc(); /* JUST TO COMPILE */
> }
>
> static uint64_t (*get_tc[])(void) =
> {
>          rdtsc,
>          acpihpet,
> };
>
> uint64_t
> tc_get_timecount_md(struct timekeep *tk)
> {
>          return (*get_tc[tk->tk_user])();
> }

Ignoring the off-by-one in the array access, how is this going to work
if we add a new timecounter on the kernel side that has tc_user = 3?

So I'm suggesting again that we need a function that checks whether
libc actually supports a particular timecounter type.  And I propose
that we implement that function on *all* architectures which solves
the issue of finding an MD header file.

Note that implementing this isn't entirely trivial as there are
potential TOCTOU issues.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
>>> How are you going to support multiple timecounters on an architecture?
>>
>> Let's say tsc sets tc_user=1 and acpihpet sets tc_user=2. Then in
>> libc/arch/amd64/gen/usertc.c I do:
>>
>> static uint64_t
>> rdtsc()
>> {
>>           uint32_t hi, lo;
>>           asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
>>           return ((uint64_t)lo)|(((uint64_t)hi)<<32);
>> }
>>
>> static uint64_t
>> acpihpet()
>> {
>>           return rdtsc(); /* JUST TO COMPILE */
>> }
>>
>> static uint64_t (*get_tc[])(void) =
>> {
>>           rdtsc,
>>           acpihpet,
>> };
>>
>> uint64_t
>> tc_get_timecount_md(struct timekeep *tk)
>> {
>>           return (*get_tc[tk->tk_user])();
>> }
>
> Ignoring the off-by-one in the array access, how is this going to work
> if we add a new timecounter on the kernel side that has tc_user = 3?


We can add a check for array length. It was just code to prove how we
could do it. Kernel tc_user values have to be in sync with libc, of
course. No matter if we go this way or not.

> So I'm suggesting again that we need a function that checks whether
> libc actually supports a particular timecounter type.  And I propose
> that we implement that function on *all* architectures which solves
> the issue of finding an MD header file.
>
> Note that implementing this isn't entirely trivial as there are
> potential TOCTOU issues.

Ok. If we do that, then isn't this just going to be a sparse function
pointer table on each architecture, where most entries are NULL and the
ones for the current arch are set to gettime functions for the
corresponding clocks? And then things would look like above, right? But
with no MD includes. Yes. Is that how you see it?

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
In reply to this post by Mark Kettenis
On 2020-05-31 20:46, Mark Kettenis wrote:

> Forget about all that for a moment.  Here is an alternative suggestion:
>
> On sparc64 we need to support both tick_timecounter and
> sys_tick_timecounter.  So we need some sort of clockid value to
> distnguish between those two.  I already suggested to use the tc_user
> field of the timecounter for that.  0 means that a timecounter is not
> usable in userland, a (small) positive integer means a specific
> timecounter type.  The code in libc will need to know whether a
> particular timecounter type can be supported.  My proposal would be to
> implement a function*on all architecture*  that takes the clockid as
> an argument and returns a pointer to the function that implements
> support for that timecounter.  On architectures without support, ir
> when called with a clockid that isn't supported, that function would
> simply return NULL.

I am sorry, but the more I try to implement this in a sane way, the more
obvious it is that it is not possible. I would rather have a define
sausage than something like this.

I will try to think of something else that avoids the defines, but I do
not think that your proposal is a valid solution.

Reply | Threaded
Open this post in threaded view
|

Re: userland clock_gettime proof of concept

Paul Irofti-4
On Wed, Jun 03, 2020 at 05:13:42PM +0300, Paul Irofti wrote:

> On 2020-05-31 20:46, Mark Kettenis wrote:
> > Forget about all that for a moment.  Here is an alternative suggestion:
> >
> > On sparc64 we need to support both tick_timecounter and
> > sys_tick_timecounter.  So we need some sort of clockid value to
> > distnguish between those two.  I already suggested to use the tc_user
> > field of the timecounter for that.  0 means that a timecounter is not
> > usable in userland, a (small) positive integer means a specific
> > timecounter type.  The code in libc will need to know whether a
> > particular timecounter type can be supported.  My proposal would be to
> > implement a function*on all architecture*  that takes the clockid as
> > an argument and returns a pointer to the function that implements
> > support for that timecounter.  On architectures without support, ir
> > when called with a clockid that isn't supported, that function would
> > simply return NULL.
>
> I am sorry, but the more I try to implement this in a sane way, the more
> obvious it is that it is not possible. I would rather have a define sausage
> than something like this.
>
> I will try to think of something else that avoids the defines, but I do not
> think that your proposal is a valid solution.

OK. I think I found an elegant way around this using the Makefile
system: if usertc.c is not present in arch/${MACHINE_CPU}/gen, then a
stub gen/usertc.c file is built that just sets the function pointer to
NULL. This avoids the need for the define checks in dlfcn/init.c and I
think fixes the rest of the issues discussed around this bit.

Also included in the diff are a few other fixes and regression tests.
I left the rdtsc and acpihpet example (with no functional acpihpet
support) just to showcase how we can handle multiple clocks on
architectures that have them.

I could not add support for other architectures as I still do not have
access to my machines.

Is the Makefile approach good enough?


diff --git lib/libc/arch/amd64/gen/Makefile.inc lib/libc/arch/amd64/gen/Makefile.inc
index e995309ed71..f6349e2b974 100644
--- lib/libc/arch/amd64/gen/Makefile.inc
+++ lib/libc/arch/amd64/gen/Makefile.inc
@@ -2,6 +2,7 @@
 
 SRCS+= _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
  sigsetjmp.S
-SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c
+SRCS+= fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c signbitl.c \
+ usertc.c
 SRCS+= flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S \
  fpsetround.S fpsetsticky.S
diff --git lib/libc/arch/amd64/gen/usertc.c lib/libc/arch/amd64/gen/usertc.c
new file mode 100644
index 00000000000..cec1b484865
--- /dev/null
+++ lib/libc/arch/amd64/gen/usertc.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+static uint64_t
+rdtsc()
+{
+ uint32_t hi, lo;
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
+
+static uint64_t
+acpihpet()
+{
+ return rdtsc(); /* JUST TO COMPILE */
+}
+
+static uint64_t (*get_tc[])(void) =
+{
+ rdtsc,
+ acpihpet,
+};
+
+uint64_t
+tc_get_timecount(struct timekeep *tk)
+{
+ return (*get_tc[tk->tk_user - 1])();
+}
+uint64_t (*const _tc_get_timecount)(struct timekeep *tk) = tc_get_timecount;
diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout)
  struct timespec pollstart, pollend, elapsed;
  int r;
 
- if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
  return -1;
 
  while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
- if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
  return -1;
  timespecsub(&pollend, &pollstart, &elapsed);
  timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
  asr->a_rtime = 0;
  }
 
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return;
 
  if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
  char buf[_PASSWORD_LEN];
  int duration;
 
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
  bcrypt_newhash("testpassword", r, buf, sizeof(buf));
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+ WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
  duration = after.tv_sec - before.tv_sec;
  duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..19d0a2c3ad6 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -20,6 +20,7 @@
 
 #include <sys/types.h>
 #include <sys/syscall.h>
+#include <sys/timetc.h> /* timekeep */
 
 #ifndef PIC
 #include <sys/mman.h>
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definitions for these */
 int _pagesize = 0;
+void *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -105,6 +107,10 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
  phnum = aux->au_v;
  break;
 #endif /* !PIC */
+ case AUX_openbsd_timekeep:
+ if (_tc_get_timecount)
+ _timekeep = (void *)aux->au_v;
+ break;
  }
  }
 
diff --git lib/libc/gen/Makefile.inc lib/libc/gen/Makefile.inc
index cf36ead40e9..da52d9fbb13 100644
--- lib/libc/gen/Makefile.inc
+++ lib/libc/gen/Makefile.inc
@@ -27,6 +27,10 @@ SRCS+=  alarm.c assert.c auth_subr.c authenticate.c \
         utime.c valloc.c vis.c wait.c wait3.c waitpid.c warn.c \
         warnc.c warnx.c vwarn.c vwarnc.c vwarnx.c verr.c verrc.c verrx.c
 
+.if !exists (${LIBCSRCDIR}/arch/${MACHINE_CPU}/gen/usertc.c)
+SRCS+=  usertc.c
+.endif
+
 # machine-dependent gen sources
 # m-d Makefile.inc must include sources for:
 # _setjmp() fabs() frexp() infinity ldexp() modf() __nan
diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
index 1286a96fe40..32f86eda50f 100644
--- lib/libc/gen/auth_subr.c
+++ lib/libc/gen/auth_subr.c
@@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_EXPIRED;
@@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
 
  if (as->pwd && (quad_t)as->pwd->pw_change) {
  if (as->now.tv_sec == 0)
- gettimeofday(&as->now, NULL);
+ WRAP(gettimeofday)(&as->now, NULL);
  if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
  as->state &= ~AUTH_ALLOW;
  as->state |= AUTH_PWEXPIRED;
diff --git lib/libc/gen/time.c lib/libc/gen/time.c
index 3bbd0d733d1..b3ce9a800f1 100644
--- lib/libc/gen/time.c
+++ lib/libc/gen/time.c
@@ -36,7 +36,7 @@ time(time_t *t)
 {
  struct timeval tt;
 
- if (gettimeofday(&tt, NULL) == -1)
+ if (WRAP(gettimeofday)(&tt, NULL) == -1)
  return (-1);
  if (t)
  *t = (time_t)tt.tv_sec;
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
  return ((clock_t)-1);
  tp->tms_cutime = CONVTCK(ru.ru_utime);
  tp->tms_cstime = CONVTCK(ru.ru_stime);
- if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
  return ((clock_t)-1);
  return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
  switch (base) {
  case TIME_UTC:
- if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+ if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
  return 0;
  break;
  default:
diff --git lib/libc/gen/usertc.c lib/libc/gen/usertc.c
new file mode 100644
index 00000000000..43fa1728361
--- /dev/null
+++ lib/libc/gen/usertc.c
@@ -0,0 +1,24 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/timetc.h>
+
+/*
+ * Stub. Add implementation in arch/${MACHINE_CPU}/gen/usertc.c.
+ */
+uint64_t (*const _tc_get_timecount)(struct timekeep *tk) = NULL;
diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
index ed112320fa2..df717021cab 100644
--- lib/libc/hidden/sys/time.h
+++ lib/libc/hidden/sys/time.h
@@ -24,7 +24,7 @@ PROTO_NORMAL(adjfreq);
 PROTO_NORMAL(adjtime);
 PROTO_NORMAL(futimes);
 PROTO_NORMAL(getitimer);
-PROTO_NORMAL(gettimeofday);
+PROTO_WRAP(gettimeofday);
 PROTO_NORMAL(setitimer);
 PROTO_NORMAL(settimeofday);
 PROTO_NORMAL(utimes);
diff --git lib/libc/hidden/sys/timetc.h lib/libc/hidden/sys/timetc.h
new file mode 100644
index 00000000000..b0c982ebe57
--- /dev/null
+++ lib/libc/hidden/sys/timetc.h
@@ -0,0 +1,38 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _LIBC_SYS_TIMETC_H_
+#define _LIBC_SYS_TIMETC_H_
+
+#define _LIBC
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include_next <sys/timetc.h>
+
+__BEGIN_HIDDEN_DECLS
+extern void *_timekeep;
+
+extern uint64_t (*const _tc_get_timecount)(struct timekeep *tk);
+
+void _microtime(struct timeval *tvp, struct timekeep *tk);
+void _nanotime(struct timespec *tsp, struct timekeep *tk);
+void _nanoruntime(struct timespec *ts, struct timekeep *tk);
+void _nanouptime(struct timespec *tsp, struct timekeep *tk);
+__END_HIDDEN_DECLS
+
+#endif /* !_LIBC_SYS_TIMETC_H_ */
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..d8e1e0caf64 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
  if (ru_prf != NULL)
  arc4random_buf(ru_prf, sizeof(*ru_prf));
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  ru_reseed = ts.tv_sec + RU_OUT;
  ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
  u_int r;
  static void *randomid_mutex;
 
- clock_gettime(CLOCK_MONOTONIC, &ts);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
  pid = getpid();
 
  _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
index 402d98cede4..917a6d42b8a 100644
--- lib/libc/rpc/auth_unix.c
+++ lib/libc/rpc/auth_unix.c
@@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int len, int *aup_gids)
  /*
  * fill in param struct from the given params
  */
- (void)gettimeofday(&now,  NULL);
+ (void)WRAP(gettimeofday)(&now,  NULL);
  aup.aup_time = now.tv_sec;
  aup.aup_machname = machname;
  aup.aup_uid = uid;
@@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
  goto done;
 
  /* update the time and serialize in place */
- (void)gettimeofday(&now, NULL);
+ (void)WRAP(gettimeofday)(&now, NULL);
  aup.aup_time = now.tv_sec;
  xdrs.x_op = XDR_ENCODE;
  XDR_SETPOS(&xdrs, 0);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
  pfd[0].events = POLLIN;
  TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
  delta = wait;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  r = ppoll(pfd, 1, &delta, NULL);
  save_errno = errno;
 
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&start, &after, &duration);
  timespecsub(&wait, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
  reply_msg.acpted_rply.ar_results.where = resultsp;
  reply_msg.acpted_rply.ar_results.proc = xresults;
 
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  for (;;) {
  switch (ppoll(pfd, 1, &wait, NULL)) {
  case 0:
@@ -283,7 +283,7 @@ send_again:
  /* FALLTHROUGH */
  case -1:
  if (errno == EINTR) {
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecadd(&time_waited, &duration, &time_waited);
  if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  * A timeout is fatal for the connection.
  */
  delta = wait_per_try;
- clock_gettime(CLOCK_MONOTONIC, &start);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
  pfd[0].fd = sock;
  pfd[0].events = POLLIN;
  do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
  case -1:
  if (errno != EINTR)
  goto fatal_err;
- clock_gettime(CLOCK_MONOTONIC, &after);
+ WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
  timespecsub(&after, &start, &duration);
  timespecsub(&wait_per_try, &duration, &delta);
  if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d57418d81bf 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
- w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+ w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+ w_clock_gettime.c w_gettimeofday.c microtime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c
 ASM= __semctl.o __syscall.o __thrsigdivert.o \
  access.o acct.o adjfreq.o adjtime.o \
  bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
- clock_getres.o clock_gettime.o clock_settime.o \
+ clock_getres.o clock_settime.o \
  dup.o dup2.o dup3.o \
  execve.o \
  faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -54,7 +55,7 @@ ASM= __semctl.o __syscall.o __thrsigdivert.o \
  getgroups.o getitimer.o getpeername.o getpgid.o \
  getpriority.o getresgid.o getresuid.o \
  getrlimit.o getrusage.o getsid.o getsockname.o \
- getsockopt.o gettimeofday.o ioctl.o \
+ getsockopt.o ioctl.o \
  kevent.o kill.o kqueue.o ktrace.o lchown.o \
  link.o linkat.o listen.o lstat.o madvise.o \
  minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
@@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} \
+ clock_gettime.o gettimeofday.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
new file mode 100644
index 00000000000..6eb3cd40d28
--- /dev/null
+++ lib/libc/sys/microtime.c
@@ -0,0 +1,190 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2000 Poul-Henning Kamp <[hidden email]>
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/timetc.h>
+
+#include <time.h>
+
+/*
+ * Number of counter ticks elapsed since the value recorded in
+ * tk_offset_count, wrapped to the counter's width by tk_counter_mask.
+ */
+static inline u_int
+tc_delta(struct timekeep *tk)
+{
+	u_int elapsed;
+
+	elapsed = _tc_get_timecount(tk) - tk->tk_offset_count;
+	return (elapsed & tk->tk_counter_mask);
+}
+
+/* *ct = *bt + x, where x is added to the fraction and may carry. */
+static inline void
+bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
+{
+	const uint64_t frac = bt->frac;
+	const uint64_t sum = frac + x;
+
+	/* An unsigned sum smaller than an operand means it wrapped. */
+	ct->sec = bt->sec + (sum < frac ? 1 : 0);
+	ct->frac = sum;
+}
+
+/* *dt = *bt + *ct, carrying a wrapped fraction into the seconds. */
+static inline void
+bintimeadd(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+	const uint64_t sum = bt->frac + ct->frac;
+	const int carry = (sum < bt->frac);
+
+	dt->sec = bt->sec + ct->sec + carry;
+	dt->frac = sum;
+}
+
+/* *dt = *bt - *ct, borrowing from the seconds when the fraction wraps. */
+static inline void
+bintimesub(const struct bintime *bt, const struct bintime *ct,
+    struct bintime *dt)
+{
+	const uint64_t diff = bt->frac - ct->frac;
+	const int borrow = (bt->frac < diff);
+
+	dt->sec = bt->sec - ct->sec - borrow;
+	dt->frac = diff;
+}
+
+/* Seconds, plus the upper 32 fraction bits scaled to nanoseconds. */
+static inline void
+BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
+{
+	const uint32_t hi = (uint32_t)(bt->frac >> 32);
+
+	ts->tv_sec = bt->sec;
+	ts->tv_nsec = (long)(((uint64_t)1000000000 * hi) >> 32);
+}
+
+/* Seconds, plus the upper 32 fraction bits scaled to microseconds. */
+static inline void
+BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
+{
+	const uint32_t hi = (uint32_t)(bt->frac >> 32);
+
+	tv->tv_sec = bt->sec;
+	tv->tv_usec = (long)(((uint64_t)1000000 * hi) >> 32);
+}
+
+/*
+ * The three readers below take a snapshot of the timekeep page.  A
+ * snapshot is thrown away and retaken when tk_generation was 0 at the
+ * start of the read or no longer has the same value at the end of it.
+ */
+
+/* tk_offset advanced by the scaled tick delta. */
+static void
+binuptime(struct bintime *bt, struct timekeep *tk)
+{
+	u_int gen;
+
+	for (;;) {
+		gen = tk->tk_generation;
+		membar_consumer();
+		*bt = tk->tk_offset;
+		bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+		membar_consumer();
+		if (gen != 0 && gen == tk->tk_generation)
+			break;
+	}
+}
+
+/* tk_offset advanced by the scaled tick delta, minus tk_naptime. */
+static void
+binruntime(struct bintime *bt, struct timekeep *tk)
+{
+	u_int gen;
+
+	for (;;) {
+		gen = tk->tk_generation;
+		membar_consumer();
+		bintimeaddfrac(&tk->tk_offset, tk->tk_scale * tc_delta(tk), bt);
+		bintimesub(bt, &tk->tk_naptime, bt);
+		membar_consumer();
+		if (gen != 0 && gen == tk->tk_generation)
+			break;
+	}
+}
+
+/* tk_offset advanced by the scaled tick delta, plus tk_boottime. */
+static void
+bintime(struct bintime *bt, struct timekeep *tk)
+{
+	u_int gen;
+
+	for (;;) {
+		gen = tk->tk_generation;
+		membar_consumer();
+		*bt = tk->tk_offset;
+		bintimeaddfrac(bt, tk->tk_scale * tc_delta(tk), bt);
+		bintimeadd(bt, &tk->tk_boottime, bt);
+		membar_consumer();
+		if (gen != 0 && gen == tk->tk_generation)
+			break;
+	}
+}
+
+/* bintime() result as a struct timeval. */
+void
+_microtime(struct timeval *tvp, struct timekeep *tk)
+{
+	struct bintime now;
+
+	bintime(&now, tk);
+	BINTIME_TO_TIMEVAL(&now, tvp);
+}
+
+/* bintime() result as a struct timespec. */
+void
+_nanotime(struct timespec *tsp, struct timekeep *tk)
+{
+	struct bintime now;
+
+	bintime(&now, tk);
+	BINTIME_TO_TIMESPEC(&now, tsp);
+}
+
+/* binruntime() result as a struct timespec. */
+void
+_nanoruntime(struct timespec *ts, struct timekeep *tk)
+{
+	struct bintime now;
+
+	binruntime(&now, tk);
+	BINTIME_TO_TIMESPEC(&now, ts);
+}
+
+/* binuptime() result as a struct timespec. */
+void
+_nanouptime(struct timespec *tsp, struct timekeep *tk)
+{
+	struct bintime now;
+
+	binuptime(&now, tk);
+	BINTIME_TO_TIMESPEC(&now, tsp);
+}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..ff72c9fb64d
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,51 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+#include <time.h>
+
+/*
+ * clock_gettime(2) wrapper.  When a timekeep page is present and its
+ * tk_user flag is set, the realtime, uptime, monotonic and boottime clocks
+ * are computed in userland; every other case is passed to the system call.
+ */
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+	struct timekeep *tk = _timekeep;
+
+	if (tk != NULL && tk->tk_user != 0) {
+		switch (clock_id) {
+		case CLOCK_REALTIME:
+			_nanotime(tp, tk);
+			return 0;
+		case CLOCK_UPTIME:
+			_nanoruntime(tp, tk);
+			return 0;
+		case CLOCK_MONOTONIC:
+		case CLOCK_BOOTTIME:
+			_nanouptime(tp, tk);
+			return 0;
+		default:
+			break;
+		}
+	}
+
+	return clock_gettime(clock_id, tp);
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
new file mode 100644
index 00000000000..0b198190485
--- /dev/null
+++ lib/libc/sys/w_gettimeofday.c
@@ -0,0 +1,46 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Robert Nagy <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/timetc.h>
+
+/*
+ * gettimeofday(2) wrapper.  Falls back to the system call unless a timekeep
+ * page is present with tk_user set.  Otherwise *tp, if requested, is computed
+ * in userland and *tzp, if requested, is filled with a zeroed timezone.
+ */
+int
+WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
+{
+	struct timekeep *timekeep = _timekeep;
+	static const struct timezone zerotz = { 0, 0 };
+
+	if (timekeep == NULL || timekeep->tk_user == 0)
+		return gettimeofday(tp, tzp);
+
+	if (tp)
+		_microtime(tp, timekeep);
+
+	/*
+	 * Store through the pointer: assigning to tzp itself only changes
+	 * the local copy and leaves the caller's struct untouched.
+	 */
+	if (tzp)
+		*tzp = zerotz;
+
+	return 0;
+}
+DEF_WRAP(gettimeofday);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *
  if (abs == NULL)
  return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
- if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+ if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
  return (EINVAL);
 
  rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git regress/lib/libc/timekeep/Makefile regress/lib/libc/timekeep/Makefile
new file mode 100644
index 00000000000..a7f3080290d
--- /dev/null
+++ regress/lib/libc/timekeep/Makefile
@@ -0,0 +1,5 @@
+# $OpenBSD$
+
+PROGS= test_clock_gettime test_time_skew test_gettimeofday
+
+.include <bsd.regress.mk>
diff --git regress/lib/libc/timekeep/test_clock_gettime.c regress/lib/libc/timekeep/test_clock_gettime.c
new file mode 100644
index 00000000000..859ec368215
--- /dev/null
+++ regress/lib/libc/timekeep/test_clock_gettime.c
@@ -0,0 +1,51 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+
+/*
+ * Read one clock: the call must succeed and leave a normalized tv_nsec.
+ * A macro rather than a function so that a failed assert reports the line
+ * of the clock being checked.
+ */
+#define CHECK_CLOCK(id) do {						\
+	struct timespec tp = {0};					\
+	ASSERT_EQ(0, clock_gettime((id), &tp));				\
+	assert(tp.tv_nsec >= 0 && tp.tv_nsec < 1000000000L);		\
+} while (0)
+
+void
+check(void)
+{
+	CHECK_CLOCK(CLOCK_REALTIME);
+	CHECK_CLOCK(CLOCK_MONOTONIC);
+	CHECK_CLOCK(CLOCK_BOOTTIME);
+	CHECK_CLOCK(CLOCK_UPTIME);
+
+	CHECK_CLOCK(CLOCK_PROCESS_CPUTIME_ID);
+	CHECK_CLOCK(CLOCK_THREAD_CPUTIME_ID);
+}
+
+int
+main(void)
+{
+	check();
+	return 0;
+}
diff --git regress/lib/libc/timekeep/test_gettimeofday.c regress/lib/libc/timekeep/test_gettimeofday.c
new file mode 100644
index 00000000000..ea90a1be7e0
--- /dev/null
+++ regress/lib/libc/timekeep/test_gettimeofday.c
@@ -0,0 +1,42 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <sys/time.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+#define ASSERT_NE(a, b) assert((a) != (b))
+
+void
+check(void)
+{
+	struct timeval tv = {0};
+	/* Sentinels no real timezone uses; the call must overwrite them. */
+	struct timezone tzp = { 0x7fff, -1 };
+
+	ASSERT_EQ(0, gettimeofday(&tv, NULL));
+	ASSERT_EQ(0, gettimeofday(&tv, &tzp));
+	ASSERT_NE(0x7fff, tzp.tz_minuteswest);
+	ASSERT_NE(-1, tzp.tz_dsttime);
+}
+
+int
+main(void)
+{
+	check();
+	return 0;
+}
diff --git regress/lib/libc/timekeep/test_time_skew.c regress/lib/libc/timekeep/test_time_skew.c
new file mode 100644
index 00000000000..dfa9481c091
--- /dev/null
+++ regress/lib/libc/timekeep/test_time_skew.c
@@ -0,0 +1,57 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/time.h>
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define ASSERT_EQ(a, b) assert((a) == (b))
+#define ASSERT_NE(a, b) assert((a) != (b))
+
+/*
+ * Take two CLOCK_MONOTONIC readings separated by a short sleep and check
+ * that the second one is not earlier than the first.
+ */
+void
+check(void)
+{
+	const struct timespec tout = { 0, 100000 };
+	struct timespec tp1, tp2;
+
+	ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp1));
+
+	nanosleep(&tout, NULL);
+
+	ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &tp2));
+
+	/* tp1 should never be larger than tp2 */
+	ASSERT_NE(1, timespeccmp(&tp1, &tp2, >));
+}
+
+int
+main(void)
+{
+	int i;
+
+	for (i = 0; i < 1000; i++)
+		check();
+
+	return 0;
+}
diff --git sys/arch/alpha/alpha/clock.c sys/arch/alpha/alpha/clock.c
index 3f5f2c5b42b..6eaf8b107c6 100644
--- sys/arch/alpha/alpha/clock.c
+++ sys/arch/alpha/alpha/clock.c
@@ -64,7 +64,7 @@ int clk_irq = 0;
 
 u_int rpcc_get_timecount(struct timecounter *);
 struct timecounter rpcc_timecounter = {
- rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL
+ rpcc_get_timecount, NULL, ~0u, 0, "rpcc", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
index 7a1dcb4ad75..3db93d88dec 100644
--- sys/arch/amd64/amd64/tsc.c
+++ sys/arch/amd64/amd64/tsc.c
@@ -50,7 +50,7 @@ extern u_int32_t lapic_per_second;
 #endif
 
 struct timecounter tsc_timecounter = {
- tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL
+ tsc_get_timecount, NULL, ~0u, 0, "tsc", -1000, NULL, 1
 };
 
 uint64_t
diff --git sys/arch/amd64/isa/clock.c sys/arch/amd64/isa/clock.c
index 613f7ee0e0f..00da0c6a8d0 100644
--- sys/arch/amd64/isa/clock.c
+++ sys/arch/amd64/isa/clock.c
@@ -116,7 +116,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 
 int clockintr(void *);
diff --git sys/arch/arm64/dev/agtimer.c sys/arch/arm64/dev/agtimer.c
index 29394141ad5..6b7c6db862f 100644
--- sys/arch/arm64/dev/agtimer.c
+++ sys/arch/arm64/dev/agtimer.c
@@ -43,7 +43,7 @@ int32_t agtimer_frequency = TIMER_FREQUENCY;
 u_int agtimer_get_timecount(struct timecounter *);
 
 static struct timecounter agtimer_timecounter = {
- agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL
+ agtimer_get_timecount, NULL, 0x7fffffff, 0, "agtimer", 0, NULL, 0
 };
 
 struct agtimer_pcpu_softc {
diff --git sys/arch/armv7/omap/gptimer.c sys/arch/armv7/omap/gptimer.c
index 7605845d5e2..061542d532f 100644
--- sys/arch/armv7/omap/gptimer.c
+++ sys/arch/armv7/omap/gptimer.c
@@ -117,7 +117,7 @@ int gptimer_irq = 0;
 u_int gptimer_get_timecount(struct timecounter *);
 
 static struct timecounter gptimer_timecounter = {
- gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL
+ gptimer_get_timecount, NULL, 0x7fffffff, 0, "gptimer", 0, NULL, 0
 };
 
 volatile u_int32_t nexttickevent;
diff --git sys/arch/armv7/sunxi/sxitimer.c sys/arch/armv7/sunxi/sxitimer.c
index 14a243c78d0..41028f9a602 100644
--- sys/arch/armv7/sunxi/sxitimer.c
+++ sys/arch/armv7/sunxi/sxitimer.c
@@ -89,7 +89,7 @@ void sxitimer_delay(u_int);
 u_int sxitimer_get_timecount(struct timecounter *);
 
 static struct timecounter sxitimer_timecounter = {
- sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL
+ sxitimer_get_timecount, NULL, 0xffffffff, 0, "sxitimer", 0, NULL, 0
 };
 
 bus_space_tag_t sxitimer_iot;
diff --git sys/arch/hppa/dev/clock.c sys/arch/hppa/dev/clock.c
index 4c594ab5ec7..8cce6c3a893 100644
--- sys/arch/hppa/dev/clock.c
+++ sys/arch/hppa/dev/clock.c
@@ -47,7 +47,7 @@ int cpu_hardclock(void *);
 u_int itmr_get_timecount(struct timecounter *);
 
 struct timecounter itmr_timecounter = {
- itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL
+ itmr_get_timecount, NULL, 0xffffffff, 0, "itmr", 0, NULL, 0
 };
 
 extern todr_chip_handle_t todr_handle;
diff --git sys/arch/i386/isa/clock.c sys/arch/i386/isa/clock.c
index 09a6db983f2..dd74bd425ad 100644
--- sys/arch/i386/isa/clock.c
+++ sys/arch/i386/isa/clock.c
@@ -129,7 +129,7 @@ u_int i8254_get_timecount(struct timecounter *tc);
 u_int i8254_simple_get_timecount(struct timecounter *tc);
 
 static struct timecounter i8254_timecounter = {
- i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL
+ i8254_get_timecount, NULL, ~0u, TIMER_FREQ, "i8254", 0, NULL, 0
 };
 struct mutex timer_mutex = MUTEX_INITIALIZER(IPL_HIGH);
 u_long rtclock_tval;
diff --git sys/arch/i386/pci/geodesc.c sys/arch/i386/pci/geodesc.c
index 9d9f061eef9..bb8e4c7f9ae 100644
--- sys/arch/i386/pci/geodesc.c
+++ sys/arch/i386/pci/geodesc.c
@@ -65,7 +65,9 @@ struct timecounter geodesc_timecounter = {
  0xffffffff, /* counter_mask */
  27000000, /* frequency */
  "GEOTSC", /* name */
- 2000 /* quality */
+ 2000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 int
diff --git sys/arch/i386/pci/gscpm.c sys/arch/i386/pci/gscpm.c
index 8b8aa4ac430..a6f324e66f3 100644
--- sys/arch/i386/pci/gscpm.c
+++ sys/arch/i386/pci/gscpm.c
@@ -55,7 +55,9 @@ struct timecounter gscpm_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "GSCPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach gscpm_ca = {
diff --git sys/arch/i386/pci/ichpcib.c sys/arch/i386/pci/ichpcib.c
index 6abf1627de2..629a86a14ff 100644
--- sys/arch/i386/pci/ichpcib.c
+++ sys/arch/i386/pci/ichpcib.c
@@ -63,7 +63,9 @@ struct timecounter ichpcib_timecounter = {
  0xffffff, /* counter_mask */
  3579545, /* frequency */
  "ICHPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0 /* expose to user */
 };
 
 struct cfattach ichpcib_ca = {
diff --git sys/arch/loongson/loongson/generic3a_machdep.c sys/arch/loongson/loongson/generic3a_machdep.c
index ac3f1db6ccd..53489b07549 100644
--- sys/arch/loongson/loongson/generic3a_machdep.c
+++ sys/arch/loongson/loongson/generic3a_machdep.c
@@ -98,7 +98,9 @@ struct timecounter rs780e_timecounter = {
  .tc_counter_mask = 0xffffffffu, /* truncated to 32 bits */
  .tc_frequency = HPET_FREQ,
  .tc_name = "hpet",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /* Firmware entry points */
diff --git sys/arch/luna88k/luna88k/clock.c sys/arch/luna88k/luna88k/clock.c
index a04120987e0..6580a4a46bf 100644
--- sys/arch/luna88k/luna88k/clock.c
+++ sys/arch/luna88k/luna88k/clock.c
@@ -112,7 +112,9 @@ struct timecounter clock_tc = {
  .tc_counter_mask = 0xffffffff,
  .tc_frequency = 0, /* will be filled in */
  .tc_name = "clock",
- .tc_quality = 0
+ .tc_quality = 0,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 /*
diff --git sys/arch/macppc/macppc/clock.c sys/arch/macppc/macppc/clock.c
index 4a44a92cfc0..8c3ad620be8 100644
--- sys/arch/macppc/macppc/clock.c
+++ sys/arch/macppc/macppc/clock.c
@@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320;
 static int32_t ticks_per_intr;
 
 static struct timecounter tb_timecounter = {
- tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL
+ tb_get_timecount, NULL, 0x7fffffff, 0, "tb", 0, NULL, 0
 };
 
 /* calibrate the timecounter frequency for the listed models */
diff --git sys/arch/mips64/mips64/mips64_machdep.c sys/arch/mips64/mips64/mips64_machdep.c
index d4a42ed5acc..5c4dbadb5bb 100644
--- sys/arch/mips64/mips64/mips64_machdep.c
+++ sys/arch/mips64/mips64/mips64_machdep.c
@@ -327,7 +327,9 @@ struct timecounter cp0_timecounter = {
  0xffffffff, /* counter_mask */
  0, /* frequency */
  "CP0", /* name */
- 0 /* quality */
+ 0, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 u_int
diff --git sys/arch/octeon/octeon/machdep.c sys/arch/octeon/octeon/machdep.c
index 1387af284ca..2b22a9e942c 100644
--- sys/arch/octeon/octeon/machdep.c
+++ sys/arch/octeon/octeon/machdep.c
@@ -151,8 +151,9 @@ struct timecounter ioclock_timecounter = {
  .tc_name = "ioclock",
  .tc_quality = 0, /* ioclock can be overridden
  * by cp0 counter */
- .tc_priv = 0 /* clock register,
+ .tc_priv = 0, /* clock register,
  * determined at runtime */
+ .tc_user = 0, /* expose to user */
 };
 
 static int
diff --git sys/arch/sgi/sgi/ip27_machdep.c sys/arch/sgi/sgi/ip27_machdep.c
index ba7fa558b96..2a2cc144242 100644
--- sys/arch/sgi/sgi/ip27_machdep.c
+++ sys/arch/sgi/sgi/ip27_machdep.c
@@ -111,7 +111,9 @@ struct timecounter ip27_hub_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncated to 32 bits. */
  .tc_frequency = 1250000,
  .tc_name = "hubrt",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = 0,
+ .tc_user = 0,
 };
 
 volatile uint64_t ip27_spinup_a0;
diff --git sys/arch/sgi/xbow/xheart.c sys/arch/sgi/xbow/xheart.c
index 56b29915c70..827775512ac 100644
--- sys/arch/sgi/xbow/xheart.c
+++ sys/arch/sgi/xbow/xheart.c
@@ -83,7 +83,9 @@ struct timecounter xheart_timecounter = {
  .tc_counter_mask = 0xffffffff, /* truncate 52-bit counter to 32-bit */
  .tc_frequency = 12500000,
  .tc_name = "heart",
- .tc_quality = 100
+ .tc_quality = 100,
+ .tc_priv = NULL,
+ .tc_user = 0,
 };
 
 extern uint32_t ip30_lights_frob(uint32_t, struct trapframe *);
diff --git sys/arch/sparc64/dev/psycho.c sys/arch/sparc64/dev/psycho.c
index e24f804dff6..1a7a1afa8c2 100644
--- sys/arch/sparc64/dev/psycho.c
+++ sys/arch/sparc64/dev/psycho.c
@@ -127,7 +127,7 @@ extern struct sparc_pci_chipset _sparc_pci_chipset;
 u_int stick_get_timecount(struct timecounter *);
 
 struct timecounter stick_timecounter = {
- stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL
+ stick_get_timecount, NULL, ~0u, 0, "stick", 1000, NULL, 0
 };
 
 /*
diff --git sys/arch/sparc64/sparc64/clock.c sys/arch/sparc64/sparc64/clock.c
index fd5e8a9c15b..5c2e47d386b 100644
--- sys/arch/sparc64/sparc64/clock.c
+++ sys/arch/sparc64/sparc64/clock.c
@@ -109,13 +109,13 @@ struct cfdriver clock_cd = {
 u_int tick_get_timecount(struct timecounter *);
 
 struct timecounter tick_timecounter = {
- tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL
+ tick_get_timecount, NULL, ~0u, 0, "tick", 0, NULL, 0
 };
 
 u_int sys_tick_get_timecount(struct timecounter *);
 
 struct timecounter sys_tick_timecounter = {
- sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL
+ sys_tick_get_timecount, NULL, ~0u, 0, "sys_tick", 1000, NULL, 0
 };
 
 /*
diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
index d0ee72cec9b..13177a909da 100644
--- sys/dev/acpi/acpihpet.c
+++ sys/dev/acpi/acpihpet.c
@@ -45,7 +45,9 @@ static struct timecounter hpet_timecounter = {
  0xffffffff, /* counter_mask (32 bits) */
  0, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define HPET_TIMERS 3
diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
index cdc8c99a17a..89b5a397e47 100644
--- sys/dev/acpi/acpitimer.c
+++ sys/dev/acpi/acpitimer.c
@@ -36,7 +36,9 @@ static struct timecounter acpi_timecounter = {
  0x00ffffff, /* counter_mask (24 bits) */
  ACPI_FREQUENCY, /* frequency */
  0, /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct acpitimer_softc {
diff --git sys/dev/pci/amdpm.c sys/dev/pci/amdpm.c
index 6df82858016..9610d5bc1f0 100644
--- sys/dev/pci/amdpm.c
+++ sys/dev/pci/amdpm.c
@@ -82,7 +82,9 @@ static struct timecounter amdpm_timecounter = {
  0xffffff, /* counter_mask */
  AMDPM_FREQUENCY, /* frequency */
  "AMDPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 #define AMDPM_CONFREG 0x40
diff --git sys/dev/pci/viapm.c sys/dev/pci/viapm.c
index db806eedf80..ce33cd175e6 100644
--- sys/dev/pci/viapm.c
+++ sys/dev/pci/viapm.c
@@ -177,7 +177,9 @@ static struct timecounter viapm_timecounter = {
  0xffffff, /* counter_mask */
  VIAPM_FREQUENCY, /* frequency */
  "VIAPM", /* name */
- 1000 /* quality */
+ 1000, /* quality */
+ NULL, /* private bits */
+ 0, /* expose to user */
 };
 
 struct timeout viapm_timeout;
diff --git sys/dev/pv/hyperv.c sys/dev/pv/hyperv.c
index b32facdacb1..b9ee2feec4c 100644
--- sys/dev/pv/hyperv.c
+++ sys/dev/pv/hyperv.c
@@ -141,7 +141,7 @@ struct {
 };
 
 struct timecounter hv_timecounter = {
- hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001
+ hv_gettime, 0, 0xffffffff, 10000000, "hyperv", 9001, NULL, 0
 };
 
 struct cfdriver hyperv_cd = {
diff --git sys/dev/pv/pvclock.c sys/dev/pv/pvclock.c
index 6b242f7448d..b80e4d2a484 100644
--- sys/dev/pv/pvclock.c
+++ sys/dev/pv/pvclock.c
@@ -74,7 +74,7 @@ struct cfdriver pvclock_cd = {
 };
 
 struct timecounter pvclock_timecounter = {
- pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
+ pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL, 0
 };
 
 int
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES 8
+#define ELF_AUX_ENTRIES 9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
  a->au_v = ap->arg_entry;
  a++;
 
+ a->au_id = AUX_openbsd_timekeep;
+ a->au_v = p->p_p->ps_timekeep;
+ a++;
+
  a->au_id = AUX_null;
  a->au_v = 0;
  a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..8869f1fb89f 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/timetc.h>
+
+struct uvm_object *timekeep_object;
+struct timekeep* timekeep;
+
 void unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
  /* map the process's signal trampoline code */
  if (exec_sigcode_map(pr, pack.ep_emul))
  goto free_pack_abort;
+ /* map the process's timekeep page */
+ if (exec_timekeep_map(pr))
+ goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
  /* perform md specific mappings that process might need */
@@ -863,3 +876,67 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
  return (0);
 }
+
+/*
+ * Map the shared timekeep page read-only into the address space of pr and
+ * record where it landed in pr->ps_timekeep.  The first call also creates
+ * the backing object and maps it read/write into the kernel map; the global
+ * timekeep pointer then refers to that kernel mapping.
+ *
+ * Returns 0 on success, ENOMEM if a mapping could not be set up.
+ */
+int
+exec_timekeep_map(struct process *pr)
+{
+	size_t timekeep_sz = sizeof(struct timekeep);
+	size_t map_sz = round_page(timekeep_sz);
+
+	/*
+	 * Similar to the sigcode object, except that there is a single timekeep
+	 * object, and not one per emulation.
+	 */
+	if (timekeep_object == NULL) {
+		vaddr_t va = 0;	/* no hint */
+
+		timekeep_object = uao_create(timekeep_sz, 0);
+
+		if (uvm_map(kernel_map, &va, map_sz, timekeep_object,
+		    0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
+		    MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+			/* Drop the only reference and retry on the next exec. */
+			uao_detach(timekeep_object);
+			timekeep_object = NULL;
+			return (ENOMEM);
+		}
+
+		/*
+		 * tc_update_timekeep() stores to this page from tc_windup();
+		 * wire it so that those stores never have to fault it in.
+		 */
+		if (uvm_fault_wire(kernel_map, va, va + map_sz,
+		    PROT_READ | PROT_WRITE)) {
+			/* Unmapping releases the reference the mapping held. */
+			uvm_unmap(kernel_map, va, va + map_sz);
+			timekeep_object = NULL;
+			return (ENOMEM);
+		}
+
+		/* Permanent reference: the object is never torn down. */
+		uao_reference(timekeep_object);
+
+		timekeep = (struct timekeep *)va;
+		timekeep->tk_major = 0;
+		timekeep->tk_minor = 0;
+	}
+
+	pr->ps_timekeep = 0;	/* no hint: do not reuse a pre-exec address */
+	uao_reference(timekeep_object);
+	if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, map_sz,
+	    timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+	    MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+		uao_detach(timekeep_object);
+		return (ENOMEM);
+	}
+
+	return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 88d4a3379f9..47efbdd0b78 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -63,7 +63,7 @@ dummy_get_timecount(struct timecounter *tc)
 }
 
 static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
+ dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, 0
 };
 
 /*
@@ -479,6 +479,34 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+/* Publish the active timehands in the shared timekeep page. */
+void
+tc_update_timekeep(void)
+{
+	static struct timecounter *last_tc = NULL;
+	struct timekeep *tk = timekeep;
+	struct timehands *th;
+
+	if (tk == NULL)
+		return;
+	th = timehands;
+
+	tk->tk_generation = 0;		/* mark the page as being rewritten */
+	membar_producer();
+	tk->tk_scale = th->th_scale;
+	tk->tk_offset_count = th->th_offset_count;
+	tk->tk_offset = th->th_offset;
+	tk->tk_naptime = th->th_naptime;
+	tk->tk_boottime = th->th_boottime;
+	if (th->th_counter != last_tc) {
+		last_tc = th->th_counter;
+		tk->tk_counter_mask = last_tc->tc_counter_mask;
+		tk->tk_user = last_tc->tc_user;
+	}
+	membar_producer();
+	tk->tk_generation = th->th_generation;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -631,6 +659,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
  time_uptime = th->th_offset.sec;
  membar_producer();
  timehands = th;
+
+ tc_update_timekeep();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..3084ed595a6 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
  AUX_sun_uid = 2000, /* euid */
  AUX_sun_ruid = 2001, /* ruid */
  AUX_sun_gid = 2002, /* egid */
- AUX_sun_rgid = 2003 /* rgid */
+ AUX_sun_rgid = 2003, /* rgid */
+ AUX_openbsd_timekeep = 4000, /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..c6d54572bdd 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -242,6 +242,7 @@ struct process {
  char ps_comm[MAXCOMLEN+1];
 
  vaddr_t ps_strings; /* User pointers to argv/env */
+ vaddr_t ps_timekeep; /* User pointer to timekeep */
  vaddr_t ps_sigcode; /* User pointer to the signal code */
  vaddr_t ps_sigcoderet; /* User pointer to sigreturn retPC */
  u_long ps_sigcookie;
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..bcd3acd034d 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,7 +163,7 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
-#if defined(_KERNEL) || defined(_STANDALONE)
+#if defined(_KERNEL) || defined(_STANDALONE) || defined (_LIBC)
 #include <sys/_time.h>
 
 /* Time expressed as seconds and fractions of a second + operations on it. */
@@ -171,6 +171,9 @@ struct bintime {
  time_t sec;
  uint64_t frac;
 };
+#endif
+
+#if defined(_KERNEL) || defined(_STANDALONE)
 
 #define bintimecmp(btp, ctp, cmp) \
  ((btp)->sec == (ctp)->sec ? \
diff --git sys/sys/timetc.h sys/sys/timetc.h
index ce81c3475a0..22658b00da2 100644
--- sys/sys/timetc.h
+++ sys/sys/timetc.h
@@ -24,7 +24,7 @@
 #ifndef _SYS_TIMETC_H_
 #define _SYS_TIMETC_H_
 
-#ifndef _KERNEL
+#if !defined(_KERNEL) && !defined(_LIBC)
 #error "no user-serviceable parts inside"
 #endif
 
@@ -80,6 +80,8 @@ struct timecounter {
  */
  void *tc_priv; /* [I] */
  /* Pointer to the timecounter's private parts. */
+ int tc_user; /* [I] */
+ /* Expose this timecounter to userland. */
  SLIST_ENTRY(timecounter) tc_next; /* [I] */
  /* Pointer to the next timecounter. */
  int64_t tc_freq_adj; /* [tw] */
@@ -88,11 +90,31 @@ struct timecounter {
  /* Precision of the counter.  Computed in tc_init(). */
 };
 
+struct timekeep {
+ uint32_t tk_major; /* version major number */
+ uint32_t tk_minor; /* version minor number */
+
+ /* timehands members, copied by tc_update_timekeep() */
+ uint64_t tk_scale; /* copy of th_scale */
+ u_int tk_offset_count; /* copy of th_offset_count */
+ struct bintime tk_offset; /* copy of th_offset */
+ struct bintime tk_naptime; /* copy of th_naptime */
+ struct bintime tk_boottime; /* copy of th_boottime */
+ volatile u_int tk_generation; /* 0 during an update, else th_generation */
+
+ /* timecounter members, refreshed when the active counter changes */
+ int tk_user; /* copy of tc_user */
+ u_int tk_counter_mask; /* copy of tc_counter_mask */
+};
+
 struct rwlock;
 extern struct rwlock tc_lock;
 
 extern struct timecounter *timecounter;
 
+extern struct uvm_object *timekeep_object; /* object backing the timekeep page */
+extern struct timekeep *timekeep; /* kernel mapping; NULL until first mapped */
+
 u_int64_t tc_getfrequency(void);
 u_int64_t tc_getprecision(void);
 void tc_init(struct timecounter *tc);

1234567 ... 10