TSC synchronization on MP machines

classic Classic list List threaded Threaded
22 messages Options
12
Reply | Threaded
Open this post in threaded view
|

TSC synchronization on MP machines

Paul Irofti-4
Hi,

Here is an initial diff, adapted from NetBSD, that synchronizes TSC
clocks across cores.

CPU0 is the reference clock and all others are skewed. During CPU
initialization the clocks synchronize by keeping a registry of each CPU
clock skewness and adapting the TSC read routine accordingly.

I chose this implementation over what FreeBSD is doing (which is just
copying Linux really), because it is clean and elegant.

I would love to hear reports from machines that were broken by this.
Mine, which never exhibited the problem in the first place, run just
fine with the following diff. In fact I am writing this message on one
such machine.

Also, constructive comments are more than welcome!

Notes:

- cpu_counter_serializing() could probably have a better name
  (tsc_read for example)
- the PAUSE instruction is probably not needed
- acpi(4) suspend and resume bits are left out on purpose, but should
  be trivial to add once the current diff settles

Paul Irofti

Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
+++ arch/amd64/amd64/cpu.c 27 Jun 2019 11:55:08 -0000
@@ -96,6 +96,7 @@
 #include <machine/gdt.h>
 #include <machine/pio.h>
 #include <machine/vmmvar.h>
+#include <machine/tsc.h>
 
 #if NLAPIC > 0
 #include <machine/i82489reg.h>
@@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
  cr4 = rcr4();
  lcr4(cr4 & ~CR4_PGE);
  lcr4(cr4);
+
+ /* Synchronize TSC */
+ if (!CPU_IS_PRIMARY(ci))
+      tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +813,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
  int i;
+ u_long s;
 
  ci->ci_flags |= CPUF_AP;
 
@@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /*
+ * Synchronize time stamp counters. Invalidate cache and do
+ * twice (in tsc_sync_bp) to minimize possible cache effects.
+ * Disable interrupts to try and rule out any external
+ * interference.
+ */
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
  }
 
+
  if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
  atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
 
@@ -852,6 +870,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
  int i;
+ int64_t drift;
+ u_long s;
 
  atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /* Synchronize TSC again, check for drift. */
+ drift = ci->cpu_cc_skew;
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ drift -= ci->cpu_cc_skew;
+ printf("TSC skew=%lld drift=%lld\n",
+    (long long)ci->cpu_cc_skew, (long long)drift);
+ tsc_sync_drift(drift);
  }
 }
 
@@ -888,7 +919,13 @@ cpu_hatch(void *v)
  panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+ /*
+ * Synchronize the TSC for the first time. Note that interrupts are
+ * off at this point.
+ */
+ wbinvd();
  ci->ci_flags |= CPUF_PRESENT;
+ tsc_sync_ap(ci);
 
  lapic_enable();
  lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
+++ arch/amd64/amd64/tsc.c 27 Jun 2019 11:55:08 -0000
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
  * Copyright (c) 2017 Adam Steen <[hidden email]>
  * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,13 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
+static int64_t tsc_drift_max = 250; /* max cycles */
+static int64_t tsc_drift_observed;
+static bool tsc_good;
+
+static volatile int64_t tsc_sync_val;
+static volatile struct cpu_info *tsc_sync_cpu;
+
 uint tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
  return;
  tsc_frequency = freq;
  tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
  if (tsc_is_invariant)
  tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
  tsc_frequency = tsc_freq_cpuid(ci);
  tsc_is_invariant = 1;
 
+ tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
+
  /* Newer CPUs don't require recalibration */
  if (tsc_frequency > 0) {
  tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
  tsc_timecounter.tc_quality = 2000;
-#endif
  } else {
  tsc_recalibrate = 1;
  tsc_frequency = cpufreq;
@@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
  calibrate_tsc_freq();
  }
 
+ if (tsc_drift_observed > tsc_drift_max) {
+ printf("ERROR: %lld cycle TSC drift observed\n",
+    (long long)tsc_drift_observed);
+ tsc_timecounter.tc_quality = -100;
+ tsc_is_invariant = 0;
+ }
+
  tc_init(&tsc_timecounter);
+}
+
+static uint64_t
+cpu_counter_serializing(struct cpu_info *ci)
+{
+ if (tsc_good)
+ return rdmsr(MSR_TSC);
+ else
+ return (rdtsc() + ci->cpu_cc_skew);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+
+ if (drift < 0)
+ drift = -drift;
+ if (drift > tsc_drift_observed)
+ tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+static void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+ uint64_t bptsc;
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
+ panic("tsc_sync_bp: 1");
+ }
+
+ /* Flag it and read our TSC. */
+ atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ bptsc = cpu_counter_serializing(ci) >> 1;
+
+ /* Wait for remote to complete, and read ours again. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
+ membar_consumer();
+ }
+ bptsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Wait for the results to come in. */
+ while (tsc_sync_cpu == ci) {
+ pause();
+ }
+ if (tsc_sync_cpu != NULL) {
+ panic("tsc_sync_bp: 2");
+ }
+
+ *bptscp = bptsc;
+ *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+ uint64_t bptsc, aptsc;
+
+ tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+ tsc_read_bp(ci, &bptsc, &aptsc);
+
+ /* Compute final value to adjust for skew. */
+ ci->cpu_cc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+static void
+tsc_post_ap(struct cpu_info *ci)
+{
+ uint64_t tsc;
+
+ /* Wait for go-ahead from primary. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
+ membar_consumer();
+ }
+ tsc = (cpu_counter_serializing(ci) >> 1);
+
+ /* Instruct primary to read its counter. */
+ atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ tsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Post result.  Ensure the whole value goes out atomically. */
+ (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
+ panic("tsc_sync_ap");
+ }
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+
+ tsc_post_ap(ci);
+ tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
+++ arch/amd64/include/cpu.h 27 Jun 2019 11:55:08 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
  union vmm_cpu_cap ci_vmm_cap;
  paddr_t ci_vmxon_region_pa;
  struct vmxon_region *ci_vmxon_region;
+
+ int64_t cpu_cc_skew; /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP 0x0001 /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
 #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
 
+#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
 #define CPUF_PRESENT 0x1000 /* CPU is present */
 #define CPUF_RUNNING 0x2000 /* CPU is running */
 #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
Index: arch/amd64/include/cpufunc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
retrieving revision 1.33
diff -u -p -u -p -r1.33 cpufunc.h
--- arch/amd64/include/cpufunc.h 26 Mar 2019 19:32:46 -0000 1.33
+++ arch/amd64/include/cpufunc.h 27 Jun 2019 11:55:08 -0000
@@ -282,6 +282,11 @@ mfence(void)
  __asm volatile("mfence" : : : "memory");
 }
 
+static __inline void
+pause(void)
+{
+ __asm volatile("pause" : : : "memory");
+}
 static __inline u_int64_t
 rdtsc(void)
 {
Index: arch/amd64/include/tsc.h
===================================================================
RCS file: arch/amd64/include/tsc.h
diff -N arch/amd64/include/tsc.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ arch/amd64/include/tsc.h 27 Jun 2019 11:55:08 -0000
@@ -0,0 +1,25 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MACHINE_TSC_H_
+#define _MACHINE_TSC_H_
+
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
+#endif /* !_MACHINE_TSC_H_ */

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Timo Myyrä-6
Paul Irofti <[hidden email]> writes:

> Hi,
>
> Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> clocks across cores.
>
> CPU0 is the reference clock and all others are skewed. During CPU
> initialization the clocks synchronize by keeping a registry of each CPU
> clock skewness and adapting the TSC read routine accordingly.
>
> I choose this implementation over what FreeBSD is doing (which is just
> copying Linux really), because it is clean and elegant.
>
> I would love to hear reports from machines that were broken by this.
> Mine, which never exhibited the problem in the first place, run just
> fine with the following diff. In fact I am writting this message on one
> such machine.
>
> Also constructive comments are more than welcomed!
>
> Notes:
>
> - cpu_counter_serializing() could probably have a better name
>   (tsc _read for example)
> - the PAUSE instruction is probably not needed
> - acpi(4) suspend and resume bits are left out on purpose, but should
>   be trivial to add once the current diff settles
>
> Paul Irofti
>
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 27 Jun 2019 11:55:08 -0000
> @@ -96,6 +96,7 @@
>  #include <machine/gdt.h>
>  #include <machine/pio.h>
>  #include <machine/vmmvar.h>
> +#include <machine/tsc.h>
>  
>  #if NLAPIC > 0
>  #include <machine/i82489reg.h>
> @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (!CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +813,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and do
> + * twice (in tsc_sync_bp) to minimize possible cache effects.
> + * Disable interrupts to try and rule out any external
> + * interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
>   }
>  
> +
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
>   atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
>  
> @@ -852,6 +870,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->cpu_cc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->cpu_cc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->cpu_cc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +919,13 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 27 Jun 2019 11:55:08 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +static int64_t tsc_drift_max = 250; /* max cycles */
> +static int64_t tsc_drift_observed;
> +static bool tsc_good;
> +
> +static volatile int64_t tsc_sync_val;
> +static volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }
> +
>   tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + if (tsc_good)
> + return rdmsr(MSR_TSC);
> + else
> + return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> +
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
> + panic("tsc_sync_bp: 1");
> + }
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
> + membar_consumer();
> + }
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci) {
> + pause();
> + }
> + if (tsc_sync_cpu != NULL) {
> + panic("tsc_sync_bp: 2");
> + }
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
> + membar_consumer();
> + }
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
> + panic("tsc_sync_ap");
> + }
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> +
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 27 Jun 2019 11:55:08 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t cpu_cc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpufunc.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
> retrieving revision 1.33
> diff -u -p -u -p -r1.33 cpufunc.h
> --- arch/amd64/include/cpufunc.h 26 Mar 2019 19:32:46 -0000 1.33
> +++ arch/amd64/include/cpufunc.h 27 Jun 2019 11:55:08 -0000
> @@ -282,6 +282,11 @@ mfence(void)
>   __asm volatile("mfence" : : : "memory");
>  }
>  
> +static __inline void
> +pause(void)
> +{
> + __asm volatile("pause" : : : "memory");
> +}
>  static __inline u_int64_t
>  rdtsc(void)
>  {
> Index: arch/amd64/include/tsc.h
> ===================================================================
> RCS file: arch/amd64/include/tsc.h
> diff -N arch/amd64/include/tsc.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/amd64/include/tsc.h 27 Jun 2019 11:55:08 -0000
> @@ -0,0 +1,25 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and/or distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _MACHINE_TSC_H_
> +#define _MACHINE_TSC_H_
> +
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
> +#endif /* !_MACHINE_TSC_H_ */

This doesn't seem to help with my ThinkPad E485.
I still have a lot of key repeats.

OpenBSD 6.5-current (GENERIC.MP) #9: Fri Jun 28 07:41:58 EEST 2019
    [hidden email]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 8172609536 (7794MB)
avail mem = 7912329216 (7545MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.1 @ 0x98707000 (59 entries)
bios0: vendor LENOVO version "R0UET66W (1.46 )" date 10/26/2018
bios0: LENOVO 20KUCTO1WW
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP SSDT SSDT CRAT CDIT SSDT TPM2 UEFI MSDM BATB HPET APIC MCFG SBST VFCT IVRS FPDT SSDT SSDT SSDT BGRT UEFI SSDT
acpi0: wakeup devices GPP0(S3) GPP1(S3) GPP2(S3) GPP3(S3) GPP4(S3) GPP5(S3) GPP6(S3) GP17(S3) XHC0(S3) XHC1(S3) GP18(S3) LID_(S3) SLPB(S3)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpihpet0 at acpi0: 14318180 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.60 MHz, 17-11-00
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=1.1, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu1: smt 1, core 0, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu2: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu2: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu2: smt 0, core 1, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu3: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu3: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu3: smt 1, core 1, package 0
cpu4 at mainbus0: apid 4 (application processor)
cpu4: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu4: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu4: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu4: smt 0, core 2, package 0
cpu5 at mainbus0: apid 5 (application processor)
cpu5: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu5: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu5: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu5: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu5: smt 1, core 2, package 0
cpu6 at mainbus0: apid 6 (application processor)
cpu6: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu6: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu6: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu6: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu6: smt 0, core 3, package 0
cpu7 at mainbus0: apid 7 (application processor)
cpu7: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu7: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu7: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu7: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu7: smt 1, core 3, package 0
ioapic0 at mainbus0: apid 32 pa 0xfec00000, version 21, 24 pins, can't remap
ioapic1 at mainbus0: apid 33 pa 0xfec01000, version 21, 32 pins, can't remap
acpimcfg0 at acpi0
acpimcfg0: addr 0xf8000000, bus 0-63
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus 1 (GPP0)
acpiprt2 at acpi0: bus 2 (GPP1)
acpiprt3 at acpi0: bus 3 (GPP2)
acpiprt4 at acpi0: bus -1 (GPP3)
acpiprt5 at acpi0: bus -1 (GPP4)
acpiprt6 at acpi0: bus 4 (GPP5)
acpiprt7 at acpi0: bus -1 (GPP6)
acpiprt8 at acpi0: bus 5 (GP17)
acpiprt9 at acpi0: bus 6 (GP18)
acpiec0 at acpi0
acpicpu0 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu1 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu2 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu3 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu4 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu5 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu6 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu7 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpipwrres0 at acpi0: P0ST, resource for SATA
acpipwrres1 at acpi0: P3ST, resource for SATA
acpibtn0 at acpi0: PWRB
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
acpicmos0 at acpi0
acpibat0 at acpi0: BAT0 model "01AV445" serial  2591 type LiP oem "LGC"
acpiac0 at acpi0: AC unit online
acpithinkpad0 at acpi0
"SMB0001" at acpi0 not configured
acpibtn1 at acpi0: LID_
acpibtn2 at acpi0: SLPB
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"STM7304" at acpi0 not configured
"USBC000" at acpi0 not configured
acpivideo0 at acpi0: VGA_
cpu0: 1996 MHz: speeds: 2000 1700 1600 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "AMD AMD64 17h/1xh Root Complex" rev 0x00
"AMD AMD64 17h/1xh IOMMU" rev 0x00 at pci0 dev 0 function 2 not configured
pchb1 at pci0 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb0 at pci0 dev 1 function 1 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
nvme0 at pci1 dev 0 function 0 vendor "Lenovo", unknown product 0x0003 rev 0x00: msix, NVMe 1.2
nvme0: LENSE20256GMSP34MEAT2TA, firmware 2.6.8341, serial 1227066205564
scsibus1 at nvme0: 1 targets
sd0 at scsibus1 targ 0 lun 0: <NVMe, LENSE20256GMSP34, 2.6.> SCSI4 0/direct fixed
sd0: 244198MB, 512 bytes/sector, 500118192 sectors
ppb1 at pci0 dev 1 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci2 at ppb1 bus 2
re0 at pci2 dev 0 function 0 "Realtek 8168" rev 0x10: RTL8168GU/8111GU (0x5080), msi, address e8:6a:64:33:83:cc
rgephy0 at re0 phy 7: RTL8251 PHY, rev. 0
ppb2 at pci0 dev 1 function 3 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci3 at ppb2 bus 3
sdhc0 at pci3 dev 0 function 0 "O2 Micro 0Z8621 SD/MMC" rev 0x01: apic 33 int 8
sdhc0: SDHC 4.0, 50 MHz base clock
sdmmc0 at sdhc0: 4-bit, sd high-speed, mmc high-speed, dma
ppb3 at pci0 dev 1 function 6 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci4 at ppb3 bus 4
iwm0 at pci4 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
pchb2 at pci0 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb4 at pci0 dev 8 function 1 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci5 at ppb4 bus 5
amdgpu0 at pci5 dev 0 function 0 "ATI Radeon Vega" rev 0xc4
drm0 at amdgpu0
amdgpu0: msi
azalia0 at pci5 dev 0 function 1 "ATI Radeon Vega HD Audio" rev 0x00: msi
azalia0: no supported codecs
ccp0 at pci5 dev 0 function 2 "AMD AMD64 17h/1xh Crypto" rev 0x00
xhci0 at pci5 dev 0 function 3 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
xhci1 at pci5 dev 0 function 4 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb1 at xhci1: USB revision 3.0
uhub1 at usb1 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
azalia1 at pci5 dev 0 function 6 "AMD AMD64 17h/1xh HD Audio" rev 0x00: apic 33 int 30
azalia1: codecs: Conexant/0x5111
audio0 at azalia1
ppb5 at pci0 dev 8 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci6 at ppb5 bus 6
ahci0 at pci6 dev 0 function 0 "AMD FCH AHCI" rev 0x61: msi, AHCI 1.3.1
ahci0: port 0: 6.0Gb/s
scsibus2 at ahci0: 32 targets
sd1 at scsibus2 targ 0 lun 0: <ATA, Samsung SSD 850, EXM0> SCSI3 0/direct fixed naa.50025388400c34c6
sd1: 488386MB, 512 bytes/sector, 1000215216 sectors, thin
"AMD FCH SMBus" rev 0x61 at pci0 dev 20 function 0 not configured
pcib0 at pci0 dev 20 function 3 "AMD FCH LPC" rev 0x51
pchb3 at pci0 dev 24 function 0 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb4 at pci0 dev 24 function 1 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb5 at pci0 dev 24 function 2 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb6 at pci0 dev 24 function 3 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb7 at pci0 dev 24 function 4 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb8 at pci0 dev 24 function 5 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb9 at pci0 dev 24 function 6 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb10 at pci0 dev 24 function 7 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
isa0 at pcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pms0 at pckbc0 (aux slot)
wsmouse0 at pms0 mux 0
wsmouse1 at pms0 mux 0
pms0: Synaptics clickpad, firmware 8.16, 0x1e2b1 0x940300
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: SVM/RVI
efifb at mainbus0 not configured
uhidev0 at uhub0 port 3 configuration 1 interface 0 "Microsoft Microsoft Notebook Optical Mouse with Tilt Wheel" rev 2.00/1.20 addr 2
uhidev0: iclass 3/1, 24 report ids
ums0 at uhidev0 reportid 17: 3 buttons, Z dir
wsmouse2 at ums0 mux 0
uhid0 at uhidev0 reportid 18: input=0, output=0, feature=1
uhid1 at uhidev0 reportid 19: input=1, output=0, feature=0
uhid2 at uhidev0 reportid 23: input=0, output=0, feature=1
uhid3 at uhidev0 reportid 24: input=0, output=0, feature=1
vscsi0 at root
scsibus3 at vscsi0: 256 targets
softraid0 at root
scsibus4 at softraid0: 256 targets
sd2 at scsibus4 targ 1 lun 0: <OPENBSD, SR CRYPTO, 006> SCSI2 0/direct fixed
sd2: 227678MB, 512 bytes/sector, 466284711 sectors
root on sd2a (88532b67c09ce3ee.a) swap on sd2b dump on sd2b
TSC skew=-6129185140 drift=170
TSC skew=-6129184900 drift=-10
TSC skew=-6129184890 drift=-20
TSC skew=-6129184910 drift=30
TSC skew=-6129184910 drift=10
TSC skew=-6129184900 drift=20
TSC skew=-6129184910 drift=30
iwm0: hw rev 0x230, fw ver 22.361476.0, address 68:ec:c5:ad:9a:cb
initializing kernel modesetting (RAVEN 0x1002:0x15DD 0x17AA:0x506F 0xC4).
amdgpu0: 1920x1080, 32bpp
wsdisplay0 at amdgpu0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Paul Irofti-4
Hi,

Thanks for the report!

This does not look correct.

TSC skew=-6129185140 drift=170
TSC skew=-6129184900 drift=-10
TSC skew=-6129184890 drift=-20
TSC skew=-6129184910 drift=30
TSC skew=-6129184910 drift=10
TSC skew=-6129184900 drift=20
TSC skew=-6129184910 drift=30


I'll be back with some printf's.

Paul

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
In reply to this post by Paul Irofti-4
> Date: Thu, 27 Jun 2019 15:08:00 +0300
> From: Paul Irofti <[hidden email]>
>
> Hi,
>
> Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> clocks across cores.
>
> CPU0 is the reference clock and all others are skewed. During CPU
> initialization the clocks synchronize by keeping a registry of each CPU
> clock skewness and adapting the TSC read routine accordingly.
>
> I choose this implementation over what FreeBSD is doing (which is just
> copying Linux really), because it is clean and elegant.
>
> I would love to hear reports from machines that were broken by this.
> Mine, which never exhibited the problem in the first place, run just
> fine with the following diff. In fact I am writing this message on one
> such machine.
>
> Also constructive comments are more than welcomed!
>
> Notes:
>
> - cpu_counter_serializing() could probably have a better name
>   (tsc_read for example)
> - the PAUSE instruction is probably not needed
> - acpi(4) suspend and resume bits are left out on purpose, but should
>   be trivial to add once the current diff settles
>
> Paul Irofti

I don't think we want to introduce a <machine/tsc.h> header file.

The code suffers from some NetBSD-isms, so that'll need to be fixed.
I pointed some of them out below.

Also, how accurate is your skew detection?  What skew is detected on a
machine that (supposedly) has the TSCs in sync?  If the measured skew
is non-zero there, applying it means that you actually slightly desync
the counters on different CPUs.

I think Linux uses the TSC_ADJUST MSR and compares its value across
cores.  If the skew is small and the TSC_ADJUST values are the same
across cores it skips the TSC adjustments.

Cheers,

Mark

>
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 27 Jun 2019 11:55:08 -0000
> @@ -96,6 +96,7 @@
>  #include <machine/gdt.h>
>  #include <machine/pio.h>
>  #include <machine/vmmvar.h>
> +#include <machine/tsc.h>
>  
>  #if NLAPIC > 0
>  #include <machine/i82489reg.h>
> @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (!CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +813,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and do
> + * twice (in tsc_sync_bp) to minimize possible cache effects.
> + * Disable interrupts to try and rule out any external
> + * interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
>   }
>  
> +

Please don't introduce additional empty lines.

>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
>   atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
>  
> @@ -852,6 +870,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->cpu_cc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->cpu_cc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->cpu_cc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +919,13 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 27 Jun 2019 11:55:08 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +static int64_t tsc_drift_max = 250; /* max cycles */
> +static int64_t tsc_drift_observed;
> +static bool tsc_good;
> +
> +static volatile int64_t tsc_sync_val;
> +static volatile struct cpu_info *tsc_sync_cpu;
> +

No static in the kernel please.

>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }
> +
>   tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + if (tsc_good)
> + return rdmsr(MSR_TSC);
> + else
> + return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> +

No blank line here.

> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
> + panic("tsc_sync_bp: 1");
> + }
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
> + membar_consumer();
> + }
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci) {
> + pause();

Maybe this should be CPU_BUSY_CYCLE()?

> + }
> + if (tsc_sync_cpu != NULL) {
> + panic("tsc_sync_bp: 2");
> + }

There is a bit of excessive use of curly braces in this function.

> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
> + membar_consumer();
> + }
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
> + panic("tsc_sync_ap");
> + }
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> +
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 27 Jun 2019 11:55:08 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t cpu_cc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpufunc.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
> retrieving revision 1.33
> diff -u -p -u -p -r1.33 cpufunc.h
> --- arch/amd64/include/cpufunc.h 26 Mar 2019 19:32:46 -0000 1.33
> +++ arch/amd64/include/cpufunc.h 27 Jun 2019 11:55:08 -0000
> @@ -282,6 +282,11 @@ mfence(void)
>   __asm volatile("mfence" : : : "memory");
>  }
>  
> +static __inline void
> +pause(void)
> +{
> + __asm volatile("pause" : : : "memory");
> +}
>  static __inline u_int64_t
>  rdtsc(void)
>  {
> Index: arch/amd64/include/tsc.h
> ===================================================================
> RCS file: arch/amd64/include/tsc.h
> diff -N arch/amd64/include/tsc.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/amd64/include/tsc.h 27 Jun 2019 11:55:08 -0000
> @@ -0,0 +1,25 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
> + *
> + * Permission to use, copy, modify, and/or distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _MACHINE_TSC_H_
> +#define _MACHINE_TSC_H_
> +
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
> +#endif /* !_MACHINE_TSC_H_ */
>
>

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Paul Irofti-4
On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:

> > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > From: Paul Irofti <[hidden email]>
> >
> > Hi,
> >
> > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > clocks across cores.
> >
> > CPU0 is the reference clock and all others are skewed. During CPU
> > initialization the clocks synchronize by keeping a registry of each CPU
> > clock skewness and adapting the TSC read routine accordingly.
> >
> > I choose this implementation over what FreeBSD is doing (which is just
> > copying Linux really), because it is clean and elegant.
> >
> > I would love to hear reports from machines that were broken by this.
> > Mine, which never exhibited the problem in the first place, run just
> > fine with the following diff. In fact I am writing this message on one
> > such machine.
> >
> > Also constructive comments are more than welcomed!
> >
> > Notes:
> >
> > - cpu_counter_serializing() could probably have a better name
> >   (tsc_read for example)
> > - the PAUSE instruction is probably not needed
> > - acpi(4) suspend and resume bits are left out on purpose, but should
> >   be trivial to add once the current diff settles
> >
> > Paul Irofti
>
> I don't think we want to introduce a <machine/tsc.h> header file.
>
> The code suffers from some NetBSD-isms, so that'll need to be fixed.
> I pointed some of them out below.
>
> Also, how accurate is your skew detection?  What skew is detected on a
> machine that (supposedly) has the TSCs in sync?  The result will be
> that you actually slightly desync the counters on different CPUs.
>
> I think Linux uses the TSC_ADJUST MSR and compares its value across
> cores.  If the skew is small and the TSC_ADJUST values are the same
> across cores it skips the TSC adjustments.

Hi,

Here is an updated diff with a few bugs from the previous version
eliminated and most of the concerns raised in private and by Mark
addressed.

I will do the TSC_ADJUST_MSR dance in another iteration if the current
incarnation turns out to be correct for machines suffering from TSCs not
in sync.

The thing I am mostly worried about now is the following sum:

 uint
 tsc_get_timecount(struct timecounter *tc)
 {
        return rdtsc() + curcpu()->cpu_cc_skew;
 }
 
Can one term be evaluated on one CPU and the other on another, i.e. can
the thread migrate between reading the TSC and reading the skew? Is there
a way to prevent this from happening other than locking?

I see NetBSD is checking for a change in the number of context switches
of the current process.

My plan is to have a fix in the tree before 6.6 is released, so I would
love to hear your thoughts and reports on this.

Thanks,
Paul


Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
+++ arch/amd64/amd64/cpu.c 2 Aug 2019 10:25:04 -0000
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
  cr4 = rcr4();
  lcr4(cr4 & ~CR4_PGE);
  lcr4(cr4);
+
+ /* Synchronize TSC */
+ if (!CPU_IS_PRIMARY(ci))
+      tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
  int i;
+ u_long s;
 
  ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /*
+ * Synchronize time stamp counters. Invalidate cache and do
+ * twice (in tsc_sync_bp) to minimize possible cache effects.
+ * Disable interrupts to try and rule out any external
+ * interference.
+ */
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
  }
 
  if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +868,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
  int i;
+ int64_t drift;
+ u_long s;
 
  atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /* Synchronize TSC again, check for drift. */
+ drift = ci->cpu_cc_skew;
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ drift -= ci->cpu_cc_skew;
+ printf("TSC skew=%lld drift=%lld\n",
+    (long long)ci->cpu_cc_skew, (long long)drift);
+ tsc_sync_drift(drift);
  }
 }
 
@@ -888,7 +917,13 @@ cpu_hatch(void *v)
  panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+ /*
+ * Synchronize the TSC for the first time. Note that interrupts are
+ * off at this point.
+ */
+ wbinvd();
  ci->ci_flags |= CPUF_PRESENT;
+ tsc_sync_ap(ci);
 
  lapic_enable();
  lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
+++ arch/amd64/amd64/tsc.c 2 Aug 2019 10:25:04 -0000
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
  * Copyright (c) 2017 Adam Steen <[hidden email]>
  * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,13 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
+int64_t tsc_drift_max = 250; /* max cycles */
+int64_t tsc_drift_observed;
+bool tsc_good;
+
+volatile int64_t tsc_sync_val;
+volatile struct cpu_info *tsc_sync_cpu;
+
 uint tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
  return;
  tsc_frequency = freq;
  tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
  if (tsc_is_invariant)
  tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
 uint
 tsc_get_timecount(struct timecounter *tc)
 {
- return rdtsc();
+ return rdtsc() + curcpu()->cpu_cc_skew;
 }
 
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
- if (!(ci->ci_flags & CPUF_PRIMARY) ||
-    !(ci->ci_flags & CPUF_CONST_TSC) ||
+ if (!(ci->ci_flags & CPUF_CONST_TSC) ||
     !(ci->ci_flags & CPUF_INVAR_TSC))
  return;
 
  tsc_frequency = tsc_freq_cpuid(ci);
  tsc_is_invariant = 1;
 
+ tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
+
  /* Newer CPUs don't require recalibration */
  if (tsc_frequency > 0) {
  tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
  tsc_timecounter.tc_quality = 2000;
-#endif
  } else {
  tsc_recalibrate = 1;
  tsc_frequency = cpufreq;
@@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
  calibrate_tsc_freq();
  }
 
- tc_init(&tsc_timecounter);
+ if (tsc_drift_observed > tsc_drift_max) {
+ printf("ERROR: %lld cycle TSC drift observed\n",
+    (long long)tsc_drift_observed);
+ tsc_timecounter.tc_quality = -100;
+ tsc_is_invariant = 0;
+ }
+
+ printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
+    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
+
+ if (ci->ci_flags & CPUF_PRIMARY)
+ tc_init(&tsc_timecounter);
+}
+
+static uint64_t
+cpu_counter_serializing(struct cpu_info *ci)
+{
+ if (tsc_good)
+ return rdmsr(MSR_TSC);
+ else
+ return (rdtsc() + ci->cpu_cc_skew);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+ if (drift < 0)
+ drift = -drift;
+ if (drift > tsc_drift_observed)
+ tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+static void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+ uint64_t bptsc;
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
+ panic("tsc_sync_bp: 1");
+
+ /* Flag it and read our TSC. */
+ atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ bptsc = cpu_counter_serializing(ci) >> 1;
+
+ /* Wait for remote to complete, and read ours again. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
+ membar_consumer();
+ bptsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Wait for the results to come in. */
+ while (tsc_sync_cpu == ci)
+ CPU_BUSY_CYCLE();
+ if (tsc_sync_cpu != NULL)
+ panic("tsc_sync_bp: 2");
+
+ *bptscp = bptsc;
+ *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+ uint64_t bptsc, aptsc;
+
+ tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+ tsc_read_bp(ci, &bptsc, &aptsc);
+
+ /* Compute final value to adjust for skew. */
+ ci->cpu_cc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+static void
+tsc_post_ap(struct cpu_info *ci)
+{
+ uint64_t tsc;
+
+ /* Wait for go-ahead from primary. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
+ membar_consumer();
+ tsc = (cpu_counter_serializing(ci) >> 1);
+
+ /* Instruct primary to read its counter. */
+ atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ tsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Post result.  Ensure the whole value goes out atomically. */
+ (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
+ panic("tsc_sync_ap");
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+ tsc_post_ap(ci);
+ tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
+++ arch/amd64/include/cpu.h 2 Aug 2019 10:25:04 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
  union vmm_cpu_cap ci_vmm_cap;
  paddr_t ci_vmxon_region_pa;
  struct vmxon_region *ci_vmxon_region;
+
+ int64_t cpu_cc_skew; /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP 0x0001 /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
 #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
 
+#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
 #define CPUF_PRESENT 0x1000 /* CPU is present */
 #define CPUF_RUNNING 0x2000 /* CPU is running */
 #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
Index: arch/amd64/include/cpuvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 cpuvar.h
--- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
+++ arch/amd64/include/cpuvar.h 2 Aug 2019 10:25:04 -0000
@@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
 void cpu_init_first(void);
 void cpu_adjust_tsc_freq(uint64_t (*)());
 
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
 #endif

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
> Date: Fri, 2 Aug 2019 13:29:37 +0300
> From: Paul Irofti <[hidden email]>
>
> On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > From: Paul Irofti <[hidden email]>
> > >
> > > Hi,
> > >
> > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > clocks across cores.
> > >
> > > CPU0 is the reference clock and all others are skewed. During CPU
> > > initialization the clocks synchronize by keeping a registry of each CPU
> > > clock skewness and adapting the TSC read routine accordingly.
> > >
> > > I choose this implementation over what FreeBSD is doing (which is just
> > > copying Linux really), because it is clean and elegant.
> > >
> > > I would love to hear reports from machines that were broken by this.
> > > Mine, which never exhibited the problem in the first place, run just
> > > fine with the following diff. In fact I am writing this message on one
> > > such machine.
> > >
> > > Also constructive comments are more than welcome!
> > >
> > > Notes:
> > >
> > > - cpu_counter_serializing() could probably have a better name
> > >   (tsc_read for example)
> > > - the PAUSE instruction is probably not needed
> > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > >   be trivial to add once the current diff settles
> > >
> > > Paul Irofti
> >
> > I don't think we want to introduce a <machine/tsc.h> header file.
> >
> > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > I pointed some of them out below.
> >
> > Also, how accurate is your skew detection?  What skew is detected on a
> > machine that (supposedly) has the TSCs in sync?  The result will be
> > that you actually slightly desync the counters on different CPUs.
> >
> > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > cores.  If the skew is small and the TSC_ADJUST values are the same
> > across cores it skips the TSC adjustments.
>
> Hi,
>
> Here is an updated diff with a few bugs eliminated from the previous one and
> with most of the concerns I got in private and from Mark fixed.
>
> I will do the TSC_ADJUST_MSR dance in another iteration if the current
> incarnation turns out to be correct for machines suffering from TSCs not
> in sync.
>
> The thing I am mostly worried about now is in the following sum
>
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
> can one term be executed on one CPU and the other on another? Is there a
> way to protect this from happening other than locking?

Our kernel is non-preemptable so a context switch will only happen if
you sleep.  So that isn't an issue.

> I see NetBSD is checking for a change in the number of context switches
> of the current process.

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Bryan Steele-2
In reply to this post by Paul Irofti-4
On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:

> Hi,
>
> Here is an updated diff with a few bugs eliminated from the previous one and
> with most of the concerns I got in private and from Mark fixed.
>
> I will do the TSC_ADJUST_MSR dance in another iteration if the current
> incarnation turns out to be correct for machines suffering from TSCs not
> in sync.
>
> The thing I am mostly worried about now is in the following sum
>
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
> can one term be executed on one CPU and the other on another? Is there a
> way to protect this from happening other than locking?
>
> I see NetBSD is checking for a change in the number of context switches
> of the current process.
>
> My plan is to have a fix in the tree before 6.6 is released, so I would
> love to hear your thoughts and reports on this.
>
> Thanks,
> Paul
>
>
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 2 Aug 2019 10:25:04 -0000
> @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (!CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +812,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and do
> + * twice (in tsc_sync_bp) to minimize possible cache effects.
> + * Disable interrupts to try and rule out any external
> + * interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
>   }
>  
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -852,6 +868,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->cpu_cc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->cpu_cc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->cpu_cc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +917,13 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 2 Aug 2019 10:25:04 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +int64_t tsc_drift_max = 250; /* max cycles */
> +int64_t tsc_drift_observed;
> +bool tsc_good;
> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }
> +
> + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> +    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
> +
> + if (ci->ci_flags & CPUF_PRIMARY)
> + tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + if (tsc_good)
> + return rdmsr(MSR_TSC);
> + else
> + return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> + panic("tsc_sync_bp: 1");
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> + membar_consumer();
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci)
> + CPU_BUSY_CYCLE();
> + if (tsc_sync_cpu != NULL)
> + panic("tsc_sync_bp: 2");
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> + membar_consumer();
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> + panic("tsc_sync_ap");
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 2 Aug 2019 10:25:04 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t cpu_cc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 cpuvar.h
> --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> +++ arch/amd64/include/cpuvar.h 2 Aug 2019 10:25:04 -0000
> @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  void cpu_adjust_tsc_freq(uint64_t (*)());
>  
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
>  #endif

It seems to work on the Matebook D w/ AMD Ryzen 5 2500U, which I believe
had problems with TSC.

OpenBSD 6.5-current (AMDGPU) #2: Fri Aug  2 12:32:13 EDT 2019
    [hidden email]:/home/brynet/src/sys/arch/amd64/compile/AMDGPU
real mem = 7396610048 (7053MB)
avail mem = 7159799808 (6828MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.10 @ 0x8c4ec000 (24 entries)
bios0: vendor HUAWEI version "1.22" date 02/26/2019
bios0: HUAWEI KPL-W0X
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP UEFI MSDM SSDT SSDT CRAT CDIT ASF! BOOT HPET APIC MCFG WSMT UEFI VFCT SSDT TPM2 IVRS SSDT SSDT SSDT SSDT FPDT BGRT
acpi0: wakeup devices GPP0(S4) GPP1(S4) GPP2(S4) GPP3(S4) GPP4(S4) GPP5(S4) GPP6(S4) GP17(S4) XHC0(S0) XHC1(S0) GP18(S4)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpihpet0 at acpi0: 14318180 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.62 MHz, 17-11-00
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=1.1, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-300 observed drift=0
cpu1: smt 1, core 0, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu2: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu2: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=30 observed drift=0
cpu2: smt 0, core 1, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu3: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu3: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=10 observed drift=0
cpu3: smt 1, core 1, package 0
cpu4 at mainbus0: apid 4 (application processor)
cpu4: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu4: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu4: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-70 observed drift=0
cpu4: smt 0, core 2, package 0
cpu5 at mainbus0: apid 5 (application processor)
cpu5: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu5: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu5: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu5: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=20 observed drift=0
cpu5: smt 1, core 2, package 0
cpu6 at mainbus0: apid 6 (application processor)
cpu6: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu6: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu6: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu6: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-30 observed drift=0
cpu6: smt 0, core 3, package 0
cpu7 at mainbus0: apid 7 (application processor)
cpu7: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu7: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu7: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu7: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu7: smt 1, core 3, package 0
ioapic0 at mainbus0: apid 4 pa 0xfec00000, version 21, 24 pins, remapped
ioapic1 at mainbus0: apid 5 pa 0xfec01000, version 21, 32 pins, remapped
acpimcfg0 at acpi0
acpimcfg0: addr 0xf8000000, bus 0-63
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (GPP0)
acpiprt2 at acpi0: bus 1 (GPP1)
acpiprt3 at acpi0: bus -1 (GPP2)
acpiprt4 at acpi0: bus -1 (GPP3)
acpiprt5 at acpi0: bus -1 (GPP4)
acpiprt6 at acpi0: bus -1 (GPP5)
acpiprt7 at acpi0: bus -1 (GPP6)
acpiprt8 at acpi0: bus 2 (GP17)
acpiprt9 at acpi0: bus 3 (GP18)
acpiec0 at acpi0
acpicpu0 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu1 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu2 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu3 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu4 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu5 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu6 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu7 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpipwrres0 at acpi0: P0ST, resource for SATA
acpipwrres1 at acpi0: P3ST, resource for SATA
acpibtn0 at acpi0: PWRB
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
acpicmos0 at acpi0
"WDT0001" at acpi0 not configured
acpiac0 at acpi0: AC unit online
acpibat0 at acpi0: BAT1 model "HB4593R1ECW" serial 7748 type LIon oem "DYNAPACK"
acpibtn1 at acpi0: LID0
"AMDI0030" at acpi0 not configured
dwiic0 at acpi0 I2CC addr 0xfedc4000/0x1000
iic0 at dwiic0
ihidev0 at iic0 addr 0xa , can't establish interrupt (polling), vendor 0x56a product 0x48cf, WCOM48CF
ihidev0: 14 report ids
hid at ihidev0 reportid 2 not configured
hid at ihidev0 reportid 3 not configured
hid at ihidev0 reportid 4 not configured
hid at ihidev0 reportid 7 not configured
hid at ihidev0 reportid 8 not configured
hid at ihidev0 reportid 9 not configured
hid at ihidev0 reportid 10 not configured
ims0 at ihidev0 reportid 12: 1 button, tip
wsmouse0 at ims0 mux 0
hid at ihidev0 reportid 13 not configured
hid at ihidev0 reportid 14 not configured
dwiic1 at acpi0 I2CD addr 0xfedc5000/0x1000
iic1 at dwiic1
ihidev1 at iic1 addr 0x15, disabling for pms driver
"MSFT0101" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpivideo0 at acpi0: VGA_
acpivideo1 at acpi0: VGA_
acpivideo2 at acpi0: VGA_
cpu0: 1996 MHz: speeds: 2000 1700 1600 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "AMD AMD64 17h/1xh Root Complex" rev 0x00
"AMD AMD64 17h/1xh IOMMU" rev 0x00 at pci0 dev 0 function 2 not configured
pchb1 at pci0 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb0 at pci0 dev 1 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
pchb2 at pci0 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb1 at pci0 dev 8 function 1 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci2 at ppb1 bus 2
amdgpu0 at pci2 dev 0 function 0 "ATI Radeon Vega" rev 0xc4
drm0 at amdgpu0
amdgpu0: msi
azalia0 at pci2 dev 0 function 1 "ATI Radeon Vega HD Audio" rev 0x00: msi
azalia0: no supported codecs
ccp0 at pci2 dev 0 function 2 "AMD AMD64 17h/1xh Crypto" rev 0x00
xhci0 at pci2 dev 0 function 3 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
xhci1 at pci2 dev 0 function 4 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb1 at xhci1: USB revision 3.0
uhub1 at usb1 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
"AMD AMD64 17h/1xh I2S Audio" rev 0x00 at pci2 dev 0 function 5 not configured
azalia1 at pci2 dev 0 function 6 "AMD AMD64 17h/1xh HD Audio" rev 0x00: apic 5 int 30
azalia1: codecs: Realtek/0x0256
audio0 at azalia1
ppb2 at pci0 dev 8 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci3 at ppb2 bus 3
ahci0 at pci3 dev 0 function 0 "AMD FCH AHCI" rev 0x61: msi, AHCI 1.3.1
ahci0: port 1: 6.0Gb/s
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 1 lun 0: <ATA, SanDisk SD9SN8W2, X610> SCSI3 0/direct fixed naa.5001b448b6a2ccc5
sd0: 244198MB, 512 bytes/sector, 500118192 sectors, thin
"AMD FCH SMBus" rev 0x61 at pci0 dev 20 function 0 not configured
pcib0 at pci0 dev 20 function 3 "AMD FCH LPC" rev 0x51
pchb3 at pci0 dev 24 function 0 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb4 at pci0 dev 24 function 1 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb5 at pci0 dev 24 function 2 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb6 at pci0 dev 24 function 3 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb7 at pci0 dev 24 function 4 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb8 at pci0 dev 24 function 5 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb9 at pci0 dev 24 function 6 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb10 at pci0 dev 24 function 7 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
isa0 at pcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: probed fifo depth: 0 bytes
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pms0 at pckbc0 (aux slot)
wsmouse1 at pms0 mux 0
pms0: Elantech Clickpad, version 4, firmware 0x4f1001
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: SVM/RVI
efifb at mainbus0 not configured
uvideo0 at uhub1 port 1 configuration 1 interface 0 "SunplusIT Inc hm1091_techfront" rev 2.00/0.13 addr 2
video0 at uvideo0
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (467069f697e9c83d.a) swap on sd0b dump on sd0b
TSC skew=-230 drift=-70
TSC skew=-10 drift=40
TSC skew=-20 drift=30
TSC skew=-40 drift=-30
TSC skew=10 drift=10
TSC skew=0 drift=-30
TSC skew=0 drift=0
iwm0: hw rev 0x230, fw ver 22.361476.0, address 7c:76:35:ba:bd:28
initializing kernel modesetting (RAVEN 0x1002:0x15DD 0x19E5:0x3E06 0xC4).
amdgpu0: 1920x1080, 32bpp
wsdisplay0 at amdgpu0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Paul Irofti-4
In reply to this post by Paul Irofti-4
On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:

> On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > From: Paul Irofti <[hidden email]>
> > >
> > > Hi,
> > >
> > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > clocks across cores.
> > >
> > > CPU0 is the reference clock and all others are skewed. During CPU
> > > initialization the clocks synchronize by keeping a registry of each CPU
> > > clock skewness and adapting the TSC read routine accordingly.
> > >
> > > I choose this implementation over what FreeBSD is doing (which is just
> > > copying Linux really), because it is clean and elegant.
> > >
> > > I would love to hear reports from machines that were broken by this.
> > > Mine, which never exhibited the problem in the first place, run just
> > > fine with the following diff. In fact I am writing this message on one
> > > such machine.
> > >
> > > Also constructive comments are more than welcome!
> > >
> > > Notes:
> > >
> > > - cpu_counter_serializing() could probably have a better name
> > >   (tsc_read for example)
> > > - the PAUSE instruction is probably not needed
> > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > >   be trivial to add once the current diff settles
> > >
> > > Paul Irofti
> >
> > I don't think we want to introduce a <machine/tsc.h> header file.
> >
> > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > I pointed some of them out below.
> >
> > Also, how accurate is your skew detection?  What skew is detected on a
> > machine that (supposedly) has the TSCs in sync?  The result will be
> > that you actually slightly desync the counters on different CPUs.
> >
> > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > cores.  If the skew is small and the TSC_ADJUST values are the same
> > across cores it skips the TSC adjustments.
>
> Hi,
>
> Here is an updated diff with a few bugs eliminated from the previous one and
> with most of the concerns I got in private and from Mark fixed.
>
> I will do the TSC_ADJUST_MSR dance in another iteration if the current
> incarnation turns out to be correct for machines suffering from TSCs not
> in sync.
>
> The thing I am mostly worried about now is in the following sum
>
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
> can one term be executed on one CPU and the other on another? Is there a
> way to protect this from happening other than locking?
>
> I see NetBSD is checking for a change in the number of context switches
> of the current process.
>
> My plan is to have a fix in the tree before 6.6 is released, so I would
> love to hear your thoughts and reports on this.
>
> Thanks,
> Paul

Hi,

Here is a third version of the TSC diff that also takes into
consideration the suspend-resume path, which was ignored by the previous
one, thus rendering resume broken.

Have a go at it. Reports are welcome. So far I only got ONE report from
a machine with broken TSC :(

Paul


Index: arch/amd64/amd64/acpi_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
retrieving revision 1.86
diff -u -p -u -p -r1.86 acpi_machdep.c
--- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 -0000 1.86
+++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 -0000
@@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa;
 
 extern int acpi_savecpu(void) __returns_twice;
 
+extern int64_t tsc_drift_observed;
+
 #define ACPI_BIOS_RSDP_WINDOW_BASE        0xe0000
 #define ACPI_BIOS_RSDP_WINDOW_SIZE        0x20000
 
@@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc)
 {
  fpuinit(&cpu_info_primary);
 
+ cpu_info_primary.cpu_cc_skew = 0; /* futile */
+ tsc_drift_observed = 0; /* reset tsc drift on resume */
  cpu_init(&cpu_info_primary);
  cpu_ucode_apply(&cpu_info_primary);
 
Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
+++ arch/amd64/amd64/cpu.c 5 Aug 2019 13:54:34 -0000
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
  cr4 = rcr4();
  lcr4(cr4 & ~CR4_PGE);
  lcr4(cr4);
+
+ /* Synchronize TSC */
+ if (cold && !CPU_IS_PRIMARY(ci))
+      tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
  int i;
+ u_long s;
 
  ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /*
+ * Synchronize time stamp counters. Invalidate cache and do
+ * twice (in tsc_sync_bp) to minimize possible cache effects.
+ * Disable interrupts to try and rule out any external
+ * interference.
+ */
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
  }
 
  if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +868,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
  int i;
+ int64_t drift;
+ u_long s;
 
  atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else if (cold) {
+ /* Synchronize TSC again, check for drift. */
+ drift = ci->cpu_cc_skew;
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ drift -= ci->cpu_cc_skew;
+ printf("TSC skew=%lld drift=%lld\n",
+    (long long)ci->cpu_cc_skew, (long long)drift);
+ tsc_sync_drift(drift);
  }
 }
 
@@ -888,7 +917,14 @@ cpu_hatch(void *v)
  panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+ /*
+ * Synchronize the TSC for the first time. Note that interrupts are
+ * off at this point.
+ */
+ wbinvd();
  ci->ci_flags |= CPUF_PRESENT;
+ ci->cpu_cc_skew = 0; /* reset on resume */
+ tsc_sync_ap(ci);
 
  lapic_enable();
  lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
+++ arch/amd64/amd64/tsc.c 5 Aug 2019 13:54:34 -0000
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
  * Copyright (c) 2017 Adam Steen <[hidden email]>
  * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,13 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
+int64_t tsc_drift_max = 250; /* max cycles */
+int64_t tsc_drift_observed;
+bool tsc_good;
+
+volatile int64_t tsc_sync_val;
+volatile struct cpu_info *tsc_sync_cpu;
+
 uint tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
  return;
  tsc_frequency = freq;
  tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
  if (tsc_is_invariant)
  tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
 uint
 tsc_get_timecount(struct timecounter *tc)
 {
- return rdtsc();
+ return rdtsc() + curcpu()->cpu_cc_skew;
 }
 
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
- if (!(ci->ci_flags & CPUF_PRIMARY) ||
-    !(ci->ci_flags & CPUF_CONST_TSC) ||
+ if (!(ci->ci_flags & CPUF_CONST_TSC) ||
     !(ci->ci_flags & CPUF_INVAR_TSC))
  return;
 
  tsc_frequency = tsc_freq_cpuid(ci);
  tsc_is_invariant = 1;
 
+ tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
+
  /* Newer CPUs don't require recalibration */
  if (tsc_frequency > 0) {
  tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
  tsc_timecounter.tc_quality = 2000;
-#endif
  } else {
  tsc_recalibrate = 1;
  tsc_frequency = cpufreq;
@@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
  calibrate_tsc_freq();
  }
 
- tc_init(&tsc_timecounter);
+ if (tsc_drift_observed > tsc_drift_max) {
+ printf("ERROR: %lld cycle TSC drift observed\n",
+    (long long)tsc_drift_observed);
+ tsc_timecounter.tc_quality = -100;
+ tsc_is_invariant = 0;
+ }
+
+ printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
+    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
+
+ if (ci->ci_flags & CPUF_PRIMARY)
+ tc_init(&tsc_timecounter);
+}
+
+static uint64_t
+cpu_counter_serializing(struct cpu_info *ci)
+{
+ if (tsc_good)
+ return rdmsr(MSR_TSC);
+ else
+ return (rdtsc() + ci->cpu_cc_skew);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+ if (drift < 0)
+ drift = -drift;
+ if (drift > tsc_drift_observed)
+ tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+static void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+ uint64_t bptsc;
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
+ panic("tsc_sync_bp: 1");
+
+ /* Flag it and read our TSC. */
+ atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ bptsc = cpu_counter_serializing(ci) >> 1;
+
+ /* Wait for remote to complete, and read ours again. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
+ membar_consumer();
+ bptsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Wait for the results to come in. */
+ while (tsc_sync_cpu == ci)
+ CPU_BUSY_CYCLE();
+ if (tsc_sync_cpu != NULL)
+ panic("tsc_sync_bp: 2");
+
+ *bptscp = bptsc;
+ *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+ uint64_t bptsc, aptsc;
+
+ tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+ tsc_read_bp(ci, &bptsc, &aptsc);
+
+ /* Compute final value to adjust for skew. */
+ ci->cpu_cc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+static void
+tsc_post_ap(struct cpu_info *ci)
+{
+ uint64_t tsc;
+
+ /* Wait for go-ahead from primary. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
+ membar_consumer();
+ tsc = (cpu_counter_serializing(ci) >> 1);
+
+ /* Instruct primary to read its counter. */
+ atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ tsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Post result.  Ensure the whole value goes out atomically. */
+ (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
+ panic("tsc_sync_ap");
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+ tsc_post_ap(ci);
+ tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
+++ arch/amd64/include/cpu.h 5 Aug 2019 13:54:34 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
  union vmm_cpu_cap ci_vmm_cap;
  paddr_t ci_vmxon_region_pa;
  struct vmxon_region *ci_vmxon_region;
+
+ int64_t cpu_cc_skew; /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP 0x0001 /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
 #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
 
+#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
 #define CPUF_PRESENT 0x1000 /* CPU is present */
 #define CPUF_RUNNING 0x2000 /* CPU is running */
 #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
Index: arch/amd64/include/cpuvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 cpuvar.h
--- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
+++ arch/amd64/include/cpuvar.h 5 Aug 2019 13:54:34 -0000
@@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
 void cpu_init_first(void);
 void cpu_adjust_tsc_freq(uint64_t (*)());
 
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
 #endif

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Bryan Steele-2
On Mon, Aug 05, 2019 at 04:58:27PM +0300, Paul Irofti wrote:

> Hi,
>
> Here is a third version of the TSC diff that also takes into
> consideration the suspend-resume path, which was ignored by the previous
> version, thus rendering resume broken.
>
> Have a go at it. Reports are welcome. So far I only got ONE report from
> a machine with broken TSC :(
>
> Paul
>
>
> Index: arch/amd64/amd64/acpi_machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
> retrieving revision 1.86
> diff -u -p -u -p -r1.86 acpi_machdep.c
> --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 -0000 1.86
> +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 -0000
> @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa;
>  
>  extern int acpi_savecpu(void) __returns_twice;
>  
> +extern int64_t tsc_drift_observed;
> +
>  #define ACPI_BIOS_RSDP_WINDOW_BASE        0xe0000
>  #define ACPI_BIOS_RSDP_WINDOW_SIZE        0x20000
>  
> @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc)
>  {
>   fpuinit(&cpu_info_primary);
>  
> + cpu_info_primary.cpu_cc_skew = 0; /* futile */
> + tsc_drift_observed = 0; /* reset tsc drift on resume */
>   cpu_init(&cpu_info_primary);
>   cpu_ucode_apply(&cpu_info_primary);
>  
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 5 Aug 2019 13:54:34 -0000
> @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (cold && !CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +812,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and do
> + * twice (in tsc_sync_bp) to minimize possible cache effects.
> + * Disable interrupts to try and rule out any external
> + * interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
>   }
>  
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -852,6 +868,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else if (cold) {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->cpu_cc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->cpu_cc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->cpu_cc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +917,14 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + ci->cpu_cc_skew = 0; /* reset on resume */
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 5 Aug 2019 13:54:34 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +int64_t tsc_drift_max = 250; /* max cycles */
> +int64_t tsc_drift_observed;
> +bool tsc_good;
> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }
> +
> + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> +    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
> +
> + if (ci->ci_flags & CPUF_PRIMARY)
> + tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + if (tsc_good)
> + return rdmsr(MSR_TSC);
> + else
> + return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> + panic("tsc_sync_bp: 1");
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> + membar_consumer();
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci)
> + CPU_BUSY_CYCLE();
> + if (tsc_sync_cpu != NULL)
> + panic("tsc_sync_bp: 2");
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> + membar_consumer();
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> + panic("tsc_sync_ap");
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 5 Aug 2019 13:54:34 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t cpu_cc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 cpuvar.h
> --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> +++ arch/amd64/include/cpuvar.h 5 Aug 2019 13:54:34 -0000
> @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  void cpu_adjust_tsc_freq(uint64_t (*)());
>  
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
>  #endif

I tested this, and confirmed that it's automatically preferring the tsc
timecounter.hardware. I waited an hour or two and it seems fine, and also
ran regress/sys/kern/clock_gettime with no issues reported.

As mentioned separately, I'm not able to test suspend/resume as it is
not currently working on this machine.

-Bryan.

OpenBSD 6.5-current (AMDGPU) #7: Mon Aug  5 14:03:35 EDT 2019
    [hidden email]:/home/brynet/src/sys/arch/amd64/compile/AMDGPU
real mem = 7396610048 (7053MB)
avail mem = 7159779328 (6828MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.10 @ 0x8c4ec000 (24 entries)
bios0: vendor HUAWEI version "1.22" date 02/26/2019
bios0: HUAWEI KPL-W0X
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP UEFI MSDM SSDT SSDT CRAT CDIT ASF! BOOT HPET APIC MCFG WSMT UEFI VFCT SSDT TPM2 IVRS SSDT SSDT SSDT SSDT FPDT BGRT
acpi0: wakeup devices GPP0(S4) GPP1(S4) GPP2(S4) GPP3(S4) GPP4(S4) GPP5(S4) GPP6(S4) GP17(S4) XHC0(S0) XHC1(S0) GP18(S4)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpihpet0 at acpi0: 14318180 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.62 MHz, 17-11-00
cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 24MHz
cpu0: mwait min=64, max=64, C-substates=1.1, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu1: smt 1, core 0, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu2: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu2: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-30 observed drift=0
cpu2: smt 0, core 1, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu3: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu3: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu3: smt 1, core 1, package 0
cpu4 at mainbus0: apid 4 (application processor)
cpu4: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu4: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu4: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-30 observed drift=0
cpu4: smt 0, core 2, package 0
cpu5 at mainbus0: apid 5 (application processor)
cpu5: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu5: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu5: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu5: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-20 observed drift=0
cpu5: smt 1, core 2, package 0
cpu6 at mainbus0: apid 6 (application processor)
cpu6: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu6: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu6: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu6: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-70 observed drift=0
cpu6: smt 0, core 3, package 0
cpu7 at mainbus0: apid 7 (application processor)
cpu7: AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx, 1996.23 MHz, 17-11-00
cpu7: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
cpu7: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
cpu7: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
tsc_timecounter_init: TSC skew=-40 observed drift=0
cpu7: smt 1, core 3, package 0
ioapic0 at mainbus0: apid 4 pa 0xfec00000, version 21, 24 pins, remapped
ioapic1 at mainbus0: apid 5 pa 0xfec01000, version 21, 32 pins, remapped
acpimcfg0 at acpi0
acpimcfg0: addr 0xf8000000, bus 0-63
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (GPP0)
acpiprt2 at acpi0: bus 1 (GPP1)
acpiprt3 at acpi0: bus -1 (GPP2)
acpiprt4 at acpi0: bus -1 (GPP3)
acpiprt5 at acpi0: bus -1 (GPP4)
acpiprt6 at acpi0: bus -1 (GPP5)
acpiprt7 at acpi0: bus -1 (GPP6)
acpiprt8 at acpi0: bus 2 (GP17)
acpiprt9 at acpi0: bus 3 (GP18)
acpiec0 at acpi0
acpicpu0 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu1 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu2 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu3 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu4 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu5 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu6 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu7 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpipwrres0 at acpi0: P0ST, resource for SATA
acpipwrres1 at acpi0: P3ST, resource for SATA
acpibtn0 at acpi0: PWRB
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
acpicmos0 at acpi0
"WDT0001" at acpi0 not configured
acpiac0 at acpi0: AC unit online
acpibat0 at acpi0: BAT1 model "HB4593R1ECW" serial 7748 type LIon oem "DYNAPACK"
acpibtn1 at acpi0: LID0
"AMDI0030" at acpi0 not configured
dwiic0 at acpi0 I2CC addr 0xfedc4000/0x1000
iic0 at dwiic0
ihidev0 at iic0 addr 0xa , can't establish interrupt (polling), vendor 0x56a product 0x48cf, WCOM48CF
ihidev0: 14 report ids
hid at ihidev0 reportid 2 not configured
hid at ihidev0 reportid 3 not configured
hid at ihidev0 reportid 4 not configured
hid at ihidev0 reportid 7 not configured
hid at ihidev0 reportid 8 not configured
hid at ihidev0 reportid 9 not configured
hid at ihidev0 reportid 10 not configured
ims0 at ihidev0 reportid 12: 1 button, tip
wsmouse0 at ims0 mux 0
hid at ihidev0 reportid 13 not configured
hid at ihidev0 reportid 14 not configured
dwiic1 at acpi0 I2CD addr 0xfedc5000/0x1000
iic1 at dwiic1
ihidev1 at iic1 addr 0x15, disabling for pms driver
"MSFT0101" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpivideo0 at acpi0: VGA_
acpivideo1 at acpi0: VGA_
acpivideo2 at acpi0: VGA_
cpu0: 1996 MHz: speeds: 2000 1700 1600 MHz
pci0 at mainbus0 bus 0
pchb0 at pci0 dev 0 function 0 "AMD AMD64 17h/1xh Root Complex" rev 0x00
"AMD AMD64 17h/1xh IOMMU" rev 0x00 at pci0 dev 0 function 2 not configured
pchb1 at pci0 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb0 at pci0 dev 1 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
pchb2 at pci0 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb1 at pci0 dev 8 function 1 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci2 at ppb1 bus 2
amdgpu0 at pci2 dev 0 function 0 "ATI Radeon Vega" rev 0xc4
drm0 at amdgpu0
amdgpu0: msi
azalia0 at pci2 dev 0 function 1 "ATI Radeon Vega HD Audio" rev 0x00: msi
azalia0: no supported codecs
ccp0 at pci2 dev 0 function 2 "AMD AMD64 17h/1xh Crypto" rev 0x00
xhci0 at pci2 dev 0 function 3 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
xhci1 at pci2 dev 0 function 4 "AMD AMD64 17h/1xh xHCI" rev 0x00: msi, xHCI 1.10
usb1 at xhci1: USB revision 3.0
uhub1 at usb1 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 addr 1
"AMD AMD64 17h/1xh I2S Audio" rev 0x00 at pci2 dev 0 function 5 not configured
azalia1 at pci2 dev 0 function 6 "AMD AMD64 17h/1xh HD Audio" rev 0x00: apic 5 int 30
azalia1: codecs: Realtek/0x0256
audio0 at azalia1
ppb2 at pci0 dev 8 function 2 "AMD AMD64 17h/1xh PCIE" rev 0x00
pci3 at ppb2 bus 3
ahci0 at pci3 dev 0 function 0 "AMD FCH AHCI" rev 0x61: msi, AHCI 1.3.1
ahci0: port 1: 6.0Gb/s
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 1 lun 0: <ATA, SanDisk SD9SN8W2, X610> SCSI3 0/direct fixed naa.5001b448b6a2ccc5
sd0: 244198MB, 512 bytes/sector, 500118192 sectors, thin
"AMD FCH SMBus" rev 0x61 at pci0 dev 20 function 0 not configured
pcib0 at pci0 dev 20 function 3 "AMD FCH LPC" rev 0x51
pchb3 at pci0 dev 24 function 0 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb4 at pci0 dev 24 function 1 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb5 at pci0 dev 24 function 2 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb6 at pci0 dev 24 function 3 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb7 at pci0 dev 24 function 4 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb8 at pci0 dev 24 function 5 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb9 at pci0 dev 24 function 6 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
pchb10 at pci0 dev 24 function 7 "AMD AMD64 17h/1xh Data Fabric" rev 0x00
isa0 at pcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: probed fifo depth: 0 bytes
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pms0 at pckbc0 (aux slot)
wsmouse1 at pms0 mux 0
pms0: Elantech Clickpad, version 4, firmware 0x4f1001
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: SVM/RVI
efifb at mainbus0 not configured
uvideo0 at uhub1 port 1 configuration 1 interface 0 "SunplusIT Inc hm1091_techfront" rev 2.00/0.13 addr 2
video0 at uvideo0
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (467069f697e9c83d.a) swap on sd0b dump on sd0b
iwm0: hw rev 0x230, fw ver 22.361476.0, address 7c:76:35:ba:bd:28
initializing kernel modesetting (RAVEN 0x1002:0x15DD 0x19E5:0x3E06 0xC4).
amdgpu0: 1920x1080, 32bpp
wsdisplay0 at amdgpu0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
In reply to this post by Paul Irofti-4
> Date: Mon, 5 Aug 2019 16:58:27 +0300
> From: Paul Irofti <[hidden email]>
>
> On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:
> > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > > From: Paul Irofti <[hidden email]>
> > > >
> > > > Hi,
> > > >
> > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > > clocks across cores.
> > > >
> > > > CPU0 is the reference clock and all others are skewed. During CPU
> > > > initialization the clocks synchronize by keeping a registry of each CPU
> > > > clock skewness and adapting the TSC read routine accordingly.
> > > >
> > > > I choose this implementation over what FreeBSD is doing (which is just
> > > > copying Linux really), because it is clean and elegant.
> > > >
> > > > I would love to hear reports from machines that were broken by this.
> > > > Mine, which never exhibited the problem in the first place, run just
> > > > fine with the following diff. In fact I am writing this message on one
> > > > such machine.
> > > >
> > > > Also constructive comments are more than welcomed!
> > > >
> > > > Notes:
> > > >
> > > > - cpu_counter_serializing() could probably have a better name
> > > >   (tsc_read for example)
> > > > - the PAUSE instruction is probably not needed
> > > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > > >   be trivial to add once the current diff settles
> > > >
> > > > Paul Irofti
> > >
> > > I don't think we want to introduce a <machine/tsc.h> header file.
> > >
> > > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > > I pointed some of them out below.
> > >
> > > Also, how accurate is your skew detection?  What skew is detected on a
> > > machine that (supposedly) has the TSCs in sync?  The result will be
> > > that you actually slightly desync the counters on different CPUs.
> > >
> > > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > > cores.  If the skew is small and the TSC_ADJUST values are the same
> > > across cores it skips the TSC adjustments.
> >
> > Hi,
> >
> > Here is an updated diff with a few bugs eliminated from the previous and
> > with most of the concerns I got in private and from Mark fixed.
> >
> > I will do the TSC_ADJUST_MSR dance in another iteration if the current
> > incarnation turns out to be correct for machines suffering from TSCs not
> > in sync.
> >
> > The thing I am mostly worried about now is in the following sum
> >
> >  uint
> >  tsc_get_timecount(struct timecounter *tc)
> >  {
> > return rdtsc() + curcpu()->cpu_cc_skew;
> >  }
> >  
> > can one term be executed on one CPU and the other on another? Is there a
> > way to protect this from happening other than locking?
> >
> > I see NetBSD is checking for a change in the number of context switches
> > of the current process.
> >
> > My plan is to have a fix in the tree before 6.6 is released, so I would
> > love to hear your thoughts and reports on this.
> >
> > Thanks,
> > Paul
>
> Hi,
>
> Here is a third version of the TSC diff that also takes into
> consideration the suspend-resume path, which was ignored by the previous
> version, thus rendering resume broken.

Hmm, what is this diff supposed to do upon suspend-resume?  I'm fairly
certain that you'll need to recalibrate the skew in the resume path.
But it doesn't seem to do that.  Or at least it doesn't print any
messages.

Further comments/questions/requests below...


Anyway, here is what gets reported on my x1c3:

cpu0: Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz, 2494.66 MHz, 06-3d-04

tsc_timecounter_init: TSC skew=0 observed drift=0
tsc_timecounter_init: TSC skew=-344 observed drift=0
tsc_timecounter_init: TSC skew=-8 observed drift=0
tsc_timecounter_init: TSC skew=-26 observed drift=0

> Index: arch/amd64/amd64/acpi_machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
> retrieving revision 1.86
> diff -u -p -u -p -r1.86 acpi_machdep.c
> --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 -0000 1.86
> +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 -0000
> @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa;
>  
>  extern int acpi_savecpu(void) __returns_twice;
>  
> +extern int64_t tsc_drift_observed;
> +
>  #define ACPI_BIOS_RSDP_WINDOW_BASE        0xe0000
>  #define ACPI_BIOS_RSDP_WINDOW_SIZE        0x20000
>  
> @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc)
>  {
>   fpuinit(&cpu_info_primary);
>  
> + cpu_info_primary.cpu_cc_skew = 0; /* futile */

So you'll drop it from the final version?

> + tsc_drift_observed = 0; /* reset tsc drift on resume */

What is the point of this?  Do you think that after a suspend/resume
cycle the TSCs are suddenly not drifting anymore?

>   cpu_init(&cpu_info_primary);
>   cpu_ucode_apply(&cpu_info_primary);
>  
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 5 Aug 2019 13:54:34 -0000
> @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (cold && !CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +812,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and do
> + * twice (in tsc_sync_bp) to minimize possible cache effects.

Do what twice?

> + * Disable interrupts to try and rule out any external
> + * interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
>   }
>  
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -852,6 +868,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else if (cold) {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->cpu_cc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->cpu_cc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->cpu_cc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +917,14 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + ci->cpu_cc_skew = 0; /* reset on resume */
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 5 Aug 2019 13:54:34 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +int64_t tsc_drift_max = 250; /* max cycles */
> +int64_t tsc_drift_observed;
> +bool tsc_good;

We don't use bool in the kernel.  And this variable is probably better
named...

> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +

... something like tsc_is_running, since that is what you seem to be
testing here.

>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }
> +
> + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> +    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
> +
> + if (ci->ci_flags & CPUF_PRIMARY)
> + tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + if (tsc_good)
> + return rdmsr(MSR_TSC);
> + else
> + return (rdtsc() + ci->cpu_cc_skew);

Why are you adding the skew here?

> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> + panic("tsc_sync_bp: 1");
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> + membar_consumer();
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci)
> + CPU_BUSY_CYCLE();
> + if (tsc_sync_cpu != NULL)
> + panic("tsc_sync_bp: 2");
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> + membar_consumer();
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> + panic("tsc_sync_ap");
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 5 Aug 2019 13:54:34 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t cpu_cc_skew; /* counter skew vs cpu0 */

Rename this to ci_tsc_skew?

>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */

tab vs. spaces

>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 cpuvar.h
> --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> +++ arch/amd64/include/cpuvar.h 5 Aug 2019 13:54:34 -0000
> @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  void cpu_adjust_tsc_freq(uint64_t (*)());
>  
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
>  #endif
>

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
In reply to this post by Paul Irofti-4
> Date: Mon, 5 Aug 2019 16:58:27 +0300
> From: Paul Irofti <[hidden email]>
>
> On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:
> > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > > From: Paul Irofti <[hidden email]>
> > > >
> > > > Hi,
> > > >
> > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > > clocks across cores.
> > > >
> > > > CPU0 is the reference clock and all others are skewed. During CPU
> > > > initialization the clocks synchronize by keeping a registry of each CPU
> > > > clock skewness and adapting the TSC read routine accordingly.
> > > >
> > > > I choose this implementation over what FreeBSD is doing (which is just
> > > > copying Linux really), because it is clean and elegant.
> > > >
> > > > I would love to hear reports from machines that were broken by this.
> > > > Mine, which never exhibited the problem in the first place, run just
> > > > fine with the following diff. In fact I am writing this message on one
> > > > such machine.
> > > >
> > > > Also constructive comments are more than welcomed!
> > > >
> > > > Notes:
> > > >
> > > > - cpu_counter_serializing() could probably have a better name
> > > >   (tsc_read for example)
> > > > - the PAUSE instruction is probably not needed
> > > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > > >   be trivial to add once the current diff settles
> > > >
> > > > Paul Irofti
> > >
> > > I don't think we want to introduce a <machine/tsc.h> header file.
> > >
> > > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > > I pointed some of them out below.
> > >
> > > Also, how accurate is your skew detection?  What skew is detected on a
> > > machine that (supposedly) has the TSCs in sync?  The result will be
> > > that you actually slightly desync the counters on different CPUs.
> > >
> > > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > > cores.  If the skew is small and the TSC_ADJUST values are the same
> > > across cores it skips the TSC adjustments.
> >
> > Hi,
> >
> > Here is an updated diff with a few bugs eliminated from the previous and
> > with most of the concerns I got in private and from Mark fixed.
> >
> > I will do the TSC_ADJUST_MSR dance in another iteration if the current
> > incarnation turns out to be correct for machines suffering from TSCs not
> > in sync.
> >
> > The thing I am mostly worried about now is in the following sum
> >
> >  uint
> >  tsc_get_timecount(struct timecounter *tc)
> >  {
> > return rdtsc() + curcpu()->cpu_cc_skew;
> >  }
> >  
> > can one term be executed on one CPU and the other on another? Is there a
> > way to protect this from happening other than locking?
> >
> > I see NetBSD is checking for a change in the number of context switches
> > of the current process.
> >
> > My plan is to have a fix in the tree before 6.6 is released, so I would
> > love to hear your thoughts and reports on this.
> >
> > Thanks,
> > Paul
>
> Hi,
>
> Here is a third version of the TSC diff that also takes into
> consideration the suspend-resume path, which was ignored by the previous
> version, thus rendering resume broken.

Also...

> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 5 Aug 2019 13:54:34 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +int64_t tsc_drift_max = 250; /* max cycles */
> +int64_t tsc_drift_observed;
> +bool tsc_good;
> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->cpu_cc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
>   tsc_frequency = tsc_freq_cpuid(ci);
>   tsc_is_invariant = 1;
>  
> + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -100;
> + tsc_is_invariant = 0;
> + }

How is this ever going to knock out the tsc timecounter on a system
with less than 10 cores?

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mike Larkin-2
In reply to this post by Mark Kettenis
On Tue, Aug 06, 2019 at 12:38:51AM +0200, Mark Kettenis wrote:

> > Date: Mon, 5 Aug 2019 16:58:27 +0300
> > From: Paul Irofti <[hidden email]>
> >
> > On Fri, Aug 02, 2019 at 01:29:37PM +0300, Paul Irofti wrote:
> > > On Mon, Jul 01, 2019 at 10:32:51AM +0200, Mark Kettenis wrote:
> > > > > Date: Thu, 27 Jun 2019 15:08:00 +0300
> > > > > From: Paul Irofti <[hidden email]>
> > > > >
> > > > > Hi,
> > > > >
> > > > > Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> > > > > clocks across cores.
> > > > >
> > > > > CPU0 is the reference clock and all others are skewed. During CPU
> > > > > initialization the clocks synchronize by keeping a registry of each CPU
> > > > > clock skewness and adapting the TSC read routine accordingly.
> > > > >
> > > > > I choose this implementation over what FreeBSD is doing (which is just
> > > > > copying Linux really), because it is clean and elegant.
> > > > >
> > > > > I would love to hear reports from machines that were broken by this.
> > > > > Mine, which never exhibited the problem in the first place, run just
> > > > > fine with the following diff. In fact I am writing this message on one
> > > > > such machine.
> > > > >
> > > > > Also constructive comments are more than welcomed!
> > > > >
> > > > > Notes:
> > > > >
> > > > > - cpu_counter_serializing() could probably have a better name
> > > > >   (tsc_read for example)
> > > > > - the PAUSE instruction is probably not needed
> > > > > - acpi(4) suspend and resume bits are left out on purpose, but should
> > > > >   be trivial to add once the current diff settles
> > > > >
> > > > > Paul Irofti
> > > >
> > > > I don't think we want to introduce a <machine/tsc.h> header file.
> > > >
> > > > The code suffers from some NetBSD-isms, so that'll need to be fixed.
> > > > I pointed some of them out below.
> > > >
> > > > Also, how accurate is your skew detection?  What skew is detected on a
> > > > machine that (supposedly) has the TSCs in sync?  The result will be
> > > > that you actually slightly desync the counters on different CPUs.
> > > >
> > > > I think Linux uses the TSC_ADJUST MSR and compares its value across
> > > > cores.  If the skew is small and the TSC_ADJUST values are the same
> > > > across cores it skips the TSC adjustments.
> > >
> > > Hi,
> > >
> > > Here is an updated diff with a few bugs eliminated from the previous and
> > > with most of the concerns I got in private and from Mark fixed.
> > >
> > > I will do the TSC_ADJUST_MSR dance in another iteration if the current
> > > incarnation turns out to be correct for machines suffering from TSCs not
> > > in sync.
> > >
> > > The thing I am mostly worried about now is in the following sum
> > >
> > >  uint
> > >  tsc_get_timecount(struct timecounter *tc)
> > >  {
> > > return rdtsc() + curcpu()->cpu_cc_skew;
> > >  }
> > >  
> > > can one term be executed on one CPU and the other on another? Is there a
> > > way to protect this from happening other than locking?
> > >
> > > I see NetBSD is checking for a change in the number of context switches
> > > of the current process.
> > >
> > > My plan is to have a fix in the tree before 6.6 is released, so I would
> > > love to hear your thoughts and reports on this.
> > >
> > > Thanks,
> > > Paul
> >
> > Hi,
> >
> > Here is a third version of the TSC diff that also takes into
> > consideration the suspend-resume path, which was ignored by the previous
> > version, thus rendering resume broken.
>
> Hmm, what is this diff supposed to do upon suspend-resume?  I'm fairly
> certain that you'll need to recalibrate the skew in the resume path.
> But it doesn't seem to do that.  Or at least it doesn't print any
> messages.

I agree with kettenis. You definitely will need to re-calculate and apply
the skews on resume.

-ml

>
> Further comments/questions/requests below...
>
>
> Anyway, here is what gets reported on my x1c3:
>
> cpu0: Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz, 2494.66 MHz, 06-3d-04
>
> tsc_timecounter_init: TSC skew=0 observed drift=0
> tsc_timecounter_init: TSC skew=-344 observed drift=0
> tsc_timecounter_init: TSC skew=-8 observed drift=0
> tsc_timecounter_init: TSC skew=-26 observed drift=0
>
> > Index: arch/amd64/amd64/acpi_machdep.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 acpi_machdep.c
> > --- arch/amd64/amd64/acpi_machdep.c 23 Oct 2018 17:51:32 -0000 1.86
> > +++ arch/amd64/amd64/acpi_machdep.c 5 Aug 2019 13:54:33 -0000
> > @@ -60,6 +60,8 @@ extern paddr_t tramp_pdirpa;
> >  
> >  extern int acpi_savecpu(void) __returns_twice;
> >  
> > +extern int64_t tsc_drift_observed;
> > +
> >  #define ACPI_BIOS_RSDP_WINDOW_BASE        0xe0000
> >  #define ACPI_BIOS_RSDP_WINDOW_SIZE        0x20000
> >  
> > @@ -481,6 +483,8 @@ acpi_resume_cpu(struct acpi_softc *sc)
> >  {
> >   fpuinit(&cpu_info_primary);
> >  
> > + cpu_info_primary.cpu_cc_skew = 0; /* futile */
>
> So you'll drop it from the final version?
>
> > + tsc_drift_observed = 0; /* reset tsc drift on resume */
>
> What is the point of this?  Do you think that after a suspend/resume
> cycle the TSCs are suddenly not drifting anymore?
>
> >   cpu_init(&cpu_info_primary);
> >   cpu_ucode_apply(&cpu_info_primary);
> >  
> > Index: arch/amd64/amd64/cpu.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> > retrieving revision 1.137
> > diff -u -p -u -p -r1.137 cpu.c
> > --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> > +++ arch/amd64/amd64/cpu.c 5 Aug 2019 13:54:34 -0000
> > @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
> >   cr4 = rcr4();
> >   lcr4(cr4 & ~CR4_PGE);
> >   lcr4(cr4);
> > +
> > + /* Synchronize TSC */
> > + if (cold && !CPU_IS_PRIMARY(ci))
> > +      tsc_sync_ap(ci);
> >  #endif
> >  }
> >  
> > @@ -808,6 +812,7 @@ void
> >  cpu_start_secondary(struct cpu_info *ci)
> >  {
> >   int i;
> > + u_long s;
> >  
> >   ci->ci_flags |= CPUF_AP;
> >  
> > @@ -828,6 +833,17 @@ cpu_start_secondary(struct cpu_info *ci)
> >   printf("dropping into debugger; continue from here to resume boot\n");
> >   db_enter();
> >  #endif
> > + } else {
> > + /*
> > + * Synchronize time stamp counters. Invalidate cache and do
> > + * twice (in tsc_sync_bp) to minimize possible cache effects.
>
> Do what twice?
>
> > + * Disable interrupts to try and rule out any external
> > + * interference.
> > + */
> > + s = intr_disable();
> > + wbinvd();
> > + tsc_sync_bp(ci);
> > + intr_restore(s);
> >   }
> >  
> >   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> > @@ -852,6 +868,8 @@ void
> >  cpu_boot_secondary(struct cpu_info *ci)
> >  {
> >   int i;
> > + int64_t drift;
> > + u_long s;
> >  
> >   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
> >  
> > @@ -864,6 +882,17 @@ cpu_boot_secondary(struct cpu_info *ci)
> >   printf("dropping into debugger; continue from here to resume boot\n");
> >   db_enter();
> >  #endif
> > + } else if (cold) {
> > + /* Synchronize TSC again, check for drift. */
> > + drift = ci->cpu_cc_skew;
> > + s = intr_disable();
> > + wbinvd();
> > + tsc_sync_bp(ci);
> > + intr_restore(s);
> > + drift -= ci->cpu_cc_skew;
> > + printf("TSC skew=%lld drift=%lld\n",
> > +    (long long)ci->cpu_cc_skew, (long long)drift);
> > + tsc_sync_drift(drift);
> >   }
> >  }
> >  
> > @@ -888,7 +917,14 @@ cpu_hatch(void *v)
> >   panic("%s: already running!?", ci->ci_dev->dv_xname);
> >  #endif
> >  
> > + /*
> > + * Synchronize the TSC for the first time. Note that interrupts are
> > + * off at this point.
> > + */
> > + wbinvd();
> >   ci->ci_flags |= CPUF_PRESENT;
> > + ci->cpu_cc_skew = 0; /* reset on resume */
> > + tsc_sync_ap(ci);
> >  
> >   lapic_enable();
> >   lapic_startclock();
> > Index: arch/amd64/amd64/tsc.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> > retrieving revision 1.11
> > diff -u -p -u -p -r1.11 tsc.c
> > --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> > +++ arch/amd64/amd64/tsc.c 5 Aug 2019 13:54:34 -0000
> > @@ -1,8 +1,10 @@
> >  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
> >  /*
> > + * Copyright (c) 2008 The NetBSD Foundation, Inc.
> >   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
> >   * Copyright (c) 2017 Adam Steen <[hidden email]>
> >   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> > + * Copyright (c) 2019 Paul Irofti <[hidden email]>
> >   *
> >   * Permission to use, copy, modify, and distribute this software for any
> >   * purpose with or without fee is hereby granted, provided that the above
> > @@ -20,6 +22,7 @@
> >  #include <sys/param.h>
> >  #include <sys/systm.h>
> >  #include <sys/timetc.h>
> > +#include <sys/atomic.h>
> >  
> >  #include <machine/cpu.h>
> >  #include <machine/cpufunc.h>
> > @@ -33,6 +36,13 @@ int tsc_recalibrate;
> >  uint64_t tsc_frequency;
> >  int tsc_is_invariant;
> >  
> > +int64_t tsc_drift_max = 250; /* max cycles */
> > +int64_t tsc_drift_observed;
> > +bool tsc_good;
>
> We don't use bool in the kernel.  And this variable is probably better
> named...
>
> > +
> > +volatile int64_t tsc_sync_val;
> > +volatile struct cpu_info *tsc_sync_cpu;
> > +
> >  uint tsc_get_timecount(struct timecounter *tc);
> >  
> >  struct timecounter tsc_timecounter = {
> > @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
> >   return;
> >   tsc_frequency = freq;
> >   tsc_timecounter.tc_frequency = freq;
> > -#ifndef MULTIPROCESSOR
> >   if (tsc_is_invariant)
> >   tsc_timecounter.tc_quality = 2000;
> > -#endif
> >  }
> >  
> >  void
> > @@ -194,26 +202,25 @@ cpu_recalibrate_tsc(struct timecounter *
> >  uint
> >  tsc_get_timecount(struct timecounter *tc)
> >  {
> > - return rdtsc();
> > + return rdtsc() + curcpu()->cpu_cc_skew;
> >  }
> >  
> >  void
> >  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
> >  {
> > - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> > -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> > + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
> >      !(ci->ci_flags & CPUF_INVAR_TSC))
> >   return;
> >  
> >   tsc_frequency = tsc_freq_cpuid(ci);
> >   tsc_is_invariant = 1;
> >  
> > + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> > +
>
> ... something like tsc_is_running, since that is what you seem to be
> testing here.
>
> >   /* Newer CPUs don't require recalibration */
> >   if (tsc_frequency > 0) {
> >   tsc_timecounter.tc_frequency = tsc_frequency;
> > -#ifndef MULTIPROCESSOR
> >   tsc_timecounter.tc_quality = 2000;
> > -#endif
> >   } else {
> >   tsc_recalibrate = 1;
> >   tsc_frequency = cpufreq;
> > @@ -221,5 +228,112 @@ tsc_timecounter_init(struct cpu_info *ci
> >   calibrate_tsc_freq();
> >   }
> >  
> > - tc_init(&tsc_timecounter);
> > + if (tsc_drift_observed > tsc_drift_max) {
> > + printf("ERROR: %lld cycle TSC drift observed\n",
> > +    (long long)tsc_drift_observed);
> > + tsc_timecounter.tc_quality = -100;
> > + tsc_is_invariant = 0;
> > + }
> > +
> > + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> > +    (long long)ci->cpu_cc_skew, (long long)tsc_drift_observed);
> > +
> > + if (ci->ci_flags & CPUF_PRIMARY)
> > + tc_init(&tsc_timecounter);
> > +}
> > +
> > +static uint64_t
> > +cpu_counter_serializing(struct cpu_info *ci)
> > +{
> > + if (tsc_good)
> > + return rdmsr(MSR_TSC);
> > + else
> > + return (rdtsc() + ci->cpu_cc_skew);
>
> Why are you adding the skew here?
>
> > +}
> > +
> > +/*
> > + * Record drift (in clock cycles).  Called during AP startup.
> > + */
> > +void
> > +tsc_sync_drift(int64_t drift)
> > +{
> > + if (drift < 0)
> > + drift = -drift;
> > + if (drift > tsc_drift_observed)
> > + tsc_drift_observed = drift;
> > +}
> > +
> > +/*
> > + * Called during startup of APs, by the boot processor.  Interrupts
> > + * are disabled on entry.
> > + */
> > +static void
> > +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> > +{
> > + uint64_t bptsc;
> > +
> > + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> > + panic("tsc_sync_bp: 1");
> > +
> > + /* Flag it and read our TSC. */
> > + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> > + bptsc = cpu_counter_serializing(ci) >> 1;
> > +
> > + /* Wait for remote to complete, and read ours again. */
> > + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> > + membar_consumer();
> > + bptsc += (cpu_counter_serializing(ci) >> 1);
> > +
> > + /* Wait for the results to come in. */
> > + while (tsc_sync_cpu == ci)
> > + CPU_BUSY_CYCLE();
> > + if (tsc_sync_cpu != NULL)
> > + panic("tsc_sync_bp: 2");
> > +
> > + *bptscp = bptsc;
> > + *aptscp = tsc_sync_val;
> > +}
> > +
> > +void
> > +tsc_sync_bp(struct cpu_info *ci)
> > +{
> > + uint64_t bptsc, aptsc;
> > +
> > + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> > + tsc_read_bp(ci, &bptsc, &aptsc);
> > +
> > + /* Compute final value to adjust for skew. */
> > + ci->cpu_cc_skew = bptsc - aptsc;
> > +}
> > +
> > +/*
> > + * Called during startup of AP, by the AP itself.  Interrupts are
> > + * disabled on entry.
> > + */
> > +static void
> > +tsc_post_ap(struct cpu_info *ci)
> > +{
> > + uint64_t tsc;
> > +
> > + /* Wait for go-ahead from primary. */
> > + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> > + membar_consumer();
> > + tsc = (cpu_counter_serializing(ci) >> 1);
> > +
> > + /* Instruct primary to read its counter. */
> > + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> > + tsc += (cpu_counter_serializing(ci) >> 1);
> > +
> > + /* Post result.  Ensure the whole value goes out atomically. */
> > + (void)atomic_swap_64(&tsc_sync_val, tsc);
> > +
> > + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> > + panic("tsc_sync_ap");
> > +}
> > +
> > +void
> > +tsc_sync_ap(struct cpu_info *ci)
> > +{
> > + tsc_post_ap(ci);
> > + tsc_post_ap(ci);
> >  }
> > Index: arch/amd64/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> > retrieving revision 1.131
> > diff -u -p -u -p -r1.131 cpu.h
> > --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> > +++ arch/amd64/include/cpu.h 5 Aug 2019 13:54:34 -0000
> > @@ -206,6 +206,8 @@ struct cpu_info {
> >   union vmm_cpu_cap ci_vmm_cap;
> >   paddr_t ci_vmxon_region_pa;
> >   struct vmxon_region *ci_vmxon_region;
> > +
> > + int64_t cpu_cc_skew; /* counter skew vs cpu0 */
>
> Rename this to ci_tsc_skew?
>
> >  };
> >  
> >  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> > @@ -221,6 +223,7 @@ struct cpu_info {
> >  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
> >  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
> >  
> > +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>
> tab vs. spaces
>
> >  #define CPUF_PRESENT 0x1000 /* CPU is present */
> >  #define CPUF_RUNNING 0x2000 /* CPU is running */
> >  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> > Index: arch/amd64/include/cpuvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> > retrieving revision 1.9
> > diff -u -p -u -p -r1.9 cpuvar.h
> > --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> > +++ arch/amd64/include/cpuvar.h 5 Aug 2019 13:54:34 -0000
> > @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
> >  void cpu_init_first(void);
> >  void cpu_adjust_tsc_freq(uint64_t (*)());
> >  
> > +void tsc_sync_drift(int64_t);
> > +void tsc_sync_bp(struct cpu_info *);
> > +void tsc_sync_ap(struct cpu_info *);
> > +
> >  #endif
> >
>

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Paul Irofti-4
In reply to this post by Paul Irofti-4
Hi,

Here is a fourth diff addressing all the issues raised so far, mainly
by kettenis@, thanks!

Changes:
        - stop resetting the observed drift as it does not affect tsc
          re-initialization on resume, thus removing all changes from
          acpi_machdep.c
        - fix comment and put a temporary pretty printf of resume
        - rename cpu_cc_skew to ci_tsc_skew
        - remove unfinished code using MSR_TSC for synchronization (to
          be added later on together with the missing IA32_TSC_ADJUST
          wrmsr commands)

All other technical issues were discussed and settled in private and
require no change to the former diff.


For testing you can also use the regress test after booting with tsc as
default clock and waiting for an hour or so to let the clocks go wild:

  # cd /usr/src/regress/sys/kern/clock_gettime
  # make regress

There is another test program flying around the mailing lists I guess,
but I could not locate it now so if someone is kind enough to reply with
the code, that would be lovely!

Paul


Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
+++ arch/amd64/amd64/cpu.c 6 Aug 2019 20:19:27 -0000
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
  cr4 = rcr4();
  lcr4(cr4 & ~CR4_PGE);
  lcr4(cr4);
+
+ /* Synchronize TSC */
+ if (cold && !CPU_IS_PRIMARY(ci))
+      tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
  int i;
+ u_long s;
 
  ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /*
+ * Synchronize time stamp counters. Invalidate cache and
+ * synchronize twice (in tsc_sync_bp) to minimize possible
+ * cache effects. Disable interrupts to try and rule out any
+ * external interference.
+ */
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
  }
 
  if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +869,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
  int i;
+ int64_t drift;
+ u_long s;
 
  atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else if (cold) {
+ /* Synchronize TSC again, check for drift. */
+ drift = ci->ci_tsc_skew;
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ drift -= ci->ci_tsc_skew;
+ printf("TSC skew=%lld drift=%lld\n",
+    (long long)ci->ci_tsc_skew, (long long)drift);
+ tsc_sync_drift(drift);
  }
 }
 
@@ -888,7 +918,14 @@ cpu_hatch(void *v)
  panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+ /*
+ * Synchronize the TSC for the first time. Note that interrupts are
+ * off at this point.
+ */
+ wbinvd();
  ci->ci_flags |= CPUF_PRESENT;
+ ci->ci_tsc_skew = 0; /* reset on resume */
+ tsc_sync_ap(ci);
 
  lapic_enable();
  lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
+++ arch/amd64/amd64/tsc.c 6 Aug 2019 20:19:27 -0000
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
  * Copyright (c) 2017 Adam Steen <[hidden email]>
  * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,12 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
+int64_t tsc_drift_max = 250; /* max cycles */
+int64_t tsc_drift_observed;
+
+volatile int64_t tsc_sync_val;
+volatile struct cpu_info *tsc_sync_cpu;
+
 uint tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +181,8 @@ calibrate_tsc_freq(void)
  return;
  tsc_frequency = freq;
  tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
  if (tsc_is_invariant)
  tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -194,14 +201,13 @@ cpu_recalibrate_tsc(struct timecounter *
 uint
 tsc_get_timecount(struct timecounter *tc)
 {
- return rdtsc();
+ return rdtsc() + curcpu()->ci_tsc_skew;
 }
 
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
- if (!(ci->ci_flags & CPUF_PRIMARY) ||
-    !(ci->ci_flags & CPUF_CONST_TSC) ||
+ if (!(ci->ci_flags & CPUF_CONST_TSC) ||
     !(ci->ci_flags & CPUF_INVAR_TSC))
  return;
 
@@ -211,9 +217,7 @@ tsc_timecounter_init(struct cpu_info *ci
  /* Newer CPUs don't require recalibration */
  if (tsc_frequency > 0) {
  tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
  tsc_timecounter.tc_quality = 2000;
-#endif
  } else {
  tsc_recalibrate = 1;
  tsc_frequency = cpufreq;
@@ -221,5 +225,109 @@ tsc_timecounter_init(struct cpu_info *ci
  calibrate_tsc_freq();
  }
 
- tc_init(&tsc_timecounter);
+ if (tsc_drift_observed > tsc_drift_max) {
+ printf("ERROR: %lld cycle TSC drift observed\n",
+    (long long)tsc_drift_observed);
+ tsc_timecounter.tc_quality = -1000;
+ tsc_is_invariant = 0;
+ }
+
+ printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
+    (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
+
+ if (ci->ci_flags & CPUF_PRIMARY)
+ tc_init(&tsc_timecounter);
+}
+
+static uint64_t
+cpu_counter_serializing(struct cpu_info *ci)
+{
+ return (rdtsc() + ci->ci_tsc_skew);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+ if (drift < 0)
+ drift = -drift;
+ if (drift > tsc_drift_observed)
+ tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+static void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+ uint64_t bptsc;
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
+ panic("tsc_sync_bp: 1");
+
+ /* Flag it and read our TSC. */
+ atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ bptsc = cpu_counter_serializing(ci) >> 1;
+
+ /* Wait for remote to complete, and read ours again. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
+ membar_consumer();
+ bptsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Wait for the results to come in. */
+ while (tsc_sync_cpu == ci)
+ CPU_BUSY_CYCLE();
+ if (tsc_sync_cpu != NULL)
+ panic("tsc_sync_bp: 2");
+
+ *bptscp = bptsc;
+ *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+ uint64_t bptsc, aptsc;
+
+ tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+ tsc_read_bp(ci, &bptsc, &aptsc);
+
+ /* Compute final value to adjust for skew. */
+ ci->ci_tsc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+static void
+tsc_post_ap(struct cpu_info *ci)
+{
+ uint64_t tsc;
+
+ /* Wait for go-ahead from primary. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
+ membar_consumer();
+ tsc = (cpu_counter_serializing(ci) >> 1);
+
+ /* Instruct primary to read its counter. */
+ atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ tsc += (cpu_counter_serializing(ci) >> 1);
+
+ /* Post result.  Ensure the whole value goes out atomically. */
+ (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
+ panic("tsc_sync_ap");
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+ tsc_post_ap(ci);
+ tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
+++ arch/amd64/include/cpu.h 6 Aug 2019 20:19:27 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
  union vmm_cpu_cap ci_vmm_cap;
  paddr_t ci_vmxon_region_pa;
  struct vmxon_region *ci_vmxon_region;
+
+ int64_t ci_tsc_skew; /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP 0x0001 /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
 #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
 
+#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
 #define CPUF_PRESENT 0x1000 /* CPU is present */
 #define CPUF_RUNNING 0x2000 /* CPU is running */
 #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
Index: arch/amd64/include/cpuvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 cpuvar.h
--- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
+++ arch/amd64/include/cpuvar.h 6 Aug 2019 20:19:27 -0000
@@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
 void cpu_init_first(void);
 void cpu_adjust_tsc_freq(uint64_t (*)());
 
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
 #endif

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Theo Buehler-3
> There is another test program flying around the mailing lists I guess,
> but I could not locate it now so if someone is kind enough to reply with
> the code, that would be lovely!

I think this is the program you're looking for:
https://marc.info/?l=openbsd-tech&m=155978126517159&w=2

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Theo Buehler-3
/*
 * Version of https://marc.info/?l=openbsd-tech&m=155978126517159&w=2
 * without non-breaking spaces, thus appeasing -Wunicode-whitespace.
 */
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>

int
main(void)
{
        int r;
        struct timespec tp1, tp2, tout;

        tout.tv_sec = 0;
        tout.tv_nsec = 100000;

        for (;;) {
                r = clock_gettime(CLOCK_MONOTONIC, &tp1);
                if (r == -1) {
                        perror("clock_gettime");
                        exit(-1);
                }

                nanosleep(&tout, NULL);

                r = clock_gettime(CLOCK_MONOTONIC, &tp2);
                if (r == -1) {
                        perror("clock_gettime");
                        exit(-1);
                }
       
                // tp1 should never be larger than tp2
                r = timespeccmp(&tp1, &tp2, >);
                if (r == 1) {
                        printf("timespeccmp failed\n");
                        printf("tp1 s:%lld n:%ld\n", tp1.tv_sec, tp1.tv_nsec);
                        printf("tp2 s:%lld n:%ld\n", tp2.tv_sec, tp2.tv_nsec);
                        exit(-1);
                }
        }

        return 0;
}

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Claudio Jeker
In reply to this post by Paul Irofti-4
On Tue, Aug 06, 2019 at 11:29:30PM +0300, Paul Irofti wrote:

> Hi,
>
> Here is a fourth diff addressing all the issues so far, that have been
> mainly pointed out by kettenis@, thanks!
>
> Changes:
> - stop resetting the observed drift as it does not affect tsc
>  re-initialization on resume, thus removing all changes from
>  acpi_machdep.c
> - fix comment and put a temporary pretty printf of resume
> - rename cpu_cc_skew to ci_tsc_skew
> - remove unfinished code using MSR_TSC for synchronization (to
>  be added later on together with the missing IA32_TSC_ADJUST
>  wrmsr commands)
>
> All other technical issues were discussed and settled in private and
> require no change to the former diff.
>
>
> For testing you can also use the regress test after booting with tsc as
> default clock and waiting for an hour or so to let the clocks go wild:
>
>   # cd /usr/src/regress/sys/kern/clock_gettime
>   # make regress
>
> There is another test program flying around the mailing lists I guess,
> but I could not locate it now so if someone is kind enough to reply with
> the code, that would be lovely!
>

Works fine on my AMD Phenom(tm) II X6 1055T system. I remember some TSC
issues with this box, but running the test code posted by tb@ never
triggered a failure, even during a make build.

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
In reply to this post by Paul Irofti-4
> Date: Tue, 6 Aug 2019 23:29:30 +0300
> From: Paul Irofti <[hidden email]>
>
> Hi,
>
> Here is a fourth diff addressing all the issues so far, that have been
> mainly pointed out by kettenis@, thanks!
>
> Changes:
> - stop resetting the observed drift as it does not affect tsc
>  re-initialization on resume, thus removing all changes from
>  acpi_machdep.c
> - fix comment and put a temporary pretty printf of resume
> - rename cpu_cc_skew to ci_tsc_skew
> - remove unfinished code using MSR_TSC for synchronization (to
>  be added later on together with the missing IA32_TSC_ADJUST
>  wrmsr commands)
>
> All other technical issues were discussed and settled in private and
> require no change to the former diff.
>
>
> For testing you can also use the regress test after booting with tsc as
> default clock and waiting for an hour or so to let the clocks go wild:
>
>   # cd /usr/src/regress/sys/kern/clock_gettime
>   # make regress
>
> There is another test program flying around the mailing lists I guess,
> but I could not locate it now so if someone is kind enough to reply with
> the code, that would be lovely!
>
> Paul

Hi Paul,

Still some small questions/issues now that the MSR thing has been
cleared up.

With those issues fixed, this is ok kettenis@

> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 6 Aug 2019 20:19:27 -0000
> @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (cold && !CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +812,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and
> + * synchronize twice (in tsc_sync_bp) to minimize possible
> + * cache effects. Disable interrupts to try and rule out any
> + * external interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
>   }
>  
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -852,6 +869,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else if (cold) {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->ci_tsc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->ci_tsc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->ci_tsc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +918,14 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + ci->ci_tsc_skew = 0; /* reset on resume */
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 6 Aug 2019 20:19:27 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,12 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +int64_t tsc_drift_max = 250; /* max cycles */

There is no real reason for this to be a variable, is there?  It could
be a #define, which would save some space.

> +int64_t tsc_drift_observed;
> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +181,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,14 +201,13 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->ci_tsc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
> @@ -211,9 +217,7 @@ tsc_timecounter_init(struct cpu_info *ci
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +225,109 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > tsc_drift_max) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -1000;
> + tsc_is_invariant = 0;
> + }
> +
> + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> +    (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
> +
> + if (ci->ci_flags & CPUF_PRIMARY)
> + tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> + return (rdtsc() + ci->ci_tsc_skew);
> +}

Please no static in the kernel.  If you want to have this inlined,
make it a "static inline".  Also please use a tsc-prefixed name;
i.e. tsc_get_serializing().  However, the "serializing" bit of the name
is misleading as the RDTSC instruction is not serializing.

To make it serializing you'll need LFENCE/MFENCE instructions before
and/or after the RDTSC instruction.  Funny thing is that NetBSD, where
this code is coming from, doesn't do that.  But they did have that
"tsc_good" codepath that used RDMSR.

In the end, I don't think it matters.  At best it avoids a little bit
of jitter in the skew measurements.  What I'd do is simply remove this
function completely and replace it with calls to rdtsc().  The code
that uses it only cares about differences between TSC readings and
therefore adding the skew makes no difference.  I think that makes it
easier to understand what's going on.  Also means we don't have to
argue about the name ;).

> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void

No static please.

> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> + panic("tsc_sync_bp: 1");
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = cpu_counter_serializing(ci) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> + membar_consumer();
> + bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci)
> + CPU_BUSY_CYCLE();
> + if (tsc_sync_cpu != NULL)
> + panic("tsc_sync_bp: 2");
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->ci_tsc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void

No static please.

> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> + membar_consumer();
> + tsc = (cpu_counter_serializing(ci) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (cpu_counter_serializing(ci) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> + panic("tsc_sync_ap");
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 6 Aug 2019 20:19:27 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t ci_tsc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 cpuvar.h
> --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> +++ arch/amd64/include/cpuvar.h 6 Aug 2019 20:19:27 -0000
> @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  void cpu_adjust_tsc_freq(uint64_t (*)());
>  
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
>  #endif
>
>

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Hrvoje Popovski
In reply to this post by Paul Irofti-4
On 6.8.2019. 22:29, Paul Irofti wrote:

> Hi,
>
> Here is a fourth diff addressing all the issues so far, that have been
> mainly pointed out by kettenis@, thanks!
>
> Changes:
> - stop resetting the observed drift as it does not affect tsc
>  re-initialization on resume, thus removing all changes from
>  acpi_machdep.c
> - fix comment and put a temporary pretty printf of resume
> - rename cpu_cc_skew to ci_tsc_skew
> - remove unfinished code using MSR_TSC for synchronization (to
>  be added later on together with the missing IA32_TSC_ADJUST
>  wrmsr commands)
>
> All other technical issues were discussed and settled in private and
> require no change to the former diff.
>
>
> For testing you can also use the regress test after booting with tsc as
> default clock and waiting for an hour or so to let the clocks go wild:
>
>   # cd /usr/src/regress/sys/kern/clock_gettime
>   # make regress
>
> There is another test program flying around the mailing lists I guess,
> but I could not locate it now so if someone is kind enough to reply with
> the code, that would be lovely!
>
> Paul

Hi,

I applied this diff on a Dell R6415 with an AMD EPYC 7551P with 32/64 cores,
ran the regress test and the test program that tb@ pointed out, and
everything seems right.


r6415# sysctl kern.timecounter.hardware
kern.timecounter.hardware=tsc


r6415# dmesg | grep tsc_timecounter_init
tsc_timecounter_init: TSC skew=0 observed drift=0
tsc_timecounter_init: TSC skew=-260 observed drift=0
tsc_timecounter_init: TSC skew=-240 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=120 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-520 observed drift=0
tsc_timecounter_init: TSC skew=-440 observed drift=0
tsc_timecounter_init: TSC skew=-10 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-440 observed drift=0
tsc_timecounter_init: TSC skew=110 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-440 observed drift=0
tsc_timecounter_init: TSC skew=-30 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-430 observed drift=0
tsc_timecounter_init: TSC skew=110 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=-450 observed drift=0
tsc_timecounter_init: TSC skew=20 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=-450 observed drift=0
tsc_timecounter_init: TSC skew=110 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-440 observed drift=0
tsc_timecounter_init: TSC skew=-10 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-450 observed drift=0
tsc_timecounter_init: TSC skew=130 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-430 observed drift=0
tsc_timecounter_init: TSC skew=70 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-510 observed drift=0
tsc_timecounter_init: TSC skew=-430 observed drift=0
tsc_timecounter_init: TSC skew=140 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-430 observed drift=0
tsc_timecounter_init: TSC skew=20 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-500 observed drift=0
tsc_timecounter_init: TSC skew=-430 observed drift=0
tsc_timecounter_init: TSC skew=-380 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-490 observed drift=0
tsc_timecounter_init: TSC skew=-470 observed drift=0
tsc_timecounter_init: TSC skew=0 observed drift=0
tsc_timecounter_init: TSC skew=-460 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
tsc_timecounter_init: TSC skew=-440 observed drift=0
tsc_timecounter_init: TSC skew=130 observed drift=0
tsc_timecounter_init: TSC skew=-450 observed drift=0
tsc_timecounter_init: TSC skew=-510 observed drift=0
tsc_timecounter_init: TSC skew=-480 observed drift=0
r6415# dmesg | grep tsc_timecounter_init | wc -l
      64



OpenBSD 6.5-current (GENERIC.MP) #4: Wed Aug  7 13:45:08 CEST 2019
    [hidden email]:/sys/arch/amd64/compile/GENERIC.MP
real mem = 274525384704 (261807MB)
avail mem = 266195267584 (253863MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.2 @ 0x6eab3000 (67 entries)
bios0: vendor Dell Inc. version "1.9.3" date 06/25/2019
bios0: Dell Inc. PowerEdge R6415
acpi0 at bios0: ACPI 6.0
acpi0: sleep states S0 S5
acpi0: tables DSDT FACP SSDT BERT HEST SSDT SRAT MSCT SLIT CRAT EINJ
SLIC HPET APIC MCFG SSDT IVRS SSDT
acpi0: wakeup devices PC00(S5) PC01(S5) PC02(S5) PC03(S5)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpihpet0 at acpi0: 14318180 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD EPYC 7551P 32-Core Processor, 1996.63 MHz, 17-01-02
cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, C-substates=1.1, IBE
cpu1 at mainbus0: apid 16 (application processor)
TSC skew=-260
cpu1: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu1:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-260 observed drift=0
cpu1: smt 0, core 8, package 1
cpu2 at mainbus0: apid 32 (application processor)
TSC skew=-240
cpu2: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu2:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu2: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu2: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-240 observed drift=0
cpu2: smt 0, core 16, package 2
cpu3 at mainbus0: apid 48 (application processor)
TSC skew=-470
cpu3: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu3:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu3: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu3: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu3: smt 0, core 24, package 3
cpu4 at mainbus0: apid 8 (application processor)
TSC skew=120
cpu4: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu4:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu4: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu4: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=120 observed drift=0
cpu4: smt 0, core 4, package 0
cpu5 at mainbus0: apid 24 (application processor)
TSC skew=-480
cpu5: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu5:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu5: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu5: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu5: smt 0, core 12, package 1
cpu6 at mainbus0: apid 40 (application processor)
TSC skew=-520
cpu6: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu6:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu6: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu6: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-520 observed drift=0
cpu6: smt 0, core 20, package 2
cpu7 at mainbus0: apid 56 (application processor)
TSC skew=-440
cpu7: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu7:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu7: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu7: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-440 observed drift=0
cpu7: smt 0, core 28, package 3
cpu8 at mainbus0: apid 2 (application processor)
TSC skew=-10
cpu8: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu8:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu8: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu8: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu8: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-10 observed drift=0
cpu8: smt 0, core 1, package 0
cpu9 at mainbus0: apid 18 (application processor)
TSC skew=-460
cpu9: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu9:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu9: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu9: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu9: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu9: smt 0, core 9, package 1
cpu10 at mainbus0: apid 34 (application processor)
TSC skew=-490
cpu10: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu10:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu10: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu10: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu10: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu10: smt 0, core 17, package 2
cpu11 at mainbus0: apid 50 (application processor)
TSC skew=-440
cpu11: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu11:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu11: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu11: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu11: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-440 observed drift=0
cpu11: smt 0, core 25, package 3
cpu12 at mainbus0: apid 10 (application processor)
TSC skew=110
cpu12: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu12:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu12: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu12: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu12: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=110 observed drift=0
cpu12: smt 0, core 5, package 0
cpu13 at mainbus0: apid 26 (application processor)
TSC skew=-470
cpu13: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu13:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu13: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu13: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu13: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu13: smt 0, core 13, package 1
cpu14 at mainbus0: apid 42 (application processor)
TSC skew=-490
cpu14: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu14:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu14: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu14: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu14: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu14: smt 0, core 21, package 2
cpu15 at mainbus0: apid 58 (application processor)
TSC skew=-440
cpu15: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu15:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu15: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu15: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu15: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-440 observed drift=0
cpu15: smt 0, core 29, package 3
cpu16 at mainbus0: apid 4 (application processor)
TSC skew=-30
cpu16: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu16:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu16: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu16: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu16: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-30 observed drift=0
cpu16: smt 0, core 2, package 0
cpu17 at mainbus0: apid 20 (application processor)
TSC skew=-470
cpu17: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu17:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu17: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu17: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu17: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu17: smt 0, core 10, package 1
cpu18 at mainbus0: apid 36 (application processor)
TSC skew=-490
cpu18: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu18:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu18: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu18: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu18: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu18: smt 0, core 18, package 2
cpu19 at mainbus0: apid 52 (application processor)
TSC skew=-430
cpu19: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu19:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu19: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu19: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu19: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-430 observed drift=0
cpu19: smt 0, core 26, package 3
cpu20 at mainbus0: apid 12 (application processor)
TSC skew=110
cpu20: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu20:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu20: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu20: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu20: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=110 observed drift=0
cpu20: smt 0, core 6, package 0
cpu21 at mainbus0: apid 28 (application processor)
TSC skew=-480
cpu21: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu21:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu21: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu21: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu21: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu21: smt 0, core 14, package 1
cpu22 at mainbus0: apid 44 (application processor)
TSC skew=-470
cpu22: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu22:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu22: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu22: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu22: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu22: smt 0, core 22, package 2
cpu23 at mainbus0: apid 60 (application processor)
TSC skew=-450
cpu23: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu23:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu23: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu23: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu23: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-450 observed drift=0
cpu23: smt 0, core 30, package 3
cpu24 at mainbus0: apid 6 (application processor)
TSC skew=20
cpu24: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu24:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu24: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu24: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu24: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=20 observed drift=0
cpu24: smt 0, core 3, package 0
cpu25 at mainbus0: apid 22 (application processor)
TSC skew=-470
cpu25: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu25:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu25: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu25: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu25: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu25: smt 0, core 11, package 1
cpu26 at mainbus0: apid 38 (application processor)
TSC skew=-470
cpu26: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu26:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu26: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu26: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu26: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu26: smt 0, core 19, package 2
cpu27 at mainbus0: apid 54 (application processor)
TSC skew=-450
cpu27: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu27:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu27: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu27: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu27: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-450 observed drift=0
cpu27: smt 0, core 27, package 3
cpu28 at mainbus0: apid 14 (application processor)
TSC skew=110
cpu28: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu28:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu28: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu28: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu28: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=110 observed drift=0
cpu28: smt 0, core 7, package 0
cpu29 at mainbus0: apid 30 (application processor)
TSC skew=-460
cpu29: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu29:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu29: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu29: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu29: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu29: smt 0, core 15, package 1
cpu30 at mainbus0: apid 46 (application processor)
TSC skew=-480
cpu30: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu30:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu30: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu30: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu30: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu30: smt 0, core 23, package 2
cpu31 at mainbus0: apid 62 (application processor)
TSC skew=-440
cpu31: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu31:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu31: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu31: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu31: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-440 observed drift=0
cpu31: smt 0, core 31, package 3
cpu32 at mainbus0: apid 1 (application processor)
TSC skew=-10
cpu32: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu32:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu32: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu32: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu32: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-10 observed drift=0
cpu32: smt 1, core 0, package 0
cpu33 at mainbus0: apid 17 (application processor)
TSC skew=-460
cpu33: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu33:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu33: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu33: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu33: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu33: smt 1, core 8, package 1
cpu34 at mainbus0: apid 33 (application processor)
TSC skew=-490
cpu34: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu34:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu34: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu34: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu34: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu34: smt 1, core 16, package 2
cpu35 at mainbus0: apid 49 (application processor)
TSC skew=-450
cpu35: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu35:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu35: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu35: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu35: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-450 observed drift=0
cpu35: smt 1, core 24, package 3
cpu36 at mainbus0: apid 9 (application processor)
TSC skew=130
cpu36: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu36:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu36: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu36: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu36: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=130 observed drift=0
cpu36: smt 1, core 4, package 0
cpu37 at mainbus0: apid 25 (application processor)
TSC skew=-460
cpu37: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu37:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu37: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu37: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu37: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu37: smt 1, core 12, package 1
cpu38 at mainbus0: apid 41 (application processor)
TSC skew=-480
cpu38: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu38:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu38: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu38: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu38: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu38: smt 1, core 20, package 2
cpu39 at mainbus0: apid 57 (application processor)
TSC skew=-430
cpu39: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu39:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu39: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu39: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu39: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-430 observed drift=0
cpu39: smt 1, core 28, package 3
cpu40 at mainbus0: apid 3 (application processor)
TSC skew=70
cpu40: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu40:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu40: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu40: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu40: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=70 observed drift=0
cpu40: smt 1, core 1, package 0
cpu41 at mainbus0: apid 19 (application processor)
TSC skew=-480
cpu41: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu41:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu41: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu41: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu41: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu41: smt 1, core 9, package 1
cpu42 at mainbus0: apid 35 (application processor)
TSC skew=-510
cpu42: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu42:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu42: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu42: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu42: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-510 observed drift=0
cpu42: smt 1, core 17, package 2
cpu43 at mainbus0: apid 51 (application processor)
TSC skew=-430
cpu43: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu43:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu43: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu43: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu43: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-430 observed drift=0
cpu43: smt 1, core 25, package 3
cpu44 at mainbus0: apid 11 (application processor)
TSC skew=140
cpu44: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu44:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu44: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu44: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu44: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=140 observed drift=0
cpu44: smt 1, core 5, package 0
cpu45 at mainbus0: apid 27 (application processor)
TSC skew=-460
cpu45: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu45:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu45: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu45: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu45: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu45: smt 1, core 13, package 1
cpu46 at mainbus0: apid 43 (application processor)
TSC skew=-490
cpu46: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu46:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu46: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu46: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu46: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu46: smt 1, core 21, package 2
cpu47 at mainbus0: apid 59 (application processor)
TSC skew=-430
cpu47: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu47:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu47: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu47: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu47: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-430 observed drift=0
cpu47: smt 1, core 29, package 3
cpu48 at mainbus0: apid 5 (application processor)
TSC skew=20
cpu48: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu48:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu48: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu48: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu48: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=20 observed drift=0
cpu48: smt 1, core 2, package 0
cpu49 at mainbus0: apid 21 (application processor)
TSC skew=-490
cpu49: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu49:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu49: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu49: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu49: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu49: smt 1, core 10, package 1
cpu50 at mainbus0: apid 37 (application processor)
TSC skew=-500
cpu50: AMD EPYC 7551P 32-Core Processor, 1996.25 MHz, 17-01-02
cpu50:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu50: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu50: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu50: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-500 observed drift=0
cpu50: smt 1, core 18, package 2
cpu51 at mainbus0: apid 53 (application processor)
TSC skew=-430
cpu51: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu51:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu51: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu51: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu51: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-430 observed drift=0
cpu51: smt 1, core 26, package 3
cpu52 at mainbus0: apid 13 (application processor)
TSC skew=-380
cpu52: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu52:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu52: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu52: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu52: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-380 observed drift=0
cpu52: smt 1, core 6, package 0
cpu53 at mainbus0: apid 29 (application processor)
TSC skew=-460
cpu53: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu53:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu53: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu53: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu53: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu53: smt 1, core 14, package 1
cpu54 at mainbus0: apid 45 (application processor)
TSC skew=-490
cpu54: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu54:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu54: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu54: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu54: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-490 observed drift=0
cpu54: smt 1, core 22, package 2
cpu55 at mainbus0: apid 61 (application processor)
TSC skew=-470
cpu55: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu55:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu55: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu55: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu55: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-470 observed drift=0
cpu55: smt 1, core 30, package 3
cpu56 at mainbus0: apid 7 (application processor)
TSC skew=0
cpu56: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu56:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu56: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu56: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu56: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=0 observed drift=0
cpu56: smt 1, core 3, package 0
cpu57 at mainbus0: apid 23 (application processor)
TSC skew=-460
cpu57: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu57:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu57: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu57: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu57: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-460 observed drift=0
cpu57: smt 1, core 11, package 1
cpu58 at mainbus0: apid 39 (application processor)
TSC skew=-480
cpu58: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu58:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu58: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu58: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu58: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu58: smt 1, core 19, package 2
cpu59 at mainbus0: apid 55 (application processor)
TSC skew=-440
cpu59: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu59:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu59: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu59: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu59: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-440 observed drift=0
cpu59: smt 1, core 27, package 3
cpu60 at mainbus0: apid 15 (application processor)
TSC skew=130
cpu60: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu60:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu60: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu60: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu60: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=130 observed drift=0
cpu60: smt 1, core 7, package 0
cpu61 at mainbus0: apid 31 (application processor)
TSC skew=-450
cpu61: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu61:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu61: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu61: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu61: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-450 observed drift=0
cpu61: smt 1, core 15, package 1
cpu62 at mainbus0: apid 47 (application processor)
TSC skew=-510
cpu62: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu62:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu62: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu62: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu62: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-510 observed drift=0
cpu62: smt 1, core 23, package 2
cpu63 at mainbus0: apid 63 (application processor)
TSC skew=-480
cpu63: AMD EPYC 7551P 32-Core Processor, 1996.26 MHz, 17-01-02
cpu63:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu63: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB
64b/line 8-way L2 cache, 64MB 64b/line 64-way L3 cache
cpu63: ITLB 64 4KB entries fully associative, 64 4MB entries fully
associative
cpu63: DTLB 64 4KB entries fully associative, 64 4MB entries fully
associative
tsc_timecounter_init: TSC skew=-480 observed drift=0
cpu63: smt 1, core 31, package 3
ioapic0 at mainbus0: apid 128 pa 0xfec00000, version 21, 24 pins, can't
remap
ioapic1 at mainbus0: apid 129 pa 0xfd880000, version 21, 32 pins, can't
remap
ioapic2 at mainbus0: apid 130 pa 0xe0900000, version 21, 32 pins, can't
remap
ioapic3 at mainbus0: apid 131 pa 0xc5900000, version 21, 32 pins, can't
remap
ioapic4 at mainbus0: apid 132 pa 0xaa900000, version 21, 32 pins, can't
remap
acpimadt0: unknown apic structure type 80
acpimadt0: unknown apic structure type 80
acpimadt0: unknown apic structure type 80
acpimadt0: unknown apic structure type 80
acpimcfg0 at acpi0
acpimcfg0: addr 0x80000000, bus 0-255
acpiprt0 at acpi0: bus 0 (PC00)
acpiprt1 at acpi0: bus 1 (D011)
acpiprt2 at acpi0: bus 2 (D012)
acpiprt3 at acpi0: bus 3 (D013)
acpiprt4 at acpi0: bus 4 (D014)
acpiprt5 at acpi0: bus -1 (D021)
acpiprt6 at acpi0: bus 5 (D031)
acpiprt7 at acpi0: bus -1 (D032)
acpiprt8 at acpi0: bus -1 (D033)
acpiprt9 at acpi0: bus -1 (D034)
acpiprt10 at acpi0: bus -1 (D041)
acpiprt11 at acpi0: bus 6 (D071)
acpiprt12 at acpi0: bus 7 (D081)
acpiprt13 at acpi0: bus 64 (PC01)
acpiprt14 at acpi0: bus -1 (D011)
acpiprt15 at acpi0: bus 65 (D012)
acpiprt16 at acpi0: bus 66 (D013)
acpiprt17 at acpi0: bus -1 (D014)
acpiprt18 at acpi0: bus -1 (D021)
acpiprt19 at acpi0: bus -1 (D031)
acpiprt20 at acpi0: bus -1 (D032)
acpiprt21 at acpi0: bus -1 (D033)
acpiprt22 at acpi0: bus -1 (D034)
acpiprt23 at acpi0: bus -1 (D041)
acpiprt24 at acpi0: bus 67 (D071)
acpiprt25 at acpi0: bus 68 (D081)
acpiprt26 at acpi0: bus 128 (PC02)
acpiprt27 at acpi0: bus 129 (D011)
acpiprt28 at acpi0: bus 130 (D012)
acpiprt29 at acpi0: bus -1 (D00F)
acpiprt30 at acpi0: bus -1 (D013)
acpiprt31 at acpi0: bus -1 (D014)
acpiprt32 at acpi0: bus -1 (D021)
acpiprt33 at acpi0: bus 132 (D031)
acpiprt34 at acpi0: bus -1 (D032)
acpiprt35 at acpi0: bus -1 (D033)
acpiprt36 at acpi0: bus -1 (D034)
acpiprt37 at acpi0: bus -1 (D041)
acpiprt38 at acpi0: bus 133 (D071)
acpiprt39 at acpi0: bus 134 (D081)
acpiprt40 at acpi0: bus 192 (PC03)
acpiprt41 at acpi0: bus 193 (D011)
acpiprt42 at acpi0: bus -1 (D012)
acpiprt43 at acpi0: bus -1 (D013)
acpiprt44 at acpi0: bus -1 (D014)
acpiprt45 at acpi0: bus -1 (D021)
acpiprt46 at acpi0: bus 194 (D031)
acpiprt47 at acpi0: bus 195 (D032)
acpiprt48 at acpi0: bus 196 (D033)
acpiprt49 at acpi0: bus 197 (D034)
acpiprt50 at acpi0: bus -1 (D041)
acpiprt51 at acpi0: bus 198 (D071)
acpiprt52 at acpi0: bus 199 (D081)
"PNP0C33" at acpi0 not configured
acpibtn0 at acpi0: PWRB
acpipci0 at acpi0 PC00: 0x00000000 0x00000011 0x00000001
acpicmos0 at acpi0
"IPI0001" at acpi0 not configured
acpipci1 at acpi0 PC01: 0x00000000 0x00000011 0x00000001
acpipci2 at acpi0 PC02: 0x00000000 0x00000011 0x00000001
acpipci3 at acpi0 PC03: 0x00000000 0x00000011 0x00000001
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
"ACPI0007" at acpi0 not configured
ipmi at mainbus0 not configured
pci0 at mainbus0 bus 0
0:1:1: bridge mem address conflict 0xf7c00000/0x400000
0:1:2: bridge mem address conflict 0xf7800000/0x400000
0:1:3: bridge mem address conflict 0xf7400000/0x400000
0:1:4: bridge mem address conflict 0xf7000000/0x400000
0:3:1: bridge mem address conflict 0xe2000000/0x4000000
0:7:1: bridge mem address conflict 0xf8200000/0x300000
0:8:1: bridge mem address conflict 0xf8000000/0x200000
pchb0 at pci0 dev 0 function 0 "AMD AMD64 17h Root Complex" rev 0x00
"AMD AMD64 17h IOMMU" rev 0x00 at pci0 dev 0 function 2 not configured
pchb1 at pci0 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb0 at pci0 dev 1 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
ppb1 at pci0 dev 1 function 2 "AMD AMD64 17h PCIE" rev 0x00: msi
pci2 at ppb1 bus 2
ppb2 at pci0 dev 1 function 3 "AMD AMD64 17h PCIE" rev 0x00: msi
pci3 at ppb2 bus 3
ppb3 at pci0 dev 1 function 4 "AMD AMD64 17h PCIE" rev 0x00: msi
pci4 at ppb3 bus 4
pchb2 at pci0 dev 2 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb3 at pci0 dev 3 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb4 at pci0 dev 3 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci5 at ppb4 bus 5
5:0:0: mem address conflict 0xe4000000/0x2000000
5:0:1: mem address conflict 0xe2000000/0x2000000
mcx0 at pci5 dev 0 function 0 "Mellanox ConnectX-4 Lx" rev 0x00: FW
64.0.0000, unsupported command interface 2816
mcx1 at pci5 dev 0 function 1 "Mellanox ConnectX-4 Lx" rev 0x00: unable
to map register memory
pchb4 at pci0 dev 4 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb5 at pci0 dev 7 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb5 at pci0 dev 7 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci6 at ppb5 bus 6
vendor "AMD", unknown product 0x145a (class instrumentation unknown
subclass 0x00, rev 0x00) at pci6 dev 0 function 0 not configured
ccp0 at pci6 dev 0 function 2 "AMD AMD64 17h Crypto" rev 0x00
xhci0 at pci6 dev 0 function 3 "AMD AMD64 17h xHCI" rev 0x00: msi, xHCI
ff.ff
xhci0: reset timeout
xhci0: init failed, error=5
pchb6 at pci0 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb6 at pci0 dev 8 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci7 at ppb6 bus 7
vendor "AMD", unknown product 0x1455 (class instrumentation unknown
subclass 0x00, rev 0x00) at pci7 dev 0 function 0 not configured
ccp1 at pci7 dev 0 function 1 "AMD AMD64 17h Crypto" rev 0x00
"AMD FCH SMBus" rev 0x59 at pci0 dev 20 function 0 not configured
pcib0 at pci0 dev 20 function 3 "AMD FCH LPC" rev 0x51
pchb7 at pci0 dev 24 function 0 "AMD AMD64 17h Data Fabric" rev 0x00
pchb8 at pci0 dev 24 function 1 "AMD AMD64 17h Data Fabric" rev 0x00
pchb9 at pci0 dev 24 function 2 "AMD AMD64 17h Data Fabric" rev 0x00
pchb10 at pci0 dev 24 function 3 "AMD AMD64 17h Data Fabric" rev 0x00
pchb11 at pci0 dev 24 function 4 "AMD AMD64 17h Data Fabric" rev 0x00
pchb12 at pci0 dev 24 function 5 "AMD AMD64 17h Data Fabric" rev 0x00
pchb13 at pci0 dev 24 function 6 "AMD AMD64 17h Data Fabric" rev 0x00
pchb14 at pci0 dev 24 function 7 "AMD AMD64 17h Data Fabric" rev 0x00
pchb15 at pci0 dev 25 function 0 "AMD AMD64 17h Data Fabric" rev 0x00
pchb16 at pci0 dev 25 function 1 "AMD AMD64 17h Data Fabric" rev 0x00
pchb17 at pci0 dev 25 function 2 "AMD AMD64 17h Data Fabric" rev 0x00
pchb18 at pci0 dev 25 function 3 "AMD AMD64 17h Data Fabric" rev 0x00
pchb19 at pci0 dev 25 function 4 "AMD AMD64 17h Data Fabric" rev 0x00
pchb20 at pci0 dev 25 function 5 "AMD AMD64 17h Data Fabric" rev 0x00
pchb21 at pci0 dev 25 function 6 "AMD AMD64 17h Data Fabric" rev 0x00
pchb22 at pci0 dev 25 function 7 "AMD AMD64 17h Data Fabric" rev 0x00
pchb23 at pci0 dev 26 function 0 "AMD AMD64 17h Data Fabric" rev 0x00
pchb24 at pci0 dev 26 function 1 "AMD AMD64 17h Data Fabric" rev 0x00
pchb25 at pci0 dev 26 function 2 "AMD AMD64 17h Data Fabric" rev 0x00
pchb26 at pci0 dev 26 function 3 "AMD AMD64 17h Data Fabric" rev 0x00
pchb27 at pci0 dev 26 function 4 "AMD AMD64 17h Data Fabric" rev 0x00
pchb28 at pci0 dev 26 function 5 "AMD AMD64 17h Data Fabric" rev 0x00
pchb29 at pci0 dev 26 function 6 "AMD AMD64 17h Data Fabric" rev 0x00
pchb30 at pci0 dev 26 function 7 "AMD AMD64 17h Data Fabric" rev 0x00
pchb31 at pci0 dev 27 function 0 "AMD AMD64 17h Data Fabric" rev 0x00
pchb32 at pci0 dev 27 function 1 "AMD AMD64 17h Data Fabric" rev 0x00
pchb33 at pci0 dev 27 function 2 "AMD AMD64 17h Data Fabric" rev 0x00
pchb34 at pci0 dev 27 function 3 "AMD AMD64 17h Data Fabric" rev 0x00
pchb35 at pci0 dev 27 function 4 "AMD AMD64 17h Data Fabric" rev 0x00
pchb36 at pci0 dev 27 function 5 "AMD AMD64 17h Data Fabric" rev 0x00
pchb37 at pci0 dev 27 function 6 "AMD AMD64 17h Data Fabric" rev 0x00
pchb38 at pci0 dev 27 function 7 "AMD AMD64 17h Data Fabric" rev 0x00
isa0 at pcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
com1: console
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
vga0 at isa0 port 0x3b0/48 iomem 0xa0000/131072
wsdisplay0 at vga0 mux 1: console (80x25, vt100 emulation)
wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
pci8 at mainbus0 bus 64
pchb39 at pci8 dev 0 function 0 "AMD AMD64 17h Root Complex" rev 0x00
"AMD AMD64 17h IOMMU" rev 0x00 at pci8 dev 0 function 2 not configured
pchb40 at pci8 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb7 at pci8 dev 1 function 2 "AMD AMD64 17h PCIE" rev 0x00: msi
pci9 at ppb7 bus 65
ppb8 at pci8 dev 1 function 3 "AMD AMD64 17h PCIE" rev 0x00: msi
pci10 at ppb8 bus 66
pchb41 at pci8 dev 2 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb42 at pci8 dev 3 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb43 at pci8 dev 4 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb44 at pci8 dev 7 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb9 at pci8 dev 7 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci11 at ppb9 bus 67
vendor "AMD", unknown product 0x145a (class instrumentation unknown
subclass 0x00, rev 0x00) at pci11 dev 0 function 0 not configured
ccp2 at pci11 dev 0 function 2 "AMD AMD64 17h Crypto" rev 0x00
xhci1 at pci11 dev 0 function 3 "AMD AMD64 17h xHCI" rev 0x00: msi, xHCI 1.0
usb0 at xhci1: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev
3.00/1.00 addr 1
pchb45 at pci8 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb10 at pci8 dev 8 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci12 at ppb10 bus 68
vendor "AMD", unknown product 0x1455 (class instrumentation unknown
subclass 0x00, rev 0x00) at pci12 dev 0 function 0 not configured
ccp3 at pci12 dev 0 function 1 "AMD AMD64 17h Crypto" rev 0x00
pci13 at mainbus0 bus 128
pchb46 at pci13 dev 0 function 0 "AMD AMD64 17h Root Complex" rev 0x00
"AMD AMD64 17h IOMMU" rev 0x00 at pci13 dev 0 function 2 not configured
pchb47 at pci13 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb11 at pci13 dev 1 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci14 at ppb11 bus 129
129:0:0: rom address conflict 0xfffc0000/0x40000
129:0:1: rom address conflict 0xfffc0000/0x40000
bge0 at pci14 dev 0 function 0 "Broadcom BCM5720" rev 0x00, BCM5720 A0
(0x5720000), APE firmware NCSI 1.4.26.0: msi, address 4c:d9:8f:40:4d:86
brgphy0 at bge0 phy 1: BCM5720C 10/100/1000baseT PHY, rev. 0
bge1 at pci14 dev 0 function 1 "Broadcom BCM5720" rev 0x00, BCM5720 A0
(0x5720000), APE firmware NCSI 1.4.26.0: msi, address 4c:d9:8f:40:4d:87
brgphy1 at bge1 phy 2: BCM5720C 10/100/1000baseT PHY, rev. 0
ppb12 at pci13 dev 1 function 2 "AMD AMD64 17h PCIE" rev 0x00: msi
pci15 at ppb12 bus 130
ppb13 at pci15 dev 0 function 0 unknown vendor 0x1556 product 0xbe00 rev
0x02
pci16 at ppb13 bus 131
vendor "Matrox", unknown product 0x0536 (class display subclass VGA, rev
0x04) at pci16 dev 0 function 0 not configured
pchb48 at pci13 dev 2 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb49 at pci13 dev 3 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb14 at pci13 dev 3 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci17 at ppb14 bus 132
ahci0 at pci17 dev 0 function 0 "Marvell 88SE9230 AHCI" rev 0x11: msi,
AHCI 1.2
ahci0: port 0: 6.0Gb/s
ahci0: port 2: 1.5Gb/s
scsibus1 at ahci0: 32 targets
ahci0: stopping the port, softreset slot 31 was still active.
ahci0: failed to stop port, cannot softreset
ahci0: failed to stop port, cannot softreset
ahci0: failed to stop port, cannot softreset
sd0 at scsibus1 targ 0 lun 0: <ATA, DELLBOSS VD, MV.R> SCSI3 0/direct
fixed t10.ATA_DELLBOSS_VD_07bc48ea7b5f0010_
sd0: 228872MB, 512 bytes/sector, 468731008 sectors, thin
ahci0: stopping the port, softreset slot 31 was still active.
ahci0: failed to stop port, cannot softreset
pchb50 at pci13 dev 4 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb51 at pci13 dev 7 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb15 at pci13 dev 7 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci18 at ppb15 bus 133
vendor "AMD", unknown product 0x145a (class instrumentation unknown
subclass 0x00, rev 0x00) at pci18 dev 0 function 0 not configured
ccp4 at pci18 dev 0 function 2 "AMD AMD64 17h Crypto" rev 0x00
pchb52 at pci13 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb16 at pci13 dev 8 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci19 at ppb16 bus 134
vendor "AMD", unknown product 0x1455 (class instrumentation unknown
subclass 0x00, rev 0x00) at pci19 dev 0 function 0 not configured
ccp5 at pci19 dev 0 function 1 "AMD AMD64 17h Crypto" rev 0x00
ahci1 at pci19 dev 0 function 2 "AMD FCH AHCI" rev 0x51: msi, AHCI 1.3.1
scsibus2 at ahci1: 32 targets
pci20 at mainbus0 bus 192
192:1:1: bridge mem address conflict 0x90000000/0x200000
pchb53 at pci20 dev 0 function 0 "AMD AMD64 17h Root Complex" rev 0x00
"AMD AMD64 17h IOMMU" rev 0x00 at pci20 dev 0 function 2 not configured
pchb54 at pci20 dev 1 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb17 at pci20 dev 1 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci21 at ppb17 bus 193
193:0:0: mem address conflict 0x90000000/0x100000
193:0:0: mem address conflict 0x90100000/0x100000
mfii0 at pci21 dev 0 function 0 "Symbios Logic MegaRAID SAS3508" rev
0x01: msi
mfii0: firmware fault
pchb55 at pci20 dev 2 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb56 at pci20 dev 3 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb18 at pci20 dev 3 function 1 "AMD AMD64 17h PCIE" rev 0x00: msi
pci22 at ppb18 bus 194
ppb19 at pci20 dev 3 function 2 "AMD AMD64 17h PCIE" rev 0x00: msi
pci23 at ppb19 bus 195
ppb20 at pci20 dev 3 function 3 "AMD AMD64 17h PCIE" rev 0x00: msi
pci24 at ppb20 bus 196
ppb21 at pci20 dev 3 function 4 "AMD AMD64 17h PCIE" rev 0x00: msi
pci25 at ppb21 bus 197
pchb57 at pci20 dev 4 function 0 "AMD AMD64 17h PCIE" rev 0x00
pchb58 at pci20 dev 7 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb22 at pci20 dev 7 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci26 at ppb22 bus 198
vendor "AMD", unknown product 0x145a (class instrumentation unknown
subclass 0x00, rev 0x00) at pci26 dev 0 function 0 not configured
ccp6 at pci26 dev 0 function 2 "AMD AMD64 17h Crypto" rev 0x00
pchb59 at pci20 dev 8 function 0 "AMD AMD64 17h PCIE" rev 0x00
ppb23 at pci20 dev 8 function 1 "AMD AMD64 17h PCIE" rev 0x00
pci27 at ppb23 bus 199
vendor "AMD", unknown product 0x1455 (class instrumentation unknown
subclass 0x00, rev 0x00) at pci27 dev 0 function 0 not configured
ccp7 at pci27 dev 0 function 1 "AMD AMD64 17h Crypto" rev 0x00
vmm0 at mainbus0: SVM/RVI
uhub1 at uhub0 port 1 configuration 1 interface 0 "vendor 0x1604 product
0x10c0" rev 2.00/0.00 addr 2
uhub2 at uhub1 port 1 configuration 1 interface 0 "vendor 0x1604 product
0x10c0" rev 2.00/0.00 addr 3
uhub3 at uhub1 port 4 configuration 1 interface 0 "vendor 0x1604 product
0x10c0" rev 2.00/0.00 addr 4
vscsi0 at root
scsibus3 at vscsi0: 256 targets
softraid0 at root
scsibus4 at softraid0: 256 targets
root on sd0a (e715342dd1e5469f.a) swap on sd0b dump on sd0b

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Paul Irofti-4
In reply to this post by Mark Kettenis
On Wed, Aug 07, 2019 at 02:55:54PM +0200, Mark Kettenis wrote:

> > Date: Tue, 6 Aug 2019 23:29:30 +0300
> > From: Paul Irofti <[hidden email]>
> >
> > Hi,
> >
> > Here is a fourth diff addressing all the issues so far, that have been
> > mainly pointed out by kettenis@, thanks!
> >
> > Changes:
> > - stop resetting the observed drift as it does not affect tsc
> >  re-initialization on resume, thus removing all changes from
> >  acpi_machdep.c
> > - fix comment and put a temporary pretty printf of resume
> > - rename cpu_cc_skew to ci_tsc_skew
> > - remove unfinished code using MSR_TSC for synchronization (to
> >  be added later on together with the missing IA32_TSC_ADJUST
> >  wrmsr commands)
> >
> > All other technical issues were discussed and settled in private and
> > require no change to the former diff.
> >
> >
> > For testing you can also use the regress test after booting with tsc as
> > default clock and waiting for an hour or so to let the clocks go wild:
> >
> >   # cd /usr/src/regress/sys/kern/clock_gettime
> >   # make regress
> >
> > There is another test program flying around the mailing lists I guess,
> > but I could not locate it now so if someone is kind enough to reply with
> > the code, that would be lovely!
> >
> > Paul
>
> Hi Paul,
>
> Still some small questions/issues now that the MSR thing has been
> cleared up.
>
> With those issues fixed, this is ok kettenis@

Hi Mark,

I have addressed all your comments in the diff below.

I replaced cpu_serializing_counter() with tsc_get_timecount(), as they
are the same function now that the MSR code is gone. I hope that is not
too gross.

Thank you for another review.

Paul

Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
+++ arch/amd64/amd64/cpu.c 9 Aug 2019 07:16:40 -0000
@@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
  cr4 = rcr4();
  lcr4(cr4 & ~CR4_PGE);
  lcr4(cr4);
+
+ /* Synchronize TSC */
+ if (cold && !CPU_IS_PRIMARY(ci))
+      tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +812,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
  int i;
+ u_long s;
 
  ci->ci_flags |= CPUF_AP;
 
@@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else {
+ /*
+ * Synchronize time stamp counters. Invalidate cache and
+ * synchronize twice (in tsc_sync_bp) to minimize possible
+ * cache effects. Disable interrupts to try and rule out any
+ * external interference.
+ */
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
  }
 
  if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
@@ -852,6 +869,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
  int i;
+ int64_t drift;
+ u_long s;
 
  atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
  printf("dropping into debugger; continue from here to resume boot\n");
  db_enter();
 #endif
+ } else if (cold) {
+ /* Synchronize TSC again, check for drift. */
+ drift = ci->ci_tsc_skew;
+ s = intr_disable();
+ wbinvd();
+ tsc_sync_bp(ci);
+ intr_restore(s);
+ drift -= ci->ci_tsc_skew;
+ printf("TSC skew=%lld drift=%lld\n",
+    (long long)ci->ci_tsc_skew, (long long)drift);
+ tsc_sync_drift(drift);
  }
 }
 
@@ -888,7 +918,14 @@ cpu_hatch(void *v)
  panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+ /*
+ * Synchronize the TSC for the first time. Note that interrupts are
+ * off at this point.
+ */
+ wbinvd();
  ci->ci_flags |= CPUF_PRESENT;
+ ci->ci_tsc_skew = 0; /* reset on resume */
+ tsc_sync_ap(ci);
 
  lapic_enable();
  lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
+++ arch/amd64/amd64/tsc.c 9 Aug 2019 07:16:41 -0000
@@ -1,8 +1,10 @@
 /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
  * Copyright (c) 2017 Adam Steen <[hidden email]>
  * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
+ * Copyright (c) 2019 Paul Irofti <[hidden email]>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,12 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
+#define TSC_DRIFT_MAX 250
+int64_t tsc_drift_observed;
+
+volatile int64_t tsc_sync_val;
+volatile struct cpu_info *tsc_sync_cpu;
+
 uint tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +181,8 @@ calibrate_tsc_freq(void)
  return;
  tsc_frequency = freq;
  tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
  if (tsc_is_invariant)
  tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -194,14 +201,13 @@ cpu_recalibrate_tsc(struct timecounter *
 uint
 tsc_get_timecount(struct timecounter *tc)
 {
- return rdtsc();
+ return rdtsc() + curcpu()->ci_tsc_skew;
 }
 
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
- if (!(ci->ci_flags & CPUF_PRIMARY) ||
-    !(ci->ci_flags & CPUF_CONST_TSC) ||
+ if (!(ci->ci_flags & CPUF_CONST_TSC) ||
     !(ci->ci_flags & CPUF_INVAR_TSC))
  return;
 
@@ -211,9 +217,7 @@ tsc_timecounter_init(struct cpu_info *ci
  /* Newer CPUs don't require recalibration */
  if (tsc_frequency > 0) {
  tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
  tsc_timecounter.tc_quality = 2000;
-#endif
  } else {
  tsc_recalibrate = 1;
  tsc_frequency = cpufreq;
@@ -221,5 +225,103 @@ tsc_timecounter_init(struct cpu_info *ci
  calibrate_tsc_freq();
  }
 
- tc_init(&tsc_timecounter);
+ if (tsc_drift_observed > TSC_DRIFT_MAX) {
+ printf("ERROR: %lld cycle TSC drift observed\n",
+    (long long)tsc_drift_observed);
+ tsc_timecounter.tc_quality = -1000;
+ tsc_is_invariant = 0;
+ }
+
+ printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
+    (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
+
+ if (ci->ci_flags & CPUF_PRIMARY)
+ tc_init(&tsc_timecounter);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+ if (drift < 0)
+ drift = -drift;
+ if (drift > tsc_drift_observed)
+ tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+ uint64_t bptsc;
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
+ panic("tsc_sync_bp: 1");
+
+ /* Flag it and read our TSC. */
+ atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ bptsc = tsc_get_timecount(NULL) >> 1;
+
+ /* Wait for remote to complete, and read ours again. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
+ membar_consumer();
+ bptsc += (tsc_get_timecount(NULL) >> 1);
+
+ /* Wait for the results to come in. */
+ while (tsc_sync_cpu == ci)
+ CPU_BUSY_CYCLE();
+ if (tsc_sync_cpu != NULL)
+ panic("tsc_sync_bp: 2");
+
+ *bptscp = bptsc;
+ *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+ uint64_t bptsc, aptsc;
+
+ tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+ tsc_read_bp(ci, &bptsc, &aptsc);
+
+ /* Compute final value to adjust for skew. */
+ ci->ci_tsc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+void
+tsc_post_ap(struct cpu_info *ci)
+{
+ uint64_t tsc;
+
+ /* Wait for go-ahead from primary. */
+ while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
+ membar_consumer();
+ tsc = (tsc_get_timecount(NULL) >> 1);
+
+ /* Instruct primary to read its counter. */
+ atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+ tsc += (tsc_get_timecount(NULL) >> 1);
+
+ /* Post result.  Ensure the whole value goes out atomically. */
+ (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+ if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
+ panic("tsc_sync_ap");
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+ tsc_post_ap(ci);
+ tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
+++ arch/amd64/include/cpu.h 9 Aug 2019 07:16:41 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
  union vmm_cpu_cap ci_vmm_cap;
  paddr_t ci_vmxon_region_pa;
  struct vmxon_region *ci_vmxon_region;
+
+ int64_t ci_tsc_skew; /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP 0x0001 /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
 #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
 
+#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
 #define CPUF_PRESENT 0x1000 /* CPU is present */
 #define CPUF_RUNNING 0x2000 /* CPU is running */
 #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
Index: arch/amd64/include/cpuvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 cpuvar.h
--- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
+++ arch/amd64/include/cpuvar.h 9 Aug 2019 07:16:41 -0000
@@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
 void cpu_init_first(void);
 void cpu_adjust_tsc_freq(uint64_t (*)());
 
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
 #endif

Reply | Threaded
Open this post in threaded view
|

Re: TSC synchronization on MP machines

Mark Kettenis
> Date: Fri, 9 Aug 2019 10:21:14 +0300
> From: Paul Irofti <[hidden email]>
>
> On Wed, Aug 07, 2019 at 02:55:54PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 6 Aug 2019 23:29:30 +0300
> > > From: Paul Irofti <[hidden email]>
> > >
> > > Hi,
> > >
> > > Here is a fourth diff addressing all the issues raised so far, which
> > > were mainly pointed out by kettenis@, thanks!
> > >
> > > Changes:
> > > - stop resetting the observed drift as it does not affect tsc
> > >  re-initialization on resume, thus removing all changes from
> > >  acpi_machdep.c
> > > - fix comment and put a temporary pretty printf of resume
> > > - rename cpu_cc_skew to ci_tsc_skew
> > > - remove unfinished code using MSR_TSC for synchronization (to
> > >  be added later on together with the missing IA32_TSC_ADJUST
> > >  wrmsr commands)
> > >
> > > All other technical issues were discussed and settled in private and
> > > require no change to the former diff.
> > >
> > >
> > > For testing you can also use the regress test after booting with tsc as
> > > default clock and waiting for an hour or so to let the clocks go wild:
> > >
> > >   # cd /usr/src/regress/sys/kern/clock_gettime
> > >   # make regress
> > >
> > > There is another test program floating around the mailing lists, I
> > > believe, but I could not locate it just now; if someone is kind enough
> > > to reply with the code, that would be lovely!
> > >
> > > Paul
> >
> > Hi Paul,
> >
> > Still some small questions/issues now that the MSR thing has been
> > cleared up.
> >
> > With those issues fixed, this is ok kettenis@
>
> Hi Mark,
>
> I have addressed all your comments in the diff below.
>
> I replaced cpu_serializing_counter() with tsc_get_timecount(), as they
> are the same function now that the MSR code is gone. Hope that is not too gross.

That doesn't work, as tsc_get_timecount() returns a 32-bit integer (uint),
so the upper half of the 64-bit TSC value is truncated before it is used.

<pirofti> I cannot just use rdtsc because that means changing the drift max

I don't understand that comment.  Whether you add the previous skew or
not makes no difference when calculating the new skew since you're
adding the same value to both TSC counts when calculating the
difference.  The measured skew shouldn't change.  And when calculating
the drift you simply subtract the new skew from the old skew.  So
nothing changes.  Or am I missing something?


> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137
> +++ arch/amd64/amd64/cpu.c 9 Aug 2019 07:16:40 -0000
> @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci)
>   cr4 = rcr4();
>   lcr4(cr4 & ~CR4_PGE);
>   lcr4(cr4);
> +
> + /* Synchronize TSC */
> + if (cold && !CPU_IS_PRIMARY(ci))
> +      tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +812,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>   int i;
> + u_long s;
>  
>   ci->ci_flags |= CPUF_AP;
>  
> @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else {
> + /*
> + * Synchronize time stamp counters. Invalidate cache and
> + * synchronize twice (in tsc_sync_bp) to minimize possible
> + * cache effects. Disable interrupts to try and rule out any
> + * external interference.
> + */
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
>   }
>  
>   if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -852,6 +869,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>   int i;
> + int64_t drift;
> + u_long s;
>  
>   atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>   printf("dropping into debugger; continue from here to resume boot\n");
>   db_enter();
>  #endif
> + } else if (cold) {
> + /* Synchronize TSC again, check for drift. */
> + drift = ci->ci_tsc_skew;
> + s = intr_disable();
> + wbinvd();
> + tsc_sync_bp(ci);
> + intr_restore(s);
> + drift -= ci->ci_tsc_skew;
> + printf("TSC skew=%lld drift=%lld\n",
> +    (long long)ci->ci_tsc_skew, (long long)drift);
> + tsc_sync_drift(drift);
>   }
>  }
>  
> @@ -888,7 +918,14 @@ cpu_hatch(void *v)
>   panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> + /*
> + * Synchronize the TSC for the first time. Note that interrupts are
> + * off at this point.
> + */
> + wbinvd();
>   ci->ci_flags |= CPUF_PRESENT;
> + ci->ci_tsc_skew = 0; /* reset on resume */
> + tsc_sync_ap(ci);
>  
>   lapic_enable();
>   lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11
> +++ arch/amd64/amd64/tsc.c 9 Aug 2019 07:16:41 -0000
> @@ -1,8 +1,10 @@
>  /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <[hidden email]>
>   * Copyright (c) 2017 Adam Steen <[hidden email]>
>   * Copyright (c) 2017 Mike Belopuhov <[hidden email]>
> + * Copyright (c) 2019 Paul Irofti <[hidden email]>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,12 @@ int tsc_recalibrate;
>  uint64_t tsc_frequency;
>  int tsc_is_invariant;
>  
> +#define TSC_DRIFT_MAX 250
> +int64_t tsc_drift_observed;
> +
> +volatile int64_t tsc_sync_val;
> +volatile struct cpu_info *tsc_sync_cpu;
> +
>  uint tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +181,8 @@ calibrate_tsc_freq(void)
>   return;
>   tsc_frequency = freq;
>   tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>   if (tsc_is_invariant)
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -194,14 +201,13 @@ cpu_recalibrate_tsc(struct timecounter *
>  uint
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc();
> + return rdtsc() + curcpu()->ci_tsc_skew;
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> - if (!(ci->ci_flags & CPUF_PRIMARY) ||
> -    !(ci->ci_flags & CPUF_CONST_TSC) ||
> + if (!(ci->ci_flags & CPUF_CONST_TSC) ||
>      !(ci->ci_flags & CPUF_INVAR_TSC))
>   return;
>  
> @@ -211,9 +217,7 @@ tsc_timecounter_init(struct cpu_info *ci
>   /* Newer CPUs don't require recalibration */
>   if (tsc_frequency > 0) {
>   tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>   tsc_timecounter.tc_quality = 2000;
> -#endif
>   } else {
>   tsc_recalibrate = 1;
>   tsc_frequency = cpufreq;
> @@ -221,5 +225,103 @@ tsc_timecounter_init(struct cpu_info *ci
>   calibrate_tsc_freq();
>   }
>  
> - tc_init(&tsc_timecounter);
> + if (tsc_drift_observed > TSC_DRIFT_MAX) {
> + printf("ERROR: %lld cycle TSC drift observed\n",
> +    (long long)tsc_drift_observed);
> + tsc_timecounter.tc_quality = -1000;
> + tsc_is_invariant = 0;
> + }
> +
> + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__,
> +    (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
> +
> + if (ci->ci_flags & CPUF_PRIMARY)
> + tc_init(&tsc_timecounter);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> + if (drift < 0)
> + drift = -drift;
> + if (drift > tsc_drift_observed)
> + tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> + uint64_t bptsc;
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> + panic("tsc_sync_bp: 1");
> +
> + /* Flag it and read our TSC. */
> + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + bptsc = tsc_get_timecount(NULL) >> 1;
> +
> + /* Wait for remote to complete, and read ours again. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> + membar_consumer();
> + bptsc += (tsc_get_timecount(NULL) >> 1);
> +
> + /* Wait for the results to come in. */
> + while (tsc_sync_cpu == ci)
> + CPU_BUSY_CYCLE();
> + if (tsc_sync_cpu != NULL)
> + panic("tsc_sync_bp: 2");
> +
> + *bptscp = bptsc;
> + *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> + uint64_t bptsc, aptsc;
> +
> + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> + tsc_read_bp(ci, &bptsc, &aptsc);
> +
> + /* Compute final value to adjust for skew. */
> + ci->ci_tsc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> + uint64_t tsc;
> +
> + /* Wait for go-ahead from primary. */
> + while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> + membar_consumer();
> + tsc = (tsc_get_timecount(NULL) >> 1);
> +
> + /* Instruct primary to read its counter. */
> + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> + tsc += (tsc_get_timecount(NULL) >> 1);
> +
> + /* Post result.  Ensure the whole value goes out atomically. */
> + (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> + panic("tsc_sync_ap");
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> + tsc_post_ap(ci);
> + tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131
> +++ arch/amd64/include/cpu.h 9 Aug 2019 07:16:41 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>   union vmm_cpu_cap ci_vmm_cap;
>   paddr_t ci_vmxon_region_pa;
>   struct vmxon_region *ci_vmxon_region;
> +
> + int64_t ci_tsc_skew; /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP 0x0001 /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000 /* CPU is present */
>  #define CPUF_RUNNING 0x2000 /* CPU is running */
>  #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */
> Index: arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 cpuvar.h
> --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9
> +++ arch/amd64/include/cpuvar.h 9 Aug 2019 07:16:41 -0000
> @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  void cpu_adjust_tsc_freq(uint64_t (*)());
>  
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
>  #endif
>
>

12