systat(1): vmstat: compute rates with CLOCK_UPTIME

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

systat(1): vmstat: compute rates with CLOCK_UPTIME

Scott Cheloha
Hi,

systat(1)'s vmstat view displays rates for things like interrupts.
Strangely, it uses CPU time to compute these rates, not real time.

This is potentially misleading, particularly on an MP system.  If I
have 4 cores running on a HZ=100 kernel I expect ~400 clock interrupts
per second.  But systat(1) tells me I have 100 because we have 4
seconds worth of CPU time for every second of real time that elapses.

I don't like it.  I want to see how many interrupts there really were.

This patch changes the vmstat view to use CLOCK_UPTIME to measure
elapsed time and uses that when computing rates.  The "Big Bar" is
still drawn using CPU time, but for everything else I think you would
want a rate based on the elapsed real time.  Using CPU time isn't
intuitive.

We want CLOCK_UPTIME, not CLOCK_MONOTONIC, because we aren't
interested in what the machine was doing when it was suspended.

I have not changed dinfo() to keep the patch simple, but we should be
using CLOCK_UPTIME there, too.

Thoughts?

Index: vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
retrieving revision 1.91
diff -u -p -r1.91 vmstat.c
--- vmstat.c 28 Jun 2019 13:35:04 -0000 1.91
+++ vmstat.c 16 Sep 2020 03:56:14 -0000
@@ -320,7 +320,7 @@ labelkre(void)
 #define PUTRATE(fld, l, c, w) \
  do { \
  Y(fld); \
- putint((int)((float)s.fld/etime + 0.5), l, c, w); \
+ putint((int)((float)s.fld / eruntime + 0.5), l, c, w); \
  } while (0)
 #define MAXFAIL 5
 
@@ -330,12 +330,18 @@ static char cpuorder[] = { CP_INTR, CP_S
 void
 showkre(void)
 {
+ static struct timespec prev;
+ struct timespec elapsed, now;
  float f1, f2;
  int psiz;
  u_int64_t inttotal, intcnt;
  int i, l, c;
  static int failcnt = 0, first_run = 0;
- double etime;
+ double ecputime, eruntime;
+
+ clock_gettime(CLOCK_UPTIME, &now);
+ timespecsub(&now, &prev, &elapsed);
+ prev = now;
 
  if (state == TIME) {
  if (!first_run) {
@@ -343,12 +349,13 @@ showkre(void)
  return;
  }
  }
- etime = 0;
+ eruntime = elapsed.tv_sec + elapsed.tv_nsec / 1000000000.0;
+ ecputime = 0;
  for (i = 0; i < CPUSTATES; i++) {
  X(cpustats.cs_time);
- etime += s.cpustats.cs_time[i];
+ ecputime += s.cpustats.cs_time[i];
  }
- if (etime < 5.0) { /* < 5 ticks - ignore this trash */
+ if (ecputime < 5.0) { /* < 5 ticks - ignore this trash */
  if (failcnt++ >= MAXFAIL) {
  error("The alternate system clock has died!");
  failcnt = 0;
@@ -356,12 +363,12 @@ showkre(void)
  return;
  }
  failcnt = 0;
- etime /= hertz;
+ ecputime /= hertz;
  inttotal = 0;
  for (i = 0; i < nintr; i++) {
  t = intcnt = s.intrcnt[i];
  s.intrcnt[i] -= s1.intrcnt[i];
- intcnt = (u_int64_t)((float)s.intrcnt[i]/etime + 0.5);
+ intcnt = (u_int64_t)((float)s.intrcnt[i] / eruntime + 0.5);
  inttotal += intcnt;
  if (intrloc[i] != 0)
  putuint64(intcnt, intrloc[i], INTSCOL, 8);

Reply | Threaded
Open this post in threaded view
|

Re: systat(1): vmstat: compute rates with CLOCK_UPTIME

Theo de Raadt-2
Two days ago during my work on ongoing work for non-acpi suspend,
kettenis and I observed the same thing.

your diff works very well for me.

Reply | Threaded
Open this post in threaded view
|

Re: systat(1): vmstat: compute rates with CLOCK_UPTIME

Scott Cheloha
On Wed, Sep 16, 2020 at 01:20:16AM -0600, Theo de Raadt wrote:
> Two days ago during my work on ongoing work for non-acpi suspend,
> kettenis and I observed the same thing.
>
> your diff works very well for me.

Okay, so I'm not the only one.

Let's do the full patch:

- All rates in the vmstat view are now computed using elapsed
  real time, not elapsed CPU time.

- Measure elapsed real time with  CLOCK_UPTIME and not
  CLOCK_MONOTONIC because we don't care about time spent
  suspended.

- Pass the elapsed time to dinfo() to use when computing
  I/O rates instead of assuming how much time has elapsed.

- Keep drawing the big bar using CPU time.

Would appreciate more tests from people who depend upon systat(1)
regularly.  Does the vmstat view on your machine look reasonable with
this patch?

Pending more tests, ok?

Index: vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
retrieving revision 1.91
diff -u -p -r1.91 vmstat.c
--- vmstat.c 28 Jun 2019 13:35:04 -0000 1.91
+++ vmstat.c 17 Sep 2020 00:24:00 -0000
@@ -83,7 +83,7 @@ static enum state { BOOT, TIME, RUN } st
 static void allocinfo(struct Info *);
 static void copyinfo(struct Info *, struct Info *);
 static float cputime(int);
-static void dinfo(int, int);
+static void dinfo(int, int, double);
 static void getinfo(struct Info *);
 void putint(int, int, int, int);
 void putintmk(int, int, int, int);
@@ -320,7 +320,7 @@ labelkre(void)
 #define PUTRATE(fld, l, c, w) \
  do { \
  Y(fld); \
- putint((int)((float)s.fld/etime + 0.5), l, c, w); \
+ putint((int)((float)s.fld / eruntime + 0.5), l, c, w); \
  } while (0)
 #define MAXFAIL 5
 
@@ -330,12 +330,18 @@ static char cpuorder[] = { CP_INTR, CP_S
 void
 showkre(void)
 {
+ static struct timespec prev;
+ struct timespec elapsed, now;
  float f1, f2;
  int psiz;
  u_int64_t inttotal, intcnt;
  int i, l, c;
  static int failcnt = 0, first_run = 0;
- double etime;
+ double ecputime, eruntime;
+
+ clock_gettime(CLOCK_UPTIME, &now);
+ timespecsub(&now, &prev, &elapsed);
+ prev = now;
 
  if (state == TIME) {
  if (!first_run) {
@@ -343,12 +349,13 @@ showkre(void)
  return;
  }
  }
- etime = 0;
+ eruntime = elapsed.tv_sec + elapsed.tv_nsec / 1000000000.0;
+ ecputime = 0;
  for (i = 0; i < CPUSTATES; i++) {
  X(cpustats.cs_time);
- etime += s.cpustats.cs_time[i];
+ ecputime += s.cpustats.cs_time[i];
  }
- if (etime < 5.0) { /* < 5 ticks - ignore this trash */
+ if (ecputime < 5.0) { /* < 5 ticks - ignore this trash */
  if (failcnt++ >= MAXFAIL) {
  error("The alternate system clock has died!");
  failcnt = 0;
@@ -356,12 +363,12 @@ showkre(void)
  return;
  }
  failcnt = 0;
- etime /= hertz;
+ ecputime /= hertz;
  inttotal = 0;
  for (i = 0; i < nintr; i++) {
  t = intcnt = s.intrcnt[i];
  s.intrcnt[i] -= s1.intrcnt[i];
- intcnt = (u_int64_t)((float)s.intrcnt[i]/etime + 0.5);
+ intcnt = (u_int64_t)((float)s.intrcnt[i] / eruntime + 0.5);
  inttotal += intcnt;
  if (intrloc[i] != 0)
  putuint64(intcnt, intrloc[i], INTSCOL, 8);
@@ -451,7 +458,7 @@ showkre(void)
  mvprintw(DISKROW, DISKCOL + 5 + c,
     " %*s", l, dr_name[i]);
  c += 1 + l;
- dinfo(i, c);
+ dinfo(i, c, eruntime);
  }
  /* and pad the DRIVESPACE */
  l = DRIVESPACE - c;
@@ -674,11 +681,9 @@ copyinfo(struct Info *from, struct Info
 }
 
 static void
-dinfo(int dn, int c)
+dinfo(int dn, int c, double etime)
 {
- double words, atime, etime;
-
- etime = naptime;
+ double words, atime;
 
  c += DISKCOL;