Speed up snmpwalk

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

Speed up snmpwalk

Gerhard Roth-2
Hi,

a snmpwalk of HOST-RESOURCES-MIB::hrSWRunTable doesn't scale very well
with an increasing number of running processes.

For every process and each of the 7 elements of the table, mib_hrswrun()
would call kinfo_proc() which queried all the processes running on the
system and sort them by pid.

The patch below keeps the results cached and updates the list of processes
at maximum once every 5 seconds.

Gerhard


Index: usr.sbin/snmpd/mib.c
===================================================================
RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
retrieving revision 1.86
diff -u -p -u -p -r1.86 mib.c
--- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
+++ usr.sbin/snmpd/mib.c 22 May 2018 08:17:16 -0000
@@ -861,11 +861,18 @@ int
 kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
 {
  static struct kinfo_proc *kp = NULL;
- static size_t nkp = 0;
+ static struct kinfo_proc **klist = NULL;
+ static size_t nkp = 0, nklist = 0;
+ static time_t tinfo = 0;
  int mib[] = { CTL_KERN, KERN_PROC,
     KERN_PROC_ALL, 0, sizeof(*kp), 0 };
- struct kinfo_proc **klist;
+ struct kinfo_proc **knew;
  size_t size, count, i;
+ time_t now;
+
+ (void)time(&now);
+ if (now - tinfo < 5 && kp != NULL && klist != NULL)
+ goto cached;
 
  for (;;) {
  size = nkp * sizeof(*kp);
@@ -892,23 +899,26 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
  }
  nkp = count;
  }
+ tinfo = now;
 
- klist = calloc(count, sizeof(*klist));
- if (klist == NULL)
+ knew = recallocarray(klist, nklist, count, sizeof(*klist));
+ if (knew == NULL)
  return (-1);
+ klist = knew;
+ nklist = count;
 
- for (i = 0; i < count; i++)
+ for (i = 0; i < nklist; i++)
  klist[i] = &kp[i];
- qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
+ qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
 
+cached:
  *kinfo = NULL;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < nklist; i++) {
  if (klist[i]->p_pid >= (int32_t)idx) {
  *kinfo = klist[i];
  break;
  }
  }
- free(klist);
 
  return (0);
 }

Reply | Threaded
Open this post in threaded view
|

Re: Speed up snmpwalk

Claudio Jeker
On Tue, May 22, 2018 at 10:26:30AM +0200, Gerhard Roth wrote:

> Hi,
>
> a snmpwalk of HOST-RESOURCES-MIB::hrSWRunTable doesn't scale very well
> with an increasing number of running processes.
>
> For every process and each of the 7 elements of the table, mib_hrswrun()
> would call kinfo_proc() which queried all the processes running on the
> system and sort them by pid.
>
> The patch below keeps the results cached and updates the list of processes
> at maximum once every 5 seconds.
>
> Gerhard
>

Agreed, two minor nits inline

>
> Index: usr.sbin/snmpd/mib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
> retrieving revision 1.86
> diff -u -p -u -p -r1.86 mib.c
> --- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
> +++ usr.sbin/snmpd/mib.c 22 May 2018 08:17:16 -0000
> @@ -861,11 +861,18 @@ int
>  kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
>  {
>   static struct kinfo_proc *kp = NULL;
> - static size_t nkp = 0;
> + static struct kinfo_proc **klist = NULL;
> + static size_t nkp = 0, nklist = 0;
> + static time_t tinfo = 0;
>   int mib[] = { CTL_KERN, KERN_PROC,
>      KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> - struct kinfo_proc **klist;
> + struct kinfo_proc **knew;
>   size_t size, count, i;
> + time_t now;
> +
> + (void)time(&now);

I think using clock_gettime(CLOCK_MONOTONIC, ...) would be prefered here.
Just in case the clock jumps after snmpd was started.

> + if (now - tinfo < 5 && kp != NULL && klist != NULL)
> + goto cached;
>  
>   for (;;) {
>   size = nkp * sizeof(*kp);
> @@ -892,23 +899,26 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
>   }
>   nkp = count;
>   }
> + tinfo = now;
>  
> - klist = calloc(count, sizeof(*klist));
> - if (klist == NULL)
> + knew = recallocarray(klist, nklist, count, sizeof(*klist));
> + if (knew == NULL)
>   return (-1);
> + klist = knew;
> + nklist = count;
>  
> - for (i = 0; i < count; i++)
> + for (i = 0; i < nklist; i++)
>   klist[i] = &kp[i];
> - qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
> + qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
>  
> +cached:
>   *kinfo = NULL;
> - for (i = 0; i < count; i++) {
> + for (i = 0; i < nklist; i++) {
>   if (klist[i]->p_pid >= (int32_t)idx) {
>   *kinfo = klist[i];
>   break;
>   }
>   }
> - free(klist);

IIRC snmpd is also doing a big cleanup round on exit like some other
daemons, this will result in a "leak" there. IMO this is fine. Just wanted
to point out. I think a cleaner approach would be to run a timer, that
frees klist after 5 seconds and the lookup code would then just skip the
fetching if the pointer is != NULL. Also has the benefit that large
process tables are not cached for a long time if there are no requests
anymore.

>   return (0);
>  }
>

--
:wq Claudio

Reply | Threaded
Open this post in threaded view
|

Re: Speed up snmpwalk

Gerhard Roth-2
On Tue, 22 May 2018 11:05:48 +0200 Claudio Jeker <[hidden email]> wrote:

> On Tue, May 22, 2018 at 10:26:30AM +0200, Gerhard Roth wrote:
> > Hi,
> >
> > a snmpwalk of HOST-RESOURCES-MIB::hrSWRunTable doesn't scale very well
> > with an increasing number of running processes.
> >
> > For every process and each of the 7 elements of the table, mib_hrswrun()
> > would call kinfo_proc() which queried all the processes running on the
> > system and sort them by pid.
> >
> > The patch below keeps the results cached and updates the list of processes
> > at maximum once every 5 seconds.
> >
> > Gerhard
> >  
>
> Agreed, two minor nits inline
>
> >
> > Index: usr.sbin/snmpd/mib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 mib.c
> > --- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
> > +++ usr.sbin/snmpd/mib.c 22 May 2018 08:17:16 -0000
> > @@ -861,11 +861,18 @@ int
> >  kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
> >  {
> >   static struct kinfo_proc *kp = NULL;
> > - static size_t nkp = 0;
> > + static struct kinfo_proc **klist = NULL;
> > + static size_t nkp = 0, nklist = 0;
> > + static time_t tinfo = 0;
> >   int mib[] = { CTL_KERN, KERN_PROC,
> >      KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> > - struct kinfo_proc **klist;
> > + struct kinfo_proc **knew;
> >   size_t size, count, i;
> > + time_t now;
> > +
> > + (void)time(&now);  
>
> I think using clock_gettime(CLOCK_MONOTONIC, ...) would be prefered here.
> Just in case the clock jumps after snmpd was started.
>
> > + if (now - tinfo < 5 && kp != NULL && klist != NULL)
> > + goto cached;
> >  
> >   for (;;) {
> >   size = nkp * sizeof(*kp);
> > @@ -892,23 +899,26 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
> >   }
> >   nkp = count;
> >   }
> > + tinfo = now;
> >  
> > - klist = calloc(count, sizeof(*klist));
> > - if (klist == NULL)
> > + knew = recallocarray(klist, nklist, count, sizeof(*klist));
> > + if (knew == NULL)
> >   return (-1);
> > + klist = knew;
> > + nklist = count;
> >  
> > - for (i = 0; i < count; i++)
> > + for (i = 0; i < nklist; i++)
> >   klist[i] = &kp[i];
> > - qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
> > + qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
> >  
> > +cached:
> >   *kinfo = NULL;
> > - for (i = 0; i < count; i++) {
> > + for (i = 0; i < nklist; i++) {
> >   if (klist[i]->p_pid >= (int32_t)idx) {
> >   *kinfo = klist[i];
> >   break;
> >   }
> >   }
> > - free(klist);  
>
> IIRC snmpd is also doing a big cleanup round on exit like some other
> daemons, this will result in a "leak" there. IMO this is fine. Just wanted
> to point out. I think a cleaner approach would be to run a timer, that
> frees klist after 5 seconds and the lookup code would then just skip the
> fetching if the pointer is != NULL. Also has the benefit that large
> process tables are not cached for a long time if there are no requests
> anymore.
>
> >   return (0);
> >  }
> >  
>



So here's an updated diff that uses a timer to purge the cache after
5 seconds:


Index: usr.sbin/snmpd/mib.c
===================================================================
RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
retrieving revision 1.86
diff -u -p -u -p -r1.86 mib.c
--- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
+++ usr.sbin/snmpd/mib.c 22 May 2018 09:57:57 -0000
@@ -414,6 +414,7 @@ int mib_hrswrun(struct oid *, struct be
 
 int kinfo_proc_comp(const void *, const void *);
 int kinfo_proc(u_int32_t, struct kinfo_proc **);
+void kinfo_proc_free(void);
 int kinfo_args(struct kinfo_proc *, char **);
 
 static struct oid hr_mib[] = {
@@ -857,24 +858,29 @@ kinfo_proc_comp(const void *a, const voi
  return (((*k1)->p_pid > (*k2)->p_pid) ? 1 : -1);
 }
 
+static struct event  kinfo_timer;
+static struct kinfo_proc *kp = NULL;
+static struct kinfo_proc **klist = NULL;
+static size_t  nkp = 0, nklist = 0;
+
 int
 kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
 {
- static struct kinfo_proc *kp = NULL;
- static size_t nkp = 0;
- int mib[] = { CTL_KERN, KERN_PROC,
-    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
- struct kinfo_proc **klist;
- size_t size, count, i;
+ int mib[] = { CTL_KERN, KERN_PROC,
+    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
+ size_t size, count, i;
+ struct timeval timer;
+
+ if (kp != NULL && klist != NULL)
+ goto cached;
 
+ kinfo_proc_free();
  for (;;) {
  size = nkp * sizeof(*kp);
  mib[5] = nkp;
  if (sysctl(mib, sizeofa(mib), kp, &size, NULL, 0) == -1) {
  if (errno == ENOMEM) {
- free(kp);
- kp = NULL;
- nkp = 0;
+ kinfo_proc_free();
  continue;
  }
 
@@ -887,30 +893,50 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
 
  kp = malloc(size);
  if (kp == NULL) {
- nkp = 0;
+ kinfo_proc_free();
  return (-1);
  }
  nkp = count;
  }
 
  klist = calloc(count, sizeof(*klist));
- if (klist == NULL)
+ if (klist == NULL) {
+ kinfo_proc_free();
  return (-1);
+ }
+ nklist = count;
 
- for (i = 0; i < count; i++)
+ for (i = 0; i < nklist; i++)
  klist[i] = &kp[i];
- qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
+ qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
 
+ evtimer_set(&kinfo_timer, (void (*)(int, short, void *))kinfo_proc_free,
+    NULL);
+ timer.tv_sec = 5;
+ timer.tv_usec = 0;
+ evtimer_add(&kinfo_timer, &timer);
+
+cached:
  *kinfo = NULL;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < nklist; i++) {
  if (klist[i]->p_pid >= (int32_t)idx) {
  *kinfo = klist[i];
  break;
  }
  }
- free(klist);
 
  return (0);
+}
+
+void
+kinfo_proc_free(void)
+{
+ free(kp);
+ kp = NULL;
+ nkp = 0;
+ free(klist);
+ klist = NULL;
+ nklist = 0;
 }
 
 int

Reply | Threaded
Open this post in threaded view
|

Re: Speed up snmpwalk

Jeremie Courreges-Anglas-2


[...]

> So here's an updated diff that uses a timer to purge the cache after
> 5 seconds:

LGTM except for...

>
> Index: usr.sbin/snmpd/mib.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
> retrieving revision 1.86
> diff -u -p -u -p -r1.86 mib.c
> --- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
> +++ usr.sbin/snmpd/mib.c 22 May 2018 09:57:57 -0000
> @@ -414,6 +414,7 @@ int mib_hrswrun(struct oid *, struct be
>  
>  int kinfo_proc_comp(const void *, const void *);
>  int kinfo_proc(u_int32_t, struct kinfo_proc **);
> +void kinfo_proc_free(void);
>  int kinfo_args(struct kinfo_proc *, char **);
>  
>  static struct oid hr_mib[] = {
> @@ -857,24 +858,29 @@ kinfo_proc_comp(const void *a, const voi
>   return (((*k1)->p_pid > (*k2)->p_pid) ? 1 : -1);
>  }
>  
> +static struct event  kinfo_timer;
> +static struct kinfo_proc *kp = NULL;
> +static struct kinfo_proc **klist = NULL;
> +static size_t  nkp = 0, nklist = 0;
> +
>  int
>  kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
>  {
> - static struct kinfo_proc *kp = NULL;
> - static size_t nkp = 0;
> - int mib[] = { CTL_KERN, KERN_PROC,
> -    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> - struct kinfo_proc **klist;
> - size_t size, count, i;
> + int mib[] = { CTL_KERN, KERN_PROC,
> +    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> + size_t size, count, i;
> + struct timeval timer;
> +
> + if (kp != NULL && klist != NULL)
> + goto cached;
>  
> + kinfo_proc_free();
>   for (;;) {
>   size = nkp * sizeof(*kp);
>   mib[5] = nkp;
>   if (sysctl(mib, sizeofa(mib), kp, &size, NULL, 0) == -1) {
>   if (errno == ENOMEM) {
> - free(kp);
> - kp = NULL;
> - nkp = 0;
> + kinfo_proc_free();
>   continue;
>   }
>  
> @@ -887,30 +893,50 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
>  
>   kp = malloc(size);
>   if (kp == NULL) {
> - nkp = 0;
> + kinfo_proc_free();
>   return (-1);
>   }
>   nkp = count;
>   }
>  
>   klist = calloc(count, sizeof(*klist));
> - if (klist == NULL)
> + if (klist == NULL) {
> + kinfo_proc_free();
>   return (-1);
> + }
> + nklist = count;
>  
> - for (i = 0; i < count; i++)
> + for (i = 0; i < nklist; i++)
>   klist[i] = &kp[i];
> - qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
> + qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
>  
> + evtimer_set(&kinfo_timer, (void (*)(int, short, void *))kinfo_proc_free,

this cast which looks dubious.  Can you please add a function with
the appropriate signature?

        void
        kinfo_timer_cb(int fd, short event, void *arg)
        {
                kinfo_proc_free();
        }

With this point addressed, ok jca@


> +    NULL);
> + timer.tv_sec = 5;
> + timer.tv_usec = 0;
> + evtimer_add(&kinfo_timer, &timer);
> +
> +cached:
>   *kinfo = NULL;
> - for (i = 0; i < count; i++) {
> + for (i = 0; i < nklist; i++) {
>   if (klist[i]->p_pid >= (int32_t)idx) {
>   *kinfo = klist[i];
>   break;
>   }
>   }
> - free(klist);
>  
>   return (0);
> +}
> +
> +void
> +kinfo_proc_free(void)
> +{
> + free(kp);
> + kp = NULL;
> + nkp = 0;
> + free(klist);
> + klist = NULL;
> + nklist = 0;
>  }
>  
>  int



--
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE

Reply | Threaded
Open this post in threaded view
|

Re: Speed up snmpwalk

Claudio Jeker
On Thu, May 24, 2018 at 01:39:28PM +0200, Jeremie Courreges-Anglas wrote:

>
>
> [...]
>
> > So here's an updated diff that uses a timer to purge the cache after
> > 5 seconds:
>
> LGTM except for...
>
> >
> > Index: usr.sbin/snmpd/mib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 mib.c
> > --- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
> > +++ usr.sbin/snmpd/mib.c 22 May 2018 09:57:57 -0000
> > @@ -414,6 +414,7 @@ int mib_hrswrun(struct oid *, struct be
> >  
> >  int kinfo_proc_comp(const void *, const void *);
> >  int kinfo_proc(u_int32_t, struct kinfo_proc **);
> > +void kinfo_proc_free(void);
> >  int kinfo_args(struct kinfo_proc *, char **);
> >  
> >  static struct oid hr_mib[] = {
> > @@ -857,24 +858,29 @@ kinfo_proc_comp(const void *a, const voi
> >   return (((*k1)->p_pid > (*k2)->p_pid) ? 1 : -1);
> >  }
> >  
> > +static struct event  kinfo_timer;
> > +static struct kinfo_proc *kp = NULL;
> > +static struct kinfo_proc **klist = NULL;
> > +static size_t  nkp = 0, nklist = 0;
> > +
> >  int
> >  kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
> >  {
> > - static struct kinfo_proc *kp = NULL;
> > - static size_t nkp = 0;
> > - int mib[] = { CTL_KERN, KERN_PROC,
> > -    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> > - struct kinfo_proc **klist;
> > - size_t size, count, i;
> > + int mib[] = { CTL_KERN, KERN_PROC,
> > +    KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> > + size_t size, count, i;
> > + struct timeval timer;
> > +
> > + if (kp != NULL && klist != NULL)
> > + goto cached;
> >  
> > + kinfo_proc_free();
> >   for (;;) {
> >   size = nkp * sizeof(*kp);
> >   mib[5] = nkp;
> >   if (sysctl(mib, sizeofa(mib), kp, &size, NULL, 0) == -1) {
> >   if (errno == ENOMEM) {
> > - free(kp);
> > - kp = NULL;
> > - nkp = 0;
> > + kinfo_proc_free();
> >   continue;
> >   }
> >  
> > @@ -887,30 +893,50 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
> >  
> >   kp = malloc(size);
> >   if (kp == NULL) {
> > - nkp = 0;
> > + kinfo_proc_free();
> >   return (-1);
> >   }
> >   nkp = count;
> >   }
> >  
> >   klist = calloc(count, sizeof(*klist));
> > - if (klist == NULL)
> > + if (klist == NULL) {
> > + kinfo_proc_free();
> >   return (-1);
> > + }
> > + nklist = count;
> >  
> > - for (i = 0; i < count; i++)
> > + for (i = 0; i < nklist; i++)
> >   klist[i] = &kp[i];
> > - qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
> > + qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
> >  
> > + evtimer_set(&kinfo_timer, (void (*)(int, short, void *))kinfo_proc_free,
>
> this cast which looks dubious.  Can you please add a function with
> the appropriate signature?
>
> void
> kinfo_timer_cb(int fd, short event, void *arg)
> {
> kinfo_proc_free();
> }
>
> With this point addressed, ok jca@
>
>
> > +    NULL);
> > + timer.tv_sec = 5;
> > + timer.tv_usec = 0;
> > + evtimer_add(&kinfo_timer, &timer);
> > +
> > +cached:
> >   *kinfo = NULL;
> > - for (i = 0; i < count; i++) {
> > + for (i = 0; i < nklist; i++) {
> >   if (klist[i]->p_pid >= (int32_t)idx) {
> >   *kinfo = klist[i];
> >   break;
> >   }
> >   }
> > - free(klist);
> >  
> >   return (0);
> > +}
> > +
> > +void
> > +kinfo_proc_free(void)
> > +{
> > + free(kp);
> > + kp = NULL;
> > + nkp = 0;
> > + free(klist);
> > + klist = NULL;
> > + nklist = 0;
> >  }
> >  
> >  int
>

Agreed and agreed :)
I didn't like the cast of the callback. With your addition this is also OK
by me.

--
:wq Claudio