On Tue, 22 May 2018 11:05:48 +0200 Claudio Jeker <[email protected]>
wrote:
> On Tue, May 22, 2018 at 10:26:30AM +0200, Gerhard Roth wrote:
> > Hi,
> >
> > a snmpwalk of HOST-RESOURCES-MIB::hrSWRunTable doesn't scale very well
> > with an increasing number of running processes.
> >
> > For every process and each of the 7 elements of the table, mib_hrswrun()
> > would call kinfo_proc() which queried all the processes running on the
> > system and sort them by pid.
> >
> > The patch below keeps the results cached and updates the list of processes
> > at most once every 5 seconds.
> >
> > Gerhard
> >
>
> Agreed, two minor nits inline
>
> >
> > Index: usr.sbin/snmpd/mib.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
> > retrieving revision 1.86
> > diff -u -p -u -p -r1.86 mib.c
> > --- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
> > +++ usr.sbin/snmpd/mib.c 22 May 2018 08:17:16 -0000
> > @@ -861,11 +861,18 @@ int
> > kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
> > {
> > static struct kinfo_proc *kp = NULL;
> > - static size_t nkp = 0;
> > + static struct kinfo_proc **klist = NULL;
> > + static size_t nkp = 0, nklist = 0;
> > + static time_t tinfo = 0;
> > int mib[] = { CTL_KERN, KERN_PROC,
> > KERN_PROC_ALL, 0, sizeof(*kp), 0 };
> > - struct kinfo_proc **klist;
> > + struct kinfo_proc **knew;
> > size_t size, count, i;
> > + time_t now;
> > +
> > + (void)time(&now);
>
> I think using clock_gettime(CLOCK_MONOTONIC, ...) would be preferred here.
> Just in case the clock jumps after snmpd was started.
>
> > + if (now - tinfo < 5 && kp != NULL && klist != NULL)
> > + goto cached;
> >
> > for (;;) {
> > size = nkp * sizeof(*kp);
> > @@ -892,23 +899,26 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
> > }
> > nkp = count;
> > }
> > + tinfo = now;
> >
> > - klist = calloc(count, sizeof(*klist));
> > - if (klist == NULL)
> > + knew = recallocarray(klist, nklist, count, sizeof(*klist));
> > + if (knew == NULL)
> > return (-1);
> > + klist = knew;
> > + nklist = count;
> >
> > - for (i = 0; i < count; i++)
> > + for (i = 0; i < nklist; i++)
> > klist[i] = &kp[i];
> > - qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
> > + qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
> >
> > +cached:
> > *kinfo = NULL;
> > - for (i = 0; i < count; i++) {
> > + for (i = 0; i < nklist; i++) {
> > if (klist[i]->p_pid >= (int32_t)idx) {
> > *kinfo = klist[i];
> > break;
> > }
> > }
> > - free(klist);
>
> IIRC snmpd is also doing a big cleanup round on exit like some other
> daemons, this will result in a "leak" there. IMO this is fine. Just wanted
> to point out. I think a cleaner approach would be to run a timer, that
> frees klist after 5 seconds and the lookup code would then just skip the
> fetching if the pointer is != NULL. Also has the benefit that large
> process tables are not cached for a long time if there are no requests
> anymore.
>
> > return (0);
> > }
> >
>
So here's an updated diff that uses a timer to purge the cache after
5 seconds:
Index: usr.sbin/snmpd/mib.c
===================================================================
RCS file: /cvs/src/usr.sbin/snmpd/mib.c,v
retrieving revision 1.86
diff -u -p -u -p -r1.86 mib.c
--- usr.sbin/snmpd/mib.c 9 May 2018 13:56:46 -0000 1.86
+++ usr.sbin/snmpd/mib.c 22 May 2018 09:57:57 -0000
@@ -414,6 +414,7 @@ int mib_hrswrun(struct oid *, struct be
int kinfo_proc_comp(const void *, const void *);
int kinfo_proc(u_int32_t, struct kinfo_proc **);
+void kinfo_proc_free(void);
int kinfo_args(struct kinfo_proc *, char **);
static struct oid hr_mib[] = {
@@ -857,24 +858,29 @@ kinfo_proc_comp(const void *a, const voi
return (((*k1)->p_pid > (*k2)->p_pid) ? 1 : -1);
}
+static struct event kinfo_timer;
+static struct kinfo_proc *kp = NULL;
+static struct kinfo_proc **klist = NULL;
+static size_t nkp = 0, nklist = 0;
+
int
kinfo_proc(u_int32_t idx, struct kinfo_proc **kinfo)
{
- static struct kinfo_proc *kp = NULL;
- static size_t nkp = 0;
- int mib[] = { CTL_KERN, KERN_PROC,
- KERN_PROC_ALL, 0, sizeof(*kp), 0 };
- struct kinfo_proc **klist;
- size_t size, count, i;
+ int mib[] = { CTL_KERN, KERN_PROC,
+ KERN_PROC_ALL, 0, sizeof(*kp), 0 };
+ size_t size, count, i;
+ struct timeval timer;
+
+ if (kp != NULL && klist != NULL)
+ goto cached;
+ kinfo_proc_free();
for (;;) {
size = nkp * sizeof(*kp);
mib[5] = nkp;
if (sysctl(mib, sizeofa(mib), kp, &size, NULL, 0) == -1) {
if (errno == ENOMEM) {
- free(kp);
- kp = NULL;
- nkp = 0;
+ kinfo_proc_free();
continue;
}
@@ -887,30 +893,50 @@ kinfo_proc(u_int32_t idx, struct kinfo_p
kp = malloc(size);
if (kp == NULL) {
- nkp = 0;
+ kinfo_proc_free();
return (-1);
}
nkp = count;
}
klist = calloc(count, sizeof(*klist));
- if (klist == NULL)
+ if (klist == NULL) {
+ kinfo_proc_free();
return (-1);
+ }
+ nklist = count;
- for (i = 0; i < count; i++)
+ for (i = 0; i < nklist; i++)
klist[i] = &kp[i];
- qsort(klist, count, sizeof(*klist), kinfo_proc_comp);
+ qsort(klist, nklist, sizeof(*klist), kinfo_proc_comp);
+ evtimer_set(&kinfo_timer, (void (*)(int, short, void *))kinfo_proc_free,
+ NULL);
+ timer.tv_sec = 5;
+ timer.tv_usec = 0;
+ evtimer_add(&kinfo_timer, &timer);
+
+cached:
*kinfo = NULL;
- for (i = 0; i < count; i++) {
+ for (i = 0; i < nklist; i++) {
if (klist[i]->p_pid >= (int32_t)idx) {
*kinfo = klist[i];
break;
}
}
- free(klist);
return (0);
+}
+
+void
+kinfo_proc_free(void)
+{
+ free(kp);
+ kp = NULL;
+ nkp = 0;
+ free(klist);
+ klist = NULL;
+ nklist = 0;
}
int