help kmem pressure in pool

Ted Unangst
We have iterated on various designs to reduce pressure on the kmem map over
time. One of the problems is that sometimes you want to allocate something in
intr context (NOWAIT), and other times in process context (WAITOK), but at
free time they all get glommed together. This happens in pool.
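
To make the mismatch concrete, here is an illustrative (not from the diff)
pair of callers sharing one pool; pool_get() is told the context via its
flags, but pool_put() gets no such hint, so the origin of the backing page is
lost by free time. somepool is a made-up name:

	extern struct pool somepool;	/* hypothetical pool */
	void *v;

	/* interrupt context: must not sleep */
	v = pool_get(&somepool, PR_NOWAIT);

	/* process context: may sleep until memory is available */
	v = pool_get(&somepool, PR_WAITOK);

	/* both frees look the same; pool_put() takes no flags,
	 * so it can't tell how the page behind v was allocated */
	pool_put(&somepool, v);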

Since pool always keeps a little extra info about each page it has allocated,
in the form of the pool page header, we can remember how a page was allocated
and free it back to the right place. Thus we can make better choices when
allocating and avoid grabbing mbufs and clusters from the tiny kmem map for
waitok callers.
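
In outline, with the new ph_flags field, these two lines (simplified from the
diff below) are the heart of it:

	/* pool_p_alloc(): remember how the page was allocated */
	ph->ph_flags = flags;

	/* pool_p_free(): replay those flags so the page goes back
	 * the same way it came */
	pool_allocator_free(pp, ph->ph_flags, ph->ph_page);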

What this diff does:

Add a flags argument to the pool allocator free functions. The default
multi-page allocator uses this to defer to the non-intr code path when the
page was allocated WAITOK. The addition of the pool_gc task means we can
always free pages from process context, so pool_put() no longer needs to
free pages itself.
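
For out-of-tree pool allocators, the only interface change is the extra flags
argument on the free hook; the flags handed in are the ones recorded when the
page was allocated. A minimal sketch against the new interface
(my_page_alloc/my_page_free are invented names; the splvm/KERNEL_LOCK details
the real multi allocators need are omitted):

	void *my_page_alloc(struct pool *, int, int *);
	void  my_page_free(struct pool *, int, void *);

	struct pool_allocator my_pool_allocator = {
		my_page_alloc,
		my_page_free,
	};

	void
	my_page_free(struct pool *pp, int flags, void *v)
	{
		/* a WAITOK page may be freed via the sleeping path,
		 * keeping it out of the intrsafe kmem map */
		if (flags & PR_WAITOK)
			km_free(v, pp->pr_pgsize, &kv_any, pp->pr_crange);
		else
			km_free(v, pp->pr_pgsize, &kv_intrsafe, pp->pr_crange);
	}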

This diff could use testing on some weird machines, or on systems that have
experienced kmem pressure, etc.


Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.223
diff -u -p -r1.223 subr_pool.c
--- kern/subr_pool.c 8 Jun 2018 15:38:15 -0000 1.223
+++ kern/subr_pool.c 21 Jan 2019 04:15:13 -0000
@@ -155,6 +155,7 @@ struct pool_page_header {
  caddr_t ph_colored; /* page's colored address */
  unsigned long ph_magic;
  int ph_tick;
+ int ph_flags;
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -225,13 +226,13 @@ void pool_get_done(struct pool *, void
 void pool_runqueue(struct pool *, int);
 
 void *pool_allocator_alloc(struct pool *, int, int *);
-void pool_allocator_free(struct pool *, void *);
+void pool_allocator_free(struct pool *, int, void *);
 
 /*
  * The default pool allocator.
  */
 void *pool_page_alloc(struct pool *, int, int *);
-void pool_page_free(struct pool *, void *);
+void pool_page_free(struct pool *, int, void *);
 
 /*
  * safe for interrupts; this is the default allocator
@@ -243,7 +244,7 @@ struct pool_allocator pool_allocator_sin
 };
 
 void *pool_multi_alloc(struct pool *, int, int *);
-void pool_multi_free(struct pool *, void *);
+void pool_multi_free(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi = {
  pool_multi_alloc,
@@ -252,7 +253,7 @@ struct pool_allocator pool_allocator_mul
 };
 
 void *pool_multi_alloc_ni(struct pool *, int, int *);
-void pool_multi_free_ni(struct pool *, void *);
+void pool_multi_free_ni(struct pool *, int, void *);
 
 struct pool_allocator pool_allocator_multi_ni = {
  pool_multi_alloc_ni,
@@ -787,7 +788,6 @@ pool_do_get(struct pool *pp, int flags,
 void
 pool_put(struct pool *pp, void *v)
 {
- struct pool_page_header *ph, *freeph = NULL;
 
 #ifdef DIAGNOSTIC
  if (v == NULL)
@@ -808,19 +808,8 @@ pool_put(struct pool *pp, void *v)
  pp->pr_nout--;
  pp->pr_nput++;
 
- /* is it time to free a page? */
- if (pp->pr_nidle > pp->pr_maxpages &&
-    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
-    (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
- freeph = ph;
- pool_p_remove(pp, freeph);
- }
-
  pl_leave(pp, &pp->pr_lock);
 
- if (freeph != NULL)
- pool_p_free(pp, freeph);
-
  if (!TAILQ_EMPTY(&pp->pr_requests)) {
  pl_enter(pp, &pp->pr_requests_lock);
  pool_runqueue(pp, PR_NOWAIT);
@@ -933,10 +922,11 @@ pool_p_alloc(struct pool *pp, int flags,
  else {
  ph = pool_get(&phpool, flags);
  if (ph == NULL) {
- pool_allocator_free(pp, addr);
+ pool_allocator_free(pp, flags, addr);
  return (NULL);
  }
  }
+ ph->ph_flags = flags;
 
  XSIMPLEQ_INIT(&ph->ph_items);
  ph->ph_page = addr;
@@ -1010,7 +1000,7 @@ pool_p_free(struct pool *pp, struct pool
 #endif
  }
 
- pool_allocator_free(pp, ph->ph_page);
+ pool_allocator_free(pp, ph->ph_flags, ph->ph_page);
 
  if (!POOL_INPGHDR(pp))
  pool_put(&phpool, ph);
@@ -1616,11 +1606,11 @@ pool_allocator_alloc(struct pool *pp, in
 }
 
 void
-pool_allocator_free(struct pool *pp, void *v)
+pool_allocator_free(struct pool *pp, int flags, void *v)
 {
  struct pool_allocator *pa = pp->pr_alloc;
 
- (*pa->pa_free)(pp, v);
+ (*pa->pa_free)(pp, flags, v);
 }
 
 void *
@@ -1635,7 +1625,7 @@ pool_page_alloc(struct pool *pp, int fla
 }
 
 void
-pool_page_free(struct pool *pp, void *v)
+pool_page_free(struct pool *pp, int flags, void *v)
 {
  km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
 }
@@ -1648,6 +1638,9 @@ pool_multi_alloc(struct pool *pp, int fl
  void *v;
  int s;
 
+ if (flags & PR_WAITOK)
+ return pool_multi_alloc_ni(pp, flags, slowdown);
+
  if (POOL_INPGHDR(pp))
  kv.kv_align = pp->pr_pgsize;
 
@@ -1662,11 +1655,16 @@ pool_multi_alloc(struct pool *pp, int fl
 }
 
 void
-pool_multi_free(struct pool *pp, void *v)
+pool_multi_free(struct pool *pp, int flags, void *v)
 {
  struct kmem_va_mode kv = kv_intrsafe;
  int s;
 
+ if (flags & PR_WAITOK) {
+ pool_multi_free_ni(pp, flags, v);
+ return;
+ }
+
  if (POOL_INPGHDR(pp))
  kv.kv_align = pp->pr_pgsize;
 
@@ -1696,7 +1694,7 @@ pool_multi_alloc_ni(struct pool *pp, int
 }
 
 void
-pool_multi_free_ni(struct pool *pp, void *v)
+pool_multi_free_ni(struct pool *pp, int flags, void *v)
 {
  struct kmem_va_mode kv = kv_any;
 
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.265
diff -u -p -r1.265 uipc_mbuf.c
--- kern/uipc_mbuf.c 9 Jan 2019 16:37:27 -0000 1.265
+++ kern/uipc_mbuf.c 21 Jan 2019 04:14:45 -0000
@@ -139,7 +139,7 @@ unsigned int mbuf_mem_limit; /* how much
 unsigned int mbuf_mem_alloc; /* how much memory has been allocated */
 
 void *m_pool_alloc(struct pool *, int, int *);
-void m_pool_free(struct pool *, void *);
+void m_pool_free(struct pool *, int, void *);
 
 struct pool_allocator m_pool_allocator = {
  m_pool_alloc,
@@ -1454,9 +1454,9 @@ m_pool_alloc(struct pool *pp, int flags,
 }
 
 void
-m_pool_free(struct pool *pp, void *v)
+m_pool_free(struct pool *pp, int flags, void *v)
 {
- (*pool_allocator_multi.pa_free)(pp, v);
+ (*pool_allocator_multi.pa_free)(pp, flags, v);
 
  mtx_enter(&m_pool_mtx);
  mbuf_mem_alloc -= pp->pr_pgsize;
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.74
diff -u -p -r1.74 pool.h
--- sys/pool.h 13 Aug 2017 20:26:33 -0000 1.74
+++ sys/pool.h 21 Jan 2019 04:12:35 -0000
@@ -107,7 +107,7 @@ TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
  void *(*pa_alloc)(struct pool *, int, int *);
- void (*pa_free)(struct pool *, void *);
+ void (*pa_free)(struct pool *, int, void *);
  size_t   pa_pagesz;
 };