Commit 3c1ea5c
slab: sheaf prefilling for guaranteed allocations
Add functions for efficient guaranteed allocations, e.g. in a critical section that cannot sleep, when the exact number of allocations is not known beforehand, but an upper limit can be calculated.

kmem_cache_prefill_sheaf() returns a sheaf containing at least the given number of objects.

kmem_cache_alloc_from_sheaf() will allocate an object from the sheaf and is guaranteed not to fail until the sheaf is depleted.

kmem_cache_return_sheaf() is for giving the sheaf back to the slab allocator after the critical section. This will also attempt to refill it to the cache's sheaf capacity for better efficiency of sheaves handling, but it's not strictly necessary for that refill to succeed.

kmem_cache_refill_sheaf() can be used to refill a previously obtained sheaf to the requested size. If the current size is sufficient, it does nothing. If the requested size exceeds the cache's sheaf_capacity and the sheaf's current capacity, the sheaf will be replaced with a new one, hence the indirect pointer parameter.

kmem_cache_sheaf_size() can be used to query the current size.

The implementation supports requesting sizes that exceed the cache's sheaf_capacity, but it is not efficient - such "oversize" sheaves are allocated fresh in kmem_cache_prefill_sheaf() and flushed and freed immediately by kmem_cache_return_sheaf(). kmem_cache_refill_sheaf() might be especially inefficient when replacing a sheaf with a new one of a larger capacity. It is therefore better to size the cache's sheaf_capacity so that oversize sheaves are exceptional.

CONFIG_SLUB_STATS counters are added for sheaf prefill and return operations. A prefill or return is considered _fast when it is able to grab or return a percpu spare sheaf (even if the sheaf needs a refill to satisfy the request, as those should amortize over time), and _slow otherwise (when the barn or even sheaf allocation/freeing has to be involved). sheaf_prefill_oversize is provided to determine how many prefills were oversize (a counter for oversize returns is not necessary, as all oversize refills result in oversize returns).

When slub_debug is enabled for a cache with sheaves, no percpu sheaves exist for it, but the prefill functionality is still provided, simply by all prefilled sheaves becoming oversize. If percpu sheaves are not created for a cache due to not passing the sheaf_capacity argument on cache creation, the prefills also work through oversize sheaves, but there's a WARN_ON_ONCE() to indicate the omission.

Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
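[Editor's note] For orientation, a minimal sketch of the intended call pattern around a non-sleeping critical section. This is not part of the commit; my_cache, my_lock, calc_upper_bound(), need_more_objects() and consume_object() are hypothetical:

	struct slab_sheaf *sheaf;
	unsigned int max_objs = calc_upper_bound();	/* known upper limit */
	void *obj;

	/* sleepable context: the prefill may allocate with GFP_KERNEL */
	sheaf = kmem_cache_prefill_sheaf(my_cache, GFP_KERNEL, max_objs);
	if (!sheaf)
		return -ENOMEM;

	spin_lock(&my_lock);
	/* up to max_objs allocations are guaranteed to succeed here */
	while (need_more_objects()) {
		/* gfp only conveys __GFP_ZERO / __GFP_ACCOUNT, per the patch */
		obj = kmem_cache_alloc_from_sheaf(my_cache, __GFP_ZERO, sheaf);
		consume_object(obj);
	}
	spin_unlock(&my_lock);

	/* back in sleepable context; may refill the sheaf for later reuse */
	kmem_cache_return_sheaf(my_cache, GFP_KERNEL, sheaf);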
1 parent ec66e0d commit 3c1ea5c

2 files changed: 279 additions, 0 deletions

include/linux/slab.h

Lines changed: 16 additions & 0 deletions
@@ -829,6 +829,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
 			int node) __assume_slab_alignment __malloc;
 #define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
 
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+		struct slab_sheaf **sheafp, unsigned int size);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+		struct slab_sheaf *sheaf);
+
+void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp,
+		struct slab_sheaf *sheaf) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_from_sheaf(...) \
+	alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__))
+
+unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf);
+
 /*
  * These macros allow declaring a kmem_buckets * parameter alongside size, which
  * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call

mm/slub.c

Lines changed: 263 additions & 0 deletions
@@ -401,6 +401,11 @@ enum stat_item {
 	BARN_GET_FAIL,		/* Failed to get full sheaf from barn */
 	BARN_PUT,		/* Put full sheaf to barn */
 	BARN_PUT_FAIL,		/* Failed to put full sheaf to barn */
+	SHEAF_PREFILL_FAST,	/* Sheaf prefill grabbed the spare sheaf */
+	SHEAF_PREFILL_SLOW,	/* Sheaf prefill found no spare sheaf */
+	SHEAF_PREFILL_OVERSIZE,	/* Allocation of oversize sheaf for prefill */
+	SHEAF_RETURN_FAST,	/* Sheaf return reattached spare sheaf */
+	SHEAF_RETURN_SLOW,	/* Sheaf return could not reattach spare */
 	NR_SLUB_STAT_ITEMS
 };
@@ -462,6 +467,8 @@ struct slab_sheaf {
 	union {
 		struct rcu_head rcu_head;
 		struct list_head barn_list;
+		/* only used for prefilled sheaves */
+		unsigned int capacity;
 	};
 	struct kmem_cache *cache;
 	unsigned int size;
@@ -2838,6 +2845,30 @@ static void barn_put_full_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf
 	spin_unlock_irqrestore(&barn->lock, flags);
 }
 
+static struct slab_sheaf *barn_get_full_or_empty_sheaf(struct node_barn *barn)
+{
+	struct slab_sheaf *sheaf = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&barn->lock, flags);
+
+	if (barn->nr_full) {
+		sheaf = list_first_entry(&barn->sheaves_full, struct slab_sheaf,
+					 barn_list);
+		list_del(&sheaf->barn_list);
+		barn->nr_full--;
+	} else if (barn->nr_empty) {
+		sheaf = list_first_entry(&barn->sheaves_empty,
+					 struct slab_sheaf, barn_list);
+		list_del(&sheaf->barn_list);
+		barn->nr_empty--;
+	}
+
+	spin_unlock_irqrestore(&barn->lock, flags);
+
+	return sheaf;
+}
+
 /*
  * If a full sheaf is available, return it and put the supplied empty one to
  * barn. We ignore the limit on empty sheaves as the number of sheaves doesn't
@@ -5036,6 +5067,228 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int nod
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
 
+/*
+ * returns a sheaf that has at least the requested size
+ * when prefilling is needed, do so with given gfp flags
+ *
+ * return NULL if sheaf allocation or prefilling failed
+ */
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
+{
+	struct slub_percpu_sheaves *pcs;
+	struct slab_sheaf *sheaf = NULL;
+
+	if (unlikely(size > s->sheaf_capacity)) {
+
+		/*
+		 * slab_debug disables cpu sheaves intentionally so all
+		 * prefilled sheaves become "oversize" and we give up on
+		 * performance for the debugging. Same with SLUB_TINY.
+		 * Creating a cache without sheaves and then requesting a
+		 * prefilled sheaf is however not expected, so warn.
+		 */
+		WARN_ON_ONCE(s->sheaf_capacity == 0 &&
+			     !IS_ENABLED(CONFIG_SLUB_TINY) &&
+			     !(s->flags & SLAB_DEBUG_FLAGS));
+
+		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
+		if (!sheaf)
+			return NULL;
+
+		stat(s, SHEAF_PREFILL_OVERSIZE);
+		sheaf->cache = s;
+		sheaf->capacity = size;
+
+		if (!__kmem_cache_alloc_bulk(s, gfp, size,
+					     &sheaf->objects[0])) {
+			kfree(sheaf);
+			return NULL;
+		}
+
+		sheaf->size = size;
+
+		return sheaf;
+	}
+
+	local_lock(&s->cpu_sheaves->lock);
+	pcs = this_cpu_ptr(s->cpu_sheaves);
+
+	if (pcs->spare) {
+		sheaf = pcs->spare;
+		pcs->spare = NULL;
+		stat(s, SHEAF_PREFILL_FAST);
+	} else {
+		stat(s, SHEAF_PREFILL_SLOW);
+		sheaf = barn_get_full_or_empty_sheaf(get_barn(s));
+		if (sheaf && sheaf->size)
+			stat(s, BARN_GET);
+		else
+			stat(s, BARN_GET_FAIL);
+	}
+
+	local_unlock(&s->cpu_sheaves->lock);
+
+	if (!sheaf)
+		sheaf = alloc_empty_sheaf(s, gfp);
+
+	if (sheaf && sheaf->size < size) {
+		if (refill_sheaf(s, sheaf, gfp)) {
+			sheaf_flush_unused(s, sheaf);
+			free_empty_sheaf(s, sheaf);
+			sheaf = NULL;
+		}
+	}
+
+	if (sheaf)
+		sheaf->capacity = s->sheaf_capacity;
+
+	return sheaf;
+}
+
+/*
+ * Use this to return a sheaf obtained by kmem_cache_prefill_sheaf()
+ *
+ * If the sheaf cannot simply become the percpu spare sheaf, but there's space
+ * for a full sheaf in the barn, we try to refill the sheaf back to the cache's
+ * sheaf_capacity to avoid handling partially full sheaves.
+ *
+ * If the refill fails because gfp is e.g. GFP_NOWAIT, or the barn is full, the
+ * sheaf is instead flushed and freed.
+ */
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+			     struct slab_sheaf *sheaf)
+{
+	struct slub_percpu_sheaves *pcs;
+	struct node_barn *barn;
+
+	if (unlikely(sheaf->capacity != s->sheaf_capacity)) {
+		sheaf_flush_unused(s, sheaf);
+		kfree(sheaf);
+		return;
+	}
+
+	local_lock(&s->cpu_sheaves->lock);
+	pcs = this_cpu_ptr(s->cpu_sheaves);
+	barn = get_barn(s);
+
+	if (!pcs->spare) {
+		pcs->spare = sheaf;
+		sheaf = NULL;
+		stat(s, SHEAF_RETURN_FAST);
+	}
+
+	local_unlock(&s->cpu_sheaves->lock);
+
+	if (!sheaf)
+		return;
+
+	stat(s, SHEAF_RETURN_SLOW);
+
+	/*
+	 * If the barn has too many full sheaves or we fail to refill the sheaf,
+	 * simply flush and free it.
+	 */
+	if (data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
+	    refill_sheaf(s, sheaf, gfp)) {
+		sheaf_flush_unused(s, sheaf);
+		free_empty_sheaf(s, sheaf);
+		return;
+	}
+
+	barn_put_full_sheaf(barn, sheaf);
+	stat(s, BARN_PUT);
+}
+
+/*
+ * refill a sheaf previously returned by kmem_cache_prefill_sheaf to at least
+ * the given size
+ *
+ * the sheaf might be replaced by a new one when requesting more than
+ * s->sheaf_capacity objects; if such replacement is necessary but the refill
+ * fails (returning -ENOMEM), the existing sheaf is left intact
+ *
+ * In practice we always refill to full sheaf's capacity.
+ */
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+			    struct slab_sheaf **sheafp, unsigned int size)
+{
+	struct slab_sheaf *sheaf;
+
+	/*
+	 * TODO: do we want to support *sheaf == NULL to be equivalent of
+	 * kmem_cache_prefill_sheaf() ?
+	 */
+	if (!sheafp || !(*sheafp))
+		return -EINVAL;
+
+	sheaf = *sheafp;
+	if (sheaf->size >= size)
+		return 0;
+
+	if (likely(sheaf->capacity >= size)) {
+		if (likely(sheaf->capacity == s->sheaf_capacity))
+			return refill_sheaf(s, sheaf, gfp);
+
+		if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size,
+					     &sheaf->objects[sheaf->size])) {
+			return -ENOMEM;
+		}
+		sheaf->size = sheaf->capacity;
+
+		return 0;
+	}
+
+	/*
+	 * We had a regular sized sheaf and need an oversize one, or we had an
+	 * oversize one already but need a larger one now.
+	 * This should be a very rare path so let's not complicate it.
+	 */
+	sheaf = kmem_cache_prefill_sheaf(s, gfp, size);
+	if (!sheaf)
+		return -ENOMEM;
+
+	kmem_cache_return_sheaf(s, gfp, *sheafp);
+	*sheafp = sheaf;
+	return 0;
+}
+
+/*
+ * Allocate from a sheaf obtained by kmem_cache_prefill_sheaf()
+ *
+ * Guaranteed not to fail for at least as many allocations as the requested
+ * size. After the sheaf is emptied, it fails - no fallback to the slab cache
+ * itself.
+ *
+ * The gfp parameter is meant only to specify __GFP_ZERO or __GFP_ACCOUNT;
+ * memcg charging is forced over limit if necessary, to avoid failure.
+ */
+void *
+kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
+				   struct slab_sheaf *sheaf)
+{
+	void *ret = NULL;
+	bool init;
+
+	if (sheaf->size == 0)
+		goto out;
+
+	ret = sheaf->objects[--sheaf->size];
+
+	init = slab_want_init_on_alloc(gfp, s);
+
+	/* add __GFP_NOFAIL to force successful memcg charging */
+	slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size);
+out:
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE);
+
+	return ret;
+}
+
+unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
+{
+	return sheaf->size;
+}
 /*
  * To avoid unnecessary overhead, we pass through large allocation requests
  * directly to the page allocator. We use __GFP_COMP, because we will need to
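[Editor's note] Because an oversize refill may replace the sheaf outright, callers pass the sheaf pointer indirectly. A brief sketch, not part of the commit, using the same hypothetical names as above:

	/* grow the prefilled sheaf; on success *(&sheaf) may point to a new one */
	if (kmem_cache_refill_sheaf(my_cache, GFP_KERNEL, &sheaf, new_upper_bound))
		return -ENOMEM;	/* on failure the original sheaf is left intact */

	/* after success, at least new_upper_bound allocations are guaranteed */
	WARN_ON(kmem_cache_sheaf_size(sheaf) < new_upper_bound);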
@@ -8578,6 +8831,11 @@ STAT_ATTR(BARN_GET, barn_get);
 STAT_ATTR(BARN_GET_FAIL, barn_get_fail);
 STAT_ATTR(BARN_PUT, barn_put);
 STAT_ATTR(BARN_PUT_FAIL, barn_put_fail);
+STAT_ATTR(SHEAF_PREFILL_FAST, sheaf_prefill_fast);
+STAT_ATTR(SHEAF_PREFILL_SLOW, sheaf_prefill_slow);
+STAT_ATTR(SHEAF_PREFILL_OVERSIZE, sheaf_prefill_oversize);
+STAT_ATTR(SHEAF_RETURN_FAST, sheaf_return_fast);
+STAT_ATTR(SHEAF_RETURN_SLOW, sheaf_return_slow);
 #endif	/* CONFIG_SLUB_STATS */
 
 #ifdef CONFIG_KFENCE
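[Editor's note] With CONFIG_SLUB_STATS=y, these counters are exposed like the existing barn counters as per-cache sysfs files, e.g. /sys/kernel/slab/<cache>/sheaf_prefill_fast.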
@@ -8678,6 +8936,11 @@ static struct attribute *slab_attrs[] = {
 	&barn_get_fail_attr.attr,
 	&barn_put_attr.attr,
 	&barn_put_fail_attr.attr,
+	&sheaf_prefill_fast_attr.attr,
+	&sheaf_prefill_slow_attr.attr,
+	&sheaf_prefill_oversize_attr.attr,
+	&sheaf_return_fast_attr.attr,
+	&sheaf_return_slow_attr.attr,
 #endif
 #ifdef CONFIG_FAILSLAB
 	&failslab_attr.attr,
