Week 5 - Slub Allocator

本文章涵蓋基礎 Slub Allocator 的運作流程。

分配

分配主要由 slab_alloc_node 負責,經過一些檢查,呼叫 __slab_alloc_node
使用 GFP_ZERO 清空時,僅會清空 orig_size 大小的內容,而非整個 object。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * slab_alloc_node() - top-level SLUB allocation entry point.
 * Runs the pre-allocation hooks, gives KFENCE a chance to service the
 * request, then falls through to __slab_alloc_node() for the per-CPU
 * fast/slow paths. Returns the allocated object, or NULL on failure.
 * @orig_size is the size the caller actually asked for; with GFP_ZERO
 * only @orig_size bytes may be zeroed, not the whole object (see the
 * comment above slab_post_alloc_hook() below).
 */
static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
bool init = false;

/* Pre-allocation hooks may substitute the cache or veto the allocation. */
s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;

/* KFENCE may intercept and serve this allocation from its own pool. */
object = kfence_alloc(s, orig_size, gfpflags);
if (unlikely(object))
goto out;

object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);

/* Scrub the in-object freelist pointer if the cache requires it. */
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);

out:
/*
 * When init equals 'true', like for kzalloc() family, only
 * @orig_size bytes might be zeroed instead of s->object_size
 * In case this fails due to memcg_slab_post_alloc_hook(),
 * object is set to NULL
 */
slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);

return object;
}

__slab_alloc_node

__slab_alloc_node 主要判斷 cpu_slab 上有無空閒的 object,若沒有則呼叫 __slab_alloc。
tid 是一個單調遞增的值,每次 CPU 的 freelist 變更時,tid 都會更新。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*
 * __slab_alloc_node() - lockless per-CPU fast path.
 * If the current CPU's cpu_slab has a usable slab and a non-empty
 * freelist matching @node, pop one object with a cmpxchg-double;
 * otherwise fall back to the __slab_alloc() slow path.
 */
static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
struct kmem_cache_cpu *c;
struct slab *slab;
unsigned long tid;
void *object;

redo:
/*
 * We may be preempted mid-allocation and migrate to another CPU,
 * which would make us update the wrong CPU's freelist, or race with
 * a concurrent change of this CPU's freelist.
 * tid is a monotonically increasing value, bumped on every freelist
 * change; the cmpxchg below validates it and on mismatch we retry
 * from redo.
 */
c = raw_cpu_ptr(s->cpu_slab);
tid = READ_ONCE(c->tid);

/* Make sure tid is read before c->freelist. */
barrier();

object = c->freelist;
slab = c->slab;

/* No object on the freelist, or no slab in use: take the slow path. */
if (!USE_LOCKLESS_FAST_PATH() ||
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
} else {
void *next_object = get_freepointer_safe(s, object);

/*
 * We now hold the second object of the freelist and want to make
 * cpu_slab->freelist point at it; a context switch may have
 * happened in between, so verify nothing changed.
 *
 * The cmpxchg-double atomically:
 * 1. checks s->cpu_slab->freelist == object
 * 2. checks s->cpu_slab->tid == tid
 * 3. if both unchanged, sets s->cpu_slab->freelist = next_object
 *    and s->cpu_slab->tid = next_tid(tid)
 * 4. reports whether the swap happened
 */
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
next_object, next_tid(tid)))) {

note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}

/*
 * Prefetch the object the new freelist head points to, since the
 * next call will fetch it via get_freepointer_safe().
 */
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}

return object;
}

__slab_alloc 是 ___slab_alloc 的 wrapper,用於重新讀取 cpu_slab。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * __slab_alloc() - slow-path wrapper around ___slab_alloc().
 * With CONFIG_PREEMPT_COUNT it re-reads the per-CPU cpu_slab pointer
 * via slub_get_cpu_ptr() (which, by analogy with get_cpu_ptr(),
 * presumably pins us to the CPU — confirm against the definition)
 * so that @c is stable for the duration of the slow path.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
{
void *p;

#ifdef CONFIG_PREEMPT_COUNT
c = slub_get_cpu_ptr(s->cpu_slab);
#endif

p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
#ifdef CONFIG_PREEMPT_COUNT
slub_put_cpu_ptr(s->cpu_slab);
#endif
return p;
}

___slab_alloc

如果當前 kmem_cache_cpu 沒有 slab,則跳到 new_slab 要一個 slab。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c, unsigned int orig_size)
{
void *freelist;
struct slab *slab;
unsigned long flags;
struct partial_context pc;

stat(s, ALLOC_SLOWPATH);

reread_slab:

slab = READ_ONCE(c->slab);
if (!slab) {
if (unlikely(node != NUMA_NO_NODE &&
!node_isset(node, slab_nodes)))
node = NUMA_NO_NODE;
goto new_slab;
}

redo

  • 如果 slab 不屬於當前 node,則跳到 deactivate_slab
  • 檢查 slab == c->slab,如果中間有被 preempt 造成不一致,回到 reread_slab
  • 如果 c->freelist 有東西,表示可以直接從 freelist 拿 object,跳至 load_freelist
  • 若 freelist 仍為空,從 slab 拿 freelist,裡面仍是空的話,new_slab 索取新的 slab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
redo:

if (unlikely(!node_match(slab, node))) {
if (!node_isset(node, slab_nodes)) {
node = NUMA_NO_NODE;
} else {
stat(s, ALLOC_NODE_MISMATCH);
goto deactivate_slab;
}
}

if (unlikely(!pfmemalloc_match(slab, gfpflags)))
goto deactivate_slab;

local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(slab != c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_slab;
}
freelist = c->freelist;
if (freelist)
goto load_freelist;

freelist = get_freelist(s, slab);

if (!freelist) {
c->slab = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
}

stat(s, ALLOC_REFILL);

總結一下,若 slab 出問題,就會進入 new_slab 或 deactivate_slab 以修正 slab。
c->freelist 存在,則拿 c->freelist,若沒有則拿 slab 的。
連 slab 都沒有 freelist 的話,同樣 new_slab。

load_freelist 條件是 freelist, slab 都存在且 node 正確,這應該在 __slab_alloc_node 就被過濾掉了,為何會走到這裡?
原因可能是 USE_LOCKLESS_FAST_PATH() == false,或是中途被 preempt 導致經過 redo 進來這裡。

get_freelist() 函數主要獲取 slab->freelist,將 slab->freelist 設為 NULL 並回傳原來的 freelist。如同前面講的,當 slab 被一個 cpu 使用時,其 freelist 會被清空。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * get_freelist() - detach the slab's own freelist for this CPU.
 * Atomically takes slab->freelist (setting it to NULL) and marks all
 * objects in use; returns the old freelist. Caller must hold the
 * cpu_slab lock (asserted below).
 */
static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
{
struct slab new;
unsigned long counters;
void *freelist;

lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));

do {
freelist = slab->freelist;
counters = slab->counters;

new.counters = counters;
VM_BUG_ON(!new.frozen);

new.inuse = slab->objects;
new.frozen = freelist != NULL;

} while (!__cmpxchg_double_slab(s, slab,
freelist, counters,
NULL, new.counters,
"get_freelist"));

/* __cmpxchg_double_slab() expands to
 * cmpxchg_double(&slab->freelist, &slab->counters,
 * freelist_old, counters_old,
 * freelist_new, counters_new)
 *
 * If freelist and counters are unchanged, it installs
 * slab->freelist = NULL and slab->counters = new.counters.
 */

return freelist;
}

load_freelist

load_freelist 假設 freelist 已經取得,並把 c->freelist 更新成下一個 object。注意這裡不是另外從 freelist 拿一個 object 回傳,因為我們在 redo 階段已經拿到了。

1
2
3
4
5
6
7
8
9
load_freelist:

lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));

VM_BUG_ON(!c->slab->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
return freelist;

其實 load_freelist 做的事幾乎跟 fast path 一樣,只是變成用 lock 確保同步。

get_freepointer 負責從一個 object 找到下一個 object 的位址,底下是 freelist_ptr,若有開 harden freelist 則會與一些值 xor。

1
2
3
4
5
6
7
8
9
10
11

/*
 * freelist_ptr() - (de)obfuscate a stored free pointer.
 * With CONFIG_SLAB_FREELIST_HARDENED the pointer is XORed with the
 * per-cache random value and the byte-swapped storage address
 * @ptr_addr; the same transform both encodes and decodes. Without
 * hardening, @ptr is returned unchanged.
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
return (void *)((unsigned long)ptr ^ s->random ^
swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
return ptr;
#endif
}

注意 get_freelist 是從 slab 獲取 freelist,會清空 slab->freelist;而 get_freepointer 是從一個 object 獲取下一個 object 的位址,兩者應加以區別。

deactivate_slab

將 cpu cache 資料清空,調用 deactivate_slab 函數。

1
2
3
4
5
6
7
8
9
10
11
12
13
deactivate_slab:

local_lock_irqsave(&s->cpu_slab->lock, flags);
if (slab != c->slab) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_slab;
}
freelist = c->freelist;
c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, slab, freelist);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

/*
 * deactivate_slab() - unfreeze a per-CPU slab and put it back.
 * Splices the leftover per-CPU @freelist onto slab->freelist, clears
 * the frozen bit, then either frees the slab to the buddy system,
 * adds it to the node partial list, or leaves it off-list when full.
 */
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int free_delta = 0;
enum slab_modes mode = M_NONE;
void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
unsigned long flags = 0;
struct slab new;
struct slab old;

if (slab->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = DEACTIVATE_TO_TAIL;
}

/*
 * Count the objects on the per-CPU freelist.
 * If the freelist is corrupted, give up on the damaged tail;
 * freelist_tail tracks the last intact object.
 */
freelist_tail = NULL;
freelist_iter = freelist;
while (freelist_iter) {
nextfree = get_freepointer(s, freelist_iter);

if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
break;

freelist_tail = freelist_iter;
free_delta++;

freelist_iter = nextfree;
}

/*
 * Unfreeze the slab (detach it from any CPU) and splice the per-CPU
 * freelist in front of slab->freelist.
 */
redo:

old.freelist = READ_ONCE(slab->freelist);
old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);

new.counters = old.counters;
if (freelist_tail) {
new.inuse -= free_delta;
set_freepointer(s, freelist_tail, old.freelist);
new.freelist = freelist;
} else
new.freelist = old.freelist;

new.frozen = 0;

/*
 * If every object in the slab is free and the node already has at
 * least s->min_partial partial slabs, return the slab to the buddy
 * system. Otherwise, if new.freelist still has free objects, put
 * the slab on the node partial list.
 */
if (!new.inuse && n->nr_partial >= s->min_partial) {
mode = M_FREE;
} else if (new.freelist) {
mode = M_PARTIAL;
spin_lock_irqsave(&n->list_lock, flags);
} else {
mode = M_FULL_NOLIST;
}

if (!cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab")) {
/* Lost the race; drop the list lock (if taken) and retry. */
if (mode == M_PARTIAL)
spin_unlock_irqrestore(&n->list_lock, flags);
goto redo;
}

if (mode == M_PARTIAL) {
add_partial(n, slab, tail);
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, tail);
} else if (mode == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
} else if (mode == M_FULL_NOLIST) {
stat(s, DEACTIVATE_FULL);
}
}

new_slab

從 partial list 拿一個 slab。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
new_slab:

if (slub_percpu_partial(c)) { //有 percpu partial
local_lock_irqsave(&s->cpu_slab->lock, flags);
// 應該有兩條路能跳到 new_slab,一個是 slab 為空,一個是 slab->freelist 為空,且它們都將 c->slab 設為 NULL
// 若 c->slab 又有東西了,則跳回 reread_slab
if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto reread_slab;
}
// 被 preempt 且 partial 空了
if (unlikely(!slub_percpu_partial(c))) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
goto new_objects;
}

// 拿一個 c->partial slab,跳回 redo
slab = c->slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
}

new_objects

獲取 node partial slab 的 freelist,或跟 buddy system 要一塊。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
new_objects:

pc.flags = gfpflags;
pc.slab = &slab;
pc.orig_size = orig_size;

// 獲取 node partial slab 的 freelist
freelist = get_partial(s, node, &pc);
if (freelist)
goto check_new_slab;

// 跟 buddy system 要一塊
slub_put_cpu_ptr(s->cpu_slab);
slab = new_slab(s, gfpflags, node);
c = slub_get_cpu_ptr(s->cpu_slab);

if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}

stat(s, ALLOC_SLAB);

if (kmem_cache_debug(s)) {
freelist = alloc_single_from_new_slab(s, slab, orig_size);

if (unlikely(!freelist))
goto new_objects;

if (s->flags & SLAB_STORE_USER)
set_track(s, freelist, TRACK_ALLOC, addr);

return freelist;
}

freelist = slab->freelist;
slab->freelist = NULL;
slab->inuse = slab->objects;
slab->frozen = 1;

inc_slabs_node(s, slab_nid(slab), slab->objects);

若此 cache 有開啟 debug flag,追蹤資訊。

pfmemalloc 是 linux 確保最低記憶體大小的機制,假設現在記憶體快滿了,需要 kswapd 來做 swapping,但 kswapd 本身也需要記憶體。
因此 linux 將一部分記憶體標記為 PF_MEMALLOC 保留區,只有 PF_MEMALLOC 標誌的 process 能使用它。
若 slab 來自 PF_MEMALLOC 且當前 process 無 PF_MEMALLOC 標誌,則將 slab 返回給 kmem_cache_node,確保不會從 fast path 用到它。最後, free object 仍要丟給使用者。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
check_new_slab:

if (kmem_cache_debug(s)) {
/*
* For debug caches here we had to go through
* alloc_single_from_partial() so just store the tracking info
* and return the object
*/
if (s->flags & SLAB_STORE_USER)
set_track(s, freelist, TRACK_ALLOC, addr);

return freelist;
}

if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
/*
* For !pfmemalloc_match() case we don't load freelist so that
* we don't make further mismatched allocations easier.
*/
deactivate_slab(s, slab, get_freepointer(s, freelist));
return freelist;
}

將新的 slab 給 cpu_slab,跳到 load_freelist。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
retry_load_slab:

local_lock_irqsave(&s->cpu_slab->lock, flags);
if (unlikely(c->slab)) {
void *flush_freelist = c->freelist;
struct slab *flush_slab = c->slab;

c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);

local_unlock_irqrestore(&s->cpu_slab->lock, flags);

deactivate_slab(s, flush_slab, flush_freelist);

stat(s, CPUSLAB_FLUSH);

goto retry_load_slab;
}
c->slab = slab;

goto load_freelist;
}

總結

object 分配共有 fast path 和 slow path,起始點在 slab_alloc_node

如果 cpu 上有 slab, freelist,則走 fast path,也就是取用 cpu 上的 freelist,並更新 freelist。整個過程用 cmpxchg 指令取代 lock,避免同步問題。
若條件不符合,則進入 ___slab_alloc 走 slow path,過程中可能發生 context switch 導致使用的 cpu 不一致或 freelist 被更改,裡面用 cpu lock 來解決此問題。

如果發現 cpu slab, freelist 又出現了,則直接拿裡面的 object,並 load_freelist 更新 freelist。
如果 node 不 match、佔用到保留記憶體,或是要新的 slab 後發現 cpu 又有 slab 可用,則 deactivate_slab 歸還給 kmem_cache_node 或 buddy system。
如果 slab 有 freelist 而 cpu 沒有,使用 slab 的 freelist。

如果 freelist 無 object 可用,new_slab 從 cpu partial 拿。
連 partial 都沒有的話,new_objects 從 node partial 拿或跟 buddy system 要一塊。

所以取用 slab 的優先級是這樣的:

  1. cpu freelist (slab freelist)
  2. cpu partial
  3. node partial
  4. buddy system

這個網站有清楚的示意圖,可以去看看。

釋放

do_slab_free

do_slab_free 允許釋放多個 object 連成的 list。
若 slab != c->slab,則呼叫 __slab_free 走 slow path。
fast path 將 object 放回當前 cpu 用的 slab,USE_LOCKLESS_FAST_PATH 啟用時,透過 cmpxchg 確保同步,否則用 lock。兩者皆使用 set_freepointer 連接 list 後,放回 cpu freelist 上。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*
 * do_slab_free() - free fast path; accepts a linked list of objects
 * (@head .. @tail, @cnt of them). If @slab is not the current CPU's
 * slab, defer to the __slab_free() slow path; otherwise push the
 * list onto the per-CPU freelist, either locklessly via
 * cmpxchg-double or under the local lock.
 */
static __always_inline void do_slab_free(struct kmem_cache *s,
struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
void **freelist;

redo:
/*
 * Determine the currently cpus per cpu slab.
 * The cpu may change afterward. However that does not matter since
 * data is retrieved via this pointer. If we are on the same cpu
 * during the cmpxchg then the free will succeed.
 */
c = raw_cpu_ptr(s->cpu_slab);
tid = READ_ONCE(c->tid);

/* Same with comment on barrier() in slab_alloc_node() */
barrier();

/* Object belongs to some other slab: take the slow path. */
if (unlikely(slab != c->slab)) {
__slab_free(s, slab, head, tail_obj, cnt, addr);
return;
}

if (USE_LOCKLESS_FAST_PATH()) {
freelist = READ_ONCE(c->freelist);

/* Link the old freelist behind the tail of the freed list. */
set_freepointer(s, tail_obj, freelist);

/* Install @head as the new freelist head iff nothing changed. */
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
freelist, tid,
head, next_tid(tid)))) {

note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
} else {
/* Update the free list under the local lock */
local_lock(&s->cpu_slab->lock);
c = this_cpu_ptr(s->cpu_slab);
if (unlikely(slab != c->slab)) {
local_unlock(&s->cpu_slab->lock);
goto redo;
}
tid = c->tid;
freelist = c->freelist;

set_freepointer(s, tail_obj, freelist);
c->freelist = head;
c->tid = next_tid(tid);

local_unlock(&s->cpu_slab->lock);
}
stat(s, FREE_FASTPATH);
}

__slab_free

將要 free 的 list 接在 slab 的 freelist 前面,判斷此 slab 應丟給 node 或 cpu。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
/*
 * __slab_free() - free slow path.
 * Splices the freed list (@head .. @tail) in front of slab->freelist
 * via cmpxchg-double, then decides whether the slab should go to a
 * CPU partial list, the node partial list, or back to the buddy
 * system.
 */
static void __slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)

{
void *prior;
int was_frozen;
struct slab new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;

stat(s, FREE_SLOWPATH);

if (kfence_free(head))
return;

if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
free_to_partial_list(s, slab, head, tail, cnt, addr);
return;
}

do {
/* Retrying: drop the node lock taken on the previous pass. */
if (unlikely(n)) {
spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
prior = slab->freelist;
counters = slab->counters;
set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
/*
 * If the cache has CPU partial lists and the slab previously
 * had no free object (was full), prepare to hand it to percpu.
 */
if (kmem_cache_has_cpu_partial(s) && !prior) {
new.frozen = 1;
} else { /* Otherwise prepare to hand it to the node. */
n = get_node(s, slab_nid(slab));
spin_lock_irqsave(&n->list_lock, flags);
}
}

} while (!cmpxchg_double_slab(s, slab,
prior, counters,
head, new.counters,
"__slab_free"));
/*
 * If prior/counters still match the slab's contents, the new
 * head and counters are installed.
 */

/* Slab stays with (or goes to) percpu. */
if (likely(!n)) {
/*
 * If it was already frozen (owned by a CPU), no list
 * manipulation is needed.
 */
if (likely(was_frozen))
stat(s, FREE_FROZEN);
} else if (new.frozen) {
put_cpu_partial(s, slab, 1);
stat(s, CPU_PARTIAL_FREE);
}

return;
}

/*
 * Slab goes to the node. If it is now completely empty and the node
 * partial list is already at/over s->min_partial, discard it.
 */
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
goto slab_empty;

/*
 * The slab was full (all objects in use) and we just freed into it:
 * move it from the full list to the partial list.
 * (Only node-owned slabs reach __slab_free; CPU-owned ones take the
 * fast path.)
 */
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
return;

slab_empty:
if (prior) {
/*
 * Slab on the partial list.
 */
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
} else {
/* Slab must be on the full list */
remove_full(s, n, slab);
}

spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
discard_slab(s, slab);
}

參考資料