Week 6 - IPC & FUSE

本文章涵蓋基礎 kernel pwn 常用的 IPC 和 FUSE 使用方式。

IPC & Kernel pwn

Message Queue

  • int msgget(key_t key, int msgflg)
    返回一個 id 表示新增的 message queue,不同 process 若傳入相同 key,則能使用同一個 queue。傳入 IPC_PRIVATE 新建一個 queue。msgflg 表示權限等資訊。
  • int msgctl(int msqid, int cmd, struct msqid_ds *buf)
    對 queue 做各種操作,如 IPC_STAT 表示將狀態資訊存在 buf,IPC_RMID 刪除 queue。若 process 結束前不刪除 queue,將一直留在 kernel 裡。
  • int msgsnd(int msqid, const void *msgp, size_t msgsz, int msgflg)
    msgp 結構由使用者定義,第一個欄位必包含 mtype,其餘可自定義。若 message queue 滿了,此函數會被 block。
    1
    2
    3
    4
    /*
     * User-defined message layout handed to msgsnd()/msgrcv().
     * Only the leading mtype field is mandatory; everything after it is
     * payload whose layout the caller chooses.
     */
    struct msgbuf {
        long mtype;    /* message type; must be the first field */
        char mtext[1]; /* start of the payload bytes */
    };
  • ssize_t msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg)
    msgtyp 可過濾訊息:msgtyp == 0 時不過濾,回傳 queue 中第一條訊息;msgtyp > 0 回傳第一條 mtype 等於 msgtyp 的訊息;msgtyp < 0 回傳 mtype 最小且小於等於 msgtyp 絕對值的第一條訊息。
    msgflg 傳入 MSG_COPY(必須同時搭配 IPC_NOWAIT)不會將訊息從 queue 裡面移除,但這時 msgtyp 改為表示 queue 的第幾個訊息(從 0 開始)。

調用 msgsnd 時,kernel 創建 msg_msg 存放一條訊息,msg_msg->m_list 將多條訊息串起來。

1
2
3
4
5
6
7
/*
 * Kernel-side header of one queued message, created when msgsnd() is called.
 * The user payload is stored directly after this header in the same
 * allocation (alloc_msg() allocates sizeof(*msg) + alen bytes).
 */
struct msg_msg {
struct list_head m_list; /* links the messages of one queue together */
long m_type;
size_t m_ts; /* length used when the message is read back */
struct msg_msgseg *next; /* chain of segments holding payload that did not fit here */
void *security;
};

msg_queue 作為 list head。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Kernel object describing one message queue. It serves as the list head
 * for the msg_msg entries queued on it.
 */
struct msg_queue {
struct kern_ipc_perm q_perm;
time64_t q_stime; /* last msgsnd time */
time64_t q_rtime; /* last msgrcv time */
time64_t q_ctime; /* last change time */
unsigned long q_cbytes; /* current number of bytes on queue */
unsigned long q_qnum; /* number of messages in queue */
unsigned long q_qbytes; /* max number of bytes on queue */
struct pid *q_lspid; /* pid of last msgsnd */
struct pid *q_lrpid; /* last receive pid */

struct list_head q_messages; /* head of the queued msg_msg list (linked via msg_msg->m_list) */
struct list_head q_receivers; /* NOTE(review): presumably the blocked receivers - confirm against ipc/msg.c */
struct list_head q_senders; /* NOTE(review): presumably the blocked senders - confirm against ipc/msg.c */
} __randomize_layout;

msg_msg 分配大小依使用者資料不同。若小於一個 page,則大小為資料長度加 header size。若大於一個 page,使用多個 msg_msgseg 放置資料,用 next 串起來。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Allocate storage for a message carrying `len` payload bytes.
 *
 * The first up-to-DATALEN_MSG bytes share an allocation with the msg_msg
 * header. Whatever is left is split into msg_msgseg chunks of at most
 * DATALEN_SEG bytes each, appended to the singly linked ->next chain.
 *
 * Returns the new message, or NULL if any allocation fails (in which case
 * everything allocated so far is released through free_msg()).
 */
static struct msg_msg *alloc_msg(size_t len)
{
	struct msg_msg *head;
	struct msg_msgseg **link;
	size_t chunk = min(len, DATALEN_MSG);

	head = kmalloc(sizeof(*head) + chunk, GFP_KERNEL_ACCOUNT);
	if (!head)
		return NULL;

	head->next = NULL;
	head->security = NULL;

	/* `link` always addresses the pointer the next segment must be stored in. */
	link = &head->next;
	len -= chunk;

	while (len > 0) {
		struct msg_msgseg *seg;

		cond_resched();

		chunk = min(len, DATALEN_SEG);
		seg = kmalloc(sizeof(*seg) + chunk, GFP_KERNEL_ACCOUNT);
		if (!seg) {
			/* The chain built so far is NULL-terminated, so it can be freed as is. */
			free_msg(head);
			return NULL;
		}

		*link = seg;
		seg->next = NULL;
		link = &seg->next;
		len -= chunk;
	}

	return head;
}

攻擊

msg_msg 大小是不固定的,而讀取長度是由 m_ts 判斷,改大就能讀 msg_msg 下面的資料。
當 data size + header size 超過一個 page,kernel 會多連結一個 msg_msgseg(見上方 alloc_msg 的 while 迴圈),改 m_ts 也能洩漏 msg_msgseg 附近資料。
若能同時改 msg_msg->m_ts 和 msg_msg->next,則可做到任意地址讀,但讀取時是依賴 next 來找資料的,需注意下一塊的 next 要指向 NULL。
msg_msg->m_list.next 指向下一塊 msg_msg 或 msg_queue,將 msg_msg 並排在一起後越界讀,就能拿到 heap 位置。
由於此結構大小可變換,且可以做到越界讀、任意讀和任意寫,常被拿來做 kernel 攻擊。

Pipe

半雙工,管道有容量限制,容量滿時 write 會被 block。背後原理是創建共享文件,process 讀寫虛擬文件內容。
虛擬文件的 inode->i_pipe 儲存此 pipe 的 buffer,bufs 可拿來洩漏 heap 上的資料。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Per-pipe kernel state, reachable from the pipe's inode through
 * inode->i_pipe.
 * NOTE(review): head/tail presumably index the producer/consumer positions
 * in bufs - confirm against include/linux/pipe_fs_i.h.
 */
struct pipe_inode_info {
struct mutex mutex;
wait_queue_head_t rd_wait, wr_wait;
unsigned int head;
unsigned int tail;
unsigned int max_usage;
unsigned int ring_size;
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif
unsigned int nr_accounted;
unsigned int readers;
unsigned int writers;
unsigned int files;
unsigned int r_counter;
unsigned int w_counter;
struct page *tmp_page;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs; /* the pipe's buffers; the article uses this pointer to leak heap data */
struct user_struct *user;
#ifdef CONFIG_WATCH_QUEUE
struct watch_queue *watch_queue;
#endif
};

Shared Memory

已放在 Week 1。

Socket

sk_buff 表示一個在 kernel 正被處理的封包,head, data, tail, end 指向封包的 header, data 範圍。next, prev 將 sk_buff 串成 list,開頭是 sk_buff_head。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Descriptor of one packet being processed by the kernel.
 * Abridged excerpt: members replaced by "// ..." are omitted here.
 */
struct sk_buff {
union {
struct {
/* These two members must be first. */
struct sk_buff *next; /* next/prev link sk_buffs into a list headed by sk_buff_head */
struct sk_buff *prev;

// ...
};

// ...

/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail; /* head/data/tail/end delimit the packet's header and data area */
sk_buff_data_t end; /* skb_shared_info sits where end points */
unsigned char *head,*data;
unsigned int truesize;
refcount_t users;

#ifdef CONFIG_SKB_EXTENSIONS
/* only useable after checking ->active_extensions != 0 */
struct skb_ext *extensions;
#endif
};

sk_buff 從獨立的 cache 分配,但其封包內容是從一般 cache 分配的。封包內容後面(sk_buff->end 指向的地方)為以下 header:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Trailer stored right after the packet data, at the address sk_buff->end
 * points to. It is allocated together with the packet data from the general
 * caches, not from the dedicated sk_buff cache.
 */
struct skb_shared_info {
__u8 flags;
__u8 meta_len;
__u8 nr_frags;
__u8 tx_flags;
unsigned short gso_size;
/* Warning: this field is not always filled in (UFO)! */
unsigned short gso_segs;
struct sk_buff *frag_list;
struct skb_shared_hwtstamps hwtstamps;
unsigned int gso_type;
u32 tskey;

/*
* Warning : all fields before dataref are cleared in __alloc_skb()
*/
atomic_t dataref;

/* Intermediate layers must ensure that destructor_arg
* remains valid until skb destructor */
void * destructor_arg;

/* must be last field, see pskb_expand_head() */
skb_frag_t frags[MAX_SKB_FRAGS];
};

skb_shared_info 大小是 320 bytes,也就是說最小會在 512 bytes 的 cache 裡。向 socket 送出/收取資料,即可分配/釋放 sk_buff 和 skb_shared_info。socketpair 能夠方便地創建一組 socket。

packet_sock 用來在 OSI data link layer 收發資料,開啟 socket 加入參數 AF_PACKET 即可創建一個。儲存在 2048 的 cache。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
 * Socket object backing an AF_PACKET socket (data-link-layer send/receive).
 * The surrounding article places it in the 2048-byte cache.
 */
struct packet_sock {
/* struct sock has to be the first member of packet_sock */
struct sock sk;
struct packet_fanout *fanout;
union tpacket_stats_u stats;
struct packet_ring_buffer rx_ring;
struct packet_ring_buffer tx_ring;
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
unsigned int running; /* bind_lock must be held */
unsigned int auxdata:1, /* writer must hold sock lock */
origdev:1,
has_vnet_hdr:1,
tp_loss:1,
tp_tx_has_off:1;
int pressure;
int ifindex; /* bound device */
__be16 num;
struct packet_rollover *rollover;
struct packet_mclist *mclist;
atomic_t mapped;
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_tstamp;
struct completion skb_completion;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb); /* function pointer stored inside the object */
struct packet_type prot_hook ____cacheline_aligned_in_smp;
atomic_t tp_drops ____cacheline_aligned_in_smp;
};

FUSE

使用 FUSE 做攻擊時一定要實作 getattr,回傳 stat 結構,表示一個檔案/目錄的資訊。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Metadata of one file or directory. A FUSE getattr handler reports a
 * path's attributes by filling in this structure.
 */
struct stat {
dev_t st_dev; /* ID of device containing file */
ino_t st_ino; /* Inode number */
mode_t st_mode; /* File type and mode */
nlink_t st_nlink; /* Number of hard links */
uid_t st_uid; /* User ID of owner */
gid_t st_gid; /* Group ID of owner */
dev_t st_rdev; /* Device ID (if special file) */
off_t st_size; /* Total size, in bytes */
blksize_t st_blksize; /* Block size for filesystem I/O */
blkcnt_t st_blocks; /* Number of 512 B blocks allocated */

/* Since POSIX.1-2008, this structure supports nanosecond
precision for the following timestamp fields.
For the details before POSIX.1-2008, see VERSIONS. */

struct timespec st_atim; /* Time of last access */
struct timespec st_mtim; /* Time of last modification */
struct timespec st_ctim; /* Time of last status change */

#define st_atime st_atim.tv_sec /* Backward compatibility */
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};

而 st_mode 又包含 rwx 權限、檔案類型,以這些 flag 表示。

1
2
3
4
5
6
7
8
9
10
11
/* Octal st_mode flags: file-type values (selected by S_IFMT) and special permission bits. */
#define S_IFMT  00170000 /* mask for the file-type bits */
#define S_IFSOCK 0140000 /* socket */
#define S_IFLNK 0120000 /* symlink */
#define S_IFREG 0100000 /* normal file */
#define S_IFBLK 0060000 /* block device */
#define S_IFDIR 0040000 /* directory */
#define S_IFCHR 0020000 /* char device */
#define S_IFIFO 0010000 /* fifo */
#define S_ISUID 0004000 /* suid */
#define S_ISGID 0002000 /* sgid */
#define S_ISVTX 0001000 /* sticky, only root, owner can delete and rename */
  • init(struct fuse_conn_info *conn)
    FUSE 運行時用來初始化,可以不用定義。
  • destroy(void* private_data)
    FUSE 結束時運行,private_data 來自 init 的回傳值。
  • getattr(const char* path, struct stat* stbuf)
    回傳 file attributes 至 stbuf
  • readlink(const char* path, char* buf, size_t size)
    回傳 link 指向的路徑至 buf。
  • open(const char* path, struct fuse_file_info* fi)
    打開一個檔案,fi 紀錄 open file 的設定,例如 direct_io, flush, nonseekable 等,fi->fh 紀錄 file 的 handle number,但也可以不指定。
  • read(const char* path, char* buf, size_t size, off_t offset, struct fuse_file_info* fi)
    讀資料到 buf。
  • write(const char* path, const char* buf, size_t size, off_t offset, struct fuse_file_info* fi)
  • setxattr(const char* path, const char* name, const char* value, size_t size, int flags)
  • ioctl(const char* path, int cmd, void* arg, struct fuse_file_info* fi, unsigned int flags, void* data)

使用 fuse 時,可直接呼叫 fuse_main

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Callback table passed to fuse_main(): maps each filesystem operation to
 * the fioc_* handler implementing it. Operations not listed stay unset.
 */
static const struct fuse_operations fioc_oper = {
.getattr = fioc_getattr,
.readdir = fioc_readdir,
.truncate = fioc_truncate,
.open = fioc_open,
.read = fioc_read,
.write = fioc_write,
.ioctl = fioc_ioctl,
};

/*
 * Program entry point: forwards the command line to fuse_main() together
 * with the fioc_oper callback table and returns its result as the exit status.
 */
int main(int argc, char *argv[])
{
	const int status = fuse_main(argc, argv, &fioc_oper, NULL);

	return status;
}

不過在實際用 FUSE 攻擊時,可能同時執行主程式並把 FUSE 丟到另一個 thread。這時不太適合用 fuse_main,因為它預設跑在 main thread。
我們可以用 fuse_mount 掛載到一個路徑,fuse_set_signal_handlers 使 FUSE 監聽 signal,fuse_new, fuse_loop_mt 執行。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * Thread entry point: mounts a FUSE filesystem on /tmp/test and serves
 * requests with the multi-threaded loop until that loop exits.
 *
 * _arg is unused.
 * Returns NULL in every case, so a caller that collects the thread result
 * (e.g. through pthread_join) reads a defined value.
 * Setup failures are reported through fatal().
 * NOTE(review): fatal() is assumed not to return - confirm its definition.
 */
void *fuse_start(void *_arg)
{
	struct fuse_args args = FUSE_ARGS_INIT(0, NULL);
	struct fuse_chan *chan;
	struct fuse *fuse;

	(void)_arg;

	if (mkdir("/tmp/test", 0777)) {
		fatal("mkdir(\"/tmp/test\")");
	}

	if (!(chan = fuse_mount("/tmp/test", &args))) {
		fatal("fuse_mount");
	}

	if (!(fuse = fuse_new(chan, &args, &fops, sizeof(fops), NULL))) {
		/* Undo the mount before bailing out so the mount point is not left busy. */
		fuse_unmount("/tmp/test", chan);
		fatal("fuse_new");
	}

	fuse_set_signal_handlers(fuse_get_session(fuse));

	if (sched_setaffinity(0, sizeof(cpu_set_t), &cpu0)) {
		fatal("sched_setaffinity");
	}

	puts("[+] Start fuse loop");
	fuse_loop_mt(fuse);

	/* Tear down in the reverse order of setup: handlers, mount, then the handle. */
	fuse_remove_signal_handlers(fuse_get_session(fuse));
	fuse_unmount("/tmp/test", chan);
	fuse_destroy(fuse);

	return NULL;
}

參考資料