Fuzz前置技能-unicorn模拟执行
Unicorn是一个轻量级, 多平台, 多架构的CPU模拟器框架,基于qemu开发,它可以代替CPU模拟代码的执行,常用于恶意代码分析,Fuzz等,该项目被用于Radare2逆向分析框架,GEF(gdb的pwn分析插件),Pwndbg,Angr符号执行框架等多个著名项目。
开发准备
官网
编译安装
安装提供了两种方式:
-
从源安装
-
OSX(homebrew)
# Install the Unicorn library with Homebrew
brew install unicorn
# After the Homebrew install, add the library directory to the dynamic-library search path
export DYLD_LIBRARY_PATH=/usr/local/opt/unicorn/lib/:$DYLD_LIBRARY_PATH
# Then install the Python bindings
pip install unicorn
-
Linux参考官方安装手册
-
Windows参考官方安装手册
-
-
从源码构建
-
下载源码包:
# Build
./make.sh
sudo ./make.sh install
# Install the Python bindings
pip install unicorn
# Upgrade the unicorn Python package
pip install unicorn --upgrade
其他交叉编译(如iOS、ARM、Android)参考官方文档
-
快速入门
安装好unicorn后,可以用下面的测试用例来检测unicorn的功能是否可用
Python
-
以下为Python调用unicorn框架测试代码
from __future__ import print_function
from unicorn import *
from unicorn.x86_const import *

# Machine code to emulate: INC ecx; DEC edx
X86_CODE32 = b"\x41\x4a"

# Address at which emulation starts
ADDRESS = 0x1000000

print("Emulate i386 code")
try:
    # Create an emulator instance for X86 in 32-bit mode
    emu = Uc(UC_ARCH_X86, UC_MODE_32)

    # Map 2MB of memory for the emulation
    emu.mem_map(ADDRESS, 2 * 1024 * 1024)

    # Write the instructions to be executed into the mapped memory
    emu.mem_write(ADDRESS, X86_CODE32)

    # Seed the registers with known values so the effect of the code is visible
    for reg_id, initial in ((UC_X86_REG_ECX, 0x1234), (UC_X86_REG_EDX, 0x7890)):
        emu.reg_write(reg_id, initial)

    # Run from ADDRESS to the end of the code, with no time or instruction-count limit
    end_address = ADDRESS + len(X86_CODE32)
    emu.emu_start(ADDRESS, end_address)

    # Report the register values after emulation
    print("Emulation done. Below is the CPU context")
    ecx_after = emu.reg_read(UC_X86_REG_ECX)
    edx_after = emu.reg_read(UC_X86_REG_EDX)
    print(">>> ECX = 0x%x" % ecx_after)
    print(">>> EDX = 0x%x" % edx_after)

except UcError as e:
    print("ERROR: %s" % e)
最终输出结果:
Emulate i386 code
Emulation done. Below is the CPU context
>>> ECX = 0x1235
>>> EDX = 0x788f
看到结果,ECX被加了1,并且EDX被减去1,表示python可以成功调用unicorn
C
-
以下为C调用unicorn框架测试代码
#include<stdio.h> #include<unicorn/unicorn.h> #include<string.h> #define ADDRESS 0x1000000 #define X86_CODE32 "\x41\x4a\x66\x0f\xef\xc1" // INC ecx; DEC edx; PXOR xmm0, xmm1 // 在终端中输出起始地址和硬编码大小 static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); } // hook 回调函数,用于监视程序运行时的变化 static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { int eflags; printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); uc_reg_read(uc, UC_X86_REG_EFLAGS, &eflags); //获取寄存器值放入eflags变量中 printf(">>> --- EFLAGS is 0x%x\n", eflags); // Uncomment below code to stop the emulation using uc_emu_stop() // if (address == 0x1000009) // uc_emu_stop(uc); } static void test_i386(void) { uc_engine *uc; uc_err err; uint32_t tmp; uc_hook trace1, trace2; int r_ecx = 0x1234; // ECX 寄存器 int r_edx = 0x7890; // EDX 寄存器 // XMM0 、 XMM1 寄存器, 数组分别为低64位和高64位 uint64_t r_xmm0[2] = {0x08090a0b0c0d0e0f, 0x0001020304050607}; uint64_t r_xmm1[2] = {0x8090a0b0c0d0e0f0, 0x0010203040506070}; printf("Emulate i386 code\n"); // 初始化x86环境 err = uc_open(UC_ARCH_X86, UC_MODE_32, &uc); if (err) { printf("Failed on uc_open() with error returned: %u\n", err); return; } // 为模拟执行代码申请 2MB 内存 uc_mem_map(uc, ADDRESS, 2 * 1024 * 1024, UC_PROT_ALL); // 向目标地址写入opcode if (uc_mem_write(uc, ADDRESS, X86_CODE32, sizeof(X86_CODE32) - 1)) { printf("Failed to write emulation code to memory, quit!\n"); return; } // 初始化寄存器ECX、EDX、XMM0、XMM1 uc_reg_write(uc, UC_X86_REG_ECX, &r_ecx); uc_reg_write(uc, UC_X86_REG_EDX, &r_edx); uc_reg_write(uc, UC_X86_REG_XMM0, &r_xmm0); uc_reg_write(uc, UC_X86_REG_XMM1, &r_xmm1); // 在函数内插桩,成功时会调用回调函数 uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); // 每当代码执行时调用回调函数 uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, 1, 0); // 模拟执行 err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(X86_CODE32) - 1, 0, 0); if (err) { 
printf("Failed on uc_emu_start() with error returned %u: %s\n", err, uc_strerror(err)); } // 最后输出一些模拟执行完成后寄存器的值 printf(">>> Emulation done. Below is the CPU context\n"); uc_reg_read(uc, UC_X86_REG_ECX, &r_ecx); uc_reg_read(uc, UC_X86_REG_EDX, &r_edx); uc_reg_read(uc, UC_X86_REG_XMM0, &r_xmm0); printf(">>> ECX = 0x%x\n", r_ecx); printf(">>> EDX = 0x%x\n", r_edx); printf(">>> XMM0 = 0x%.16"PRIx64"%.16"PRIx64"\n", r_xmm0[1], r_xmm0[0]); // 读取内存中的内容 if (!uc_mem_read(uc, ADDRESS, &tmp, sizeof(tmp))) printf(">>> Read 4 bytes from [0x%x] = 0x%x\n", ADDRESS, tmp); else printf(">>> Failed to read 4 bytes from [0x%x]\n", ADDRESS); // 最后需要关闭,否则会导致内存泄露 uc_close(uc); } int main(){ test_i386(); return 0; }
编辑Makefile进行编译:
# Link against glib-2.0 (flags taken from pkg-config), pthread, libm and unicorn
LDFLAGS += $(shell pkg-config --libs glib-2.0) -lpthread -lm -lunicorn

# Default target: build the test2 program
all: test2

# Pattern rule: build an executable from the .c file of the same name
%: %.c
	$(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@
上面的Makefile等同于命令:
# Manual equivalent of the Makefile build of test2; the -L directories (e.g. the versioned glib 2.70.1 path) will likely differ on other systems
cc test2.c -L/usr/local/Cellar/glib/2.70.1/lib -L/usr/local/opt/gettext/lib -lglib-2.0 -lintl -lpthread -lm -lunicorn -o test2
运行结果如下:
Emulate i386 code
>>> Tracing basic block at 0x1000000, block size = 0x6
>>> Tracing instruction at 0x1000000, instruction size = 0x1
>>> --- EFLAGS is 0x0
>>> Tracing instruction at 0x1000001, instruction size = 0x1
>>> --- EFLAGS is 0x4
>>> Tracing instruction at 0x1000002, instruction size = 0x4
>>> --- EFLAGS is 0x10
>>> Emulation done. Below is the CPU context
>>> ECX = 0x1235
>>> EDX = 0x788f
>>> XMM0 = 0x00112233445566778899aabbccddeeff
>>> Read 4 bytes from [0x1000000] = 0xf664a41
Go
-
Go语言需要安装package
# Fetch the Unicorn Go bindings package
go get github.com/unicorn-engine/unicorn/bindings/go/unicorn
-
示例代码如下
package main import ( "fmt" "github.com/unicorn-engine/unicorn/bindings/go/unicorn" ) func main() { un,_:=unicorn.NewUnicorn(unicorn.ARCH_X86,unicorn.MODE_32) code := []byte{184,210,4,0,0} // mov eax,1234 un.MemMap(0x1000,0x1000) un.MemWrite(0x1000,code) err:=un.Start(0x1000,0x1000+uint64(len(code))) if err!=nil{ panic(err) } eax,_:=un.RegRead(unicorn.X86_REG_EAX) fmt.Println(eax) }
最终输出结果:1234。注意这里是十进制
其他unicorn示例
还有其他更多的python示例,也包含其他编程语言的示例,其中Go、Java、ruby、rust、pascal等的示例代码,可以参考链接:
unicorn/bindings at master · unicorn-engine/unicorn
API参考
C语言函数定义在unicorn.h头文件中,Python函数定义在unicorn_const.py和unicorn.py中,函数和定义简短,用时再看也来得及。
总结
简单来说,可以把unicorn理解成一个CPU,把需要执行的代码片段和内存空间布局好,unicorn会执行代码片段,并返回结果。unicorn可以模拟执行多种架构的指令,比如ARM、x86、MIPS等,并且有多种语言的API接口,其中我比较喜欢用的是Python、C和Go,可以根据自己喜欢的语言基于unicorn进行开发,写出自己的一些工具,比如fuzzer、恶意代码分析工具、二进制插桩、加密算法分析等。