趣探 Mach-O:FishHook 解析
- Mach-O文件格式: 趣探 Mach-O:文件格式分析
- 動態鏈接相關知識: Mach-O 的動態鏈接過程 、 趣探 Mach-O:加載過程
- 對操作系統、編譯原理的理解:深入解析Mac OS X & iOS操作系統 、 程序員的自我修養
本文的闡述順序按照函數調用過程來進行
Fishhook 可以做什么
在此借用阿里百川的一張分析圖,可以比較清晰的了解 FishHook 發揮了哪些作用
阿里百川
FishHook 在這里是對動態鏈接庫起作用,修改對應的函數實現
對于動態鏈接庫里面的 C 函數,第一次調用的時候,我們會得到函數和實現地址的對應關系,函數的實現地址存放在一個叫 la_symbol_ptr 的地方,第二次調用的時候,直接通過 la_symbol_ptr 找到函數地址就可以,不再需要繁瑣的獲取函數地址的過程。
那么,上圖的含義就很明了了
在程序運行時,動態鏈接的 C 函數 dynamic(...) 地址記錄在 DATA segment 下的 la_symbol_ptr 中;初始時,程序只知道 dynamic 函數的符號名而不知道函數的實現地址;首次調用時,程序通過 TEXT segment 中的 stub_helper 取得綁定信息,通過 dyld_stub_binder 來更新 la_symbol_ptr 中的符號實現地址;這樣,再次調用時,就可以通過 la_symbol_ptr 直接找到 dynamic 函數的實現;如果我們需要替換 dynamic 函數的實現,只需要修改 __la_symbol_ptr 即可,也就是我們要談的 Fishhook
Fishhook 的實現
通過 fishhook的官方文檔 可以知道, Fishhook 的使用方法大致如下:
/*
 * Receives the address of the real open() once the rebinding has been
 * applied; new_open() forwards every call through it.
 */
static int (*original_open)(const char *, int, ...);

/*
 * Replacement for open(): logs the call and forwards it to the original
 * implementation stored in original_open.
 *
 * path  - file path, passed through unchanged.
 * oflag - open flags, passed through unchanged.
 * ...   - optional creation mode; only read when O_CREAT is set in oflag.
 *
 * Returns whatever the original open() returns.
 */
int new_open(const char *path, int oflag, ...) {
  mode_t mode = 0;

  if ((oflag & O_CREAT) != 0) {
    // mode only applies to O_CREAT
    va_list ap;
    va_start(ap, oflag);
    // mode_t may be narrower than int, so it arrives promoted to int.
    mode = va_arg(ap, int);
    va_end(ap);
    printf("Calling real open('%s', %d, %d)\n", path, oflag, (int)mode);
    return original_open(path, oflag, mode);
  } else {
    printf("Calling real open('%s', %d)\n", path, oflag);
    return original_open(path, oflag, mode);
  }
}
/*
 * Example entry point: rebinds "open" to new_open, then calls open() once
 * so the replacement can be observed.
 */
int main(int argc, const char * argv[]) {
  @autoreleasepool {
    // { symbol name, replacement function, slot that receives the original }
    struct rebinding open_rebinding = { "open", new_open, (void *)&original_open };
    rebind_symbols((struct rebinding[1]){open_rebinding}, 1);
    // The descriptor is intentionally unused; the call only demonstrates the hook.
    __unused int fd = open(argv[0], O_RDONLY);
  }
  return 0;
}
先從函數的入口, rebind_symbols 開始談起吧, rebind_symbols 主要是使用 _dyld_register_func_for_add_image 來注冊回調函數,在加載動態庫的時候執行一些操作
int rebind_symbols(struct rebindingrebindings[], size_trebindings_nel) {
// 調用 prepend_rebindings 的函數,將整個 rebindings 數組添加到 _rebindings_head 這個私有鏈表的頭部
int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
if (retvalnext 的值來判斷是否為第一次調用
// If this was the first call, register callback for image additions (which is also invoked for
// existing images, otherwise, just run on existing images
if (!_rebindings_head->next) {
_dyld_register_func_for_add_image(_rebind_symbols_for_image);
} else {
uint32_t c = _dyld_image_count();
for (uint32_t i = 0; i
對于 prepend_rebindings 的代碼如下
// Node of the singly linked list that stores every registered batch of
// rebindings.
struct rebindings_entry {
  struct rebinding *rebindings;   // array of rebinding descriptors for this batch
  size_t rebindings_nel;          // number of elements in rebindings
  struct rebindings_entry *next;  // next (older) batch, or NULL
};

// Head of the list; the most recently registered batch comes first.
static struct rebindings_entry *_rebindings_head;
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
struct rebindingrebindings[],
size_tnel) {
struct rebindings_entry *new_entry = malloc(sizeof(struct rebindings_entry));
if (!new_entry) {
return -1;
}
new_entry->rebindings = malloc(sizeof(struct rebinding) * nel);
if (!new_entry->rebindings) {
free(new_entry);
return -1;
}
// 將 rebindings 插入到鏈表頭部
memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel);
new_entry->rebindings_nel = nel;
new_entry->next = *rebindings_head;
*rebindings_head = new_entry;
return 0;
}
基礎結構解釋
Dl_info
/*
 * Structure filled in by dladdr().
 */
typedef struct dl_info {
  const char *dli_fname; /* Pathname of shared object */
  void *dli_fbase;       /* Base address of shared object */
  const char *dli_sname; /* Name of nearest symbol */
  void *dli_saddr;       /* Address of nearest symbol */
} Dl_info;
稍后經過 dladdr() 處理得到的有效信息都會放進這個結構體中
- dli_fname: 路徑名,例如
/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation
- dli_fbase: 共享對象的起始地址(Base address of shared object,比如上面的 CoreFoundation)
- dli_saddr : 符號的地址
- dli_sname: 符號的名字,即下面的第四列的函數信息
Thread 0:
0 libsystem_kernel.dylib 0x11135810a __semwait_signal + 94474
1 libsystem_c.dylib 0x1110dab0b sleep + 518923
2 QYPerformanceMonitor 0x10dda4f1b -[ViewControllertableView:cellForRowAtIndexPath:] + 7963
3 UIKit 0x10ed4d4f4 -[UITableView_createPreparedCellForGlobalRow:withIndexPath:willDisplay:] + 1586420
LC_SYMTAB
/* Load command describing where the symbol table and string table live. */
struct symtab_command {
  uint32_t cmd;     /* LC_SYMTAB */
  uint32_t cmdsize; /* sizeof(struct symtab_command) */
  uint32_t symoff;  /* symbol table offset */
  uint32_t nsyms;   /* number of symbol table entries */
  uint32_t stroff;  /* string table offset */
  uint32_t strsize; /* string table size in bytes */
};
主要是提供符號表的偏移量,以及元素個數,還有字符串表的偏移和其長度。符號表在 Mach-O 目標文件中的地址可以通過 LC_SYMTAB 加載命令指定的 symoff 找到,對應的符號名稱在 stroff ,總共有 nsyms 條符號信息
LC_DYSYMTAB
這個結構體有些復雜,有興趣的可以閱讀 loader.h 文件,內部標示了動態符號表的偏移量和符號個數
/*
 * Abridged excerpt of struct dysymtab_command: only the fields discussed
 * here are shown, so this is NOT the real memory layout. See
 * <mach-o/loader.h> for the complete definition.
 */
struct dysymtab_command {
  uint32_t cmd;            /* LC_DYSYMTAB */
  uint32_t cmdsize;        /* sizeof(struct dysymtab_command) */
  /* ... fields omitted ... */
  uint32_t indirectsymoff; /* file offset to the indirect symbol table */
  uint32_t nindirectsyms;  /* number of indirect symbol table entries */
  /* ... fields omitted ... */
};
_rebind_symbols_for_image
對于關鍵的代碼 _rebind_symbols_for_image 如下
static void rebind_symbols_for_image(struct rebindings_entry
rebindings,
const struct mach_header header,
intptr_tslide) {
Dl_infoinfo;
if (dladdr(header, &info) == 0) {
return;
}
// segment_command_64
segment_command_t cur_seg_cmd;
segment_command_t linkedit_segment = NULL;
// LC_SYMTAB
struct symtab_command symtab_cmd = NULL;
// LC_DYSYMTAB
struct dysymtab_command dysymtab_cmd = NULL;
// 下面是要尋找load_command,所以越過mach_header_t
uintptr_tcur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t )cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
//遍歷尋找LINKEDIT
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
//遍歷尋找lc_symtab
symtab_cmd = (struct symtab_command)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
//遍歷尋找lc_dysymtab
dysymtab_cmd = (struct dysymtab_command)cur_seg_cmd;
}
}
</code></pre>
為什么要尋找這幾個 LoadCommand 的信息呢?就如上面介紹的
__LINKEDIT 、 LC_DYSYMTAB 、 LC_SYMTAB 都提供了重要的信息。
__LINKEDIT段 含有為動態鏈接庫使用的原始數據,比如符號,字符串,重定位表條目等等
閱讀下面的代碼之前,先來看一個計算公式
鏈接時程序的基址 = __LINKEDIT.VM_Address – __LINKEDIT.File_Offset + slide 的改變值
這里出現了一個 slide ,那么 slide 是啥呢?先看一下 ASLR
ASLR:Address space layout randomization ,將可執行程序隨機裝載到內存中,這里的隨機只是偏移,而不是打亂,具體做法就是通過內核將 Mach-O 的段“平移”某個隨機系數。 slide 正是 ASLR 引入的偏移
也就是說程序的基址等于 __LINKEDIT 的地址減去偏移量,然后再加上 ASLR 造成的偏移
// 鏈接時程序的基址
uintptr_tlinkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
// 符號表的地址 = 基址 + 符號表偏移量
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// 字符串表的地址 = 基址 + 字符串表偏移量
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// 動態符號表地址 = 基址 + 動態符號表偏移量
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
符號表中的元素都是 nlist_t 結構體, nlist_t 中有很多學問,這里先看一下他的基礎結構
/*
 * This is the symbol table entry structure for 32-bit architectures.
 */
struct nlist {
  union {
    uint32_t n_strx; /* index into the string table */
  } n_un;
  uint8_t n_type;    /* type flag, see below */
  uint8_t n_sect;    /* section number or NO_SECT */
  int16_t n_desc;    /* see <mach-o/stab.h> */
  uint32_t n_value;  /* value of this symbol (or stab offset) */
};
然后再次遍歷 load commands ,尋找 __DATA 和 __DATA_CONST 的 section ,并對 __nl_symbol_ptr 以及 __la_symbol_ptr 進行 rebind
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
//找到__DATA和__DATA_CONST的section,對__nl_symbol_ptr以及__la_symbol_ptr進行rebind
for (uint j = 0; j nsects; j++) {
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
// sect為Section,symtab為符號表,strtab字符串表,indirect_symtab動態符號表(indirect symbol table)
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
perform_rebinding_with_section
__nl_symbol_ptr 和 __la_symbol_ptr 這兩個 section 中的 reserved1 字段指明對應的 indirect symbol table 起始的 index :
For the two relevant sections, the section headers (struct sections from <mach-o/loader.h>) provide an offset (in the reserved1 field) into what is known as the indirect symbol table. The indirect symbol table, which is located in the __LINKEDIT segment of the binary, is just an array of indexes into the symbol table (also in __LINKEDIT) whose order is identical to that of the pointers in the non-lazy and lazy symbol sections
So, given struct section nl_symbol_ptr, the corresponding index in the symbol table of the first address in that section is indirect_symbol_table[nl_symbol_ptr->reserved1]. The symbol table itself is an array of struct nlists (see <mach-o/nlist.h>), and each nlist contains an index into the string table in __LINKEDIT where the actual symbol names are stored. So, for each pointer in __nl_symbol_ptr and __la_symbol_ptr, we are able to find the corresponding symbol and then the corresponding string to compare against the requested symbol names, and if there is a match, we replace the pointer in the section with the replacement.
結合英文,看下面的代碼就很容易理解
// sect為Section,symtab為符號表,strtab字符串表,indirect_symtab動態符號表(indirect symbol table)
/*
 * Rebinds every symbol pointer in one lazy / non-lazy symbol pointer section.
 * This first part resolves, for each pointer slot, the name of the symbol
 * it belongs to.
 *
 * rebindings      - head of the list of rebinding batches.
 * section         - the symbol pointer section to process.
 * slide           - slide of the image.
 * symtab          - symbol table.
 * strtab          - string table.
 * indirect_symtab - indirect symbol table.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 of a symbol pointer section is the index of the section's
  // first entry in the indirect symbol table.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // The pointer slots themselves: section address plus slide.
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    // Index of this slot's symbol in the symbol table.
    uint32_t symtab_index = indirect_symbol_indices[i];
    // Slots marked absolute and/or local are skipped.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // Offset of the symbol's name inside the string table.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    // The symbol's name.
    char *symbol_name = strtab + strtab_offset;
上面的代碼其實就可以用官方的一個圖片很直觀的表示

走到這里是找到了字符串表對應的符號(字符串)
如何替換實現
遍歷 rebindings 數組,符號進行比較,相同的符號就進行實現替換,這里的代碼比較清晰,直接貼出
struct rebindings_entry *cur = rebindings;
while (cur) {
for (uint j = 0; j rebindings_nel; j++) {
if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
if (cur->rebindings[j].replaced != NULL &&
indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
*(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
}
indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
goto symbol_loop;
}
}
cur = cur->next;
}
symbol_loop:;
}
參考鏈接
- 動態修改 C 語言函數的實現
- mrh的Fishhook源碼分析
- fishhook
- 深入解析Mac OS X & iOS操作系統
- 程序員的自我修養
- 編譯體系漫游
來自:http://ios.jobbole.com/92918/