1*2810ac1bSKiyoung Kim// Program explore is evolved from the code discussed in more depth 2*2810ac1bSKiyoung Kim// here: 3*2810ac1bSKiyoung Kim// 4*2810ac1bSKiyoung Kim// https://github.com/golang/go/issues/3405 5*2810ac1bSKiyoung Kim// 6*2810ac1bSKiyoung Kim// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only 7*2810ac1bSKiyoung Kim// applies to the calling thread, since 8*2810ac1bSKiyoung Kim// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03 9*2810ac1bSKiyoung Kim// the seccomp filter application forces the setting to be mirrored on 10*2810ac1bSKiyoung Kim// all the threads of a process. 11*2810ac1bSKiyoung Kim// 12*2810ac1bSKiyoung Kim// Based on the command line options, we can manipulate the program to 13*2810ac1bSKiyoung Kim// behave in various ways. Example command lines: 14*2810ac1bSKiyoung Kim// 15*2810ac1bSKiyoung Kim// sudo ./explore 16*2810ac1bSKiyoung Kim// sudo ./explore --kill=false 17*2810ac1bSKiyoung Kim// sudo ./explore --kill=false --errno=0 18*2810ac1bSKiyoung Kim// 19*2810ac1bSKiyoung Kim// Supported Go toolchains are after go1.10. Those prior to go1.15 20*2810ac1bSKiyoung Kim// require this environment variable to be set to build successfully: 21*2810ac1bSKiyoung Kim// 22*2810ac1bSKiyoung Kim// export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" 23*2810ac1bSKiyoung Kim// 24*2810ac1bSKiyoung Kim// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too, 25*2810ac1bSKiyoung Kim// demonstrating native nocgo support for seccomp features. 26*2810ac1bSKiyoung Kimpackage main 27*2810ac1bSKiyoung Kim 28*2810ac1bSKiyoung Kimimport ( 29*2810ac1bSKiyoung Kim "flag" 30*2810ac1bSKiyoung Kim "fmt" 31*2810ac1bSKiyoung Kim "log" 32*2810ac1bSKiyoung Kim "runtime" 33*2810ac1bSKiyoung Kim "syscall" 34*2810ac1bSKiyoung Kim "time" 35*2810ac1bSKiyoung Kim "unsafe" 36*2810ac1bSKiyoung Kim 37*2810ac1bSKiyoung Kim "kernel.org/pub/linux/libs/security/libcap/psx" 38*2810ac1bSKiyoung Kim) 39*2810ac1bSKiyoung Kim 40*2810ac1bSKiyoung Kimvar ( 41*2810ac1bSKiyoung Kim withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall") 42*2810ac1bSKiyoung Kim delays = flag.Bool("delays", false, "use this to pause the program at various places") 43*2810ac1bSKiyoung Kim kill = flag.Bool("kill", true, "kill the process if setuid attempted") 44*2810ac1bSKiyoung Kim errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno") 45*2810ac1bSKiyoung Kim) 46*2810ac1bSKiyoung Kim 47*2810ac1bSKiyoung Kimconst ( 48*2810ac1bSKiyoung Kim prSetNoNewPrivs = 38 49*2810ac1bSKiyoung Kim 50*2810ac1bSKiyoung Kim sysSeccomp = 317 // x86_64 syscall number 51*2810ac1bSKiyoung Kim seccompSetModeFilter = 1 // uses user-supplied filter. 52*2810ac1bSKiyoung Kim seccompFilterFlagTsync = (1 << 0) // mirror filtering on all threads. 53*2810ac1bSKiyoung Kim seccompRetErrno = 0x00050000 // returns an errno 54*2810ac1bSKiyoung Kim seccompRetData = 0x0000ffff // mask for RET data payload (ex. errno) 55*2810ac1bSKiyoung Kim seccompRetKillProcess = 0x80000000 // kill the whole process immediately 56*2810ac1bSKiyoung Kim seccompRetTrap = 0x00030000 // disallow and force a SIGSYS 57*2810ac1bSKiyoung Kim seccompRetAllow = 0x7fff0000 58*2810ac1bSKiyoung Kim 59*2810ac1bSKiyoung Kim bpfLd = 0x00 60*2810ac1bSKiyoung Kim bpfJmp = 0x05 61*2810ac1bSKiyoung Kim bpfRet = 0x06 62*2810ac1bSKiyoung Kim 63*2810ac1bSKiyoung Kim bpfW = 0x00 64*2810ac1bSKiyoung Kim 65*2810ac1bSKiyoung Kim bpfAbs = 0x20 66*2810ac1bSKiyoung Kim bpfJeq = 0x10 67*2810ac1bSKiyoung Kim 68*2810ac1bSKiyoung Kim bpfK = 0x00 69*2810ac1bSKiyoung Kim 70*2810ac1bSKiyoung Kim auditArchX86_64 = 3221225534 // HACK: I don't understand this value 71*2810ac1bSKiyoung Kim archNr = auditArchX86_64 72*2810ac1bSKiyoung Kim 73*2810ac1bSKiyoung Kim syscallNr = 0 74*2810ac1bSKiyoung Kim) 75*2810ac1bSKiyoung Kim 76*2810ac1bSKiyoung Kim// SockFilter is a single filter block. 77*2810ac1bSKiyoung Kimtype SockFilter struct { 78*2810ac1bSKiyoung Kim // Code is the filter code instruction. 79*2810ac1bSKiyoung Kim Code uint16 80*2810ac1bSKiyoung Kim // Jt is the target for a true result from the code execution. 81*2810ac1bSKiyoung Kim Jt uint8 82*2810ac1bSKiyoung Kim // Jf is the target for a false result from the code execution. 83*2810ac1bSKiyoung Kim Jf uint8 84*2810ac1bSKiyoung Kim // K is a generic multiuse field 85*2810ac1bSKiyoung Kim K uint32 86*2810ac1bSKiyoung Kim} 87*2810ac1bSKiyoung Kim 88*2810ac1bSKiyoung Kim// SockFProg is a 89*2810ac1bSKiyoung Kimtype SockFProg struct { 90*2810ac1bSKiyoung Kim // Len is the number of contiguous SockFilter blocks that can 91*2810ac1bSKiyoung Kim // be found at *Filter. 92*2810ac1bSKiyoung Kim Len uint16 93*2810ac1bSKiyoung Kim // Filter is the address of the first SockFilter block of a 94*2810ac1bSKiyoung Kim // program sequence. 95*2810ac1bSKiyoung Kim Filter *SockFilter 96*2810ac1bSKiyoung Kim} 97*2810ac1bSKiyoung Kim 98*2810ac1bSKiyoung Kim// SockFilterSlice is a subprogram filter. 99*2810ac1bSKiyoung Kimtype SockFilterSlice []SockFilter 100*2810ac1bSKiyoung Kim 101*2810ac1bSKiyoung Kimfunc bpfStmt(code uint16, k uint32) SockFilter { 102*2810ac1bSKiyoung Kim return SockFilter{code, 0, 0, k} 103*2810ac1bSKiyoung Kim} 104*2810ac1bSKiyoung Kim 105*2810ac1bSKiyoung Kimfunc bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter { 106*2810ac1bSKiyoung Kim return SockFilter{code, jt, jf, k} 107*2810ac1bSKiyoung Kim} 108*2810ac1bSKiyoung Kim 109*2810ac1bSKiyoung Kimfunc validateArchitecture() []SockFilter { 110*2810ac1bSKiyoung Kim return []SockFilter{ 111*2810ac1bSKiyoung Kim bpfStmt(bpfLd+bpfW+bpfAbs, 4), // HACK: I don't understand this 4. 112*2810ac1bSKiyoung Kim bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0), 113*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetKillProcess), 114*2810ac1bSKiyoung Kim } 115*2810ac1bSKiyoung Kim} 116*2810ac1bSKiyoung Kim 117*2810ac1bSKiyoung Kimfunc examineSyscall() []SockFilter { 118*2810ac1bSKiyoung Kim return []SockFilter{ 119*2810ac1bSKiyoung Kim bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr), 120*2810ac1bSKiyoung Kim } 121*2810ac1bSKiyoung Kim} 122*2810ac1bSKiyoung Kim 123*2810ac1bSKiyoung Kimfunc allowSyscall(syscallNum uint32) []SockFilter { 124*2810ac1bSKiyoung Kim return []SockFilter{ 125*2810ac1bSKiyoung Kim bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 126*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetAllow), 127*2810ac1bSKiyoung Kim } 128*2810ac1bSKiyoung Kim} 129*2810ac1bSKiyoung Kim 130*2810ac1bSKiyoung Kimfunc disallowSyscall(syscallNum, errno uint32) []SockFilter { 131*2810ac1bSKiyoung Kim return []SockFilter{ 132*2810ac1bSKiyoung Kim bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 133*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)), 134*2810ac1bSKiyoung Kim } 135*2810ac1bSKiyoung Kim} 136*2810ac1bSKiyoung Kim 137*2810ac1bSKiyoung Kimfunc killProcess() []SockFilter { 138*2810ac1bSKiyoung Kim return []SockFilter{ 139*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetKillProcess), 140*2810ac1bSKiyoung Kim } 141*2810ac1bSKiyoung Kim} 142*2810ac1bSKiyoung Kim 143*2810ac1bSKiyoung Kimfunc notifyProcessAndDie() []SockFilter { 144*2810ac1bSKiyoung Kim return []SockFilter{ 145*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetTrap), 146*2810ac1bSKiyoung Kim } 147*2810ac1bSKiyoung Kim} 148*2810ac1bSKiyoung Kim 149*2810ac1bSKiyoung Kimfunc trapOnSyscall(syscallNum uint32) []SockFilter { 150*2810ac1bSKiyoung Kim return []SockFilter{ 151*2810ac1bSKiyoung Kim bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), 152*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetTrap), 153*2810ac1bSKiyoung Kim } 154*2810ac1bSKiyoung Kim} 155*2810ac1bSKiyoung Kim 156*2810ac1bSKiyoung Kimfunc allGood() []SockFilter { 157*2810ac1bSKiyoung Kim return []SockFilter{ 158*2810ac1bSKiyoung Kim bpfStmt(bpfRet+bpfK, seccompRetAllow), 159*2810ac1bSKiyoung Kim } 160*2810ac1bSKiyoung Kim} 161*2810ac1bSKiyoung Kim 162*2810ac1bSKiyoung Kim// prctl executes the prctl - unless the --psx commandline argument is 163*2810ac1bSKiyoung Kim// used, this is on a single thread. 164*2810ac1bSKiyoung Kim//go:uintptrescapes 165*2810ac1bSKiyoung Kimfunc prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error { 166*2810ac1bSKiyoung Kim var e syscall.Errno 167*2810ac1bSKiyoung Kim if *withPSX { 168*2810ac1bSKiyoung Kim _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) 169*2810ac1bSKiyoung Kim } else { 170*2810ac1bSKiyoung Kim _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) 171*2810ac1bSKiyoung Kim } 172*2810ac1bSKiyoung Kim if e != 0 { 173*2810ac1bSKiyoung Kim return e 174*2810ac1bSKiyoung Kim } 175*2810ac1bSKiyoung Kim if *delays { 176*2810ac1bSKiyoung Kim fmt.Println("prctl'd - check now") 177*2810ac1bSKiyoung Kim time.Sleep(1 * time.Minute) 178*2810ac1bSKiyoung Kim } 179*2810ac1bSKiyoung Kim return nil 180*2810ac1bSKiyoung Kim} 181*2810ac1bSKiyoung Kim 182*2810ac1bSKiyoung Kim// SeccompSetModeFilter is our wrapper for performing our seccomp system call. 183*2810ac1bSKiyoung Kim//go:uintptrescapes 184*2810ac1bSKiyoung Kimfunc SeccompSetModeFilter(prog *SockFProg) error { 185*2810ac1bSKiyoung Kim if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 { 186*2810ac1bSKiyoung Kim return e 187*2810ac1bSKiyoung Kim } 188*2810ac1bSKiyoung Kim return nil 189*2810ac1bSKiyoung Kim} 190*2810ac1bSKiyoung Kim 191*2810ac1bSKiyoung Kimvar empty func() 192*2810ac1bSKiyoung Kim 193*2810ac1bSKiyoung Kimfunc lockProcessThread(pick bool) { 194*2810ac1bSKiyoung Kim // Make sure we are 195*2810ac1bSKiyoung Kim pid := uintptr(syscall.Getpid()) 196*2810ac1bSKiyoung Kim runtime.LockOSThread() 197*2810ac1bSKiyoung Kim for { 198*2810ac1bSKiyoung Kim tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0) 199*2810ac1bSKiyoung Kim if (tid == pid) == pick { 200*2810ac1bSKiyoung Kim fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick) 201*2810ac1bSKiyoung Kim break 202*2810ac1bSKiyoung Kim } 203*2810ac1bSKiyoung Kim runtime.UnlockOSThread() 204*2810ac1bSKiyoung Kim go func() { 205*2810ac1bSKiyoung Kim time.Sleep(1 * time.Microsecond) 206*2810ac1bSKiyoung Kim }() 207*2810ac1bSKiyoung Kim runtime.Gosched() 208*2810ac1bSKiyoung Kim runtime.LockOSThread() 209*2810ac1bSKiyoung Kim } 210*2810ac1bSKiyoung Kim} 211*2810ac1bSKiyoung Kim 212*2810ac1bSKiyoung Kim// applyPolicy uploads the program sequence. 213*2810ac1bSKiyoung Kimfunc applyPolicy(prog *SockFProg) { 214*2810ac1bSKiyoung Kim // Without PSX we can't guarantee the thread we execute the 215*2810ac1bSKiyoung Kim // seccomp call on will be the same one that we disabled new 216*2810ac1bSKiyoung Kim // privs on. With PSX, the disabling of new privs is mirrored 217*2810ac1bSKiyoung Kim // on all threads. 218*2810ac1bSKiyoung Kim if !*withPSX { 219*2810ac1bSKiyoung Kim lockProcessThread(false) 220*2810ac1bSKiyoung Kim defer runtime.UnlockOSThread() 221*2810ac1bSKiyoung Kim } 222*2810ac1bSKiyoung Kim 223*2810ac1bSKiyoung Kim // This is required to load a filter without privilege. 224*2810ac1bSKiyoung Kim if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil { 225*2810ac1bSKiyoung Kim log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err) 226*2810ac1bSKiyoung Kim } 227*2810ac1bSKiyoung Kim 228*2810ac1bSKiyoung Kim fmt.Println("Applying syscall policy...") 229*2810ac1bSKiyoung Kim if err := SeccompSetModeFilter(prog); err != nil { 230*2810ac1bSKiyoung Kim log.Fatalf("seccomp_set_mode_filter: %v", err) 231*2810ac1bSKiyoung Kim } 232*2810ac1bSKiyoung Kim fmt.Println("...Policy applied") 233*2810ac1bSKiyoung Kim} 234*2810ac1bSKiyoung Kim 235*2810ac1bSKiyoung Kimfunc main() { 236*2810ac1bSKiyoung Kim flag.Parse() 237*2810ac1bSKiyoung Kim 238*2810ac1bSKiyoung Kim if *delays { 239*2810ac1bSKiyoung Kim fmt.Println("check first", syscall.Getpid()) 240*2810ac1bSKiyoung Kim time.Sleep(60 * time.Second) 241*2810ac1bSKiyoung Kim } 242*2810ac1bSKiyoung Kim 243*2810ac1bSKiyoung Kim var filter []SockFilter 244*2810ac1bSKiyoung Kim filter = append(filter, validateArchitecture()...) 245*2810ac1bSKiyoung Kim 246*2810ac1bSKiyoung Kim // Grab the system call number. 247*2810ac1bSKiyoung Kim filter = append(filter, examineSyscall()...) 248*2810ac1bSKiyoung Kim 249*2810ac1bSKiyoung Kim // List disallowed syscalls. 250*2810ac1bSKiyoung Kim for _, x := range []uint32{ 251*2810ac1bSKiyoung Kim syscall.SYS_SETUID, 252*2810ac1bSKiyoung Kim } { 253*2810ac1bSKiyoung Kim if *kill { 254*2810ac1bSKiyoung Kim filter = append(filter, trapOnSyscall(x)...) 255*2810ac1bSKiyoung Kim } else { 256*2810ac1bSKiyoung Kim filter = append(filter, disallowSyscall(x, uint32(*errno))...) 257*2810ac1bSKiyoung Kim } 258*2810ac1bSKiyoung Kim } 259*2810ac1bSKiyoung Kim 260*2810ac1bSKiyoung Kim filter = append(filter, allGood()...) 261*2810ac1bSKiyoung Kim 262*2810ac1bSKiyoung Kim prog := &SockFProg{ 263*2810ac1bSKiyoung Kim Len: uint16(len(filter)), 264*2810ac1bSKiyoung Kim Filter: &filter[0], 265*2810ac1bSKiyoung Kim } 266*2810ac1bSKiyoung Kim 267*2810ac1bSKiyoung Kim applyPolicy(prog) 268*2810ac1bSKiyoung Kim 269*2810ac1bSKiyoung Kim // Ensure we are running on the TID=PID. 270*2810ac1bSKiyoung Kim lockProcessThread(true) 271*2810ac1bSKiyoung Kim 272*2810ac1bSKiyoung Kim log.Print("Now it is time to try to run something privileged...") 273*2810ac1bSKiyoung Kim if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 { 274*2810ac1bSKiyoung Kim log.Fatalf("setuid failed with an error: %v", e) 275*2810ac1bSKiyoung Kim } 276*2810ac1bSKiyoung Kim log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1") 277*2810ac1bSKiyoung Kim} 278