1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build dragonfly || netbsd || (openbsd && mips64)
6
7package syscall
8
9import (
10	"runtime"
11	"unsafe"
12)
13
// SysProcAttr holds the system-specific attributes that forkAndExecInChild
// applies to the child process after fork and before exec.
type SysProcAttr struct {
	Chroot     string      // Directory to chroot the child into (presumably empty for none).
	Credential *Credential // Groups, GID and UID for the child; nil leaves them unchanged.
	Ptrace     bool        // Enable tracing (PTRACE_TRACEME is requested in the child).
	Setsid     bool        // Create a new session (setsid is called in the child).
	// Setpgid sets the process group ID of the child to Pgid,
	// or, if Pgid == 0, to the new child's process ID.
	Setpgid bool
	// Setctty sets the controlling terminal of the child to
	// file descriptor Ctty. Ctty must be a descriptor number
	// in the child process: an index into ProcAttr.Files.
	// This is only meaningful if Setsid is true.
	Setctty bool
	Noctty  bool // Detach fd 0 from controlling terminal (TIOCNOTTY).
	Ctty    int  // Controlling TTY fd, used by Setctty and Foreground.
	// Foreground places the child process group in the foreground.
	// This implies Setpgid. The Ctty field must be set to
	// the descriptor of the controlling TTY.
	// Unlike Setctty, in this case Ctty must be a descriptor
	// number in the parent process.
	Foreground bool
	Pgid       int // Child's process group ID if Setpgid or Foreground; 0 means the child's own PID.
}
37
// Implemented in runtime package.
//
// forkAndExecInChild calls runtime_BeforeFork immediately before the fork
// system call, runtime_AfterFork in the parent once fork has returned
// (whether or not it succeeded), and runtime_AfterForkInChild in the child,
// where it is relied on to restore the signal mask.
func runtime_BeforeFork()
func runtime_AfterFork()
func runtime_AfterForkInChild()
42
// forkAndExecInChild forks and, in the child, applies the attributes in
// sys, dups fd onto 0..len(fd) so that attr.Files[i] becomes descriptor i,
// and execs argv0 with argv and envv.
//
// In the parent it returns the child's PID and a zero Errno, or 0 and the
// errno from fork if fork failed. It never returns in the child: either
// exec succeeds and replaces the process image, or the errno of the
// failing step is written to pipe and the child exits with status 253.
// (Pipe is close-on-exec so if exec succeeds, it will be closed.)
//
// chroot and dir, when non-nil, are handed to the chroot and chdir system
// calls in the child (presumably NUL-terminated paths prepared by the
// caller). argv and envv must be non-empty, since their first elements
// are addressed for execve (presumably both are nil-terminated by the
// caller).
//
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork. This means
// no rescheduling, no malloc calls, and no new stack segments.
// For the same reason the compiler does not race instrument it.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
//
//go:norace
func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., err1).
	var (
		r1              uintptr
		err1            Errno
		nextfd          int
		i               int
		pgrp            _C_int
		cred            *Credential
		ngroups, groups uintptr
	)

	// Load the saved RLIMIT_NOFILE value before forking; if it is
	// non-nil the child restores it just before exec.
	rlim := origRlimitNofile.Load()

	// Guard against side effects of shuffling fds below.
	// fd is a private copy of attr.Files that the child may rewrite; it is
	// allocated here because nothing may be allocated after the fork.
	// nextfd ends up one past the larger of len(attr.Files) and the highest
	// descriptor in attr.Files, so that using it as a dup target can't run
	// the risk of overwriting any of them.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	// This i is scoped to the loop and shadows the i declared above.
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	runtime_BeforeFork()
	r1, _, err1 = RawSyscall(SYS_FORK, 0, 0, 0)
	if err1 != 0 {
		// Fork failed: there is no child, undo BeforeFork and report the errno.
		runtime_AfterFork()
		return 0, err1
	}

	if r1 != 0 {
		// parent; return PID
		runtime_AfterFork()
		return int(r1), 0
	}

	// Fork succeeded, now in child.
	// From here on every failure jumps to childerror, which reports err1
	// through pipe and exits; the child never returns to the caller.

	// Enable tracing if requested.
	if sys.Ptrace {
		_, _, err1 = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Session ID
	if sys.Setsid {
		_, _, err1 = RawSyscall(SYS_SETSID, 0, 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Set process group
	if sys.Setpgid || sys.Foreground {
		// Place child in process group.
		// The first argument 0 refers to the calling process; a Pgid of 0
		// selects the child's own PID as the group ID.
		_, _, err1 = RawSyscall(SYS_SETPGID, 0, uintptr(sys.Pgid), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	if sys.Foreground {
		// This should really be pid_t, however _C_int (aka int32) is
		// generally equivalent.
		pgrp = _C_int(sys.Pgid)
		if pgrp == 0 {
			// Pgid 0 means "the child's own group", so TIOCSPGRP needs
			// the child's actual PID.
			r1, _, err1 = RawSyscall(SYS_GETPID, 0, 0, 0)
			if err1 != 0 {
				goto childerror
			}

			pgrp = _C_int(r1)
		}

		// Place process group in foreground.
		// The fd shuffling has not happened yet, so sys.Ctty is still a
		// descriptor number inherited from the parent.
		_, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSPGRP), uintptr(unsafe.Pointer(&pgrp)))
		if err1 != 0 {
			goto childerror
		}
	}

	// Restore the signal mask. We do this after TIOCSPGRP to avoid
	// having the kernel send a SIGTTOU signal to the process group.
	runtime_AfterForkInChild()

	// Chroot
	if chroot != nil {
		_, _, err1 = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer(chroot)), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// User and groups
	// Order: supplementary groups (unless NoSetGroups), then GID, then UID.
	if cred = sys.Credential; cred != nil {
		ngroups = uintptr(len(cred.Groups))
		// With no groups, setgroups is passed a count of 0 and a null pointer.
		groups = uintptr(0)
		if ngroups > 0 {
			groups = uintptr(unsafe.Pointer(&cred.Groups[0]))
		}
		if !cred.NoSetGroups {
			_, _, err1 = RawSyscall(SYS_SETGROUPS, ngroups, groups, 0)
			if err1 != 0 {
				goto childerror
			}
		}
		_, _, err1 = RawSyscall(SYS_SETGID, uintptr(cred.Gid), 0, 0)
		if err1 != 0 {
			goto childerror
		}
		_, _, err1 = RawSyscall(SYS_SETUID, uintptr(cred.Uid), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Chdir
	if dir != nil {
		_, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	//
	// Before that, move the error pipe itself up to nextfd if it sits below
	// it, so the shuffling cannot overwrite it. The duplicate is created
	// close-on-exec using the primitive selected for the target: _SYS_DUP3
	// with O_CLOEXEC on netbsd and openbsd/mips64, fcntl with
	// _F_DUP2FD_CLOEXEC on dragonfly. The final else (dup2 followed by
	// fcntl F_SETFD FD_CLOEXEC) is a generic fallback; the build constraint
	// on this file only admits targets handled by the first two branches.
	if pipe < nextfd {
		if runtime.GOOS == "netbsd" || (runtime.GOOS == "openbsd" && runtime.GOARCH == "mips64") {
			_, _, err1 = RawSyscall(_SYS_DUP3, uintptr(pipe), uintptr(nextfd), O_CLOEXEC)
		} else if runtime.GOOS == "dragonfly" {
			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(pipe), _F_DUP2FD_CLOEXEC, uintptr(nextfd))
		} else {
			_, _, err1 = RawSyscall(SYS_DUP2, uintptr(pipe), uintptr(nextfd), 0)
			if err1 != 0 {
				goto childerror
			}
			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(nextfd), F_SETFD, FD_CLOEXEC)
		}
		if err1 != 0 {
			goto childerror
		}
		// Errors are reported through the relocated descriptor from now on.
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		// Negative entries are skipped here; pass 2 closes slot i when
		// the entry is -1.
		if fd[i] >= 0 && fd[i] < i {
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
			// Same close-on-exec duplication as for pipe above, so the
			// temporary copy does not leak into the exec'd program.
			if runtime.GOOS == "netbsd" || (runtime.GOOS == "openbsd" && runtime.GOARCH == "mips64") {
				_, _, err1 = RawSyscall(_SYS_DUP3, uintptr(fd[i]), uintptr(nextfd), O_CLOEXEC)
			} else if runtime.GOOS == "dragonfly" {
				_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), _F_DUP2FD_CLOEXEC, uintptr(nextfd))
			} else {
				_, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(nextfd), 0)
				if err1 != 0 {
					goto childerror
				}
				_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(nextfd), F_SETFD, FD_CLOEXEC)
			}
			if err1 != 0 {
				goto childerror
			}
			// Pass 2 will dup from the relocated copy.
			fd[i] = nextfd
			nextfd++
		}
	}

	// Pass 2: dup fd[i] down onto i.
	for i = 0; i < len(fd); i++ {
		if fd[i] == -1 {
			// No file requested for this slot: close it. The result of
			// close is deliberately ignored.
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == i {
			// dup2(i, i) won't clear close-on-exec flag on Linux,
			// probably not elsewhere either.
			// So clear the descriptor flags explicitly instead.
			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_SETFD, 0)
			if err1 != 0 {
				goto childerror
			}
			continue
		}
		// The new fd is created NOT close-on-exec,
		// which is exactly what we want.
		_, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(i), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// By convention, we don't close-on-exec the fds we are
	// started with, so if len(fd) < 3, close 0, 1, 2 as needed.
	// Programs that know they inherit fds >= 3 will need
	// to set them close-on-exec.
	for i = len(fd); i < 3; i++ {
		RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
	}

	// Detach fd 0 from tty
	if sys.Noctty {
		_, _, err1 = RawSyscall(SYS_IOCTL, 0, uintptr(TIOCNOTTY), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Set the controlling TTY to Ctty
	// This runs after the fd shuffling, so sys.Ctty is interpreted as a
	// descriptor number in the child (an index into attr.Files).
	if sys.Setctty {
		_, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSCTTY), 0)
		if err1 != 0 {
			goto childerror
		}
	}

	// Restore original rlimit.
	// The result is ignored, so a failure here does not prevent the exec.
	if rlim != nil {
		RawSyscall(SYS_SETRLIMIT, uintptr(RLIMIT_NOFILE), uintptr(unsafe.Pointer(rlim)), 0)
	}

	// Time to exec.
	// On success execve does not return; if control reaches the label
	// below, exec failed and err1 holds its errno.
	_, _, err1 = RawSyscall(SYS_EXECVE,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])),
		uintptr(unsafe.Pointer(&envv[0])))

childerror:
	// send error code on pipe
	RawSyscall(SYS_WRITE, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))
	// Exit with status 253; the loop keeps retrying should the exit
	// system call ever return, so this function never returns in the child.
	for {
		RawSyscall(SYS_EXIT, 253, 0, 0)
	}
}
296
// forkAndExecFailureCleanup cleans up after an exec failure.
// On the platforms this file is built for it is a no-op: attr and sys
// are accepted but unused (presumably so that shared callers can invoke
// it unconditionally — confirm against the caller).
func forkAndExecFailureCleanup(attr *ProcAttr, sys *SysProcAttr) {
	// Nothing to do.
}
301