1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build unix || (js && wasm) || wasip1
6
7package poll
8
9import (
10	"internal/itoa"
11	"internal/syscall/unix"
12	"io"
13	"sync/atomic"
14	"syscall"
15)
16
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close; destroy resets it
	// to -1 after releasing the descriptor.
	Sysfd int

	// Platform dependent state of the file descriptor.
	SysFile

	// I/O poller.
	pd pollDesc

	// Semaphore signaled when file is closed. destroy releases it and
	// Close waits on it unless the file is in blocking mode.
	csema uint32

	// Non-zero if this file has been set to blocking mode, either by
	// Init (not pollable, or poller initialization failed) or by
	// SetBlocking.
	isBlocking uint32

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// Reads and writes on streaming descriptors are capped at maxRW
	// bytes per system call.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when its net argument is "file".
	isFile bool
}
49
50// Init initializes the FD. The Sysfd field should already be set.
51// This can be called multiple times on a single FD.
52// The net argument is a network name from the net package (e.g., "tcp"),
53// or "file".
54// Set pollable to true if fd should be managed by runtime netpoll.
55func (fd *FD) Init(net string, pollable bool) error {
56	fd.SysFile.init()
57
58	// We don't actually care about the various network types.
59	if net == "file" {
60		fd.isFile = true
61	}
62	if !pollable {
63		fd.isBlocking = 1
64		return nil
65	}
66	err := fd.pd.init(fd)
67	if err != nil {
68		// If we could not initialize the runtime poller,
69		// assume we are using blocking mode.
70		fd.isBlocking = 1
71	}
72	return err
73}
74
75// Destroy closes the file descriptor. This is called when there are
76// no remaining references.
77func (fd *FD) destroy() error {
78	// Poller may want to unregister fd in readiness notification mechanism,
79	// so this must be executed before CloseFunc.
80	fd.pd.close()
81
82	err := fd.SysFile.destroy(fd.Sysfd)
83
84	fd.Sysfd = -1
85	runtime_Semrelease(&fd.csema)
86	return err
87}
88
89// Close closes the FD. The underlying file descriptor is closed by the
90// destroy method when there are no remaining references.
91func (fd *FD) Close() error {
92	if !fd.fdmu.increfAndClose() {
93		return errClosing(fd.isFile)
94	}
95
96	// Unblock any I/O.  Once it all unblocks and returns,
97	// so that it cannot be referring to fd.sysfd anymore,
98	// the final decref will close fd.sysfd. This should happen
99	// fairly quickly, since all the I/O is non-blocking, and any
100	// attempts to block in the pollDesc will return errClosing(fd.isFile).
101	fd.pd.evict()
102
103	// The call to decref will call destroy if there are no other
104	// references.
105	err := fd.decref()
106
107	// Wait until the descriptor is closed. If this was the only
108	// reference, it is already closed. Only wait if the file has
109	// not been set to blocking mode, as otherwise any current I/O
110	// may be blocking, and that would block the Close.
111	// No need for an atomic read of isBlocking, increfAndClose means
112	// we have exclusive access to fd.
113	if fd.isBlocking == 0 {
114		runtime_Semacquire(&fd.csema)
115	}
116
117	return err
118}
119
120// SetBlocking puts the file into blocking mode.
121func (fd *FD) SetBlocking() error {
122	if err := fd.incref(); err != nil {
123		return err
124	}
125	defer fd.decref()
126	// Atomic store so that concurrent calls to SetBlocking
127	// do not cause a race condition. isBlocking only ever goes
128	// from 0 to 1 so there is no real race here.
129	atomic.StoreUint32(&fd.isBlocking, 1)
130	return syscall.SetNonblock(fd.Sysfd, false)
131}
132
// maxRW is the largest number of bytes handed to a single read or
// write system call on a streaming descriptor.
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
139
140// Read implements io.Reader.
141func (fd *FD) Read(p []byte) (int, error) {
142	if err := fd.readLock(); err != nil {
143		return 0, err
144	}
145	defer fd.readUnlock()
146	if len(p) == 0 {
147		// If the caller wanted a zero byte read, return immediately
148		// without trying (but after acquiring the readLock).
149		// Otherwise syscall.Read returns 0, nil which looks like
150		// io.EOF.
151		// TODO(bradfitz): make it wait for readability? (Issue 15735)
152		return 0, nil
153	}
154	if err := fd.pd.prepareRead(fd.isFile); err != nil {
155		return 0, err
156	}
157	if fd.IsStream && len(p) > maxRW {
158		p = p[:maxRW]
159	}
160	for {
161		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
162		if err != nil {
163			n = 0
164			if err == syscall.EAGAIN && fd.pd.pollable() {
165				if err = fd.pd.waitRead(fd.isFile); err == nil {
166					continue
167				}
168			}
169		}
170		err = fd.eofError(n, err)
171		return n, err
172	}
173}
174
175// Pread wraps the pread system call.
176func (fd *FD) Pread(p []byte, off int64) (int, error) {
177	// Call incref, not readLock, because since pread specifies the
178	// offset it is independent from other reads.
179	// Similarly, using the poller doesn't make sense for pread.
180	if err := fd.incref(); err != nil {
181		return 0, err
182	}
183	if fd.IsStream && len(p) > maxRW {
184		p = p[:maxRW]
185	}
186	var (
187		n   int
188		err error
189	)
190	for {
191		n, err = syscall.Pread(fd.Sysfd, p, off)
192		if err != syscall.EINTR {
193			break
194		}
195	}
196	if err != nil {
197		n = 0
198	}
199	fd.decref()
200	err = fd.eofError(n, err)
201	return n, err
202}
203
204// ReadFrom wraps the recvfrom network call.
205func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
206	if err := fd.readLock(); err != nil {
207		return 0, nil, err
208	}
209	defer fd.readUnlock()
210	if err := fd.pd.prepareRead(fd.isFile); err != nil {
211		return 0, nil, err
212	}
213	for {
214		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
215		if err != nil {
216			if err == syscall.EINTR {
217				continue
218			}
219			n = 0
220			if err == syscall.EAGAIN && fd.pd.pollable() {
221				if err = fd.pd.waitRead(fd.isFile); err == nil {
222					continue
223				}
224			}
225		}
226		err = fd.eofError(n, err)
227		return n, sa, err
228	}
229}
230
231// ReadFromInet4 wraps the recvfrom network call for IPv4.
232func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) {
233	if err := fd.readLock(); err != nil {
234		return 0, err
235	}
236	defer fd.readUnlock()
237	if err := fd.pd.prepareRead(fd.isFile); err != nil {
238		return 0, err
239	}
240	for {
241		n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from)
242		if err != nil {
243			if err == syscall.EINTR {
244				continue
245			}
246			n = 0
247			if err == syscall.EAGAIN && fd.pd.pollable() {
248				if err = fd.pd.waitRead(fd.isFile); err == nil {
249					continue
250				}
251			}
252		}
253		err = fd.eofError(n, err)
254		return n, err
255	}
256}
257
258// ReadFromInet6 wraps the recvfrom network call for IPv6.
259func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) {
260	if err := fd.readLock(); err != nil {
261		return 0, err
262	}
263	defer fd.readUnlock()
264	if err := fd.pd.prepareRead(fd.isFile); err != nil {
265		return 0, err
266	}
267	for {
268		n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from)
269		if err != nil {
270			if err == syscall.EINTR {
271				continue
272			}
273			n = 0
274			if err == syscall.EAGAIN && fd.pd.pollable() {
275				if err = fd.pd.waitRead(fd.isFile); err == nil {
276					continue
277				}
278			}
279		}
280		err = fd.eofError(n, err)
281		return n, err
282	}
283}
284
285// ReadMsg wraps the recvmsg network call.
286func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
287	if err := fd.readLock(); err != nil {
288		return 0, 0, 0, nil, err
289	}
290	defer fd.readUnlock()
291	if err := fd.pd.prepareRead(fd.isFile); err != nil {
292		return 0, 0, 0, nil, err
293	}
294	for {
295		n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
296		if err != nil {
297			if err == syscall.EINTR {
298				continue
299			}
300			// TODO(dfc) should n and oobn be set to 0
301			if err == syscall.EAGAIN && fd.pd.pollable() {
302				if err = fd.pd.waitRead(fd.isFile); err == nil {
303					continue
304				}
305			}
306		}
307		err = fd.eofError(n, err)
308		return n, oobn, sysflags, sa, err
309	}
310}
311
312// ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4.
313func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) {
314	if err := fd.readLock(); err != nil {
315		return 0, 0, 0, err
316	}
317	defer fd.readUnlock()
318	if err := fd.pd.prepareRead(fd.isFile); err != nil {
319		return 0, 0, 0, err
320	}
321	for {
322		n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4)
323		if err != nil {
324			if err == syscall.EINTR {
325				continue
326			}
327			// TODO(dfc) should n and oobn be set to 0
328			if err == syscall.EAGAIN && fd.pd.pollable() {
329				if err = fd.pd.waitRead(fd.isFile); err == nil {
330					continue
331				}
332			}
333		}
334		err = fd.eofError(n, err)
335		return n, oobn, sysflags, err
336	}
337}
338
339// ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6.
340func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) {
341	if err := fd.readLock(); err != nil {
342		return 0, 0, 0, err
343	}
344	defer fd.readUnlock()
345	if err := fd.pd.prepareRead(fd.isFile); err != nil {
346		return 0, 0, 0, err
347	}
348	for {
349		n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6)
350		if err != nil {
351			if err == syscall.EINTR {
352				continue
353			}
354			// TODO(dfc) should n and oobn be set to 0
355			if err == syscall.EAGAIN && fd.pd.pollable() {
356				if err = fd.pd.waitRead(fd.isFile); err == nil {
357					continue
358				}
359			}
360		}
361		err = fd.eofError(n, err)
362		return n, oobn, sysflags, err
363	}
364}
365
366// Write implements io.Writer.
367func (fd *FD) Write(p []byte) (int, error) {
368	if err := fd.writeLock(); err != nil {
369		return 0, err
370	}
371	defer fd.writeUnlock()
372	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
373		return 0, err
374	}
375	var nn int
376	for {
377		max := len(p)
378		if fd.IsStream && max-nn > maxRW {
379			max = nn + maxRW
380		}
381		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
382		if n > 0 {
383			if n > max-nn {
384				// This can reportedly happen when using
385				// some VPN software. Issue #61060.
386				// If we don't check this we will panic
387				// with slice bounds out of range.
388				// Use a more informative panic.
389				panic("invalid return from write: got " + itoa.Itoa(n) + " from a write of " + itoa.Itoa(max-nn))
390			}
391			nn += n
392		}
393		if nn == len(p) {
394			return nn, err
395		}
396		if err == syscall.EAGAIN && fd.pd.pollable() {
397			if err = fd.pd.waitWrite(fd.isFile); err == nil {
398				continue
399			}
400		}
401		if err != nil {
402			return nn, err
403		}
404		if n == 0 {
405			return nn, io.ErrUnexpectedEOF
406		}
407	}
408}
409
410// Pwrite wraps the pwrite system call.
411func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
412	// Call incref, not writeLock, because since pwrite specifies the
413	// offset it is independent from other writes.
414	// Similarly, using the poller doesn't make sense for pwrite.
415	if err := fd.incref(); err != nil {
416		return 0, err
417	}
418	defer fd.decref()
419	var nn int
420	for {
421		max := len(p)
422		if fd.IsStream && max-nn > maxRW {
423			max = nn + maxRW
424		}
425		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
426		if err == syscall.EINTR {
427			continue
428		}
429		if n > 0 {
430			nn += n
431		}
432		if nn == len(p) {
433			return nn, err
434		}
435		if err != nil {
436			return nn, err
437		}
438		if n == 0 {
439			return nn, io.ErrUnexpectedEOF
440		}
441	}
442}
443
444// WriteToInet4 wraps the sendto network call for IPv4 addresses.
445func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) {
446	if err := fd.writeLock(); err != nil {
447		return 0, err
448	}
449	defer fd.writeUnlock()
450	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
451		return 0, err
452	}
453	for {
454		err := unix.SendtoInet4(fd.Sysfd, p, 0, sa)
455		if err == syscall.EINTR {
456			continue
457		}
458		if err == syscall.EAGAIN && fd.pd.pollable() {
459			if err = fd.pd.waitWrite(fd.isFile); err == nil {
460				continue
461			}
462		}
463		if err != nil {
464			return 0, err
465		}
466		return len(p), nil
467	}
468}
469
470// WriteToInet6 wraps the sendto network call for IPv6 addresses.
471func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) {
472	if err := fd.writeLock(); err != nil {
473		return 0, err
474	}
475	defer fd.writeUnlock()
476	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
477		return 0, err
478	}
479	for {
480		err := unix.SendtoInet6(fd.Sysfd, p, 0, sa)
481		if err == syscall.EINTR {
482			continue
483		}
484		if err == syscall.EAGAIN && fd.pd.pollable() {
485			if err = fd.pd.waitWrite(fd.isFile); err == nil {
486				continue
487			}
488		}
489		if err != nil {
490			return 0, err
491		}
492		return len(p), nil
493	}
494}
495
496// WriteTo wraps the sendto network call.
497func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
498	if err := fd.writeLock(); err != nil {
499		return 0, err
500	}
501	defer fd.writeUnlock()
502	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
503		return 0, err
504	}
505	for {
506		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
507		if err == syscall.EINTR {
508			continue
509		}
510		if err == syscall.EAGAIN && fd.pd.pollable() {
511			if err = fd.pd.waitWrite(fd.isFile); err == nil {
512				continue
513			}
514		}
515		if err != nil {
516			return 0, err
517		}
518		return len(p), nil
519	}
520}
521
522// WriteMsg wraps the sendmsg network call.
523func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
524	if err := fd.writeLock(); err != nil {
525		return 0, 0, err
526	}
527	defer fd.writeUnlock()
528	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
529		return 0, 0, err
530	}
531	for {
532		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
533		if err == syscall.EINTR {
534			continue
535		}
536		if err == syscall.EAGAIN && fd.pd.pollable() {
537			if err = fd.pd.waitWrite(fd.isFile); err == nil {
538				continue
539			}
540		}
541		if err != nil {
542			return n, 0, err
543		}
544		return n, len(oob), err
545	}
546}
547
548// WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4.
549func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) {
550	if err := fd.writeLock(); err != nil {
551		return 0, 0, err
552	}
553	defer fd.writeUnlock()
554	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
555		return 0, 0, err
556	}
557	for {
558		n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0)
559		if err == syscall.EINTR {
560			continue
561		}
562		if err == syscall.EAGAIN && fd.pd.pollable() {
563			if err = fd.pd.waitWrite(fd.isFile); err == nil {
564				continue
565			}
566		}
567		if err != nil {
568			return n, 0, err
569		}
570		return n, len(oob), err
571	}
572}
573
574// WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6.
575func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) {
576	if err := fd.writeLock(); err != nil {
577		return 0, 0, err
578	}
579	defer fd.writeUnlock()
580	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
581		return 0, 0, err
582	}
583	for {
584		n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0)
585		if err == syscall.EINTR {
586			continue
587		}
588		if err == syscall.EAGAIN && fd.pd.pollable() {
589			if err = fd.pd.waitWrite(fd.isFile); err == nil {
590				continue
591			}
592		}
593		if err != nil {
594			return n, 0, err
595		}
596		return n, len(oob), err
597	}
598}
599
600// Accept wraps the accept network call.
601func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
602	if err := fd.readLock(); err != nil {
603		return -1, nil, "", err
604	}
605	defer fd.readUnlock()
606
607	if err := fd.pd.prepareRead(fd.isFile); err != nil {
608		return -1, nil, "", err
609	}
610	for {
611		s, rsa, errcall, err := accept(fd.Sysfd)
612		if err == nil {
613			return s, rsa, "", err
614		}
615		switch err {
616		case syscall.EINTR:
617			continue
618		case syscall.EAGAIN:
619			if fd.pd.pollable() {
620				if err = fd.pd.waitRead(fd.isFile); err == nil {
621					continue
622				}
623			}
624		case syscall.ECONNABORTED:
625			// This means that a socket on the listen
626			// queue was closed before we Accept()ed it;
627			// it's a silly error, so try again.
628			continue
629		}
630		return -1, nil, errcall, err
631	}
632}
633
634// Fchmod wraps syscall.Fchmod.
635func (fd *FD) Fchmod(mode uint32) error {
636	if err := fd.incref(); err != nil {
637		return err
638	}
639	defer fd.decref()
640	return ignoringEINTR(func() error {
641		return syscall.Fchmod(fd.Sysfd, mode)
642	})
643}
644
645// Fstat wraps syscall.Fstat
646func (fd *FD) Fstat(s *syscall.Stat_t) error {
647	if err := fd.incref(); err != nil {
648		return err
649	}
650	defer fd.decref()
651	return ignoringEINTR(func() error {
652		return syscall.Fstat(fd.Sysfd, s)
653	})
654}
655
656// dupCloexecUnsupported indicates whether F_DUPFD_CLOEXEC is supported by the kernel.
657var dupCloexecUnsupported atomic.Bool
658
659// DupCloseOnExec dups fd and marks it close-on-exec.
660func DupCloseOnExec(fd int) (int, string, error) {
661	if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() {
662		r0, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
663		if err == nil {
664			return r0, "", nil
665		}
666		switch err {
667		case syscall.EINVAL, syscall.ENOSYS:
668			// Old kernel, or js/wasm (which returns
669			// ENOSYS). Fall back to the portable way from
670			// now on.
671			dupCloexecUnsupported.Store(true)
672		default:
673			return -1, "fcntl", err
674		}
675	}
676	return dupCloseOnExecOld(fd)
677}
678
679// Dup duplicates the file descriptor.
680func (fd *FD) Dup() (int, string, error) {
681	if err := fd.incref(); err != nil {
682		return -1, "", err
683	}
684	defer fd.decref()
685	return DupCloseOnExec(fd.Sysfd)
686}
687
688// On Unix variants only, expose the IO event for the net code.
689
690// WaitWrite waits until data can be written to fd.
691func (fd *FD) WaitWrite() error {
692	return fd.pd.waitWrite(fd.isFile)
693}
694
695// WriteOnce is for testing only. It makes a single write call.
696func (fd *FD) WriteOnce(p []byte) (int, error) {
697	if err := fd.writeLock(); err != nil {
698		return 0, err
699	}
700	defer fd.writeUnlock()
701	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
702}
703
704// RawRead invokes the user-defined function f for a read operation.
705func (fd *FD) RawRead(f func(uintptr) bool) error {
706	if err := fd.readLock(); err != nil {
707		return err
708	}
709	defer fd.readUnlock()
710	if err := fd.pd.prepareRead(fd.isFile); err != nil {
711		return err
712	}
713	for {
714		if f(uintptr(fd.Sysfd)) {
715			return nil
716		}
717		if err := fd.pd.waitRead(fd.isFile); err != nil {
718			return err
719		}
720	}
721}
722
723// RawWrite invokes the user-defined function f for a write operation.
724func (fd *FD) RawWrite(f func(uintptr) bool) error {
725	if err := fd.writeLock(); err != nil {
726		return err
727	}
728	defer fd.writeUnlock()
729	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
730		return err
731	}
732	for {
733		if f(uintptr(fd.Sysfd)) {
734			return nil
735		}
736		if err := fd.pd.waitWrite(fd.isFile); err != nil {
737			return err
738		}
739	}
740}
741
// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
// It calls fn(fd, p) again and again for as long as the call fails
// with syscall.EINTR, and returns the results of the first call that
// does not.
func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
	n, err := fn(fd, p)
	for err == syscall.EINTR {
		n, err = fn(fd, p)
	}
	return n, err
}
751