1// Copyright 2017 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build unix || (js && wasm) || wasip1 6 7package poll 8 9import ( 10 "internal/itoa" 11 "internal/syscall/unix" 12 "io" 13 "sync/atomic" 14 "syscall" 15) 16 17// FD is a file descriptor. The net and os packages use this type as a 18// field of a larger type representing a network connection or OS file. 19type FD struct { 20 // Lock sysfd and serialize access to Read and Write methods. 21 fdmu fdMutex 22 23 // System file descriptor. Immutable until Close. 24 Sysfd int 25 26 // Platform dependent state of the file descriptor. 27 SysFile 28 29 // I/O poller. 30 pd pollDesc 31 32 // Semaphore signaled when file is closed. 33 csema uint32 34 35 // Non-zero if this file has been set to blocking mode. 36 isBlocking uint32 37 38 // Whether this is a streaming descriptor, as opposed to a 39 // packet-based descriptor like a UDP socket. Immutable. 40 IsStream bool 41 42 // Whether a zero byte read indicates EOF. This is false for a 43 // message based socket connection. 44 ZeroReadIsEOF bool 45 46 // Whether this is a file rather than a network socket. 47 isFile bool 48} 49 50// Init initializes the FD. The Sysfd field should already be set. 51// This can be called multiple times on a single FD. 52// The net argument is a network name from the net package (e.g., "tcp"), 53// or "file". 54// Set pollable to true if fd should be managed by runtime netpoll. 55func (fd *FD) Init(net string, pollable bool) error { 56 fd.SysFile.init() 57 58 // We don't actually care about the various network types. 59 if net == "file" { 60 fd.isFile = true 61 } 62 if !pollable { 63 fd.isBlocking = 1 64 return nil 65 } 66 err := fd.pd.init(fd) 67 if err != nil { 68 // If we could not initialize the runtime poller, 69 // assume we are using blocking mode. 70 fd.isBlocking = 1 71 } 72 return err 73} 74 75// Destroy closes the file descriptor. This is called when there are 76// no remaining references. 77func (fd *FD) destroy() error { 78 // Poller may want to unregister fd in readiness notification mechanism, 79 // so this must be executed before CloseFunc. 80 fd.pd.close() 81 82 err := fd.SysFile.destroy(fd.Sysfd) 83 84 fd.Sysfd = -1 85 runtime_Semrelease(&fd.csema) 86 return err 87} 88 89// Close closes the FD. The underlying file descriptor is closed by the 90// destroy method when there are no remaining references. 91func (fd *FD) Close() error { 92 if !fd.fdmu.increfAndClose() { 93 return errClosing(fd.isFile) 94 } 95 96 // Unblock any I/O. Once it all unblocks and returns, 97 // so that it cannot be referring to fd.sysfd anymore, 98 // the final decref will close fd.sysfd. This should happen 99 // fairly quickly, since all the I/O is non-blocking, and any 100 // attempts to block in the pollDesc will return errClosing(fd.isFile). 101 fd.pd.evict() 102 103 // The call to decref will call destroy if there are no other 104 // references. 105 err := fd.decref() 106 107 // Wait until the descriptor is closed. If this was the only 108 // reference, it is already closed. Only wait if the file has 109 // not been set to blocking mode, as otherwise any current I/O 110 // may be blocking, and that would block the Close. 111 // No need for an atomic read of isBlocking, increfAndClose means 112 // we have exclusive access to fd. 113 if fd.isBlocking == 0 { 114 runtime_Semacquire(&fd.csema) 115 } 116 117 return err 118} 119 120// SetBlocking puts the file into blocking mode. 121func (fd *FD) SetBlocking() error { 122 if err := fd.incref(); err != nil { 123 return err 124 } 125 defer fd.decref() 126 // Atomic store so that concurrent calls to SetBlocking 127 // do not cause a race condition. isBlocking only ever goes 128 // from 0 to 1 so there is no real race here. 129 atomic.StoreUint32(&fd.isBlocking, 1) 130 return syscall.SetNonblock(fd.Sysfd, false) 131} 132 133// Darwin and FreeBSD can't read or write 2GB+ files at a time, 134// even on 64-bit systems. 135// The same is true of socket implementations on many systems. 136// See golang.org/issue/7812 and golang.org/issue/16266. 137// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. 138const maxRW = 1 << 30 139 140// Read implements io.Reader. 141func (fd *FD) Read(p []byte) (int, error) { 142 if err := fd.readLock(); err != nil { 143 return 0, err 144 } 145 defer fd.readUnlock() 146 if len(p) == 0 { 147 // If the caller wanted a zero byte read, return immediately 148 // without trying (but after acquiring the readLock). 149 // Otherwise syscall.Read returns 0, nil which looks like 150 // io.EOF. 151 // TODO(bradfitz): make it wait for readability? (Issue 15735) 152 return 0, nil 153 } 154 if err := fd.pd.prepareRead(fd.isFile); err != nil { 155 return 0, err 156 } 157 if fd.IsStream && len(p) > maxRW { 158 p = p[:maxRW] 159 } 160 for { 161 n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p) 162 if err != nil { 163 n = 0 164 if err == syscall.EAGAIN && fd.pd.pollable() { 165 if err = fd.pd.waitRead(fd.isFile); err == nil { 166 continue 167 } 168 } 169 } 170 err = fd.eofError(n, err) 171 return n, err 172 } 173} 174 175// Pread wraps the pread system call. 176func (fd *FD) Pread(p []byte, off int64) (int, error) { 177 // Call incref, not readLock, because since pread specifies the 178 // offset it is independent from other reads. 179 // Similarly, using the poller doesn't make sense for pread. 180 if err := fd.incref(); err != nil { 181 return 0, err 182 } 183 if fd.IsStream && len(p) > maxRW { 184 p = p[:maxRW] 185 } 186 var ( 187 n int 188 err error 189 ) 190 for { 191 n, err = syscall.Pread(fd.Sysfd, p, off) 192 if err != syscall.EINTR { 193 break 194 } 195 } 196 if err != nil { 197 n = 0 198 } 199 fd.decref() 200 err = fd.eofError(n, err) 201 return n, err 202} 203 204// ReadFrom wraps the recvfrom network call. 205func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) { 206 if err := fd.readLock(); err != nil { 207 return 0, nil, err 208 } 209 defer fd.readUnlock() 210 if err := fd.pd.prepareRead(fd.isFile); err != nil { 211 return 0, nil, err 212 } 213 for { 214 n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0) 215 if err != nil { 216 if err == syscall.EINTR { 217 continue 218 } 219 n = 0 220 if err == syscall.EAGAIN && fd.pd.pollable() { 221 if err = fd.pd.waitRead(fd.isFile); err == nil { 222 continue 223 } 224 } 225 } 226 err = fd.eofError(n, err) 227 return n, sa, err 228 } 229} 230 231// ReadFromInet4 wraps the recvfrom network call for IPv4. 232func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) { 233 if err := fd.readLock(); err != nil { 234 return 0, err 235 } 236 defer fd.readUnlock() 237 if err := fd.pd.prepareRead(fd.isFile); err != nil { 238 return 0, err 239 } 240 for { 241 n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from) 242 if err != nil { 243 if err == syscall.EINTR { 244 continue 245 } 246 n = 0 247 if err == syscall.EAGAIN && fd.pd.pollable() { 248 if err = fd.pd.waitRead(fd.isFile); err == nil { 249 continue 250 } 251 } 252 } 253 err = fd.eofError(n, err) 254 return n, err 255 } 256} 257 258// ReadFromInet6 wraps the recvfrom network call for IPv6. 259func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) { 260 if err := fd.readLock(); err != nil { 261 return 0, err 262 } 263 defer fd.readUnlock() 264 if err := fd.pd.prepareRead(fd.isFile); err != nil { 265 return 0, err 266 } 267 for { 268 n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from) 269 if err != nil { 270 if err == syscall.EINTR { 271 continue 272 } 273 n = 0 274 if err == syscall.EAGAIN && fd.pd.pollable() { 275 if err = fd.pd.waitRead(fd.isFile); err == nil { 276 continue 277 } 278 } 279 } 280 err = fd.eofError(n, err) 281 return n, err 282 } 283} 284 285// ReadMsg wraps the recvmsg network call. 286func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) { 287 if err := fd.readLock(); err != nil { 288 return 0, 0, 0, nil, err 289 } 290 defer fd.readUnlock() 291 if err := fd.pd.prepareRead(fd.isFile); err != nil { 292 return 0, 0, 0, nil, err 293 } 294 for { 295 n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags) 296 if err != nil { 297 if err == syscall.EINTR { 298 continue 299 } 300 // TODO(dfc) should n and oobn be set to 0 301 if err == syscall.EAGAIN && fd.pd.pollable() { 302 if err = fd.pd.waitRead(fd.isFile); err == nil { 303 continue 304 } 305 } 306 } 307 err = fd.eofError(n, err) 308 return n, oobn, sysflags, sa, err 309 } 310} 311 312// ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4. 313func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) { 314 if err := fd.readLock(); err != nil { 315 return 0, 0, 0, err 316 } 317 defer fd.readUnlock() 318 if err := fd.pd.prepareRead(fd.isFile); err != nil { 319 return 0, 0, 0, err 320 } 321 for { 322 n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4) 323 if err != nil { 324 if err == syscall.EINTR { 325 continue 326 } 327 // TODO(dfc) should n and oobn be set to 0 328 if err == syscall.EAGAIN && fd.pd.pollable() { 329 if err = fd.pd.waitRead(fd.isFile); err == nil { 330 continue 331 } 332 } 333 } 334 err = fd.eofError(n, err) 335 return n, oobn, sysflags, err 336 } 337} 338 339// ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6. 340func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) { 341 if err := fd.readLock(); err != nil { 342 return 0, 0, 0, err 343 } 344 defer fd.readUnlock() 345 if err := fd.pd.prepareRead(fd.isFile); err != nil { 346 return 0, 0, 0, err 347 } 348 for { 349 n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6) 350 if err != nil { 351 if err == syscall.EINTR { 352 continue 353 } 354 // TODO(dfc) should n and oobn be set to 0 355 if err == syscall.EAGAIN && fd.pd.pollable() { 356 if err = fd.pd.waitRead(fd.isFile); err == nil { 357 continue 358 } 359 } 360 } 361 err = fd.eofError(n, err) 362 return n, oobn, sysflags, err 363 } 364} 365 366// Write implements io.Writer. 367func (fd *FD) Write(p []byte) (int, error) { 368 if err := fd.writeLock(); err != nil { 369 return 0, err 370 } 371 defer fd.writeUnlock() 372 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 373 return 0, err 374 } 375 var nn int 376 for { 377 max := len(p) 378 if fd.IsStream && max-nn > maxRW { 379 max = nn + maxRW 380 } 381 n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max]) 382 if n > 0 { 383 if n > max-nn { 384 // This can reportedly happen when using 385 // some VPN software. Issue #61060. 386 // If we don't check this we will panic 387 // with slice bounds out of range. 388 // Use a more informative panic. 389 panic("invalid return from write: got " + itoa.Itoa(n) + " from a write of " + itoa.Itoa(max-nn)) 390 } 391 nn += n 392 } 393 if nn == len(p) { 394 return nn, err 395 } 396 if err == syscall.EAGAIN && fd.pd.pollable() { 397 if err = fd.pd.waitWrite(fd.isFile); err == nil { 398 continue 399 } 400 } 401 if err != nil { 402 return nn, err 403 } 404 if n == 0 { 405 return nn, io.ErrUnexpectedEOF 406 } 407 } 408} 409 410// Pwrite wraps the pwrite system call. 411func (fd *FD) Pwrite(p []byte, off int64) (int, error) { 412 // Call incref, not writeLock, because since pwrite specifies the 413 // offset it is independent from other writes. 414 // Similarly, using the poller doesn't make sense for pwrite. 415 if err := fd.incref(); err != nil { 416 return 0, err 417 } 418 defer fd.decref() 419 var nn int 420 for { 421 max := len(p) 422 if fd.IsStream && max-nn > maxRW { 423 max = nn + maxRW 424 } 425 n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn)) 426 if err == syscall.EINTR { 427 continue 428 } 429 if n > 0 { 430 nn += n 431 } 432 if nn == len(p) { 433 return nn, err 434 } 435 if err != nil { 436 return nn, err 437 } 438 if n == 0 { 439 return nn, io.ErrUnexpectedEOF 440 } 441 } 442} 443 444// WriteToInet4 wraps the sendto network call for IPv4 addresses. 445func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) { 446 if err := fd.writeLock(); err != nil { 447 return 0, err 448 } 449 defer fd.writeUnlock() 450 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 451 return 0, err 452 } 453 for { 454 err := unix.SendtoInet4(fd.Sysfd, p, 0, sa) 455 if err == syscall.EINTR { 456 continue 457 } 458 if err == syscall.EAGAIN && fd.pd.pollable() { 459 if err = fd.pd.waitWrite(fd.isFile); err == nil { 460 continue 461 } 462 } 463 if err != nil { 464 return 0, err 465 } 466 return len(p), nil 467 } 468} 469 470// WriteToInet6 wraps the sendto network call for IPv6 addresses. 471func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) { 472 if err := fd.writeLock(); err != nil { 473 return 0, err 474 } 475 defer fd.writeUnlock() 476 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 477 return 0, err 478 } 479 for { 480 err := unix.SendtoInet6(fd.Sysfd, p, 0, sa) 481 if err == syscall.EINTR { 482 continue 483 } 484 if err == syscall.EAGAIN && fd.pd.pollable() { 485 if err = fd.pd.waitWrite(fd.isFile); err == nil { 486 continue 487 } 488 } 489 if err != nil { 490 return 0, err 491 } 492 return len(p), nil 493 } 494} 495 496// WriteTo wraps the sendto network call. 497func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) { 498 if err := fd.writeLock(); err != nil { 499 return 0, err 500 } 501 defer fd.writeUnlock() 502 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 503 return 0, err 504 } 505 for { 506 err := syscall.Sendto(fd.Sysfd, p, 0, sa) 507 if err == syscall.EINTR { 508 continue 509 } 510 if err == syscall.EAGAIN && fd.pd.pollable() { 511 if err = fd.pd.waitWrite(fd.isFile); err == nil { 512 continue 513 } 514 } 515 if err != nil { 516 return 0, err 517 } 518 return len(p), nil 519 } 520} 521 522// WriteMsg wraps the sendmsg network call. 523func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) { 524 if err := fd.writeLock(); err != nil { 525 return 0, 0, err 526 } 527 defer fd.writeUnlock() 528 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 529 return 0, 0, err 530 } 531 for { 532 n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0) 533 if err == syscall.EINTR { 534 continue 535 } 536 if err == syscall.EAGAIN && fd.pd.pollable() { 537 if err = fd.pd.waitWrite(fd.isFile); err == nil { 538 continue 539 } 540 } 541 if err != nil { 542 return n, 0, err 543 } 544 return n, len(oob), err 545 } 546} 547 548// WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4. 549func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) { 550 if err := fd.writeLock(); err != nil { 551 return 0, 0, err 552 } 553 defer fd.writeUnlock() 554 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 555 return 0, 0, err 556 } 557 for { 558 n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0) 559 if err == syscall.EINTR { 560 continue 561 } 562 if err == syscall.EAGAIN && fd.pd.pollable() { 563 if err = fd.pd.waitWrite(fd.isFile); err == nil { 564 continue 565 } 566 } 567 if err != nil { 568 return n, 0, err 569 } 570 return n, len(oob), err 571 } 572} 573 574// WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6. 575func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) { 576 if err := fd.writeLock(); err != nil { 577 return 0, 0, err 578 } 579 defer fd.writeUnlock() 580 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 581 return 0, 0, err 582 } 583 for { 584 n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0) 585 if err == syscall.EINTR { 586 continue 587 } 588 if err == syscall.EAGAIN && fd.pd.pollable() { 589 if err = fd.pd.waitWrite(fd.isFile); err == nil { 590 continue 591 } 592 } 593 if err != nil { 594 return n, 0, err 595 } 596 return n, len(oob), err 597 } 598} 599 600// Accept wraps the accept network call. 601func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) { 602 if err := fd.readLock(); err != nil { 603 return -1, nil, "", err 604 } 605 defer fd.readUnlock() 606 607 if err := fd.pd.prepareRead(fd.isFile); err != nil { 608 return -1, nil, "", err 609 } 610 for { 611 s, rsa, errcall, err := accept(fd.Sysfd) 612 if err == nil { 613 return s, rsa, "", err 614 } 615 switch err { 616 case syscall.EINTR: 617 continue 618 case syscall.EAGAIN: 619 if fd.pd.pollable() { 620 if err = fd.pd.waitRead(fd.isFile); err == nil { 621 continue 622 } 623 } 624 case syscall.ECONNABORTED: 625 // This means that a socket on the listen 626 // queue was closed before we Accept()ed it; 627 // it's a silly error, so try again. 628 continue 629 } 630 return -1, nil, errcall, err 631 } 632} 633 634// Fchmod wraps syscall.Fchmod. 635func (fd *FD) Fchmod(mode uint32) error { 636 if err := fd.incref(); err != nil { 637 return err 638 } 639 defer fd.decref() 640 return ignoringEINTR(func() error { 641 return syscall.Fchmod(fd.Sysfd, mode) 642 }) 643} 644 645// Fstat wraps syscall.Fstat 646func (fd *FD) Fstat(s *syscall.Stat_t) error { 647 if err := fd.incref(); err != nil { 648 return err 649 } 650 defer fd.decref() 651 return ignoringEINTR(func() error { 652 return syscall.Fstat(fd.Sysfd, s) 653 }) 654} 655 656// dupCloexecUnsupported indicates whether F_DUPFD_CLOEXEC is supported by the kernel. 657var dupCloexecUnsupported atomic.Bool 658 659// DupCloseOnExec dups fd and marks it close-on-exec. 660func DupCloseOnExec(fd int) (int, string, error) { 661 if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() { 662 r0, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0) 663 if err == nil { 664 return r0, "", nil 665 } 666 switch err { 667 case syscall.EINVAL, syscall.ENOSYS: 668 // Old kernel, or js/wasm (which returns 669 // ENOSYS). Fall back to the portable way from 670 // now on. 671 dupCloexecUnsupported.Store(true) 672 default: 673 return -1, "fcntl", err 674 } 675 } 676 return dupCloseOnExecOld(fd) 677} 678 679// Dup duplicates the file descriptor. 680func (fd *FD) Dup() (int, string, error) { 681 if err := fd.incref(); err != nil { 682 return -1, "", err 683 } 684 defer fd.decref() 685 return DupCloseOnExec(fd.Sysfd) 686} 687 688// On Unix variants only, expose the IO event for the net code. 689 690// WaitWrite waits until data can be written to fd. 691func (fd *FD) WaitWrite() error { 692 return fd.pd.waitWrite(fd.isFile) 693} 694 695// WriteOnce is for testing only. It makes a single write call. 696func (fd *FD) WriteOnce(p []byte) (int, error) { 697 if err := fd.writeLock(); err != nil { 698 return 0, err 699 } 700 defer fd.writeUnlock() 701 return ignoringEINTRIO(syscall.Write, fd.Sysfd, p) 702} 703 704// RawRead invokes the user-defined function f for a read operation. 705func (fd *FD) RawRead(f func(uintptr) bool) error { 706 if err := fd.readLock(); err != nil { 707 return err 708 } 709 defer fd.readUnlock() 710 if err := fd.pd.prepareRead(fd.isFile); err != nil { 711 return err 712 } 713 for { 714 if f(uintptr(fd.Sysfd)) { 715 return nil 716 } 717 if err := fd.pd.waitRead(fd.isFile); err != nil { 718 return err 719 } 720 } 721} 722 723// RawWrite invokes the user-defined function f for a write operation. 724func (fd *FD) RawWrite(f func(uintptr) bool) error { 725 if err := fd.writeLock(); err != nil { 726 return err 727 } 728 defer fd.writeUnlock() 729 if err := fd.pd.prepareWrite(fd.isFile); err != nil { 730 return err 731 } 732 for { 733 if f(uintptr(fd.Sysfd)) { 734 return nil 735 } 736 if err := fd.pd.waitWrite(fd.isFile); err != nil { 737 return err 738 } 739 } 740} 741 742// ignoringEINTRIO is like ignoringEINTR, but just for IO calls. 743func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { 744 for { 745 n, err := fn(fd, p) 746 if err != syscall.EINTR { 747 return n, err 748 } 749 } 750} 751