// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 /* * Copyright (C) 2018 HUAWEI, Inc. * http://www.huawei.com/ * Created by Li Guifu */ #ifndef _LARGEFILE64_SOURCE #define _LARGEFILE64_SOURCE #endif #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include "erofs/internal.h" #ifdef HAVE_LINUX_FS_H #include #endif #ifdef HAVE_LINUX_FALLOC_H #include #endif #ifdef HAVE_SYS_STATFS_H #include #endif #define EROFS_MODNAME "erofs_io" #include "erofs/print.h" int erofs_io_fstat(struct erofs_vfile *vf, struct stat *buf) { if (__erofs_unlikely(cfg.c_dry_run)) { buf->st_size = 0; buf->st_mode = S_IFREG | 0777; return 0; } if (vf->ops) return vf->ops->fstat(vf, buf); return fstat(vf->fd, buf); } ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf, u64 pos, size_t len) { ssize_t ret, written = 0; if (__erofs_unlikely(cfg.c_dry_run)) return 0; if (vf->ops) return vf->ops->pwrite(vf, buf, pos, len); pos += vf->offset; do { #ifdef HAVE_PWRITE64 ret = pwrite64(vf->fd, buf, len, (off64_t)pos); #else ret = pwrite(vf->fd, buf, len, (off_t)pos); #endif if (ret <= 0) { if (!ret) break; if (errno != EINTR) { erofs_err("failed to write: %s", strerror(errno)); return -errno; } ret = 0; } buf += ret; pos += ret; written += ret; } while (written < len); return written; } int erofs_io_fsync(struct erofs_vfile *vf) { int ret; if (__erofs_unlikely(cfg.c_dry_run)) return 0; if (vf->ops) return vf->ops->fsync(vf); ret = fsync(vf->fd); if (ret) { erofs_err("failed to fsync(!): %s", strerror(errno)); return -errno; } return 0; } ssize_t erofs_io_fallocate(struct erofs_vfile *vf, u64 offset, size_t len, bool zeroout) { static const char zero[EROFS_MAX_BLOCK_SIZE] = {0}; ssize_t ret; if (__erofs_unlikely(cfg.c_dry_run)) return 0; if (vf->ops) return vf->ops->fallocate(vf, offset, len, zeroout); #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) if (!zeroout && fallocate(vf->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset + vf->offset, len) >= 0) return 0; #endif while (len > EROFS_MAX_BLOCK_SIZE) { ret = erofs_io_pwrite(vf, zero, offset, EROFS_MAX_BLOCK_SIZE); if (ret < 0) return ret; len -= ret; offset += ret; } return erofs_io_pwrite(vf, zero, offset, len) == len ? 0 : -EIO; } int erofs_io_ftruncate(struct erofs_vfile *vf, u64 length) { int ret; struct stat st; if (__erofs_unlikely(cfg.c_dry_run)) return 0; if (vf->ops) return vf->ops->ftruncate(vf, length); ret = fstat(vf->fd, &st); if (ret) { erofs_err("failed to fstat: %s", strerror(errno)); return -errno; } length += vf->offset; if (S_ISBLK(st.st_mode) || st.st_size == length) return 0; return ftruncate(vf->fd, length); } ssize_t erofs_io_pread(struct erofs_vfile *vf, void *buf, u64 pos, size_t len) { ssize_t ret, read = 0; if (__erofs_unlikely(cfg.c_dry_run)) return 0; if (vf->ops) return vf->ops->pread(vf, buf, pos, len); pos += vf->offset; do { #ifdef HAVE_PREAD64 ret = pread64(vf->fd, buf, len, (off64_t)pos); #else ret = pread(vf->fd, buf, len, (off_t)pos); #endif if (ret <= 0) { if (!ret) break; if (errno != EINTR) { erofs_err("failed to read: %s", strerror(errno)); return -errno; } ret = 0; } pos += ret; buf += ret; read += ret; } while (read < len); return read; } static int erofs_get_bdev_size(int fd, u64 *bytes) { errno = ENOTSUP; #ifdef BLKGETSIZE64 if (ioctl(fd, BLKGETSIZE64, bytes) >= 0) return 0; #endif #ifdef BLKGETSIZE { unsigned long size; if (ioctl(fd, BLKGETSIZE, &size) >= 0) { *bytes = ((u64)size << 9); return 0; } } #endif return -errno; } #if defined(__linux__) && !defined(BLKDISCARD) #define BLKDISCARD _IO(0x12, 119) #endif static int erofs_bdev_discard(int fd, u64 block, u64 count) { #ifdef BLKDISCARD u64 range[2] = { block, count }; return ioctl(fd, BLKDISCARD, &range); #else return -EOPNOTSUPP; #endif } int erofs_dev_open(struct erofs_sb_info *sbi, const char *dev, int flags) { bool ro = (flags & O_ACCMODE) == O_RDONLY; bool truncate = flags & O_TRUNC; struct stat st; int fd, ret; #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS) bool again = false; repeat: #endif fd = open(dev, (ro ? O_RDONLY : O_RDWR | O_CREAT) | O_BINARY, 0644); if (fd < 0) { erofs_err("failed to open %s: %s", dev, strerror(errno)); return -errno; } if (ro || !truncate) goto out; ret = fstat(fd, &st); if (ret) { erofs_err("failed to fstat(%s): %s", dev, strerror(errno)); close(fd); return -errno; } switch (st.st_mode & S_IFMT) { case S_IFBLK: ret = erofs_get_bdev_size(fd, &sbi->devsz); if (ret) { erofs_err("failed to get block device size(%s): %s", dev, strerror(errno)); close(fd); return ret; } sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi)); ret = erofs_bdev_discard(fd, 0, sbi->devsz); if (ret) erofs_err("failed to erase block device(%s): %s", dev, erofs_strerror(ret)); break; case S_IFREG: if (st.st_size) { #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS) struct statfs stfs; if (again) { close(fd); return -ENOTEMPTY; } /* * fses like EXT4 and BTRFS will flush dirty blocks * after truncate(0) even after the writeback happens * (see kernel commit 7d8f9f7d150d and ccd2506bd431), * which is NOT our intention. Let's work around this. */ if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 || stfs.f_type == 0x9123683E)) { close(fd); unlink(dev); again = true; goto repeat; } #endif ret = ftruncate(fd, 0); if (ret) { erofs_err("failed to ftruncate(%s).", dev); close(fd); return -errno; } } sbi->devblksz = st.st_blksize; break; default: erofs_err("bad file type (%s, %o).", dev, st.st_mode); close(fd); return -EINVAL; } out: sbi->devname = strdup(dev); if (!sbi->devname) { close(fd); return -ENOMEM; } sbi->bdev.fd = fd; erofs_info("successfully to open %s", dev); return 0; } void erofs_dev_close(struct erofs_sb_info *sbi) { if (!sbi->bdev.ops) close(sbi->bdev.fd); free(sbi->devname); sbi->devname = NULL; sbi->bdev.fd = -1; } void erofs_blob_closeall(struct erofs_sb_info *sbi) { unsigned int i; for (i = 0; i < sbi->nblobs; ++i) close(sbi->blobfd[i]); sbi->nblobs = 0; } int erofs_blob_open_ro(struct erofs_sb_info *sbi, const char *dev) { int fd = open(dev, O_RDONLY | O_BINARY); if (fd < 0) { erofs_err("failed to open(%s).", dev); return -errno; } sbi->blobfd[sbi->nblobs] = fd; erofs_info("successfully to open blob%u %s", sbi->nblobs, dev); ++sbi->nblobs; return 0; } ssize_t erofs_dev_read(struct erofs_sb_info *sbi, int device_id, void *buf, u64 offset, size_t len) { ssize_t read; if (device_id) { if (device_id >= sbi->nblobs) { erofs_err("invalid device id %d", device_id); return -EIO; } read = erofs_io_pread(&((struct erofs_vfile) { .fd = sbi->blobfd[device_id - 1], }), buf, offset, len); } else { read = erofs_io_pread(&sbi->bdev, buf, offset, len); } if (read < 0) return read; if (read < len) { erofs_info("reach EOF of device @ %llu, pading with zeroes", offset | 0ULL); memset(buf + read, 0, len - read); } return 0; } static ssize_t __erofs_copy_file_range(int fd_in, u64 *off_in, int fd_out, u64 *off_out, size_t length) { size_t copied = 0; char buf[8192]; /* * Main copying loop. The buffer size is arbitrary and is a * trade-off between stack size consumption, cache usage, and * amortization of system call overhead. */ while (length > 0) { size_t to_read; ssize_t read_count; char *end, *p; to_read = min_t(size_t, length, sizeof(buf)); #ifdef HAVE_PREAD64 read_count = pread64(fd_in, buf, to_read, *off_in); #else read_count = pread(fd_in, buf, to_read, *off_in); #endif if (read_count == 0) /* End of file reached prematurely. */ return copied; if (read_count < 0) { /* Report the number of bytes copied so far. */ if (copied > 0) return copied; return -1; } *off_in += read_count; /* Write the buffer part which was read to the destination. */ end = buf + read_count; for (p = buf; p < end; ) { ssize_t write_count; #ifdef HAVE_PWRITE64 write_count = pwrite64(fd_out, p, end - p, *off_out); #else write_count = pwrite(fd_out, p, end - p, *off_out); #endif if (write_count < 0) { /* * Adjust the input read position to match what * we have written, so that the caller can pick * up after the error. */ size_t written = p - buf; /* * NB: This needs to be signed so that we can * form the negative value below. */ ssize_t overread = read_count - written; *off_in -= overread; /* Report the number of bytes copied so far. */ if (copied + written > 0) return copied + written; return -1; } p += write_count; *off_out += write_count; } /* Write loop. */ copied += read_count; length -= read_count; } return copied; } ssize_t erofs_copy_file_range(int fd_in, u64 *off_in, int fd_out, u64 *off_out, size_t length) { #ifdef HAVE_COPY_FILE_RANGE off64_t off64_in = *off_in, off64_out = *off_out; ssize_t ret; ret = copy_file_range(fd_in, &off64_in, fd_out, &off64_out, length, 0); if (ret >= 0) goto out; if (errno != ENOSYS && errno != EXDEV) { ret = -errno; out: *off_in = off64_in; *off_out = off64_out; return ret; } #endif return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length); } ssize_t erofs_io_read(struct erofs_vfile *vf, void *buf, size_t bytes) { ssize_t i = 0; if (vf->ops) return vf->ops->read(vf, buf, bytes); while (bytes) { int len = bytes > INT_MAX ? INT_MAX : bytes; int ret; ret = read(vf->fd, buf + i, len); if (ret < 1) { if (ret == 0) { break; } else if (errno != EINTR) { erofs_err("failed to read : %s", strerror(errno)); return -errno; } } bytes -= ret; i += ret; } return i; } #ifdef HAVE_SYS_SENDFILE_H #include #endif off_t erofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence) { if (vf->ops) return vf->ops->lseek(vf, offset, whence); return lseek(vf->fd, offset, whence); } int erofs_io_xcopy(struct erofs_vfile *vout, off_t pos, struct erofs_vfile *vin, unsigned int len, bool noseek) { if (vout->ops) return vout->ops->xcopy(vout, pos, vin, len, noseek); if (len && !vin->ops) { off_t ret __maybe_unused; #ifdef HAVE_COPY_FILE_RANGE ret = copy_file_range(vin->fd, NULL, vout->fd, &pos, len, 0); if (ret > 0) len -= ret; #endif #if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE) if (len && !noseek) { ret = lseek(vout->fd, pos, SEEK_SET); if (ret == pos) { ret = sendfile(vout->fd, vin->fd, NULL, len); if (ret > 0) { pos += ret; len -= ret; } } } #endif } do { char buf[32768]; int ret = min_t(unsigned int, len, sizeof(buf)); ret = erofs_io_read(vin, buf, ret); if (ret < 0) return ret; if (ret > 0) { ret = erofs_io_pwrite(vout, buf, pos, ret); if (ret < 0) return ret; pos += ret; } len -= ret; } while (len); return 0; }