// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
/*
 * erofs-utils/lib/blobchunk.c
 *
 * Copyright (C) 2021, Alibaba Cloud
 */
#define _GNU_SOURCE
#include "erofs/hashmap.h"
#include "erofs/blobchunk.h"
#include "erofs/block_list.h"
#include "erofs/cache.h"
#include "sha256.h"
#include <unistd.h>

struct erofs_blobchunk {
	union {
		struct hashmap_entry ent;
		struct list_head list;
	};
	char sha256[32];
	unsigned int device_id;
	union {
		erofs_off_t chunksize;
		erofs_off_t sourceoffset;
	};
	erofs_blk_t blkaddr;
};

static struct hashmap blob_hashmap;
static FILE *blobfile;
static erofs_blk_t remapped_base;
static erofs_off_t datablob_size;
static bool multidev;
static struct erofs_buffer_head *bh_devt;
struct erofs_blobchunk erofs_holechunk = {
	.blkaddr = EROFS_NULL_ADDR,
};
static LIST_HEAD(unhashed_blobchunks);

struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
		erofs_blk_t blkaddr, erofs_off_t sourceoffset)
{
	struct erofs_blobchunk *chunk;

	chunk = calloc(1, sizeof(struct erofs_blobchunk));
	if (!chunk)
		return ERR_PTR(-ENOMEM);

	chunk->device_id = device_id;
	chunk->blkaddr = blkaddr;
	chunk->sourceoffset = sourceoffset;
	list_add_tail(&chunk->list, &unhashed_blobchunks);
	return chunk;
}

/*
 * Look up a chunk by its SHA-256 digest; on a hit, account the saved
 * bytes and reuse the existing chunk (or the shared hole chunk).
 * Otherwise append the data to the staging blob file, padded to the
 * block size, and insert a new hashmap entry.
 */
static struct erofs_blobchunk *erofs_blob_getchunk(struct erofs_sb_info *sbi,
						   u8 *buf, erofs_off_t chunksize)
{
	static u8 zeroed[EROFS_MAX_BLOCK_SIZE];
	struct erofs_blobchunk *chunk;
	unsigned int hash, padding;
	u8 sha256[32];
	erofs_off_t blkpos;
	int ret;

	erofs_sha256(buf, chunksize, sha256);
	hash = memhash(sha256, sizeof(sha256));
	chunk = hashmap_get_from_hash(&blob_hashmap, hash, sha256);
	if (chunk) {
		DBG_BUGON(chunksize != chunk->chunksize);

		sbi->saved_by_deduplication += chunksize;
		if (chunk->blkaddr == erofs_holechunk.blkaddr) {
			chunk = &erofs_holechunk;
			erofs_dbg("Found duplicated hole chunk");
		} else {
			erofs_dbg("Found duplicated chunk at %u",
				  chunk->blkaddr);
		}
		return chunk;
	}

	chunk = malloc(sizeof(struct erofs_blobchunk));
	if (!chunk)
		return ERR_PTR(-ENOMEM);

	chunk->chunksize = chunksize;
	memcpy(chunk->sha256, sha256, sizeof(sha256));
	blkpos = ftell(blobfile);
	DBG_BUGON(erofs_blkoff(sbi, blkpos));

	if (sbi->extra_devices)
		chunk->device_id = 1;
	else
		chunk->device_id = 0;
	chunk->blkaddr = erofs_blknr(sbi, blkpos);

	/* chunksize is 64-bit, so don't print it with %u */
	erofs_dbg("Writing chunk (%llu bytes) to %u", chunksize | 0ULL,
		  chunk->blkaddr);
	ret = fwrite(buf, chunksize, 1, blobfile);
	if (ret == 1) {
		padding = erofs_blkoff(sbi, chunksize);
		if (padding) {
			padding = erofs_blksiz(sbi) - padding;
			ret = fwrite(zeroed, padding, 1, blobfile);
		}
	}

	if (ret < 1) {
		free(chunk);
		return ERR_PTR(-ENOSPC);
	}

	hashmap_entry_init(&chunk->ent, hash);
	hashmap_add(&blob_hashmap, chunk);
	return chunk;
}
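/*
 * Hashmap comparison callback: two chunks are considered identical iff
 * their SHA-256 digests match.  If a lookup key (a raw digest) is
 * supplied, compare against it directly instead of the second entry.
 */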
static int erofs_blob_hashmap_cmp(const void *a, const void *b,
				  const void *key)
{
	const struct erofs_blobchunk *ec1 =
		container_of((struct hashmap_entry *)a,
			     struct erofs_blobchunk, ent);
	const struct erofs_blobchunk *ec2 =
		container_of((struct hashmap_entry *)b,
			     struct erofs_blobchunk, ent);

	return memcmp(ec1->sha256, key ? key : ec2->sha256,
		      sizeof(ec1->sha256));
}

/*
 * Convert the in-memory chunk pointer array into on-disk chunk indexes
 * (either full erofs_inode_chunk_index entries or plain block-map
 * entries) and emit one block-list record per contiguous extent.
 */
int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
				   erofs_off_t off)
{
	struct erofs_inode_chunk_index idx = {0};
	erofs_blk_t extent_start = EROFS_NULL_ADDR;
	erofs_blk_t extent_end, chunkblks;
	erofs_off_t source_offset;
	unsigned int dst, src, unit;
	bool first_extent = true;

	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(struct erofs_inode_chunk_index);
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
	chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);

	for (dst = src = 0; dst < inode->extent_isize;
	     src += sizeof(void *), dst += unit) {
		struct erofs_blobchunk *chunk;

		chunk = *(void **)(inode->chunkindexes + src);

		if (chunk->blkaddr == EROFS_NULL_ADDR) {
			idx.blkaddr = EROFS_NULL_ADDR;
		} else if (chunk->device_id) {
			DBG_BUGON(!(inode->u.chunkformat &
				    EROFS_CHUNK_FORMAT_INDEXES));
			idx.blkaddr = chunk->blkaddr;
			extent_start = EROFS_NULL_ADDR;
		} else {
			idx.blkaddr = remapped_base + chunk->blkaddr;
		}

		if (extent_start == EROFS_NULL_ADDR ||
		    idx.blkaddr != extent_end) {
			if (extent_start != EROFS_NULL_ADDR) {
				tarerofs_blocklist_write(extent_start,
						extent_end - extent_start,
						source_offset);
				erofs_droid_blocklist_write_extent(inode,
						extent_start,
						extent_end - extent_start,
						first_extent, false);
				first_extent = false;
			}
			extent_start = idx.blkaddr;
			source_offset = chunk->sourceoffset;
		}
		extent_end = idx.blkaddr + chunkblks;
		idx.device_id = cpu_to_le16(chunk->device_id);
		idx.blkaddr = cpu_to_le32(idx.blkaddr);

		if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE)
			memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit);
		else
			memcpy(inode->chunkindexes + dst, &idx, sizeof(idx));
	}
	off = roundup(off, unit);

	if (extent_start != EROFS_NULL_ADDR)
		tarerofs_blocklist_write(extent_start,
					 extent_end - extent_start,
					 source_offset);
	erofs_droid_blocklist_write_extent(inode, extent_start,
			extent_start == EROFS_NULL_ADDR ?
					0 : extent_end - extent_start,
			first_extent, true);

	return erofs_dev_write(inode->sbi, inode->chunkindexes, off,
			       inode->extent_isize);
}
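/*
 * Widen the chunk size from chunkbits to new_chunkbits (capped by the
 * on-disk block-bits field) by keeping every
 * (1 << (new_chunkbits - chunkbits))-th in-memory chunk pointer and
 * shrinking extent_isize to match.
 */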
int erofs_blob_mergechunks(struct erofs_inode *inode, unsigned int chunkbits,
			   unsigned int new_chunkbits)
{
	struct erofs_sb_info *sbi = inode->sbi;
	unsigned int dst, src, unit, count;

	if (new_chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
		new_chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
	if (chunkbits >= new_chunkbits)	/* no need to merge */
		goto out;

	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(struct erofs_inode_chunk_index);
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;

	count = round_up(inode->i_size, 1ULL << new_chunkbits) >> new_chunkbits;
	for (dst = src = 0; dst < count; ++dst) {
		*((void **)inode->chunkindexes + dst) =
			*((void **)inode->chunkindexes + src);
		src += 1U << (new_chunkbits - chunkbits);
	}

	DBG_BUGON(count * unit >= inode->extent_isize);
	inode->extent_isize = count * unit;
	chunkbits = new_chunkbits;
out:
	inode->u.chunkformat = (chunkbits - sbi->blkszbits) |
		(inode->u.chunkformat & ~EROFS_CHUNK_FORMAT_BLKBITS_MASK);
	return 0;
}

/* track the smallest extent (in blocks) seen so far for later merging */
static void erofs_update_minextblks(struct erofs_sb_info *sbi,
				    erofs_off_t start, erofs_off_t end,
				    erofs_blk_t *minextblks)
{
	erofs_blk_t lb;

	lb = lowbit((end - start) >> sbi->blkszbits);
	if (lb && lb < *minextblks)
		*minextblks = lb;
}

/* can `chunk` extend the extent that currently ends with `lastch`? */
static bool erofs_blob_can_merge(struct erofs_sb_info *sbi,
				 struct erofs_blobchunk *lastch,
				 struct erofs_blobchunk *chunk)
{
	if (!lastch)
		return true;
	if (lastch == &erofs_holechunk && chunk == &erofs_holechunk)
		return true;
	if (lastch->device_id == chunk->device_id &&
	    erofs_pos(sbi, lastch->blkaddr) + lastch->chunksize ==
	    erofs_pos(sbi, chunk->blkaddr))
		return true;

	return false;
}
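/*
 * Split a regular file into fixed-size chunks, deduplicate each chunk
 * through the blob hashmap, and record one in-memory chunk pointer per
 * slot.  Where SEEK_DATA is available, holes are detected up front and
 * mapped to the shared hole chunk instead of being read and written out.
 */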
int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
				  erofs_off_t startoff)
{
	struct erofs_sb_info *sbi = inode->sbi;
	unsigned int chunkbits = cfg.c_chunkbits;
	unsigned int count, unit;
	struct erofs_blobchunk *chunk, *lastch;
	struct erofs_inode_chunk_index *idx;
	erofs_off_t pos, len, chunksize, interval_start;
	erofs_blk_t minextblks;
	u8 *chunkdata;
	int ret;

#ifdef SEEK_DATA
	/* if the file is fully sparsed, use one big chunk instead */
	if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) {
		chunkbits = ilog2(inode->i_size - 1) + 1;
		if (chunkbits < sbi->blkszbits)
			chunkbits = sbi->blkszbits;
	}
#endif
	if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;

	chunksize = 1ULL << chunkbits;
	count = DIV_ROUND_UP(inode->i_size, chunksize);

	if (sbi->extra_devices)
		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
	if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(struct erofs_inode_chunk_index);
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;

	chunkdata = malloc(chunksize);
	if (!chunkdata)
		return -ENOMEM;

	inode->extent_isize = count * unit;
	inode->chunkindexes = malloc(count * max(sizeof(*idx), sizeof(void *)));
	if (!inode->chunkindexes) {
		ret = -ENOMEM;
		goto err;
	}
	idx = inode->chunkindexes;
	lastch = NULL;
	minextblks = BLK_ROUND_UP(sbi, inode->i_size);
	interval_start = 0;

	for (pos = 0; pos < inode->i_size; pos += len) {
#ifdef SEEK_DATA
		off_t offset = lseek(fd, pos + startoff, SEEK_DATA);

		if (offset < 0) {
			if (errno != ENXIO)
				offset = pos;
			else
				offset = ((pos >> chunkbits) + 1) << chunkbits;
		} else {
			offset -= startoff;

			if (offset != (offset & ~(chunksize - 1))) {
				offset &= ~(chunksize - 1);
				if (lseek(fd, offset + startoff, SEEK_SET) !=
					  startoff + offset) {
					ret = -EIO;
					goto err;
				}
			}
		}

		if (offset > pos) {
			if (!erofs_blob_can_merge(sbi, lastch,
						  &erofs_holechunk)) {
				erofs_update_minextblks(sbi, interval_start,
							pos, &minextblks);
				interval_start = pos;
			}
			do {
				*(void **)idx++ = &erofs_holechunk;
				pos += chunksize;
			} while (pos < offset);
			DBG_BUGON(pos != offset);
			lastch = &erofs_holechunk;
			len = 0;
			continue;
		}
#endif

		len = min_t(u64, inode->i_size - pos, chunksize);
		ret = read(fd, chunkdata, len);
		/* check errors separately: a negative int compared against
		 * an unsigned 64-bit `len` would wrap and pass the test */
		if (ret < 0 || (erofs_off_t)ret < len) {
			ret = -EIO;
			goto err;
		}

		chunk = erofs_blob_getchunk(sbi, chunkdata, len);
		if (IS_ERR(chunk)) {
			ret = PTR_ERR(chunk);
			goto err;
		}

		if (!erofs_blob_can_merge(sbi, lastch, chunk)) {
			erofs_update_minextblks(sbi, interval_start, pos,
						&minextblks);
			interval_start = pos;
		}
		*(void **)idx++ = chunk;
		lastch = chunk;
	}
	erofs_update_minextblks(sbi, interval_start, pos, &minextblks);
	inode->datalayout = EROFS_INODE_CHUNK_BASED;
	free(chunkdata);
	return erofs_blob_mergechunks(inode, chunkbits,
				      ilog2(minextblks) + sbi->blkszbits);
err:
	free(inode->chunkindexes);
	inode->chunkindexes = NULL;
	free(chunkdata);
	return ret;
}

/* build an all-hole chunk mapping for an inode that carries no data */
int erofs_write_zero_inode(struct erofs_inode *inode)
{
	struct erofs_sb_info *sbi = inode->sbi;
	unsigned int chunkbits = ilog2(inode->i_size - 1) + 1;
	unsigned int count;
	erofs_off_t chunksize, len, pos;
	struct erofs_inode_chunk_index *idx;

	if (chunkbits < sbi->blkszbits)
		chunkbits = sbi->blkszbits;
	if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
	inode->u.chunkformat |= chunkbits - sbi->blkszbits;

	chunksize = 1ULL << chunkbits;
	count = DIV_ROUND_UP(inode->i_size, chunksize);

	inode->extent_isize = count * EROFS_BLOCK_MAP_ENTRY_SIZE;
	idx = calloc(count, max(sizeof(*idx), sizeof(void *)));
	if (!idx)
		return -ENOMEM;
	inode->chunkindexes = idx;

	for (pos = 0; pos < inode->i_size; pos += len) {
		struct erofs_blobchunk *chunk;

		len = min_t(erofs_off_t, inode->i_size - pos, chunksize);
		chunk = erofs_get_unhashed_chunk(0, EROFS_NULL_ADDR, -1);
		if (IS_ERR(chunk)) {
			free(inode->chunkindexes);
			inode->chunkindexes = NULL;
			return PTR_ERR(chunk);
		}

		*(void **)idx++ = chunk;
	}
	inode->datalayout = EROFS_INODE_CHUNK_BASED;
	return 0;
}
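/*
 * Build chunk indexes for tar data without hashing or copying it here:
 * each chunk records the device/block position the payload occupies
 * and data_offset as its source offset.
 */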
int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset)
{
	struct erofs_sb_info *sbi = inode->sbi;
	unsigned int chunkbits = ilog2(inode->i_size - 1) + 1;
	unsigned int count, unit, device_id;
	erofs_off_t chunksize, len, pos;
	erofs_blk_t blkaddr;
	struct erofs_inode_chunk_index *idx;

	if (chunkbits < sbi->blkszbits)
		chunkbits = sbi->blkszbits;
	if (chunkbits - sbi->blkszbits > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
		chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + sbi->blkszbits;
	inode->u.chunkformat |= chunkbits - sbi->blkszbits;

	if (sbi->extra_devices) {
		device_id = 1;
		inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
		unit = sizeof(struct erofs_inode_chunk_index);
		DBG_BUGON(erofs_blkoff(sbi, data_offset));
		blkaddr = erofs_blknr(sbi, data_offset);
	} else {
		device_id = 0;
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;
		DBG_BUGON(erofs_blkoff(sbi, datablob_size));
		blkaddr = erofs_blknr(sbi, datablob_size);
		datablob_size += round_up(inode->i_size, erofs_blksiz(sbi));
	}
	chunksize = 1ULL << chunkbits;
	count = DIV_ROUND_UP(inode->i_size, chunksize);

	inode->extent_isize = count * unit;
	idx = calloc(count, max(sizeof(*idx), sizeof(void *)));
	if (!idx)
		return -ENOMEM;
	inode->chunkindexes = idx;

	for (pos = 0; pos < inode->i_size; pos += len) {
		struct erofs_blobchunk *chunk;

		len = min_t(erofs_off_t, inode->i_size - pos, chunksize);

		chunk = erofs_get_unhashed_chunk(device_id, blkaddr,
						 data_offset);
		if (IS_ERR(chunk)) {
			free(inode->chunkindexes);
			inode->chunkindexes = NULL;
			return PTR_ERR(chunk);
		}

		*(void **)idx++ = chunk;
		blkaddr += erofs_blknr(sbi, len);
		data_offset += len;
	}
	inode->datalayout = EROFS_INODE_CHUNK_BASED;
	return 0;
}

/*
 * Flush the staging blob: with extra devices, fill in the device table;
 * otherwise allocate a DATA area in the primary image and copy the blob
 * file into it.
 */
int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi)
{
	struct erofs_buffer_head *bh;
	ssize_t length;
	u64 pos_in, pos_out;
	ssize_t ret;

	if (blobfile) {
		fflush(blobfile);
		length = ftell(blobfile);
		if (length < 0)
			return -errno;

		if (sbi->extra_devices)
			sbi->devs[0].blocks = erofs_blknr(sbi, length);
		else
			datablob_size = length;
	}

	if (sbi->extra_devices) {
		unsigned int i;
		int err;
		erofs_blk_t nblocks;

		nblocks = erofs_mapbh(sbi->bmgr, NULL);
		pos_out = erofs_btell(bh_devt, false);
		i = 0;
		do {
			struct erofs_deviceslot dis = {
				.mapped_blkaddr = cpu_to_le32(nblocks),
				.blocks = cpu_to_le32(sbi->devs[i].blocks),
			};

			memcpy(dis.tag, sbi->devs[i].tag, sizeof(dis.tag));
			err = erofs_dev_write(sbi, &dis, pos_out, sizeof(dis));
			if (err)
				return err;
			pos_out += sizeof(dis);
			nblocks += sbi->devs[i].blocks;
		} while (++i < sbi->extra_devices);
		bh_devt->op = &erofs_drop_directly_bhops;
		erofs_bdrop(bh_devt, false);
		return 0;
	}

	bh = erofs_balloc(sbi->bmgr, DATA, datablob_size, 0, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);

	erofs_mapbh(NULL, bh->block);

	pos_out = erofs_btell(bh, false);
	remapped_base = erofs_blknr(sbi, pos_out);
	pos_out += sbi->bdev.offset;
	if (blobfile) {
		pos_in = 0;
		ret = erofs_copy_file_range(fileno(blobfile), &pos_in,
					    sbi->bdev.fd, &pos_out,
					    datablob_size);
		/* check errors separately so that a negative return isn't
		 * swallowed by the signed/unsigned comparison */
		ret = (ret < 0 || (erofs_off_t)ret < datablob_size) ?
			-EIO : 0;
	} else {
		ret = erofs_io_ftruncate(&sbi->bdev, pos_out + datablob_size);
	}
	bh->op = &erofs_drop_directly_bhops;
	erofs_bdrop(bh, false);
	return ret;
}
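/*
 * Tear down all chunk bookkeeping: close the staging blob file and free
 * every chunk on both the hashmap and the unhashed list.
 */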
void erofs_blob_exit(void)
{
	struct hashmap_iter iter;
	struct hashmap_entry *e;
	struct erofs_blobchunk *bc, *n;

	if (blobfile)
		fclose(blobfile);

	/* Disable hashmap shrink, effectively disabling rehash.
	 * This way we can iterate over the entire hashmap efficiently
	 * and safely by using hashmap_iter_next() */
	hashmap_disable_shrink(&blob_hashmap);
	e = hashmap_iter_first(&blob_hashmap, &iter);
	while (e) {
		bc = container_of((struct hashmap_entry *)e,
				  struct erofs_blobchunk, ent);
		DBG_BUGON(hashmap_remove(&blob_hashmap, e) != e);
		free(bc);
		e = hashmap_iter_next(&iter);
	}
	DBG_BUGON(hashmap_free(&blob_hashmap));

	list_for_each_entry_safe(bc, n, &unhashed_blobchunks, list) {
		list_del(&bc->list);
		free(bc);
	}
}

/*
 * Pre-insert the digest of an all-zero chunk so that zero-filled chunks
 * deduplicate against the shared hole chunk instead of being stored.
 */
static int erofs_insert_zerochunk(erofs_off_t chunksize)
{
	u8 *zeros;
	struct erofs_blobchunk *chunk;
	u8 sha256[32];
	unsigned int hash;

	zeros = calloc(1, chunksize);
	if (!zeros)
		return -ENOMEM;

	erofs_sha256(zeros, chunksize, sha256);
	free(zeros);
	hash = memhash(sha256, sizeof(sha256));
	/* use calloc so that fields such as device_id start out zeroed */
	chunk = calloc(1, sizeof(struct erofs_blobchunk));
	if (!chunk)
		return -ENOMEM;

	chunk->chunksize = chunksize;
	/* treat chunk filled with zeros as hole */
	chunk->blkaddr = erofs_holechunk.blkaddr;
	memcpy(chunk->sha256, sha256, sizeof(sha256));

	hashmap_entry_init(&chunk->ent, hash);
	hashmap_add(&blob_hashmap, chunk);
	return 0;
}

int erofs_blob_init(const char *blobfile_path, erofs_off_t chunksize)
{
	if (!blobfile_path) {
#ifdef HAVE_TMPFILE64
		blobfile = tmpfile64();
#else
		blobfile = tmpfile();
#endif
		multidev = false;
	} else {
		blobfile = fopen(blobfile_path, "wb");
		multidev = true;
	}
	if (!blobfile)
		return -EACCES;

	hashmap_init(&blob_hashmap, erofs_blob_hashmap_cmp, 0);
	return erofs_insert_zerochunk(chunksize);
}

int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices)
{
	if (!devices)
		return 0;

	sbi->devs = calloc(devices, sizeof(sbi->devs[0]));
	if (!sbi->devs)
		return -ENOMEM;

	bh_devt = erofs_balloc(sbi->bmgr, DEVT,
			       sizeof(struct erofs_deviceslot) * devices,
			       0, 0);
	if (IS_ERR(bh_devt)) {
		free(sbi->devs);
		return PTR_ERR(bh_devt);
	}
	erofs_mapbh(NULL, bh_devt->block);
	bh_devt->op = &erofs_skip_write_bhops;
	sbi->devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;

	sbi->extra_devices = devices;
	erofs_sb_set_device_table(sbi);
	return 0;
}