Lines matching "pre", "-", "verified" in zswap.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * zswap.c - zswap driver file
7 * RAM-based memory pool. This can result in a significant I/O reduction on
32 #include <linux/page-flags.h>
125 /* Enable/disable memory pressure-based shrinker. */
154 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
157 * needs to be verified that it's still valid in the tree.
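The two comment lines above state the writeback invariant: an entry taken off the LRU without holding the tree lock may already have been invalidated, so it has to be looked up again before it is used. A minimal sketch of that re-check, assuming the xarray-backed tree that the store path further down refers to (function name and signature are illustrative):

/*
 * Illustrative only: the caller pulled @entry off the LRU without the tree
 * lock, so confirm the same entry is still installed at its swap offset
 * before dereferencing it for writeback.
 */
static bool zswap_entry_still_valid(struct xarray *tree, pgoff_t offset,
				    struct zswap_entry *entry)
{
	return xa_load(tree, offset) == entry;
}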
184 * swpentry - associated swap entry, the offset indexes into the red-black tree
185 * length - the length in bytes of the compressed page data. Needed during
187 * referenced - true if the entry recently entered the zswap pool. Unset by the
191 * pool - the zswap_pool the entry's data is in
192 * handle - zpool allocation handle that stores the compressed page data
193 * objcg - the obj_cgroup that the compressed memory is charged to
194 * lru - handle to the pool's lru used to evict pages.
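Taken together, the field descriptions above outline the entry layout; the sketch below reconstructs it (member order and exact types are assumptions, not part of the listing):

struct zswap_entry {
	swp_entry_t swpentry;		/* key: the associated swap entry */
	unsigned int length;		/* compressed length in bytes */
	bool referenced;		/* second-chance bit, cleared by the shrinker */
	struct zswap_pool *pool;	/* pool holding the compressed data */
	unsigned long handle;		/* zpool allocation handle */
	struct obj_cgroup *objcg;	/* obj_cgroup charged for the memory */
	struct list_head lru;		/* node on the pool's LRU */
};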
209 /* RCU-protected iteration */
241 pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
242 zpool_get_type((p)->zpool))
273 pool->zpool = zpool_create_pool(type, name, gfp); in zswap_pool_create()
274 if (!pool->zpool) { in zswap_pool_create()
278 pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); in zswap_pool_create()
280 strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); in zswap_pool_create()
282 pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); in zswap_pool_create()
283 if (!pool->acomp_ctx) { in zswap_pool_create()
289 mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex); in zswap_pool_create()
292 &pool->node); in zswap_pool_create()
299 ret = percpu_ref_init(&pool->ref, __zswap_pool_empty, in zswap_pool_create()
303 INIT_LIST_HEAD(&pool->list); in zswap_pool_create()
310 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); in zswap_pool_create()
312 if (pool->acomp_ctx) in zswap_pool_create()
313 free_percpu(pool->acomp_ctx); in zswap_pool_create()
314 if (pool->zpool) in zswap_pool_create()
315 zpool_destroy_pool(pool->zpool); in zswap_pool_create()
366 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); in zswap_pool_destroy()
367 free_percpu(pool->acomp_ctx); in zswap_pool_destroy()
369 zpool_destroy_pool(pool->zpool); in zswap_pool_destroy()
381 WARN_ON(!percpu_ref_is_zero(&pool->ref)); in __zswap_pool_release()
382 percpu_ref_exit(&pool->ref); in __zswap_pool_release()
400 list_del_rcu(&pool->list); in __zswap_pool_empty()
402 INIT_WORK(&pool->release_work, __zswap_pool_release); in __zswap_pool_empty()
403 schedule_work(&pool->release_work); in __zswap_pool_empty()
413 return percpu_ref_tryget(&pool->ref); in zswap_pool_tryget()
419 percpu_ref_get(&pool->ref); in zswap_pool_get()
424 percpu_ref_put(&pool->ref); in zswap_pool_put()
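The three helpers above are thin wrappers around a percpu_ref whose release callback, __zswap_pool_empty(), is wired up in zswap_pool_create(); a hedged sketch of how a lookup path uses them (function body is illustrative):

static bool zswap_pool_do_work(struct zswap_pool *pool)
{
	/* The pool may already be dying after a parameter change. */
	if (!zswap_pool_tryget(pool))
		return false;

	/* ... safe to use pool->zpool and pool->acomp_ctx here ... */

	/*
	 * Once percpu_ref_kill() has run, the final put triggers
	 * __zswap_pool_empty(), which schedules __zswap_pool_release().
	 */
	zswap_pool_put(pool);
	return true;
}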
460 /* type and compressor must be null-terminated */
468 if (strcmp(pool->tfm_name, compressor)) in zswap_pool_find_get()
470 if (strcmp(zpool_get_type(pool->zpool), type)) in zswap_pool_find_get()
498 total += zpool_get_total_pages(pool->zpool); in zswap_total_pages()
526 if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool) in zswap_pool_changed()
531 /* val must be a null-terminated string */
543 /* if this is load-time (pre-init) param setting, in __zswap_param_set()
553 ret = -ENODEV; in __zswap_param_set()
564 return -ENOENT; in __zswap_param_set()
570 return -ENOENT; in __zswap_param_set()
575 return -EINVAL; in __zswap_param_set()
584 list_del_rcu(&pool->list); in __zswap_param_set()
597 percpu_ref_resurrect(&pool->ref); in __zswap_param_set()
606 ret = -EINVAL; in __zswap_param_set()
612 list_add_rcu(&pool->list, &zswap_pools); in __zswap_param_set()
615 /* add the possibly pre-existing pool to the end of the pools in __zswap_param_set()
619 list_add_tail_rcu(&pool->list, &zswap_pools); in __zswap_param_set()
629 * when the other param is changed. We already verified this in __zswap_param_set()
640 percpu_ref_kill(&put_pool->ref); in __zswap_param_set()
660 int ret = -ENODEV; in zswap_enabled_param_set()
662 /* if this is load-time (pre-init) param setting, only set param. */ in zswap_enabled_param_set()
694 return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL; in mem_cgroup_from_entry()
727 list_lru_add(list_lru, &entry->lru, nid, memcg); in zswap_lru_add()
739 list_lru_del(list_lru, &entry->lru, nid, memcg); in zswap_lru_del()
745 atomic_long_set(&lruvec->zswap_lruvec_state.nr_disk_swapins, 0); in zswap_lruvec_state_init()
754 atomic_long_inc(&lruvec->zswap_lruvec_state.nr_disk_swapins); in zswap_folio_swapin()
806 zpool_free(entry->pool->zpool, entry->handle); in zswap_entry_free()
807 zswap_pool_put(entry->pool); in zswap_entry_free()
808 if (entry->objcg) { in zswap_entry_free()
809 obj_cgroup_uncharge_zswap(entry->objcg, entry->length); in zswap_entry_free()
810 obj_cgroup_put(entry->objcg); in zswap_entry_free()
822 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); in zswap_cpu_comp_prepare()
830 ret = -ENOMEM; in zswap_cpu_comp_prepare()
834 acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); in zswap_cpu_comp_prepare()
837 pool->tfm_name, PTR_ERR(acomp)); in zswap_cpu_comp_prepare()
845 pool->tfm_name); in zswap_cpu_comp_prepare()
846 ret = -ENOMEM; in zswap_cpu_comp_prepare()
855 mutex_lock(&acomp_ctx->mutex); in zswap_cpu_comp_prepare()
856 crypto_init_wait(&acomp_ctx->wait); in zswap_cpu_comp_prepare()
864 crypto_req_done, &acomp_ctx->wait); in zswap_cpu_comp_prepare()
866 acomp_ctx->buffer = buffer; in zswap_cpu_comp_prepare()
867 acomp_ctx->acomp = acomp; in zswap_cpu_comp_prepare()
868 acomp_ctx->is_sleepable = acomp_is_async(acomp); in zswap_cpu_comp_prepare()
869 acomp_ctx->req = req; in zswap_cpu_comp_prepare()
870 mutex_unlock(&acomp_ctx->mutex); in zswap_cpu_comp_prepare()
883 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); in zswap_cpu_comp_dead()
891 mutex_lock(&acomp_ctx->mutex); in zswap_cpu_comp_dead()
892 req = acomp_ctx->req; in zswap_cpu_comp_dead()
893 acomp = acomp_ctx->acomp; in zswap_cpu_comp_dead()
894 buffer = acomp_ctx->buffer; in zswap_cpu_comp_dead()
895 acomp_ctx->req = NULL; in zswap_cpu_comp_dead()
896 acomp_ctx->acomp = NULL; in zswap_cpu_comp_dead()
897 acomp_ctx->buffer = NULL; in zswap_cpu_comp_dead()
898 mutex_unlock(&acomp_ctx->mutex); in zswap_cpu_comp_dead()
918 acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); in acomp_ctx_get_cpu_lock()
919 mutex_lock(&acomp_ctx->mutex); in acomp_ctx_get_cpu_lock()
920 if (likely(acomp_ctx->req)) in acomp_ctx_get_cpu_lock()
924 * getting the per-CPU ctx but before the mutex was acquired. If in acomp_ctx_get_cpu_lock()
926 * already freed ctx->req (among other things) and set it to in acomp_ctx_get_cpu_lock()
929 mutex_unlock(&acomp_ctx->mutex); in acomp_ctx_get_cpu_lock()
935 mutex_unlock(&acomp_ctx->mutex); in acomp_ctx_put_unlock()
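The comment inside acomp_ctx_get_cpu_lock() above describes a race with CPU hotplug: zswap_cpu_comp_dead() tears the per-CPU context down under the same mutex and NULLs ->req. A simplified sketch of the resulting lock-and-recheck loop (not the exact function body):

for (;;) {
	acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);
	if (likely(acomp_ctx->req))
		return acomp_ctx;	/* context alive; return with mutex held */
	/*
	 * The CPU went offline between raw_cpu_ptr() and mutex_lock(), and
	 * zswap_cpu_comp_dead() already freed the request: drop the mutex
	 * and retry on whichever CPU we are running on now.
	 */
	mutex_unlock(&acomp_ctx->mutex);
}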
952 dst = acomp_ctx->buffer; in zswap_compress()
957 * We need PAGE_SIZE * 2 here since there may be an over-compression case, in zswap_compress()
958 * and hardware accelerators may not check the dst buffer size, so in zswap_compress()
962 acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); in zswap_compress()
976 comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); in zswap_compress()
977 dlen = acomp_ctx->req->dlen; in zswap_compress()
981 zpool = pool->zpool; in zswap_compress()
993 entry->handle = handle; in zswap_compress()
994 entry->length = dlen; in zswap_compress()
997 if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC) in zswap_compress()
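The sizing comment above is also why the per-CPU scratch buffer set up in zswap_cpu_comp_prepare() is two pages long; a hedged sketch of the allocation and the matching request setup (exact call sites assumed):

/* scratch buffer: worst case is incompressible data plus compressor overrun */
buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));

/* give the compressor the whole scratch area so an overrun cannot corrupt memory */
sg_init_one(&output, dst, PAGE_SIZE * 2);
acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);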
1010 struct zpool *zpool = entry->pool->zpool; in zswap_decompress()
1015 acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); in zswap_decompress()
1016 src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO); in zswap_decompress()
1021 * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer, in zswap_decompress()
1026 if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) || in zswap_decompress()
1028 memcpy(acomp_ctx->buffer, src, entry->length); in zswap_decompress()
1029 src = acomp_ctx->buffer; in zswap_decompress()
1030 zpool_unmap_handle(zpool, entry->handle); in zswap_decompress()
1033 sg_init_one(&input, src, entry->length); in zswap_decompress()
1036 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); in zswap_decompress()
1037 BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); in zswap_decompress()
1038 BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); in zswap_decompress()
1040 if (src != acomp_ctx->buffer) in zswap_decompress()
1041 zpool_unmap_handle(zpool, entry->handle); in zswap_decompress()
1077 return -ENOMEM; in zswap_writeback_entry()
1088 return -EEXIST; in zswap_writeback_entry()
1105 return -ENOMEM; in zswap_writeback_entry()
1111 if (entry->objcg) in zswap_writeback_entry()
1112 count_objcg_events(entry->objcg, ZSWPWB, 1); in zswap_writeback_entry()
1139 * adjusted by the pool activities - if the pool is dominated by new entries
1168 if (entry->referenced) { in shrink_memcg_cb()
1169 entry->referenced = false; in shrink_memcg_cb()
1194 * We don't do any trylocking; -ENOMEM comes closest, in shrink_memcg_cb()
1198 list_move_tail(item, &l->list); in shrink_memcg_cb()
1203 * until the entry is verified to still be alive in the tree. in shrink_memcg_cb()
1205 swpentry = entry->swpentry; in shrink_memcg_cb()
1211 spin_unlock(&l->lock); in shrink_memcg_cb()
1224 if (writeback_result == -EEXIST && encountered_page_in_swapcache) { in shrink_memcg_cb()
1242 !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { in zswap_shrinker_scan()
1243 sc->nr_scanned = 0; in zswap_shrinker_scan()
1259 struct mem_cgroup *memcg = sc->memcg; in zswap_shrinker_count()
1260 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); in zswap_shrinker_count()
1262 &lruvec->zswap_lruvec_state.nr_disk_swapins; in zswap_shrinker_count()
1272 * rules (may_enter_fs()), which apply on a per-folio basis. in zswap_shrinker_count()
1274 if (!gfp_has_io_fs(sc->gfp_mask)) in zswap_shrinker_count()
1278 * For memcg, use the cgroup-wide ZSWAP stats since we don't in zswap_shrinker_count()
1279 * have them per-node and thus per-lruvec. Careful if memcg is in zswap_shrinker_count()
1280 * runtime-disabled: we can get sc->memcg == NULL, which is ok in zswap_shrinker_count()
1283 * Without memcg, use the zswap pool-wide metrics. in zswap_shrinker_count()
1311 nr_remain = nr_disk_swapins_cur - nr_freeable; in zswap_shrinker_count()
1315 nr_freeable -= nr_disk_swapins_cur - nr_remain; in zswap_shrinker_count()
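A worked example of the protection arithmetic in the two lines above (numbers invented, and this assumes the branch not shown in the listing clamps nr_remain to zero when freeable pages outnumber recent swapins): with nr_freeable = 20 and nr_disk_swapins_cur = 30, nr_remain = 30 - 20 = 10 and nr_freeable -= 30 - 10, leaving 0, so the shrinker reports nothing to reclaim; with nr_freeable = 100 and nr_disk_swapins_cur = 30, nr_remain clamps to 0 and nr_freeable drops to 70. In effect, each recently swapped-in page shields one object at the tail of the LRU from being counted as freeable.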
1333 shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); in zswap_alloc_shrinker()
1337 shrinker->scan_objects = zswap_shrinker_scan; in zswap_alloc_shrinker()
1338 shrinker->count_objects = zswap_shrinker_count; in zswap_alloc_shrinker()
1339 shrinker->batch = 0; in zswap_alloc_shrinker()
1340 shrinker->seeks = DEFAULT_SEEKS; in zswap_alloc_shrinker()
1349 return -ENOENT; in shrink_memcg()
1356 return -ENOENT; in shrink_memcg()
1363 scanned += 1 - nr_to_walk; in shrink_memcg()
1367 return -ENOENT; in shrink_memcg()
1369 return shrunk ? 0 : -EAGAIN; in shrink_memcg()
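The return values of shrink_memcg() above encode three outcomes; a hedged sketch of how a caller can interpret them (the specific reasons listed are inferred from the -ENOENT returns in the listing):

ret = shrink_memcg(memcg);
if (ret == -ENOENT) {
	/* not a writeback candidate: memcg gone/offline, writeback disabled,
	 * or nothing on its zswap LRU */
} else if (ret == -EAGAIN) {
	/* walked the LRU but failed to write anything back */
} else {
	/* ret == 0: at least one entry was written back */
}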
1382 * Global reclaim will select cgroup in a round-robin fashion from all in shrink_worker()
1384 * writeback-disabled memcgs (memory.zswap.writeback=0) are not in shrink_worker()
1389 * - No writeback-candidate memcgs found in a memcg tree walk. in shrink_worker()
1390 * - Shrinking a writeback-candidate memcg failed. in shrink_worker()
1442 * There are no writeback-candidate pages in the memcg. in shrink_worker()
1447 if (ret == -ENOENT) in shrink_worker()
1485 WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); in zswap_store_page()
1508 obj_cgroup_charge_zswap(objcg, entry->length); in zswap_store_page()
1522 entry->pool = pool; in zswap_store_page()
1523 entry->swpentry = page_swpentry; in zswap_store_page()
1524 entry->objcg = objcg; in zswap_store_page()
1525 entry->referenced = true; in zswap_store_page()
1526 if (entry->length) { in zswap_store_page()
1527 INIT_LIST_HEAD(&entry->lru); in zswap_store_page()
1534 zpool_free(pool->zpool, entry->handle); in zswap_store_page()
1543 swp_entry_t swp = folio->swap; in zswap_store()
1628 swp_entry_t swp = folio->swap; in zswap_load()
1654 * in-memory copies outweighs any benefits of caching the in zswap_load()
1673 if (entry->objcg) in zswap_load()
1674 count_objcg_events(entry->objcg, ZSWPIN, 1); in zswap_load()
1708 return -ENOMEM; in zswap_swapon()
1761 return -ENODEV; in zswap_debugfs_init()
1814 shrink_wq = alloc_workqueue("zswap-shrink", in zswap_setup()
1830 pr_info("loaded using pool %s/%s\n", pool->tfm_name, in zswap_setup()
1831 zpool_get_type(pool->zpool)); in zswap_setup()
1832 list_add(&pool->list, &zswap_pools); in zswap_setup()
1854 /* if built-in, we aren't unloaded on failure; don't allow use */ in zswap_setup()
1857 return -ENOMEM; in zswap_setup()