Blame - mm/zswap.c - kernel/hikey-linaro

blob: 0ffcad03baea50fa5a9f2e73657db653ed628614 [file] [log] [blame]

Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	1	/*
				2	* zswap.c - zswap driver file
				3	*
				4	* zswap is a backend for frontswap that takes pages that are in the process
				5	* of being swapped out and attempts to compress and store them in a
				6	* RAM-based memory pool. This can result in a significant I/O reduction on
				7	* the swap device and, in the case where decompressing from RAM is faster
				8	* than reading from the swap device, can also improve workload performance.
				9	*
				10	* Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
				11	*
				12	* This program is free software; you can redistribute it and/or
				13	* modify it under the terms of the GNU General Public License
				14	* as published by the Free Software Foundation; either version 2
				15	* of the License, or (at your option) any later version.
				16	*
				17	* This program is distributed in the hope that it will be useful,
				18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				20	* GNU General Public License for more details.
				21	*/
				22
				23	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				24
				25	#include <linux/module.h>
				26	#include <linux/cpu.h>
				27	#include <linux/highmem.h>
				28	#include <linux/slab.h>
				29	#include <linux/spinlock.h>
				30	#include <linux/types.h>
				31	#include <linux/atomic.h>
				32	#include <linux/frontswap.h>
				33	#include <linux/rbtree.h>
				34	#include <linux/swap.h>
				35	#include <linux/crypto.h>
				36	#include <linux/mempool.h>
				37	#include <linux/zbud.h>
				38
				39	#include <linux/mm_types.h>
				40	#include <linux/page-flags.h>
				41	#include <linux/swapops.h>
				42	#include <linux/writeback.h>
				43	#include <linux/pagemap.h>
				44
				45	/*********************************
				46	* statistics
				47	**********************************/
				48	/* Number of memory pages used by the compressed pool */
				49	static u64 zswap_pool_pages;
				50	/* The number of compressed pages currently stored in zswap */
				51	static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
				52
				53	/*
				54	* The statistics below are not protected from concurrent access for
				55	* performance reasons so they may not be a 100% accurate. However,
				56	* they do provide useful information on roughly how many times a
				57	* certain event is occurring.
				58	*/
				59
				60	/* Pool limit was hit (see zswap_max_pool_percent) */
				61	static u64 zswap_pool_limit_hit;
				62	/* Pages written back when pool limit was reached */
				63	static u64 zswap_written_back_pages;
				64	/* Store failed due to a reclaim failure after pool limit was reached */
				65	static u64 zswap_reject_reclaim_fail;
				66	/* Compressed page was too big for the allocator to (optimally) store */
				67	static u64 zswap_reject_compress_poor;
				68	/* Store failed because underlying allocator could not get memory */
				69	static u64 zswap_reject_alloc_fail;
				70	/* Store failed because the entry metadata could not be allocated (rare) */
				71	static u64 zswap_reject_kmemcache_fail;
				72	/* Duplicate store was encountered (rare) */
				73	static u64 zswap_duplicate_entry;
				74
				75	/*********************************
				76	* tunables
				77	**********************************/
				78	/* Enable/disable zswap (disabled by default, fixed at boot for now) */
				79	static bool zswap_enabled __read_mostly;
				80	module_param_named(enabled, zswap_enabled, bool, 0);
				81
				82	/* Compressor to be used by zswap (fixed at boot for now) */
				83	#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
				84	static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
				85	module_param_named(compressor, zswap_compressor, charp, 0);
				86
				87	/* The maximum percentage of memory that the compressed pool can occupy */
				88	static unsigned int zswap_max_pool_percent = 20;
				89	module_param_named(max_pool_percent,
				90	zswap_max_pool_percent, uint, 0644);
				91
				92	/*********************************
				93	* compression functions
				94	**********************************/
				95	/* per-cpu compression transforms */
				96	static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
				97
				98	enum comp_op {
				99	ZSWAP_COMPOP_COMPRESS,
				100	ZSWAP_COMPOP_DECOMPRESS
				101	};
				102
				103	static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				104	u8 dst, unsigned int dlen)
				105	{
				106	struct crypto_comp *tfm;
				107	int ret;
				108
				109	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
				110	switch (op) {
				111	case ZSWAP_COMPOP_COMPRESS:
				112	ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
				113	break;
				114	case ZSWAP_COMPOP_DECOMPRESS:
				115	ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
				116	break;
				117	default:
				118	ret = -EINVAL;
				119	}
				120
				121	put_cpu();
				122	return ret;
				123	}
				124
				125	static int __init zswap_comp_init(void)
				126	{
				127	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
				128	pr_info("%s compressor not available\n", zswap_compressor);
				129	/* fall back to default compressor */
				130	zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
				131	if (!crypto_has_comp(zswap_compressor, 0, 0))
				132	/* can't even load the default compressor */
				133	return -ENODEV;
				134	}
				135	pr_info("using %s compressor\n", zswap_compressor);
				136
				137	/* alloc percpu transforms */
				138	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
				139	if (!zswap_comp_pcpu_tfms)
				140	return -ENOMEM;
				141	return 0;
				142	}
				143
				144	static void zswap_comp_exit(void)
				145	{
				146	/* free percpu transforms */
				147	if (zswap_comp_pcpu_tfms)
				148	free_percpu(zswap_comp_pcpu_tfms);
				149	}
				150
				151	/*********************************
				152	* data structures
				153	**********************************/
				154	/*
				155	* struct zswap_entry
				156	*
				157	* This structure contains the metadata for tracking a single compressed
				158	* page within zswap.
				159	*
				160	* rbnode - links the entry into red-black tree for the appropriate swap type
				161	* refcount - the number of outstanding reference to the entry. This is needed
				162	* to protect against premature freeing of the entry by code
				163	* concurent calls to load, invalidate, and writeback. The lock
				164	* for the zswap_tree structure that contains the entry must
				165	* be held while changing the refcount. Since the lock must
				166	* be held, there is no reason to also make refcount atomic.
				167	* offset - the swap offset for the entry. Index into the red-black tree.
				168	* handle - zsmalloc allocation handle that stores the compressed page data
				169	* length - the length in bytes of the compressed page data. Needed during
				170	* decompression
				171	*/
				172	struct zswap_entry {
				173	struct rb_node rbnode;
				174	pgoff_t offset;
				175	int refcount;
				176	unsigned int length;
				177	unsigned long handle;
				178	};
				179
				180	struct zswap_header {
				181	swp_entry_t swpentry;
				182	};
				183
				184	/*
				185	* The tree lock in the zswap_tree struct protects a few things:
				186	* - the rbtree
				187	* - the refcount field of each entry in the tree
				188	*/
				189	struct zswap_tree {
				190	struct rb_root rbroot;
				191	spinlock_t lock;
				192	struct zbud_pool *pool;
				193	};
				194
				195	static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
				196
				197	/*********************************
				198	* zswap entry functions
				199	**********************************/
				200	static struct kmem_cache *zswap_entry_cache;
				201
				202	static int zswap_entry_cache_create(void)
				203	{
				204	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
				205	return (zswap_entry_cache == NULL);
				206	}
				207
				208	static void zswap_entry_cache_destory(void)
				209	{
				210	kmem_cache_destroy(zswap_entry_cache);
				211	}
				212
				213	static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
				214	{
				215	struct zswap_entry *entry;
				216	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
				217	if (!entry)
				218	return NULL;
				219	entry->refcount = 1;
				220	return entry;
				221	}
				222
				223	static void zswap_entry_cache_free(struct zswap_entry *entry)
				224	{
				225	kmem_cache_free(zswap_entry_cache, entry);
				226	}
				227
				228	/* caller must hold the tree lock */
				229	static void zswap_entry_get(struct zswap_entry *entry)
				230	{
				231	entry->refcount++;
				232	}
				233
				234	/* caller must hold the tree lock */
				235	static int zswap_entry_put(struct zswap_entry *entry)
				236	{
				237	entry->refcount--;
				238	return entry->refcount;
				239	}
				240
				241	/*********************************
				242	* rbtree functions
				243	**********************************/
				244	static struct zswap_entry zswap_rb_search(struct rb_root root, pgoff_t offset)
				245	{
				246	struct rb_node *node = root->rb_node;
				247	struct zswap_entry *entry;
				248
				249	while (node) {
				250	entry = rb_entry(node, struct zswap_entry, rbnode);
				251	if (entry->offset > offset)
				252	node = node->rb_left;
				253	else if (entry->offset < offset)
				254	node = node->rb_right;
				255	else
				256	return entry;
				257	}
				258	return NULL;
				259	}
				260
				261	/*
				262	* In the case that a entry with the same offset is found, a pointer to
				263	* the existing entry is stored in dupentry and the function returns -EEXIST
				264	*/
				265	static int zswap_rb_insert(struct rb_root root, struct zswap_entry entry,
				266	struct zswap_entry **dupentry)
				267	{
				268	struct rb_node *link = &root->rb_node, parent = NULL;
				269	struct zswap_entry *myentry;
				270
				271	while (*link) {
				272	parent = *link;
				273	myentry = rb_entry(parent, struct zswap_entry, rbnode);
				274	if (myentry->offset > entry->offset)
				275	link = &(*link)->rb_left;
				276	else if (myentry->offset < entry->offset)
				277	link = &(*link)->rb_right;
				278	else {
				279	*dupentry = myentry;
				280	return -EEXIST;
				281	}
				282	}
				283	rb_link_node(&entry->rbnode, parent, link);
				284	rb_insert_color(&entry->rbnode, root);
				285	return 0;
				286	}
				287
				288	/*********************************
				289	* per-cpu code
				290	**********************************/
				291	static DEFINE_PER_CPU(u8 *, zswap_dstmem);
				292
				293	static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
				294	{
				295	struct crypto_comp *tfm;
				296	u8 *dst;
				297
				298	switch (action) {
				299	case CPU_UP_PREPARE:
				300	tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
				301	if (IS_ERR(tfm)) {
				302	pr_err("can't allocate compressor transform\n");
				303	return NOTIFY_BAD;
				304	}
				305	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
				306	dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
				307	if (!dst) {
				308	pr_err("can't allocate compressor buffer\n");
				309	crypto_free_comp(tfm);
				310	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
				311	return NOTIFY_BAD;
				312	}
				313	per_cpu(zswap_dstmem, cpu) = dst;
				314	break;
				315	case CPU_DEAD:
				316	case CPU_UP_CANCELED:
				317	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
				318	if (tfm) {
				319	crypto_free_comp(tfm);
				320	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
				321	}
				322	dst = per_cpu(zswap_dstmem, cpu);
				323	kfree(dst);
				324	per_cpu(zswap_dstmem, cpu) = NULL;
				325	break;
				326	default:
				327	break;
				328	}
				329	return NOTIFY_OK;
				330	}
				331
				332	static int zswap_cpu_notifier(struct notifier_block *nb,
				333	unsigned long action, void *pcpu)
				334	{
				335	unsigned long cpu = (unsigned long)pcpu;
				336	return __zswap_cpu_notifier(action, cpu);
				337	}
				338
				339	static struct notifier_block zswap_cpu_notifier_block = {
				340	.notifier_call = zswap_cpu_notifier
				341	};
				342
				343	static int zswap_cpu_init(void)
				344	{
				345	unsigned long cpu;
				346
				347	get_online_cpus();
				348	for_each_online_cpu(cpu)
				349	if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
				350	goto cleanup;
				351	register_cpu_notifier(&zswap_cpu_notifier_block);
				352	put_online_cpus();
				353	return 0;
				354
				355	cleanup:
				356	for_each_online_cpu(cpu)
				357	__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
				358	put_online_cpus();
				359	return -ENOMEM;
				360	}
				361
				362	/*********************************
				363	* helpers
				364	**********************************/
				365	static bool zswap_is_full(void)
				366	{
				367	return (totalram_pages * zswap_max_pool_percent / 100 <
				368	zswap_pool_pages);
				369	}
				370
				371	/*
				372	* Carries out the common pattern of freeing and entry's zsmalloc allocation,
				373	* freeing the entry itself, and decrementing the number of stored pages.
				374	*/
				375	static void zswap_free_entry(struct zswap_tree tree, struct zswap_entry entry)
				376	{
				377	zbud_free(tree->pool, entry->handle);
				378	zswap_entry_cache_free(entry);
				379	atomic_dec(&zswap_stored_pages);
				380	zswap_pool_pages = zbud_get_pool_size(tree->pool);
				381	}
				382
				383	/*********************************
				384	* writeback code
				385	**********************************/
				386	/* return enum for zswap_get_swap_cache_page */
				387	enum zswap_get_swap_ret {
				388	ZSWAP_SWAPCACHE_NEW,
				389	ZSWAP_SWAPCACHE_EXIST,
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	390	ZSWAP_SWAPCACHE_FAIL,
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	391	};
				392
				393	/*
				394	* zswap_get_swap_cache_page
				395	*
				396	* This is an adaption of read_swap_cache_async()
				397	*
				398	* This function tries to find a page with the given swap entry
				399	* in the swapper_space address space (the swap cache). If the page
				400	* is found, it is returned in retpage. Otherwise, a page is allocated,
				401	* added to the swap cache, and returned in retpage.
				402	*
				403	* If success, the swap cache page is returned in retpage
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	404	* Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
				405	* Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
				406	* the new page is added to swapcache and locked
				407	* Returns ZSWAP_SWAPCACHE_FAIL on error
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	408	*/
				409	static int zswap_get_swap_cache_page(swp_entry_t entry,
				410	struct page **retpage)
				411	{
				412	struct page found_page, new_page = NULL;
Sunghan Suh	822518d	2013-09-11 14:20:22 -0700	[diff] [blame]	413	struct address_space *swapper_space = swap_address_space(entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	414	int err;
				415
				416	*retpage = NULL;
				417	do {
				418	/*
				419	* First check the swap cache. Since this is normally
				420	* called after lookup_swap_cache() failed, re-calling
				421	* that would confuse statistics.
				422	*/
				423	found_page = find_get_page(swapper_space, entry.val);
				424	if (found_page)
				425	break;
				426
				427	/*
				428	* Get a new page to read into from swap.
				429	*/
				430	if (!new_page) {
				431	new_page = alloc_page(GFP_KERNEL);
				432	if (!new_page)
				433	break; /* Out of memory */
				434	}
				435
				436	/*
				437	* call radix_tree_preload() while we can wait.
				438	*/
				439	err = radix_tree_preload(GFP_KERNEL);
				440	if (err)
				441	break;
				442
				443	/*
				444	* Swap entry may have been freed since our caller observed it.
				445	*/
				446	err = swapcache_prepare(entry);
				447	if (err == -EEXIST) { /* seems racy */
				448	radix_tree_preload_end();
				449	continue;
				450	}
				451	if (err) { /* swp entry is obsolete ? */
				452	radix_tree_preload_end();
				453	break;
				454	}
				455
				456	/* May fail (-ENOMEM) if radix-tree node allocation failed. */
				457	__set_page_locked(new_page);
				458	SetPageSwapBacked(new_page);
				459	err = __add_to_swap_cache(new_page, entry);
				460	if (likely(!err)) {
				461	radix_tree_preload_end();
				462	lru_cache_add_anon(new_page);
				463	*retpage = new_page;
				464	return ZSWAP_SWAPCACHE_NEW;
				465	}
				466	radix_tree_preload_end();
				467	ClearPageSwapBacked(new_page);
				468	__clear_page_locked(new_page);
				469	/*
				470	* add_to_swap_cache() doesn't return -EEXIST, so we can safely
				471	* clear SWAP_HAS_CACHE flag.
				472	*/
				473	swapcache_free(entry, NULL);
				474	} while (err != -ENOMEM);
				475
				476	if (new_page)
				477	page_cache_release(new_page);
				478	if (!found_page)
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	479	return ZSWAP_SWAPCACHE_FAIL;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	480	*retpage = found_page;
				481	return ZSWAP_SWAPCACHE_EXIST;
				482	}
				483
				484	/*
				485	* Attempts to free an entry by adding a page to the swap cache,
				486	* decompressing the entry data into the page, and issuing a
				487	* bio write to write the page back to the swap device.
				488	*
				489	* This can be thought of as a "resumed writeback" of the page
				490	* to the swap device. We are basically resuming the same swap
				491	* writeback path that was intercepted with the frontswap_store()
				492	* in the first place. After the page has been decompressed into
				493	* the swap cache, the compressed version stored by zswap can be
				494	* freed.
				495	*/
				496	static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
				497	{
				498	struct zswap_header *zhdr;
				499	swp_entry_t swpentry;
				500	struct zswap_tree *tree;
				501	pgoff_t offset;
				502	struct zswap_entry *entry;
				503	struct page *page;
				504	u8 src, dst;
				505	unsigned int dlen;
				506	int ret, refcount;
				507	struct writeback_control wbc = {
				508	.sync_mode = WB_SYNC_NONE,
				509	};
				510
				511	/* extract swpentry from data */
				512	zhdr = zbud_map(pool, handle);
				513	swpentry = zhdr->swpentry; /* here */
				514	zbud_unmap(pool, handle);
				515	tree = zswap_trees[swp_type(swpentry)];
				516	offset = swp_offset(swpentry);
				517	BUG_ON(pool != tree->pool);
				518
				519	/* find and ref zswap entry */
				520	spin_lock(&tree->lock);
				521	entry = zswap_rb_search(&tree->rbroot, offset);
				522	if (!entry) {
				523	/* entry was invalidated */
				524	spin_unlock(&tree->lock);
				525	return 0;
				526	}
				527	zswap_entry_get(entry);
				528	spin_unlock(&tree->lock);
				529	BUG_ON(offset != entry->offset);
				530
				531	/* try to allocate swap cache page */
				532	switch (zswap_get_swap_cache_page(swpentry, &page)) {
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	533	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	534	ret = -ENOMEM;
				535	goto fail;
				536
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	537	case ZSWAP_SWAPCACHE_EXIST:
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	538	/* page is already in the swap cache, ignore for now */
				539	page_cache_release(page);
				540	ret = -EEXIST;
				541	goto fail;
				542
				543	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
				544	/* decompress */
				545	dlen = PAGE_SIZE;
				546	src = (u8 *)zbud_map(tree->pool, entry->handle) +
				547	sizeof(struct zswap_header);
				548	dst = kmap_atomic(page);
				549	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				550	entry->length, dst, &dlen);
				551	kunmap_atomic(dst);
				552	zbud_unmap(tree->pool, entry->handle);
				553	BUG_ON(ret);
				554	BUG_ON(dlen != PAGE_SIZE);
				555
				556	/* page is up to date */
				557	SetPageUptodate(page);
				558	}
				559
Weijie Yang	b349acc	2013-11-12 15:07:52 -0800	[diff] [blame]	560	/* move it to the tail of the inactive list after end_writeback */
				561	SetPageReclaim(page);
				562
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	563	/* start writeback */
				564	__swap_writepage(page, &wbc, end_swap_bio_write);
				565	page_cache_release(page);
				566	zswap_written_back_pages++;
				567
				568	spin_lock(&tree->lock);
				569
				570	/* drop local reference */
				571	zswap_entry_put(entry);
				572	/* drop the initial reference from entry creation */
				573	refcount = zswap_entry_put(entry);
				574
				575	/*
				576	* There are three possible values for refcount here:
				577	* (1) refcount is 1, load is in progress, unlink from rbtree,
				578	* load will free
				579	* (2) refcount is 0, (normal case) entry is valid,
				580	* remove from rbtree and free entry
				581	* (3) refcount is -1, invalidate happened during writeback,
				582	* free entry
				583	*/
				584	if (refcount >= 0) {
				585	/* no invalidate yet, remove from rbtree */
				586	rb_erase(&entry->rbnode, &tree->rbroot);
				587	}
				588	spin_unlock(&tree->lock);
				589	if (refcount <= 0) {
				590	/* free the entry */
				591	zswap_free_entry(tree, entry);
				592	return 0;
				593	}
				594	return -EAGAIN;
				595
				596	fail:
				597	spin_lock(&tree->lock);
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame^]	598	refcount = zswap_entry_put(entry);
				599	if (refcount <= 0) {
				600	/* invalidate happened, consider writeback as success */
				601	zswap_free_entry(tree, entry);
				602	ret = 0;
				603	}
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	604	spin_unlock(&tree->lock);
				605	return ret;
				606	}
				607
				608	/*********************************
				609	* frontswap hooks
				610	**********************************/
				611	/* attempts to compress and store an single page */
				612	static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				613	struct page *page)
				614	{
				615	struct zswap_tree *tree = zswap_trees[type];
				616	struct zswap_entry entry, dupentry;
				617	int ret;
				618	unsigned int dlen = PAGE_SIZE, len;
				619	unsigned long handle;
				620	char *buf;
				621	u8 src, dst;
				622	struct zswap_header *zhdr;
				623
				624	if (!tree) {
				625	ret = -ENODEV;
				626	goto reject;
				627	}
				628
				629	/* reclaim space if needed */
				630	if (zswap_is_full()) {
				631	zswap_pool_limit_hit++;
				632	if (zbud_reclaim_page(tree->pool, 8)) {
				633	zswap_reject_reclaim_fail++;
				634	ret = -ENOMEM;
				635	goto reject;
				636	}
				637	}
				638
				639	/* allocate entry */
				640	entry = zswap_entry_cache_alloc(GFP_KERNEL);
				641	if (!entry) {
				642	zswap_reject_kmemcache_fail++;
				643	ret = -ENOMEM;
				644	goto reject;
				645	}
				646
				647	/* compress */
				648	dst = get_cpu_var(zswap_dstmem);
				649	src = kmap_atomic(page);
				650	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
				651	kunmap_atomic(src);
				652	if (ret) {
				653	ret = -EINVAL;
				654	goto freepage;
				655	}
				656
				657	/* store */
				658	len = dlen + sizeof(struct zswap_header);
				659	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY \| __GFP_NOWARN,
				660	&handle);
				661	if (ret == -ENOSPC) {
				662	zswap_reject_compress_poor++;
				663	goto freepage;
				664	}
				665	if (ret) {
				666	zswap_reject_alloc_fail++;
				667	goto freepage;
				668	}
				669	zhdr = zbud_map(tree->pool, handle);
				670	zhdr->swpentry = swp_entry(type, offset);
				671	buf = (u8 *)(zhdr + 1);
				672	memcpy(buf, dst, dlen);
				673	zbud_unmap(tree->pool, handle);
				674	put_cpu_var(zswap_dstmem);
				675
				676	/* populate entry */
				677	entry->offset = offset;
				678	entry->handle = handle;
				679	entry->length = dlen;
				680
				681	/* map */
				682	spin_lock(&tree->lock);
				683	do {
				684	ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
				685	if (ret == -EEXIST) {
				686	zswap_duplicate_entry++;
				687	/* remove from rbtree */
				688	rb_erase(&dupentry->rbnode, &tree->rbroot);
				689	if (!zswap_entry_put(dupentry)) {
				690	/* free */
				691	zswap_free_entry(tree, dupentry);
				692	}
				693	}
				694	} while (ret == -EEXIST);
				695	spin_unlock(&tree->lock);
				696
				697	/* update stats */
				698	atomic_inc(&zswap_stored_pages);
				699	zswap_pool_pages = zbud_get_pool_size(tree->pool);
				700
				701	return 0;
				702
				703	freepage:
				704	put_cpu_var(zswap_dstmem);
				705	zswap_entry_cache_free(entry);
				706	reject:
				707	return ret;
				708	}
				709
				710	/*
				711	* returns 0 if the page was successfully decompressed
				712	* return -1 on entry not found or error
				713	*/
				714	static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				715	struct page *page)
				716	{
				717	struct zswap_tree *tree = zswap_trees[type];
				718	struct zswap_entry *entry;
				719	u8 src, dst;
				720	unsigned int dlen;
				721	int refcount, ret;
				722
				723	/* find */
				724	spin_lock(&tree->lock);
				725	entry = zswap_rb_search(&tree->rbroot, offset);
				726	if (!entry) {
				727	/* entry was written back */
				728	spin_unlock(&tree->lock);
				729	return -1;
				730	}
				731	zswap_entry_get(entry);
				732	spin_unlock(&tree->lock);
				733
				734	/* decompress */
				735	dlen = PAGE_SIZE;
				736	src = (u8 *)zbud_map(tree->pool, entry->handle) +
				737	sizeof(struct zswap_header);
				738	dst = kmap_atomic(page);
				739	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
				740	dst, &dlen);
				741	kunmap_atomic(dst);
				742	zbud_unmap(tree->pool, entry->handle);
				743	BUG_ON(ret);
				744
				745	spin_lock(&tree->lock);
				746	refcount = zswap_entry_put(entry);
				747	if (likely(refcount)) {
				748	spin_unlock(&tree->lock);
				749	return 0;
				750	}
				751	spin_unlock(&tree->lock);
				752
				753	/*
				754	* We don't have to unlink from the rbtree because
				755	* zswap_writeback_entry() or zswap_frontswap_invalidate page()
				756	* has already done this for us if we are the last reference.
				757	*/
				758	/* free */
				759
				760	zswap_free_entry(tree, entry);
				761
				762	return 0;
				763	}
				764
				765	/* frees an entry in zswap */
				766	static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
				767	{
				768	struct zswap_tree *tree = zswap_trees[type];
				769	struct zswap_entry *entry;
				770	int refcount;
				771
				772	/* find */
				773	spin_lock(&tree->lock);
				774	entry = zswap_rb_search(&tree->rbroot, offset);
				775	if (!entry) {
				776	/* entry was written back */
				777	spin_unlock(&tree->lock);
				778	return;
				779	}
				780
				781	/* remove from rbtree */
				782	rb_erase(&entry->rbnode, &tree->rbroot);
				783
				784	/* drop the initial reference from entry creation */
				785	refcount = zswap_entry_put(entry);
				786
				787	spin_unlock(&tree->lock);
				788
				789	if (refcount) {
				790	/* writeback in progress, writeback will free */
				791	return;
				792	}
				793
				794	/* free */
				795	zswap_free_entry(tree, entry);
				796	}
				797
				798	/* frees all zswap entries for the given swap type */
				799	static void zswap_frontswap_invalidate_area(unsigned type)
				800	{
				801	struct zswap_tree *tree = zswap_trees[type];
Cody P Schafer	0bd4213	2013-09-11 14:25:33 -0700	[diff] [blame]	802	struct zswap_entry entry, n;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	803
				804	if (!tree)
				805	return;
				806
				807	/* walk the tree and free everything */
				808	spin_lock(&tree->lock);
Cody P Schafer	0bd4213	2013-09-11 14:25:33 -0700	[diff] [blame]	809	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) {
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	810	zbud_free(tree->pool, entry->handle);
				811	zswap_entry_cache_free(entry);
				812	atomic_dec(&zswap_stored_pages);
				813	}
				814	tree->rbroot = RB_ROOT;
				815	spin_unlock(&tree->lock);
Weijie Yang	aa9bca0	2013-10-16 13:46:54 -0700	[diff] [blame]	816
				817	zbud_destroy_pool(tree->pool);
				818	kfree(tree);
				819	zswap_trees[type] = NULL;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	820	}
				821
				822	static struct zbud_ops zswap_zbud_ops = {
				823	.evict = zswap_writeback_entry
				824	};
				825
				826	static void zswap_frontswap_init(unsigned type)
				827	{
				828	struct zswap_tree *tree;
				829
				830	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
				831	if (!tree)
				832	goto err;
				833	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
				834	if (!tree->pool)
				835	goto freetree;
				836	tree->rbroot = RB_ROOT;
				837	spin_lock_init(&tree->lock);
				838	zswap_trees[type] = tree;
				839	return;
				840
				841	freetree:
				842	kfree(tree);
				843	err:
				844	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
				845	}
				846
				847	static struct frontswap_ops zswap_frontswap_ops = {
				848	.store = zswap_frontswap_store,
				849	.load = zswap_frontswap_load,
				850	.invalidate_page = zswap_frontswap_invalidate_page,
				851	.invalidate_area = zswap_frontswap_invalidate_area,
				852	.init = zswap_frontswap_init
				853	};
				854
				855	/*********************************
				856	* debugfs functions
				857	**********************************/
				858	#ifdef CONFIG_DEBUG_FS
				859	#include <linux/debugfs.h>
				860
				861	static struct dentry *zswap_debugfs_root;
				862
				863	static int __init zswap_debugfs_init(void)
				864	{
				865	if (!debugfs_initialized())
				866	return -ENODEV;
				867
				868	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
				869	if (!zswap_debugfs_root)
				870	return -ENOMEM;
				871
				872	debugfs_create_u64("pool_limit_hit", S_IRUGO,
				873	zswap_debugfs_root, &zswap_pool_limit_hit);
				874	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
				875	zswap_debugfs_root, &zswap_reject_reclaim_fail);
				876	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
				877	zswap_debugfs_root, &zswap_reject_alloc_fail);
				878	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
				879	zswap_debugfs_root, &zswap_reject_kmemcache_fail);
				880	debugfs_create_u64("reject_compress_poor", S_IRUGO,
				881	zswap_debugfs_root, &zswap_reject_compress_poor);
				882	debugfs_create_u64("written_back_pages", S_IRUGO,
				883	zswap_debugfs_root, &zswap_written_back_pages);
				884	debugfs_create_u64("duplicate_entry", S_IRUGO,
				885	zswap_debugfs_root, &zswap_duplicate_entry);
				886	debugfs_create_u64("pool_pages", S_IRUGO,
				887	zswap_debugfs_root, &zswap_pool_pages);
				888	debugfs_create_atomic_t("stored_pages", S_IRUGO,
				889	zswap_debugfs_root, &zswap_stored_pages);
				890
				891	return 0;
				892	}
				893
				894	static void __exit zswap_debugfs_exit(void)
				895	{
				896	debugfs_remove_recursive(zswap_debugfs_root);
				897	}
				898	#else
				899	static int __init zswap_debugfs_init(void)
				900	{
				901	return 0;
				902	}
				903
				904	static void __exit zswap_debugfs_exit(void) { }
				905	#endif
				906
				907	/*********************************
				908	* module init and exit
				909	**********************************/
				910	static int __init init_zswap(void)
				911	{
				912	if (!zswap_enabled)
				913	return 0;
				914
				915	pr_info("loading zswap\n");
				916	if (zswap_entry_cache_create()) {
				917	pr_err("entry cache creation failed\n");
				918	goto error;
				919	}
				920	if (zswap_comp_init()) {
				921	pr_err("compressor initialization failed\n");
				922	goto compfail;
				923	}
				924	if (zswap_cpu_init()) {
				925	pr_err("per-cpu initialization failed\n");
				926	goto pcpufail;
				927	}
				928	frontswap_register_ops(&zswap_frontswap_ops);
				929	if (zswap_debugfs_init())
				930	pr_warn("debugfs initialization failed\n");
				931	return 0;
				932	pcpufail:
				933	zswap_comp_exit();
				934	compfail:
				935	zswap_entry_cache_destory();
				936	error:
				937	return -ENOMEM;
				938	}
				939	/* must be late so crypto has time to come up */
				940	late_initcall(init_zswap);
				941
				942	MODULE_LICENSE("GPL");
				943	MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
				944	MODULE_DESCRIPTION("Compressed cache for swap pages");