Commit 077b1f83 authored by Dan Magenheimer
Browse files

mm: cleancache core ops functions and config

This third patch of eight in this cleancache series provides
the core code for cleancache that interfaces between the hooks in
VFS and individual filesystems and a cleancache backend.  It also
includes build and config patches.

Two new files are added: mm/cleancache.c and include/linux/cleancache.h.

Note that CONFIG_CLEANCACHE can default to on; in systems that do
not provide a cleancache backend, all hooks devolve to a simple
check of a global enable flag, so performance impact should
be negligible but can be reduced to zero impact if config'ed off.
However for this first commit, it defaults to off.

Details and a FAQ can be found in Documentation/vm/cleancache.txt

Credits: Cleancache_ops design derived from Jeremy Fitzhardinge's
design for tmem

[v8: fix exportfs call affecting btrfs]
[v8: use static inline function, not macro]
[v7: cleanup sysfs and remove cleancache prefix]
[v6: robustly handle buggy fs encode_fh actor definition]
[v5: clean up global usage and static var names]
[v5: simplify init hook and any future fs init changes]
[v5: cleaner non-global interface for ops registration]
[v4: interface must support exportfs FS's]
[v4: interface must support 64-bit FS on 32-bit kernel]
[v3: use one ops struct to avoid pointer hops]
[v3: document and ensure PageLocked reqts are met]
[v3: fix success/fail codes, change funcs to void]
[v2: use sane types]
Signed-off-by: Dan Magenheimer <>
Reviewed-by: Jeremy Fitzhardinge <>
Reviewed-by: Konrad Rzeszutek Wilk <>
Acked-by: Al Viro <>
Acked-by: Andrew Morton <>
Acked-by: Nitin Gupta <>
Acked-by: Minchan Kim <>
Acked-by: Andreas Dilger <>
Acked-by: Jan Beulich <>
Cc: Matthew Wilcox <>
Cc: Nick Piggin <>
Cc: Mel Gorman <>
Cc: Rik Van Riel <>
Cc: Chris Mason <>
Cc: Ted Ts'o <>
Cc: Mark Fasheh <>
Cc: Joel Becker <>
parent 9fdfdcf1
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
* cleancache requires every file with a page in cleancache to have a
* unique key unless/until the file is removed/truncated. For some
* filesystems, the inode number is unique, but for "modern" filesystems
* an exportable filehandle is required (see exportfs.h)
struct cleancache_filekey {
union {
ino_t ino;
} u;
/*
 * Table of backend entry points.  A backend passes a filled-in table to
 * cleancache_register_ops(); the frontend then calls through its copy.
 */
struct cleancache_ops {
	/* called with the page size; the result becomes the sb's pool id */
	int (*init_fs)(size_t);
	/* as init_fs, for a shared (clustered) filesystem identified by uuid */
	int (*init_shared_fs)(char *uuid, size_t);
	/* fill the page from (pool id, file key, index); 0 means success */
	int (*get_page)(int, struct cleancache_filekey,
			pgoff_t, struct page *);
	/* offer the page's data to the backend under (pool id, key, index) */
	void (*put_page)(int, struct cleancache_filekey,
			pgoff_t, struct page *);
	/* drop one page, identified by (pool id, key, index) */
	void (*flush_page)(int, struct cleancache_filekey, pgoff_t);
	/* drop every page of one file, identified by (pool id, key) */
	void (*flush_inode)(int, struct cleancache_filekey);
	/* drop a whole pool */
	void (*flush_fs)(int);
};
/* Install a backend's ops table; returns the table that was in place before. */
extern struct cleancache_ops
cleancache_register_ops(struct cleancache_ops *ops);
/* Out-of-line implementations behind the cleancache_*() inline wrappers. */
extern void __cleancache_init_fs(struct super_block *);
extern void __cleancache_init_shared_fs(char *, struct super_block *);
extern int __cleancache_get_page(struct page *);
extern void __cleancache_put_page(struct page *);
extern void __cleancache_flush_page(struct address_space *, struct page *);
extern void __cleancache_flush_inode(struct address_space *);
extern void __cleancache_flush_fs(struct super_block *);
/* Global enable flag; set to 1 when a backend registers its ops. */
extern int cleancache_enabled;
static inline bool cleancache_fs_enabled(struct page *page)
return page->mapping->host->i_sb->cleancache_poolid >= 0;
static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
return mapping->host->i_sb->cleancache_poolid >= 0;
/*
 * Constant-false stand-ins for the flag and the two predicates above.
 * NOTE(review): presumably the CONFIG_CLEANCACHE-disabled branch; the
 * enclosing #ifdef/#else/#endif is not visible here -- confirm.
 */
#define cleancache_enabled (0)
#define cleancache_fs_enabled(_page) (0)
#define cleancache_fs_enabled_mapping(_page) (0)
* The shim layer provided by these inline functions allows the compiler
* to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE
* is disabled, to a single global variable check if CONFIG_CLEANCACHE
* is enabled but no cleancache "backend" has dynamically enabled it,
* and, for the most frequent cleancache ops, to a single global variable
* check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled
* and a cleancache backend has dynamically enabled cleancache, but the
* filesystem referenced by that cleancache op has not enabled cleancache.
* As a result, CONFIG_CLEANCACHE can be enabled by default with essentially
* no measurable performance impact.
static inline void cleancache_init_fs(struct super_block *sb)
if (cleancache_enabled)
static inline void cleancache_init_shared_fs(char *uuid, struct super_block *sb)
if (cleancache_enabled)
__cleancache_init_shared_fs(uuid, sb);
static inline int cleancache_get_page(struct page *page)
int ret = -1;
if (cleancache_enabled && cleancache_fs_enabled(page))
ret = __cleancache_get_page(page);
return ret;
static inline void cleancache_put_page(struct page *page)
if (cleancache_enabled && cleancache_fs_enabled(page))
static inline void cleancache_flush_page(struct address_space *mapping,
struct page *page)
/* careful... page->mapping is NULL sometimes when this is called */
if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
__cleancache_flush_page(mapping, page);
static inline void cleancache_flush_inode(struct address_space *mapping)
if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping))
static inline void cleancache_flush_fs(struct super_block *sb)
if (cleancache_enabled)
#endif /* _LINUX_CLEANCACHE_H */
......@@ -347,3 +347,26 @@ config NEED_PER_CPU_KM
depends on !SMP
default y
bool "Enable cleancache driver to cache clean pages if tmem is present"
default n
Cleancache can be thought of as a page-granularity victim cache
for clean pages that the kernel's pageframe replacement algorithm
(PFRA) would like to keep around, but can't since there isn't enough
memory. So when the PFRA "evicts" a page, it first attempts to use
cleancache code to put the data contained in that page into
"transcendent memory", memory that is not directly accessible or
addressable by the kernel and is of unknown and possibly
time-varying size. And when a cleancache-enabled
filesystem wishes to access a page in a file on disk, it first
checks cleancache to see if it already contains it; if it does,
the page is copied into the kernel and a disk access is avoided.
When a transcendent memory driver is available (such as zcache or
Xen transcendent memory), a significant I/O reduction
may be achieved. When none is available, all cleancache calls
are reduced to a single check of a global enable flag, resulting
in a negligible performance hit.
If unsure, say Y to enable cleancache
......@@ -49,3 +49,4 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
* Cleancache frontend
* This code provides the generic "frontend" layer to call a matching
* "backend" driver implementation of cleancache. See
* Documentation/vm/cleancache.txt for more information.
* Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
* This work is licensed under the terms of the GNU GPL, version 2.
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/exportfs.h>
#include <linux/mm.h>
#include <linux/cleancache.h>
* This global enablement flag may be read thousands of times per second
* by cleancache_get/put/flush even on systems where cleancache_ops
* is not claimed (e.g. cleancache is config'ed on but remains
* disabled), so is preferred to the slower alternative: a function
* call that checks a non-global.
int cleancache_enabled;
* cleancache_ops is set by cleancache_register_ops to contain the pointers
* to the cleancache "backend" implementation functions.
static struct cleancache_ops cleancache_ops;
/* useful stats available in /sys/kernel/mm/cleancache */
static unsigned long cleancache_succ_gets;
static unsigned long cleancache_failed_gets;
static unsigned long cleancache_puts;
static unsigned long cleancache_flushes;
* register operations for cleancache, returning previous thus allowing
* detection of multiple backends and possible nesting
struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
struct cleancache_ops old = cleancache_ops;
cleancache_ops = *ops;
cleancache_enabled = 1;
return old;
/* Called by a cleancache-enabled filesystem at time of mount */
void __cleancache_init_fs(struct super_block *sb)
sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
/* Called by a cleancache-enabled clustered filesystem at time of mount */
void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
sb->cleancache_poolid =
(*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
* If the filesystem uses exportable filehandles, use the filehandle as
* the key, else use the inode number.
static int cleancache_get_key(struct inode *inode,
struct cleancache_filekey *key)
int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
int len = 0, maxlen = CLEANCACHE_KEY_MAX;
struct super_block *sb = inode->i_sb;
key->u.ino = inode->i_ino;
if (sb->s_export_op != NULL) {
fhfn = sb->s_export_op->encode_fh;
if (fhfn) {
struct dentry d;
d.d_inode = inode;
len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
if (len <= 0 || len == 255)
return -1;
if (maxlen > CLEANCACHE_KEY_MAX)
return -1;
return 0;
* "Get" data from cleancache associated with the poolid/inode/index
* that were specified when the data was put to cleancache and, if
* successful, use it to fill the specified page with data and return 0.
* If the get fails, the pageframe is unchanged and -1 is returned.
* Page must be locked by caller.
int __cleancache_get_page(struct page *page)
int ret = -1;
int pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };
pool_id = page->mapping->host->i_sb->cleancache_poolid;
if (pool_id < 0)
goto out;
if (cleancache_get_key(page->mapping->host, &key) < 0)
goto out;
ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
if (ret == 0)
return ret;
* "Put" data from a page to cleancache and associate it with the
* (previously-obtained per-filesystem) poolid and the page's
* inode and page index. Page must be locked. Note that a put_page
* always "succeeds", though a subsequent get_page may succeed or fail.
void __cleancache_put_page(struct page *page)
int pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };
pool_id = page->mapping->host->i_sb->cleancache_poolid;
if (pool_id >= 0 &&
cleancache_get_key(page->mapping->host, &key) >= 0) {
(*cleancache_ops.put_page)(pool_id, key, page->index, page);
* Flush any data from cleancache associated with the poolid and the
* page's inode and page index so that a subsequent "get" will fail.
void __cleancache_flush_page(struct address_space *mapping, struct page *page)
/* careful... page->mapping is NULL sometimes when this is called */
int pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };
if (pool_id >= 0) {
if (cleancache_get_key(mapping->host, &key) >= 0) {
(*cleancache_ops.flush_page)(pool_id, key, page->index);
* Flush all data from cleancache associated with the poolid and the
* mappings's inode so that all subsequent gets to this poolid/inode
* will fail.
void __cleancache_flush_inode(struct address_space *mapping)
int pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };
if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
(*cleancache_ops.flush_inode)(pool_id, key);
* Called by any cleancache-enabled filesystem at time of unmount;
* note that pool_id is surrendered and may be reutrned by a subsequent
* cleancache_init_fs or cleancache_init_shared_fs
void __cleancache_flush_fs(struct super_block *sb)
if (sb->cleancache_poolid >= 0) {
int old_poolid = sb->cleancache_poolid;
sb->cleancache_poolid = -1;
/* see Documentation/ABI/xxx/sysfs-kernel-mm-cleancache */
/*
 * Define a read-only (mode 0444) kobject attribute named _name, together
 * with a show routine that prints the unsigned long counter
 * cleancache_<_name> followed by a newline.  The expansion ends without a
 * semicolon; the user of the macro supplies it.
 */
#define CLEANCACHE_SYSFS_RO(_name) \
	static ssize_t cleancache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", cleancache_##_name); \
	} \
	static struct kobj_attribute cleancache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = cleancache_##_name##_show, \
	}
static struct attribute *cleancache_attrs[] = {
static struct attribute_group cleancache_attr_group = {
.attrs = cleancache_attrs,
.name = "cleancache",
#endif /* CONFIG_SYSFS */
/*
 * Init-time setup: when sysfs is configured, create the "cleancache"
 * attribute group under mm_kobj.  The result of sysfs_create_group() is
 * stored in err but not acted on; this function returns 0 regardless.
 */
static int __init init_cleancache(void)
{
#ifdef CONFIG_SYSFS
	int err;

	err = sysfs_create_group(mm_kobj, &cleancache_attr_group);
#endif /* CONFIG_SYSFS */
	return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment