Commit 9f5974c8 authored by Linus Torvalds
Browse files
parents a2d823bf ddae9c2e
......@@ -40,11 +40,10 @@
#include "xfs_rw.h"
#include "xfs_iomap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
struct writeback_control *wbc, void *, int, int);
#if defined(XFS_RW_TRACE)
void
......@@ -55,17 +54,15 @@ xfs_page_trace(
int mask)
{
xfs_inode_t *ip;
bhv_desc_t *bdp;
vnode_t *vp = LINVFS_GET_VP(inode);
loff_t isize = i_size_read(inode);
loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
loff_t offset = page_offset(page);
int delalloc = -1, unmapped = -1, unwritten = -1;
if (page_has_buffers(page))
xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
ip = XFS_BHVTOI(bdp);
ip = xfs_vtoi(vp);
if (!ip->i_rwtrace)
return;
......@@ -103,15 +100,56 @@ xfs_finish_ioend(
queue_work(xfsdatad_workqueue, &ioend->io_work);
}
/*
 * Final teardown of an ioend.
 *
 * Walks the buffer_head chain hanging off the ioend (linked through
 * b_private) and runs each buffer's b_end_io handler with the ioend's
 * uptodate status.  Afterwards vn_iowake() is called on the vnode and
 * the ioend goes back to xfs_ioend_pool, so the caller must not touch
 * the ioend again.
 */
STATIC void
xfs_destroy_ioend(
	xfs_ioend_t		*ioend)
{
	struct buffer_head	*cur = ioend->io_buffer_head;

	while (cur) {
		/* Read the link before the completion handler runs. */
		struct buffer_head	*following = cur->b_private;

		cur->b_end_io(cur, ioend->io_uptodate);
		cur = following;
	}

	vn_iowake(ioend->io_vnode);
	mempool_free(ioend, xfs_ioend_pool);
}
/*
 * Work handler run on completion of a buffered write over a delayed
 * allocate extent.  @data is the xfs_ioend_t; for now the handler only
 * tears it down.
 * TODO: Update ondisk isize now that we know the file data
 * has been flushed (i.e. the notorious "NULL file" problem).
 */
STATIC void
xfs_end_bio_delalloc(
	void			*data)
{
	xfs_destroy_ioend(data);
}
/*
 * Work handler run on completion of a buffered write over a regular,
 * already written extent.  @data is the xfs_ioend_t, which is simply
 * torn down.
 */
STATIC void
xfs_end_bio_written(
	void			*data)
{
	xfs_destroy_ioend(data);
}
/*
* IO write completion for unwritten extents.
*
* Issue transactions to convert a buffer range from unwritten
* to written extents.
*/
......@@ -123,21 +161,10 @@ xfs_end_bio_unwritten(
vnode_t *vp = ioend->io_vnode;
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
struct buffer_head *bh, *next;
int error;
if (ioend->io_uptodate)
VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
/* ioend->io_buffer_head is only non-NULL for buffered I/O */
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io = NULL;
clear_buffer_unwritten(bh);
end_buffer_async_write(bh, ioend->io_uptodate);
}
xfs_destroy_ioend(ioend);
}
......@@ -149,7 +176,8 @@ xfs_end_bio_unwritten(
*/
STATIC xfs_ioend_t *
xfs_alloc_ioend(
struct inode *inode)
struct inode *inode,
unsigned int type)
{
xfs_ioend_t *ioend;
......@@ -162,45 +190,25 @@ xfs_alloc_ioend(
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_uptodate = 1; /* cleared if any I/O fails */
ioend->io_list = NULL;
ioend->io_type = type;
ioend->io_vnode = LINVFS_GET_VP(inode);
ioend->io_buffer_head = NULL;
ioend->io_buffer_tail = NULL;
atomic_inc(&ioend->io_vnode->v_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
if (type == IOMAP_UNWRITTEN)
INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
else if (type == IOMAP_DELAY)
INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
else
INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);
return ioend;
}
/*
 * Completion handler for a buffer_head that is flagged unwritten.
 *
 * The owning ioend is taken from bh->b_private.  A failed I/O
 * (!uptodate) clears the ioend's io_uptodate flag.  The buffer is then
 * pushed onto the front of the ioend's io_buffer_head chain and
 * xfs_finish_ioend() is called for the ioend.
 */
void
linvfs_unwritten_done(
struct buffer_head *bh,
int uptodate)
{
xfs_ioend_t *ioend = bh->b_private;
/* One function-local static lock guards chain insertion for every ioend. */
static spinlock_t unwritten_done_lock = SPIN_LOCK_UNLOCKED;
unsigned long flags;
ASSERT(buffer_unwritten(bh));
bh->b_end_io = NULL;
if (!uptodate)
ioend->io_uptodate = 0;
/*
* Deep magic here. We reuse b_private in the buffer_heads to build
* a chain for completing the I/O from user context after we've issued
* a transaction to convert the unwritten extent.
*/
spin_lock_irqsave(&unwritten_done_lock, flags);
/* b_private stops pointing at the ioend and becomes the chain link. */
bh->b_private = ioend->io_buffer_head;
ioend->io_buffer_head = bh;
spin_unlock_irqrestore(&unwritten_done_lock, flags);
xfs_finish_ioend(ioend);
}
STATIC int
xfs_map_blocks(
struct inode *inode,
......@@ -218,138 +226,260 @@ xfs_map_blocks(
return -error;
}
/*
 * Report whether @offset lies inside the range described by @iomapp,
 * i.e. within [iomap_offset, iomap_offset + iomap_bsize).
 * Returns 1 when it does and 0 otherwise.
 */
STATIC inline int
xfs_iomap_valid(
	xfs_iomap_t		*iomapp,
	loff_t			offset)
{
	if (offset < iomapp->iomap_offset)
		return 0;
	return offset < iomapp->iomap_offset + iomapp->iomap_bsize;
}
/*
* Finds the corresponding mapping in block @map array of the
* given @offset within a @page.
* BIO completion handler for buffered IO.
*/
STATIC xfs_iomap_t *
xfs_offset_to_map(
/*
 * bi_end_io callback for bios carrying an ioend in bi_private.
 *
 * Returns 1 without doing anything while bio->bi_size is still
 * non-zero.  Otherwise it records an I/O failure in the ioend when the
 * bio is not BIO_UPTODATE, detaches and drops the bio, hands the ioend
 * to xfs_finish_ioend() and returns 0.
 */
STATIC int
xfs_end_bio(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_ioend_t		*ioend;

	/* NOTE(review): presumably a partial completion - confirm. */
	if (bio->bi_size != 0)
		return 1;

	ioend = bio->bi_private;
	ASSERT(ioend);
	ASSERT(atomic_read(&bio->bi_cnt) >= 1);

	/* Toss bio and pass work off to an xfsdatad thread */
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		ioend->io_uptodate = 0;

	bio->bi_end_io = NULL;
	bio->bi_private = NULL;
	bio_put(bio);

	xfs_finish_ioend(ioend);
	return 0;
}
/*
 * Tie @bio to @ioend and submit it as a WRITE.
 *
 * One io_remaining count is taken on the ioend for this bio before
 * submission, xfs_end_bio is installed as the completion callback, and
 * one bio reference is dropped once submit_bio() has returned.
 */
STATIC void
xfs_submit_ioend_bio(
	xfs_ioend_t		*ioend,
	struct bio		*bio)
{
	atomic_inc(&ioend->io_remaining);

	bio->bi_end_io = xfs_end_bio;
	bio->bi_private = ioend;

	submit_bio(WRITE, bio);
	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
	bio_put(bio);
}
/*
 * Allocate a bio positioned at the block and device of @bh.
 *
 * Allocation starts at bio_get_nr_vecs() vectors for the buffer's
 * device and halves the request after every failed attempt until one
 * succeeds.  The returned bio has had bio_get() called on it once.
 */
STATIC struct bio *
xfs_alloc_ioend_bio(
	struct buffer_head	*bh)
{
	struct bio		*bio;
	int			nvecs;

	for (nvecs = bio_get_nr_vecs(bh->b_bdev); ; nvecs >>= 1) {
		bio = bio_alloc(GFP_NOIO, nvecs);
		if (bio)
			break;
	}

	ASSERT(bio->bi_private == NULL);
	bio->bi_bdev = bh->b_bdev;
	/* b_size >> 9 scales the block number by the block size / 512. */
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_get(bio);
	return bio;
}
/*
 * Flip the state bits of @bh for write-out.
 *
 * The buffer must already be mapped and locked, and must be neither
 * delayed-allocate nor unwritten.  It is marked async-write and
 * uptodate, and its dirty bit is cleared.
 */
STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	/* Preconditions. */
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	/* State transition. */
	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}
/*
 * Put a locked page into writeback state and unlock it.
 *
 * The page must be locked and not already under writeback.  If
 * @clear_dirty is set the page's dirty state is cleared as well.  When
 * @buffers is zero, writeback is ended again straight away and the page
 * is counted in wbc->pages_skipped.
 */
STATIC void
xfs_start_page_writeback(
struct page *page,
xfs_iomap_t *iomapp,
unsigned long offset)
struct writeback_control *wbc,
int clear_dirty,
int buffers)
{
ASSERT(PageLocked(page));
ASSERT(!PageWriteback(page));
set_page_writeback(page);
if (clear_dirty)
clear_page_dirty(page);
unlock_page(page);
/* No buffers were queued for this page: close writeback immediately. */
if (!buffers) {
end_page_writeback(page);
wbc->pages_skipped++; /* We didn't write this page */
}
}
/*
 * Add the region covered by @bh (its page, b_size bytes at the
 * buffer's offset within that page) to @bio.  Returns whatever
 * bio_add_page() returns.
 */
static inline int
bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	struct page	*pg = bh->b_page;

	return bio_add_page(bio, pg, bh->b_size, bh_offset(bh));
}
/*
 * Issue the I/O for a chain of ioends linked through io_list.
 *
 * For each ioend, every buffer on its io_buffer_head chain is switched
 * to writeback state and packed into bios.  The bio being built is
 * submitted and a new one started whenever the next buffer's block
 * number does not directly follow the previous one, or whenever
 * bio_add_buffer() does not accept the whole buffer.  After the last
 * bio of an ioend has gone out, xfs_finish_ioend() is called for it.
 */
STATIC void
xfs_submit_ioend(
	xfs_ioend_t		*ioend)
{
	sector_t		lastblock = 0;

	for (;;) {
		xfs_ioend_t		*next = ioend->io_list;
		struct buffer_head	*bh;
		struct bio		*bio = NULL;

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			xfs_start_buffer_writeback(bh);

			/* A discontiguous block closes the bio being built. */
			if (bio && bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(ioend, bio);
				bio = NULL;
			}

			for (;;) {
				if (!bio)
					bio = xfs_alloc_ioend_bio(bh);
				if (bio_add_buffer(bio, bh) == bh->b_size)
					break;
				/* Buffer not accepted: flush and retry on a new bio. */
				xfs_submit_ioend_bio(ioend, bio);
				bio = NULL;
			}

			lastblock = bh->b_blocknr;
		}

		if (bio)
			xfs_submit_ioend_bio(ioend, bio);
		xfs_finish_ioend(ioend);

		if (!next)
			return;
		ioend = next;
	}
}
/*
 * Abandon a chain of ioends (linked through io_list) without issuing
 * any I/O.
 *
 * Every buffer chained on each ioend has its async-write flag cleared
 * and is unlocked; then vn_iowake() is called on the vnode and the
 * ioend is returned to xfs_ioend_pool.  Both the ioend and its buffer
 * chain are dereferenced unconditionally, so neither may be NULL on
 * entry.  Only ever called for the initial page in a writepage
 * request, so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t		*ioend)
{
	for (;;) {
		xfs_ioend_t		*next_ioend = ioend->io_list;
		struct buffer_head	*bh = ioend->io_buffer_head;

		for (;;) {
			struct buffer_head	*next_bh = bh->b_private;

			clear_buffer_async_write(bh);
			unlock_buffer(bh);
			if (!next_bh)
				break;
			bh = next_bh;
		}

		vn_iowake(ioend->io_vnode);
		mempool_free(ioend, xfs_ioend_pool);

		if (!next_ioend)
			return;
		ioend = next_ioend;
	}
}
/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we allocate another ioend, link it after the
 * current one through io_list and make it the current one.
 * The ioend in use is handed back through @result; the function
 * itself returns nothing.
 *
 * A new ioend is started when there is none yet, when @need_ioend is
 * set, or when @type differs from the current ioend's io_type.
 *
 * NOTE(review): the statements below that use "page" and "iomapp"
 * refer to names this function does not declare; they look like
 * leftovers from a different, removed function - confirm.
 */
STATIC void
xfs_add_to_ioend(
struct inode *inode,
struct buffer_head *bh,
xfs_off_t offset,
unsigned int type,
xfs_ioend_t **result,
int need_ioend)
{
loff_t full_offset; /* offset from start of file */
xfs_ioend_t *ioend = *result;
ASSERT(offset < PAGE_CACHE_SIZE);
if (!ioend || need_ioend || type != ioend->io_type) {
xfs_ioend_t *previous = *result;
full_offset = page->index; /* NB: using 64bit number */
full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */
full_offset += offset; /* offset from page start */
ioend = xfs_alloc_ioend(inode, type);
ioend->io_offset = offset;
/* bh is both head and tail of the new ioend's buffer chain. */
ioend->io_buffer_head = bh;
ioend->io_buffer_tail = bh;
if (previous)
previous->io_list = ioend;
*result = ioend;
} else {
/* Compatible ioend: append bh behind the current tail. */
ioend->io_buffer_tail->b_private = bh;
ioend->io_buffer_tail = bh;
}
if (full_offset < iomapp->iomap_offset)
return NULL;
if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
return iomapp;
return NULL;
/* bh is now the last buffer of the chain. */
bh->b_private = NULL;
ioend->io_size += bh->b_size;
}
/*
 * Map @bh to its on-disk block according to @iomapp.
 *
 * The mapping must not be a hole or a delayed allocation and must
 * carry a valid iomap_bn.  The block number is derived from iomap_bn
 * (shifted down by block_bits - BBSHIFT) plus the distance of @offset
 * from iomap_offset in units of 1 << block_bits.  The buffer is then
 * locked, given that block number and the mapping's target block
 * device, marked mapped, and its delay and unwritten flags are
 * cleared.  The buffer is left locked on return.
 */
STATIC void
xfs_map_at_offset(
struct page *page,
struct buffer_head *bh,
unsigned long offset,
loff_t offset,
int block_bits,
xfs_iomap_t *iomapp)
{
xfs_daddr_t bn;
loff_t delta;
int sector_shift;
ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
delta = page->index;
delta <<= PAGE_CACHE_SHIFT;
delta += offset;
delta -= iomapp->iomap_offset;
delta >>= block_bits;
sector_shift = block_bits - BBSHIFT;
bn = iomapp->iomap_bn >> sector_shift;
bn += delta;
BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
bn = (iomapp->iomap_bn >> sector_shift) +
((offset - iomapp->iomap_offset) >> block_bits);
/* A zero block number is only accepted for IOMAP_REALTIME mappings. */
ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
lock_buffer(bh);
bh->b_blocknr = bn;
bh->b_bdev = iomapp->iomap_target->pbr_bdev;
bh->b_bdev = iomapp->iomap_target->bt_bdev;
set_buffer_mapped(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
}
/*
 * Look for a page at index which is unlocked and contains our
 * unwritten extent flagged buffers at its head. Returns page
 * locked and with an extra reference count, and length of the
 * unwritten extent component on this page that we can write,
 * in units of filesystem blocks.
 *
 * Buffers are claimed from the start of the page for as long as they
 * are unwritten and uptodate, are covered by @iomapp, and start below
 * @max_offset.  Each claimed buffer is mapped, flagged unwritten_io
 * and has b_private pointed at @ioend; the count of claimed buffers
 * is stored in *fsbs.  Returns NULL (with the page unlocked again)
 * when no page is found, the page is under writeback, or no buffer
 * could be claimed.
 */
STATIC struct page *
xfs_probe_unwritten_page(
struct address_space *mapping,
pgoff_t index,
xfs_iomap_t *iomapp,
xfs_ioend_t *ioend,
unsigned long max_offset,
unsigned long *fsbs,
unsigned int bbits)
{
struct page *page;
page = find_trylock_page(mapping, index);
if (!page)
return NULL;
if (PageWriteback(page))
goto out;
if (page->mapping && page_has_buffers(page)) {
struct buffer_head *bh, *head;
/* Offset within the page of the buffer being examined. */
unsigned long p_offset = 0;
*fsbs = 0;
bh = head = page_buffers(page);
do {
if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
break;
if (!xfs_offset_to_map(page, iomapp, p_offset))
break;
if (p_offset >= max_offset)
break;
xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
set_buffer_unwritten_io(bh);
bh->b_private = ioend;
p_offset += bh->b_size;
(*fsbs)++;
} while ((bh = bh->b_this_page) != head);
/* Non-zero p_offset: at least one buffer was claimed; keep the page locked. */
if (p_offset)
return page;
}
out:
unlock_page(page);
return NULL;
}
/*
* Look for a page at index which is unlocked and not mapped
* yet - clustering for mmap write case.
* Look for a page at index that is suitable for clustering.
*/
STATIC unsigned int
xfs_probe_unmapped_page(
struct address_space *mapping,
pgoff_t index,
unsigned int pg_offset)
xfs_probe_page(
struct page *page,
unsigned int pg_offset,
int mapped)
{
struct page *page;
int ret = 0;
page = find_trylock_page(mapping, index);
if (!page)
return 0;
if (PageWriteback(page))
goto out;
return 0;
if (page->mapping && PageDirty(page)) {
if (page_has_buffers(page)) {
......@@ -357,79 +487,101 @@ xfs_probe_unmapped_page(
bh = head = page_buffers(page);
do {
if (buffer_mapped(bh) || !buffer_uptodate(bh))
if (!buffer_uptodate(bh))
break;
if (mapped != buffer_mapped(bh))
break;
ret += bh->b_size;
if (ret >= pg_offset)
break;
} while ((bh = bh->b_this_page) != head);
} else
ret = PAGE_CACHE_SIZE;
ret = mapped ? 0 : PAGE_CACHE_SIZE;
}
out:
unlock_page(page);
return ret;
}
STATIC unsigned int
xfs_probe_unmapped_cluster(
STATIC size_t
xfs_probe_cluster(
struct inode *inode,
struct page *startpage,
struct buffer_head *bh,
struct buffer_head *head)
struct buffer_head *head,
int mapped)
{
struct pagevec pvec;
pgoff_t tindex, tlast, tloff;
unsigned int pg_offset, len, total = 0;
struct address_space *mapping = inode->i_mapping;
size_t total = 0;
int done = 0, i;
/* First sum forwards in this page */
do {
if (buffer_mapped(bh))
break;
if (mapped != buffer_mapped(bh))
return total;
total += bh->b_size;
} while ((bh = bh->b_this_page) != head);
/* If we reached the end of the page, sum forwards in
* following pages.
*/
if (bh == head) {
tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
/* Prune this back to avoid pathological behavior */
tloff = min(tlast, startpage->index + 64);
for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
len = xfs_probe_unmapped_page(mapping, tindex,
PAGE_CACHE_SIZE);
if (!len)
return total;
/* if we reached the end of the page, sum forwards in following pages */
tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
tindex = startpage->index + 1;
/* Prune this back to avoid pathological behavior */
tloff = min(tlast, startpage->index + 64);
pagevec_init(&pvec, 0);
while (!done && tindex <= tloff) {
unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
break;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
size_t pg_offset, len = 0;
if (tindex == tlast) {
pg_offset =
i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
if (!pg_offset) {
done = 1;
break;
}