Commit f41def39 authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-5.4-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlights are:

   - automatic recovery of a blacklisted filesystem session (Zheng Yan).
     This is disabled by default and can be enabled by mounting with the
     new "recover_session=clean" option.

   - serialize buffered reads and O_DIRECT writes (Jeff Layton). Care is
     taken to avoid serializing O_DIRECT reads and writes with each
     other; this is based on the exclusion scheme from NFS.

   - handle large osdmaps better in the face of fragmented memory
     (myself)

   - don't limit which security.* xattrs can be read or set (Jeff Layton).
     We were overly restrictive here, unnecessarily preventing things
     like file capability sets stored in security.capability from
     working.

   - allow copy_file_range() within the same inode and across different
     filesystems within the same cluster (Luis Henriques)"

* tag 'ceph-for-5.4-rc1' of git://github.com/ceph/ceph-client: (41 commits)
  ceph: call ceph_mdsc_destroy from destroy_fs_client
  libceph: use ceph_kvmalloc() for osdmap arrays
  libceph: avoid a __vmalloc() deadlock in ceph_kvmalloc()
  ceph: allow object copies across different filesystems in the same cluster
  ceph: include ceph_debug.h in cache.c
  ceph: move static keyword to the front of declarations
  rbd: pull rbd_img_request_create() dout out into the callers
  ceph: reconnect connection if session hang in opening state
  libceph: drop unused con parameter of calc_target()
  ceph: use release_pages() directly
  rbd: fix response length parameter for encoded strings
  ceph: allow arbitrary security.* xattrs
  ceph: only set CEPH_I_SEC_INITED if we got a MAC label
  ceph: turn ceph_security_invalidate_secctx into static inline
  ceph: add buffered/direct exclusionary locking for reads and writes
  libceph: handle OSD op ceph_pagelist_append() errors
  ceph: don't return a value from void function
  ceph: don't freeze during write page faults
  ceph: update the mtime when truncating up
  ceph: fix indentation in __get_snap_name()
  ...
parents 7b1373dd 3ee5a701
......@@ -158,6 +158,20 @@ Mount Options
copies. Currently, it's only used in copy_file_range, which will revert
to the default VFS implementation if this option is used.
recover_session=<no|clean>
Set auto reconnect mode in the case where the client is blacklisted. The
available modes are "no" and "clean". The default is "no".
* no: never attempt to reconnect when client detects that it has been
blacklisted. Operations will generally fail after being blacklisted.
* clean: client reconnects to the ceph cluster automatically when it
detects that it has been blacklisted. During reconnect, client drops
dirty data/metadata, invalidates page caches and writable file handles.
After reconnect, file locks become stale because the MDS loses track
of them. If an inode contains any stale file locks, read/write on the
inode is not allowed until applications release all stale file locks.
More Information
================
......
......@@ -1754,8 +1754,6 @@ static struct rbd_img_request *rbd_img_request_create(
mutex_init(&img_request->state_mutex);
kref_init(&img_request->kref);
dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
obj_op_name(op_type), img_request);
return img_request;
}
......@@ -2944,6 +2942,9 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
__set_bit(IMG_REQ_CHILD, &child_img_req->flags);
child_img_req->obj_request = obj_req;
dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
obj_req);
if (!rbd_img_is_write(img_req)) {
switch (img_req->data_type) {
case OBJ_REQUEST_BIO:
......@@ -4877,6 +4878,9 @@ static void rbd_queue_workfn(struct work_struct *work)
img_request->rq = rq;
snapc = NULL; /* img_request consumes a ref */
dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
img_request, obj_op_name(op_type), offset, length);
if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
result = rbd_img_fill_nodata(img_request, offset, length);
else
......@@ -5669,17 +5673,20 @@ static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
{
size_t size;
void *reply_buf;
int ret;
void *p;
reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
/* Response will be an encoded string, which includes a length */
size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
reply_buf = kzalloc(size, GFP_KERNEL);
if (!reply_buf)
return -ENOMEM;
ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_object_prefix",
NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX);
NULL, 0, reply_buf, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
goto out;
......@@ -6696,7 +6703,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
dout("rbd id object name is %s\n", oid.name);
/* Response will be an encoded string, which includes a length */
size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
response = kzalloc(size, GFP_NOIO);
if (!response) {
......@@ -6708,7 +6714,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
"get_id", NULL, 0,
response, RBD_IMAGE_ID_LEN_MAX);
response, size);
dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret == -ENOENT) {
image_id = kstrdup("", GFP_KERNEL);
......
......@@ -6,7 +6,7 @@
obj-$(CONFIG_CEPH_FS) += ceph.o
ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
export.o caps.o snap.o xattr.o quota.o \
export.o caps.o snap.o xattr.o quota.o io.o \
mds_client.o mdsmap.o strings.o ceph_frag.o \
debugfs.o
......
......@@ -189,8 +189,7 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
{
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int err = 0;
u64 off = page_offset(page);
u64 len = PAGE_SIZE;
......@@ -219,8 +218,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
off, &len,
err = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
......@@ -228,6 +227,8 @@ static int ceph_do_readpage(struct file *filp, struct page *page)
if (err < 0) {
SetPageError(page);
ceph_fscache_readpage_cancel(inode, page);
if (err == -EBLACKLISTED)
fsc->blacklisted = true;
goto out;
}
if (err < PAGE_SIZE)
......@@ -266,6 +267,8 @@ static void finish_read(struct ceph_osd_request *req)
int i;
dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
if (rc == -EBLACKLISTED)
ceph_inode_to_client(inode)->blacklisted = true;
/* unlock all pages, zeroing any data we didn't read */
osd_data = osd_req_op_extent_osd_data(req, 0);
......@@ -323,7 +326,8 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
/* caller of readpages does not hold buffer and read caps
* (fadvise, madvise and readahead cases) */
int want = CEPH_CAP_FILE_CACHE;
ret = ceph_try_get_caps(ci, CEPH_CAP_FILE_RD, want, true, &got);
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
true, &got);
if (ret < 0) {
dout("start_read %p, error getting cap\n", inode);
} else if (!(got & want)) {
......@@ -569,7 +573,7 @@ static u64 get_writepages_data_length(struct inode *inode,
/*
* Write a single page, but leave the page locked.
*
* If we get a write error, set the page error bit, but still adjust the
* If we get a write error, mark the mapping for error, but still adjust the
* dirty page accounting (i.e., page is no longer dirty).
*/
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
......@@ -640,9 +644,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
end_page_writeback(page);
return err;
}
if (err == -EBLACKLISTED)
fsc->blacklisted = true;
dout("writepage setting page/mapping error %d %p\n",
err, page);
SetPageError(page);
mapping_set_error(&inode->i_data, err);
wbc->pages_skipped++;
} else {
......@@ -679,23 +684,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
return err;
}
/*
* lame release_pages helper. release_pages() isn't exported to
* modules.
*/
static void ceph_release_pages(struct page **pages, int num)
{
struct pagevec pvec;
int i;
pagevec_init(&pvec);
for (i = 0; i < num; i++) {
if (pagevec_add(&pvec, pages[i]) == 0)
pagevec_release(&pvec);
}
pagevec_release(&pvec);
}
/*
* async writeback completion handler.
*
......@@ -720,6 +708,8 @@ static void writepages_finish(struct ceph_osd_request *req)
if (rc < 0) {
mapping_set_error(mapping, rc);
ceph_set_error_write(ci);
if (rc == -EBLACKLISTED)
fsc->blacklisted = true;
} else {
ceph_clear_error_write(ci);
}
......@@ -769,7 +759,7 @@ static void writepages_finish(struct ceph_osd_request *req)
dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
inode, osd_data->length, rc >= 0 ? num_pages : 0);
ceph_release_pages(osd_data->pages, num_pages);
release_pages(osd_data->pages, num_pages);
}
ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
......@@ -1452,7 +1442,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
got = 0;
err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
&got, &pinned_page);
if (err < 0)
goto out_restore;
......@@ -1540,6 +1531,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
if (!prealloc_cf)
return VM_FAULT_OOM;
sb_start_pagefault(inode->i_sb);
ceph_block_sigs(&oldset);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
......@@ -1568,7 +1560,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
&got, NULL);
if (err < 0)
goto out_free;
......@@ -1614,6 +1606,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
ceph_put_cap_refs(ci, got);
out_free:
ceph_restore_sigs(&oldset);
sb_end_pagefault(inode->i_sb);
ceph_free_cap_flush(prealloc_cf);
if (err < 0)
ret = vmf_error(err);
......@@ -1946,12 +1939,17 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
if (err >= 0 || err == -ENOENT)
have |= POOL_READ;
else if (err != -EPERM)
else if (err != -EPERM) {
if (err == -EBLACKLISTED)
fsc->blacklisted = true;
goto out_unlock;
}
if (err2 == 0 || err2 == -EEXIST)
have |= POOL_WRITE;
else if (err2 != -EPERM) {
if (err2 == -EBLACKLISTED)
fsc->blacklisted = true;
err = err2;
goto out_unlock;
}
......@@ -1989,10 +1987,11 @@ out:
return err;
}
int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
int ceph_pool_perm_check(struct inode *inode, int need)
{
s64 pool;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_string *pool_ns;
s64 pool;
int ret, flags;
if (ci->i_vino.snap != CEPH_NOSNAP) {
......@@ -2004,7 +2003,7 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
return 0;
}
if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
if (ceph_test_mount_opt(ceph_inode_to_client(inode),
NOPOOLPERM))
return 0;
......
......@@ -6,6 +6,8 @@
* Written by Milosz Tanski (milosz@adfin.com)
*/
#include <linux/ceph/ceph_debug.h>
#include "super.h"
#include "cache.h"
......
......@@ -457,37 +457,6 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
return cap;
}
/*
 * Pick the id of an MDS that has a cap on this inode.
 *
 * Walks ci->i_caps from rb_first() onwards and stops at the first cap
 * whose issued mask contains any of FILE_WR, FILE_BUFFER or FILE_EXCL.
 * If no cap matches, the mds of the last cap visited is returned; if
 * the tree is empty the result is -1.
 */
static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
{
	struct rb_node *node = rb_first(&ci->i_caps);
	int found = -1;

	while (node) {
		struct ceph_cap *c = rb_entry(node, struct ceph_cap, ci_node);

		/* remember this mds as the fallback answer */
		found = c->mds;

		/* an mds with WR|BUFFER|EXCL caps is preferred: stop here */
		if (c->issued & (CEPH_CAP_FILE_WR |
				 CEPH_CAP_FILE_BUFFER |
				 CEPH_CAP_FILE_EXCL))
			return found;

		node = rb_next(node);
	}

	return found;
}
/*
 * Locked wrapper around __ceph_get_cap_mds(): holds ci->i_ceph_lock
 * for the duration of the lookup and returns its result unchanged.
 */
int ceph_get_cap_mds(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int ret;

	spin_lock(&ci->i_ceph_lock);
	ret = __ceph_get_cap_mds(ceph_inode(inode));
	spin_unlock(&ci->i_ceph_lock);

	return ret;
}
/*
* Called under i_ceph_lock.
*/
......@@ -628,7 +597,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
/*
* Add a capability under the given MDS session.
*
* Caller should hold session snap_rwsem (read) and s_mutex.
* Caller should hold session snap_rwsem (read) and ci->i_ceph_lock
*
* @fmode is the open file mode, if we are opening a file, otherwise
* it is < 0. (This is so we can atomically add the cap and add an
......@@ -645,6 +614,9 @@ void ceph_add_cap(struct inode *inode,
struct ceph_cap *cap;
int mds = session->s_mds;
int actual_wanted;
u32 gen;
lockdep_assert_held(&ci->i_ceph_lock);
dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
session->s_mds, cap_id, ceph_cap_string(issued), seq);
......@@ -656,6 +628,10 @@ void ceph_add_cap(struct inode *inode,
if (fmode >= 0)
wanted |= ceph_caps_for_mode(fmode);
spin_lock(&session->s_gen_ttl_lock);
gen = session->s_cap_gen;
spin_unlock(&session->s_gen_ttl_lock);
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
cap = *new_cap;
......@@ -681,7 +657,7 @@ void ceph_add_cap(struct inode *inode,
list_move_tail(&cap->session_caps, &session->s_caps);
spin_unlock(&session->s_cap_lock);
if (cap->cap_gen < session->s_cap_gen)
if (cap->cap_gen < gen)
cap->issued = cap->implemented = CEPH_CAP_PIN;
/*
......@@ -775,7 +751,7 @@ void ceph_add_cap(struct inode *inode,
cap->seq = seq;
cap->issue_seq = seq;
cap->mseq = mseq;
cap->cap_gen = session->s_cap_gen;
cap->cap_gen = gen;
if (fmode >= 0)
__ceph_get_fmode(ci, fmode);
......@@ -1284,10 +1260,6 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
* Make note of max_size reported/requested from mds, revoked caps
* that have now been implemented.
*
* Make half-hearted attempt to invalidate page cache if we are
* dropping RDCACHE. Note that this will leave behind locked pages
* that we'll then need to deal with elsewhere.
*
* Return non-zero if delayed release, or we experienced an error
* such that the caller should requeue + retry later.
*
......@@ -1746,11 +1718,11 @@ static bool __finish_cap_flush(struct ceph_mds_client *mdsc,
* Add dirty inode to the flushing list. Assigned a seq number so we
* can wait for caps to flush without starving.
*
* Called under i_ceph_lock.
* Called under i_ceph_lock. Returns the flush tid.
*/
static int __mark_caps_flushing(struct inode *inode,
static u64 __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session, bool wake,
u64 *flush_tid, u64 *oldest_flush_tid)
u64 *oldest_flush_tid)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
......@@ -1789,8 +1761,7 @@ static int __mark_caps_flushing(struct inode *inode,
list_add_tail(&cf->i_list, &ci->i_cap_flush_list);
*flush_tid = cf->tid;
return flushing;
return cf->tid;
}
/*
......@@ -2028,11 +1999,6 @@ retry_locked:
}
ack:
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
dout(" skipping %p I_NOFLUSH set\n", inode);
continue;
}
if (session && session != cap->session) {
dout("oops, wrong session %p mutex\n", session);
mutex_unlock(&session->s_mutex);
......@@ -2080,9 +2046,9 @@ ack:
}
if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
flushing = __mark_caps_flushing(inode, session, false,
&flush_tid,
&oldest_flush_tid);
flushing = ci->i_dirty_caps;
flush_tid = __mark_caps_flushing(inode, session, false,
&oldest_flush_tid);
} else {
flushing = 0;
flush_tid = 0;
......@@ -2130,16 +2096,11 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
retry:
spin_lock(&ci->i_ceph_lock);
retry_locked:
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
spin_unlock(&ci->i_ceph_lock);
dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
goto out;
}
if (ci->i_dirty_caps && ci->i_auth_cap) {
struct ceph_cap *cap = ci->i_auth_cap;
int delayed;
if (!session || session != cap->session) {
if (session != cap->session) {
spin_unlock(&ci->i_ceph_lock);
if (session)
mutex_unlock(&session->s_mutex);
......@@ -2161,8 +2122,9 @@ retry_locked:
goto retry_locked;
}
flushing = __mark_caps_flushing(inode, session, true,
&flush_tid, &oldest_flush_tid);
flushing = ci->i_dirty_caps;
flush_tid = __mark_caps_flushing(inode, session, true,
&oldest_flush_tid);
/* __send_cap drops i_ceph_lock */
delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
......@@ -2261,35 +2223,45 @@ static int unsafe_request_wait(struct inode *inode)
int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
u64 flush_tid;
int ret;
int ret, err;
int dirty;
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
ret = file_write_and_wait_range(file, start, end);
if (ret < 0)
goto out;
if (datasync)
goto out;
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
ret = unsafe_request_wait(inode);
err = unsafe_request_wait(inode);
/*
* only wait on non-file metadata writeback (the mds
* can recover size and mtime, so we don't need to
* wait for that)
*/
if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
ret = wait_event_interruptible(ci->i_cap_wq,
if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
err = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
if (err < 0)
ret = err;
if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) {
spin_lock(&file->f_lock);
err = errseq_check_and_advance(&ci->i_meta_err,
&fi->meta_err);
spin_unlock(&file->f_lock);
if (err < 0)
ret = err;
}
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
......@@ -2560,10 +2532,15 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
*
* FIXME: how does a 0 return differ from -EAGAIN?
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, bool nonblock, int *got)
/* Bit flags accepted in the @flags argument of try_get_cap_refs(). */
enum {
	/* return -EAGAIN instead of going on to call down_read() */
	NON_BLOCKING	= 1 << 0,
	/* fail with -EIO when CEPH_I_ERROR_FILELOCK is set on the inode */
	CHECK_FILELOCK	= 1 << 1,
};
static int try_get_cap_refs(struct inode *inode, int need, int want,
loff_t endoff, int flags, int *got)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
int ret = 0;
int have, implemented;
......@@ -2576,6 +2553,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
again:
spin_lock(&ci->i_ceph_lock);
if ((flags & CHECK_FILELOCK) &&
(ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
dout("try_get_cap_refs %p error filelock\n", inode);
ret = -EIO;
goto out_unlock;
}
/* make sure file is actually open */
file_wanted = __ceph_caps_file_wanted(ci);
if ((file_wanted & need) != need) {
......@@ -2637,7 +2621,7 @@ again:
* we can not call down_read() when
* task isn't in TASK_RUNNING state
*/
if (nonblock) {
if (flags & NON_BLOCKING) {
ret = -EAGAIN;
goto out_unlock;
}
......@@ -2731,18 +2715,19 @@ static void check_max_size(struct inode *inode, loff_t endoff)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
}
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
int ceph_try_get_caps(struct inode *inode, int need, int want,
bool nonblock, int *got)
{
int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD);
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
ret = ceph_pool_perm_check(ci, need);
ret = ceph_pool_perm_check(inode, need);
if (ret < 0)
return ret;
ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
ret = try_get_cap_refs(inode, need, want, 0,
(nonblock ? NON_BLOCKING : 0), got);
return ret == -EAGAIN ? 0 : ret;
}
......@@ -2751,30 +2736,40 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
int ceph_get_caps(struct file *filp, int need, int want,
loff_t endoff, int *got, struct page **pinned_page)
{
int _got, ret;
struct ceph_file_info *fi = filp->private_data;
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int ret, _got, flags;
ret = ceph_pool_perm_check(ci, need);
ret = ceph_pool_perm_check(inode, need);
if (ret < 0)
return ret;
if ((fi->fmode & CEPH_FILE_MODE_WR) &&
fi->filp_gen != READ_ONCE(fsc->filp_gen))
return -EBADF;
while (true) {
if (endoff > 0)
check_max_size(&ci->vfs_inode, endoff);
check_max_size(inode, endoff);
flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
_got = 0;
ret = try_get_cap_refs(ci, need, want, endoff,
false, &_got);
ret = try_get_cap_refs(inode, need, want, endoff,
flags, &_got);
if (ret == -EAGAIN)
continue;
if (!ret) {
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait);
while (!(ret = try_get_cap_refs(ci, need, want, endoff,
true, &_got))) {
flags |= NON_BLOCKING;
while (!(ret = try_get_cap_refs(inode, need, want,