Commit 523634db authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'libnvdimm-fixes-5.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
 "A collection of locking and async operations fixes for v5.3-rc2. These
  had been soaking in a branch targeting the merge window, but missed
  due to a regression hunt. This fixed up version has otherwise been in
  -next this past week with no reported issues.

  In order to gain confidence in the locking changes the pull also
  includes a debug / instrumentation patch to enable lockdep coverage
  for libnvdimm subsystem operations that depend on the device_lock for
  exclusion. As mentioned in the changelog it is a hack, but it works
  and documents the locking expectations of the sub-system in a way that
  others can use lockdep to verify. The driver core touches got an ack
  from Greg.

  Summary:

   - Fix duplicate device_unregister() calls (multiple threads competing
     to do unregister work when scheduling device removal from a sysfs
     attribute of the self-same device).

   - Fix badblocks registration order bug. Ensure region badblocks are
     initialized in advance of namespace registration.

   - Fix a deadlock between the bus lock and probe operations.

   - Export device-core infrastructure to coordinate async operations
     via the device ->dead state.

   - Add device-core infrastructure to validate device_lock() usage with
     lockdep"

* tag 'libnvdimm-fixes-5.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  driver-core, libnvdimm: Let device subsystems add local lockdep coverage
  libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock
  libnvdimm/bus: Stop holding nvdimm_bus_list_mutex over __nd_ioctl()
  libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant
  libnvdimm/region: Register badblocks before namespaces
  libnvdimm/bus: Prevent duplicate device_unregister() calls
  drivers/base: Introduce kill_device()
parents 5168afe6 87a30e1f
......@@ -1282,7 +1282,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
if (rc)
return rc;
device_lock(dev);
nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
......@@ -1299,7 +1299,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
break;
}
}
device_unlock(dev);
nfit_device_unlock(dev);
if (rc)
return rc;
return size;
......@@ -1319,7 +1319,7 @@ static ssize_t scrub_show(struct device *dev,
ssize_t rc = -ENXIO;
bool busy;
device_lock(dev);
nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (!nd_desc) {
device_unlock(dev);
......@@ -1339,7 +1339,7 @@ static ssize_t scrub_show(struct device *dev,
}
mutex_unlock(&acpi_desc->init_mutex);
device_unlock(dev);
nfit_device_unlock(dev);
return rc;
}
......@@ -1356,14 +1356,14 @@ static ssize_t scrub_store(struct device *dev,
if (val != 1)
return -EINVAL;
device_lock(dev);
nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
}
device_unlock(dev);
nfit_device_unlock(dev);
if (rc)
return rc;
return size;
......@@ -1749,9 +1749,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
struct acpi_device *adev = data;
struct device *dev = &adev->dev;
device_lock(dev->parent);
nfit_device_lock(dev->parent);
__acpi_nvdimm_notify(dev, event);
device_unlock(dev->parent);
nfit_device_unlock(dev->parent);
}
static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
......@@ -3457,8 +3457,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
struct device *dev = acpi_desc->dev;
/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
device_lock(dev);
device_unlock(dev);
nfit_device_lock(dev);
nfit_device_unlock(dev);
/* Bounce the init_mutex to complete initial registration */
mutex_lock(&acpi_desc->init_mutex);
......@@ -3602,8 +3602,8 @@ void acpi_nfit_shutdown(void *data)
* acpi_nfit_ars_rescan() submissions have had a chance to
* either submit or see ->cancel set.
*/
device_lock(bus_dev);
device_unlock(bus_dev);
nfit_device_lock(bus_dev);
nfit_device_unlock(bus_dev);
flush_workqueue(nfit_wq);
}
......@@ -3746,9 +3746,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
device_lock(&adev->dev);
nfit_device_lock(&adev->dev);
__acpi_nfit_notify(&adev->dev, adev->handle, event);
device_unlock(&adev->dev);
nfit_device_unlock(&adev->dev);
}
static const struct acpi_device_id acpi_nfit_ids[] = {
......
......@@ -312,6 +312,30 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
}
#ifdef CONFIG_PROVE_LOCKING
static inline void nfit_device_lock(struct device *dev)
{
device_lock(dev);
mutex_lock(&dev->lockdep_mutex);
}
static inline void nfit_device_unlock(struct device *dev)
{
mutex_unlock(&dev->lockdep_mutex);
device_unlock(dev);
}
#else
static inline void nfit_device_lock(struct device *dev)
{
device_lock(dev);
}
static inline void nfit_device_unlock(struct device *dev)
{
device_unlock(dev);
}
#endif
const guid_t *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void acpi_nfit_shutdown(void *data);
......
......@@ -1663,6 +1663,9 @@ void device_initialize(struct device *dev)
kobject_init(&dev->kobj, &device_ktype);
INIT_LIST_HEAD(&dev->dma_pools);
mutex_init(&dev->mutex);
#ifdef CONFIG_PROVE_LOCKING
mutex_init(&dev->lockdep_mutex);
#endif
lockdep_set_novalidate_class(&dev->mutex);
spin_lock_init(&dev->devres_lock);
INIT_LIST_HEAD(&dev->devres_head);
......@@ -2211,6 +2214,24 @@ void put_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(put_device);
bool kill_device(struct device *dev)
{
/*
* Require the device lock and set the "dead" flag to guarantee that
* the update behavior is consistent with the other bitfields near
* it and that we cannot have an asynchronous probe routine trying
* to run while we are tearing out the bus/class/sysfs from
* underneath the device.
*/
lockdep_assert_held(&dev->mutex);
if (dev->p->dead)
return false;
dev->p->dead = true;
return true;
}
EXPORT_SYMBOL_GPL(kill_device);
/**
* device_del - delete device from system.
* @dev: device.
......@@ -2230,15 +2251,8 @@ void device_del(struct device *dev)
struct kobject *glue_dir = NULL;
struct class_interface *class_intf;
/*
* Hold the device lock and set the "dead" flag to guarantee that
* the update behavior is consistent with the other bitfields near
* it and that we cannot have an asynchronous probe routine trying
* to run while we are tearing out the bus/class/sysfs from
* underneath the device.
*/
device_lock(dev);
dev->p->dead = true;
kill_device(dev);
device_unlock(dev);
/* Notify clients of device removal. This call must come
......
......@@ -62,14 +62,14 @@ static ssize_t sector_size_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
device_lock(dev);
nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
device_unlock(dev);
nd_device_unlock(dev);
return rc ? rc : len;
}
......@@ -91,11 +91,11 @@ static ssize_t uuid_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
device_lock(dev);
nd_device_lock(dev);
rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
device_unlock(dev);
nd_device_unlock(dev);
return rc ? rc : len;
}
......@@ -120,13 +120,13 @@ static ssize_t namespace_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
device_lock(dev);
nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
device_unlock(dev);
nd_device_unlock(dev);
return rc;
}
......@@ -138,14 +138,14 @@ static ssize_t size_show(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
device_lock(dev);
nd_device_lock(dev);
if (dev->driver)
rc = sprintf(buf, "%llu\n", nd_btt->size);
else {
/* no size to convey if the btt instance is disabled */
rc = -ENXIO;
}
device_unlock(dev);
nd_device_unlock(dev);
return rc;
}
......
......@@ -26,7 +26,7 @@
int nvdimm_major;
static int nvdimm_bus_major;
static struct class *nd_class;
struct class *nd_class;
static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
......@@ -73,7 +73,7 @@ static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
{
nvdimm_bus_lock(&nvdimm_bus->dev);
if (--nvdimm_bus->probe_active == 0)
wake_up(&nvdimm_bus->probe_wait);
wake_up(&nvdimm_bus->wait);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
......@@ -91,7 +91,10 @@ static int nvdimm_bus_probe(struct device *dev)
dev->driver->name, dev_name(dev));
nvdimm_bus_probe_start(nvdimm_bus);
debug_nvdimm_lock(dev);
rc = nd_drv->probe(dev);
debug_nvdimm_unlock(dev);
if (rc == 0)
nd_region_probe_success(nvdimm_bus, dev);
else
......@@ -113,8 +116,11 @@ static int nvdimm_bus_remove(struct device *dev)
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
int rc = 0;
if (nd_drv->remove)
if (nd_drv->remove) {
debug_nvdimm_lock(dev);
rc = nd_drv->remove(dev);
debug_nvdimm_unlock(dev);
}
nd_region_disable(nvdimm_bus, dev);
dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
......@@ -140,7 +146,7 @@ static void nvdimm_bus_shutdown(struct device *dev)
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
device_lock(dev);
nd_device_lock(dev);
if (dev->driver) {
struct nd_device_driver *nd_drv;
......@@ -148,7 +154,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event)
if (nd_drv->notify)
nd_drv->notify(dev, event);
}
device_unlock(dev);
nd_device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);
......@@ -296,7 +302,7 @@ static void nvdimm_bus_release(struct device *dev)
kfree(nvdimm_bus);
}
static bool is_nvdimm_bus(struct device *dev)
bool is_nvdimm_bus(struct device *dev)
{
return dev->release == nvdimm_bus_release;
}
......@@ -341,7 +347,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
init_waitqueue_head(&nvdimm_bus->probe_wait);
init_waitqueue_head(&nvdimm_bus->wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
......@@ -426,6 +432,9 @@ static int nd_bus_remove(struct device *dev)
list_del_init(&nvdimm_bus->list);
mutex_unlock(&nvdimm_bus_list_mutex);
wait_event(nvdimm_bus->wait,
atomic_read(&nvdimm_bus->ioctl_active) == 0);
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
......@@ -547,13 +556,38 @@ EXPORT_SYMBOL(nd_device_register);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
{
bool killed;
switch (mode) {
case ND_ASYNC:
/*
* In the async case this is being triggered with the
* device lock held and the unregistration work needs to
* be moved out of line iff this is thread has won the
* race to schedule the deletion.
*/
if (!kill_device(dev))
return;
get_device(dev);
async_schedule_domain(nd_async_device_unregister, dev,
&nd_async_domain);
break;
case ND_SYNC:
/*
* In the sync case the device is being unregistered due
* to a state change of the parent. Claim the kill state
* to synchronize against other unregistration requests,
* or otherwise let the async path handle it if the
* unregistration was already queued.
*/
nd_device_lock(dev);
killed = kill_device(dev);
nd_device_unlock(dev);
if (!killed)
return;
nd_synchronize();
device_unregister(dev);
break;
......@@ -859,10 +893,12 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
do {
if (nvdimm_bus->probe_active == 0)
break;
nvdimm_bus_unlock(&nvdimm_bus->dev);
wait_event(nvdimm_bus->probe_wait,
nvdimm_bus_unlock(dev);
nd_device_unlock(dev);
wait_event(nvdimm_bus->wait,
nvdimm_bus->probe_active == 0);
nvdimm_bus_lock(&nvdimm_bus->dev);
nd_device_lock(dev);
nvdimm_bus_lock(dev);
} while (true);
}
......@@ -945,20 +981,19 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
static char out_env[ND_CMD_MAX_ENVELOPE];
static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
struct device *dev = &nvdimm_bus->dev;
void __user *p = (void __user *) arg;
char *out_env = NULL, *in_env = NULL;
const char *cmd_name, *dimm_name;
u32 in_len = 0, out_len = 0;
unsigned int func = cmd;
unsigned long cmd_mask;
struct nd_cmd_pkg pkg;
int rc, i, cmd_rc;
void *buf = NULL;
u64 buf_len = 0;
void *buf;
if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd);
......@@ -989,7 +1024,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
case ND_CMD_ARS_START:
case ND_CMD_CLEAR_ERROR:
case ND_CMD_CALL:
dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
dev_dbg(dev, "'%s' command while read-only.\n",
nvdimm ? nvdimm_cmd_name(cmd)
: nvdimm_bus_cmd_name(cmd));
return -EPERM;
......@@ -998,6 +1033,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
/* process an input envelope */
in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
if (!in_env)
return -ENOMEM;
for (i = 0; i < desc->in_num; i++) {
u32 in_size, copy;
......@@ -1005,14 +1043,17 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (in_size == UINT_MAX) {
dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
__func__, dimm_name, cmd_name, i);
return -ENXIO;
rc = -ENXIO;
goto out;
}
if (in_len < sizeof(in_env))
copy = min_t(u32, sizeof(in_env) - in_len, in_size);
if (in_len < ND_CMD_MAX_ENVELOPE)
copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
else
copy = 0;
if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
return -EFAULT;
if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
rc = -EFAULT;
goto out;
}
in_len += in_size;
}
......@@ -1024,6 +1065,12 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
/* process an output envelope */
out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
if (!out_env) {
rc = -ENOMEM;
goto out;
}
for (i = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
(u32 *) in_env, (u32 *) out_env, 0);
......@@ -1032,15 +1079,18 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (out_size == UINT_MAX) {
dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
dimm_name, cmd_name, i);
return -EFAULT;
rc = -EFAULT;
goto out;
}
if (out_len < sizeof(out_env))
copy = min_t(u32, sizeof(out_env) - out_len, out_size);
if (out_len < ND_CMD_MAX_ENVELOPE)
copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
else
copy = 0;
if (copy && copy_from_user(&out_env[out_len],
p + in_len + out_len, copy))
return -EFAULT;
p + in_len + out_len, copy)) {
rc = -EFAULT;
goto out;
}
out_len += out_size;
}
......@@ -1048,19 +1098,23 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
return -EINVAL;
rc = -EINVAL;
goto out;
}
buf = vmalloc(buf_len);
if (!buf)
return -ENOMEM;
if (!buf) {
rc = -ENOMEM;
goto out;
}
if (copy_from_user(buf, p, buf_len)) {
rc = -EFAULT;
goto out;
}
nvdimm_bus_lock(&nvdimm_bus->dev);
nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;
......@@ -1075,39 +1129,24 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
}
nvdimm_bus_unlock(&nvdimm_bus->dev);
if (copy_to_user(p, buf, buf_len))
rc = -EFAULT;
vfree(buf);
return rc;
out_unlock:
nvdimm_bus_unlock(&nvdimm_bus->dev);
out:
out_unlock:
nvdimm_bus_unlock(dev);
nd_device_unlock(dev);
out:
kfree(in_env);
kfree(out_env);
vfree(buf);
return rc;
}
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long id = (long) file->private_data;
int rc = -ENXIO, ro;
struct nvdimm_bus *nvdimm_bus;
ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
if (nvdimm_bus->id == id) {
rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
break;
}
}
mutex_unlock(&nvdimm_bus_list_mutex);
return rc;
}
enum nd_ioctl_mode {
BUS_IOCTL,
DIMM_IOCTL,
};
static int match_dimm(struct device *dev, void *data)
{
......@@ -1122,31 +1161,62 @@ static int match_dimm(struct device *dev, void *data)
return 0;
}
static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
enum nd_ioctl_mode mode)
{
int rc = -ENXIO, ro;
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_bus *nvdimm_bus, *found = NULL;
long id = (long) file->private_data;
struct nvdimm *nvdimm = NULL;
int rc, ro;
ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
struct device *dev = device_find_child(&nvdimm_bus->dev,
file->private_data, match_dimm);
struct nvdimm *nvdimm;
if (!dev)
continue;
if (mode == DIMM_IOCTL) {
struct device *dev;
dev = device_find_child(&nvdimm_bus->dev,
file->private_data, match_dimm);
if (!dev)
continue;
nvdimm = to_nvdimm(dev);
found = nvdimm_bus;
} else if (nvdimm_bus->id == id) {
found = nvdimm_bus;
}
nvdimm = to_nvdimm(dev);
rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
put_device(dev);
break;
if (found) {
atomic_inc(&nvdimm_bus->ioctl_active);
break;
}
}
mutex_unlock(&nvdimm_bus_list_mutex);
if (!found)
return -ENXIO;
nvdimm_bus = found;
rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
if (nvdimm)
put_device(&nvdimm->dev);
if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
wake_up(&nvdimm_bus->wait);
return rc;
}
static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{