Commit 956a9202 authored by Ryan Wilson's avatar Ryan Wilson Committed by Konrad Rzeszutek Wilk
Browse files

xen-pcifront: Xen PCI frontend driver.

This is a port of the 2.6.18 Xen PCI front driver with fixes
to make it build under 2.6.34 and later (for the full list of
changes: git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git


historic/xen-pcifront-0.1). It also includes the fixes
to make it work properly.

[v2: Updated Kconfig, removed crud, added Reviewed-by]
[v3: Added 'static', fixed grant table leak, redid Kconfig]
[v4: Added one more 'static' and removed comments]
Signed-off-by: default avatarKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: default avatarStefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: default avatarJan Beulich <JBeulich@novell.com>
parent b78c9512
......@@ -40,6 +40,27 @@ config PCI_STUB
When in doubt, say N.
config XEN_PCIDEV_FRONTEND
tristate "Xen PCI Frontend"
depends on PCI && X86 && XEN
select HOTPLUG
select PCI_XEN
default y
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
config XEN_PCIDEV_FE_DEBUG
bool "Xen PCI Frontend debugging"
depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG
help
Say Y here if you want the Xen PCI frontend to produce a bunch of debug
messages to the system log. Select this if you are having a
problem with Xen PCI frontend support and want to see more of what is
going on.
When in doubt, say N.
config HT_IRQ
bool "Interrupts on hypertransport devices"
default y
......
......@@ -65,6 +65,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
obj-$(CONFIG_PCI_STUB) += pci-stub.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
ifeq ($(CONFIG_PCI_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
/*
* Xen PCI Frontend.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pciif.h>
#include <asm/xen/pci.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
#define INVALID_GRANT_REF (0)
#define INVALID_EVTCHN (-1)
struct pci_bus_entry {
struct list_head list;
struct pci_bus *bus;
};
#define _PDEVB_op_active (0)
#define PDEVB_op_active (1 << (_PDEVB_op_active))
struct pcifront_device {
struct xenbus_device *xdev;
struct list_head root_buses;
int evtchn;
int gnt_ref;
int irq;
/* Lock this when doing any operations in sh_info */
spinlock_t sh_info_lock;
struct xen_pci_sharedinfo *sh_info;
struct work_struct op_work;
unsigned long flags;
};
struct pcifront_sd {
int domain;
struct pcifront_device *pdev;
};
static inline struct pcifront_device *
pcifront_get_pdev(struct pcifront_sd *sd)
{
return sd->pdev;
}
static inline void pcifront_init_sd(struct pcifront_sd *sd,
unsigned int domain, unsigned int bus,
struct pcifront_device *pdev)
{
sd->domain = domain;
sd->pdev = pdev;
}
static DEFINE_SPINLOCK(pcifront_dev_lock);
static struct pcifront_device *pcifront_dev;
static int verbose_request;
module_param(verbose_request, int, 0644);
static int errno_to_pcibios_err(int errno)
{
switch (errno) {
case XEN_PCI_ERR_success:
return PCIBIOS_SUCCESSFUL;
case XEN_PCI_ERR_dev_not_found:
return PCIBIOS_DEVICE_NOT_FOUND;
case XEN_PCI_ERR_invalid_offset:
case XEN_PCI_ERR_op_failed:
return PCIBIOS_BAD_REGISTER_NUMBER;
case XEN_PCI_ERR_not_implemented:
return PCIBIOS_FUNC_NOT_SUPPORTED;
case XEN_PCI_ERR_access_denied:
return PCIBIOS_SET_FAILED;
}
return errno;
}
static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
{
if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
schedule_work(&pdev->op_work);
}
}
static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
{
int err = 0;
struct xen_pci_op *active_op = &pdev->sh_info->op;
unsigned long irq_flags;
evtchn_port_t port = pdev->evtchn;
unsigned irq = pdev->irq;
s64 ns, ns_timeout;
struct timeval tv;
spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
memcpy(active_op, op, sizeof(struct xen_pci_op));
/* Go */
wmb();
set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_evtchn(port);
/*
* We set a poll timeout of 3 seconds but give up on return after
* 2 seconds. It is better to time out too late rather than too early
* (in the latter case we end up continually re-executing poll() with a
* timeout in the past). 1s difference gives plenty of slack for error.
*/
do_gettimeofday(&tv);
ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
xen_clear_irq_pending(irq);
while (test_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags)) {
xen_poll_irq_timeout(irq, jiffies + 3*HZ);
xen_clear_irq_pending(irq);
do_gettimeofday(&tv);
ns = timeval_to_ns(&tv);
if (ns > ns_timeout) {
dev_err(&pdev->xdev->dev,
"pciback not responding!!!\n");
clear_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags);
err = XEN_PCI_ERR_dev_not_found;
goto out;
}
}
/*
* We might lose backend service request since we
* reuse same evtchn with pci_conf backend response. So re-schedule
* aer pcifront service.
*/
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&pdev->sh_info->flags)) {
dev_err(&pdev->xdev->dev,
"schedule aer pcifront service\n");
schedule_pcifront_aer_op(pdev);
}
memcpy(op, active_op, sizeof(struct xen_pci_op));
err = op->err;
out:
spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
return err;
}
/* Access to this function is spinlocked in drivers/pci/access.c */
static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *val)
{
int err = 0;
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_conf_read,
.domain = pci_domain_nr(bus),
.bus = bus->number,
.devfn = devfn,
.offset = where,
.size = size,
};
struct pcifront_sd *sd = bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
if (verbose_request)
dev_info(&pdev->xdev->dev,
"read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
PCI_FUNC(devfn), where, size);
err = do_pci_op(pdev, &op);
if (likely(!err)) {
if (verbose_request)
dev_info(&pdev->xdev->dev, "read got back value %x\n",
op.value);
*val = op.value;
} else if (err == -ENODEV) {
/* No device here, pretend that it just returned 0 */
err = 0;
*val = 0;
}
return errno_to_pcibios_err(err);
}
/* Access to this function is spinlocked in drivers/pci/access.c */
static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 val)
{
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_conf_write,
.domain = pci_domain_nr(bus),
.bus = bus->number,
.devfn = devfn,
.offset = where,
.size = size,
.value = val,
};
struct pcifront_sd *sd = bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
if (verbose_request)
dev_info(&pdev->xdev->dev,
"write dev=%04x:%02x:%02x.%01x - "
"offset %x size %d val %x\n",
pci_domain_nr(bus), bus->number,
PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
return errno_to_pcibios_err(do_pci_op(pdev, &op));
}
struct pci_ops pcifront_bus_ops = {
.read = pcifront_bus_read,
.write = pcifront_bus_write,
};
#ifdef CONFIG_PCI_MSI
static int pci_frontend_enable_msix(struct pci_dev *dev,
int **vector, int nvec)
{
int err;
int i;
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_enable_msix,
.domain = pci_domain_nr(dev->bus),
.bus = dev->bus->number,
.devfn = dev->devfn,
.value = nvec,
};
struct pcifront_sd *sd = dev->bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
struct msi_desc *entry;
if (nvec > SH_INFO_MAX_VEC) {
dev_err(&dev->dev, "too much vector for pci frontend: %x."
" Increase SH_INFO_MAX_VEC.\n", nvec);
return -EINVAL;
}
i = 0;
list_for_each_entry(entry, &dev->msi_list, list) {
op.msix_entries[i].entry = entry->msi_attrib.entry_nr;
/* Vector is useless at this point. */
op.msix_entries[i].vector = -1;
i++;
}
err = do_pci_op(pdev, &op);
if (likely(!err)) {
if (likely(!op.value)) {
/* we get the result */
for (i = 0; i < nvec; i++)
*(*vector+i) = op.msix_entries[i].vector;
return 0;
} else {
printk(KERN_DEBUG "enable msix get value %x\n",
op.value);
return op.value;
}
} else {
dev_err(&dev->dev, "enable msix get err %x\n", err);
return err;
}
}
static void pci_frontend_disable_msix(struct pci_dev *dev)
{
int err;
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_disable_msix,
.domain = pci_domain_nr(dev->bus),
.bus = dev->bus->number,
.devfn = dev->devfn,
};
struct pcifront_sd *sd = dev->bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
err = do_pci_op(pdev, &op);
/* What should do for error ? */
if (err)
dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
}
static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
{
int err;
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_enable_msi,
.domain = pci_domain_nr(dev->bus),
.bus = dev->bus->number,
.devfn = dev->devfn,
};
struct pcifront_sd *sd = dev->bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
err = do_pci_op(pdev, &op);
if (likely(!err)) {
*(*vector) = op.value;
} else {
dev_err(&dev->dev, "pci frontend enable msi failed for dev "
"%x:%x\n", op.bus, op.devfn);
err = -EINVAL;
}
return err;
}
static void pci_frontend_disable_msi(struct pci_dev *dev)
{
int err;
struct xen_pci_op op = {
.cmd = XEN_PCI_OP_disable_msi,
.domain = pci_domain_nr(dev->bus),
.bus = dev->bus->number,
.devfn = dev->devfn,
};
struct pcifront_sd *sd = dev->bus->sysdata;
struct pcifront_device *pdev = pcifront_get_pdev(sd);
err = do_pci_op(pdev, &op);
if (err == XEN_PCI_ERR_dev_not_found) {
/* XXX No response from backend, what shall we do? */
printk(KERN_DEBUG "get no response from backend for disable MSI\n");
return;
}
if (err)
/* how can pciback notify us fail? */
printk(KERN_DEBUG "get fake response frombackend\n");
}
static struct xen_pci_frontend_ops pci_frontend_ops = {
.enable_msi = pci_frontend_enable_msi,
.disable_msi = pci_frontend_disable_msi,
.enable_msix = pci_frontend_enable_msix,
.disable_msix = pci_frontend_disable_msix,
};
static void pci_frontend_registrar(int enable)
{
if (enable)
xen_pci_frontend = &pci_frontend_ops;
else
xen_pci_frontend = NULL;
};
#else
static inline void pci_frontend_registrar(int enable) { };
#endif /* CONFIG_PCI_MSI */
/* Claim resources for the PCI frontend as-is, backend won't allow changes */
static int pcifront_claim_resource(struct pci_dev *dev, void *data)
{
struct pcifront_device *pdev = data;
int i;
struct resource *r;
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
r = &dev->resource[i];
if (!r->parent && r->start && r->flags) {
dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
pci_name(dev), i);
if (pci_claim_resource(dev, i)) {
dev_err(&pdev->xdev->dev, "Could not claim "
"resource %s/%d! Device offline. Try "
"giving less than 4GB to domain.\n",
pci_name(dev), i);
}
}
}
return 0;
}
static int __devinit pcifront_scan_bus(struct pcifront_device *pdev,
unsigned int domain, unsigned int bus,
struct pci_bus *b)
{
struct pci_dev *d;
unsigned int devfn;
/* Scan the bus for functions and add.
* We omit handling of PCI bridge attachment because pciback prevents
* bridges from being exported.
*/
for (devfn = 0; devfn < 0x100; devfn++) {
d = pci_get_slot(b, devfn);
if (d) {
/* Device is already known. */
pci_dev_put(d);
continue;
}
d = pci_scan_single_device(b, devfn);
if (d)
dev_info(&pdev->xdev->dev, "New device on "
"%04x:%02x:%02x.%02x found.\n", domain, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
}
return 0;
}
static int __devinit pcifront_scan_root(struct pcifront_device *pdev,
unsigned int domain, unsigned int bus)
{
struct pci_bus *b;
struct pcifront_sd *sd = NULL;
struct pci_bus_entry *bus_entry = NULL;
int err = 0;
#ifndef CONFIG_PCI_DOMAINS
if (domain != 0) {
dev_err(&pdev->xdev->dev,
"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
dev_err(&pdev->xdev->dev,
"Please compile with CONFIG_PCI_DOMAINS\n");
err = -EINVAL;
goto err_out;
}
#endif
dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
domain, bus);
bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
sd = kmalloc(sizeof(*sd), GFP_KERNEL);
if (!bus_entry || !sd) {
err = -ENOMEM;
goto err_out;
}
pcifront_init_sd(sd, domain, bus, pdev);
b = pci_scan_bus_parented(&pdev->xdev->dev, bus,
&pcifront_bus_ops, sd);
if (!b) {
dev_err(&pdev->xdev->dev,
"Error creating PCI Frontend Bus!\n");
err = -ENOMEM;
goto err_out;
}
bus_entry->bus = b;
list_add(&bus_entry->list, &pdev->root_buses);
/* pci_scan_bus_parented skips devices which do not have a have
* devfn==0. The pcifront_scan_bus enumerates all devfn. */
err = pcifront_scan_bus(pdev, domain, bus, b);
/* Claim resources before going "live" with our devices */
pci_walk_bus(b, pcifront_claim_resource, pdev);
/* Create SysFS and notify udev of the devices. Aka: "going live" */
pci_bus_add_devices(b);
return err;
err_out:
kfree(bus_entry);
kfree(sd);
return err;
}
static int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
unsigned int domain, unsigned int bus)
{
int err;
struct pci_bus *b;
#ifndef CONFIG_PCI_DOMAINS
if (domain != 0) {
dev_err(&pdev->xdev->dev,
"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
dev_err(&pdev->xdev->dev,
"Please compile with CONFIG_PCI_DOMAINS\n");
return -EINVAL;
}
#endif
dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
domain, bus);
b = pci_find_bus(domain, bus);
if (!b)
/* If the bus is unknown, create it. */
return pcifront_scan_root(pdev, domain, bus);
err = pcifront_scan_bus(pdev, domain, bus, b);
/* Claim resources before going "live" with our devices */
pci_walk_bus(b, pcifront_claim_resource, pdev);
/* Create SysFS and notify udev of the devices. Aka: "going live" */
pci_bus_add_devices(b);
return err;
}
static void free_root_bus_devs(struct pci_bus *bus)
{
struct pci_dev *dev;
while (!list_empty(&bus->devices)) {
dev = container_of(bus->devices.next, struct pci_dev,
bus_list);
dev_dbg(&dev->dev, "removing device\n");
pci_remove_bus_device(dev);
}
}
static void pcifront_free_roots(struct pcifront_device *pdev)
{
struct pci_bus_entry *bus_entry, *t;
dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
list_del(&bus_entry->list);
free_root_bus_devs(bus_entry->bus);
kfree(bus_entry->bus->sysdata);
device_unregister(bus_entry->bus->bridge);
pci_remove_bus(bus_entry->bus);
kfree(bus_entry);
}
}
static pci_ers_result_t pcifront_common_process(int cmd,
struct pcifront_device *pdev,
pci_channel_state_t state)
{
pci_ers_result_t result;
struct pci_driver *pdrv;
int bus = pdev->sh_info->aer_op.bus;
int devfn = pdev->sh_info->aer_op.devfn;
struct pci_dev *pcidev;
int flag = 0;
dev_dbg(&pdev->xdev->dev,
"pcifront AER process: cmd %x (bus:%x, devfn%x)",
cmd, bus, devfn);
result = PCI_ERS_RESULT_NONE;
pcidev = pci_get_bus_and_slot(bus, devfn);
if (!pcidev || !pcidev->driver) {
dev_err(&pcidev->dev,