Commit 35a17eb6 authored by David S. Miller's avatar David S. Miller
Browse files

[SPARC64]: Add PCI MSI support on Niagara.



This is kind of hokey, we could use the hardware provided facilities
much better.

MSIs are assosciated with MSI Queues.  MSI Queues generate interrupts
when any MSI assosciated with it is signalled.  This suggests a
two-tiered IRQ dispatch scheme:

	MSI Queue interrupt --> queue interrupt handler
		MSI dispatch --> driver interrupt handler

But we just get one-level under Linux currently.  What I'd like to do
is possibly stick the IRQ actions into a per-MSI-Queue data structure,
and dispatch them form there, but the generic IRQ layer doesn't
provide a way to do that right now.

So, the current kludge is to "ACK" the interrupt by processing the
MSI Queue data structures and ACK'ing them, then we run the actual
handler like normal.

We are wasting a lot of useful information, for example the MSI data
and address are provided with ever MSI, as well as a system tick if
available.  If we could pass this into the IRQ handler it could help
with certain things, in particular for PCI-Express error messages.

The MSI entries on sparc64 also tell you exactly which bus/device/fn
sent the MSI, which would be great for error handling when no
registered IRQ handler can service the interrupt.

We override the disable/enable IRQ chip methods in sun4v_msi, so we
have to call {mask,unmask}_msi_irq() directly from there.  This is
another ugly wart.

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 68c92186
......@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
......@@ -87,7 +88,6 @@ struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (SMP_CACHE_BY
#define irq_work(__cpu) &(trap_block[(__cpu)].irq_worklist)
static unsigned int virt_to_real_irq_table[NR_IRQS];
static unsigned char virt_irq_cur = 1;
static unsigned char virt_irq_alloc(unsigned int real_irq)
{
......@@ -95,26 +95,32 @@ static unsigned char virt_irq_alloc(unsigned int real_irq)
BUILD_BUG_ON(NR_IRQS >= 256);
ent = virt_irq_cur;
for (ent = 1; ent < NR_IRQS; ent++) {
if (!virt_to_real_irq_table[ent])
break;
}
if (ent >= NR_IRQS) {
printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
return 0;
}
virt_irq_cur = ent + 1;
virt_to_real_irq_table[ent] = real_irq;
return ent;
}
#if 0 /* Currently unused. */
static unsigned char real_to_virt_irq(unsigned int real_irq)
static void virt_irq_free(unsigned int virt_irq)
{
struct ino_bucket *bucket = __bucket(real_irq);
unsigned int real_irq;
return bucket->virt_irq;
if (virt_irq >= NR_IRQS)
return;
real_irq = virt_to_real_irq_table[virt_irq];
virt_to_real_irq_table[virt_irq] = 0;
__bucket(real_irq)->virt_irq = 0;
}
#endif
static unsigned int virt_to_real_irq(unsigned char virt_irq)
{
......@@ -341,6 +347,20 @@ static void sun4v_irq_disable(unsigned int virt_irq)
}
}
#ifdef CONFIG_PCI_MSI
static void sun4v_msi_enable(unsigned int virt_irq)
{
sun4v_irq_enable(virt_irq);
unmask_msi_irq(virt_irq);
}
static void sun4v_msi_disable(unsigned int virt_irq)
{
mask_msi_irq(virt_irq);
sun4v_irq_disable(virt_irq);
}
#endif
static void sun4v_irq_end(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
......@@ -398,6 +418,18 @@ static struct irq_chip sun4v_irq_ack = {
.end = sun4v_irq_end,
};
#ifdef CONFIG_PCI_MSI
static struct irq_chip sun4v_msi = {
.typename = "sun4v+msi",
.mask = mask_msi_irq,
.unmask = unmask_msi_irq,
.enable = sun4v_msi_enable,
.disable = sun4v_msi_disable,
.ack = run_pre_handler,
.end = sun4v_irq_end,
};
#endif
void irq_install_pre_handler(int virt_irq,
void (*func)(unsigned int, void *, void *),
void *arg1, void *arg2)
......@@ -411,7 +443,11 @@ void irq_install_pre_handler(int virt_irq,
chip = get_irq_chip(virt_irq);
if (chip == &sun4u_irq_ack ||
chip == &sun4v_irq_ack)
chip == &sun4v_irq_ack
#ifdef CONFIG_PCI_MSI
|| chip == &sun4v_msi
#endif
)
return;
chip = (chip == &sun4u_irq ?
......@@ -489,6 +525,56 @@ out:
return bucket->virt_irq;
}
#ifdef CONFIG_PCI_MSI
unsigned int sun4v_build_msi(u32 devhandle, unsigned int *virt_irq_p,
unsigned int msi_start, unsigned int msi_end)
{
struct ino_bucket *bucket;
struct irq_handler_data *data;
unsigned long sysino;
unsigned int devino;
BUG_ON(tlb_type != hypervisor);
/* Find a free devino in the given range. */
for (devino = msi_start; devino < msi_end; devino++) {
sysino = sun4v_devino_to_sysino(devhandle, devino);
bucket = &ivector_table[sysino];
if (!bucket->virt_irq)
break;
}
if (devino >= msi_end)
return 0;
sysino = sun4v_devino_to_sysino(devhandle, devino);
bucket = &ivector_table[sysino];
bucket->virt_irq = virt_irq_alloc(__irq(bucket));
*virt_irq_p = bucket->virt_irq;
set_irq_chip(bucket->virt_irq, &sun4v_msi);
data = get_irq_chip_data(bucket->virt_irq);
if (unlikely(data))
return devino;
data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
if (unlikely(!data)) {
prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
prom_halt();
}
set_irq_chip_data(bucket->virt_irq, data);
data->imap = ~0UL;
data->iclr = ~0UL;
return devino;
}
void sun4v_destroy_msi(unsigned int virt_irq)
{
virt_irq_free(virt_irq);
}
#endif
void ack_bad_irq(unsigned int virt_irq)
{
struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
......
......@@ -13,6 +13,8 @@
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/msi.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <asm/uaccess.h>
......@@ -646,4 +648,37 @@ int pci_domain_nr(struct pci_bus *pbus)
}
EXPORT_SYMBOL(pci_domain_nr);
#ifdef CONFIG_PCI_MSI
int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
{
struct pcidev_cookie *pcp = pdev->sysdata;
struct pci_pbm_info *pbm = pcp->pbm;
struct pci_controller_info *p = pbm->parent;
int virt_irq, err;
if (!pbm->msi_num || !p->setup_msi_irq)
return -EINVAL;
err = p->setup_msi_irq(&virt_irq, pdev, desc);
if (err < 0)
return err;
return virt_irq;
}
void arch_teardown_msi_irq(unsigned int virt_irq)
{
struct msi_desc *entry = get_irq_data(virt_irq);
struct pci_dev *pdev = entry->dev;
struct pcidev_cookie *pcp = pdev->sysdata;
struct pci_pbm_info *pbm = pcp->pbm;
struct pci_controller_info *p = pbm->parent;
if (!pbm->msi_num || !p->setup_msi_irq)
return;
return p->teardown_msi_irq(virt_irq, pdev);
}
#endif /* !(CONFIG_PCI_MSI) */
#endif /* !(CONFIG_PCI) */
......@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <asm/pbm.h>
#include <asm/iommu.h>
......@@ -1074,6 +1076,443 @@ static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm)
}
#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
u64 version_type;
#define MSIQ_VERSION_MASK 0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT 32
#define MSIQ_TYPE_MASK 0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT 0
#define MSIQ_TYPE_NONE 0x00
#define MSIQ_TYPE_MSG 0x01
#define MSIQ_TYPE_MSI32 0x02
#define MSIQ_TYPE_MSI64 0x03
#define MSIQ_TYPE_INTX 0x08
#define MSIQ_TYPE_NONE2 0xff
u64 intx_sysino;
u64 reserved1;
u64 stick;
u64 req_id; /* bus/device/func */
#define MSIQ_REQID_BUS_MASK 0xff00UL
#define MSIQ_REQID_BUS_SHIFT 8
#define MSIQ_REQID_DEVICE_MASK 0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT 3
#define MSIQ_REQID_FUNC_MASK 0x0007UL
#define MSIQ_REQID_FUNC_SHIFT 0
u64 msi_address;
/* The format of this value is message type dependant.
* For MSI bits 15:0 are the data from the MSI packet.
* For MSI-X bits 31:0 are the data from the MSI packet.
* For MSG, the message code and message routing code where:
* bits 39:32 is the bus/device/fn of the msg target-id
* bits 18:16 is the message routing code
* bits 7:0 is the message code
* For INTx the low order 2-bits are:
* 00 - INTA
* 01 - INTB
* 10 - INTC
* 11 - INTD
*/
u64 msi_data;
u64 reserved2;
};
/* For now this just runs as a pre-handler for the real interrupt handler.
* So we just walk through the queue and ACK all the entries, update the
* head pointer, and return.
*
* In the longer term it would be nice to do something more integrated
* wherein we can pass in some of this MSI info to the drivers. This
* would be most useful for PCIe fabric error messages, although we could
* invoke those directly from the loop here in order to pass the info around.
*/
static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
{
struct pci_pbm_info *pbm = data1;
struct pci_sun4v_msiq_entry *base, *ep;
unsigned long msiqid, orig_head, head, type, err;
msiqid = (unsigned long) data2;
head = 0xdeadbeef;
err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
if (unlikely(err))
goto hv_error_get;
if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
goto bad_offset;
head /= sizeof(struct pci_sun4v_msiq_entry);
orig_head = head;
base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
(pbm->msiq_ent_count *
sizeof(struct pci_sun4v_msiq_entry))));
ep = &base[head];
while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
if (unlikely(type != MSIQ_TYPE_MSI32 &&
type != MSIQ_TYPE_MSI64))
goto bad_type;
pci_sun4v_msi_setstate(pbm->devhandle,
ep->msi_data /* msi_num */,
HV_MSISTATE_IDLE);
/* Clear the entry. */
ep->version_type &= ~MSIQ_TYPE_MASK;
/* Go to next entry in ring. */
head++;
if (head >= pbm->msiq_ent_count)
head = 0;
ep = &base[head];
}
if (likely(head != orig_head)) {
/* ACK entries by updating head pointer. */
head *= sizeof(struct pci_sun4v_msiq_entry);
err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
if (unlikely(err))
goto hv_error_set;
}
return;
hv_error_set:
printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
goto hv_error_cont;
hv_error_get:
printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);
hv_error_cont:
printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
pbm->devhandle, msiqid, head);
return;
bad_offset:
printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
return;
bad_type:
printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
return;
}
static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
{
unsigned long size, bits_per_ulong;
bits_per_ulong = sizeof(unsigned long) * 8;
size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
size /= 8;
BUG_ON(size % sizeof(unsigned long));
pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
if (!pbm->msi_bitmap)
return -ENOMEM;
return 0;
}
static void msi_bitmap_free(struct pci_pbm_info *pbm)
{
kfree(pbm->msi_bitmap);
pbm->msi_bitmap = NULL;
}
static int msi_queue_alloc(struct pci_pbm_info *pbm)
{
unsigned long q_size, alloc_size, pages, order;
int i;
q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
alloc_size = (pbm->msiq_num * q_size);
order = get_order(alloc_size);
pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
if (pages == 0UL) {
printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
order);
return -ENOMEM;
}
memset((char *)pages, 0, PAGE_SIZE << order);
pbm->msi_queues = (void *) pages;
for (i = 0; i < pbm->msiq_num; i++) {
unsigned long err, base = __pa(pages + (i * q_size));
unsigned long ret1, ret2;
err = pci_sun4v_msiq_conf(pbm->devhandle,
pbm->msiq_first + i,
base, pbm->msiq_ent_count);
if (err) {
printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
err);
goto h_error;
}
err = pci_sun4v_msiq_info(pbm->devhandle,
pbm->msiq_first + i,
&ret1, &ret2);
if (err) {
printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
err);
goto h_error;
}
if (ret1 != base || ret2 != pbm->msiq_ent_count) {
printk(KERN_ERR "MSI: Bogus qconf "
"expected[%lx:%x] got[%lx:%lx]\n",
base, pbm->msiq_ent_count,
ret1, ret2);
goto h_error;
}
}
return 0;
h_error:
free_pages(pages, order);
return -EINVAL;
}
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
u32 *val;
int len;
val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
if (!val || len != 4)
goto no_msi;
pbm->msiq_num = *val;
if (pbm->msiq_num) {
struct msiq_prop {
u32 first_msiq;
u32 num_msiq;
u32 first_devino;
} *mqp;
struct msi_range_prop {
u32 first_msi;
u32 num_msi;
} *mrng;
struct addr_range_prop {
u32 msi32_high;
u32 msi32_low;
u32 msi32_len;
u32 msi64_high;
u32 msi64_low;
u32 msi64_len;
} *arng;
val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
if (!val || len != 4)
goto no_msi;
pbm->msiq_ent_count = *val;
mqp = of_get_property(pbm->prom_node,
"msi-eq-to-devino", &len);
if (!mqp || len != sizeof(struct msiq_prop))
goto no_msi;
pbm->msiq_first = mqp->first_msiq;
pbm->msiq_first_devino = mqp->first_devino;
val = of_get_property(pbm->prom_node, "#msi", &len);
if (!val || len != 4)
goto no_msi;
pbm->msi_num = *val;
mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
if (!mrng || len != sizeof(struct msi_range_prop))
goto no_msi;
pbm->msi_first = mrng->first_msi;
val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
if (!val || len != 4)
goto no_msi;
pbm->msi_data_mask = *val;
val = of_get_property(pbm->prom_node, "msix-data-width", &len);
if (!val || len != 4)
goto no_msi;
pbm->msix_data_width = *val;
arng = of_get_property(pbm->prom_node, "msi-address-ranges",
&len);
if (!arng || len != sizeof(struct addr_range_prop))
goto no_msi;
pbm->msi32_start = ((u64)arng->msi32_high << 32) |
(u64) arng->msi32_low;
pbm->msi64_start = ((u64)arng->msi64_high << 32) |
(u64) arng->msi64_low;
pbm->msi32_len = arng->msi32_len;
pbm->msi64_len = arng->msi64_len;
if (msi_bitmap_alloc(pbm))
goto no_msi;
if (msi_queue_alloc(pbm)) {
msi_bitmap_free(pbm);
goto no_msi;
}
printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
"devino[0x%x]\n",
pbm->name,
pbm->msiq_first, pbm->msiq_num,
pbm->msiq_ent_count,
pbm->msiq_first_devino);
printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
"width[%u]\n",
pbm->name,
pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
pbm->msix_data_width);
printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
"addr64[0x%lx:0x%x]\n",
pbm->name,
pbm->msi32_start, pbm->msi32_len,
pbm->msi64_start, pbm->msi64_len);
printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
pbm->name,
pbm->msi_queues);
}
return;
no_msi:
pbm->msiq_num = 0;
printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
}
static int alloc_msi(struct pci_pbm_info *pbm)
{
int i;
for (i = 0; i < pbm->msi_num; i++) {
if (!test_and_set_bit(i, pbm->msi_bitmap))
return i + pbm->msi_first;
}
return -ENOENT;
}
static void free_msi(struct pci_pbm_info *pbm, int msi_num)
{
msi_num -= pbm->msi_first;
clear_bit(msi_num, pbm->msi_bitmap);
}
static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
struct pci_dev *pdev,
struct msi_desc *entry)
{
struct pcidev_cookie *pcp = pdev->sysdata;
struct pci_pbm_info *pbm = pcp->pbm;
unsigned long devino, msiqid;
struct msi_msg msg;
int msi_num, err;
*virt_irq_p = 0;
msi_num = alloc_msi(pbm);
if (msi_num < 0)
return msi_num;
devino = sun4v_build_msi(pbm->devhandle, virt_irq_p,
pbm->msiq_first_devino,
(pbm->msiq_first_devino +
pbm->msiq_num));
err = -ENOMEM;
if (!devino)
goto out_err;
set_irq_msi(*virt_irq_p, entry);
msiqid = ((devino - pbm->msiq_first_devino) +
pbm->msiq_first);
err = -EINVAL;
if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
if (err)
goto out_err;
if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
goto out_err;
if (pci_sun4v_msi_setmsiq(pbm->devhandle,
msi_num, msiqid,
(entry->msi_attrib.is_64 ?
HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
goto out_err;
if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
goto out_err;
if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
goto out_err;
pcp->msi_num = msi_num;
if (entry->msi_attrib.is_64) {
msg.address_hi = pbm->msi64_start >> 32;
msg.address_lo = pbm->msi64_start & 0xffffffff;
} else {
msg.address_hi = 0;
msg.address_lo = pbm->msi32_start;
}