Anthony Liguori | 3343660 | 2007-11-12 21:30:26 -0600 | [diff] [blame^] | 1 | /* |
| 2 | * Virtio PCI driver |
| 3 | * |
| 4 | * This module allows virtio devices to be used over a virtual PCI device. |
| 5 | * This can be used with QEMU based VMMs like KVM or Xen. |
| 6 | * |
| 7 | * Copyright IBM Corp. 2007 |
| 8 | * |
| 9 | * Authors: |
| 10 | * Anthony Liguori <aliguori@us.ibm.com> |
| 11 | * |
| 12 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| 13 | * See the COPYING file in the top-level directory. |
| 14 | * |
| 15 | */ |
| 16 | |
| 17 | #include <linux/module.h> |
| 18 | #include <linux/list.h> |
| 19 | #include <linux/pci.h> |
| 20 | #include <linux/interrupt.h> |
| 21 | #include <linux/virtio.h> |
| 22 | #include <linux/virtio_config.h> |
| 23 | #include <linux/virtio_ring.h> |
| 24 | #include <linux/virtio_pci.h> |
| 25 | #include <linux/highmem.h> |
| 26 | #include <linux/spinlock.h> |
| 27 | |
| 28 | MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); |
| 29 | MODULE_DESCRIPTION("virtio-pci"); |
| 30 | MODULE_LICENSE("GPL"); |
| 31 | MODULE_VERSION("1"); |
| 32 | |
| 33 | /* Our device structure */ |
| 34 | struct virtio_pci_device |
| 35 | { |
| 36 | struct virtio_device vdev; |
| 37 | struct pci_dev *pci_dev; |
| 38 | |
| 39 | /* the IO mapping for the PCI config space */ |
| 40 | void *ioaddr; |
| 41 | |
| 42 | /* a list of queues so we can dispatch IRQs */ |
| 43 | spinlock_t lock; |
| 44 | struct list_head virtqueues; |
| 45 | }; |
| 46 | |
| 47 | struct virtio_pci_vq_info |
| 48 | { |
| 49 | /* the actual virtqueue */ |
| 50 | struct virtqueue *vq; |
| 51 | |
| 52 | /* the number of entries in the queue */ |
| 53 | int num; |
| 54 | |
| 55 | /* the index of the queue */ |
| 56 | int queue_index; |
| 57 | |
| 58 | /* the virtual address of the ring queue */ |
| 59 | void *queue; |
| 60 | |
| 61 | /* the list node for the virtqueues list */ |
| 62 | struct list_head node; |
| 63 | }; |
| 64 | |
| 65 | /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ |
| 66 | static struct pci_device_id virtio_pci_id_table[] = { |
| 67 | { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, |
| 68 | { 0 }, |
| 69 | }; |
| 70 | |
| 71 | MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); |
| 72 | |
| 73 | /* A PCI device has it's own struct device and so does a virtio device so |
| 74 | * we create a place for the virtio devices to show up in sysfs. I think it |
| 75 | * would make more sense for virtio to not insist on having it's own device. */ |
| 76 | static struct device virtio_pci_root = { |
| 77 | .parent = NULL, |
| 78 | .bus_id = "virtio-pci", |
| 79 | }; |
| 80 | |
/* Next unused index for virtio devices created under the virtio-pci root;
 * consumed and incremented by the probe routine. */
static unsigned dev_index;
| 83 | |
| 84 | /* Convert a generic virtio device to our structure */ |
| 85 | static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) |
| 86 | { |
| 87 | return container_of(vdev, struct virtio_pci_device, vdev); |
| 88 | } |
| 89 | |
| 90 | /* virtio config->feature() implementation */ |
| 91 | static bool vp_feature(struct virtio_device *vdev, unsigned bit) |
| 92 | { |
| 93 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 94 | u32 mask; |
| 95 | |
| 96 | /* Since this function is supposed to have the side effect of |
| 97 | * enabling a queried feature, we simulate that by doing a read |
| 98 | * from the host feature bitmask and then writing to the guest |
| 99 | * feature bitmask */ |
| 100 | mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); |
| 101 | if (mask & (1 << bit)) { |
| 102 | mask |= (1 << bit); |
| 103 | iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); |
| 104 | } |
| 105 | |
| 106 | return !!(mask & (1 << bit)); |
| 107 | } |
| 108 | |
| 109 | /* virtio config->get() implementation */ |
| 110 | static void vp_get(struct virtio_device *vdev, unsigned offset, |
| 111 | void *buf, unsigned len) |
| 112 | { |
| 113 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 114 | void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; |
| 115 | u8 *ptr = buf; |
| 116 | int i; |
| 117 | |
| 118 | for (i = 0; i < len; i++) |
| 119 | ptr[i] = ioread8(ioaddr + i); |
| 120 | } |
| 121 | |
| 122 | /* the config->set() implementation. it's symmetric to the config->get() |
| 123 | * implementation */ |
| 124 | static void vp_set(struct virtio_device *vdev, unsigned offset, |
| 125 | const void *buf, unsigned len) |
| 126 | { |
| 127 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 128 | void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; |
| 129 | const u8 *ptr = buf; |
| 130 | int i; |
| 131 | |
| 132 | for (i = 0; i < len; i++) |
| 133 | iowrite8(ptr[i], ioaddr + i); |
| 134 | } |
| 135 | |
| 136 | /* config->{get,set}_status() implementations */ |
| 137 | static u8 vp_get_status(struct virtio_device *vdev) |
| 138 | { |
| 139 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 140 | return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); |
| 141 | } |
| 142 | |
| 143 | static void vp_set_status(struct virtio_device *vdev, u8 status) |
| 144 | { |
| 145 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 146 | /* We should never be setting status to 0. */ |
| 147 | BUG_ON(status == 0); |
| 148 | return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); |
| 149 | } |
| 150 | |
| 151 | static void vp_reset(struct virtio_device *vdev) |
| 152 | { |
| 153 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 154 | /* 0 status means a reset. */ |
| 155 | return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); |
| 156 | } |
| 157 | |
| 158 | /* the notify function used when creating a virt queue */ |
| 159 | static void vp_notify(struct virtqueue *vq) |
| 160 | { |
| 161 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); |
| 162 | struct virtio_pci_vq_info *info = vq->priv; |
| 163 | |
| 164 | /* we write the queue's selector into the notification register to |
| 165 | * signal the other end */ |
| 166 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); |
| 167 | } |
| 168 | |
| 169 | /* A small wrapper to also acknowledge the interrupt when it's handled. |
| 170 | * I really need an EIO hook for the vring so I can ack the interrupt once we |
| 171 | * know that we'll be handling the IRQ but before we invoke the callback since |
| 172 | * the callback may notify the host which results in the host attempting to |
| 173 | * raise an interrupt that we would then mask once we acknowledged the |
| 174 | * interrupt. */ |
| 175 | static irqreturn_t vp_interrupt(int irq, void *opaque) |
| 176 | { |
| 177 | struct virtio_pci_device *vp_dev = opaque; |
| 178 | struct virtio_pci_vq_info *info; |
| 179 | irqreturn_t ret = IRQ_NONE; |
| 180 | u8 isr; |
| 181 | |
| 182 | /* reading the ISR has the effect of also clearing it so it's very |
| 183 | * important to save off the value. */ |
| 184 | isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); |
| 185 | |
| 186 | /* It's definitely not us if the ISR was not high */ |
| 187 | if (!isr) |
| 188 | return IRQ_NONE; |
| 189 | |
| 190 | /* Configuration change? Tell driver if it wants to know. */ |
| 191 | if (isr & VIRTIO_PCI_ISR_CONFIG) { |
| 192 | struct virtio_driver *drv; |
| 193 | drv = container_of(vp_dev->vdev.dev.driver, |
| 194 | struct virtio_driver, driver); |
| 195 | |
| 196 | if (drv->config_changed) |
| 197 | drv->config_changed(&vp_dev->vdev); |
| 198 | } |
| 199 | |
| 200 | spin_lock(&vp_dev->lock); |
| 201 | list_for_each_entry(info, &vp_dev->virtqueues, node) { |
| 202 | if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) |
| 203 | ret = IRQ_HANDLED; |
| 204 | } |
| 205 | spin_unlock(&vp_dev->lock); |
| 206 | |
| 207 | return ret; |
| 208 | } |
| 209 | |
| 210 | /* the config->find_vq() implementation */ |
| 211 | static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, |
| 212 | void (*callback)(struct virtqueue *vq)) |
| 213 | { |
| 214 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
| 215 | struct virtio_pci_vq_info *info; |
| 216 | struct virtqueue *vq; |
| 217 | u16 num; |
| 218 | int err; |
| 219 | |
| 220 | /* Select the queue we're interested in */ |
| 221 | iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); |
| 222 | |
| 223 | /* Check if queue is either not available or already active. */ |
| 224 | num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); |
| 225 | if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) |
| 226 | return ERR_PTR(-ENOENT); |
| 227 | |
| 228 | /* allocate and fill out our structure the represents an active |
| 229 | * queue */ |
| 230 | info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL); |
| 231 | if (!info) |
| 232 | return ERR_PTR(-ENOMEM); |
| 233 | |
| 234 | info->queue_index = index; |
| 235 | info->num = num; |
| 236 | |
| 237 | info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL); |
| 238 | if (info->queue == NULL) { |
| 239 | err = -ENOMEM; |
| 240 | goto out_info; |
| 241 | } |
| 242 | |
| 243 | /* activate the queue */ |
| 244 | iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT, |
| 245 | vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
| 246 | |
| 247 | /* create the vring */ |
| 248 | vq = vring_new_virtqueue(info->num, vdev, info->queue, |
| 249 | vp_notify, callback); |
| 250 | if (!vq) { |
| 251 | err = -ENOMEM; |
| 252 | goto out_activate_queue; |
| 253 | } |
| 254 | |
| 255 | vq->priv = info; |
| 256 | info->vq = vq; |
| 257 | |
| 258 | spin_lock(&vp_dev->lock); |
| 259 | list_add(&info->node, &vp_dev->virtqueues); |
| 260 | spin_unlock(&vp_dev->lock); |
| 261 | |
| 262 | return vq; |
| 263 | |
| 264 | out_activate_queue: |
| 265 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
| 266 | kfree(info->queue); |
| 267 | out_info: |
| 268 | kfree(info); |
| 269 | return ERR_PTR(err); |
| 270 | } |
| 271 | |
| 272 | /* the config->del_vq() implementation */ |
| 273 | static void vp_del_vq(struct virtqueue *vq) |
| 274 | { |
| 275 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); |
| 276 | struct virtio_pci_vq_info *info = vq->priv; |
| 277 | |
| 278 | spin_lock(&vp_dev->lock); |
| 279 | list_del(&info->node); |
| 280 | spin_unlock(&vp_dev->lock); |
| 281 | |
| 282 | vring_del_virtqueue(vq); |
| 283 | |
| 284 | /* Select and deactivate the queue */ |
| 285 | iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); |
| 286 | iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); |
| 287 | |
| 288 | kfree(info->queue); |
| 289 | kfree(info); |
| 290 | } |
| 291 | |
| 292 | static struct virtio_config_ops virtio_pci_config_ops = { |
| 293 | .feature = vp_feature, |
| 294 | .get = vp_get, |
| 295 | .set = vp_set, |
| 296 | .get_status = vp_get_status, |
| 297 | .set_status = vp_set_status, |
| 298 | .reset = vp_reset, |
| 299 | .find_vq = vp_find_vq, |
| 300 | .del_vq = vp_del_vq, |
| 301 | }; |
| 302 | |
| 303 | /* the PCI probing function */ |
| 304 | static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, |
| 305 | const struct pci_device_id *id) |
| 306 | { |
| 307 | struct virtio_pci_device *vp_dev; |
| 308 | int err; |
| 309 | |
| 310 | /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ |
| 311 | if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) |
| 312 | return -ENODEV; |
| 313 | |
| 314 | /* allocate our structure and fill it out */ |
| 315 | vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); |
| 316 | if (vp_dev == NULL) |
| 317 | return -ENOMEM; |
| 318 | |
| 319 | snprintf(vp_dev->vdev.dev.bus_id, BUS_ID_SIZE, "virtio%d", dev_index); |
| 320 | vp_dev->vdev.index = dev_index; |
| 321 | dev_index++; |
| 322 | |
| 323 | vp_dev->vdev.dev.parent = &virtio_pci_root; |
| 324 | vp_dev->vdev.config = &virtio_pci_config_ops; |
| 325 | vp_dev->pci_dev = pci_dev; |
| 326 | INIT_LIST_HEAD(&vp_dev->virtqueues); |
| 327 | spin_lock_init(&vp_dev->lock); |
| 328 | |
| 329 | /* enable the device */ |
| 330 | err = pci_enable_device(pci_dev); |
| 331 | if (err) |
| 332 | goto out; |
| 333 | |
| 334 | err = pci_request_regions(pci_dev, "virtio-pci"); |
| 335 | if (err) |
| 336 | goto out_enable_device; |
| 337 | |
| 338 | vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); |
| 339 | if (vp_dev->ioaddr == NULL) |
| 340 | goto out_req_regions; |
| 341 | |
| 342 | pci_set_drvdata(pci_dev, vp_dev); |
| 343 | |
| 344 | /* we use the subsystem vendor/device id as the virtio vendor/device |
| 345 | * id. this allows us to use the same PCI vendor/device id for all |
| 346 | * virtio devices and to identify the particular virtio driver by |
| 347 | * the subsytem ids */ |
| 348 | vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; |
| 349 | vp_dev->vdev.id.device = pci_dev->subsystem_device; |
| 350 | |
| 351 | /* register a handler for the queue with the PCI device's interrupt */ |
| 352 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, |
| 353 | vp_dev->vdev.dev.bus_id, vp_dev); |
| 354 | if (err) |
| 355 | goto out_set_drvdata; |
| 356 | |
| 357 | /* finally register the virtio device */ |
| 358 | err = register_virtio_device(&vp_dev->vdev); |
| 359 | if (err) |
| 360 | goto out_req_irq; |
| 361 | |
| 362 | return 0; |
| 363 | |
| 364 | out_req_irq: |
| 365 | free_irq(pci_dev->irq, vp_dev); |
| 366 | out_set_drvdata: |
| 367 | pci_set_drvdata(pci_dev, NULL); |
| 368 | pci_iounmap(pci_dev, vp_dev->ioaddr); |
| 369 | out_req_regions: |
| 370 | pci_release_regions(pci_dev); |
| 371 | out_enable_device: |
| 372 | pci_disable_device(pci_dev); |
| 373 | out: |
| 374 | kfree(vp_dev); |
| 375 | return err; |
| 376 | } |
| 377 | |
| 378 | static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) |
| 379 | { |
| 380 | struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); |
| 381 | |
| 382 | free_irq(pci_dev->irq, vp_dev); |
| 383 | pci_set_drvdata(pci_dev, NULL); |
| 384 | pci_iounmap(pci_dev, vp_dev->ioaddr); |
| 385 | pci_release_regions(pci_dev); |
| 386 | pci_disable_device(pci_dev); |
| 387 | kfree(vp_dev); |
| 388 | } |
| 389 | |
| 390 | #ifdef CONFIG_PM |
| 391 | static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) |
| 392 | { |
| 393 | pci_save_state(pci_dev); |
| 394 | pci_set_power_state(pci_dev, PCI_D3hot); |
| 395 | return 0; |
| 396 | } |
| 397 | |
| 398 | static int virtio_pci_resume(struct pci_dev *pci_dev) |
| 399 | { |
| 400 | pci_restore_state(pci_dev); |
| 401 | pci_set_power_state(pci_dev, PCI_D0); |
| 402 | return 0; |
| 403 | } |
| 404 | #endif |
| 405 | |
| 406 | static struct pci_driver virtio_pci_driver = { |
| 407 | .name = "virtio-pci", |
| 408 | .id_table = virtio_pci_id_table, |
| 409 | .probe = virtio_pci_probe, |
| 410 | .remove = virtio_pci_remove, |
| 411 | #ifdef CONFIG_PM |
| 412 | .suspend = virtio_pci_suspend, |
| 413 | .resume = virtio_pci_resume, |
| 414 | #endif |
| 415 | }; |
| 416 | |
| 417 | static int __init virtio_pci_init(void) |
| 418 | { |
| 419 | int err; |
| 420 | |
| 421 | err = device_register(&virtio_pci_root); |
| 422 | if (err) |
| 423 | return err; |
| 424 | |
| 425 | err = pci_register_driver(&virtio_pci_driver); |
| 426 | if (err) |
| 427 | device_unregister(&virtio_pci_root); |
| 428 | |
| 429 | return err; |
| 430 | } |
| 431 | |
| 432 | module_init(virtio_pci_init); |
| 433 | |
| 434 | static void __exit virtio_pci_exit(void) |
| 435 | { |
| 436 | device_unregister(&virtio_pci_root); |
| 437 | pci_unregister_driver(&virtio_pci_driver); |
| 438 | } |
| 439 | |
| 440 | module_exit(virtio_pci_exit); |