ioat2,3: cacheline align software descriptor allocations

All of the fields needed to handle an ioat2,3 ring entry can fit in a
single cacheline. Move ->len before ->txd in struct ioat_ring_ent, and
allocate these entries from a hw-cache-aligned kmem cache to reduce the
number of cachelines dirtied during descriptor management.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
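---

For reference, the entry layout the changelog describes looks roughly
like the sketch below; only ->len and ->txd are named above, so the
neighboring field and its type are assumptions for illustration:

	struct ioat_ring_ent {
		struct ioat_dma_descriptor *hw;
		size_t len;	/* moved ahead of ->txd by this patch */
		struct dma_async_tx_descriptor txd;
	};

With SLAB_HWCACHE_ALIGN each entry starts on a cacheline boundary, so
the fields touched on the submit/cleanup fast path dirty a single line.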
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index c4e4322..61086c6 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -69,6 +69,8 @@
module_param(ioat_dca_enabled, int, 0644);
MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");

+struct kmem_cache *ioat2_cache;
+
#define DRV_NAME "ioatdma"

static struct pci_driver ioat_pci_driver = {
@@ -168,12 +170,24 @@

static int __init ioat_init_module(void)
{
- return pci_register_driver(&ioat_pci_driver);
+ int err;
+
+ ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ioat2_cache)
+ return -ENOMEM;
+
+ err = pci_register_driver(&ioat_pci_driver);
+ if (err)
+ kmem_cache_destroy(ioat2_cache);
+
+ return err;
}
module_init(ioat_init_module);

static void __exit ioat_exit_module(void)
{
pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat2_cache);
}
module_exit(ioat_exit_module);
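
Creating the cache before pci_register_driver() guarantees it exists
before any channel probe can allocate a ring, and destroying it only
after pci_unregister_driver() guarantees every entry has been returned.

The consumers of ioat2_cache are not in this hunk; as a sketch of the
intent (assuming the ring-entry alloc/free paths live in dma_v2.c with
an extern declaration in a shared header, and with the real driver
signatures simplified here), they would move from kzalloc/kfree to the
cache along these lines:

	/* hypothetical simplification of the ring-entry paths */
	static struct ioat_ring_ent *ioat2_alloc_ring_ent(gfp_t flags)
	{
		struct ioat_ring_ent *desc;

		/* zeroed, hw-cache-aligned allocation from the cache */
		desc = kmem_cache_zalloc(ioat2_cache, flags);
		if (!desc)
			return NULL;
		return desc;
	}

	static void ioat2_free_ring_ent(struct ioat_ring_ent *desc)
	{
		kmem_cache_free(ioat2_cache, desc);
	}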