ioat2,3: cacheline align software descriptor allocations
All the necessary fields for handling an ioat2,3 ring entry can fit into
one cacheline. Move ->len prior to ->txd in struct ioat_ring_ent, and
move allocation of these entries to a hw-cache-aligned kmem cache to
reduce the number of cachelines dirtied for descriptor management.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
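For reference, the hw-cache-aligned kmem cache referred to above would be created once at driver init along the lines of the sketch below. The ioat2_cache identifier is taken from the hunk that follows; the actual init-time call site is not part of this excerpt, so treat this as an illustrative assumption rather than the literal change:

	/* illustrative: a cacheline-aligned slab for struct ioat_ring_ent */
	ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
					0, SLAB_HWCACHE_ALIGN, NULL);
	if (!ioat2_cache)
		return -ENOMEM;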
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 460b773..fa3d6db 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -399,11 +399,12 @@
return NULL;
memset(hw, 0, sizeof(*hw));
- desc = kzalloc(sizeof(*desc), flags);
+ desc = kmem_cache_alloc(ioat2_cache, flags);
if (!desc) {
pci_pool_free(dma->dma_pool, hw, phys);
return NULL;
}
+ memset(desc, 0, sizeof(*desc));
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = ioat2_tx_submit_unlock;
@@ -418,7 +419,7 @@
dma = to_ioatdma_device(chan->device);
pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
- kfree(desc);
+ kmem_cache_free(ioat2_cache, desc);
}
static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
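The ->len/->txd reordering mentioned in the changelog lives in the struct ioat_ring_ent definition (dma_v2.h) and is not shown in this hunk; the intent is roughly as sketched below, with the small bookkeeping fields packed ahead of the large embedded txd so that descriptor management dirties a single cacheline (field list abbreviated and illustrative, not the verbatim structure):

	struct ioat_ring_ent {
		struct ioat_dma_descriptor *hw;		/* hardware descriptor backing this entry */
		size_t len;				/* moved ahead of the large ->txd */
		struct dma_async_tx_descriptor txd;	/* embedded async_tx descriptor */
	};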