scsi: hisi_sas: optimise DMA slot memory
Currently we allocate 3 sets of DMA memories from separate pools for
each slot. This is inefficient in terms of memory usage
(buffers are less than 1 page in size, so we lose due to alignment),
and also time spent in doing 3 allocations + de-allocations per slot,
instead of 1.
To optimise, combine the 3 DMA buffers into a single buffer from a
single pool.
Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index 22dd48b..a722f2b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -34,10 +34,24 @@
#define HISI_SAS_MAX_DEVICES HISI_SAS_MAX_ITCT_ENTRIES
#define HISI_SAS_RESET_BIT 0
-#define HISI_SAS_STATUS_BUF_SZ \
- (sizeof(struct hisi_sas_err_record) + 1024)
-#define HISI_SAS_COMMAND_TABLE_SZ \
- (((sizeof(union hisi_sas_command_table)+3)/4)*4)
+#define HISI_SAS_STATUS_BUF_SZ (sizeof(struct hisi_sas_status_buffer))
+#define HISI_SAS_COMMAND_TABLE_SZ (sizeof(union hisi_sas_command_table))
+
+#define hisi_sas_status_buf_addr(buf) \
+ (buf + offsetof(struct hisi_sas_slot_buf_table, status_buffer))
+#define hisi_sas_status_buf_addr_mem(slot) hisi_sas_status_buf_addr(slot->buf)
+#define hisi_sas_status_buf_addr_dma(slot) \
+ hisi_sas_status_buf_addr(slot->buf_dma)
+
+#define hisi_sas_cmd_hdr_addr(buf) \
+ (buf + offsetof(struct hisi_sas_slot_buf_table, command_header))
+#define hisi_sas_cmd_hdr_addr_mem(slot) hisi_sas_cmd_hdr_addr(slot->buf)
+#define hisi_sas_cmd_hdr_addr_dma(slot) hisi_sas_cmd_hdr_addr(slot->buf_dma)
+
+#define hisi_sas_sge_addr(buf) \
+ (buf + offsetof(struct hisi_sas_slot_buf_table, sge_page))
+#define hisi_sas_sge_addr_mem(slot) hisi_sas_sge_addr(slot->buf)
+#define hisi_sas_sge_addr_dma(slot) hisi_sas_sge_addr(slot->buf_dma)
#define HISI_SAS_MAX_SSP_RESP_SZ (sizeof(struct ssp_frame_hdr) + 1024)
#define HISI_SAS_MAX_SMP_RESP_SZ 1028
@@ -139,14 +153,10 @@
int cmplt_queue_slot;
int idx;
int abort;
+ void *buf;
+ dma_addr_t buf_dma;
void *cmd_hdr;
dma_addr_t cmd_hdr_dma;
- void *status_buffer;
- dma_addr_t status_buffer_dma;
- void *command_table;
- dma_addr_t command_table_dma;
- struct hisi_sas_sge_page *sge_page;
- dma_addr_t sge_page_dma;
struct work_struct abort_slot;
struct timer_list internal_abort_timer;
};
@@ -232,10 +242,8 @@
int queue_count;
- struct dma_pool *sge_page_pool;
+ struct dma_pool *buffer_pool;
struct hisi_sas_device devices[HISI_SAS_MAX_DEVICES];
- struct dma_pool *command_table_pool;
- struct dma_pool *status_buffer_pool;
struct hisi_sas_cmd_hdr *cmd_hdr[HISI_SAS_MAX_QUEUES];
dma_addr_t cmd_hdr_dma[HISI_SAS_MAX_QUEUES];
void *complete_hdr[HISI_SAS_MAX_QUEUES];
@@ -347,7 +355,7 @@
#define HISI_SAS_SGE_PAGE_CNT SG_CHUNK_SIZE
struct hisi_sas_sge_page {
struct hisi_sas_sge sge[HISI_SAS_SGE_PAGE_CNT];
-};
+} __aligned(16);
struct hisi_sas_command_table_ssp {
struct ssp_frame_hdr hdr;
@@ -366,6 +374,17 @@
struct hisi_sas_command_table_ssp ssp;
struct hisi_sas_command_table_smp smp;
struct hisi_sas_command_table_stp stp;
+} __aligned(16);
+
+struct hisi_sas_status_buffer {
+ struct hisi_sas_err_record err;
+ u8 iu[1024];
+} __aligned(16);
+
+struct hisi_sas_slot_buf_table {
+ struct hisi_sas_status_buffer status_buffer;
+ union hisi_sas_command_table command_header;
+ struct hisi_sas_sge_page sge_page;
};
extern struct scsi_transport_template *hisi_sas_stt;