tile PCI RC: support more MSI-X interrupt vectors

To support PCIe devices with a higher number of MSI-X interrupt vectors,
e.g. 16 for the LSI RAID card, enhance the Gx RC stack to provide more
MSI-X vectors by using the TRIO Scatter Queues, which provide 8 more
vectors in addition to the ~10 from the Map Mem regions.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c
index cef4b22..da6e18e 100644
--- a/arch/tile/gxio/iorpc_trio.c
+++ b/arch/tile/gxio/iorpc_trio.c
@@ -61,6 +61,29 @@
 
 EXPORT_SYMBOL(gxio_trio_alloc_memory_maps);
 
+/* Argument block that gxio_trio_alloc_scatter_queues() writes to the HV. */
+struct alloc_scatter_queues_param {
+	unsigned int count;
+	unsigned int first;
+	unsigned int flags;
+};
+
+/* Pack the arguments and issue the ALLOC_SCATTER_QUEUES op on context->fd. */
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+				   unsigned int count, unsigned int first,
+				   unsigned int flags)
+{
+	struct alloc_scatter_queues_param req = {
+		.count = count,
+		.first = first,
+		.flags = flags,
+	};
+
+	return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) &req, sizeof(req),
+			     GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_scatter_queues);
 
 struct alloc_pio_regions_param {
 	unsigned int count;
diff --git a/arch/tile/include/arch/trio.h b/arch/tile/include/arch/trio.h
index d3000a8..c0ddedc 100644
--- a/arch/tile/include/arch/trio.h
+++ b/arch/tile/include/arch/trio.h
@@ -23,6 +23,45 @@
 #ifndef __ASSEMBLER__
 
 /*
+ * Map SQ Doorbell Format.
+ * This describes the format of the write-only doorbell register that exists
+ * in the last 8 bytes of the MAP_SQ_BASE/LIM range.  This register is only
+ * writable from PCIe space.  Writes to this register will not be written to
+ * Tile memory space and thus no IO VA translation is required if the last
+ * page of the BASE/LIM range is not otherwise written.
+ */
+
+__extension__
+typedef union
+{
+  struct
+  {
+#ifndef __BIG_ENDIAN__
+    /*
+     * When written with a 1, the associated MAP_SQ region's doorbell
+     * interrupt will be triggered once all previous writes are visible to
+     * Tile software.
+     */
+    uint_reg_t doorbell   : 1;
+    /*
+     * When written with a 1, the descriptor at the head of the associated
+     * MAP_SQ's FIFO will be dequeued.
+     */
+    uint_reg_t pop        : 1;
+    /* Reserved: the remaining 62 bits after doorbell and pop. */
+    uint_reg_t __reserved : 62;
+#else   /* __BIG_ENDIAN__: same three fields, declared in reverse order. */
+    uint_reg_t __reserved : 62;
+    uint_reg_t pop        : 1;
+    uint_reg_t doorbell   : 1;
+#endif
+  };
+
+  uint_reg_t word;  /* Overlays the bitfields above as one raw value. */
+} TRIO_MAP_SQ_DOORBELL_FMT_t;
+
+
+/*
  * Tile PIO Region Configuration - CFG Address Format.
  * This register describes the address format for PIO accesses when the
  * associated region is setup with TYPE=CFG.
diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h
index 58105c3..d95b96f 100644
--- a/arch/tile/include/gxio/iorpc_trio.h
+++ b/arch/tile/include/gxio/iorpc_trio.h
@@ -30,6 +30,7 @@
 
 #define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404)
 
+#define GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e)
 #define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412)
 
 #define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414)
@@ -54,6 +55,10 @@
 				unsigned int flags);
 
 
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+				   unsigned int count, unsigned int first,
+				   unsigned int flags);
+
 int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
 				unsigned int count, unsigned int first,
 				unsigned int flags);
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 2cc3e64..e0d6664 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1474,32 +1474,55 @@
 	trio_context = controller->trio;
 
 	/*
-	 * Allocate the Mem-Map that will accept the MSI write and
-	 * trigger the TILE-side interrupts.
+	 * Allocate a scatter-queue that will accept the MSI write and
+	 * trigger the TILE-side interrupts. We use the scatter-queue regions
+	 * before the mem map regions, because the latter are needed by more
+	 * applications.
 	 */
-	mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
-	if (mem_map < 0) {
-		dev_printk(KERN_INFO, &pdev->dev,
-			"%s Mem-Map alloc failure. "
-			"Failed to initialize MSI interrupts. "
-			"Falling back to legacy interrupts.\n",
-			desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+	mem_map = gxio_trio_alloc_scatter_queues(trio_context, 1, 0, 0);
+	if (mem_map >= 0) {
+		TRIO_MAP_SQ_DOORBELL_FMT_t doorbell_template = {{
+			.pop = 0,
+			.doorbell = 1,
+		}};
 
-		ret = -ENOMEM;
-		goto msi_mem_map_alloc_failure;
+		mem_map += TRIO_NUM_MAP_MEM_REGIONS;
+		mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+			mem_map * MEM_MAP_INTR_REGION_SIZE;
+		mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+		msi_addr = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 8;
+		msg.data = (unsigned int)doorbell_template.word;
+	} else {
+		/* SQ regions are out, allocate from map mem regions. */
+		mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
+		if (mem_map < 0) {
+			dev_printk(KERN_INFO, &pdev->dev,
+				"%s Mem-Map alloc failure. "
+				"Failed to initialize MSI interrupts. "
+				"Falling back to legacy interrupts.\n",
+				desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+			ret = -ENOMEM;
+			goto msi_mem_map_alloc_failure;
+		}
+
+		mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+			mem_map * MEM_MAP_INTR_REGION_SIZE;
+		mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+		msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 -
+			TRIO_MAP_MEM_REG_INT0;
+
+		msg.data = mem_map;
 	}
 
 	/* We try to distribute different IRQs to different tiles. */
 	cpu = tile_irq_cpu(irq);
 
 	/*
-	 * Now call up to the HV to configure the Mem-Map interrupt and
+	 * Now call up to the HV to configure the MSI interrupt and
 	 * set up the IPI binding.
 	 */
-	mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
-		mem_map * MEM_MAP_INTR_REGION_SIZE;
-	mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
-
 	ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu),
 					KERNEL_PL, irq, controller->mac,
 					mem_map, mem_map_base, mem_map_limit,
@@ -1512,13 +1535,9 @@
 
 	irq_set_msi_desc(irq, desc);
 
-	msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - TRIO_MAP_MEM_REG_INT0;
-
 	msg.address_hi = msi_addr >> 32;
 	msg.address_lo = msi_addr & 0xffffffff;
 
-	msg.data = mem_map;
-
 	write_msi_msg(irq, &msg);
 	irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq);
 	irq_set_handler_data(irq, controller);