sh: Fix up multi-resource mapping for SH7786 PCIe.

This reworks some of the SH7786 PCIe initialization code to dynamically
setup and size the various resource windows, as opposed to the original
code that simply wired in a couple of them statically.

At the same time, we tidy up the initialization code a bit, kill off some
read-only register twiddling that was gleaned from the bus analyzer, and
also propagate the physical slot/channel mapping.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index 95d095f..ae91a2d 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -204,7 +204,9 @@
 {
 	struct pci_channel *chan = port->hose;
 	unsigned int data;
-	int ret;
+	phys_addr_t memphys;
+	size_t memsize;
+	int ret, i;
 
 	/* Begin initialization */
 	pci_write_reg(chan, 0, SH4A_PCIETCTLR);
@@ -227,15 +229,24 @@
 	data |= PCI_CAP_ID_EXP;
 	pci_write_reg(chan, data, SH4A_PCIEEXPCAP0);
 
-	/* Enable x4 link width and extended sync. */
+	/* Enable data link layer active state reporting */
+	pci_write_reg(chan, PCI_EXP_LNKCAP_DLLLARC, SH4A_PCIEEXPCAP3);
+
+	/* Enable extended sync and ASPM L0s support */
 	data = pci_read_reg(chan, SH4A_PCIEEXPCAP4);
-	data &= ~(PCI_EXP_LNKSTA_NLW << 16);
-	data |= (1 << 22) | PCI_EXP_LNKCTL_ES;
+	data &= ~PCI_EXP_LNKCTL_ASPMC;
+	data |= PCI_EXP_LNKCTL_ES | 1;
 	pci_write_reg(chan, data, SH4A_PCIEEXPCAP4);
 
+	/* Write out the physical slot number */
+	data = pci_read_reg(chan, SH4A_PCIEEXPCAP5);
+	data &= ~PCI_EXP_SLTCAP_PSN;
+	data |= (port->index + 1) << 19;
+	pci_write_reg(chan, data, SH4A_PCIEEXPCAP5);
+
 	/* Set the completion timer timeout to the maximum 32ms. */
 	data = pci_read_reg(chan, SH4A_PCIETLCTLR);
-	data &= ~0xffff;
+	data &= ~0x3f00;
 	data |= 0x32 << 8;
 	pci_write_reg(chan, data, SH4A_PCIETLCTLR);
 
@@ -248,6 +259,33 @@
 	data |= (0xff << 16);
 	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
 
+	memphys = __pa(memory_start);
+	memsize = roundup_pow_of_two(memory_end - memory_start);
+
+	/*
+	 * If there's more than 512MB of memory, we need to roll over to
+	 * LAR1/LAMR1.
+	 */
+	if (memsize > SZ_512M) {
+		__raw_writel(memphys + SZ_512M, chan->reg_base + SH4A_PCIELAR1);
+		__raw_writel(((memsize - SZ_512M) - SZ_256) | 1,
+			     chan->reg_base + SH4A_PCIELAMR1);
+		memsize = SZ_512M;
+	} else {
+		/*
+		 * Otherwise just zero it out and disable it.
+		 */
+		__raw_writel(0, chan->reg_base + SH4A_PCIELAR1);
+		__raw_writel(0, chan->reg_base + SH4A_PCIELAMR1);
+	}
+
+	/*
+	 * LAR0/LAMR0 covers up to the first 512MB, which is enough to
+	 * cover all of lowmem on most platforms.
+	 */
+	__raw_writel(memphys, chan->reg_base + SH4A_PCIELAR0);
+	__raw_writel((memsize - SZ_256) | 1, chan->reg_base + SH4A_PCIELAMR0);
+
 	/* Finish initialization */
 	data = pci_read_reg(chan, SH4A_PCIETCTLR);
 	data |= 0x1;
@@ -267,10 +305,14 @@
 	if (unlikely(ret != 0))
 		return -ENODEV;
 
-	pci_write_reg(chan, 0x00100007, SH4A_PCIEPCICONF1);
+	data = pci_read_reg(chan, SH4A_PCIEPCICONF1);
+	data &= ~(PCI_STATUS_DEVSEL_MASK << 16);
+	data |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+		(PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_FAST) << 16;
+	pci_write_reg(chan, data, SH4A_PCIEPCICONF1);
+
 	pci_write_reg(chan, 0x80888000, SH4A_PCIETXVC0DCTLR);
 	pci_write_reg(chan, 0x00222000, SH4A_PCIERXVC0DCTLR);
-	pci_write_reg(chan, 0x000050A0, SH4A_PCIEEXPCAP2);
 
 	wmb();
 
@@ -278,15 +320,32 @@
 	printk(KERN_NOTICE "PCI: PCIe#%d link width %d\n",
 	       port->index, (data >> 20) & 0x3f);
 
-	pci_write_reg(chan, 0x007c0000, SH4A_PCIEPAMR0);
-	pci_write_reg(chan, 0x00000000, SH4A_PCIEPARH0);
-	pci_write_reg(chan, 0x00000000, SH4A_PCIEPARL0);
-	pci_write_reg(chan, 0x80000100, SH4A_PCIEPTCTLR0);
 
-	pci_write_reg(chan, 0x03fc0000, SH4A_PCIEPAMR2);
-	pci_write_reg(chan, 0x00000000, SH4A_PCIEPARH2);
-	pci_write_reg(chan, 0x00000000, SH4A_PCIEPARL2);
-	pci_write_reg(chan, 0x80000000, SH4A_PCIEPTCTLR2);
+	for (i = 0; i < chan->nr_resources; i++) {
+		struct resource *res = chan->resources + i;
+		resource_size_t size;
+		u32 enable_mask;
+
+		pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(i));
+
+		size = resource_size(res);
+
+		/*
+		 * The PAMR mask is calculated in units of 256kB, which
+		 * keeps things pretty simple.
+		 */
+		__raw_writel(((roundup_pow_of_two(size) / SZ_256K) - 1) << 18,
+			     chan->reg_base + SH4A_PCIEPAMR(i));
+
+		pci_write_reg(chan, 0x00000000, SH4A_PCIEPARH(i));
+		pci_write_reg(chan, 0x00000000, SH4A_PCIEPARL(i));
+
+		enable_mask = MASK_PARE;
+		if (res->flags & IORESOURCE_IO)
+			enable_mask |= MASK_SPC;
+
+		pci_write_reg(chan, enable_mask, SH4A_PCIEPTCTLR(i));
+	}
 
 	return 0;
 }
diff --git a/arch/sh/drivers/pci/pcie-sh7786.h b/arch/sh/drivers/pci/pcie-sh7786.h
index 6666ea2..90a6992 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.h
+++ b/arch/sh/drivers/pci/pcie-sh7786.h
@@ -312,23 +312,23 @@
 #define	SH4A_PCIECSAR5		(0x0202B4)	/* R/W R/W 0x0000 0000 32 */
 #define	SH4A_PCIESTCTLR5	(0x0202B8)	/* R/W R/W 0x0000 0000 32 */
 
-/*	PCIEPARL0	*/
-#define	SH4A_PCIEPARL0		(0x020400)	/* R/W R/W 0x0000 0000 32 */
+/*	PCIEPARL	*/
+#define	SH4A_PCIEPARL(x)	(0x020400 + ((x) * 0x20)) /* R/W R/W 0x0000 0000 32 */
 #define		BITS_PAL	(18)
 #define		MASK_PAL	(0x3fff<<BITS_PAL)
 
-/*	PCIEPARH0	*/
-#define	SH4A_PCIEPARH0		(0x020404)	/* R/W R/W 0x0000 0000 32 */
+/*	PCIEPARH	*/
+#define	SH4A_PCIEPARH(x)	(0x020404 + ((x) * 0x20)) /* R/W R/W 0x0000 0000 32 */
 #define		BITS_PAH	(0)
 #define		MASK_PAH	(0xffffffff<<BITS_PAH)
 
-/*	PCIEPAMR0	 */
-#define	SH4A_PCIEPAMR0		(0x020408)	/* R/W R/W 0x0000 0000 32 */
+/*	PCIEPAMR	 */
+#define	SH4A_PCIEPAMR(x)	(0x020408 + ((x) * 0x20)) /* R/W R/W 0x0000 0000 32 */
 #define		BITS_PAM	(18)
 #define		MASK_PAM	(0x3fff<<BITS_PAM)
 
-/*	PCIEPTCTLR0	*/
-#define	SH4A_PCIEPTCTLR0	(0x02040C)	/* R/W R/W 0x0000 0000 32 */
+/*	PCIEPTCTLR	*/
+#define SH4A_PCIEPTCTLR(x)	(0x02040C + ((x) * 0x20))
 #define		BITS_PARE	(31)
 #define		MASK_PARE	(0x1<<BITS_PARE)
 #define		BITS_TC		(20)
@@ -340,26 +340,6 @@
 #define		BITS_SPC	(8)
 #define		MASK_SPC	(0x1<<BITS_SPC)
 
-#define	SH4A_PCIEPARL1		(0x020420)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARH1		(0x020424)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPAMR1		(0x020428)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPTCTLR1	(0x02042C)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARL2		(0x020440)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARH2		(0x020444)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPAMR2		(0x020448)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPTCTLR2	(0x02044C)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARL3		(0x020460)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARH3		(0x020464)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPAMR3		(0x020468)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPTCTLR3	(0x02046C)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARL4		(0x020480)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARH4		(0x020484)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPAMR4		(0x020488)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPTCTLR4	(0x02048C)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARL5		(0x0204A0)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPARH5		(0x0204A4)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPAMR5		(0x0204A8)	/* R/W R/W 0x0000 0000 32 */
-#define	SH4A_PCIEPTCTLR5	(0x0204AC)	/* R/W R/W 0x0000 0000 32 */
 #define	SH4A_PCIEDMAOR		(0x021000)	/* R/W R/W 0x0000 0000 32 */
 #define	SH4A_PCIEDMSAR0		(0x021100)	/* R/W R/W 0x0000 0000 32 */
 #define	SH4A_PCIEDMSAHR0	(0x021104)	/* R/W R/W 0x0000 0000 32 */