drm/i915: fix Haswell FDI link training code

This commit makes hsw_fdi_link_train responsible for implementing
everything described in the "Enable and train FDI" section from the
Haswell CRT mode set sequence documentation. We completely rewrite
hsw_fdi_link_train to match the documentation and we also call it in
the right place.

This patch was initially sent as a series of tiny patches fixing every
little problem of the function, but since there were too many patches
fixing the same function it got a little difficult to get the "big
picture" of how the function would be in the end, so here we amended
all the patches into a single big patch fixing the whole function.

Problems we fixed:

  1 - Train Haswell FDI at the right time.

    We need to train the FDI before enabling the pipes and planes, so
    we're moving the call from lpt_pch_enable to haswell_crtc_enable
    directly.

    We are also removing ironlake_fdi_pll_enable since the PLL
    enablement on Haswell is completely different and is also done
    during the link training steps.

  2 - Use the right FDI_RX_CTL register on Haswell

    There is only one PCH transcoder, so it's always _FDI_RXA_CTL.
    Using "pipe" here is wrong.

  3 - Don't rely on DDI_BUF_CTL previous values

    Just set the bits we want, everything else is zero. Also
    POSTING_READ the register before sleeping.

  4 - Program the FDI RX TUSIZE register on hsw_fdi_link_train

    According to the mode set sequence documentation, this is the
    right place. According to the FDI_RX_TUSIZE register description,
    this is the value we should set.

    Also remove the code that sets this register from the old
    location: lpt_pch_enable.

  5 - Properly program FDI_RX_MISC pwrdn lane values on HSW

  6 - Wait only 35us for the FDI link training

    First we wait 30us for the FDI receiver lane calibration, then we
    wait 5us for the FDI auto training time.

  7 - Remove a useless indentation level on hsw_fdi_link_train

    We already "break" when the link training succeeds.

  8 - Disable FDI_RX_ENABLE, not FDI_RX_PLL_ENABLE

    When we fail the training.

  9 - Change Haswell FDI link training error messages

    We shouldn't call DRM_ERROR when still looping through voltage
    levels since this is expected and not really a failure. So in this
    commit we adjust the error path to only DRM_ERROR when we really
    fail after trying everything.

    While at it, replace DRM_DEBUG_DRIVER with DRM_DEBUG_KMS since
    it's what we use everywhere.

  10 - Try each voltage twice at hsw_fdi_link_train

    Now with Daniel Vetter's suggestion to use "/2" instead of ">>1".

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
[danvet: Applied tiny bikesheds:
- mention in comment that we test each voltage/emphasis level twice
- realign arguments of the only untouched reg write, it spilled over
  the 80 char limit ...]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3674891..5e820fa 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3936,16 +3936,21 @@
 #define  FDI_PORT_WIDTH_2X_LPT			(1<<19)
 #define  FDI_PORT_WIDTH_1X_LPT			(0<<19)
 
-#define _FDI_RXA_MISC            0xf0010
-#define _FDI_RXB_MISC            0xf1010
+#define _FDI_RXA_MISC			0xf0010
+#define _FDI_RXB_MISC			0xf1010
+#define  FDI_RX_PWRDN_LANE1_MASK	(3<<26)
+#define  FDI_RX_PWRDN_LANE1_VAL(x)	((x)<<26)
+#define  FDI_RX_PWRDN_LANE0_MASK	(3<<24)
+#define  FDI_RX_PWRDN_LANE0_VAL(x)	((x)<<24)
+#define  FDI_RX_TP1_TO_TP2_48		(2<<20)
+#define  FDI_RX_TP1_TO_TP2_64		(3<<20)
+#define  FDI_RX_FDI_DELAY_90		(0x90<<0)
+#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
+
 #define _FDI_RXA_TUSIZE1         0xf0030
 #define _FDI_RXA_TUSIZE2         0xf0038
 #define _FDI_RXB_TUSIZE1         0xf1030
 #define _FDI_RXB_TUSIZE2         0xf1038
-#define  FDI_RX_TP1_TO_TP2_48	(2<<20)
-#define  FDI_RX_TP1_TO_TP2_64	(3<<20)
-#define  FDI_RX_FDI_DELAY_90	(0x90<<0)
-#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
 #define FDI_RX_TUSIZE1(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
 #define FDI_RX_TUSIZE2(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
 
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index a7a555f..0095839 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -153,11 +153,34 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i;
+	u32 temp, i, rx_ctl_val;
 
-	/* Start the training iterating through available voltages and emphasis */
-	for (i=0; i < ARRAY_SIZE(hsw_ddi_buf_ctl_values); i++) {
+	/* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the
+	 * mode set "sequence for CRT port" document:
+	 * - TP1 to TP2 time with the default value
+	 * - FDI delay to 90h
+	 */
+	I915_WRITE(_FDI_RXA_MISC, FDI_RX_PWRDN_LANE1_VAL(2) |
+				  FDI_RX_PWRDN_LANE0_VAL(2) |
+				  FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
+
+	/* Enable the PCH Receiver FDI PLL */
+	rx_ctl_val = FDI_RX_PLL_ENABLE | FDI_RX_ENHANCE_FRAME_ENABLE |
+		     ((intel_crtc->fdi_lanes - 1) << 19);
+	I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+	POSTING_READ(_FDI_RXA_CTL);
+	udelay(220);
+
+	/* Switch from Rawclk to PCDclk */
+	rx_ctl_val |= FDI_PCDCLK;
+	I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+
+	/* Configure Port Clock Select */
+	I915_WRITE(PORT_CLK_SEL(PORT_E), intel_crtc->ddi_pll_sel);
+
+	/* Start the training iterating through available voltages and emphasis,
+	 * testing each value twice. */
+	for (i = 0; i < ARRAY_SIZE(hsw_ddi_buf_ctl_values) * 2; i++) {
 		/* Configure DP_TP_CTL with auto-training */
 		I915_WRITE(DP_TP_CTL(PORT_E),
 					DP_TP_CTL_FDI_AUTOTRAIN |
@@ -166,65 +189,63 @@
 					DP_TP_CTL_ENABLE);
 
 		/* Configure and enable DDI_BUF_CTL for DDI E with next voltage */
-		temp = I915_READ(DDI_BUF_CTL(PORT_E));
-		temp = (temp & ~DDI_BUF_EMP_MASK);
 		I915_WRITE(DDI_BUF_CTL(PORT_E),
-				temp |
-				DDI_BUF_CTL_ENABLE |
-				((intel_crtc->fdi_lanes - 1) << 1) |
-				hsw_ddi_buf_ctl_values[i]);
+			   DDI_BUF_CTL_ENABLE |
+			   ((intel_crtc->fdi_lanes - 1) << 1) |
+			   hsw_ddi_buf_ctl_values[i / 2]);
+		POSTING_READ(DDI_BUF_CTL(PORT_E));
 
 		udelay(600);
 
-		/* We need to program FDI_RX_MISC with the default TP1 to TP2
-		 * values before enabling the receiver, and configure the delay
-		 * for the FDI timing generator to 90h. Luckily, all the other
-		 * bits are supposed to be zeroed, so we can write those values
-		 * directly.
-		 */
-		I915_WRITE(FDI_RX_MISC(pipe), FDI_RX_TP1_TO_TP2_48 |
-				FDI_RX_FDI_DELAY_90);
+		/* Program PCH FDI Receiver TU */
+		I915_WRITE(_FDI_RXA_TUSIZE1, TU_SIZE(64));
 
-		/* Enable CPU FDI Receiver with auto-training */
-		reg = FDI_RX_CTL(pipe);
-		I915_WRITE(reg,
-				I915_READ(reg) |
-					FDI_LINK_TRAIN_AUTO |
-					FDI_RX_ENABLE |
-					FDI_LINK_TRAIN_PATTERN_1_CPT |
-					FDI_RX_ENHANCE_FRAME_ENABLE |
-					((intel_crtc->fdi_lanes - 1) << 19) |
-					FDI_RX_PLL_ENABLE);
-		POSTING_READ(reg);
-		udelay(100);
+		/* Enable PCH FDI Receiver with auto-training */
+		rx_ctl_val |= FDI_RX_ENABLE | FDI_LINK_TRAIN_AUTO;
+		I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+		POSTING_READ(_FDI_RXA_CTL);
+
+		/* Wait for FDI receiver lane calibration */
+		udelay(30);
+
+		/* Unset FDI_RX_MISC pwrdn lanes */
+		temp = I915_READ(_FDI_RXA_MISC);
+		temp &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK);
+		I915_WRITE(_FDI_RXA_MISC, temp);
+		POSTING_READ(_FDI_RXA_MISC);
+
+		/* Wait for FDI auto training time */
+		udelay(5);
 
 		temp = I915_READ(DP_TP_STATUS(PORT_E));
 		if (temp & DP_TP_STATUS_AUTOTRAIN_DONE) {
-			DRM_DEBUG_DRIVER("BUF_CTL training done on %d step\n", i);
+			DRM_DEBUG_KMS("FDI link training done on step %d\n", i);
 
 			/* Enable normal pixel sending for FDI */
 			I915_WRITE(DP_TP_CTL(PORT_E),
-						DP_TP_CTL_FDI_AUTOTRAIN |
-						DP_TP_CTL_LINK_TRAIN_NORMAL |
-						DP_TP_CTL_ENHANCED_FRAME_ENABLE |
-						DP_TP_CTL_ENABLE);
+				   DP_TP_CTL_FDI_AUTOTRAIN |
+				   DP_TP_CTL_LINK_TRAIN_NORMAL |
+				   DP_TP_CTL_ENHANCED_FRAME_ENABLE |
+				   DP_TP_CTL_ENABLE);
 
-			break;
-		} else {
-			DRM_ERROR("Error training BUF_CTL %d\n", i);
-
-			/* Disable DP_TP_CTL and FDI_RX_CTL) and retry */
-			I915_WRITE(DP_TP_CTL(PORT_E),
-					I915_READ(DP_TP_CTL(PORT_E)) &
-						~DP_TP_CTL_ENABLE);
-			I915_WRITE(FDI_RX_CTL(pipe),
-					I915_READ(FDI_RX_CTL(pipe)) &
-						~FDI_RX_PLL_ENABLE);
-			continue;
+			return;
 		}
+
+		/* Disable DP_TP_CTL and FDI_RX_CTL and retry */
+		I915_WRITE(DP_TP_CTL(PORT_E),
+			   I915_READ(DP_TP_CTL(PORT_E)) & ~DP_TP_CTL_ENABLE);
+
+		rx_ctl_val &= ~FDI_RX_ENABLE;
+		I915_WRITE(_FDI_RXA_CTL, rx_ctl_val);
+
+		/* Reset FDI_RX_MISC pwrdn lanes */
+		temp = I915_READ(_FDI_RXA_MISC);
+		temp &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK);
+		temp |= FDI_RX_PWRDN_LANE1_VAL(2) | FDI_RX_PWRDN_LANE0_VAL(2);
+		I915_WRITE(_FDI_RXA_MISC, temp);
 	}
 
-	DRM_DEBUG_KMS("FDI train done.\n");
+	DRM_ERROR("FDI link training failed!\n");
 }
 
 /* WRPLL clock dividers */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 50215e4..417fb4a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3224,19 +3224,10 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
 	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
 
 	assert_transcoder_disabled(dev_priv, TRANSCODER_A);
 
-	/* Write the TU size bits before fdi link training, so that error
-	 * detection works. */
-	I915_WRITE(FDI_RX_TUSIZE1(pipe),
-		   I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK);
-
-	/* For PCH output, training FDI link */
-	dev_priv->display.fdi_link_train(crtc);
-
 	lpt_program_iclkip(crtc);
 
 	/* Set transcoder timing. */
@@ -3463,7 +3454,7 @@
 	is_pch_port = haswell_crtc_driving_pch(crtc);
 
 	if (is_pch_port)
-		ironlake_fdi_pll_enable(intel_crtc);
+		dev_priv->display.fdi_link_train(crtc);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)