drm/radeon: enable additional power gating features on trinity

TN has some additional powergating features beyond what is
supported on ON/LN.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index c6bbf62..10ccd87 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4035,10 +4035,15 @@
 
 static void evergreen_rlc_start(struct radeon_device *rdev)
 {
-	if (rdev->flags & RADEON_IS_IGP)
-		WREG32(RLC_CNTL, RLC_ENABLE | GFX_POWER_GATING_ENABLE | GFX_POWER_GATING_SRC);
-	else
-		WREG32(RLC_CNTL, RLC_ENABLE);
+	u32 mask = RLC_ENABLE;
+
+	if (rdev->flags & RADEON_IS_IGP) {
+		mask |= GFX_POWER_GATING_ENABLE | GFX_POWER_GATING_SRC;
+		if (rdev->family == CHIP_ARUBA)
+			mask |= DYN_PER_SIMD_PG_ENABLE | LB_CNT_SPIM_ACTIVE | LOAD_BALANCE_ENABLE;
+	}
+
+	WREG32(RLC_CNTL, mask);
 }
 
 int evergreen_rlc_resume(struct radeon_device *rdev)
@@ -4054,15 +4059,33 @@
 	WREG32(RLC_HB_CNTL, 0);
 
 	if (rdev->flags & RADEON_IS_IGP) {
+		if (rdev->family == CHIP_ARUBA) {
+			u32 always_on_bitmap =
+				3 | (3 << (16 * rdev->config.cayman.max_shader_engines));
+			/* find out the number of active simds */
+			u32 tmp = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+			tmp |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+			tmp = hweight32(~tmp);
+			if (tmp == rdev->config.cayman.max_simds_per_se) {
+				WREG32(TN_RLC_LB_ALWAYS_ACTIVE_SIMD_MASK, always_on_bitmap);
+				WREG32(TN_RLC_LB_PARAMS, 0x00601004);
+				WREG32(TN_RLC_LB_INIT_SIMD_MASK, 0xffffffff);
+				WREG32(TN_RLC_LB_CNTR_INIT, 0x00000000);
+				WREG32(TN_RLC_LB_CNTR_MAX, 0x00002000);
+			}
+		} else {
+			WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
+			WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
+		}
 		WREG32(TN_RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
 		WREG32(TN_RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
 	} else {
 		WREG32(RLC_HB_BASE, 0);
 		WREG32(RLC_HB_RPTR, 0);
 		WREG32(RLC_HB_WPTR, 0);
+		WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
+		WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
 	}
-	WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
-	WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
 	WREG32(RLC_MC_CNTL, 0);
 	WREG32(RLC_UCODE_CNTL, 0);
 
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index c0df1ca..a7baf67 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -381,6 +381,10 @@
 #       define RLC_ENABLE                               (1 << 0)
 #       define GFX_POWER_GATING_ENABLE                  (1 << 7)
 #       define GFX_POWER_GATING_SRC                     (1 << 8)
+#       define DYN_PER_SIMD_PG_ENABLE                   (1 << 27)
+#       define LB_CNT_SPIM_ACTIVE                       (1 << 30)
+#       define LOAD_BALANCE_ENABLE                      (1 << 31)
+
 #define RLC_HB_BASE                                       0x3f10
 #define RLC_HB_CNTL                                       0x3f0c
 #define RLC_HB_RPTR                                       0x3f20
@@ -394,7 +398,12 @@
 
 /* new for TN */
 #define TN_RLC_SAVE_AND_RESTORE_BASE                      0x3f10
+#define TN_RLC_LB_CNTR_MAX                                0x3f14
+#define TN_RLC_LB_CNTR_INIT                               0x3f18
 #define TN_RLC_CLEAR_STATE_RESTORE_BASE                   0x3f20
+#define TN_RLC_LB_INIT_SIMD_MASK                          0x3fe4
+#define TN_RLC_LB_ALWAYS_ACTIVE_SIMD_MASK                 0x3fe8
+#define TN_RLC_LB_PARAMS                                  0x3fec
 
 #define GRBM_GFX_INDEX          			0x802C
 #define		INSTANCE_INDEX(x)			((x) << 0)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index cafc3bd..f30127c 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1176,6 +1176,16 @@
 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
 
 	udelay(50);
+
+	/* set clockgating golden values on TN */
+	if (rdev->family == CHIP_ARUBA) {
+		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
+		tmp &= ~0x00380000;
+		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
+                tmp = RREG32_CG(CG_CGTT_LOCAL_1);
+		tmp &= ~0x0e000000;
+		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
+	}
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 1775043..95693c7 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -665,6 +665,12 @@
 #define		TID_UNIT(x)				((x) << 14)
 #define		TID_UNIT_MASK				(0xf << 14)
 
+#define	CG_IND_ADDR					0x8f8
+#define	CG_IND_DATA					0x8fc
+/* CGIND regs */
+#define	CG_CGTT_LOCAL_0					0x00
+#define	CG_CGTT_LOCAL_1					0x01
+
 #define MC_CG_CONFIG                                    0x25bc
 #define         MCDW_WR_ENABLE                          (1 << 0)
 #define         MCDX_WR_ENABLE                          (1 << 1)