Merge branch 'drm-radeon-sitn-support' of git://people.freedesktop.org/~airlied/linux

Pull radeon southern islands / trinity support from Dave Airlie:
 "This is support from AMD for their newest GPU and APUs.  The products
  called RadeonHD 7xxx, and the Trinity APU series.

  This did come in a bit late, due to some over-complicated AMD internal
  review process, which from the outside seems unnecessary once the
  company has decided it wants to support open source.  However as I
  said previously I'd rather not put the people who've got this hw for 3
  months now being forced to use fglrx on it if there is open code.

  Its pretty well self contained and just plugs into the driver in
  various places."

* 'drm-radeon-sitn-support' of git://people.freedesktop.org/~airlied/linux: (48 commits)
  drm/radeon/kms: update duallink checks for DCE6
  drm/radeon/kms: add trinity pci ids
  drm/radeon/kms: add radeon_asic struct for trinity
  drm/radeon/kms: add support for ucode loading on trinity (v2)
  drm/radeon/kms/vm: set vram base offset properly for TN
  drm/radeon/kms: Update evergreen functions for trinity
  drm/radeon/kms: cayman gpu init updates for trinity
  drm/radeon/kms: Add checks for TN in the DP bridge code
  drm/radeon/kms/DCE6.1: ss is not supported on the internal pplls
  drm/radeon/kms: disable PPLL0 on DCE6.1 when not in use
  drm/radeon/kms: Adjust pll picker for DCE6.1
  drm/radeon/kms: DCE6.1 disp eng pll updates
  drm/radeon/kms: DCE6.1 watermark updates for TN
  drm/radeon/kms: no support for internal thermal sensor on TN yet
  drm/radeon/kms: add trinity (TN) chip family
  drm/radeon/kms: Add SI pci ids
  drm/radeon: Update radeon_info_ioctl for SI. (v2)
  drm/radeon/kms: add radeon_asic struct for SI
  drm/radeon/kms: add support for compute rings in CS ioctl on SI
  drm/radeon/kms: fill in startup/shutdown callbacks for SI
  ...
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 8410415..9d83729 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -71,7 +71,7 @@
 	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
 	radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \
-	radeon_semaphore.o radeon_sa.o atombios_i2c.o
+	radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o si_blit_shaders.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/ObjectID.h b/drivers/gpu/drm/radeon/ObjectID.h
index c61c3fe..ca4b038 100644
--- a/drivers/gpu/drm/radeon/ObjectID.h
+++ b/drivers/gpu/drm/radeon/ObjectID.h
@@ -85,6 +85,7 @@
 #define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA   0x1F
 #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY1        0x20
 #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY2        0x21
+#define ENCODER_OBJECT_ID_INTERNAL_VCE            0x24
 
 #define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO    0xFF
 
@@ -387,6 +388,10 @@
                                                   GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
                                                   ENCODER_OBJECT_ID_NUTMEG << OBJECT_ID_SHIFT)
 
+#define ENCODER_VCE_ENUM_ID1                     ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+                                                  GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+                                                  ENCODER_OBJECT_ID_INTERNAL_VCE << OBJECT_ID_SHIFT)
+
 /****************************************************/
 /* Connector Object ID definition - Shared with BIOS */
 /****************************************************/
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index 1b50ad8..4b04ba3 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -101,6 +101,7 @@
 #define ATOM_LCD_SELFTEST_START									(ATOM_DISABLE+5)
 #define ATOM_LCD_SELFTEST_STOP									(ATOM_ENABLE+5)
 #define ATOM_ENCODER_INIT			                  (ATOM_DISABLE+7)
+#define ATOM_INIT			                          (ATOM_DISABLE+7)
 #define ATOM_GET_STATUS                         (ATOM_DISABLE+8)
 
 #define ATOM_BLANKING         1
@@ -251,25 +252,25 @@
   USHORT SetEngineClock;                         //Function Table,directly used by various SW components,latest version 1.1
   USHORT SetMemoryClock;                         //Function Table,directly used by various SW components,latest version 1.1
   USHORT SetPixelClock;                          //Function Table,directly used by various SW components,latest version 1.2  
-  USHORT DynamicClockGating;                     //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
+  USHORT EnableDispPowerGating;                  //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
   USHORT ResetMemoryDLL;                         //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
   USHORT ResetMemoryDevice;                      //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
-  USHORT MemoryPLLInit;
-  USHORT AdjustDisplayPll;												//only used by Bios
+  USHORT MemoryPLLInit;                          //Atomic Table,  used only by Bios
+  USHORT AdjustDisplayPll;											 //Atomic Table,  used by various SW componentes. 
   USHORT AdjustMemoryController;                 //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock                
   USHORT EnableASIC_StaticPwrMgt;                //Atomic Table,  only used by Bios
   USHORT ASIC_StaticPwrMgtStatusChange;          //Obsolete ,     only used by Bios   
   USHORT DAC_LoadDetection;                      //Atomic Table,  directly used by various SW components,latest version 1.2  
   USHORT LVTMAEncoderControl;                    //Atomic Table,directly used by various SW components,latest version 1.3
-  USHORT LCD1OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT HW_Misc_Operation;                      //Atomic Table,  directly used by various SW components,latest version 1.1 
   USHORT DAC1EncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1  
   USHORT DAC2EncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1 
   USHORT DVOOutputControl;                       //Atomic Table,  directly used by various SW components,latest version 1.1 
   USHORT CV1OutputControl;                       //Atomic Table,  Atomic Table,  Obsolete from Ry6xx, use DAC2 Output instead 
-  USHORT GetConditionalGoldenSetting;            //only used by Bios
+  USHORT GetConditionalGoldenSetting;            //Only used by Bios
   USHORT TVEncoderControl;                       //Function Table,directly used by various SW components,latest version 1.1
-  USHORT TMDSAEncoderControl;                    //Atomic Table,  directly used by various SW components,latest version 1.3
-  USHORT LVDSEncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.3
+  USHORT PatchMCSetting;                         //only used by BIOS
+  USHORT MC_SEQ_Control;                         //only used by BIOS
   USHORT TV1OutputControl;                       //Atomic Table,  Obsolete from Ry6xx, use DAC2 Output instead
   USHORT EnableScaler;                           //Atomic Table,  used only by Bios
   USHORT BlankCRTC;                              //Atomic Table,  directly used by various SW components,latest version 1.1 
@@ -282,7 +283,7 @@
   USHORT SetCRTC_Replication;                    //Atomic Table,  used only by Bios
   USHORT SelectCRTC_Source;                      //Atomic Table,  directly used by various SW components,latest version 1.1 
   USHORT EnableGraphSurfaces;                    //Atomic Table,  used only by Bios
-  USHORT UpdateCRTC_DoubleBufferRegisters;
+  USHORT UpdateCRTC_DoubleBufferRegisters;			 //Atomic Table,  used only by Bios
   USHORT LUT_AutoFill;                           //Atomic Table,  only used by Bios
   USHORT EnableHW_IconCursor;                    //Atomic Table,  only used by Bios
   USHORT GetMemoryClock;                         //Atomic Table,  directly used by various SW components,latest version 1.1 
@@ -308,27 +309,36 @@
   USHORT SetVoltage;                             //Function Table,directly and/or indirectly used by various SW components,latest version 1.1
   USHORT DAC1OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1
   USHORT DAC2OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1
-  USHORT SetupHWAssistedI2CStatus;               //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C"
+  USHORT ComputeMemoryClockParam;                //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C"
   USHORT ClockSource;                            //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
   USHORT MemoryDeviceInit;                       //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
-  USHORT EnableYUV;                              //Atomic Table,  indirectly used by various SW components,called from EnableVGARender
+  USHORT GetDispObjectInfo;                      //Atomic Table,  indirectly used by various SW components,called from EnableVGARender
   USHORT DIG1EncoderControl;                     //Atomic Table,directly used by various SW components,latest version 1.1
   USHORT DIG2EncoderControl;                     //Atomic Table,directly used by various SW components,latest version 1.1
   USHORT DIG1TransmitterControl;                 //Atomic Table,directly used by various SW components,latest version 1.1
   USHORT DIG2TransmitterControl;	               //Atomic Table,directly used by various SW components,latest version 1.1 
   USHORT ProcessAuxChannelTransaction;					 //Function Table,only used by Bios
   USHORT DPEncoderService;											 //Function Table,only used by Bios
+  USHORT GetVoltageInfo;                         //Function Table,only used by Bios since SI
 }ATOM_MASTER_LIST_OF_COMMAND_TABLES;   
 
 // For backward compatible 
 #define ReadEDIDFromHWAssistedI2C                ProcessI2cChannelTransaction
-#define UNIPHYTransmitterControl						     DIG1TransmitterControl
-#define LVTMATransmitterControl							     DIG2TransmitterControl
+#define DPTranslatorControl                      DIG2EncoderControl
+#define UNIPHYTransmitterControl			     DIG1TransmitterControl
+#define LVTMATransmitterControl				     DIG2TransmitterControl
 #define SetCRTC_DPM_State                        GetConditionalGoldenSetting
 #define SetUniphyInstance                        ASIC_StaticPwrMgtStatusChange
 #define HPDInterruptService                      ReadHWAssistedI2CStatus
 #define EnableVGA_Access                         GetSCLKOverMCLKRatio
-#define GetDispObjectInfo                        EnableYUV 
+#define EnableYUV                                GetDispObjectInfo                         
+#define DynamicClockGating                       EnableDispPowerGating
+#define SetupHWAssistedI2CStatus                 ComputeMemoryClockParam
+
+#define TMDSAEncoderControl                      PatchMCSetting
+#define LVDSEncoderControl                       MC_SEQ_Control
+#define LCD1OutputControl                        HW_Misc_Operation
+
 
 typedef struct _ATOM_MASTER_COMMAND_TABLE
 {
@@ -495,6 +505,34 @@
 // ucInputFlag
 #define ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN  1   // 1-StrobeMode, 0-PerformanceMode
 
+// use for ComputeMemoryClockParamTable
+typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1
+{
+  union
+  {
+    ULONG  ulClock;         
+    ATOM_S_MPLL_FB_DIVIDER   ulFbDiv;         //Output:UPPER_WORD=FB_DIV_INTEGER,  LOWER_WORD=FB_DIV_FRAC shl (16-FB_FRACTION_BITS)
+  };
+  UCHAR   ucDllSpeed;                         //Output 
+  UCHAR   ucPostDiv;                          //Output
+  union{
+    UCHAR   ucInputFlag;                      //Input : ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN: 1-StrobeMode, 0-PerformanceMode
+    UCHAR   ucPllCntlFlag;                    //Output: 
+  };
+  UCHAR   ucBWCntl;                       
+}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1;
+
+// definition of ucInputFlag
+#define MPLL_INPUT_FLAG_STROBE_MODE_EN          0x01
+// definition of ucPllCntlFlag
+#define MPLL_CNTL_FLAG_VCO_MODE_MASK            0x03 
+#define MPLL_CNTL_FLAG_BYPASS_DQ_PLL            0x04
+#define MPLL_CNTL_FLAG_QDR_ENABLE               0x08
+#define MPLL_CNTL_FLAG_AD_HALF_RATE             0x10
+
+//MPLL_CNTL_FLAG_BYPASS_AD_PLL has a wrong name, should be BYPASS_DQ_PLL
+#define MPLL_CNTL_FLAG_BYPASS_AD_PLL            0x04
+
 typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
 {
   ATOM_COMPUTE_CLOCK_FREQ ulClock;
@@ -562,6 +600,16 @@
 #define  DYNAMIC_CLOCK_GATING_PS_ALLOCATION  DYNAMIC_CLOCK_GATING_PARAMETERS
 
 /****************************************************************************/	
+// Structure used by EnableDispPowerGatingTable.ctb
+/****************************************************************************/	
+typedef struct _ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 
+{
+  UCHAR ucDispPipeId;                 // ATOM_CRTC1, ATOM_CRTC2, ...
+  UCHAR ucEnable;                     // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR ucPadding[2];
+}ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1;
+
+/****************************************************************************/	
 // Structure used by EnableASIC_StaticPwrMgtTable.ctb
 /****************************************************************************/	
 typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS
@@ -807,6 +855,7 @@
 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_1_62GHZ		  0x00
 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_2_70GHZ		  0x01
 #define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ		  0x02
+#define ATOM_ENCODER_CONFIG_V4_DPLINKRATE_3_24GHZ		  0x03
 #define ATOM_ENCODER_CONFIG_V4_ENCODER_SEL					  0x70
 #define ATOM_ENCODER_CONFIG_V4_DIG0_ENCODER					  0x00
 #define ATOM_ENCODER_CONFIG_V4_DIG1_ENCODER					  0x10
@@ -814,6 +863,7 @@
 #define ATOM_ENCODER_CONFIG_V4_DIG3_ENCODER					  0x30
 #define ATOM_ENCODER_CONFIG_V4_DIG4_ENCODER					  0x40
 #define ATOM_ENCODER_CONFIG_V4_DIG5_ENCODER					  0x50
+#define ATOM_ENCODER_CONFIG_V4_DIG6_ENCODER					  0x60
 
 typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4
 {
@@ -1171,6 +1221,106 @@
 #define ATOM_TRANSMITTER_CONFIG_V4_TRANSMITTER3           	0x80	//EF
 
 
+typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V5
+{
+#if ATOM_BIG_ENDIAN
+  UCHAR ucReservd1:1;
+  UCHAR ucHPDSel:3;
+  UCHAR ucPhyClkSrcId:2;            
+  UCHAR ucCoherentMode:1;            
+  UCHAR ucReserved:1;
+#else
+  UCHAR ucReserved:1;
+  UCHAR ucCoherentMode:1;            
+  UCHAR ucPhyClkSrcId:2;            
+  UCHAR ucHPDSel:3;
+  UCHAR ucReservd1:1;
+#endif
+}ATOM_DIG_TRANSMITTER_CONFIG_V5;
+
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5
+{
+  USHORT usSymClock;		        // Encoder Clock in 10kHz,(DP mode)= linkclock/10, (TMDS/LVDS/HDMI)= pixel clock,  (HDMI deep color), =pixel clock * deep_color_ratio
+  UCHAR  ucPhyId;                   // 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4= UNIPHYE 5=UNIPHYF
+  UCHAR  ucAction;				    // define as ATOM_TRANSMITER_ACTION_xxx
+  UCHAR  ucLaneNum;                 // indicate lane number 1-8
+  UCHAR  ucConnObjId;               // Connector Object Id defined in ObjectId.h
+  UCHAR  ucDigMode;                 // indicate DIG mode
+  union{
+  ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig;
+  UCHAR ucConfig;
+  };
+  UCHAR  ucDigEncoderSel;           // indicate DIG front end encoder 
+  UCHAR  ucDPLaneSet;
+  UCHAR  ucReserved;
+  UCHAR  ucReserved1;
+}DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5;
+
+//ucPhyId
+#define ATOM_PHY_ID_UNIPHYA                                 0  
+#define ATOM_PHY_ID_UNIPHYB                                 1
+#define ATOM_PHY_ID_UNIPHYC                                 2
+#define ATOM_PHY_ID_UNIPHYD                                 3
+#define ATOM_PHY_ID_UNIPHYE                                 4
+#define ATOM_PHY_ID_UNIPHYF                                 5
+#define ATOM_PHY_ID_UNIPHYG                                 6
+
+// ucDigEncoderSel
+#define ATOM_TRANMSITTER_V5__DIGA_SEL                       0x01
+#define ATOM_TRANMSITTER_V5__DIGB_SEL                       0x02
+#define ATOM_TRANMSITTER_V5__DIGC_SEL                       0x04
+#define ATOM_TRANMSITTER_V5__DIGD_SEL                       0x08
+#define ATOM_TRANMSITTER_V5__DIGE_SEL                       0x10
+#define ATOM_TRANMSITTER_V5__DIGF_SEL                       0x20
+#define ATOM_TRANMSITTER_V5__DIGG_SEL                       0x40
+
+// ucDigMode
+#define ATOM_TRANSMITTER_DIGMODE_V5_DP                      0
+#define ATOM_TRANSMITTER_DIGMODE_V5_LVDS                    1
+#define ATOM_TRANSMITTER_DIGMODE_V5_DVI                     2
+#define ATOM_TRANSMITTER_DIGMODE_V5_HDMI                    3
+#define ATOM_TRANSMITTER_DIGMODE_V5_SDVO                    4
+#define ATOM_TRANSMITTER_DIGMODE_V5_DP_MST                  5
+
+// ucDPLaneSet
+#define DP_LANE_SET__0DB_0_4V                               0x00
+#define DP_LANE_SET__0DB_0_6V                               0x01
+#define DP_LANE_SET__0DB_0_8V                               0x02
+#define DP_LANE_SET__0DB_1_2V                               0x03
+#define DP_LANE_SET__3_5DB_0_4V                             0x08  
+#define DP_LANE_SET__3_5DB_0_6V                             0x09
+#define DP_LANE_SET__3_5DB_0_8V                             0x0a
+#define DP_LANE_SET__6DB_0_4V                               0x10
+#define DP_LANE_SET__6DB_0_6V                               0x11
+#define DP_LANE_SET__9_5DB_0_4V                             0x18  
+
+// ATOM_DIG_TRANSMITTER_CONFIG_V5 asConfig;
+// Bit1
+#define ATOM_TRANSMITTER_CONFIG_V5_COHERENT				          0x02
+
+// Bit3:2
+#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_MASK 	        0x0c
+#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SEL_SHIFT		    0x02
+
+#define ATOM_TRANSMITTER_CONFIG_V5_P1PLL         		        0x00
+#define ATOM_TRANSMITTER_CONFIG_V5_P2PLL		                0x04
+#define ATOM_TRANSMITTER_CONFIG_V5_P0PLL		                0x08   
+#define ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT           0x0c
+// Bit6:4
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_MASK		          0x70
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD_SEL_SHIFT		      0x04
+
+#define ATOM_TRANSMITTER_CONFIG_V5_NO_HPD_SEL				        0x00
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD1_SEL				          0x10
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD2_SEL				          0x20
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD3_SEL				          0x30
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD4_SEL				          0x40
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD5_SEL				          0x50
+#define ATOM_TRANSMITTER_CONFIG_V5_HPD6_SEL				          0x60
+
+#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION_V1_5            DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5
+
+
 /****************************************************************************/	
 // Structures used by ExternalEncoderControlTable V1.3
 // ASIC Families: Evergreen, Llano, NI
@@ -1793,6 +1943,7 @@
 #define ATOM_PPLL_SS_TYPE_V3_P1PLL            0x00
 #define ATOM_PPLL_SS_TYPE_V3_P2PLL            0x04
 #define ATOM_PPLL_SS_TYPE_V3_DCPLL            0x08
+#define ATOM_PPLL_SS_TYPE_V3_P0PLL            ATOM_PPLL_SS_TYPE_V3_DCPLL
 #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_MASK     0x00FF
 #define ATOM_PPLL_SS_AMOUNT_V3_FBDIV_SHIFT    0
 #define ATOM_PPLL_SS_AMOUNT_V3_NFRAC_MASK     0x0F00
@@ -2030,12 +2181,77 @@
   USHORT   usVoltageLevel;              // real voltage level
 }SET_VOLTAGE_PARAMETERS_V2;
 
+
+typedef struct	_SET_VOLTAGE_PARAMETERS_V1_3
+{
+  UCHAR    ucVoltageType;               // To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI
+  UCHAR    ucVoltageMode;               // Indicate action: Set voltage level
+  USHORT   usVoltageLevel;              // real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. )
+}SET_VOLTAGE_PARAMETERS_V1_3;
+
+//ucVoltageType
+#define VOLTAGE_TYPE_VDDC                    1
+#define VOLTAGE_TYPE_MVDDC                   2
+#define VOLTAGE_TYPE_MVDDQ                   3
+#define VOLTAGE_TYPE_VDDCI                   4
+
+//SET_VOLTAGE_PARAMETERS_V3.ucVoltageMode
+#define ATOM_SET_VOLTAGE                     0        //Set voltage Level
+#define ATOM_INIT_VOLTAGE_REGULATOR          3        //Init Regulator
+#define ATOM_SET_VOLTAGE_PHASE               4        //Set Vregulator Phase
+#define ATOM_GET_MAX_VOLTAGE                 6        //Get Max Voltage, not used in SetVoltageTable v1.3
+#define ATOM_GET_VOLTAGE_LEVEL               6        //Get Voltage level from vitual voltage ID
+
+// define vitual voltage id in usVoltageLevel
+#define ATOM_VIRTUAL_VOLTAGE_ID0             0xff01
+#define ATOM_VIRTUAL_VOLTAGE_ID1             0xff02
+#define ATOM_VIRTUAL_VOLTAGE_ID2             0xff03
+#define ATOM_VIRTUAL_VOLTAGE_ID3             0xff04
+
 typedef struct _SET_VOLTAGE_PS_ALLOCATION
 {
   SET_VOLTAGE_PARAMETERS sASICSetVoltage;
   WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
 }SET_VOLTAGE_PS_ALLOCATION;
 
+// New Added from SI for GetVoltageInfoTable, input parameter structure
+typedef struct  _GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1
+{
+  UCHAR    ucVoltageType;               // Input: To tell which voltage to set up, VDDC/MVDDC/MVDDQ/VDDCI
+  UCHAR    ucVoltageMode;               // Input: Indicate action: Get voltage info
+  USHORT   usVoltageLevel;              // Input: real voltage level in unit of mv or Voltage Phase (0, 1, 2, .. ) or Leakage Id 
+  ULONG    ulReserved;
+}GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_1;
+
+// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_VID
+typedef struct  _GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1
+{
+  ULONG    ulVotlageGpioState;
+  ULONG    ulVoltageGPioMask;
+}GET_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1;
+
+// New Added from SI for GetVoltageInfoTable, output parameter structure when ucVotlageMode == ATOM_GET_VOLTAGE_STATEx_LEAKAGE_VID
+typedef struct  _GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1
+{
+  USHORT   usVoltageLevel;
+  USHORT   usVoltageId;                                  // Voltage Id programmed in Voltage Regulator
+  ULONG    ulReseved;
+}GET_LEAKAGE_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_1;
+
+
+// GetVoltageInfo v1.1 ucVoltageMode
+#define	ATOM_GET_VOLTAGE_VID                0x00
+#define ATOM_GET_VOTLAGE_INIT_SEQ           0x03
+#define ATOM_GET_VOLTTAGE_PHASE_PHASE_VID   0x04
+// for SI, this state map to 0xff02 voltage state in Power Play table, which is power boost state
+#define	ATOM_GET_VOLTAGE_STATE0_LEAKAGE_VID 0x10
+
+// for SI, this state map to 0xff01 voltage state in Power Play table, which is performance state
+#define	ATOM_GET_VOLTAGE_STATE1_LEAKAGE_VID 0x11
+// undefined power state
+#define	ATOM_GET_VOLTAGE_STATE2_LEAKAGE_VID 0x12
+#define	ATOM_GET_VOLTAGE_STATE3_LEAKAGE_VID 0x13
+
 /****************************************************************************/	
 // Structures used by TVEncoderControlTable
 /****************************************************************************/	
@@ -2065,9 +2281,9 @@
   USHORT        MultimediaConfigInfo;     // Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios
   USHORT        StandardVESA_Timing;      // Only used by Bios
   USHORT        FirmwareInfo;             // Shared by various SW components,latest version 1.4
-  USHORT        DAC_Info;                 // Will be obsolete from R600
+  USHORT        PaletteData;              // Only used by BIOS
   USHORT        LCD_Info;                 // Shared by various SW components,latest version 1.3, was called LVDS_Info 
-  USHORT        TMDS_Info;                // Will be obsolete from R600
+  USHORT        DIGTransmitterInfo;       // Internal used by VBIOS only version 3.1
   USHORT        AnalogTV_Info;            // Shared by various SW components,latest version 1.1 
   USHORT        SupportedDevicesInfo;     // Will be obsolete from R600
   USHORT        GPIO_I2C_Info;            // Shared by various SW components,latest version 1.2 will be used from R600           
@@ -2096,15 +2312,16 @@
 	USHORT				PowerSourceInfo;					// Shared by various SW components, latest versoin 1.1
 }ATOM_MASTER_LIST_OF_DATA_TABLES;
 
-// For backward compatible 
-#define LVDS_Info                LCD_Info
-
 typedef struct _ATOM_MASTER_DATA_TABLE
 { 
   ATOM_COMMON_TABLE_HEADER sHeader;  
   ATOM_MASTER_LIST_OF_DATA_TABLES   ListOfDataTables;
 }ATOM_MASTER_DATA_TABLE;
 
+// For backward compatible 
+#define LVDS_Info                LCD_Info
+#define DAC_Info                 PaletteData
+#define TMDS_Info                DIGTransmitterInfo
 
 /****************************************************************************/	
 // Structure used in MultimediaCapabilityInfoTable
@@ -2171,7 +2388,9 @@
 typedef struct _ATOM_FIRMWARE_CAPABILITY
 {
 #if ATOM_BIG_ENDIAN
-  USHORT Reserved:3;
+  USHORT Reserved:1;
+  USHORT SCL2Redefined:1;
+  USHORT PostWithoutModeSet:1;
   USHORT HyperMemory_Size:4;
   USHORT HyperMemory_Support:1;
   USHORT PPMode_Assigned:1;
@@ -2193,7 +2412,9 @@
   USHORT PPMode_Assigned:1;
   USHORT HyperMemory_Support:1;
   USHORT HyperMemory_Size:4;
-  USHORT Reserved:3;
+  USHORT PostWithoutModeSet:1;
+  USHORT SCL2Redefined:1;
+  USHORT Reserved:1;
 #endif
 }ATOM_FIRMWARE_CAPABILITY;
 
@@ -2418,7 +2639,8 @@
   USHORT                          usLcdMaxPixelClockPLL_Output; // In MHz unit
   ULONG                           ulReserved4;                //Was ulAsicMaximumVoltage
   ULONG                           ulMinPixelClockPLL_Output;  //In 10Khz unit
-  ULONG                           ulReserved5;                //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input
+  UCHAR                           ucRemoteDisplayConfig;
+  UCHAR                           ucReserved5[3];             //Was usMinEngineClockPLL_Input and usMaxEngineClockPLL_Input
   ULONG                           ulReserved6;                //Was usMinEngineClockPLL_Output and usMinMemoryClockPLL_Input
   ULONG                           ulReserved7;                //Was usMaxMemoryClockPLL_Input and usMinMemoryClockPLL_Output
   USHORT                          usReserved11;               //Was usMaxPixelClock;  //In 10Khz unit, Max.  Pclk used only for DAC
@@ -2438,6 +2660,11 @@
 
 #define ATOM_FIRMWARE_INFO_LAST  ATOM_FIRMWARE_INFO_V2_2
 
+
+// definition of ucRemoteDisplayConfig
+#define REMOTE_DISPLAY_DISABLE                   0x00
+#define REMOTE_DISPLAY_ENABLE                    0x01
+
 /****************************************************************************/	
 // Structures used in IntegratedSystemInfoTable
 /****************************************************************************/	
@@ -2660,8 +2887,9 @@
 #define    INTEGRATED_SYSTEM_INFO__AMD_CPU__GREYHOUND      2
 #define    INTEGRATED_SYSTEM_INFO__AMD_CPU__K8             3
 #define    INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH        4
+#define    INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI         5
 
-#define    INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE       INTEGRATED_SYSTEM_INFO__AMD_CPU__PHARAOH    // this deff reflects max defined CPU code
+#define    INTEGRATED_SYSTEM_INFO__AMD_CPU__MAX_CODE       INTEGRATED_SYSTEM_INFO__AMD_CPU__OROCHI    // this deff reflects max defined CPU code
 
 #define SYSTEM_CONFIG_POWEREXPRESS_ENABLE                 0x00000001
 #define SYSTEM_CONFIG_RUN_AT_OVERDRIVE_ENGINE             0x00000002
@@ -2753,6 +2981,7 @@
 #define ASIC_INT_DIG4_ENCODER_ID													0x0b
 #define ASIC_INT_DIG5_ENCODER_ID													0x0c
 #define ASIC_INT_DIG6_ENCODER_ID													0x0d
+#define ASIC_INT_DIG7_ENCODER_ID													0x0e
 
 //define Encoder attribute
 #define ATOM_ANALOG_ENCODER																0
@@ -3226,15 +3455,23 @@
 
   UCHAR               ucPowerSequenceDIGONtoDE_in4Ms;
   UCHAR               ucPowerSequenceDEtoVARY_BL_in4Ms;
-  UCHAR               ucPowerSequenceDEtoDIGON_in4Ms;
   UCHAR               ucPowerSequenceVARY_BLtoDE_in4Ms;
+  UCHAR               ucPowerSequenceDEtoDIGON_in4Ms;
 
   UCHAR               ucOffDelay_in4Ms;
   UCHAR               ucPowerSequenceVARY_BLtoBLON_in4Ms;
   UCHAR               ucPowerSequenceBLONtoVARY_BL_in4Ms;
   UCHAR               ucReserved1;
 
-  ULONG               ulReserved[4];
+  UCHAR               ucDPCD_eDP_CONFIGURATION_CAP;     // dpcd 0dh
+  UCHAR               ucDPCD_MAX_LINK_RATE;             // dpcd 01h
+  UCHAR               ucDPCD_MAX_LANE_COUNT;            // dpcd 02h
+  UCHAR               ucDPCD_MAX_DOWNSPREAD;            // dpcd 03h
+
+  USHORT              usMaxPclkFreqInSingleLink;        // Max PixelClock frequency in single link mode. 
+  UCHAR               uceDPToLVDSRxId;
+  UCHAR               ucLcdReservd;
+  ULONG               ulReserved[2];
 }ATOM_LCD_INFO_V13;  
 
 #define ATOM_LCD_INFO_LAST  ATOM_LCD_INFO_V13    
@@ -3273,6 +3510,11 @@
 //Use this cap bit for a quick reference whether an embadded panel (LCD1 ) is LVDS or eDP.
 #define	LCDPANEL_CAP_V13_eDP                    0x4        // = LCDPANEL_CAP_eDP no change comparing to previous version
 
+//uceDPToLVDSRxId
+#define eDP_TO_LVDS_RX_DISABLE                  0x00       // no eDP->LVDS translator chip 
+#define eDP_TO_LVDS_COMMON_ID                   0x01       // common eDP->LVDS translator chip without AMD SW init
+#define eDP_TO_LVDS_RT_ID                       0x02       // RT tanslator which require AMD SW init
+
 typedef struct  _ATOM_PATCH_RECORD_MODE
 {
   UCHAR     ucRecordType;
@@ -3317,6 +3559,7 @@
 #define LCD_CAP_RECORD_TYPE                   3
 #define LCD_FAKE_EDID_PATCH_RECORD_TYPE       4
 #define LCD_PANEL_RESOLUTION_RECORD_TYPE      5
+#define LCD_EDID_OFFSET_PATCH_RECORD_TYPE     6
 #define ATOM_RECORD_END_TYPE                  0xFF
 
 /****************************Spread Spectrum Info Table Definitions **********************/
@@ -3528,6 +3771,7 @@
 
 CAIL needs to claim an reserved area defined by FBAccessAreaOffset and usFBUsedbyDrvInKB in non VGA case.*/
 
+/***********************************************************************************/	
 #define ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO			1
 
 typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO
@@ -3818,13 +4062,17 @@
     ATOM_DP_CONN_CHANNEL_MAPPING asDPMapping;
     ATOM_DVI_CONN_CHANNEL_MAPPING asDVIMapping;
   };
-  UCHAR   ucReserved;
-  USHORT  usReserved[2]; 
+  UCHAR   ucChPNInvert;                   // bit vector for up to 8 lanes, =0: P and N is not invert, =1 P and N is inverted
+  USHORT  usCaps;
+  USHORT  usReserved; 
 }EXT_DISPLAY_PATH;
    
 #define NUMBER_OF_UCHAR_FOR_GUID          16
 #define MAX_NUMBER_OF_EXT_DISPLAY_PATH    7
 
+//usCaps
+#define  EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE          0x01
+
 typedef  struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO
 {
   ATOM_COMMON_TABLE_HEADER sHeader;
@@ -3832,7 +4080,9 @@
   EXT_DISPLAY_PATH         sPath[MAX_NUMBER_OF_EXT_DISPLAY_PATH]; // total of fixed 7 entries.
   UCHAR                    ucChecksum;                            // a  simple Checksum of the sum of whole structure equal to 0x0. 
   UCHAR                    uc3DStereoPinId;                       // use for eDP panel
-  UCHAR                    Reserved [6];                          // for potential expansion
+  UCHAR                    ucRemoteDisplayConfig;
+  UCHAR                    uceDPToLVDSRxId;
+  UCHAR                    Reserved[4];                           // for potential expansion
 }ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO;
 
 //Related definitions, all records are different but they have a commond header
@@ -3977,6 +4227,7 @@
 #define GPIO_PIN_STATE_ACTIVE_HIGH      0x1
 
 // Indexes to GPIO array in GLSync record 
+// GLSync record is for Frame Lock/Gen Lock feature.
 #define ATOM_GPIO_INDEX_GLSYNC_REFCLK    0
 #define ATOM_GPIO_INDEX_GLSYNC_HSYNC     1
 #define ATOM_GPIO_INDEX_GLSYNC_VSYNC     2
@@ -3984,7 +4235,9 @@
 #define ATOM_GPIO_INDEX_GLSYNC_SWAP_GNT  4
 #define ATOM_GPIO_INDEX_GLSYNC_INTERRUPT 5
 #define ATOM_GPIO_INDEX_GLSYNC_V_RESET   6
-#define ATOM_GPIO_INDEX_GLSYNC_MAX       7
+#define ATOM_GPIO_INDEX_GLSYNC_SWAP_CNTL 7
+#define ATOM_GPIO_INDEX_GLSYNC_SWAP_SEL  8
+#define ATOM_GPIO_INDEX_GLSYNC_MAX       9
 
 typedef struct  _ATOM_ENCODER_DVO_CF_RECORD
 {
@@ -3994,7 +4247,8 @@
 }ATOM_ENCODER_DVO_CF_RECORD;
 
 // Bit maps for ATOM_ENCODER_CAP_RECORD.ucEncoderCap
-#define ATOM_ENCODER_CAP_RECORD_HBR2     0x01         // DP1.2 HBR2 is supported by this path
+#define ATOM_ENCODER_CAP_RECORD_HBR2                  0x01         // DP1.2 HBR2 is supported by HW encoder
+#define ATOM_ENCODER_CAP_RECORD_HBR2_EN               0x02         // DP1.2 HBR2 setting is qualified and HBR2 can be enabled 
 
 typedef struct  _ATOM_ENCODER_CAP_RECORD
 {
@@ -4003,11 +4257,13 @@
     USHORT                    usEncoderCap;         
     struct {
 #if ATOM_BIG_ENDIAN
-      USHORT                  usReserved:15;        // Bit1-15 may be defined for other capability in future
+      USHORT                  usReserved:14;        // Bit1-15 may be defined for other capability in future
+      USHORT                  usHBR2En:1;           // Bit1 is for DP1.2 HBR2 enable
       USHORT                  usHBR2Cap:1;          // Bit0 is for DP1.2 HBR2 capability. 
 #else
       USHORT                  usHBR2Cap:1;          // Bit0 is for DP1.2 HBR2 capability. 
-      USHORT                  usReserved:15;        // Bit1-15 may be defined for other capability in future
+      USHORT                  usHBR2En:1;           // Bit1 is for DP1.2 HBR2 enable
+      USHORT                  usReserved:14;        // Bit1-15 may be defined for other capability in future
 #endif
     };
   }; 
@@ -4157,6 +4413,7 @@
 #define	VOLTAGE_CONTROL_ID_VT1556M						0x07									
 #define	VOLTAGE_CONTROL_ID_CHL822x						0x08									
 #define	VOLTAGE_CONTROL_ID_VT1586M						0x09
+#define VOLTAGE_CONTROL_ID_UP1637 						0x0A
 
 typedef struct  _ATOM_VOLTAGE_OBJECT
 {
@@ -4193,6 +4450,69 @@
 	USHORT	usVoltage;
 }ATOM_LEAKID_VOLTAGE;
 
+typedef struct _ATOM_VOLTAGE_OBJECT_HEADER_V3{
+ 	 UCHAR		ucVoltageType;									//Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI	 
+   UCHAR		ucVoltageMode;							    //Indicate voltage control mode: Init/Set/Leakage/Set phase 
+	 USHORT		usSize;													//Size of Object	
+}ATOM_VOLTAGE_OBJECT_HEADER_V3;
+
+typedef struct  _VOLTAGE_LUT_ENTRY_V2
+{
+	 ULONG		ulVoltageId;									  // The Voltage ID which is used to program GPIO register
+	 USHORT		usVoltageValue;									// The corresponding Voltage Value, in mV
+}VOLTAGE_LUT_ENTRY_V2;
+
+typedef struct  _LEAKAGE_VOLTAGE_LUT_ENTRY_V2
+{
+  USHORT	usVoltageLevel; 							  // The Voltage ID which is used to program GPIO register
+  USHORT  usVoltageId;                    
+	USHORT	usLeakageId;									  // The corresponding Voltage Value, in mV
+}LEAKAGE_VOLTAGE_LUT_ENTRY_V2;
+
+typedef struct  _ATOM_I2C_VOLTAGE_OBJECT_V3
+{
+   ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader;
+   UCHAR	ucVoltageRegulatorId;					  //Indicate Voltage Regulator Id
+   UCHAR    ucVoltageControlI2cLine;
+   UCHAR    ucVoltageControlAddress;
+   UCHAR    ucVoltageControlOffset;	 	
+   ULONG    ulReserved;
+   VOLTAGE_LUT_ENTRY asVolI2cLut[1];        // end with 0xff
+}ATOM_I2C_VOLTAGE_OBJECT_V3;
+
+typedef struct  _ATOM_GPIO_VOLTAGE_OBJECT_V3
+{
+   ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader;   
+   UCHAR    ucVoltageGpioCntlId;         // default is 0 which indicate control through CG VID mode 
+   UCHAR    ucGpioEntryNum;              // indiate the entry numbers of Votlage/Gpio value Look up table
+   UCHAR    ucPhaseDelay;                // phase delay in unit of micro second
+   UCHAR    ucReserved;   
+   ULONG    ulGpioMaskVal;               // GPIO Mask value
+   VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[1];   
+}ATOM_GPIO_VOLTAGE_OBJECT_V3;
+
+typedef struct  _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3
+{
+   ATOM_VOLTAGE_OBJECT_HEADER_V3 sHeader;
+   UCHAR    ucLeakageCntlId;             // default is 0
+   UCHAR    ucLeakageEntryNum;           // indicate the entry number of LeakageId/Voltage Lut table
+   UCHAR    ucReserved[2];               
+   ULONG    ulMaxVoltageLevel;
+   LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1];   
+}ATOM_LEAKAGE_VOLTAGE_OBJECT_V3;
+
+typedef union _ATOM_VOLTAGE_OBJECT_V3{
+  ATOM_GPIO_VOLTAGE_OBJECT_V3 asGpioVoltageObj;
+  ATOM_I2C_VOLTAGE_OBJECT_V3 asI2cVoltageObj;
+  ATOM_LEAKAGE_VOLTAGE_OBJECT_V3 asLeakageObj;
+}ATOM_VOLTAGE_OBJECT_V3;
+
+typedef struct  _ATOM_VOLTAGE_OBJECT_INFO_V3_1
+{
+   ATOM_COMMON_TABLE_HEADER	sHeader; 
+	 ATOM_VOLTAGE_OBJECT_V3			asVoltageObj[3];	//Info for Voltage control	  	 
+}ATOM_VOLTAGE_OBJECT_INFO_V3_1;
+
 typedef struct  _ATOM_ASIC_PROFILE_VOLTAGE
 {
 	UCHAR		ucProfileId;
@@ -4305,7 +4625,18 @@
   USHORT usHDMISSpreadRateIn10Hz;
   USHORT usDVISSPercentage;
   USHORT usDVISSpreadRateIn10Hz;
-  ULONG  ulReserved3[21]; 
+  ULONG  SclkDpmBoostMargin;
+  ULONG  SclkDpmThrottleMargin;
+  USHORT SclkDpmTdpLimitPG; 
+  USHORT SclkDpmTdpLimitBoost;
+  ULONG  ulBoostEngineCLock;
+  UCHAR  ulBoostVid_2bit;  
+  UCHAR  EnableBoost;
+  USHORT GnbTdpLimit;
+  USHORT usMaxLVDSPclkFreqInSingleLink;
+  UCHAR  ucLvdsMisc;
+  UCHAR  ucLVDSReserved;
+  ULONG  ulReserved3[15]; 
   ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo;   
 }ATOM_INTEGRATED_SYSTEM_INFO_V6;   
 
@@ -4313,9 +4644,16 @@
 #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__TMDSHDMI_COHERENT_SINGLEPLL_MODE       0x01
 #define INTEGRATED_SYSTEM_INFO_V6_GPUCAPINFO__DISABLE_AUX_HW_MODE_DETECTION          0x08
 
-// ulOtherDisplayMisc
-#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT                       0x01
+//ucLVDSMisc:                   
+#define SYS_INFO_LVDSMISC__888_FPDI_MODE                                             0x01
+#define SYS_INFO_LVDSMISC__DL_CH_SWAP                                                0x02
+#define SYS_INFO_LVDSMISC__888_BPC                                                   0x04
+#define SYS_INFO_LVDSMISC__OVERRIDE_EN                                               0x08
+#define SYS_INFO_LVDSMISC__BLON_ACTIVE_LOW                                           0x10
 
+// not used any more
+#define SYS_INFO_LVDSMISC__VSYNC_ACTIVE_LOW                                          0x04
+#define SYS_INFO_LVDSMISC__HSYNC_ACTIVE_LOW                                          0x08
 
 /**********************************************************************************************************************
   ATOM_INTEGRATED_SYSTEM_INFO_V6 Description
@@ -4384,7 +4722,208 @@
 ulCSR_M3_ARB_CNTL_DEFAULT[10]:    Arrays with values for CSR M3 arbiter for default
 ulCSR_M3_ARB_CNTL_UVD[10]:        Arrays with values for CSR M3 arbiter for UVD playback.
 ulCSR_M3_ARB_CNTL_FS3D[10]:       Arrays with values for CSR M3 arbiter for Full Screen 3D applications.
-sAvail_SCLK[5]:                   Arrays to provide available list of SLCK and corresponding voltage, order from low to high  
+sAvail_SCLK[5]:                   Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high  
+ulGMCRestoreResetTime:            GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. 
+ulMinimumNClk:                    Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. 
+ulIdleNClk:                       NCLK speed while memory runs in self-refresh state. Unit in 10kHz.
+ulDDR_DLL_PowerUpTime:            DDR PHY DLL power up time. Unit in ns.
+ulDDR_PLL_PowerUpTime:            DDR PHY PLL power up time. Unit in ns.
+usPCIEClkSSPercentage:            PCIE Clock Spred Spectrum Percentage in unit 0.01%; 100 mean 1%.
+usPCIEClkSSType:                  PCIE Clock Spred Spectrum Type. 0 for Down spread(default); 1 for Center spread.
+usLvdsSSPercentage:               LVDS panel ( not include eDP ) Spread Spectrum Percentage in unit of 0.01%, =0, use VBIOS default setting. 
+usLvdsSSpreadRateIn10Hz:          LVDS panel ( not include eDP ) Spread Spectrum frequency in unit of 10Hz, =0, use VBIOS default setting. 
+usHDMISSPercentage:               HDMI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%,  =0, use VBIOS default setting. 
+usHDMISSpreadRateIn10Hz:          HDMI Spread Spectrum frequency in unit of 10Hz,  =0, use VBIOS default setting. 
+usDVISSPercentage:                DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%,  =0, use VBIOS default setting. 
+usDVISSpreadRateIn10Hz:           DVI Spread Spectrum frequency in unit of 10Hz,  =0, use VBIOS default setting. 
+usMaxLVDSPclkFreqInSingleLink:    Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz
+ucLVDSMisc:                       [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode
+                                  [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped
+                                  [bit2] LVDS 888bit per color mode  =0: 666 bit per color =1:888 bit per color
+                                  [bit3] LVDS parameter override enable  =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used
+                                  [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low )
+**********************************************************************************************************************/
+
+// this Table is used for Liano/Ontario APU
+typedef struct _ATOM_FUSION_SYSTEM_INFO_V1
+{
+  ATOM_INTEGRATED_SYSTEM_INFO_V6    sIntegratedSysInfo;   
+  ULONG  ulPowerplayTable[128];  
+}ATOM_FUSION_SYSTEM_INFO_V1; 
+/**********************************************************************************************************************
+  ATOM_FUSION_SYSTEM_INFO_V1 Description
+sIntegratedSysInfo:               refer to ATOM_INTEGRATED_SYSTEM_INFO_V6 definition.
+ulPowerplayTable[128]:            This 512 bytes memory is used to save ATOM_PPLIB_POWERPLAYTABLE3, starting form ulPowerplayTable[0]    
+**********************************************************************************************************************/ 
+
+// this IntegrateSystemInfoTable is used for Trinity APU
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  ULONG  ulBootUpEngineClock;
+  ULONG  ulDentistVCOFreq;
+  ULONG  ulBootUpUMAClock;
+  ATOM_CLK_VOLT_CAPABILITY   sDISPCLK_Voltage[4];
+  ULONG  ulBootUpReqDisplayVector;
+  ULONG  ulOtherDisplayMisc;
+  ULONG  ulGPUCapInfo;
+  ULONG  ulSB_MMIO_Base_Addr;
+  USHORT usRequestedPWMFreqInHz;
+  UCHAR  ucHtcTmpLmt;
+  UCHAR  ucHtcHystLmt;
+  ULONG  ulMinEngineClock;
+  ULONG  ulSystemConfig;            
+  ULONG  ulCPUCapInfo;
+  USHORT usNBP0Voltage;               
+  USHORT usNBP1Voltage;
+  USHORT usBootUpNBVoltage;                       
+  USHORT usExtDispConnInfoOffset;
+  USHORT usPanelRefreshRateRange;     
+  UCHAR  ucMemoryType;  
+  UCHAR  ucUMAChannelNumber;
+  UCHAR  strVBIOSMsg[40];
+  ULONG  ulReserved[20];
+  ATOM_AVAILABLE_SCLK_LIST   sAvail_SCLK[5];
+  ULONG  ulGMCRestoreResetTime;
+  ULONG  ulMinimumNClk;
+  ULONG  ulIdleNClk;
+  ULONG  ulDDR_DLL_PowerUpTime;
+  ULONG  ulDDR_PLL_PowerUpTime;
+  USHORT usPCIEClkSSPercentage;
+  USHORT usPCIEClkSSType;
+  USHORT usLvdsSSPercentage;
+  USHORT usLvdsSSpreadRateIn10Hz;
+  USHORT usHDMISSPercentage;
+  USHORT usHDMISSpreadRateIn10Hz;
+  USHORT usDVISSPercentage;
+  USHORT usDVISSpreadRateIn10Hz;
+  ULONG  SclkDpmBoostMargin;
+  ULONG  SclkDpmThrottleMargin;
+  USHORT SclkDpmTdpLimitPG; 
+  USHORT SclkDpmTdpLimitBoost;
+  ULONG  ulBoostEngineCLock;
+  UCHAR  ulBoostVid_2bit;  
+  UCHAR  EnableBoost;
+  USHORT GnbTdpLimit;
+  USHORT usMaxLVDSPclkFreqInSingleLink;
+  UCHAR  ucLvdsMisc;
+  UCHAR  ucLVDSReserved;
+  UCHAR  ucLVDSPwrOnSeqDIGONtoDE_in4Ms;
+  UCHAR  ucLVDSPwrOnSeqDEtoVARY_BL_in4Ms;
+  UCHAR  ucLVDSPwrOffSeqVARY_BLtoDE_in4Ms;
+  UCHAR  ucLVDSPwrOffSeqDEtoDIGON_in4Ms;
+  UCHAR  ucLVDSOffToOnDelay_in4Ms;
+  UCHAR  ucLVDSPwrOnSeqVARY_BLtoBLON_in4Ms;
+  UCHAR  ucLVDSPwrOffSeqBLONtoVARY_BL_in4Ms;
+  UCHAR  ucLVDSReserved1;
+  ULONG  ulLCDBitDepthControlVal;
+  ULONG  ulNbpStateMemclkFreq[4];
+  USHORT usNBP2Voltage;               
+  USHORT usNBP3Voltage;
+  ULONG  ulNbpStateNClkFreq[4];
+  UCHAR  ucNBDPMEnable;
+  UCHAR  ucReserved[3];
+  UCHAR  ucDPMState0VclkFid;
+  UCHAR  ucDPMState0DclkFid;
+  UCHAR  ucDPMState1VclkFid;
+  UCHAR  ucDPMState1DclkFid;
+  UCHAR  ucDPMState2VclkFid;
+  UCHAR  ucDPMState2DclkFid;
+  UCHAR  ucDPMState3VclkFid;
+  UCHAR  ucDPMState3DclkFid;
+  ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo;
+}ATOM_INTEGRATED_SYSTEM_INFO_V1_7;
+
+// ulOtherDisplayMisc
+#define INTEGRATED_SYSTEM_INFO__GET_EDID_CALLBACK_FUNC_SUPPORT            0x01
+#define INTEGRATED_SYSTEM_INFO__GET_BOOTUP_DISPLAY_CALLBACK_FUNC_SUPPORT  0x02
+#define INTEGRATED_SYSTEM_INFO__GET_EXPANSION_CALLBACK_FUNC_SUPPORT       0x04
+#define INTEGRATED_SYSTEM_INFO__FAST_BOOT_SUPPORT                         0x08
+
+// ulGPUCapInfo
+#define SYS_INFO_GPUCAPS__TMDSHDMI_COHERENT_SINGLEPLL_MODE                0x01
+#define SYS_INFO_GPUCAPS__DP_SINGLEPLL_MODE                               0x02
+#define SYS_INFO_GPUCAPS__DISABLE_AUX_MODE_DETECT                         0x08
+
+/**********************************************************************************************************************
+  ATOM_INTEGRATED_SYSTEM_INFO_V1_7 Description
+ulBootUpEngineClock:              VBIOS bootup Engine clock frequency, in 10kHz unit. if it is equal 0, then VBIOS use pre-defined bootup engine clock
+ulDentistVCOFreq:                 Dentist VCO clock in 10kHz unit. 
+ulBootUpUMAClock:                 System memory boot up clock frequency in 10Khz unit. 
+sDISPCLK_Voltage:                 Report Display clock voltage requirement.
+ 
+ulBootUpReqDisplayVector:         VBIOS boot up display IDs, following are supported devices in Trinity projects:
+                                  ATOM_DEVICE_CRT1_SUPPORT                  0x0001
+                                  ATOM_DEVICE_DFP1_SUPPORT                  0x0008 
+                                  ATOM_DEVICE_DFP6_SUPPORT                  0x0040 
+                                  ATOM_DEVICE_DFP2_SUPPORT                  0x0080       
+                                  ATOM_DEVICE_DFP3_SUPPORT                  0x0200       
+                                  ATOM_DEVICE_DFP4_SUPPORT                  0x0400        
+                                  ATOM_DEVICE_DFP5_SUPPORT                  0x0800
+                                  ATOM_DEVICE_LCD1_SUPPORT                  0x0002
+ulOtherDisplayMisc:      	        bit[0]=0: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is not supported by SBIOS. 
+                                        =1: INT15 callback function Get LCD EDID ( ax=4e08, bl=1b ) is supported by SBIOS. 
+                                  bit[1]=0: INT15 callback function Get boot display( ax=4e08, bl=01h) is not supported by SBIOS
+                                        =1: INT15 callback function Get boot display( ax=4e08, bl=01h) is supported by SBIOS
+                                  bit[2]=0: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is not supported by SBIOS
+                                        =1: INT15 callback function Get panel Expansion ( ax=4e08, bl=02h) is supported by SBIOS
+                                  bit[3]=0: VBIOS fast boot is disable
+                                        =1: VBIOS fast boot is enable. ( VBIOS skip display device detection in every set mode if LCD panel is connect and LID is open)
+ulGPUCapInfo:                     bit[0]=0: TMDS/HDMI Coherent Mode use cascade PLL mode.
+                                        =1: TMDS/HDMI Coherent Mode use signel PLL mode.
+                                  bit[1]=0: DP mode use cascade PLL mode ( New for Trinity )
+                                        =1: DP mode use single PLL mode
+                                  bit[3]=0: Enable AUX HW mode detection logic
+                                        =1: Disable AUX HW mode detection logic
+                                      
+ulSB_MMIO_Base_Addr:              Physical Base address to SB MMIO space. Driver needs to initialize it for SMU usage.
+
+usRequestedPWMFreqInHz:           When it's set to 0x0 by SBIOS: the LCD BackLight is not controlled by GPU(SW). 
+                                  Any attempt to change BL using VBIOS function or enable VariBri from PP table is not effective since ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==0;
+                                  
+                                  When it's set to a non-zero frequency, the BackLight is controlled by GPU (SW) in one of two ways below:
+                                  1. SW uses the GPU BL PWM output to control the BL, in chis case, this non-zero frequency determines what freq GPU should use;
+                                  VBIOS will set up proper PWM frequency and ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1,as the result,
+                                  Changing BL using VBIOS function is functional in both driver and non-driver present environment; 
+                                  and enabling VariBri under the driver environment from PP table is optional.
+
+                                  2. SW uses other means to control BL (like DPCD),this non-zero frequency serves as a flag only indicating
+                                  that BL control from GPU is expected.
+                                  VBIOS will NOT set up PWM frequency but make ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU==1
+                                  Changing BL using VBIOS function could be functional in both driver and non-driver present environment,but
+                                  it's per platform 
+                                  and enabling VariBri under the driver environment from PP table is optional.
+
+ucHtcTmpLmt:                      Refer to D18F3x64 bit[22:16], HtcTmpLmt. 
+                                  Threshold on value to enter HTC_active state.
+ucHtcHystLmt:                     Refer to D18F3x64 bit[27:24], HtcHystLmt. 
+                                  To calculate threshold off value to exit HTC_active state, which is Threshold on vlaue minus ucHtcHystLmt.
+ulMinEngineClock:                 Minimum SCLK allowed in 10kHz unit. This is calculated based on WRCK Fuse settings.
+ulSystemConfig:                   Bit[0]=0: PCIE Power Gating Disabled 
+                                        =1: PCIE Power Gating Enabled
+                                  Bit[1]=0: DDR-DLL shut-down feature disabled.
+                                         1: DDR-DLL shut-down feature enabled.
+                                  Bit[2]=0: DDR-PLL Power down feature disabled.
+                                         1: DDR-PLL Power down feature enabled.                                 
+ulCPUCapInfo:                     TBD
+usNBP0Voltage:                    VID for voltage on NB P0 State
+usNBP1Voltage:                    VID for voltage on NB P1 State  
+usNBP2Voltage:                    VID for voltage on NB P2 State
+usNBP3Voltage:                    VID for voltage on NB P3 State  
+usBootUpNBVoltage:                Voltage Index of GNB voltage configured by SBIOS, which is suffcient to support VBIOS DISPCLK requirement.
+usExtDispConnInfoOffset:          Offset to sExtDispConnInfo inside the structure
+usPanelRefreshRateRange:          Bit vector for LCD supported refresh rate range. If DRR is requestd by the platform, at least two bits need to be set
+                                  to indicate a range.
+                                  SUPPORTED_LCD_REFRESHRATE_30Hz          0x0004
+                                  SUPPORTED_LCD_REFRESHRATE_40Hz          0x0008
+                                  SUPPORTED_LCD_REFRESHRATE_50Hz          0x0010
+                                  SUPPORTED_LCD_REFRESHRATE_60Hz          0x0020
+ucMemoryType:                     [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved.
+ucUMAChannelNumber:      	        System memory channel numbers. 
+ulCSR_M3_ARB_CNTL_DEFAULT[10]:    Arrays with values for CSR M3 arbiter for default
+ulCSR_M3_ARB_CNTL_UVD[10]:        Arrays with values for CSR M3 arbiter for UVD playback.
+ulCSR_M3_ARB_CNTL_FS3D[10]:       Arrays with values for CSR M3 arbiter for Full Screen 3D applications.
+sAvail_SCLK[5]:                   Arrays to provide availabe list of SLCK and corresponding voltage, order from low to high  
 ulGMCRestoreResetTime:            GMC power restore and GMC reset time to calculate data reconnection latency. Unit in ns. 
 ulMinimumNClk:                    Minimum NCLK speed among all NB-Pstates to calcualte data reconnection latency. Unit in 10kHz. 
 ulIdleNClk:                       NCLK speed while memory runs in self-refresh state. Unit in 10kHz.
@@ -4398,6 +4937,41 @@
 usHDMISSpreadRateIn10Hz:          HDMI Spread Spectrum frequency in unit of 10Hz,  =0, use VBIOS default setting. 
 usDVISSPercentage:                DVI Spread Spectrum Percentage in unit 0.01%; 100 mean 1%,  =0, use VBIOS default setting. 
 usDVISSpreadRateIn10Hz:           DVI Spread Spectrum frequency in unit of 10Hz,  =0, use VBIOS default setting. 
+usMaxLVDSPclkFreqInSingleLink:    Max pixel clock LVDS panel single link, if=0 means VBIOS use default threhold, right now it is 85Mhz
+ucLVDSMisc:                       [bit0] LVDS 888bit panel mode =0: LVDS 888 panel in LDI mode, =1: LVDS 888 panel in FPDI mode
+                                  [bit1] LVDS panel lower and upper link mapping =0: lower link and upper link not swap, =1: lower link and upper link are swapped
+                                  [bit2] LVDS 888bit per color mode  =0: 666 bit per color =1:888 bit per color
+                                  [bit3] LVDS parameter override enable  =0: ucLvdsMisc parameter are not used =1: ucLvdsMisc parameter should be used
+                                  [bit4] Polarity of signal sent to digital BLON output pin. =0: not inverted(active high) =1: inverted ( active low )
+ucLVDSPwrOnSeqDIGONtoDE_in4Ms:    LVDS power up sequence time in unit of 4ms, time delay from DIGON signal active to data enable signal active( DE ).
+                                  =0 mean use VBIOS default which is 8 ( 32ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. 
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+ucLVDSPwrOnDEtoVARY_BL_in4Ms:     LVDS power up sequence time in unit of 4ms., time delay from DE( data enable ) active to Vary Brightness enable signal active( VARY_BL ).  
+                                  =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power up sequence is as following: DIGON->DE->VARY_BL->BLON. 
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ucLVDSPwrOffVARY_BLtoDE_in4Ms:    LVDS power down sequence time in unit of 4ms, time delay from data enable ( DE ) signal off to LCDVCC (DIGON) off. 
+                                  =0 mean use VBIOS default delay which is 8 ( 32ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ucLVDSPwrOffDEtoDIGON_in4Ms:      LVDS power down sequence time in unit of 4ms, time delay from vary brightness enable signal( VARY_BL) off to data enable ( DE ) signal off. 
+                                  =0 mean use VBIOS default which is 90 ( 360ms ). The LVDS power down sequence is as following: BLON->VARY_BL->DE->DIGON
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ucLVDSOffToOnDelay_in4Ms:         LVDS power down sequence time in unit of 4ms. Time delay from DIGON signal off to DIGON signal active. 
+                                  =0 means to use VBIOS default delay which is 125 ( 500ms ).
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ucLVDSPwrOnVARY_BLtoBLON_in4Ms:   LVDS power up sequence time in unit of 4ms. Time delay from VARY_BL signal on to DLON signal active. 
+                                  =0 means to use VBIOS default delay which is 0 ( 0ms ).
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ucLVDSPwrOffBLONtoVARY_BL_in4Ms:  LVDS power down sequence time in unit of 4ms. Time delay from BLON signal off to VARY_BL signal off. 
+                                  =0 means to use VBIOS default delay which is 0 ( 0ms ).
+                                  This parameter is used by VBIOS only. VBIOS will patch LVDS_InfoTable.
+
+ulNbpStateMemclkFreq[4]:          system memory clock frequncey in unit of 10Khz in different NB pstate. 
+
 **********************************************************************************************************************/
 
 /**************************************************************************/
@@ -4459,6 +5033,7 @@
 #define ASIC_INTERNAL_SS_ON_DP      7
 #define ASIC_INTERNAL_SS_ON_DCPLL   8
 #define ASIC_EXTERNAL_SS_ON_DP_CLOCK 9
+#define ASIC_INTERNAL_VCE_SS        10
 
 typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V2
 {
@@ -4520,7 +5095,7 @@
 #define ATOM_DOS_MODE_INFO_DEF        7
 #define ATOM_I2C_CHANNEL_STATUS_DEF   8
 #define ATOM_I2C_CHANNEL_STATUS1_DEF  9
-
+#define ATOM_INTERNAL_TIMER_DEF       10
 
 // BIOS_0_SCRATCH Definition 
 #define ATOM_S0_CRT1_MONO               0x00000001L
@@ -4648,6 +5223,7 @@
 #define ATOM_S2_DEVICE_DPMS_MASKw1      0x3FF
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASKb3     0x0C
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGEb3   0x10
+#define ATOM_S2_TMDS_COHERENT_MODEb3    0x10          // used by VBIOS code only, use coherent mode for TMDS/HDMI mode
 #define ATOM_S2_VRI_BRIGHT_ENABLEb3     0x20
 #define ATOM_S2_ROTATION_STATE_MASKb3   0xC0
 
@@ -5038,6 +5614,23 @@
   USHORT usDeviceId;                  // Active Device Id for this surface. If no device, set to 0. 
 }ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3;
 
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4
+{
+  USHORT usHight;                     // Image Hight
+  USHORT usWidth;                     // Image Width
+  USHORT usGraphPitch;
+  UCHAR  ucColorDepth;
+  UCHAR  ucPixelFormat;
+  UCHAR  ucSurface;                   // Surface 1 or 2
+  UCHAR  ucEnable;                    // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR  ucModeType;
+  UCHAR  ucReserved;
+}ENABLE_GRAPH_SURFACE_PARAMETERS_V1_4;
+
+// ucEnable
+#define ATOM_GRAPH_CONTROL_SET_PITCH             0x0f
+#define ATOM_GRAPH_CONTROL_SET_DISP_START        0x10
+
 typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION
 {
   ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface;          
@@ -5057,6 +5650,58 @@
   USHORT  usY_Size;
 }GET_DISPLAY_SURFACE_SIZE_PARAMETERS; 
 
+typedef struct  _GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2
+{
+  union{
+    USHORT  usX_Size;                     //When use as input parameter, usX_Size indicates which CRTC                 
+    USHORT  usSurface; 
+  };
+  USHORT usY_Size;
+  USHORT usDispXStart;               
+  USHORT usDispYStart;
+}GET_DISPLAY_SURFACE_SIZE_PARAMETERS_V2; 
+
+
+typedef struct _PALETTE_DATA_CONTROL_PARAMETERS_V3 
+{
+  UCHAR  ucLutId;
+  UCHAR  ucAction;
+  USHORT usLutStartIndex;
+  USHORT usLutLength;
+  USHORT usLutOffsetInVram;
+}PALETTE_DATA_CONTROL_PARAMETERS_V3;
+
+// ucAction:
+#define PALETTE_DATA_AUTO_FILL            1
+#define PALETTE_DATA_READ                 2
+#define PALETTE_DATA_WRITE                3
+
+
+typedef struct _INTERRUPT_SERVICE_PARAMETERS_V2
+{
+  UCHAR  ucInterruptId;
+  UCHAR  ucServiceId;
+  UCHAR  ucStatus;
+  UCHAR  ucReserved;
+}INTERRUPT_SERVICE_PARAMETER_V2;
+
+// ucInterruptId
+#define HDP1_INTERRUPT_ID                 1
+#define HDP2_INTERRUPT_ID                 2
+#define HDP3_INTERRUPT_ID                 3
+#define HDP4_INTERRUPT_ID                 4
+#define HDP5_INTERRUPT_ID                 5
+#define HDP6_INTERRUPT_ID                 6
+#define SW_INTERRUPT_ID                   11   
+
+// ucAction
+#define INTERRUPT_SERVICE_GEN_SW_INT      1
+#define INTERRUPT_SERVICE_GET_STATUS      2
+
+ // ucStatus
+#define INTERRUPT_STATUS__INT_TRIGGER     1
+#define INTERRUPT_STATUS__HPD_HIGH        2
+
 typedef struct _INDIRECT_IO_ACCESS
 {
   ATOM_COMMON_TABLE_HEADER sHeader;  
@@ -5189,7 +5834,7 @@
 
 #define END_OF_REG_INDEX_BLOCK  0x0ffff
 #define END_OF_REG_DATA_BLOCK   0x00000000
-#define ATOM_INIT_REG_MASK_FLAG 0x80
+#define ATOM_INIT_REG_MASK_FLAG 0x80               //Not used in BIOS
 #define	CLOCK_RANGE_HIGHEST			0x00ffffff
 
 #define VALUE_DWORD             SIZEOF ULONG
@@ -5229,6 +5874,7 @@
 #define _128Mx8             0x51
 #define _128Mx16            0x52
 #define _256Mx8             0x61
+#define _256Mx16            0x62
 
 #define SAMSUNG             0x1
 #define INFINEON            0x2
@@ -5585,7 +6231,7 @@
   ULONG	  ulChannelMapCfg;	                // mmMC_SHARED_CHREMAP
   USHORT  usModuleSize;                     // Size of ATOM_VRAM_MODULE_V7
   USHORT  usPrivateReserved;                // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS)
-  USHORT  usReserved;
+  USHORT  usEnableChannels;                 // bit vector which indicate which channels are enabled
   UCHAR   ucExtMemoryID;                    // Current memory module ID
   UCHAR   ucMemoryType;                     // MEM_TYPE_DDR2/DDR3/GDDR3/GDDR5
   UCHAR   ucChannelNum;                     // Number of mem. channels supported in this module
@@ -5597,7 +6243,8 @@
   UCHAR   ucNPL_RT;                         // Round trip delay (MC_SEQ_CAS_TIMING [28:24]:TCL=CL+NPL_RT-2). Always 2.
   UCHAR	  ucPreamble;                       // [7:4] Write Preamble, [3:0] Read Preamble
   UCHAR   ucMemorySize;                     // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros
-  UCHAR   ucReserved[3];
+  USHORT  usSEQSettingOffset;
+  UCHAR   ucReserved;
 // Memory Module specific values
   USHORT  usEMRS2Value;                     // EMRS2/MR2 Value. 
   USHORT  usEMRS3Value;                     // EMRS3/MR3 Value.
@@ -5633,10 +6280,10 @@
 typedef struct _ATOM_VRAM_INFO_V4
 {
   ATOM_COMMON_TABLE_HEADER   sHeader;
-	USHORT										 usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
-	USHORT										 usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
-	USHORT										 usRerseved;
-	UCHAR           	         ucMemDQ7_0ByteRemap;													   // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3
+  USHORT                     usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
+  USHORT                     usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
+  USHORT										 usRerseved;
+  UCHAR           	         ucMemDQ7_0ByteRemap;													   // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3
   ULONG                      ulMemDQ7_0BitRemap;                             // each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21]
   UCHAR                      ucReservde[4]; 
   UCHAR                      ucNumOfVRAMModule;
@@ -5648,9 +6295,10 @@
 typedef struct _ATOM_VRAM_INFO_HEADER_V2_1
 {
   ATOM_COMMON_TABLE_HEADER   sHeader;
-	USHORT										 usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
-	USHORT										 usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
-	USHORT										 usReserved[4];
+  USHORT                     usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
+  USHORT                     usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
+  USHORT                     usPerBytePresetOffset;                          // offset of ATOM_INIT_REG_BLOCK structure for Per Byte Offset Preset Settings
+  USHORT                     usReserved[3];
   UCHAR                      ucNumOfVRAMModule;                              // indicate number of VRAM module
   UCHAR                      ucMemoryClkPatchTblVer;                         // version of memory AC timing register list
   UCHAR                      ucVramModuleVer;                                // indicate ATOM_VRAM_MODUE version
@@ -5935,6 +6583,52 @@
 	ASIC_ENCODER_INFO      asEncoderInfo[1];
 }ATOM_DISP_OUT_INFO_V2;
 
+
+typedef struct _ATOM_DISP_CLOCK_ID {
+  UCHAR ucPpllId; 
+  UCHAR ucPpllAttribute;
+}ATOM_DISP_CLOCK_ID;
+
+// ucPpllAttribute
+#define CLOCK_SOURCE_SHAREABLE            0x01
+#define CLOCK_SOURCE_DP_MODE              0x02
+#define CLOCK_SOURCE_NONE_DP_MODE         0x04
+
+//DispOutInfoTable
+typedef struct _ASIC_TRANSMITTER_INFO_V2
+{
+	USHORT usTransmitterObjId;
+	USHORT usDispClkIdOffset;    // point to clock source id list supported by Encoder Object
+  UCHAR  ucTransmitterCmdTblId;
+	UCHAR  ucConfig;
+	UCHAR  ucEncoderID;					 // available 1st encoder ( default )
+	UCHAR  ucOptionEncoderID;    // available 2nd encoder ( optional )
+	UCHAR  uc2ndEncoderID;
+	UCHAR  ucReserved;
+}ASIC_TRANSMITTER_INFO_V2;
+
+typedef struct _ATOM_DISP_OUT_INFO_V3
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+	USHORT ptrTransmitterInfo;
+	USHORT ptrEncoderInfo;
+  USHORT ptrMainCallParserFar;                  // direct address of main parser call in VBIOS binary. 
+  USHORT usReserved;
+  UCHAR  ucDCERevision;   
+  UCHAR  ucMaxDispEngineNum;
+  UCHAR  ucMaxActiveDispEngineNum;
+  UCHAR  ucMaxPPLLNum;
+  UCHAR  ucCoreRefClkSource;                          // value of CORE_REF_CLK_SOURCE
+  UCHAR  ucReserved[3];
+	ASIC_TRANSMITTER_INFO_V2  asTransmitterInfo[1];     // for alligment only
+}ATOM_DISP_OUT_INFO_V3;
+
+typedef enum CORE_REF_CLK_SOURCE{
+  CLOCK_SRC_XTALIN=0,
+  CLOCK_SRC_XO_IN=1,
+  CLOCK_SRC_XO_IN2=2,
+}CORE_REF_CLK_SOURCE;
+
 // DispDevicePriorityInfo
 typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO
 {
@@ -6070,6 +6764,39 @@
 #define HW_I2C_READ         0
 #define I2C_2BYTE_ADDR      0x02
 
+/****************************************************************************/	
+// Structures used by HW_Misc_OperationTable
+/****************************************************************************/	
+typedef struct  _ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1 
+{
+  UCHAR  ucCmd;                //  Input: To tell which action to take
+  UCHAR  ucReserved[3];
+  ULONG  ulReserved;
+}ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1; 
+
+typedef struct  _ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1 
+{
+  UCHAR  ucReturnCode;        // Output: Return value base on action was taken
+  UCHAR  ucReserved[3];
+  ULONG  ulReserved;
+}ATOM_HW_MISC_OPERATION_OUTPUT_PARAMETER_V1_1;
+
+// Actions code
+#define  ATOM_GET_SDI_SUPPORT              0xF0
+
+// Return code 
+#define  ATOM_UNKNOWN_CMD                   0
+#define  ATOM_FEATURE_NOT_SUPPORTED         1
+#define  ATOM_FEATURE_SUPPORTED             2
+
+typedef struct _ATOM_HW_MISC_OPERATION_PS_ALLOCATION
+{
+	ATOM_HW_MISC_OPERATION_INPUT_PARAMETER_V1_1        sInput_Output;
+	PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS         sReserved; 
+}ATOM_HW_MISC_OPERATION_PS_ALLOCATION;
+
+/****************************************************************************/	
+
 typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2
 {
    UCHAR ucHWBlkInst;                // HW block instance, 0, 1, 2, ...
@@ -6090,6 +6817,52 @@
 #define SELECT_CRTC_PIXEL_RATE        7
 #define SELECT_VGA_BLK                8
 
+// DIGTransmitterInfoTable structure used to program UNIPHY settings 
+typedef struct _DIG_TRANSMITTER_INFO_HEADER_V3_1{  
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  USHORT usDPVsPreEmphSettingOffset;     // offset of PHY_ANALOG_SETTING_INFO * with DP Voltage Swing and Pre-Emphasis for each Link clock 
+  USHORT usPhyAnalogRegListOffset;       // offset of CLOCK_CONDITION_REGESTER_INFO* with None-DP mode Analog Setting's register Info 
+  USHORT usPhyAnalogSettingOffset;       // offset of CLOCK_CONDITION_SETTING_ENTRY* with None-DP mode Analog Setting for each link clock range
+  USHORT usPhyPllRegListOffset;          // offset of CLOCK_CONDITION_REGESTER_INFO* with Phy Pll register Info 
+  USHORT usPhyPllSettingOffset;          // offset of CLOCK_CONDITION_SETTING_ENTRY* with Phy Pll Settings
+}DIG_TRANSMITTER_INFO_HEADER_V3_1;
+
+typedef struct _CLOCK_CONDITION_REGESTER_INFO{
+  USHORT usRegisterIndex;
+  UCHAR  ucStartBit;
+  UCHAR  ucEndBit;
+}CLOCK_CONDITION_REGESTER_INFO;
+
+typedef struct _CLOCK_CONDITION_SETTING_ENTRY{
+  USHORT usMaxClockFreq;
+  UCHAR  ucEncodeMode;
+  UCHAR  ucPhySel;
+  ULONG  ulAnalogSetting[1];
+}CLOCK_CONDITION_SETTING_ENTRY;
+
+typedef struct _CLOCK_CONDITION_SETTING_INFO{
+  USHORT usEntrySize;
+  CLOCK_CONDITION_SETTING_ENTRY asClkCondSettingEntry[1];
+}CLOCK_CONDITION_SETTING_INFO;
+
+typedef struct _PHY_CONDITION_REG_VAL{
+  ULONG  ulCondition;
+  ULONG  ulRegVal;
+}PHY_CONDITION_REG_VAL;
+
+typedef struct _PHY_CONDITION_REG_INFO{
+  USHORT usRegIndex;
+  USHORT usSize;
+  PHY_CONDITION_REG_VAL asRegVal[1];
+}PHY_CONDITION_REG_INFO;
+
+typedef struct _PHY_ANALOG_SETTING_INFO{
+  UCHAR  ucEncodeMode;
+  UCHAR  ucPhySel;
+  USHORT usSize;
+  PHY_CONDITION_REG_INFO  asAnalogSetting[1];
+}PHY_ANALOG_SETTING_INFO;
+
 /****************************************************************************/	
 //Portion VI: Definitinos for vbios MC scratch registers that driver used
 /****************************************************************************/
@@ -6497,6 +7270,8 @@
 #define ATOM_PP_THERMALCONTROLLER_EMC2103   13  /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen.
 #define ATOM_PP_THERMALCONTROLLER_SUMO      14  /* 0x0E */ // Sumo type, used internally
 #define ATOM_PP_THERMALCONTROLLER_NISLANDS  15
+#define ATOM_PP_THERMALCONTROLLER_SISLANDS  16
+#define ATOM_PP_THERMALCONTROLLER_LM96163   17
 
 // Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal.
 // We probably should reserve the bit 0x80 for this use.
@@ -6512,6 +7287,7 @@
     UCHAR ucClockStateIndices[1]; // variable-sized
 } ATOM_PPLIB_STATE;
 
+
 typedef struct _ATOM_PPLIB_FANTABLE
 {
     UCHAR   ucFanTableFormat;                // Change this if the table format changes or version changes so that the other fields are not the same.
@@ -6524,12 +7300,20 @@
     USHORT  usPWMHigh;                       // The PWM value at THigh.
 } ATOM_PPLIB_FANTABLE;
 
+typedef struct _ATOM_PPLIB_FANTABLE2
+{
+    ATOM_PPLIB_FANTABLE basicTable;
+    USHORT  usTMax;                          // The max temperature
+} ATOM_PPLIB_FANTABLE2;
+
 typedef struct _ATOM_PPLIB_EXTENDEDHEADER
 {
     USHORT  usSize;
     ULONG   ulMaxEngineClock;   // For Overdrive.
     ULONG   ulMaxMemoryClock;   // For Overdrive.
     // Add extra system parameters here, always adjust size to include all fields.
+    USHORT  usVCETableOffset; //points to ATOM_PPLIB_VCE_Table
+    USHORT  usUVDTableOffset;   //points to ATOM_PPLIB_UVD_Table
 } ATOM_PPLIB_EXTENDEDHEADER;
 
 //// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps
@@ -6552,6 +7336,7 @@
 #define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000               // Enable the 'regulator hot' feature.
 #define ATOM_PP_PLATFORM_CAP_BACO          0x00020000               // Does the driver supports BACO state.
 
+
 typedef struct _ATOM_PPLIB_POWERPLAYTABLE
 {
       ATOM_COMMON_TABLE_HEADER sHeader;
@@ -6610,7 +7395,8 @@
     USHORT                     usVddciDependencyOnMCLKOffset;
     USHORT                     usVddcDependencyOnMCLKOffset;
     USHORT                     usMaxClockVoltageOnDCOffset;
-    USHORT                     usReserved[2];  
+    USHORT                     usVddcPhaseShedLimitsTableOffset;    // Points to ATOM_PPLIB_PhaseSheddingLimits_Table
+    USHORT                     usReserved;  
 } ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4;
 
 typedef struct _ATOM_PPLIB_POWERPLAYTABLE5
@@ -6620,8 +7406,9 @@
     ULONG                      ulNearTDPLimit;
     ULONG                      ulSQRampingThreshold;
     USHORT                     usCACLeakageTableOffset;         // Points to ATOM_PPLIB_CAC_Leakage_Table
-    ULONG                      ulCACLeakage;                    // TBD, this parameter is still under discussion.  Change to ulReserved if not needed.
-    ULONG                      ulReserved;
+    ULONG                      ulCACLeakage;                    // The iLeakage for driver calculated CAC leakage table
+    USHORT                     usTDPODLimit;
+    USHORT                     usLoadLineSlope;                 // in milliOhms * 100
 } ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5;
 
 //// ATOM_PPLIB_NONCLOCK_INFO::usClassification
@@ -6650,6 +7437,7 @@
 //// ATOM_PPLIB_NONCLOCK_INFO::usClassification2
 #define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2     0x0001
 #define ATOM_PPLIB_CLASSIFICATION2_ULV                      0x0002
+#define ATOM_PPLIB_CLASSIFICATION2_MVC                      0x0004   //Multi-View Codec (BD-3D)
 
 //// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings
 #define ATOM_PPLIB_SINGLE_DISPLAY_ONLY           0x00000001
@@ -6673,7 +7461,9 @@
 
 #define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING        0x00001000
 #define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS  0x00002000
-#define ATOM_PPLIB_DISALLOW_ON_DC                        0x00004000
+
+#define ATOM_PPLIB_DISALLOW_ON_DC                       0x00004000
+
 #define ATOM_PPLIB_ENABLE_VARIBRIGHT                     0x00008000
 
 //memory related flags
@@ -6735,7 +7525,7 @@
 #define ATOM_PPLIB_R600_FLAGS_UVDSAFE           2
 #define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE    4
 #define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF    8
-#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF    16
+#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF   16
 #define ATOM_PPLIB_R600_FLAGS_LOWPOWER         32   // On the RV770 use 'low power' setting (sequencer S0).
 
 typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO
@@ -6754,6 +7544,24 @@
 
 } ATOM_PPLIB_EVERGREEN_CLOCK_INFO;
 
+typedef struct _ATOM_PPLIB_SI_CLOCK_INFO
+{
+      USHORT usEngineClockLow;
+      UCHAR  ucEngineClockHigh;
+
+      USHORT usMemoryClockLow;
+      UCHAR  ucMemoryClockHigh;
+
+      USHORT usVDDC;
+      USHORT usVDDCI;
+      UCHAR  ucPCIEGen;
+      UCHAR  ucUnused1;
+
+      ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now
+
+} ATOM_PPLIB_SI_CLOCK_INFO;
+
+
 typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO
 
 {
@@ -6766,7 +7574,7 @@
       UCHAR  ucPadding;                   // For proper alignment and size.
       USHORT usVDDC;                      // For the 780, use: None, Low, High, Variable
       UCHAR  ucMaxHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}
-      UCHAR  ucMinHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requirement.
+      UCHAR  ucMinHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requriement.
       USHORT usHTLinkFreq;                // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200).
       ULONG  ulFlags; 
 } ATOM_PPLIB_RS780_CLOCK_INFO;
@@ -6788,9 +7596,7 @@
       USHORT usEngineClockLow;  //clockfrequency & 0xFFFF. The unit is in 10khz
       UCHAR  ucEngineClockHigh; //clockfrequency >> 16. 
       UCHAR  vddcIndex;         //2-bit vddc index;
-      UCHAR  leakage;          //please use 8-bit absolute value, not the 6-bit % value 
-      //please initalize to 0
-      UCHAR  rsv;
+      USHORT tdpLimit;
       //please initalize to 0
       USHORT rsv1;
       //please initialize to 0s
@@ -6813,7 +7619,7 @@
       UCHAR clockInfoIndex[1];
 } ATOM_PPLIB_STATE_V2;
 
-typedef struct StateArray{
+typedef struct _StateArray{
     //how many states we have 
     UCHAR ucNumEntries;
     
@@ -6821,18 +7627,17 @@
 }StateArray;
 
 
-typedef struct ClockInfoArray{
+typedef struct _ClockInfoArray{
     //how many clock levels we have
     UCHAR ucNumEntries;
     
-    //sizeof(ATOM_PPLIB_SUMO_CLOCK_INFO)
+    //sizeof(ATOM_PPLIB_CLOCK_INFO)
     UCHAR ucEntrySize;
     
-    //this is for Sumo
-    ATOM_PPLIB_SUMO_CLOCK_INFO clockInfo[1];
+    UCHAR clockInfo[1];
 }ClockInfoArray;
 
-typedef struct NonClockInfoArray{
+typedef struct _NonClockInfoArray{
 
     //how many non-clock levels we have. normally should be same as number of states
     UCHAR ucNumEntries;
@@ -6871,6 +7676,124 @@
     ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1];                  // Dynamically allocate entries.
 }ATOM_PPLIB_Clock_Voltage_Limit_Table;
 
+typedef struct _ATOM_PPLIB_CAC_Leakage_Record
+{
+    USHORT usVddc;  // We use this field for the "fake" standardized VDDC for power calculations                                                  
+    ULONG  ulLeakageValue;
+}ATOM_PPLIB_CAC_Leakage_Record;
+
+typedef struct _ATOM_PPLIB_CAC_Leakage_Table
+{
+    UCHAR ucNumEntries;                                                 // Number of entries.
+    ATOM_PPLIB_CAC_Leakage_Record entries[1];                           // Dynamically allocate entries.
+}ATOM_PPLIB_CAC_Leakage_Table;
+
+typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
+{
+    USHORT usVoltage;
+    USHORT usSclkLow;
+    UCHAR  ucSclkHigh;
+    USHORT usMclkLow;
+    UCHAR  ucMclkHigh;
+}ATOM_PPLIB_PhaseSheddingLimits_Record;
+
+typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table
+{
+    UCHAR ucNumEntries;                                                 // Number of entries.
+    ATOM_PPLIB_PhaseSheddingLimits_Record entries[1];                   // Dynamically allocate entries.
+}ATOM_PPLIB_PhaseSheddingLimits_Table;
+
+typedef struct _VCEClockInfo{
+    USHORT usEVClkLow;
+    UCHAR  ucEVClkHigh;
+    USHORT usECClkLow;
+    UCHAR  ucECClkHigh;
+}VCEClockInfo;
+
+typedef struct _VCEClockInfoArray{
+    UCHAR ucNumEntries;
+    VCEClockInfo entries[1];
+}VCEClockInfoArray;
+
+typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
+{
+    USHORT usVoltage;
+    UCHAR  ucVCEClockInfoIndex;
+}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_VCE_State_Record
+{
+    UCHAR  ucVCEClockInfoIndex;
+    UCHAR  ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary
+}ATOM_PPLIB_VCE_State_Record;
+
+typedef struct _ATOM_PPLIB_VCE_State_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_VCE_State_Record entries[1];
+}ATOM_PPLIB_VCE_State_Table;
+
+
+typedef struct _ATOM_PPLIB_VCE_Table
+{
+      UCHAR revid;
+//    VCEClockInfoArray array;
+//    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits;
+//    ATOM_PPLIB_VCE_State_Table states;
+}ATOM_PPLIB_VCE_Table;
+
+
+typedef struct _UVDClockInfo{
+    USHORT usVClkLow;
+    UCHAR  ucVClkHigh;
+    USHORT usDClkLow;
+    UCHAR  ucDClkHigh;
+}UVDClockInfo;
+
+typedef struct _UVDClockInfoArray{
+    UCHAR ucNumEntries;
+    UVDClockInfo entries[1];
+}UVDClockInfoArray;
+
+typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record
+{
+    USHORT usVoltage;
+    UCHAR  ucUVDClockInfoIndex;
+}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_UVD_State_Record
+{
+    UCHAR  ucUVDClockInfoIndex;
+    UCHAR  ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary
+}ATOM_PPLIB_UVD_State_Record;
+
+typedef struct _ATOM_PPLIB_UVD_State_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_UVD_State_Record entries[1];
+}ATOM_PPLIB_UVD_State_Table;
+
+
+typedef struct _ATOM_PPLIB_UVD_Table
+{
+      UCHAR revid;
+//    UVDClockInfoArray array;
+//    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits;
+//    ATOM_PPLIB_UVD_State_Table states;
+}ATOM_PPLIB_UVD_Table;
+
 /**************************************************************************/
 
 
@@ -7020,4 +7943,68 @@
 
 #pragma pack() // BIOS data must use byte aligment
 
+//
+// AMD ACPI Table
+//
+#pragma pack(1)
+
+typedef struct {
+  ULONG Signature;
+  ULONG TableLength;      //Length
+  UCHAR Revision;
+  UCHAR Checksum;
+  UCHAR OemId[6];
+  UCHAR OemTableId[8];    //UINT64  OemTableId;
+  ULONG OemRevision;
+  ULONG CreatorId;
+  ULONG CreatorRevision;
+} AMD_ACPI_DESCRIPTION_HEADER;
+/*
+//EFI_ACPI_DESCRIPTION_HEADER from AcpiCommon.h
+typedef struct {
+  UINT32  Signature;       //0x0
+  UINT32  Length;          //0x4
+  UINT8   Revision;        //0x8
+  UINT8   Checksum;        //0x9
+  UINT8   OemId[6];        //0xA
+  UINT64  OemTableId;      //0x10
+  UINT32  OemRevision;     //0x18
+  UINT32  CreatorId;       //0x1C
+  UINT32  CreatorRevision; //0x20
+}EFI_ACPI_DESCRIPTION_HEADER;
+*/
+typedef struct {
+  AMD_ACPI_DESCRIPTION_HEADER SHeader;
+  UCHAR TableUUID[16];    //0x24
+  ULONG VBIOSImageOffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the stucture.
+  ULONG Lib1ImageOffset;  //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the stucture.
+  ULONG Reserved[4];      //0x3C
+}UEFI_ACPI_VFCT;
+
+typedef struct {
+  ULONG  PCIBus;          //0x4C
+  ULONG  PCIDevice;       //0x50
+  ULONG  PCIFunction;     //0x54
+  USHORT VendorID;        //0x58
+  USHORT DeviceID;        //0x5A
+  USHORT SSVID;           //0x5C
+  USHORT SSID;            //0x5E
+  ULONG  Revision;        //0x60
+  ULONG  ImageLength;     //0x64
+}VFCT_IMAGE_HEADER;
+
+
+typedef struct {
+  VFCT_IMAGE_HEADER	VbiosHeader;
+  UCHAR	VbiosContent[1];
+}GOP_VBIOS_CONTENT;
+
+typedef struct {
+  VFCT_IMAGE_HEADER	Lib1Header;
+  UCHAR	Lib1Content[1];
+}GOP_LIB1_CONTENT;
+
+#pragma pack()
+
+
 #endif /* _ATOMBIOS_H */
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 72672ea..083b3ea 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -231,6 +231,22 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
+static void atombios_powergate_crtc(struct drm_crtc *crtc, int state)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
+	ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucDispPipeId = radeon_crtc->crtc_id;
+	args.ucEnable = state;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
 void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
 	struct drm_device *dev = crtc->dev;
@@ -242,8 +258,11 @@
 		radeon_crtc->enabled = true;
 		/* adjust pm to dpms changes BEFORE enabling crtcs */
 		radeon_pm_compute_clocks(rdev);
+		/* disable crtc pair power gating before programming */
+		if (ASIC_IS_DCE6(rdev))
+			atombios_powergate_crtc(crtc, ATOM_DISABLE);
 		atombios_enable_crtc(crtc, ATOM_ENABLE);
-		if (ASIC_IS_DCE3(rdev))
+		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
 			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
 		atombios_blank_crtc(crtc, ATOM_DISABLE);
 		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
@@ -255,10 +274,29 @@
 		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->enabled)
 			atombios_blank_crtc(crtc, ATOM_ENABLE);
-		if (ASIC_IS_DCE3(rdev))
+		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
 			atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
 		atombios_enable_crtc(crtc, ATOM_DISABLE);
 		radeon_crtc->enabled = false;
+		/* power gating is per-pair */
+		if (ASIC_IS_DCE6(rdev)) {
+			struct drm_crtc *other_crtc;
+			struct radeon_crtc *other_radeon_crtc;
+			list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) {
+				other_radeon_crtc = to_radeon_crtc(other_crtc);
+				if (((radeon_crtc->crtc_id == 0) && (other_radeon_crtc->crtc_id == 1)) ||
+				    ((radeon_crtc->crtc_id == 1) && (other_radeon_crtc->crtc_id == 0)) ||
+				    ((radeon_crtc->crtc_id == 2) && (other_radeon_crtc->crtc_id == 3)) ||
+				    ((radeon_crtc->crtc_id == 3) && (other_radeon_crtc->crtc_id == 2)) ||
+				    ((radeon_crtc->crtc_id == 4) && (other_radeon_crtc->crtc_id == 5)) ||
+				    ((radeon_crtc->crtc_id == 5) && (other_radeon_crtc->crtc_id == 4))) {
+					/* if both crtcs in the pair are off, enable power gating */
+					if (other_radeon_crtc->enabled == false)
+						atombios_powergate_crtc(crtc, ATOM_ENABLE);
+					break;
+				}
+			}
+		}
 		/* adjust pm to dpms changes AFTER disabling crtcs */
 		radeon_pm_compute_clocks(rdev);
 		break;
@@ -436,7 +474,7 @@
 			return;
 		}
 		args.v3.ucEnable = enable;
-		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK))
+		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE61(rdev))
 			args.v3.ucEnable = ATOM_DISABLE;
 	} else if (ASIC_IS_DCE4(rdev)) {
 		args.v2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage);
@@ -699,7 +737,7 @@
 /* on DCE5, make sure the voltage is high enough to support the
  * required disp clk.
  */
-static void atombios_crtc_set_dcpll(struct radeon_device *rdev,
+static void atombios_crtc_set_disp_eng_pll(struct radeon_device *rdev,
 				    u32 dispclk)
 {
 	u8 frev, crev;
@@ -729,7 +767,12 @@
 			 * SetPixelClock provides the dividers
 			 */
 			args.v6.ulDispEngClkFreq = cpu_to_le32(dispclk);
-			args.v6.ucPpll = ATOM_DCPLL;
+			if (ASIC_IS_DCE61(rdev))
+				args.v6.ucPpll = ATOM_EXT_PLL1;
+			else if (ASIC_IS_DCE6(rdev))
+				args.v6.ucPpll = ATOM_PPLL0;
+			else
+				args.v6.ucPpll = ATOM_DCPLL;
 			break;
 		default:
 			DRM_ERROR("Unknown table version %d %d\n", frev, crev);
@@ -1444,7 +1487,36 @@
 	struct drm_crtc *test_crtc;
 	uint32_t pll_in_use = 0;
 
-	if (ASIC_IS_DCE4(rdev)) {
+	if (ASIC_IS_DCE61(rdev)) {
+		list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) {
+			if (test_encoder->crtc && (test_encoder->crtc == crtc)) {
+				struct radeon_encoder *test_radeon_encoder =
+					to_radeon_encoder(test_encoder);
+				struct radeon_encoder_atom_dig *dig =
+					test_radeon_encoder->enc_priv;
+
+				if ((test_radeon_encoder->encoder_id ==
+				     ENCODER_OBJECT_ID_INTERNAL_UNIPHY) &&
+				    (dig->linkb == false)) /* UNIPHY A uses PPLL2 */
+					return ATOM_PPLL2;
+			}
+		}
+		/* UNIPHY B/C/D/E/F */
+		list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+			struct radeon_crtc *radeon_test_crtc;
+
+			if (crtc == test_crtc)
+				continue;
+
+			radeon_test_crtc = to_radeon_crtc(test_crtc);
+			if ((radeon_test_crtc->pll_id == ATOM_PPLL0) ||
+			    (radeon_test_crtc->pll_id == ATOM_PPLL1))
+				pll_in_use |= (1 << radeon_test_crtc->pll_id);
+		}
+		if (!(pll_in_use & 4))
+			return ATOM_PPLL0;
+		return ATOM_PPLL1;
+	} else if (ASIC_IS_DCE4(rdev)) {
 		list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) {
 			if (test_encoder->crtc && (test_encoder->crtc == crtc)) {
 				/* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock,
@@ -1483,10 +1555,12 @@
 
 }
 
-void radeon_atom_dcpll_init(struct radeon_device *rdev)
+void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev)
 {
 	/* always set DCPLL */
-	if (ASIC_IS_DCE4(rdev)) {
+	if (ASIC_IS_DCE6(rdev))
+		atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk);
+	else if (ASIC_IS_DCE4(rdev)) {
 		struct radeon_atom_ss ss;
 		bool ss_enabled = radeon_atombios_get_asic_ss_info(rdev, &ss,
 								   ASIC_INTERNAL_SS_ON_DCPLL,
@@ -1494,7 +1568,7 @@
 		if (ss_enabled)
 			atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, &ss);
 		/* XXX: DCE5, make sure voltage, dispclk is high enough */
-		atombios_crtc_set_dcpll(rdev, rdev->clock.default_dispclk);
+		atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk);
 		if (ss_enabled)
 			atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, &ss);
 	}
@@ -1572,6 +1646,8 @@
 static void atombios_crtc_disable(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_atom_ss ss;
 
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
@@ -1583,6 +1659,12 @@
 		atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
 					  0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
 		break;
+	case ATOM_PPLL0:
+		/* disable the ppll */
+		if (ASIC_IS_DCE61(rdev))
+			atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
+						  0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 191218a..6c62be2 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -63,12 +63,12 @@
 
 	memset(&args, 0, sizeof(args));
 
-	base = (unsigned char *)rdev->mode_info.atom_context->scratch;
+	base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
 
 	memcpy(base, send, send_bytes);
 
-	args.v1.lpAuxRequest = 0;
-	args.v1.lpDataOut = 16;
+	args.v1.lpAuxRequest = 0 + 4;
+	args.v1.lpDataOut = 16 + 4;
 	args.v1.ucDataOutLen = 0;
 	args.v1.ucChannelID = chan->rec.i2c_id;
 	args.v1.ucDelay = delay / 10;
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index b88c460..468b874 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -479,7 +479,7 @@
  * - 2 DIG encoder blocks.
  * DIG1/2 can drive UNIPHY0/1/2 link A or link B
  *
- * DCE 4.0/5.0
+ * DCE 4.0/5.0/6.0
  * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B).
  * Supports up to 6 digital outputs
  * - 6 DIG encoder blocks.
@@ -495,7 +495,11 @@
  * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B).
  * Supports up to 6 digital outputs
  * - 2 DIG encoder blocks.
+ * llano
  * DIG1/2 can drive UNIPHY0/1/2 link A or link B
+ * ontario
+ * DIG1 drives UNIPHY0/1/2 link A
+ * DIG2 drives UNIPHY0/1/2 link B
  *
  * Routing
  * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links)
@@ -703,6 +707,7 @@
 	DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2;
 	DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3;
 	DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4;
+	DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5;
 };
 
 void
@@ -723,6 +728,7 @@
 	int connector_object_id = 0;
 	int igp_lane_info = 0;
 	int dig_encoder = dig->dig_encoder;
+	int hpd_id = RADEON_HPD_NONE;
 
 	if (action == ATOM_TRANSMITTER_ACTION_INIT) {
 		connector = radeon_get_connector_for_encoder_init(encoder);
@@ -738,6 +744,7 @@
 		struct radeon_connector_atom_dig *dig_connector =
 			radeon_connector->con_priv;
 
+		hpd_id = radeon_connector->hpd.hpd;
 		dp_clock = dig_connector->dp_clock;
 		dp_lane_count = dig_connector->dp_lane_count;
 		connector_object_id =
@@ -1003,6 +1010,60 @@
 					args.v4.acConfig.fDualLinkConnector = 1;
 			}
 			break;
+		case 5:
+			args.v5.ucAction = action;
+			if (is_dp)
+				args.v5.usSymClock = cpu_to_le16(dp_clock / 10);
+			else
+				args.v5.usSymClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+
+			switch (radeon_encoder->encoder_id) {
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+				if (dig->linkb)
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYB;
+				else
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYA;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+				if (dig->linkb)
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYD;
+				else
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYC;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+				if (dig->linkb)
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYF;
+				else
+					args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYE;
+				break;
+			}
+			if (is_dp)
+				args.v5.ucLaneNum = dp_lane_count;
+			else if (radeon_encoder->pixel_clock > 165000)
+				args.v5.ucLaneNum = 8;
+			else
+				args.v5.ucLaneNum = 4;
+			args.v5.ucConnObjId = connector_object_id;
+			args.v5.ucDigMode = atombios_get_encoder_mode(encoder);
+
+			if (is_dp && rdev->clock.dp_extclk)
+				args.v5.asConfig.ucPhyClkSrcId = ENCODER_REFCLK_SRC_EXTCLK;
+			else
+				args.v5.asConfig.ucPhyClkSrcId = pll_id;
+
+			if (is_dp)
+				args.v5.asConfig.ucCoherentMode = 1; /* DP requires coherent */
+			else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+				if (dig->coherent_mode)
+					args.v5.asConfig.ucCoherentMode = 1;
+			}
+			if (hpd_id == RADEON_HPD_NONE)
+				args.v5.asConfig.ucHPDSel = 0;
+			else
+				args.v5.asConfig.ucHPDSel = hpd_id + 1;
+			args.v5.ucDigEncoderSel = 1 << dig_encoder;
+			args.v5.ucDPLaneSet = lane_set;
+			break;
 		default:
 			DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
 			break;
@@ -1377,7 +1438,7 @@
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 	default:
-		if (ASIC_IS_DCE41(rdev)) {
+		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) {
 			atombios_external_encoder_setup(encoder, ext_encoder,
 							EXTERNAL_ENCODER_ACTION_V3_ENABLE_OUTPUT);
 			atombios_external_encoder_setup(encoder, ext_encoder,
@@ -1388,7 +1449,7 @@
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		if (ASIC_IS_DCE41(rdev)) {
+		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) {
 			atombios_external_encoder_setup(encoder, ext_encoder,
 							EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING);
 			atombios_external_encoder_setup(encoder, ext_encoder,
@@ -1761,7 +1822,7 @@
 			break;
 		}
 
-		if (ext_encoder && ASIC_IS_DCE41(rdev))
+		if (ext_encoder && (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)))
 			atombios_external_encoder_setup(encoder, ext_encoder,
 							EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT);
 	}
@@ -1850,7 +1911,7 @@
 	}
 
 	if (ext_encoder) {
-		if (ASIC_IS_DCE41(rdev))
+		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
 			atombios_external_encoder_setup(encoder, ext_encoder,
 							EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
 		else
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 466db41..cfa372c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -581,7 +581,7 @@
 	return 0;
 }
 
-static u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev)
+u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev)
 {
 	u32 tmp = RREG32(MC_SHARED_CHMAP);
 
@@ -1328,7 +1328,10 @@
 			rdev->mc.vram_end >> 12);
 	}
 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, rdev->vram_scratch.gpu_addr >> 12);
-	if (rdev->flags & RADEON_IS_IGP) {
+	/* llano/ontario only */
+	if ((rdev->family == CHIP_PALM) ||
+	    (rdev->family == CHIP_SUMO) ||
+	    (rdev->family == CHIP_SUMO2)) {
 		tmp = RREG32(MC_FUS_VM_FB_OFFSET) & 0x000FFFFF;
 		tmp |= ((rdev->mc.vram_end >> 20) & 0xF) << 24;
 		tmp |= ((rdev->mc.vram_start >> 20) & 0xF) << 20;
@@ -1972,7 +1975,9 @@
 
 
 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
-	if (rdev->flags & RADEON_IS_IGP)
+	if ((rdev->family == CHIP_PALM) ||
+	    (rdev->family == CHIP_SUMO) ||
+	    (rdev->family == CHIP_SUMO2))
 		mc_arb_ramcfg = RREG32(FUS_MC_ARB_RAMCFG);
 	else
 		mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
@@ -2362,7 +2367,9 @@
 
 	/* Get VRAM informations */
 	rdev->mc.vram_is_ddr = true;
-	if (rdev->flags & RADEON_IS_IGP)
+	if ((rdev->family == CHIP_PALM) ||
+	    (rdev->family == CHIP_SUMO) ||
+	    (rdev->family == CHIP_SUMO2))
 		tmp = RREG32(FUS_MC_ARB_RAMCFG);
 	else
 		tmp = RREG32(MC_ARB_RAMCFG);
@@ -2394,12 +2401,14 @@
 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
 	/* Setup GPU memory space */
-	if (rdev->flags & RADEON_IS_IGP) {
+	if ((rdev->family == CHIP_PALM) ||
+	    (rdev->family == CHIP_SUMO) ||
+	    (rdev->family == CHIP_SUMO2)) {
 		/* size in bytes on fusion */
 		rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 		rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
 	} else {
-		/* size in MB on evergreen */
+		/* size in MB on evergreen/cayman/tn */
 		rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
 		rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
 	}
@@ -2557,7 +2566,9 @@
 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
 	}
 
-	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
+	/* only one DAC on DCE6 */
+	if (!ASIC_IS_DCE6(rdev))
+		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
 	WREG32(DACB_AUTODETECT_INT_CONTROL, 0);
 
 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 4e83fdc..222acd2 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -226,7 +226,7 @@
 		x1 = 1;
 	if (y2 == 0)
 		y1 = 1;
-	if (rdev->family == CHIP_CAYMAN) {
+	if (rdev->family >= CHIP_CAYMAN) {
 		if ((x2 == 1) && (y2 == 1))
 			x2 = 2;
 	}
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 160799c..a48ca53 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -42,6 +42,8 @@
 extern int evergreen_mc_init(struct radeon_device *rdev);
 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+extern void si_rlc_fini(struct radeon_device *rdev);
+extern int si_rlc_init(struct radeon_device *rdev);
 
 #define EVERGREEN_PFP_UCODE_SIZE 1120
 #define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -53,6 +55,8 @@
 #define CAYMAN_RLC_UCODE_SIZE 1024
 #define CAYMAN_MC_UCODE_SIZE 6037
 
+#define ARUBA_RLC_UCODE_SIZE 1536
+
 /* Firmware Names */
 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
 MODULE_FIRMWARE("radeon/BARTS_me.bin");
@@ -68,6 +72,9 @@
 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
+MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
+MODULE_FIRMWARE("radeon/ARUBA_me.bin");
+MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
 
 #define BTC_IO_MC_REGS_SIZE 29
 
@@ -326,6 +333,15 @@
 		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
 		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
 		break;
+	case CHIP_ARUBA:
+		chip_name = "ARUBA";
+		rlc_chip_name = "ARUBA";
+		/* pfp/me same size as CAYMAN */
+		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
+		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
+		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
+		mc_req_size = 0;
+		break;
 	default: BUG();
 	}
 
@@ -365,15 +381,18 @@
 		err = -EINVAL;
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
-	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
-	if (err)
-		goto out;
-	if (rdev->mc_fw->size != mc_req_size) {
-		printk(KERN_ERR
-		       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
-		       rdev->mc_fw->size, fw_name);
-		err = -EINVAL;
+	/* no MC ucode on TN */
+	if (!(rdev->flags & RADEON_IS_IGP)) {
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
+		if (err)
+			goto out;
+		if (rdev->mc_fw->size != mc_req_size) {
+			printk(KERN_ERR
+			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
+			       rdev->mc_fw->size, fw_name);
+			err = -EINVAL;
+		}
 	}
 out:
 	platform_device_unregister(pdev);
@@ -478,6 +497,7 @@
 	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES);
 	switch (rdev->family) {
 	case CHIP_CAYMAN:
+	case CHIP_ARUBA:
 		force_no_swizzle = true;
 		break;
 	default:
@@ -610,7 +630,6 @@
 
 	switch (rdev->family) {
 	case CHIP_CAYMAN:
-	default:
 		rdev->config.cayman.max_shader_engines = 2;
 		rdev->config.cayman.max_pipes_per_simd = 4;
 		rdev->config.cayman.max_tile_pipes = 8;
@@ -632,6 +651,43 @@
 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
 		break;
+	case CHIP_ARUBA:
+	default:
+		rdev->config.cayman.max_shader_engines = 1;
+		rdev->config.cayman.max_pipes_per_simd = 4;
+		rdev->config.cayman.max_tile_pipes = 2;
+		if ((rdev->pdev->device == 0x9900) ||
+		    (rdev->pdev->device == 0x9901)) {
+			rdev->config.cayman.max_simds_per_se = 6;
+			rdev->config.cayman.max_backends_per_se = 2;
+		} else if ((rdev->pdev->device == 0x9903) ||
+			   (rdev->pdev->device == 0x9904)) {
+			rdev->config.cayman.max_simds_per_se = 4;
+			rdev->config.cayman.max_backends_per_se = 2;
+		} else if ((rdev->pdev->device == 0x9990) ||
+			   (rdev->pdev->device == 0x9991)) {
+			rdev->config.cayman.max_simds_per_se = 3;
+			rdev->config.cayman.max_backends_per_se = 1;
+		} else {
+			rdev->config.cayman.max_simds_per_se = 2;
+			rdev->config.cayman.max_backends_per_se = 1;
+		}
+		rdev->config.cayman.max_texture_channel_caches = 2;
+		rdev->config.cayman.max_gprs = 256;
+		rdev->config.cayman.max_threads = 256;
+		rdev->config.cayman.max_gs_threads = 32;
+		rdev->config.cayman.max_stack_entries = 512;
+		rdev->config.cayman.sx_num_of_sets = 8;
+		rdev->config.cayman.sx_max_export_size = 256;
+		rdev->config.cayman.sx_max_export_pos_size = 64;
+		rdev->config.cayman.sx_max_export_smx_size = 192;
+		rdev->config.cayman.max_hw_contexts = 8;
+		rdev->config.cayman.sq_num_cf_insts = 2;
+
+		rdev->config.cayman.sc_prim_fifo_size = 0x40;
+		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
+		break;
 	}
 
 	/* Initialize HDP */
@@ -652,7 +708,9 @@
 
 	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
 	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
-	cgts_tcc_disable = 0xff000000;
+	cgts_tcc_disable = 0xffff0000;
+	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
+		cgts_tcc_disable &= ~(1 << (16 + i));
 	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
 	gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG);
 	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
@@ -804,8 +862,13 @@
 		rdev->config.cayman.tile_config |= (3 << 0);
 		break;
 	}
-	rdev->config.cayman.tile_config |=
-		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+
+	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
+	if (rdev->flags & RADEON_IS_IGP)
+		rdev->config.evergreen.tile_config |= 1 << 4;
+	else
+		rdev->config.cayman.tile_config |=
+			((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
 	rdev->config.cayman.tile_config |=
 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
 	rdev->config.cayman.tile_config |=
@@ -1440,18 +1503,29 @@
 	/* enable pcie gen2 link */
 	evergreen_pcie_gen2_enable(rdev);
 
-	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
-		r = ni_init_microcode(rdev);
+	if (rdev->flags & RADEON_IS_IGP) {
+		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
+			r = ni_init_microcode(rdev);
+			if (r) {
+				DRM_ERROR("Failed to load firmware!\n");
+				return r;
+			}
+		}
+	} else {
+		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
+			r = ni_init_microcode(rdev);
+			if (r) {
+				DRM_ERROR("Failed to load firmware!\n");
+				return r;
+			}
+		}
+
+		r = ni_mc_load_microcode(rdev);
 		if (r) {
-			DRM_ERROR("Failed to load firmware!\n");
+			DRM_ERROR("Failed to load MC firmware!\n");
 			return r;
 		}
 	}
-	r = ni_mc_load_microcode(rdev);
-	if (r) {
-		DRM_ERROR("Failed to load MC firmware!\n");
-		return r;
-	}
 
 	r = r600_vram_scratch_init(rdev);
 	if (r)
@@ -1470,6 +1544,15 @@
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
 
+	/* allocate rlc buffers */
+	if (rdev->flags & RADEON_IS_IGP) {
+		r = si_rlc_init(rdev);
+		if (r) {
+			DRM_ERROR("Failed to init rlc BOs!\n");
+			return r;
+		}
+	}
+
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
 	if (r)
@@ -1654,6 +1737,8 @@
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		cayman_cp_fini(rdev);
 		r600_irq_fini(rdev);
+		if (rdev->flags & RADEON_IS_IGP)
+			si_rlc_fini(rdev);
 		radeon_wb_fini(rdev);
 		r100_ib_fini(rdev);
 		radeon_vm_manager_fini(rdev);
@@ -1665,8 +1750,11 @@
 	/* Don't start up if the MC ucode is missing.
 	 * The default clocks and voltages before the MC ucode
 	 * is loaded are not suffient for advanced operations.
+	 *
+	 * We can skip this check for TN, because there is no MC
+	 * ucode.
 	 */
-	if (!rdev->mc_fw) {
+	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
 		return -EINVAL;
 	}
@@ -1679,6 +1767,8 @@
 	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
+	if (rdev->flags & RADEON_IS_IGP)
+		si_rlc_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	r100_ib_fini(rdev);
@@ -1702,7 +1792,12 @@
 	/* number of VMs */
 	rdev->vm_manager.nvm = 8;
 	/* base offset of vram pages */
-	rdev->vm_manager.vram_base_offset = 0;
+	if (rdev->flags & RADEON_IS_IGP) {
+		u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
+		tmp <<= 22;
+		rdev->vm_manager.vram_base_offset = tmp;
+	} else
+		rdev->vm_manager.vram_base_offset = 0;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 9a7f3b6..2aa7046 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -106,6 +106,7 @@
 #define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 3)
 #define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 5)
 #define		ENABLE_ADVANCED_DRIVER_MODEL			(1 << 6)
+#define	FUS_MC_VM_FB_OFFSET				0x2068
 
 #define MC_SHARED_BLACKOUT_CNTL           		0x20ac
 #define	MC_ARB_RAMCFG					0x2760
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 5eb2382..391bd26 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -49,6 +49,7 @@
 #define EVERGREEN_PM4_UCODE_SIZE 1376
 #define EVERGREEN_RLC_UCODE_SIZE 768
 #define CAYMAN_RLC_UCODE_SIZE 1024
+#define ARUBA_RLC_UCODE_SIZE 1536
 
 /* Firmware Names */
 MODULE_FIRMWARE("radeon/R600_pfp.bin");
@@ -2778,7 +2779,7 @@
 	rdev->ih.rptr = 0;
 }
 
-static int r600_ih_ring_alloc(struct radeon_device *rdev)
+int r600_ih_ring_alloc(struct radeon_device *rdev)
 {
 	int r;
 
@@ -2814,7 +2815,7 @@
 	return 0;
 }
 
-static void r600_ih_ring_fini(struct radeon_device *rdev)
+void r600_ih_ring_fini(struct radeon_device *rdev)
 {
 	int r;
 	if (rdev->ih.ring_obj) {
@@ -2861,10 +2862,17 @@
 
 	r600_rlc_stop(rdev);
 
-	WREG32(RLC_HB_BASE, 0);
 	WREG32(RLC_HB_CNTL, 0);
-	WREG32(RLC_HB_RPTR, 0);
-	WREG32(RLC_HB_WPTR, 0);
+
+	if (rdev->family == CHIP_ARUBA) {
+		WREG32(TN_RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+		WREG32(TN_RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+	}
+	if (rdev->family <= CHIP_CAYMAN) {
+		WREG32(RLC_HB_BASE, 0);
+		WREG32(RLC_HB_RPTR, 0);
+		WREG32(RLC_HB_WPTR, 0);
+	}
 	if (rdev->family <= CHIP_CAICOS) {
 		WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
 		WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
@@ -2873,7 +2881,12 @@
 	WREG32(RLC_UCODE_CNTL, 0);
 
 	fw_data = (const __be32 *)rdev->rlc_fw->data;
-	if (rdev->family >= CHIP_CAYMAN) {
+	if (rdev->family >= CHIP_ARUBA) {
+		for (i = 0; i < ARUBA_RLC_UCODE_SIZE; i++) {
+			WREG32(RLC_UCODE_ADDR, i);
+			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
+		}
+	} else if (rdev->family >= CHIP_CAYMAN) {
 		for (i = 0; i < CAYMAN_RLC_UCODE_SIZE; i++) {
 			WREG32(RLC_UCODE_ADDR, i);
 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 8ae328f..3568a2e 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -593,6 +593,10 @@
 #define RLC_UCODE_ADDR                                    0x3f2c
 #define RLC_UCODE_DATA                                    0x3f30
 
+/* new for TN */
+#define TN_RLC_SAVE_AND_RESTORE_BASE                      0x3f10
+#define TN_RLC_CLEAR_STATE_RESTORE_BASE                   0x3f20
+
 #define SRBM_SOFT_RESET                                   0xe60
 #       define SOFT_RESET_RLC                             (1 << 13)
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d2870a0..138b952 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -236,12 +236,12 @@
 void radeon_combios_get_power_modes(struct radeon_device *rdev);
 void radeon_atombios_get_power_modes(struct radeon_device *rdev);
 void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
-int radeon_atom_get_max_vddc(struct radeon_device *rdev, u16 *voltage);
 void rs690_pm_info(struct radeon_device *rdev);
 extern int rv6xx_get_temp(struct radeon_device *rdev);
 extern int rv770_get_temp(struct radeon_device *rdev);
 extern int evergreen_get_temp(struct radeon_device *rdev);
 extern int sumo_get_temp(struct radeon_device *rdev);
+extern int si_get_temp(struct radeon_device *rdev);
 extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
 				    unsigned *bankh, unsigned *mtaspect,
 				    unsigned *tile_split);
@@ -632,6 +632,7 @@
 	uint32_t		*ptr;
 	struct radeon_fence	*fence;
 	unsigned		vm_id;
+	bool			is_const_ib;
 };
 
 /*
@@ -771,6 +772,18 @@
 
 void r600_blit_suspend(struct radeon_device *rdev);
 
+/*
+ * SI RLC stuff
+ */
+struct si_rlc {
+	/* for power gating */
+	struct radeon_bo	*save_restore_obj;
+	uint64_t		save_restore_gpu_addr;
+	/* for clear state */
+	struct radeon_bo	*clear_state_obj;
+	uint64_t		clear_state_gpu_addr;
+};
+
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
@@ -836,7 +849,9 @@
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
 	int			chunk_flags_idx;
+	int			chunk_const_ib_idx;
 	struct radeon_ib	*ib;
+	struct radeon_ib	*const_ib;
 	void			*track;
 	unsigned		family;
 	int			parser_error;
@@ -978,6 +993,7 @@
 	THERMAL_TYPE_EVERGREEN,
 	THERMAL_TYPE_SUMO,
 	THERMAL_TYPE_NI,
+	THERMAL_TYPE_SI,
 };
 
 struct radeon_voltage {
@@ -1369,6 +1385,37 @@
 	struct r100_gpu_lockup	lockup;
 };
 
+struct si_asic {
+	unsigned max_shader_engines;
+	unsigned max_pipes_per_simd;
+	unsigned max_tile_pipes;
+	unsigned max_simds_per_se;
+	unsigned max_backends_per_se;
+	unsigned max_texture_channel_caches;
+	unsigned max_gprs;
+	unsigned max_gs_threads;
+	unsigned max_hw_contexts;
+	unsigned sc_prim_fifo_size_frontend;
+	unsigned sc_prim_fifo_size_backend;
+	unsigned sc_hiz_tile_fifo_size;
+	unsigned sc_earlyz_tile_fifo_size;
+
+	unsigned num_shader_engines;
+	unsigned num_tile_pipes;
+	unsigned num_backends_per_se;
+	unsigned backend_disable_mask_per_asic;
+	unsigned backend_map;
+	unsigned num_texture_channel_caches;
+	unsigned mem_max_burst_length_bytes;
+	unsigned mem_row_size_in_kb;
+	unsigned shader_engine_tile_size;
+	unsigned num_gpus;
+	unsigned multi_gpu_tile_size;
+
+	unsigned tile_config;
+	struct r100_gpu_lockup	lockup;
+};
+
 union radeon_asic_config {
 	struct r300_asic	r300;
 	struct r100_asic	r100;
@@ -1376,6 +1423,7 @@
 	struct rv770_asic	rv770;
 	struct evergreen_asic	evergreen;
 	struct cayman_asic	cayman;
+	struct si_asic		si;
 };
 
 /*
@@ -1491,10 +1539,12 @@
 	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
 	const struct firmware *rlc_fw;	/* r6/700 RLC firmware */
 	const struct firmware *mc_fw;	/* NI MC firmware */
+	const struct firmware *ce_fw;	/* SI CE firmware */
 	struct r600_blit r600_blit;
 	struct r600_vram_scratch vram_scratch;
 	int msi_enabled; /* msi enabled */
 	struct r600_ih ih; /* r6/700 interrupt ring */
+	struct si_rlc rlc;
 	struct work_struct hotplug_work;
 	int num_crtc; /* number of crtcs */
 	struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
@@ -1638,6 +1688,9 @@
 #define ASIC_IS_DCE41(rdev) ((rdev->family >= CHIP_PALM) && \
 			     (rdev->flags & RADEON_IS_IGP))
 #define ASIC_IS_DCE5(rdev) ((rdev->family >= CHIP_BARTS))
+#define ASIC_IS_DCE6(rdev) ((rdev->family >= CHIP_ARUBA))
+#define ASIC_IS_DCE61(rdev) ((rdev->family >= CHIP_ARUBA) && \
+			     (rdev->flags & RADEON_IS_IGP))
 
 /*
  * BIOS helpers.
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 479c89e..be4dc2f 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1408,6 +1408,200 @@
 	},
 };
 
+static struct radeon_asic trinity_asic = {
+	.init = &cayman_init,
+	.fini = &cayman_fini,
+	.suspend = &cayman_suspend,
+	.resume = &cayman_resume,
+	.gpu_is_lockup = &cayman_gpu_is_lockup,
+	.asic_reset = &cayman_asic_reset,
+	.vga_set_state = &r600_vga_set_state,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
+	.gui_idle = &r600_gui_idle,
+	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
+	.gart = {
+		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.set_page = &rs600_gart_set_page,
+	},
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &cayman_ring_ib_execute,
+			.ib_parse = &evergreen_ib_parse,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = &evergreen_cs_parse,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		},
+		[CAYMAN_RING_TYPE_CP1_INDEX] = {
+			.ib_execute = &cayman_ring_ib_execute,
+			.ib_parse = &evergreen_ib_parse,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = &evergreen_cs_parse,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		},
+		[CAYMAN_RING_TYPE_CP2_INDEX] = {
+			.ib_execute = &cayman_ring_ib_execute,
+			.ib_parse = &evergreen_ib_parse,
+			.emit_fence = &cayman_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = &evergreen_cs_parse,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		}
+	},
+	.irq = {
+		.set = &evergreen_irq_set,
+		.process = &evergreen_irq_process,
+	},
+	.display = {
+		.bandwidth_update = &dce6_bandwidth_update,
+		.get_vblank_counter = &evergreen_get_vblank_counter,
+		.wait_for_vblank = &dce4_wait_for_vblank,
+	},
+	.copy = {
+		.blit = &r600_copy_blit,
+		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = NULL,
+		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.copy = &r600_copy_blit,
+		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+	},
+	.surface = {
+		.set_reg = r600_set_surface_reg,
+		.clear_reg = r600_clear_surface_reg,
+	},
+	.hpd = {
+		.init = &evergreen_hpd_init,
+		.fini = &evergreen_hpd_fini,
+		.sense = &evergreen_hpd_sense,
+		.set_polarity = &evergreen_hpd_set_polarity,
+	},
+	.pm = {
+		.misc = &evergreen_pm_misc,
+		.prepare = &evergreen_pm_prepare,
+		.finish = &evergreen_pm_finish,
+		.init_profile = &sumo_pm_init_profile,
+		.get_dynpm_state = &r600_pm_get_dynpm_state,
+		.get_engine_clock = &radeon_atom_get_engine_clock,
+		.set_engine_clock = &radeon_atom_set_engine_clock,
+		.get_memory_clock = NULL,
+		.set_memory_clock = NULL,
+		.get_pcie_lanes = NULL,
+		.set_pcie_lanes = NULL,
+		.set_clock_gating = NULL,
+	},
+	.pflip = {
+		.pre_page_flip = &evergreen_pre_page_flip,
+		.page_flip = &evergreen_page_flip,
+		.post_page_flip = &evergreen_post_page_flip,
+	},
+};
+
+static const struct radeon_vm_funcs si_vm_funcs = {
+	.init = &si_vm_init,
+	.fini = &si_vm_fini,
+	.bind = &si_vm_bind,
+	.unbind = &si_vm_unbind,
+	.tlb_flush = &si_vm_tlb_flush,
+	.page_flags = &cayman_vm_page_flags,
+	.set_page = &cayman_vm_set_page,
+};
+
+static struct radeon_asic si_asic = {
+	.init = &si_init,
+	.fini = &si_fini,
+	.suspend = &si_suspend,
+	.resume = &si_resume,
+	.gpu_is_lockup = &si_gpu_is_lockup,
+	.asic_reset = &si_asic_reset,
+	.vga_set_state = &r600_vga_set_state,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
+	.gui_idle = &r600_gui_idle,
+	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
+	.gart = {
+		.tlb_flush = &si_pcie_gart_tlb_flush,
+		.set_page = &rs600_gart_set_page,
+	},
+	.ring = {
+		[RADEON_RING_TYPE_GFX_INDEX] = {
+			.ib_execute = &si_ring_ib_execute,
+			.ib_parse = &si_ib_parse,
+			.emit_fence = &si_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		},
+		[CAYMAN_RING_TYPE_CP1_INDEX] = {
+			.ib_execute = &si_ring_ib_execute,
+			.ib_parse = &si_ib_parse,
+			.emit_fence = &si_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		},
+		[CAYMAN_RING_TYPE_CP2_INDEX] = {
+			.ib_execute = &si_ring_ib_execute,
+			.ib_parse = &si_ib_parse,
+			.emit_fence = &si_fence_ring_emit,
+			.emit_semaphore = &r600_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_ring_test,
+			.ib_test = &r600_ib_test,
+		}
+	},
+	.irq = {
+		.set = &si_irq_set,
+		.process = &si_irq_process,
+	},
+	.display = {
+		.bandwidth_update = &dce6_bandwidth_update,
+		.get_vblank_counter = &evergreen_get_vblank_counter,
+		.wait_for_vblank = &dce4_wait_for_vblank,
+	},
+	.copy = {
+		.blit = NULL,
+		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = NULL,
+		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.copy = NULL,
+		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+	},
+	.surface = {
+		.set_reg = r600_set_surface_reg,
+		.clear_reg = r600_clear_surface_reg,
+	},
+	.hpd = {
+		.init = &evergreen_hpd_init,
+		.fini = &evergreen_hpd_fini,
+		.sense = &evergreen_hpd_sense,
+		.set_polarity = &evergreen_hpd_set_polarity,
+	},
+	.pm = {
+		.misc = &evergreen_pm_misc,
+		.prepare = &evergreen_pm_prepare,
+		.finish = &evergreen_pm_finish,
+		.init_profile = &sumo_pm_init_profile,
+		.get_dynpm_state = &r600_pm_get_dynpm_state,
+		.get_engine_clock = &radeon_atom_get_engine_clock,
+		.set_engine_clock = &radeon_atom_set_engine_clock,
+		.get_memory_clock = &radeon_atom_get_memory_clock,
+		.set_memory_clock = &radeon_atom_set_memory_clock,
+		.get_pcie_lanes = NULL,
+		.set_pcie_lanes = NULL,
+		.set_clock_gating = NULL,
+	},
+	.pflip = {
+		.pre_page_flip = &evergreen_pre_page_flip,
+		.page_flip = &evergreen_page_flip,
+		.post_page_flip = &evergreen_post_page_flip,
+	},
+};
+
 int radeon_asic_init(struct radeon_device *rdev)
 {
 	radeon_register_accessor_init(rdev);
@@ -1525,6 +1719,20 @@
 		rdev->num_crtc = 6;
 		rdev->vm_manager.funcs = &cayman_vm_funcs;
 		break;
+	case CHIP_ARUBA:
+		rdev->asic = &trinity_asic;
+		/* set num crtcs */
+		rdev->num_crtc = 4;
+		rdev->vm_manager.funcs = &cayman_vm_funcs;
+		break;
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+		rdev->asic = &si_asic;
+		/* set num crtcs */
+		rdev->num_crtc = 6;
+		rdev->vm_manager.funcs = &si_vm_funcs;
+		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b8f0a16..3d9f9f1 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -461,4 +461,29 @@
 			unsigned pfn, uint64_t addr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 
+/* DCE6 - SI */
+void dce6_bandwidth_update(struct radeon_device *rdev);
+
+/*
+ * si
+ */
+void si_fence_ring_emit(struct radeon_device *rdev,
+			struct radeon_fence *fence);
+void si_pcie_gart_tlb_flush(struct radeon_device *rdev);
+int si_init(struct radeon_device *rdev);
+void si_fini(struct radeon_device *rdev);
+int si_suspend(struct radeon_device *rdev);
+int si_resume(struct radeon_device *rdev);
+bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
+int si_asic_reset(struct radeon_device *rdev);
+void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int si_irq_set(struct radeon_device *rdev);
+int si_irq_process(struct radeon_device *rdev);
+int si_vm_init(struct radeon_device *rdev);
+void si_vm_fini(struct radeon_device *rdev);
+int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
+void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
+int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 7354137..f6e69b8 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -56,6 +56,10 @@
 radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum,
 			  uint32_t supported_device);
 
+/* local */
+static int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type,
+				    u16 voltage_id, u16 *voltage);
+
 union atom_supported_devices {
 	struct _ATOM_SUPPORTED_DEVICES_INFO info;
 	struct _ATOM_SUPPORTED_DEVICES_INFO_2 info_2;
@@ -253,7 +257,9 @@
 
 	memset(&hpd, 0, sizeof(struct radeon_hpd));
 
-	if (ASIC_IS_DCE4(rdev))
+	if (ASIC_IS_DCE6(rdev))
+		reg = SI_DC_GPIO_HPD_A;
+	else if (ASIC_IS_DCE4(rdev))
 		reg = EVERGREEN_DC_GPIO_HPD_A;
 	else
 		reg = AVIVO_DC_GPIO_HPD_A;
@@ -1888,6 +1894,8 @@
 	"emc2103",
 	"Sumo",
 	"Northern Islands",
+	"Southern Islands",
+	"lm96163",
 };
 
 union power_info {
@@ -1904,6 +1912,7 @@
 	struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780;
 	struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen;
 	struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo;
+	struct _ATOM_PPLIB_SI_CLOCK_INFO si;
 };
 
 union pplib_power_state {
@@ -2161,6 +2170,11 @@
 				 (controller->ucFanParameters &
 				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
 			rdev->pm.int_thermal_type = THERMAL_TYPE_NI;
+		} else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_SISLANDS) {
+			DRM_INFO("Internal thermal controller %s fan control\n",
+				 (controller->ucFanParameters &
+				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_SI;
 		} else if ((controller->ucType ==
 			    ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) ||
 			   (controller->ucType ==
@@ -2281,6 +2295,7 @@
 						   union pplib_clock_info *clock_info)
 {
 	u32 sclk, mclk;
+	u16 vddc;
 
 	if (rdev->flags & RADEON_IS_IGP) {
 		if (rdev->family >= CHIP_PALM) {
@@ -2292,6 +2307,19 @@
 			sclk |= clock_info->rs780.ucLowEngineClockHigh << 16;
 			rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
 		}
+	} else if (ASIC_IS_DCE6(rdev)) {
+		sclk = le16_to_cpu(clock_info->si.usEngineClockLow);
+		sclk |= clock_info->si.ucEngineClockHigh << 16;
+		mclk = le16_to_cpu(clock_info->si.usMemoryClockLow);
+		mclk |= clock_info->si.ucMemoryClockHigh << 16;
+		rdev->pm.power_state[state_index].clock_info[mode_index].mclk = mclk;
+		rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
+		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type =
+			VOLTAGE_SW;
+		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage =
+			le16_to_cpu(clock_info->si.usVDDC);
+		rdev->pm.power_state[state_index].clock_info[mode_index].voltage.vddci =
+			le16_to_cpu(clock_info->si.usVDDCI);
 	} else if (ASIC_IS_DCE4(rdev)) {
 		sclk = le16_to_cpu(clock_info->evergreen.usEngineClockLow);
 		sclk |= clock_info->evergreen.ucEngineClockHigh << 16;
@@ -2319,11 +2347,18 @@
 	}
 
 	/* patch up vddc if necessary */
-	if (rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage == 0xff01) {
-		u16 vddc;
-
-		if (radeon_atom_get_max_vddc(rdev, &vddc) == 0)
+	switch (rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage) {
+	case ATOM_VIRTUAL_VOLTAGE_ID0:
+	case ATOM_VIRTUAL_VOLTAGE_ID1:
+	case ATOM_VIRTUAL_VOLTAGE_ID2:
+	case ATOM_VIRTUAL_VOLTAGE_ID3:
+		if (radeon_atom_get_max_vddc(rdev, VOLTAGE_TYPE_VDDC,
+					     rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage,
+					     &vddc) == 0)
 			rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = vddc;
+		break;
+	default:
+		break;
 	}
 
 	if (rdev->flags & RADEON_IS_IGP) {
@@ -2433,9 +2468,9 @@
 	int i, j, non_clock_array_index, clock_array_index;
 	int state_index = 0, mode_index = 0;
 	union pplib_clock_info *clock_info;
-	struct StateArray *state_array;
-	struct ClockInfoArray *clock_info_array;
-	struct NonClockInfoArray *non_clock_info_array;
+	struct _StateArray *state_array;
+	struct _ClockInfoArray *clock_info_array;
+	struct _NonClockInfoArray *non_clock_info_array;
 	bool valid;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
@@ -2448,13 +2483,13 @@
 	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
 
 	radeon_atombios_add_pplib_thermal_controller(rdev, &power_info->pplib.sThermalController);
-	state_array = (struct StateArray *)
+	state_array = (struct _StateArray *)
 		(mode_info->atom_context->bios + data_offset +
 		 le16_to_cpu(power_info->pplib.usStateArrayOffset));
-	clock_info_array = (struct ClockInfoArray *)
+	clock_info_array = (struct _ClockInfoArray *)
 		(mode_info->atom_context->bios + data_offset +
 		 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset));
-	non_clock_info_array = (struct NonClockInfoArray *)
+	non_clock_info_array = (struct _NonClockInfoArray *)
 		(mode_info->atom_context->bios + data_offset +
 		 le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
 	rdev->pm.power_state = kzalloc(sizeof(struct radeon_power_state) *
@@ -2481,7 +2516,7 @@
 				if (clock_array_index >= clock_info_array->ucNumEntries)
 					continue;
 				clock_info = (union pplib_clock_info *)
-					&clock_info_array->clockInfo[clock_array_index];
+					&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
 				valid = radeon_atombios_parse_pplib_clock_info(rdev,
 									       state_index, mode_index,
 									       clock_info);
@@ -2638,6 +2673,7 @@
 	struct _SET_VOLTAGE_PS_ALLOCATION alloc;
 	struct _SET_VOLTAGE_PARAMETERS v1;
 	struct _SET_VOLTAGE_PARAMETERS_V2 v2;
+	struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
 };
 
 void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type)
@@ -2664,6 +2700,11 @@
 		args.v2.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE;
 		args.v2.usVoltageLevel = cpu_to_le16(voltage_level);
 		break;
+	case 3:
+		args.v3.ucVoltageType = voltage_type;
+		args.v3.ucVoltageMode = ATOM_SET_VOLTAGE;
+		args.v3.usVoltageLevel = cpu_to_le16(voltage_level);
+		break;
 	default:
 		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
 		return;
@@ -2672,8 +2713,8 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
-int radeon_atom_get_max_vddc(struct radeon_device *rdev,
-			     u16 *voltage)
+static int radeon_atom_get_max_vddc(struct radeon_device *rdev, u8 voltage_type,
+				    u16 voltage_id, u16 *voltage)
 {
 	union set_voltage args;
 	int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
@@ -2694,6 +2735,15 @@
 
 		*voltage = le16_to_cpu(args.v2.usVoltageLevel);
 		break;
+	case 3:
+		args.v3.ucVoltageType = voltage_type;
+		args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
+		args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
+
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+		*voltage = le16_to_cpu(args.v3.usVoltageLevel);
+		break;
 	default:
 		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 64774ac..bd05156 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1085,7 +1085,7 @@
 		    (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B))
 			return MODE_OK;
 		else if (radeon_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_A) {
-			if (0) {
+			if (ASIC_IS_DCE6(rdev)) {
 				/* HDMI 1.3+ supports max clock of 340 Mhz */
 				if (mode->clock > 340000)
 					return MODE_CLOCK_HIGH;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d9d9f5a..5cac832 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -103,8 +103,13 @@
 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
 		break;
 	case RADEON_CS_RING_COMPUTE:
-		/* for now */
-		p->ring = RADEON_RING_TYPE_GFX_INDEX;
+		if (p->rdev->family >= CHIP_TAHITI) {
+			if (p->priority > 0)
+				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
+			else
+				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
+		} else
+			p->ring = RADEON_RING_TYPE_GFX_INDEX;
 		break;
 	}
 	return 0;
@@ -170,6 +175,7 @@
 	p->chunk_ib_idx = -1;
 	p->chunk_relocs_idx = -1;
 	p->chunk_flags_idx = -1;
+	p->chunk_const_ib_idx = -1;
 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (p->chunks_array == NULL) {
 		return -ENOMEM;
@@ -208,6 +214,12 @@
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
+			p->chunk_const_ib_idx = i;
+			/* zero length CONST IB isn't useful */
+			if (p->chunks[i].length_dw == 0)
+				return -EINVAL;
+		}
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
 			p->chunk_flags_idx = i;
 			/* zero length flags aren't useful */
@@ -246,6 +258,13 @@
 		return -EINVAL;
 	}
 
+	/* we only support VM on SI+ */
+	if ((p->rdev->family >= CHIP_TAHITI) &&
+	    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
+		DRM_ERROR("VM required on SI+!\n");
+		return -EINVAL;
+	}
+
 	if (radeon_cs_get_ring(p, ring, priority))
 		return -EINVAL;
 
@@ -389,6 +408,32 @@
 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
 		return 0;
 
+	if ((rdev->family >= CHIP_TAHITI) &&
+	    (parser->chunk_const_ib_idx != -1)) {
+		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
+		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
+			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
+				   ib_chunk->length_dw * 4);
+		if (r) {
+			DRM_ERROR("Failed to get const ib !\n");
+			return r;
+		}
+		parser->const_ib->is_const_ib = true;
+		parser->const_ib->length_dw = ib_chunk->length_dw;
+		/* Copy the packet into the IB */
+		if (DRM_COPY_FROM_USER(parser->const_ib->ptr, ib_chunk->user_ptr,
+				       ib_chunk->length_dw * 4)) {
+			return -EFAULT;
+		}
+		r = radeon_ring_ib_parse(rdev, parser->ring, parser->const_ib);
+		if (r) {
+			return r;
+		}
+	}
+
 	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
 	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
 		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
@@ -424,11 +469,25 @@
 	if (r) {
 		DRM_ERROR("Failed to synchronize rings !\n");
 	}
+
+	if ((rdev->family >= CHIP_TAHITI) &&
+	    (parser->chunk_const_ib_idx != -1)) {
+		parser->const_ib->vm_id = vm->id;
+		/* ib pool is bind at 0 in virtual address space to gpu_addr is the
+		 * offset inside the pool bo
+		 */
+		parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset;
+		r = radeon_ib_schedule(rdev, parser->const_ib);
+		if (r)
+			goto out;
+	}
+
 	parser->ib->vm_id = vm->id;
 	/* ib pool is bind at 0 in virtual address space to gpu_addr is the
 	 * offset inside the pool bo
 	 */
 	parser->ib->gpu_addr = parser->ib->sa_bo.offset;
+	parser->ib->is_const_ib = false;
 	r = radeon_ib_schedule(rdev, parser->ib);
 out:
 	if (!r) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 49f7cb7..ea7df16e2 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -89,6 +89,10 @@
 	"TURKS",
 	"CAICOS",
 	"CAYMAN",
+	"ARUBA",
+	"TAHITI",
+	"PITCAIRN",
+	"VERDE",
 	"LAST",
 };
 
@@ -964,7 +968,7 @@
 	/* init dig PHYs, disp eng pll */
 	if (rdev->is_atom_bios) {
 		radeon_atom_encoder_init(rdev);
-		radeon_atom_dcpll_init(rdev);
+		radeon_atom_disp_eng_pll_init(rdev);
 	}
 	/* reset hpd state */
 	radeon_hpd_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 1ebcef2..8086c96 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1296,7 +1296,7 @@
 	/* init dig PHYs, disp eng pll */
 	if (rdev->is_atom_bios) {
 		radeon_atom_encoder_init(rdev);
-		radeon_atom_dcpll_init(rdev);
+		radeon_atom_disp_eng_pll_init(rdev);
 	}
 
 	/* initialize hpd */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 498d21d..ef7bb3f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -56,9 +56,10 @@
  *   2.12.0 - RADEON_CS_KEEP_TILING_FLAGS
  *   2.13.0 - virtual memory support, streamout
  *   2.14.0 - add evergreen tiling informations
+ *   2.15.0 - add max_pipes query
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	14
+#define KMS_DRIVER_MINOR	15
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 26e9270..7467069 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -307,6 +307,8 @@
 bool radeon_dig_monitor_is_duallink(struct drm_encoder *encoder,
 				    u32 pixel_clock)
 {
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector;
 	struct radeon_connector_atom_dig *dig_connector;
@@ -324,7 +326,7 @@
 	case DRM_MODE_CONNECTOR_HDMIB:
 		if (radeon_connector->use_digital) {
 			/* HDMI 1.3 supports up to 340 Mhz over single link */
-			if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) {
+			if (ASIC_IS_DCE6(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) {
 				if (pixel_clock > 340000)
 					return true;
 				else
@@ -346,7 +348,7 @@
 			return false;
 		else {
 			/* HDMI 1.3 supports up to 340 Mhz over single link */
-			if (0 && drm_detect_hdmi_monitor(radeon_connector->edid)) {
+			if (ASIC_IS_DCE6(rdev) && drm_detect_hdmi_monitor(radeon_connector->edid)) {
 				if (pixel_clock > 340000)
 					return true;
 				else
diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h
index ec2f1ea..d1fafea 100644
--- a/drivers/gpu/drm/radeon/radeon_family.h
+++ b/drivers/gpu/drm/radeon/radeon_family.h
@@ -87,6 +87,10 @@
 	CHIP_TURKS,
 	CHIP_CAICOS,
 	CHIP_CAYMAN,
+	CHIP_ARUBA,
+	CHIP_TAHITI,
+	CHIP_PITCAIRN,
+	CHIP_VERDE,
 	CHIP_LAST,
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 1986eba..3c2628b 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -171,7 +171,9 @@
 		value = rdev->accel_working;
 		break;
 	case RADEON_INFO_TILING_CONFIG:
-		if (rdev->family >= CHIP_CAYMAN)
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.tile_config;
+		else if (rdev->family >= CHIP_CAYMAN)
 			value = rdev->config.cayman.tile_config;
 		else if (rdev->family >= CHIP_CEDAR)
 			value = rdev->config.evergreen.tile_config;
@@ -210,7 +212,10 @@
 		value = rdev->clock.spll.reference_freq * 10;
 		break;
 	case RADEON_INFO_NUM_BACKENDS:
-		if (rdev->family >= CHIP_CAYMAN)
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.max_backends_per_se *
+				rdev->config.si.max_shader_engines;
+		else if (rdev->family >= CHIP_CAYMAN)
 			value = rdev->config.cayman.max_backends_per_se *
 				rdev->config.cayman.max_shader_engines;
 		else if (rdev->family >= CHIP_CEDAR)
@@ -224,7 +229,9 @@
 		}
 		break;
 	case RADEON_INFO_NUM_TILE_PIPES:
-		if (rdev->family >= CHIP_CAYMAN)
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.max_tile_pipes;
+		else if (rdev->family >= CHIP_CAYMAN)
 			value = rdev->config.cayman.max_tile_pipes;
 		else if (rdev->family >= CHIP_CEDAR)
 			value = rdev->config.evergreen.max_tile_pipes;
@@ -240,7 +247,9 @@
 		value = 1;
 		break;
 	case RADEON_INFO_BACKEND_MAP:
-		if (rdev->family >= CHIP_CAYMAN)
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.backend_map;
+		else if (rdev->family >= CHIP_CAYMAN)
 			value = rdev->config.cayman.backend_map;
 		else if (rdev->family >= CHIP_CEDAR)
 			value = rdev->config.evergreen.backend_map;
@@ -264,6 +273,21 @@
 			return -EINVAL;
 		value = RADEON_IB_VM_MAX_SIZE;
 		break;
+	case RADEON_INFO_MAX_PIPES:
+		if (rdev->family >= CHIP_TAHITI)
+			value = rdev->config.si.max_pipes_per_simd;
+		else if (rdev->family >= CHIP_CAYMAN)
+			value = rdev->config.cayman.max_pipes_per_simd;
+		else if (rdev->family >= CHIP_CEDAR)
+			value = rdev->config.evergreen.max_pipes;
+		else if (rdev->family >= CHIP_RV770)
+			value = rdev->config.rv770.max_pipes;
+		else if (rdev->family >= CHIP_R600)
+			value = rdev->config.r600.max_pipes;
+		else {
+			return -EINVAL;
+		}
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 8a85598..f7eb5d8 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -491,7 +491,7 @@
 				    struct drm_connector *connector);
 extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode);
 extern void radeon_atom_encoder_init(struct radeon_device *rdev);
-extern void radeon_atom_dcpll_init(struct radeon_device *rdev);
+extern void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev);
 extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder,
 					   int action, uint8_t lane_num,
 					   uint8_t lane_set);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 3575129..caa55d6 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -474,6 +474,9 @@
 	case THERMAL_TYPE_SUMO:
 		temp = sumo_get_temp(rdev);
 		break;
+	case THERMAL_TYPE_SI:
+		temp = si_get_temp(rdev);
+		break;
 	default:
 		temp = 0;
 		break;
@@ -514,6 +517,10 @@
 	case THERMAL_TYPE_EVERGREEN:
 	case THERMAL_TYPE_NI:
 	case THERMAL_TYPE_SUMO:
+	case THERMAL_TYPE_SI:
+		/* No support for TN yet */
+		if (rdev->family == CHIP_ARUBA)
+			return err;
 		rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev);
 		if (IS_ERR(rdev->pm.int_hwmon_dev)) {
 			err = PTR_ERR(rdev->pm.int_hwmon_dev);
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index 5098634..5d8f735 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -56,6 +56,7 @@
 #include "r600_reg.h"
 #include "evergreen_reg.h"
 #include "ni_reg.h"
+#include "si_reg.h"
 
 #define RADEON_MC_AGP_LOCATION		0x014c
 #define		RADEON_MC_AGP_START_MASK	0x0000FFFF
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 3056620..cc33b3d 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -133,6 +133,7 @@
 				(*ib)->gpu_addr += (*ib)->sa_bo.offset;
 				(*ib)->fence = fence;
 				(*ib)->vm_id = 0;
+				(*ib)->is_const_ib = false;
 				/* ib are most likely to be allocated in a ring fashion
 				 * thus rdev->ib_pool.head_id should be the id of the
 				 * oldest ib
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
new file mode 100644
index 0000000..ac7a199
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si.c
@@ -0,0 +1,4128 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_drm.h"
+#include "sid.h"
+#include "atom.h"
+#include "si_blit_shaders.h"
+
+#define SI_PFP_UCODE_SIZE 2144
+#define SI_PM4_UCODE_SIZE 2144
+#define SI_CE_UCODE_SIZE 2144
+#define SI_RLC_UCODE_SIZE 2048
+#define SI_MC_UCODE_SIZE 7769
+
+MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
+MODULE_FIRMWARE("radeon/TAHITI_me.bin");
+MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
+MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
+MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
+MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
+MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
+MODULE_FIRMWARE("radeon/VERDE_me.bin");
+MODULE_FIRMWARE("radeon/VERDE_ce.bin");
+MODULE_FIRMWARE("radeon/VERDE_mc.bin");
+MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
+
+extern int r600_ih_ring_alloc(struct radeon_device *rdev);
+extern void r600_ih_ring_fini(struct radeon_device *rdev);
+extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
+extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
+extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
+extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
+
+/* get temperature in millidegrees */
+int si_get_temp(struct radeon_device *rdev)
+{
+	u32 temp;
+	int actual_temp = 0;
+
+	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
+		CTF_TEMP_SHIFT;
+
+	if (temp & 0x200)
+		actual_temp = 255;
+	else
+		actual_temp = temp & 0x1ff;
+
+	actual_temp = (actual_temp * 1000);
+
+	return actual_temp;
+}
+
+#define TAHITI_IO_MC_REGS_SIZE 36
+
+static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
+	{0x0000006f, 0x03044000},
+	{0x00000070, 0x0480c018},
+	{0x00000071, 0x00000040},
+	{0x00000072, 0x01000000},
+	{0x00000074, 0x000000ff},
+	{0x00000075, 0x00143400},
+	{0x00000076, 0x08ec0800},
+	{0x00000077, 0x040000cc},
+	{0x00000079, 0x00000000},
+	{0x0000007a, 0x21000409},
+	{0x0000007c, 0x00000000},
+	{0x0000007d, 0xe8000000},
+	{0x0000007e, 0x044408a8},
+	{0x0000007f, 0x00000003},
+	{0x00000080, 0x00000000},
+	{0x00000081, 0x01000000},
+	{0x00000082, 0x02000000},
+	{0x00000083, 0x00000000},
+	{0x00000084, 0xe3f3e4f4},
+	{0x00000085, 0x00052024},
+	{0x00000087, 0x00000000},
+	{0x00000088, 0x66036603},
+	{0x00000089, 0x01000000},
+	{0x0000008b, 0x1c0a0000},
+	{0x0000008c, 0xff010000},
+	{0x0000008e, 0xffffefff},
+	{0x0000008f, 0xfff3efff},
+	{0x00000090, 0xfff3efbf},
+	{0x00000094, 0x00101101},
+	{0x00000095, 0x00000fff},
+	{0x00000096, 0x00116fff},
+	{0x00000097, 0x60010000},
+	{0x00000098, 0x10010000},
+	{0x00000099, 0x00006000},
+	{0x0000009a, 0x00001000},
+	{0x0000009f, 0x00a77400}
+};
+
+static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
+	{0x0000006f, 0x03044000},
+	{0x00000070, 0x0480c018},
+	{0x00000071, 0x00000040},
+	{0x00000072, 0x01000000},
+	{0x00000074, 0x000000ff},
+	{0x00000075, 0x00143400},
+	{0x00000076, 0x08ec0800},
+	{0x00000077, 0x040000cc},
+	{0x00000079, 0x00000000},
+	{0x0000007a, 0x21000409},
+	{0x0000007c, 0x00000000},
+	{0x0000007d, 0xe8000000},
+	{0x0000007e, 0x044408a8},
+	{0x0000007f, 0x00000003},
+	{0x00000080, 0x00000000},
+	{0x00000081, 0x01000000},
+	{0x00000082, 0x02000000},
+	{0x00000083, 0x00000000},
+	{0x00000084, 0xe3f3e4f4},
+	{0x00000085, 0x00052024},
+	{0x00000087, 0x00000000},
+	{0x00000088, 0x66036603},
+	{0x00000089, 0x01000000},
+	{0x0000008b, 0x1c0a0000},
+	{0x0000008c, 0xff010000},
+	{0x0000008e, 0xffffefff},
+	{0x0000008f, 0xfff3efff},
+	{0x00000090, 0xfff3efbf},
+	{0x00000094, 0x00101101},
+	{0x00000095, 0x00000fff},
+	{0x00000096, 0x00116fff},
+	{0x00000097, 0x60010000},
+	{0x00000098, 0x10010000},
+	{0x00000099, 0x00006000},
+	{0x0000009a, 0x00001000},
+	{0x0000009f, 0x00a47400}
+};
+
+static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
+	{0x0000006f, 0x03044000},
+	{0x00000070, 0x0480c018},
+	{0x00000071, 0x00000040},
+	{0x00000072, 0x01000000},
+	{0x00000074, 0x000000ff},
+	{0x00000075, 0x00143400},
+	{0x00000076, 0x08ec0800},
+	{0x00000077, 0x040000cc},
+	{0x00000079, 0x00000000},
+	{0x0000007a, 0x21000409},
+	{0x0000007c, 0x00000000},
+	{0x0000007d, 0xe8000000},
+	{0x0000007e, 0x044408a8},
+	{0x0000007f, 0x00000003},
+	{0x00000080, 0x00000000},
+	{0x00000081, 0x01000000},
+	{0x00000082, 0x02000000},
+	{0x00000083, 0x00000000},
+	{0x00000084, 0xe3f3e4f4},
+	{0x00000085, 0x00052024},
+	{0x00000087, 0x00000000},
+	{0x00000088, 0x66036603},
+	{0x00000089, 0x01000000},
+	{0x0000008b, 0x1c0a0000},
+	{0x0000008c, 0xff010000},
+	{0x0000008e, 0xffffefff},
+	{0x0000008f, 0xfff3efff},
+	{0x00000090, 0xfff3efbf},
+	{0x00000094, 0x00101101},
+	{0x00000095, 0x00000fff},
+	{0x00000096, 0x00116fff},
+	{0x00000097, 0x60010000},
+	{0x00000098, 0x10010000},
+	{0x00000099, 0x00006000},
+	{0x0000009a, 0x00001000},
+	{0x0000009f, 0x00a37400}
+};
+
+/* ucode loading */
+static int si_mc_load_microcode(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	u32 running, blackout = 0;
+	u32 *io_mc_regs;
+	int i, ucode_size, regs_size;
+
+	if (!rdev->mc_fw)
+		return -EINVAL;
+
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
+		ucode_size = SI_MC_UCODE_SIZE;
+		regs_size = TAHITI_IO_MC_REGS_SIZE;
+		break;
+	case CHIP_PITCAIRN:
+		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
+		ucode_size = SI_MC_UCODE_SIZE;
+		regs_size = TAHITI_IO_MC_REGS_SIZE;
+		break;
+	case CHIP_VERDE:
+	default:
+		io_mc_regs = (u32 *)&verde_io_mc_regs;
+		ucode_size = SI_MC_UCODE_SIZE;
+		regs_size = TAHITI_IO_MC_REGS_SIZE;
+		break;
+	}
+
+	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
+
+	if (running == 0) {
+		if (running) {
+			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
+			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
+		}
+
+		/* reset the engine and set to writable */
+		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
+		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
+
+		/* load mc io regs */
+		for (i = 0; i < regs_size; i++) {
+			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
+			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
+		}
+		/* load the MC ucode */
+		fw_data = (const __be32 *)rdev->mc_fw->data;
+		for (i = 0; i < ucode_size; i++)
+			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
+
+		/* put the engine back into the active state */
+		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
+		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
+		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
+
+		/* wait for training to complete */
+		for (i = 0; i < rdev->usec_timeout; i++) {
+			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
+				break;
+			udelay(1);
+		}
+		for (i = 0; i < rdev->usec_timeout; i++) {
+			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
+				break;
+			udelay(1);
+		}
+
+		if (running)
+			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
+	}
+
+	return 0;
+}
+
+static int si_init_microcode(struct radeon_device *rdev)
+{
+	struct platform_device *pdev;
+	const char *chip_name;
+	const char *rlc_chip_name;
+	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
+	char fw_name[30];
+	int err;
+
+	DRM_DEBUG("\n");
+
+	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
+	err = IS_ERR(pdev);
+	if (err) {
+		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
+		return -EINVAL;
+	}
+
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+		chip_name = "TAHITI";
+		rlc_chip_name = "TAHITI";
+		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
+		me_req_size = SI_PM4_UCODE_SIZE * 4;
+		ce_req_size = SI_CE_UCODE_SIZE * 4;
+		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
+		mc_req_size = SI_MC_UCODE_SIZE * 4;
+		break;
+	case CHIP_PITCAIRN:
+		chip_name = "PITCAIRN";
+		rlc_chip_name = "PITCAIRN";
+		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
+		me_req_size = SI_PM4_UCODE_SIZE * 4;
+		ce_req_size = SI_CE_UCODE_SIZE * 4;
+		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
+		mc_req_size = SI_MC_UCODE_SIZE * 4;
+		break;
+	case CHIP_VERDE:
+		chip_name = "VERDE";
+		rlc_chip_name = "VERDE";
+		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
+		me_req_size = SI_PM4_UCODE_SIZE * 4;
+		ce_req_size = SI_CE_UCODE_SIZE * 4;
+		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
+		mc_req_size = SI_MC_UCODE_SIZE * 4;
+		break;
+	default: BUG();
+	}
+
+	DRM_INFO("Loading %s Microcode\n", chip_name);
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->pfp_fw->size != pfp_req_size) {
+		printk(KERN_ERR
+		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->pfp_fw->size, fw_name);
+		err = -EINVAL;
+		goto out;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->me_fw->size != me_req_size) {
+		printk(KERN_ERR
+		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->me_fw->size, fw_name);
+		err = -EINVAL;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
+	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->ce_fw->size != ce_req_size) {
+		printk(KERN_ERR
+		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->ce_fw->size, fw_name);
+		err = -EINVAL;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
+	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->rlc_fw->size != rlc_req_size) {
+		printk(KERN_ERR
+		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->rlc_fw->size, fw_name);
+		err = -EINVAL;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
+	if (err)
+		goto out;
+	if (rdev->mc_fw->size != mc_req_size) {
+		printk(KERN_ERR
+		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
+		       rdev->mc_fw->size, fw_name);
+		err = -EINVAL;
+	}
+
+out:
+	platform_device_unregister(pdev);
+
+	if (err) {
+		if (err != -EINVAL)
+			printk(KERN_ERR
+			       "si_cp: Failed to load firmware \"%s\"\n",
+			       fw_name);
+		release_firmware(rdev->pfp_fw);
+		rdev->pfp_fw = NULL;
+		release_firmware(rdev->me_fw);
+		rdev->me_fw = NULL;
+		release_firmware(rdev->ce_fw);
+		rdev->ce_fw = NULL;
+		release_firmware(rdev->rlc_fw);
+		rdev->rlc_fw = NULL;
+		release_firmware(rdev->mc_fw);
+		rdev->mc_fw = NULL;
+	}
+	return err;
+}
+
+/* watermark setup */
+static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
+				   struct radeon_crtc *radeon_crtc,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *other_mode)
+{
+	u32 tmp;
+	/*
+	 * Line Buffer Setup
+	 * There are 3 line buffers, each one shared by 2 display controllers.
+	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
+	 * the display controllers.  The paritioning is done via one of four
+	 * preset allocations specified in bits 21:20:
+	 *  0 - half lb
+	 *  2 - whole lb, other crtc must be disabled
+	 */
+	/* this can get tricky if we have two large displays on a paired group
+	 * of crtcs.  Ideally for multiple large displays we'd assign them to
+	 * non-linked crtcs for maximum line buffer allocation.
+	 */
+	if (radeon_crtc->base.enabled && mode) {
+		if (other_mode)
+			tmp = 0; /* 1/2 */
+		else
+			tmp = 2; /* whole */
+	} else
+		tmp = 0;
+
+	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
+	       DC_LB_MEMORY_CONFIG(tmp));
+
+	if (radeon_crtc->base.enabled && mode) {
+		switch (tmp) {
+		case 0:
+		default:
+			return 4096 * 2;
+		case 2:
+			return 8192 * 2;
+		}
+	}
+
+	/* controller not enabled, so no lb used */
+	return 0;
+}
+
+static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32(MC_SHARED_CHMAP);
+
+	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+	case 0:
+	default:
+		return 1;
+	case 1:
+		return 2;
+	case 2:
+		return 4;
+	case 3:
+		return 8;
+	case 4:
+		return 3;
+	case 5:
+		return 6;
+	case 6:
+		return 10;
+	case 7:
+		return 12;
+	case 8:
+		return 16;
+	}
+}
+
+struct dce6_wm_params {
+	u32 dram_channels; /* number of dram channels */
+	u32 yclk;          /* bandwidth per dram data pin in kHz */
+	u32 sclk;          /* engine clock in kHz */
+	u32 disp_clk;      /* display clock in kHz */
+	u32 src_width;     /* viewport width */
+	u32 active_time;   /* active display time in ns */
+	u32 blank_time;    /* blank time in ns */
+	bool interlaced;    /* mode is interlaced */
+	fixed20_12 vsc;    /* vertical scale ratio */
+	u32 num_heads;     /* number of active crtcs */
+	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+	u32 lb_size;       /* line buffer allocated to pipe */
+	u32 vtaps;         /* vertical scaler taps */
+};
+
+static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
+{
+	/* Calculate raw DRAM Bandwidth */
+	fixed20_12 dram_efficiency; /* 0.7 */
+	fixed20_12 yclk, dram_channels, bandwidth;
+	fixed20_12 a;
+
+	a.full = dfixed_const(1000);
+	yclk.full = dfixed_const(wm->yclk);
+	yclk.full = dfixed_div(yclk, a);
+	dram_channels.full = dfixed_const(wm->dram_channels * 4);
+	a.full = dfixed_const(10);
+	dram_efficiency.full = dfixed_const(7);
+	dram_efficiency.full = dfixed_div(dram_efficiency, a);
+	bandwidth.full = dfixed_mul(dram_channels, yclk);
+	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+	return dfixed_trunc(bandwidth);
+}
+
+static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+	/* Calculate DRAM Bandwidth and the part allocated to display. */
+	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+	fixed20_12 yclk, dram_channels, bandwidth;
+	fixed20_12 a;
+
+	a.full = dfixed_const(1000);
+	yclk.full = dfixed_const(wm->yclk);
+	yclk.full = dfixed_div(yclk, a);
+	dram_channels.full = dfixed_const(wm->dram_channels * 4);
+	a.full = dfixed_const(10);
+	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
+	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+	bandwidth.full = dfixed_mul(dram_channels, yclk);
+	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+	return dfixed_trunc(bandwidth);
+}
+
+static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
+{
+	/* Calculate the display Data return Bandwidth */
+	fixed20_12 return_efficiency; /* 0.8 */
+	fixed20_12 sclk, bandwidth;
+	fixed20_12 a;
+
+	a.full = dfixed_const(1000);
+	sclk.full = dfixed_const(wm->sclk);
+	sclk.full = dfixed_div(sclk, a);
+	a.full = dfixed_const(10);
+	return_efficiency.full = dfixed_const(8);
+	return_efficiency.full = dfixed_div(return_efficiency, a);
+	a.full = dfixed_const(32);
+	bandwidth.full = dfixed_mul(a, sclk);
+	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+	return dfixed_trunc(bandwidth);
+}
+
+static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
+{
+	return 32;
+}
+
+static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
+{
+	/* Calculate the DMIF Request Bandwidth */
+	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+	fixed20_12 disp_clk, sclk, bandwidth;
+	fixed20_12 a, b1, b2;
+	u32 min_bandwidth;
+
+	a.full = dfixed_const(1000);
+	disp_clk.full = dfixed_const(wm->disp_clk);
+	disp_clk.full = dfixed_div(disp_clk, a);
+	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
+	b1.full = dfixed_mul(a, disp_clk);
+
+	a.full = dfixed_const(1000);
+	sclk.full = dfixed_const(wm->sclk);
+	sclk.full = dfixed_div(sclk, a);
+	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
+	b2.full = dfixed_mul(a, sclk);
+
+	a.full = dfixed_const(10);
+	disp_clk_request_efficiency.full = dfixed_const(8);
+	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
+
+	a.full = dfixed_const(min_bandwidth);
+	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
+
+	return dfixed_trunc(bandwidth);
+}
+
+static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
+{
+	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
+	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
+	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
+
+	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
+{
+	/* Calculate the display mode Average Bandwidth
+	 * DisplayMode should contain the source and destination dimensions,
+	 * timing, etc.
+	 */
+	fixed20_12 bpp;
+	fixed20_12 line_time;
+	fixed20_12 src_width;
+	fixed20_12 bandwidth;
+	fixed20_12 a;
+
+	a.full = dfixed_const(1000);
+	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+	line_time.full = dfixed_div(line_time, a);
+	bpp.full = dfixed_const(wm->bytes_per_pixel);
+	src_width.full = dfixed_const(wm->src_width);
+	bandwidth.full = dfixed_mul(src_width, bpp);
+	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+	bandwidth.full = dfixed_div(bandwidth, line_time);
+
+	return dfixed_trunc(bandwidth);
+}
+
+static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
+{
+	/* First calcualte the latency in ns */
+	u32 mc_latency = 2000; /* 2000 ns. */
+	u32 available_bandwidth = dce6_available_bandwidth(wm);
+	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+		(wm->num_heads * cursor_line_pair_return_time);
+	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+	u32 tmp, dmif_size = 12288;
+	fixed20_12 a, b, c;
+
+	if (wm->num_heads == 0)
+		return 0;
+
+	a.full = dfixed_const(2);
+	b.full = dfixed_const(1);
+	if ((wm->vsc.full > a.full) ||
+	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+	    (wm->vtaps >= 5) ||
+	    ((wm->vsc.full >= a.full) && wm->interlaced))
+		max_src_lines_per_dst_line = 4;
+	else
+		max_src_lines_per_dst_line = 2;
+
+	a.full = dfixed_const(available_bandwidth);
+	b.full = dfixed_const(wm->num_heads);
+	a.full = dfixed_div(a, b);
+
+	b.full = dfixed_const(mc_latency + 512);
+	c.full = dfixed_const(wm->disp_clk);
+	b.full = dfixed_div(b, c);
+
+	c.full = dfixed_const(dmif_size);
+	b.full = dfixed_div(c, b);
+
+	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
+
+	b.full = dfixed_const(1000);
+	c.full = dfixed_const(wm->disp_clk);
+	b.full = dfixed_div(c, b);
+	c.full = dfixed_const(wm->bytes_per_pixel);
+	b.full = dfixed_mul(b, c);
+
+	lb_fill_bw = min(tmp, dfixed_trunc(b));
+
+	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+	b.full = dfixed_const(1000);
+	c.full = dfixed_const(lb_fill_bw);
+	b.full = dfixed_div(c, b);
+	a.full = dfixed_div(a, b);
+	line_fill_time = dfixed_trunc(a);
+
+	if (line_fill_time < wm->active_time)
+		return latency;
+	else
+		return latency + (line_fill_time - wm->active_time);
+
+}
+
+static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+	if (dce6_average_bandwidth(wm) <=
+	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
+		return true;
+	else
+		return false;
+};
+
+static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
+{
+	if (dce6_average_bandwidth(wm) <=
+	    (dce6_available_bandwidth(wm) / wm->num_heads))
+		return true;
+	else
+		return false;
+};
+
+static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
+{
+	u32 lb_partitions = wm->lb_size / wm->src_width;
+	u32 line_time = wm->active_time + wm->blank_time;
+	u32 latency_tolerant_lines;
+	u32 latency_hiding;
+	fixed20_12 a;
+
+	a.full = dfixed_const(1);
+	if (wm->vsc.full > a.full)
+		latency_tolerant_lines = 1;
+	else {
+		if (lb_partitions <= (wm->vtaps + 1))
+			latency_tolerant_lines = 1;
+		else
+			latency_tolerant_lines = 2;
+	}
+
+	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+	if (dce6_latency_watermark(wm) <= latency_hiding)
+		return true;
+	else
+		return false;
+}
+
+static void dce6_program_watermarks(struct radeon_device *rdev,
+					 struct radeon_crtc *radeon_crtc,
+					 u32 lb_size, u32 num_heads)
+{
+	struct drm_display_mode *mode = &radeon_crtc->base.mode;
+	struct dce6_wm_params wm;
+	u32 pixel_period;
+	u32 line_time = 0;
+	u32 latency_watermark_a = 0, latency_watermark_b = 0;
+	u32 priority_a_mark = 0, priority_b_mark = 0;
+	u32 priority_a_cnt = PRIORITY_OFF;
+	u32 priority_b_cnt = PRIORITY_OFF;
+	u32 tmp, arb_control3;
+	fixed20_12 a, b, c;
+
+	if (radeon_crtc->base.enabled && num_heads && mode) {
+		pixel_period = 1000000 / (u32)mode->clock;
+		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		priority_a_cnt = 0;
+		priority_b_cnt = 0;
+
+		wm.yclk = rdev->pm.current_mclk * 10;
+		wm.sclk = rdev->pm.current_sclk * 10;
+		wm.disp_clk = mode->clock;
+		wm.src_width = mode->crtc_hdisplay;
+		wm.active_time = mode->crtc_hdisplay * pixel_period;
+		wm.blank_time = line_time - wm.active_time;
+		wm.interlaced = false;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			wm.interlaced = true;
+		wm.vsc = radeon_crtc->vsc;
+		wm.vtaps = 1;
+		if (radeon_crtc->rmx_type != RMX_OFF)
+			wm.vtaps = 2;
+		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
+		wm.lb_size = lb_size;
+		if (rdev->family == CHIP_ARUBA)
+			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
+		else
+			wm.dram_channels = si_get_number_of_dram_channels(rdev);
+		wm.num_heads = num_heads;
+
+		/* set for high clocks */
+		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
+		/* set for low clocks */
+		/* wm.yclk = low clk; wm.sclk = low clk */
+		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
+
+		/* possibly force display priority to high */
+		/* should really do this at mode validation time... */
+		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
+		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
+		    !dce6_check_latency_hiding(&wm) ||
+		    (rdev->disp_priority == 2)) {
+			DRM_DEBUG_KMS("force priority to high\n");
+			priority_a_cnt |= PRIORITY_ALWAYS_ON;
+			priority_b_cnt |= PRIORITY_ALWAYS_ON;
+		}
+
+		a.full = dfixed_const(1000);
+		b.full = dfixed_const(mode->clock);
+		b.full = dfixed_div(b, a);
+		c.full = dfixed_const(latency_watermark_a);
+		c.full = dfixed_mul(c, b);
+		c.full = dfixed_mul(c, radeon_crtc->hsc);
+		c.full = dfixed_div(c, a);
+		a.full = dfixed_const(16);
+		c.full = dfixed_div(c, a);
+		priority_a_mark = dfixed_trunc(c);
+		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
+
+		a.full = dfixed_const(1000);
+		b.full = dfixed_const(mode->clock);
+		b.full = dfixed_div(b, a);
+		c.full = dfixed_const(latency_watermark_b);
+		c.full = dfixed_mul(c, b);
+		c.full = dfixed_mul(c, radeon_crtc->hsc);
+		c.full = dfixed_div(c, a);
+		a.full = dfixed_const(16);
+		c.full = dfixed_div(c, a);
+		priority_b_mark = dfixed_trunc(c);
+		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+	}
+
+	/* select wm A */
+	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
+	tmp = arb_control3;
+	tmp &= ~LATENCY_WATERMARK_MASK(3);
+	tmp |= LATENCY_WATERMARK_MASK(1);
+	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
+	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
+	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
+		LATENCY_HIGH_WATERMARK(line_time)));
+	/* select wm B */
+	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
+	tmp &= ~LATENCY_WATERMARK_MASK(3);
+	tmp |= LATENCY_WATERMARK_MASK(2);
+	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
+	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
+	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
+		LATENCY_HIGH_WATERMARK(line_time)));
+	/* restore original selection */
+	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
+
+	/* write the priority marks */
+	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
+	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
+
+}
+
+void dce6_bandwidth_update(struct radeon_device *rdev)
+{
+	struct drm_display_mode *mode0 = NULL;
+	struct drm_display_mode *mode1 = NULL;
+	u32 num_heads = 0, lb_size;
+	int i;
+
+	radeon_update_display_priority(rdev);
+
+	for (i = 0; i < rdev->num_crtc; i++) {
+		if (rdev->mode_info.crtcs[i]->base.enabled)
+			num_heads++;
+	}
+	for (i = 0; i < rdev->num_crtc; i += 2) {
+		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
+		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
+		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
+		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
+		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
+		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
+	}
+}
+
+/*
+ * Core functions
+ */
+static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
+					   u32 num_tile_pipes,
+					   u32 num_backends_per_asic,
+					   u32 *backend_disable_mask_per_asic,
+					   u32 num_shader_engines)
+{
+	u32 backend_map = 0;
+	u32 enabled_backends_mask = 0;
+	u32 enabled_backends_count = 0;
+	u32 num_backends_per_se;
+	u32 cur_pipe;
+	u32 swizzle_pipe[SI_MAX_PIPES];
+	u32 cur_backend = 0;
+	u32 i;
+	bool force_no_swizzle;
+
+	/* force legal values */
+	if (num_tile_pipes < 1)
+		num_tile_pipes = 1;
+	if (num_tile_pipes > rdev->config.si.max_tile_pipes)
+		num_tile_pipes = rdev->config.si.max_tile_pipes;
+	if (num_shader_engines < 1)
+		num_shader_engines = 1;
+	if (num_shader_engines > rdev->config.si.max_shader_engines)
+		num_shader_engines = rdev->config.si.max_shader_engines;
+	if (num_backends_per_asic < num_shader_engines)
+		num_backends_per_asic = num_shader_engines;
+	if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
+		num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
+
+	/* make sure we have the same number of backends per se */
+	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
+	/* set up the number of backends per se */
+	num_backends_per_se = num_backends_per_asic / num_shader_engines;
+	if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
+		num_backends_per_se = rdev->config.si.max_backends_per_se;
+		num_backends_per_asic = num_backends_per_se * num_shader_engines;
+	}
+
+	/* create enable mask and count for enabled backends */
+	for (i = 0; i < SI_MAX_BACKENDS; ++i) {
+		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
+			enabled_backends_mask |= (1 << i);
+			++enabled_backends_count;
+		}
+		if (enabled_backends_count == num_backends_per_asic)
+			break;
+	}
+
+	/* force the backends mask to match the current number of backends */
+	if (enabled_backends_count != num_backends_per_asic) {
+		u32 this_backend_enabled;
+		u32 shader_engine;
+		u32 backend_per_se;
+
+		enabled_backends_mask = 0;
+		enabled_backends_count = 0;
+		*backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
+		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
+			/* calc the current se */
+			shader_engine = i / rdev->config.si.max_backends_per_se;
+			/* calc the backend per se */
+			backend_per_se = i % rdev->config.si.max_backends_per_se;
+			/* default to not enabled */
+			this_backend_enabled = 0;
+			if ((shader_engine < num_shader_engines) &&
+			    (backend_per_se < num_backends_per_se))
+				this_backend_enabled = 1;
+			if (this_backend_enabled) {
+				enabled_backends_mask |= (1 << i);
+				*backend_disable_mask_per_asic &= ~(1 << i);
+				++enabled_backends_count;
+			}
+		}
+	}
+
+
+	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+		force_no_swizzle = true;
+		break;
+	default:
+		force_no_swizzle = false;
+		break;
+	}
+	if (force_no_swizzle) {
+		bool last_backend_enabled = false;
+
+		force_no_swizzle = false;
+		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
+			if (((enabled_backends_mask >> i) & 1) == 1) {
+				if (last_backend_enabled)
+					force_no_swizzle = true;
+				last_backend_enabled = true;
+			} else
+				last_backend_enabled = false;
+		}
+	}
+
+	switch (num_tile_pipes) {
+	case 1:
+	case 3:
+	case 5:
+	case 7:
+		DRM_ERROR("odd number of pipes!\n");
+		break;
+	case 2:
+		swizzle_pipe[0] = 0;
+		swizzle_pipe[1] = 1;
+		break;
+	case 4:
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 1;
+			swizzle_pipe[3] = 3;
+		}
+		break;
+	case 6:
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 1;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 5;
+		}
+		break;
+	case 8:
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+			swizzle_pipe[6] = 6;
+			swizzle_pipe[7] = 7;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 6;
+			swizzle_pipe[4] = 1;
+			swizzle_pipe[5] = 3;
+			swizzle_pipe[6] = 5;
+			swizzle_pipe[7] = 7;
+		}
+		break;
+	}
+
+	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
+		while (((1 << cur_backend) & enabled_backends_mask) == 0)
+			cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
+
+		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
+
+		cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
+	}
+
+	return backend_map;
+}
+
+static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
+					u32 disable_mask_per_se,
+					u32 max_disable_mask_per_se,
+					u32 num_shader_engines)
+{
+	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
+	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
+
+	if (num_shader_engines == 1)
+		return disable_mask_per_asic;
+	else if (num_shader_engines == 2)
+		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
+	else
+		return 0xffffffff;
+}
+
+static void si_tiling_mode_table_init(struct radeon_device *rdev)
+{
+	const u32 num_tile_mode_states = 32;
+	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+
+	switch (rdev->config.si.mem_row_size_in_kb) {
+	case 1:
+		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
+		break;
+	case 2:
+	default:
+		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
+		break;
+	case 4:
+		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
+		break;
+	}
+
+	if ((rdev->family == CHIP_TAHITI) ||
+	    (rdev->family == CHIP_PITCAIRN)) {
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
+			switch (reg_offset) {
+			case 0:  /* non-AA compressed depth or any compressed stencil */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 1:  /* 2xAA/4xAA compressed depth only */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 2:  /* 8xAA compressed depth only */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 8:  /* 1D and 1D Array Surfaces */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 9:  /* Displayable maps. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 10:  /* Display 8bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 11:  /* Display 16bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 12:  /* Display 32bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 13:  /* Thin. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 14:  /* Thin 8 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 15:  /* Thin 16 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 16:  /* Thin 32 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 17:  /* Thin 64 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			case 21:  /* 8 bpp PRT. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 22:  /* 16 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 23:  /* 32 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 24:  /* 64 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 25:  /* 128 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+						 NUM_BANKS(ADDR_SURF_8_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			default:
+				gb_tile_moden = 0;
+				break;
+			}
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
+		}
+	} else if (rdev->family == CHIP_VERDE) {
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
+			switch (reg_offset) {
+			case 0:  /* non-AA compressed depth or any compressed stencil */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 1:  /* 2xAA/4xAA compressed depth only */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 2:  /* 8xAA compressed depth only */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 8:  /* 1D and 1D Array Surfaces */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 9:  /* Displayable maps. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 10:  /* Display 8bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 11:  /* Display 16bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 12:  /* Display 32bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 13:  /* Thin. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 14:  /* Thin 8 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 15:  /* Thin 16 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 16:  /* Thin 32 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 17:  /* Thin 64 bpp. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+						 TILE_SPLIT(split_equal_to_row_size) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 21:  /* 8 bpp PRT. */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 22:  /* 16 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+				break;
+			case 23:  /* 32 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 24:  /* 64 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						 NUM_BANKS(ADDR_SURF_16_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+				break;
+			case 25:  /* 128 bpp PRT */
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+						 NUM_BANKS(ADDR_SURF_8_BANK) |
+						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+				break;
+			default:
+				gb_tile_moden = 0;
+				break;
+			}
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
+		}
+	} else
+		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
+}
+
+static void si_gpu_init(struct radeon_device *rdev)
+{
+	u32 cc_rb_backend_disable = 0;
+	u32 cc_gc_shader_array_config;
+	u32 gb_addr_config = 0;
+	u32 mc_shared_chmap, mc_arb_ramcfg;
+	u32 gb_backend_map;
+	u32 cgts_tcc_disable;
+	u32 sx_debug_1;
+	u32 gc_user_shader_array_config;
+	u32 gc_user_rb_backend_disable;
+	u32 cgts_user_tcc_disable;
+	u32 hdp_host_path_cntl;
+	u32 tmp;
+	int i, j;
+
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+		rdev->config.si.max_shader_engines = 2;
+		rdev->config.si.max_pipes_per_simd = 4;
+		rdev->config.si.max_tile_pipes = 12;
+		rdev->config.si.max_simds_per_se = 8;
+		rdev->config.si.max_backends_per_se = 4;
+		rdev->config.si.max_texture_channel_caches = 12;
+		rdev->config.si.max_gprs = 256;
+		rdev->config.si.max_gs_threads = 32;
+		rdev->config.si.max_hw_contexts = 8;
+
+		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
+		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
+		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		break;
+	case CHIP_PITCAIRN:
+		rdev->config.si.max_shader_engines = 2;
+		rdev->config.si.max_pipes_per_simd = 4;
+		rdev->config.si.max_tile_pipes = 8;
+		rdev->config.si.max_simds_per_se = 5;
+		rdev->config.si.max_backends_per_se = 4;
+		rdev->config.si.max_texture_channel_caches = 8;
+		rdev->config.si.max_gprs = 256;
+		rdev->config.si.max_gs_threads = 32;
+		rdev->config.si.max_hw_contexts = 8;
+
+		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
+		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
+		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		break;
+	case CHIP_VERDE:
+	default:
+		rdev->config.si.max_shader_engines = 1;
+		rdev->config.si.max_pipes_per_simd = 4;
+		rdev->config.si.max_tile_pipes = 4;
+		rdev->config.si.max_simds_per_se = 2;
+		rdev->config.si.max_backends_per_se = 4;
+		rdev->config.si.max_texture_channel_caches = 4;
+		rdev->config.si.max_gprs = 256;
+		rdev->config.si.max_gs_threads = 32;
+		rdev->config.si.max_hw_contexts = 8;
+
+		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
+		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
+		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		break;
+	}
+
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+
+	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+	evergreen_fix_pci_max_read_req_size(rdev);
+
+	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
+
+	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
+	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
+
+	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
+	cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+	cgts_tcc_disable = 0xffff0000;
+	for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
+		cgts_tcc_disable &= ~(1 << (16 + i));
+	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
+	gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
+
+	rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
+	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
+	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
+	rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
+	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
+	rdev->config.si.backend_disable_mask_per_asic =
+		si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
+					     rdev->config.si.num_shader_engines);
+	rdev->config.si.backend_map =
+		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
+						rdev->config.si.num_backends_per_se *
+						rdev->config.si.num_shader_engines,
+						&rdev->config.si.backend_disable_mask_per_asic,
+						rdev->config.si.num_shader_engines);
+	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
+	rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
+	rdev->config.si.mem_max_burst_length_bytes = 256;
+	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
+	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+	if (rdev->config.si.mem_row_size_in_kb > 4)
+		rdev->config.si.mem_row_size_in_kb = 4;
+	/* XXX use MC settings? */
+	rdev->config.si.shader_engine_tile_size = 32;
+	rdev->config.si.num_gpus = 1;
+	rdev->config.si.multi_gpu_tile_size = 64;
+
+	gb_addr_config = 0;
+	switch (rdev->config.si.num_tile_pipes) {
+	case 1:
+		gb_addr_config |= NUM_PIPES(0);
+		break;
+	case 2:
+		gb_addr_config |= NUM_PIPES(1);
+		break;
+	case 4:
+		gb_addr_config |= NUM_PIPES(2);
+		break;
+	case 8:
+	default:
+		gb_addr_config |= NUM_PIPES(3);
+		break;
+	}
+
+	tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
+	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
+	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
+	tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
+	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
+	switch (rdev->config.si.num_gpus) {
+	case 1:
+	default:
+		gb_addr_config |= NUM_GPUS(0);
+		break;
+	case 2:
+		gb_addr_config |= NUM_GPUS(1);
+		break;
+	case 4:
+		gb_addr_config |= NUM_GPUS(2);
+		break;
+	}
+	switch (rdev->config.si.multi_gpu_tile_size) {
+	case 16:
+		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
+		break;
+	case 32:
+	default:
+		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
+		break;
+	case 64:
+		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
+		break;
+	case 128:
+		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
+		break;
+	}
+	switch (rdev->config.si.mem_row_size_in_kb) {
+	case 1:
+	default:
+		gb_addr_config |= ROW_SIZE(0);
+		break;
+	case 2:
+		gb_addr_config |= ROW_SIZE(1);
+		break;
+	case 4:
+		gb_addr_config |= ROW_SIZE(2);
+		break;
+	}
+
+	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
+	rdev->config.si.num_tile_pipes = (1 << tmp);
+	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
+	rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
+	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
+	rdev->config.si.num_shader_engines = tmp + 1;
+	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
+	rdev->config.si.num_gpus = tmp + 1;
+	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
+	rdev->config.si.multi_gpu_tile_size = 1 << tmp;
+	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
+	rdev->config.si.mem_row_size_in_kb = 1 << tmp;
+
+	gb_backend_map =
+		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
+						rdev->config.si.num_backends_per_se *
+						rdev->config.si.num_shader_engines,
+						&rdev->config.si.backend_disable_mask_per_asic,
+						rdev->config.si.num_shader_engines);
+
+	/* setup tiling info dword.  gb_addr_config is not adequate since it does
+	 * not have bank info, so create a custom tiling dword.
+	 * bits 3:0   num_pipes
+	 * bits 7:4   num_banks
+	 * bits 11:8  group_size
+	 * bits 15:12 row_size
+	 */
+	rdev->config.si.tile_config = 0;
+	switch (rdev->config.si.num_tile_pipes) {
+	case 1:
+		rdev->config.si.tile_config |= (0 << 0);
+		break;
+	case 2:
+		rdev->config.si.tile_config |= (1 << 0);
+		break;
+	case 4:
+		rdev->config.si.tile_config |= (2 << 0);
+		break;
+	case 8:
+	default:
+		/* XXX what about 12? */
+		rdev->config.si.tile_config |= (3 << 0);
+		break;
+	}
+	rdev->config.si.tile_config |=
+		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+	rdev->config.si.tile_config |=
+		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
+	rdev->config.si.tile_config |=
+		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
+
+	rdev->config.si.backend_map = gb_backend_map;
+	WREG32(GB_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
+	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+
+	/* primary versions */
+	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
+
+	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
+
+	/* user versions */
+	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
+
+	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
+
+	si_tiling_mode_table_init(rdev);
+
+	/* set HW defaults for 3D engine */
+	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
+				     ROQ_IB2_START(0x2b)));
+	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
+
+	sx_debug_1 = RREG32(SX_DEBUG_1);
+	WREG32(SX_DEBUG_1, sx_debug_1);
+
+	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
+
+	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
+				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
+				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
+				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
+
+	WREG32(VGT_NUM_INSTANCES, 1);
+
+	WREG32(CP_PERFMON_CNTL, 0);
+
+	WREG32(SQ_CONFIG, 0);
+
+	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
+					  FORCE_EOV_MAX_REZ_CNT(255)));
+
+	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
+	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
+
+	WREG32(VGT_GS_VERTEX_REUSE, 16);
+	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+
+	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
+	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
+	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
+	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
+	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
+	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
+	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
+	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
+
+	tmp = RREG32(HDP_MISC_CNTL);
+	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
+	WREG32(HDP_MISC_CNTL, tmp);
+
+	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
+	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
+
+	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
+
+	udelay(50);
+}
+
+/*
+ * GPU scratch registers helpers function.
+ */
+static void si_scratch_init(struct radeon_device *rdev)
+{
+	int i;
+
+	rdev->scratch.num_reg = 7;
+	rdev->scratch.reg_base = SCRATCH_REG0;
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		rdev->scratch.free[i] = true;
+		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
+	}
+}
+
+void si_fence_ring_emit(struct radeon_device *rdev,
+			struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	/* flush read cache over gart */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);
+	radeon_ring_write(ring, 0xFFFFFFFF);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 10); /* poll interval */
+	/* EVENT_WRITE_EOP - flush caches, send int */
+	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, 0);
+}
+
+/*
+ * IB stuff
+ */
+void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+	u32 header;
+
+	if (ib->is_const_ib)
+		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
+	else
+		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+	radeon_ring_write(ring, header);
+	radeon_ring_write(ring,
+#ifdef __BIG_ENDIAN
+			  (2 << 0) |
+#endif
+			  (ib->gpu_addr & 0xFFFFFFFC));
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
+	radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
+
+	/* flush read cache over gart for this vmid */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
+	radeon_ring_write(ring, ib->vm_id);
+	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+			  PACKET3_TC_ACTION_ENA |
+			  PACKET3_SH_KCACHE_ACTION_ENA |
+			  PACKET3_SH_ICACHE_ACTION_ENA);
+	radeon_ring_write(ring, 0xFFFFFFFF);
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 10); /* poll interval */
+}
+
+/*
+ * CP.
+ */
+static void si_cp_enable(struct radeon_device *rdev, bool enable)
+{
+	if (enable)
+		WREG32(CP_ME_CNTL, 0);
+	else {
+		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
+		WREG32(SCRATCH_UMSK, 0);
+	}
+	udelay(50);
+}
+
+static int si_cp_load_microcode(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->me_fw || !rdev->pfp_fw)
+		return -EINVAL;
+
+	si_cp_enable(rdev, false);
+
+	/* PFP */
+	fw_data = (const __be32 *)rdev->pfp_fw->data;
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
+		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+
+	/* CE */
+	fw_data = (const __be32 *)rdev->ce_fw->data;
+	WREG32(CP_CE_UCODE_ADDR, 0);
+	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
+		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_CE_UCODE_ADDR, 0);
+
+	/* ME */
+	fw_data = (const __be32 *)rdev->me_fw->data;
+	WREG32(CP_ME_RAM_WADDR, 0);
+	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
+		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+	WREG32(CP_ME_RAM_WADDR, 0);
+
+	WREG32(CP_PFP_UCODE_ADDR, 0);
+	WREG32(CP_CE_UCODE_ADDR, 0);
+	WREG32(CP_ME_RAM_WADDR, 0);
+	WREG32(CP_ME_RAM_RADDR, 0);
+	return 0;
+}
+
+static int si_cp_start(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	int r, i;
+
+	r = radeon_ring_lock(rdev, ring, 7 + 4);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+	/* init the CP */
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
+	radeon_ring_write(ring, 0x1);
+	radeon_ring_write(ring, 0x0);
+	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
+	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, 0);
+
+	/* init the CE partitions */
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+	radeon_ring_write(ring, 0xc000);
+	radeon_ring_write(ring, 0xe000);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	si_cp_enable(rdev, true);
+
+	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		return r;
+	}
+
+	/* setup clear context state */
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+	for (i = 0; i < si_default_size; i++)
+		radeon_ring_write(ring, si_default_state[i]);
+
+	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+	/* set clear context state */
+	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+	radeon_ring_write(ring, 0x00000316);
+	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
+
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
+		ring = &rdev->ring[i];
+		r = radeon_ring_lock(rdev, ring, 2);
+
+		/* clear the compute context state */
+		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
+		radeon_ring_write(ring, 0);
+
+		radeon_ring_unlock_commit(rdev, ring);
+	}
+
+	return 0;
+}
+
+static void si_cp_fini(struct radeon_device *rdev)
+{
+	si_cp_enable(rdev, false);
+	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+}
+
+static int si_cp_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 tmp;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
+	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
+				 SOFT_RESET_PA |
+				 SOFT_RESET_VGT |
+				 SOFT_RESET_SPI |
+				 SOFT_RESET_SX));
+	RREG32(GRBM_SOFT_RESET);
+	mdelay(15);
+	WREG32(GRBM_SOFT_RESET, 0);
+	RREG32(GRBM_SOFT_RESET);
+
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
+	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
+
+	/* Set the write pointer delay */
+	WREG32(CP_RB_WPTR_DELAY, 0);
+
+	WREG32(CP_DEBUG, 0);
+	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
+
+	/* ring 0 - compute and gfx */
+	/* Set ring buffer size */
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB0_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB0_WPTR, ring->wptr);
+
+	/* set the wb address wether it's enabled or not */
+	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
+
+	if (rdev->wb.enabled)
+		WREG32(SCRATCH_UMSK, 0xff);
+	else {
+		tmp |= RB_NO_UPDATE;
+		WREG32(SCRATCH_UMSK, 0);
+	}
+
+	mdelay(1);
+	WREG32(CP_RB0_CNTL, tmp);
+
+	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB0_RPTR);
+
+	/* ring1  - compute only */
+	/* Set ring buffer size */
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB1_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB1_WPTR, ring->wptr);
+
+	/* set the wb address wether it's enabled or not */
+	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
+
+	mdelay(1);
+	WREG32(CP_RB1_CNTL, tmp);
+
+	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB1_RPTR);
+
+	/* ring2 - compute only */
+	/* Set ring buffer size */
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	rb_bufsz = drm_order(ring->ring_size / 8);
+	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+	tmp |= BUF_SWAP_32BIT;
+#endif
+	WREG32(CP_RB2_CNTL, tmp);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
+	ring->wptr = 0;
+	WREG32(CP_RB2_WPTR, ring->wptr);
+
+	/* set the wb address wether it's enabled or not */
+	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
+
+	mdelay(1);
+	WREG32(CP_RB2_CNTL, tmp);
+
+	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
+
+	ring->rptr = RREG32(CP_RB2_RPTR);
+
+	/* start the rings */
+	si_cp_start(rdev);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
+	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
+	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
+	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	if (r) {
+		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+		return r;
+	}
+	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+	if (r) {
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+	}
+	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+	if (r) {
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+	}
+
+	return 0;
+}
+
+bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 srbm_status;
+	u32 grbm_status, grbm_status2;
+	u32 grbm_status_se0, grbm_status_se1;
+	struct r100_gpu_lockup *lockup = &rdev->config.si.lockup;
+	int r;
+
+	srbm_status = RREG32(SRBM_STATUS);
+	grbm_status = RREG32(GRBM_STATUS);
+	grbm_status2 = RREG32(GRBM_STATUS2);
+	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
+	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
+	if (!(grbm_status & GUI_ACTIVE)) {
+		r100_gpu_lockup_update(lockup, ring);
+		return false;
+	}
+	/* force CP activities */
+	r = radeon_ring_lock(rdev, ring, 2);
+	if (!r) {
+		/* PACKET2 NOP */
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_write(ring, 0x80000000);
+		radeon_ring_unlock_commit(rdev, ring);
+	}
+	/* XXX deal with CP0,1,2 */
+	ring->rptr = RREG32(ring->rptr_reg);
+	return r100_gpu_cp_is_lockup(rdev, lockup, ring);
+}
+
+static int si_gpu_soft_reset(struct radeon_device *rdev)
+{
+	struct evergreen_mc_save save;
+	u32 grbm_reset = 0;
+
+	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
+		return 0;
+
+	dev_info(rdev->dev, "GPU softreset \n");
+	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
+		RREG32(GRBM_STATUS));
+	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
+		RREG32(GRBM_STATUS2));
+	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
+		RREG32(GRBM_STATUS_SE0));
+	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
+		RREG32(GRBM_STATUS_SE1));
+	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
+		RREG32(SRBM_STATUS));
+	evergreen_mc_stop(rdev, &save);
+	if (radeon_mc_wait_for_idle(rdev)) {
+		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+	}
+	/* Disable CP parsing/prefetching */
+	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
+
+	/* reset all the gfx blocks */
+	grbm_reset = (SOFT_RESET_CP |
+		      SOFT_RESET_CB |
+		      SOFT_RESET_DB |
+		      SOFT_RESET_GDS |
+		      SOFT_RESET_PA |
+		      SOFT_RESET_SC |
+		      SOFT_RESET_SPI |
+		      SOFT_RESET_SX |
+		      SOFT_RESET_TC |
+		      SOFT_RESET_TA |
+		      SOFT_RESET_VGT |
+		      SOFT_RESET_IA);
+
+	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
+	WREG32(GRBM_SOFT_RESET, grbm_reset);
+	(void)RREG32(GRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(GRBM_SOFT_RESET, 0);
+	(void)RREG32(GRBM_SOFT_RESET);
+	/* Wait a little for things to settle down */
+	udelay(50);
+	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
+		RREG32(GRBM_STATUS));
+	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
+		RREG32(GRBM_STATUS2));
+	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
+		RREG32(GRBM_STATUS_SE0));
+	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
+		RREG32(GRBM_STATUS_SE1));
+	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
+		RREG32(SRBM_STATUS));
+	evergreen_mc_resume(rdev, &save);
+	return 0;
+}
+
+int si_asic_reset(struct radeon_device *rdev)
+{
+	return si_gpu_soft_reset(rdev);
+}
+
+/* MC */
+static void si_mc_program(struct radeon_device *rdev)
+{
+	struct evergreen_mc_save save;
+	u32 tmp;
+	int i, j;
+
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+	evergreen_mc_stop(rdev, &save);
+	if (radeon_mc_wait_for_idle(rdev)) {
+		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+	}
+	/* Lockout access through VGA aperture*/
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+	/* Update configuration */
+	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+	       rdev->mc.vram_start >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+	       rdev->mc.vram_end >> 12);
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+	       rdev->vram_scratch.gpu_addr >> 12);
+	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	/* XXX double check these! */
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+	WREG32(MC_VM_AGP_BASE, 0);
+	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	if (radeon_mc_wait_for_idle(rdev)) {
+		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+	}
+	evergreen_mc_resume(rdev, &save);
+	/* we need to own VRAM, so turn off the VGA renderer here
+	 * to stop it overwriting our objects */
+	rv515_vga_render_disable(rdev);
+}
+
+/* SI MC address space is 40 bits */
+static void si_vram_location(struct radeon_device *rdev,
+			     struct radeon_mc *mc, u64 base)
+{
+	mc->vram_start = base;
+	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
+		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
+		mc->real_vram_size = mc->aper_size;
+		mc->mc_vram_size = mc->aper_size;
+	}
+	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+			mc->mc_vram_size >> 20, mc->vram_start,
+			mc->vram_end, mc->real_vram_size >> 20);
+}
+
+static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
+{
+	u64 size_af, size_bf;
+
+	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
+	size_bf = mc->vram_start & ~mc->gtt_base_align;
+	if (size_bf > size_af) {
+		if (mc->gtt_size > size_bf) {
+			dev_warn(rdev->dev, "limiting GTT\n");
+			mc->gtt_size = size_bf;
+		}
+		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
+	} else {
+		if (mc->gtt_size > size_af) {
+			dev_warn(rdev->dev, "limiting GTT\n");
+			mc->gtt_size = size_af;
+		}
+		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
+	}
+	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
+	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
+			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
+}
+
+static void si_vram_gtt_location(struct radeon_device *rdev,
+				 struct radeon_mc *mc)
+{
+	if (mc->mc_vram_size > 0xFFC0000000ULL) {
+		/* leave room for at least 1024M GTT */
+		dev_warn(rdev->dev, "limiting VRAM\n");
+		mc->real_vram_size = 0xFFC0000000ULL;
+		mc->mc_vram_size = 0xFFC0000000ULL;
+	}
+	si_vram_location(rdev, &rdev->mc, 0);
+	rdev->mc.gtt_base_align = 0;
+	si_gtt_location(rdev, mc);
+}
+
+static int si_mc_init(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int chansize, numchan;
+
+	/* Get VRAM informations */
+	rdev->mc.vram_is_ddr = true;
+	tmp = RREG32(MC_ARB_RAMCFG);
+	if (tmp & CHANSIZE_OVERRIDE) {
+		chansize = 16;
+	} else if (tmp & CHANSIZE_MASK) {
+		chansize = 64;
+	} else {
+		chansize = 32;
+	}
+	tmp = RREG32(MC_SHARED_CHMAP);
+	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+	case 0:
+	default:
+		numchan = 1;
+		break;
+	case 1:
+		numchan = 2;
+		break;
+	case 2:
+		numchan = 4;
+		break;
+	case 3:
+		numchan = 8;
+		break;
+	case 4:
+		numchan = 3;
+		break;
+	case 5:
+		numchan = 6;
+		break;
+	case 6:
+		numchan = 10;
+		break;
+	case 7:
+		numchan = 12;
+		break;
+	case 8:
+		numchan = 16;
+		break;
+	}
+	rdev->mc.vram_width = numchan * chansize;
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
+	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
+	/* size in MB on si */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	si_vram_gtt_location(rdev, &rdev->mc);
+	radeon_update_bandwidth_info(rdev);
+
+	return 0;
+}
+
+/*
+ * GART
+ */
+void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+	/* flush hdp cache */
+	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+
+	/* bits 0-15 are the VM contexts0-15 */
+	WREG32(VM_INVALIDATE_REQUEST, 1);
+}
+
+int si_pcie_gart_enable(struct radeon_device *rdev)
+{
+	int r, i;
+
+	if (rdev->gart.robj == NULL) {
+		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
+		return -EINVAL;
+	}
+	r = radeon_gart_table_vram_pin(rdev);
+	if (r)
+		return r;
+	radeon_gart_restore(rdev);
+	/* Setup TLB control */
+	WREG32(MC_VM_MX_L1_TLB_CNTL,
+	       (0xA << 7) |
+	       ENABLE_L1_TLB |
+	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+	       ENABLE_ADVANCED_DRIVER_MODEL |
+	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+	       EFFECTIVE_L2_QUEUE_SIZE(7) |
+	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
+	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
+	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
+	/* setup context0 */
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	WREG32(VM_CONTEXT0_CNTL2, 0);
+	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+
+	WREG32(0x15D4, 0);
+	WREG32(0x15D8, 0);
+	WREG32(0x15DC, 0);
+
+	/* empty context1-15 */
+	/* FIXME start with 1G, once using 2 level pt switch to full
+	 * vm size space
+	 */
+	/* set vm size, must be a multiple of 4 */
+	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
+	for (i = 1; i < 16; i++) {
+		if (i < 8)
+			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
+			       rdev->gart.table_addr >> 12);
+		else
+			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
+			       rdev->gart.table_addr >> 12);
+	}
+
+	/* enable context1-15 */
+	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+	       (u32)(rdev->dummy_page.addr >> 12));
+	WREG32(VM_CONTEXT1_CNTL2, 0);
+	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+
+	si_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void si_pcie_gart_disable(struct radeon_device *rdev)
+{
+	/* Disable all tables */
+	WREG32(VM_CONTEXT0_CNTL, 0);
+	WREG32(VM_CONTEXT1_CNTL, 0);
+	/* Setup TLB control */
+	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
+	       EFFECTIVE_L2_QUEUE_SIZE(7) |
+	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
+	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
+	radeon_gart_table_vram_unpin(rdev);
+}
+
+void si_pcie_gart_fini(struct radeon_device *rdev)
+{
+	si_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+/* vm parser */
+static bool si_vm_reg_valid(u32 reg)
+{
+	/* context regs are fine */
+	if (reg >= 0x28000)
+		return true;
+
+	/* check config regs */
+	switch (reg) {
+	case GRBM_GFX_INDEX:
+	case VGT_VTX_VECT_EJECT_REG:
+	case VGT_CACHE_INVALIDATION:
+	case VGT_ESGS_RING_SIZE:
+	case VGT_GSVS_RING_SIZE:
+	case VGT_GS_VERTEX_REUSE:
+	case VGT_PRIMITIVE_TYPE:
+	case VGT_INDEX_TYPE:
+	case VGT_NUM_INDICES:
+	case VGT_NUM_INSTANCES:
+	case VGT_TF_RING_SIZE:
+	case VGT_HS_OFFCHIP_PARAM:
+	case VGT_TF_MEMORY_BASE:
+	case PA_CL_ENHANCE:
+	case PA_SU_LINE_STIPPLE_VALUE:
+	case PA_SC_LINE_STIPPLE_STATE:
+	case PA_SC_ENHANCE:
+	case SQC_CACHES:
+	case SPI_STATIC_THREAD_MGMT_1:
+	case SPI_STATIC_THREAD_MGMT_2:
+	case SPI_STATIC_THREAD_MGMT_3:
+	case SPI_PS_MAX_WAVE_ID:
+	case SPI_CONFIG_CNTL:
+	case SPI_CONFIG_CNTL_1:
+	case TA_CNTL_AUX:
+		return true;
+	default:
+		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
+		return false;
+	}
+}
+
+static int si_vm_packet3_ce_check(struct radeon_device *rdev,
+				  u32 *ib, struct radeon_cs_packet *pkt)
+{
+	switch (pkt->opcode) {
+	case PACKET3_NOP:
+	case PACKET3_SET_BASE:
+	case PACKET3_SET_CE_DE_COUNTERS:
+	case PACKET3_LOAD_CONST_RAM:
+	case PACKET3_WRITE_CONST_RAM:
+	case PACKET3_WRITE_CONST_RAM_OFFSET:
+	case PACKET3_DUMP_CONST_RAM:
+	case PACKET3_INCREMENT_CE_COUNTER:
+	case PACKET3_WAIT_ON_DE_COUNTER:
+	case PACKET3_CE_WRITE:
+		break;
+	default:
+		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
+				   u32 *ib, struct radeon_cs_packet *pkt)
+{
+	u32 idx = pkt->idx + 1;
+	u32 idx_value = ib[idx];
+	u32 start_reg, end_reg, reg, i;
+
+	switch (pkt->opcode) {
+	case PACKET3_NOP:
+	case PACKET3_SET_BASE:
+	case PACKET3_CLEAR_STATE:
+	case PACKET3_INDEX_BUFFER_SIZE:
+	case PACKET3_DISPATCH_DIRECT:
+	case PACKET3_DISPATCH_INDIRECT:
+	case PACKET3_ALLOC_GDS:
+	case PACKET3_WRITE_GDS_RAM:
+	case PACKET3_ATOMIC_GDS:
+	case PACKET3_ATOMIC:
+	case PACKET3_OCCLUSION_QUERY:
+	case PACKET3_SET_PREDICATION:
+	case PACKET3_COND_EXEC:
+	case PACKET3_PRED_EXEC:
+	case PACKET3_DRAW_INDIRECT:
+	case PACKET3_DRAW_INDEX_INDIRECT:
+	case PACKET3_INDEX_BASE:
+	case PACKET3_DRAW_INDEX_2:
+	case PACKET3_CONTEXT_CONTROL:
+	case PACKET3_INDEX_TYPE:
+	case PACKET3_DRAW_INDIRECT_MULTI:
+	case PACKET3_DRAW_INDEX_AUTO:
+	case PACKET3_DRAW_INDEX_IMMD:
+	case PACKET3_NUM_INSTANCES:
+	case PACKET3_DRAW_INDEX_MULTI_AUTO:
+	case PACKET3_STRMOUT_BUFFER_UPDATE:
+	case PACKET3_DRAW_INDEX_OFFSET_2:
+	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
+	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
+	case PACKET3_MPEG_INDEX:
+	case PACKET3_WAIT_REG_MEM:
+	case PACKET3_MEM_WRITE:
+	case PACKET3_PFP_SYNC_ME:
+	case PACKET3_SURFACE_SYNC:
+	case PACKET3_EVENT_WRITE:
+	case PACKET3_EVENT_WRITE_EOP:
+	case PACKET3_EVENT_WRITE_EOS:
+	case PACKET3_SET_CONTEXT_REG:
+	case PACKET3_SET_CONTEXT_REG_INDIRECT:
+	case PACKET3_SET_SH_REG:
+	case PACKET3_SET_SH_REG_OFFSET:
+	case PACKET3_INCREMENT_DE_COUNTER:
+	case PACKET3_WAIT_ON_CE_COUNTER:
+	case PACKET3_WAIT_ON_AVAIL_BUFFER:
+	case PACKET3_ME_WRITE:
+		break;
+	case PACKET3_COPY_DATA:
+		if ((idx_value & 0xf00) == 0) {
+			reg = ib[idx + 3] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	case PACKET3_WRITE_DATA:
+		if ((idx_value & 0xf00) == 0) {
+			start_reg = ib[idx + 1] * 4;
+			if (idx_value & 0x10000) {
+				if (!si_vm_reg_valid(start_reg))
+					return -EINVAL;
+			} else {
+				for (i = 0; i < (pkt->count - 2); i++) {
+					reg = start_reg + (4 * i);
+					if (!si_vm_reg_valid(reg))
+						return -EINVAL;
+				}
+			}
+		}
+		break;
+	case PACKET3_COND_WRITE:
+		if (idx_value & 0x100) {
+			reg = ib[idx + 5] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	case PACKET3_COPY_DW:
+		if (idx_value & 0x2) {
+			reg = ib[idx + 3] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	case PACKET3_SET_CONFIG_REG:
+		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
+		end_reg = 4 * pkt->count + start_reg - 4;
+		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
+		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
+		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
+			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
+			return -EINVAL;
+		}
+		for (i = 0; i < pkt->count; i++) {
+			reg = start_reg + (4 * i);
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int si_vm_packet3_compute_check(struct radeon_device *rdev,
+				       u32 *ib, struct radeon_cs_packet *pkt)
+{
+	u32 idx = pkt->idx + 1;
+	u32 idx_value = ib[idx];
+	u32 start_reg, reg, i;
+
+	switch (pkt->opcode) {
+	case PACKET3_NOP:
+	case PACKET3_SET_BASE:
+	case PACKET3_CLEAR_STATE:
+	case PACKET3_DISPATCH_DIRECT:
+	case PACKET3_DISPATCH_INDIRECT:
+	case PACKET3_ALLOC_GDS:
+	case PACKET3_WRITE_GDS_RAM:
+	case PACKET3_ATOMIC_GDS:
+	case PACKET3_ATOMIC:
+	case PACKET3_OCCLUSION_QUERY:
+	case PACKET3_SET_PREDICATION:
+	case PACKET3_COND_EXEC:
+	case PACKET3_PRED_EXEC:
+	case PACKET3_CONTEXT_CONTROL:
+	case PACKET3_STRMOUT_BUFFER_UPDATE:
+	case PACKET3_WAIT_REG_MEM:
+	case PACKET3_MEM_WRITE:
+	case PACKET3_PFP_SYNC_ME:
+	case PACKET3_SURFACE_SYNC:
+	case PACKET3_EVENT_WRITE:
+	case PACKET3_EVENT_WRITE_EOP:
+	case PACKET3_EVENT_WRITE_EOS:
+	case PACKET3_SET_CONTEXT_REG:
+	case PACKET3_SET_CONTEXT_REG_INDIRECT:
+	case PACKET3_SET_SH_REG:
+	case PACKET3_SET_SH_REG_OFFSET:
+	case PACKET3_INCREMENT_DE_COUNTER:
+	case PACKET3_WAIT_ON_CE_COUNTER:
+	case PACKET3_WAIT_ON_AVAIL_BUFFER:
+	case PACKET3_ME_WRITE:
+		break;
+	case PACKET3_COPY_DATA:
+		if ((idx_value & 0xf00) == 0) {
+			reg = ib[idx + 3] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	case PACKET3_WRITE_DATA:
+		if ((idx_value & 0xf00) == 0) {
+			start_reg = ib[idx + 1] * 4;
+			if (idx_value & 0x10000) {
+				if (!si_vm_reg_valid(start_reg))
+					return -EINVAL;
+			} else {
+				for (i = 0; i < (pkt->count - 2); i++) {
+					reg = start_reg + (4 * i);
+					if (!si_vm_reg_valid(reg))
+						return -EINVAL;
+				}
+			}
+		}
+		break;
+	case PACKET3_COND_WRITE:
+		if (idx_value & 0x100) {
+			reg = ib[idx + 5] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	case PACKET3_COPY_DW:
+		if (idx_value & 0x2) {
+			reg = ib[idx + 3] * 4;
+			if (!si_vm_reg_valid(reg))
+				return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	int ret = 0;
+	u32 idx = 0;
+	struct radeon_cs_packet pkt;
+
+	do {
+		pkt.idx = idx;
+		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
+		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
+		pkt.one_reg_wr = 0;
+		switch (pkt.type) {
+		case PACKET_TYPE0:
+			dev_err(rdev->dev, "Packet0 not allowed!\n");
+			ret = -EINVAL;
+			break;
+		case PACKET_TYPE2:
+			idx += 1;
+			break;
+		case PACKET_TYPE3:
+			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
+			if (ib->is_const_ib)
+				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
+			else {
+				switch (ib->fence->ring) {
+				case RADEON_RING_TYPE_GFX_INDEX:
+					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
+					break;
+				case CAYMAN_RING_TYPE_CP1_INDEX:
+				case CAYMAN_RING_TYPE_CP2_INDEX:
+					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
+					break;
+				default:
+					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
+					ret = -EINVAL;
+					break;
+				}
+			}
+			idx += pkt.count + 2;
+			break;
+		default:
+			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
+			ret = -EINVAL;
+			break;
+		}
+		if (ret)
+			break;
+	} while (idx < ib->length_dw);
+
+	return ret;
+}
+
+/*
+ * vm
+ */
+int si_vm_init(struct radeon_device *rdev)
+{
+	/* number of VMs */
+	rdev->vm_manager.nvm = 16;
+	/* base offset of vram pages */
+	rdev->vm_manager.vram_base_offset = 0;
+
+	return 0;
+}
+
+void si_vm_fini(struct radeon_device *rdev)
+{
+}
+
+int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
+{
+	if (id < 8)
+		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
+	else
+		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
+		       vm->pt_gpu_addr >> 12);
+	/* flush hdp cache */
+	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+	/* bits 0-15 are the VM contexts0-15 */
+	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
+	return 0;
+}
+
+void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	if (vm->id < 8)
+		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
+	else
+		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
+	/* flush hdp cache */
+	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+	/* bits 0-15 are the VM contexts0-15 */
+	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
+}
+
+void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	if (vm->id == -1)
+		return;
+
+	/* flush hdp cache */
+	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+	/* bits 0-15 are the VM contexts0-15 */
+	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
+}
+
+/*
+ * RLC
+ */
+void si_rlc_fini(struct radeon_device *rdev)
+{
+	int r;
+
+	/* save restore block */
+	if (rdev->rlc.save_restore_obj) {
+		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
+		radeon_bo_unpin(rdev->rlc.save_restore_obj);
+		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+
+		radeon_bo_unref(&rdev->rlc.save_restore_obj);
+		rdev->rlc.save_restore_obj = NULL;
+	}
+
+	/* clear state block */
+	if (rdev->rlc.clear_state_obj) {
+		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
+		radeon_bo_unpin(rdev->rlc.clear_state_obj);
+		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+
+		radeon_bo_unref(&rdev->rlc.clear_state_obj);
+		rdev->rlc.clear_state_obj = NULL;
+	}
+}
+
+int si_rlc_init(struct radeon_device *rdev)
+{
+	int r;
+
+	/* save restore block */
+	if (rdev->rlc.save_restore_obj == NULL) {
+		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
+				RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.save_restore_obj);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
+			return r;
+		}
+	}
+
+	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+	if (unlikely(r != 0)) {
+		si_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->rlc.save_restore_gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
+		si_rlc_fini(rdev);
+		return r;
+	}
+
+	/* clear state block */
+	if (rdev->rlc.clear_state_obj == NULL) {
+		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
+				RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.clear_state_obj);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+			si_rlc_fini(rdev);
+			return r;
+		}
+	}
+	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+	if (unlikely(r != 0)) {
+		si_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->rlc.clear_state_gpu_addr);
+	if (r) {
+
+		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
+		si_rlc_fini(rdev);
+		return r;
+	}
+
+	return 0;
+}
+
+static void si_rlc_stop(struct radeon_device *rdev)
+{
+	WREG32(RLC_CNTL, 0);
+}
+
+static void si_rlc_start(struct radeon_device *rdev)
+{
+	WREG32(RLC_CNTL, RLC_ENABLE);
+}
+
+static int si_rlc_resume(struct radeon_device *rdev)
+{
+	u32 i;
+	const __be32 *fw_data;
+
+	if (!rdev->rlc_fw)
+		return -EINVAL;
+
+	si_rlc_stop(rdev);
+
+	WREG32(RLC_RL_BASE, 0);
+	WREG32(RLC_RL_SIZE, 0);
+	WREG32(RLC_LB_CNTL, 0);
+	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
+	WREG32(RLC_LB_CNTR_INIT, 0);
+
+	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+
+	WREG32(RLC_MC_CNTL, 0);
+	WREG32(RLC_UCODE_CNTL, 0);
+
+	fw_data = (const __be32 *)rdev->rlc_fw->data;
+	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
+		WREG32(RLC_UCODE_ADDR, i);
+		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
+	}
+	WREG32(RLC_UCODE_ADDR, 0);
+
+	si_rlc_start(rdev);
+
+	return 0;
+}
+
+static void si_enable_interrupts(struct radeon_device *rdev)
+{
+	u32 ih_cntl = RREG32(IH_CNTL);
+	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
+
+	ih_cntl |= ENABLE_INTR;
+	ih_rb_cntl |= IH_RB_ENABLE;
+	WREG32(IH_CNTL, ih_cntl);
+	WREG32(IH_RB_CNTL, ih_rb_cntl);
+	rdev->ih.enabled = true;
+}
+
+static void si_disable_interrupts(struct radeon_device *rdev)
+{
+	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
+	u32 ih_cntl = RREG32(IH_CNTL);
+
+	ih_rb_cntl &= ~IH_RB_ENABLE;
+	ih_cntl &= ~ENABLE_INTR;
+	WREG32(IH_RB_CNTL, ih_rb_cntl);
+	WREG32(IH_CNTL, ih_cntl);
+	/* set rptr, wptr to 0 */
+	WREG32(IH_RB_RPTR, 0);
+	WREG32(IH_RB_WPTR, 0);
+	rdev->ih.enabled = false;
+	rdev->ih.wptr = 0;
+	rdev->ih.rptr = 0;
+}
+
+static void si_disable_interrupt_state(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	WREG32(CP_INT_CNTL_RING1, 0);
+	WREG32(CP_INT_CNTL_RING2, 0);
+	WREG32(GRBM_INT_CNTL, 0);
+	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+	if (rdev->num_crtc >= 4) {
+		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+	}
+	if (rdev->num_crtc >= 6) {
+		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+	}
+
+	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+	if (rdev->num_crtc >= 4) {
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+	}
+	if (rdev->num_crtc >= 6) {
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+	}
+
+	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
+
+	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD1_INT_CONTROL, tmp);
+	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD2_INT_CONTROL, tmp);
+	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD3_INT_CONTROL, tmp);
+	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD4_INT_CONTROL, tmp);
+	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD5_INT_CONTROL, tmp);
+	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
+	WREG32(DC_HPD6_INT_CONTROL, tmp);
+
+}
+
+static int si_irq_init(struct radeon_device *rdev)
+{
+	int ret = 0;
+	int rb_bufsz;
+	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+
+	/* allocate ring */
+	ret = r600_ih_ring_alloc(rdev);
+	if (ret)
+		return ret;
+
+	/* disable irqs */
+	si_disable_interrupts(rdev);
+
+	/* init rlc */
+	ret = si_rlc_resume(rdev);
+	if (ret) {
+		r600_ih_ring_fini(rdev);
+		return ret;
+	}
+
+	/* setup interrupt control */
+	/* set dummy read address to ring address */
+	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
+	interrupt_cntl = RREG32(INTERRUPT_CNTL);
+	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
+	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
+	 */
+	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
+	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
+	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
+	WREG32(INTERRUPT_CNTL, interrupt_cntl);
+
+	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
+	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+
+	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
+		      IH_WPTR_OVERFLOW_CLEAR |
+		      (rb_bufsz << 1));
+
+	if (rdev->wb.enabled)
+		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
+
+	/* set the writeback address whether it's enabled or not */
+	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
+
+	WREG32(IH_RB_CNTL, ih_rb_cntl);
+
+	/* set rptr, wptr to 0 */
+	WREG32(IH_RB_RPTR, 0);
+	WREG32(IH_RB_WPTR, 0);
+
+	/* Default settings for IH_CNTL (disabled at first) */
+	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
+	/* RPTR_REARM only works if msi's are enabled */
+	if (rdev->msi_enabled)
+		ih_cntl |= RPTR_REARM;
+	WREG32(IH_CNTL, ih_cntl);
+
+	/* force the active interrupt state to all disabled */
+	si_disable_interrupt_state(rdev);
+
+	/* enable irqs */
+	si_enable_interrupts(rdev);
+
+	return ret;
+}
+
+int si_irq_set(struct radeon_device *rdev)
+{
+	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
+	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
+	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
+	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
+	u32 grbm_int_cntl = 0;
+	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
+
+	if (!rdev->irq.installed) {
+		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
+		return -EINVAL;
+	}
+	/* don't enable anything if the ih is disabled */
+	if (!rdev->ih.enabled) {
+		si_disable_interrupts(rdev);
+		/* force the active interrupt state to all disabled */
+		si_disable_interrupt_state(rdev);
+		return 0;
+	}
+
+	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
+	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
+	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
+	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
+	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
+	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
+
+	/* enable CP interrupts on all rings */
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+		DRM_DEBUG("si_irq_set: sw int gfx\n");
+		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
+	}
+	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
+		DRM_DEBUG("si_irq_set: sw int cp1\n");
+		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
+	}
+	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
+		DRM_DEBUG("si_irq_set: sw int cp2\n");
+		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
+	}
+	if (rdev->irq.crtc_vblank_int[0] ||
+	    rdev->irq.pflip[0]) {
+		DRM_DEBUG("si_irq_set: vblank 0\n");
+		crtc1 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.crtc_vblank_int[1] ||
+	    rdev->irq.pflip[1]) {
+		DRM_DEBUG("si_irq_set: vblank 1\n");
+		crtc2 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.crtc_vblank_int[2] ||
+	    rdev->irq.pflip[2]) {
+		DRM_DEBUG("si_irq_set: vblank 2\n");
+		crtc3 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.crtc_vblank_int[3] ||
+	    rdev->irq.pflip[3]) {
+		DRM_DEBUG("si_irq_set: vblank 3\n");
+		crtc4 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.crtc_vblank_int[4] ||
+	    rdev->irq.pflip[4]) {
+		DRM_DEBUG("si_irq_set: vblank 4\n");
+		crtc5 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.crtc_vblank_int[5] ||
+	    rdev->irq.pflip[5]) {
+		DRM_DEBUG("si_irq_set: vblank 5\n");
+		crtc6 |= VBLANK_INT_MASK;
+	}
+	if (rdev->irq.hpd[0]) {
+		DRM_DEBUG("si_irq_set: hpd 1\n");
+		hpd1 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.hpd[1]) {
+		DRM_DEBUG("si_irq_set: hpd 2\n");
+		hpd2 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.hpd[2]) {
+		DRM_DEBUG("si_irq_set: hpd 3\n");
+		hpd3 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.hpd[3]) {
+		DRM_DEBUG("si_irq_set: hpd 4\n");
+		hpd4 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.hpd[4]) {
+		DRM_DEBUG("si_irq_set: hpd 5\n");
+		hpd5 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.hpd[5]) {
+		DRM_DEBUG("si_irq_set: hpd 6\n");
+		hpd6 |= DC_HPDx_INT_EN;
+	}
+	if (rdev->irq.gui_idle) {
+		DRM_DEBUG("gui idle\n");
+		grbm_int_cntl |= GUI_IDLE_INT_ENABLE;
+	}
+
+	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
+	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
+	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
+
+	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
+
+	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
+	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
+	if (rdev->num_crtc >= 4) {
+		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
+		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
+	}
+	if (rdev->num_crtc >= 6) {
+		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
+		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
+	}
+
+	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
+	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
+	if (rdev->num_crtc >= 4) {
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
+	}
+	if (rdev->num_crtc >= 6) {
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
+		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
+	}
+
+	WREG32(DC_HPD1_INT_CONTROL, hpd1);
+	WREG32(DC_HPD2_INT_CONTROL, hpd2);
+	WREG32(DC_HPD3_INT_CONTROL, hpd3);
+	WREG32(DC_HPD4_INT_CONTROL, hpd4);
+	WREG32(DC_HPD5_INT_CONTROL, hpd5);
+	WREG32(DC_HPD6_INT_CONTROL, hpd6);
+
+	return 0;
+}
+
+static inline void si_irq_ack(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
+	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
+	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
+	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
+	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
+	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
+	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
+	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
+	if (rdev->num_crtc >= 4) {
+		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
+		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
+	}
+	if (rdev->num_crtc >= 6) {
+		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
+		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
+	}
+
+	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
+		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
+		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
+		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
+	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
+		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
+		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
+		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
+
+	if (rdev->num_crtc >= 4) {
+		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
+			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
+			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
+			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
+			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
+			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
+			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
+	}
+
+	if (rdev->num_crtc >= 6) {
+		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
+			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
+			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
+			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
+			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
+			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
+		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
+			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
+	}
+
+	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
+		tmp = RREG32(DC_HPD1_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD1_INT_CONTROL, tmp);
+	}
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
+		tmp = RREG32(DC_HPD2_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD2_INT_CONTROL, tmp);
+	}
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
+		tmp = RREG32(DC_HPD3_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD3_INT_CONTROL, tmp);
+	}
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
+		tmp = RREG32(DC_HPD4_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD4_INT_CONTROL, tmp);
+	}
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
+		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD5_INT_CONTROL, tmp);
+	}
+	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
+		tmp = RREG32(DC_HPD5_INT_CONTROL);
+		tmp |= DC_HPDx_INT_ACK;
+		WREG32(DC_HPD6_INT_CONTROL, tmp);
+	}
+}
+
+static void si_irq_disable(struct radeon_device *rdev)
+{
+	si_disable_interrupts(rdev);
+	/* Wait and acknowledge irq */
+	mdelay(1);
+	si_irq_ack(rdev);
+	si_disable_interrupt_state(rdev);
+}
+
+static void si_irq_suspend(struct radeon_device *rdev)
+{
+	si_irq_disable(rdev);
+	si_rlc_stop(rdev);
+}
+
+static void si_irq_fini(struct radeon_device *rdev)
+{
+	si_irq_suspend(rdev);
+	r600_ih_ring_fini(rdev);
+}
+
+static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
+{
+	u32 wptr, tmp;
+
+	if (rdev->wb.enabled)
+		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
+	else
+		wptr = RREG32(IH_RB_WPTR);
+
+	if (wptr & RB_OVERFLOW) {
+		/* When a ring buffer overflow happen start parsing interrupt
+		 * from the last not overwritten vector (wptr + 16). Hopefully
+		 * this should allow us to catchup.
+		 */
+		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
+			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
+		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
+		tmp = RREG32(IH_RB_CNTL);
+		tmp |= IH_WPTR_OVERFLOW_CLEAR;
+		WREG32(IH_RB_CNTL, tmp);
+	}
+	return (wptr & rdev->ih.ptr_mask);
+}
+
+/*        SI IV Ring
+ * Each IV ring entry is 128 bits:
+ * [7:0]    - interrupt source id
+ * [31:8]   - reserved
+ * [59:32]  - interrupt source data
+ * [63:60]  - reserved
+ * [71:64]  - RINGID
+ * [79:72]  - VMID
+ * [127:80] - reserved
+ */
+int si_irq_process(struct radeon_device *rdev)
+{
+	u32 wptr;
+	u32 rptr;
+	u32 src_id, src_data, ring_id;
+	u32 ring_index;
+	unsigned long flags;
+	bool queue_hotplug = false;
+
+	if (!rdev->ih.enabled || rdev->shutdown)
+		return IRQ_NONE;
+
+	wptr = si_get_ih_wptr(rdev);
+	rptr = rdev->ih.rptr;
+	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
+
+	spin_lock_irqsave(&rdev->ih.lock, flags);
+	if (rptr == wptr) {
+		spin_unlock_irqrestore(&rdev->ih.lock, flags);
+		return IRQ_NONE;
+	}
+restart_ih:
+	/* Order reading of wptr vs. reading of IH ring data */
+	rmb();
+
+	/* display interrupts */
+	si_irq_ack(rdev);
+
+	rdev->ih.wptr = wptr;
+	while (rptr != wptr) {
+		/* wptr/rptr are in bytes! */
+		ring_index = rptr / 4;
+		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
+		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
+		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
+
+		switch (src_id) {
+		case 1: /* D1 vblank/vline */
+			switch (src_data) {
+			case 0: /* D1 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[0]) {
+						drm_handle_vblank(rdev->ddev, 0);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[0])
+						radeon_crtc_handle_flip(rdev, 0);
+					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D1 vblank\n");
+				}
+				break;
+			case 1: /* D1 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D1 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 2: /* D2 vblank/vline */
+			switch (src_data) {
+			case 0: /* D2 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[1]) {
+						drm_handle_vblank(rdev->ddev, 1);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[1])
+						radeon_crtc_handle_flip(rdev, 1);
+					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D2 vblank\n");
+				}
+				break;
+			case 1: /* D2 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D2 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 3: /* D3 vblank/vline */
+			switch (src_data) {
+			case 0: /* D3 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[2]) {
+						drm_handle_vblank(rdev->ddev, 2);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[2])
+						radeon_crtc_handle_flip(rdev, 2);
+					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D3 vblank\n");
+				}
+				break;
+			case 1: /* D3 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D3 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 4: /* D4 vblank/vline */
+			switch (src_data) {
+			case 0: /* D4 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[3]) {
+						drm_handle_vblank(rdev->ddev, 3);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[3])
+						radeon_crtc_handle_flip(rdev, 3);
+					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D4 vblank\n");
+				}
+				break;
+			case 1: /* D4 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D4 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 5: /* D5 vblank/vline */
+			switch (src_data) {
+			case 0: /* D5 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[4]) {
+						drm_handle_vblank(rdev->ddev, 4);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[4])
+						radeon_crtc_handle_flip(rdev, 4);
+					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D5 vblank\n");
+				}
+				break;
+			case 1: /* D5 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D5 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 6: /* D6 vblank/vline */
+			switch (src_data) {
+			case 0: /* D6 vblank */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
+					if (rdev->irq.crtc_vblank_int[5]) {
+						drm_handle_vblank(rdev->ddev, 5);
+						rdev->pm.vblank_sync = true;
+						wake_up(&rdev->irq.vblank_queue);
+					}
+					if (rdev->irq.pflip[5])
+						radeon_crtc_handle_flip(rdev, 5);
+					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
+					DRM_DEBUG("IH: D6 vblank\n");
+				}
+				break;
+			case 1: /* D6 vline */
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
+					DRM_DEBUG("IH: D6 vline\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 42: /* HPD hotplug */
+			switch (src_data) {
+			case 0:
+				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD1\n");
+				}
+				break;
+			case 1:
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD2\n");
+				}
+				break;
+			case 2:
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD3\n");
+				}
+				break;
+			case 3:
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD4\n");
+				}
+				break;
+			case 4:
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD5\n");
+				}
+				break;
+			case 5:
+				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
+					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
+					queue_hotplug = true;
+					DRM_DEBUG("IH: HPD6\n");
+				}
+				break;
+			default:
+				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
+		case 176: /* RINGID0 CP_INT */
+			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
+			break;
+		case 177: /* RINGID1 CP_INT */
+			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+			break;
+		case 178: /* RINGID2 CP_INT */
+			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+			break;
+		case 181: /* CP EOP event */
+			DRM_DEBUG("IH: CP EOP\n");
+			switch (ring_id) {
+			case 0:
+				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
+				break;
+			case 1:
+				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+				break;
+			case 2:
+				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+				break;
+			}
+			break;
+		case 233: /* GUI IDLE */
+			DRM_DEBUG("IH: GUI idle\n");
+			rdev->pm.gui_idle = true;
+			wake_up(&rdev->irq.idle_queue);
+			break;
+		default:
+			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
+			break;
+		}
+
+		/* wptr/rptr are in bytes! */
+		rptr += 16;
+		rptr &= rdev->ih.ptr_mask;
+	}
+	/* make sure wptr hasn't changed while processing */
+	wptr = si_get_ih_wptr(rdev);
+	if (wptr != rdev->ih.wptr)
+		goto restart_ih;
+	if (queue_hotplug)
+		schedule_work(&rdev->hotplug_work);
+	rdev->ih.rptr = rptr;
+	WREG32(IH_RB_RPTR, rdev->ih.rptr);
+	spin_unlock_irqrestore(&rdev->ih.lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * startup/shutdown callbacks
+ */
+static int si_startup(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	int r;
+
+	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
+	    !rdev->rlc_fw || !rdev->mc_fw) {
+		r = si_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+
+	r = si_mc_load_microcode(rdev);
+	if (r) {
+		DRM_ERROR("Failed to load MC firmware!\n");
+		return r;
+	}
+
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	si_mc_program(rdev);
+	r = si_pcie_gart_enable(rdev);
+	if (r)
+		return r;
+	si_gpu_init(rdev);
+
+#if 0
+	r = evergreen_blit_init(rdev);
+	if (r) {
+		r600_blit_fini(rdev);
+		rdev->asic->copy = NULL;
+		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
+	}
+#endif
+	/* allocate rlc buffers */
+	r = si_rlc_init(rdev);
+	if (r) {
+		DRM_ERROR("Failed to init rlc BOs!\n");
+		return r;
+	}
+
+	/* allocate wb buffer */
+	r = radeon_wb_init(rdev);
+	if (r)
+		return r;
+
+	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
+		return r;
+	}
+
+	/* Enable IRQ */
+	r = si_irq_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: IH init failed (%d).\n", r);
+		radeon_irq_kms_fini(rdev);
+		return r;
+	}
+	si_irq_set(rdev);
+
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
+			     CP_RB0_RPTR, CP_RB0_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
+			     CP_RB1_RPTR, CP_RB1_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
+	if (r)
+		return r;
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
+			     CP_RB2_RPTR, CP_RB2_WPTR,
+			     0, 0xfffff, RADEON_CP_PACKET2);
+	if (r)
+		return r;
+
+	r = si_cp_load_microcode(rdev);
+	if (r)
+		return r;
+	r = si_cp_resume(rdev);
+	if (r)
+		return r;
+
+	r = radeon_ib_pool_start(rdev);
+	if (r)
+		return r;
+
+	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 0\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
+	r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 1\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
+	r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+	if (r) {
+		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 2\n", r);
+		rdev->accel_working = false;
+		return r;
+	}
+
+	r = radeon_vm_manager_start(rdev);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+int si_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
+	 * posting will perform necessary task to bring back GPU into good
+	 * shape.
+	 */
+	/* post card */
+	atom_asic_init(rdev->mode_info.atom_context);
+
+	rdev->accel_working = true;
+	r = si_startup(rdev);
+	if (r) {
+		DRM_ERROR("si startup failed on resume\n");
+		rdev->accel_working = false;
+		return r;
+	}
+
+	return r;
+
+}
+
+int si_suspend(struct radeon_device *rdev)
+{
+	/* FIXME: we should wait for ring to be empty */
+	radeon_ib_pool_suspend(rdev);
+	radeon_vm_manager_suspend(rdev);
+#if 0
+	r600_blit_suspend(rdev);
+#endif
+	si_cp_enable(rdev, false);
+	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+	si_irq_suspend(rdev);
+	radeon_wb_disable(rdev);
+	si_pcie_gart_disable(rdev);
+	return 0;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int si_init(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	int r;
+
+	/* This don't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios) {
+		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
+		return -EINVAL;
+	}
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+
+	/* Post card if necessary */
+	if (!radeon_card_posted(rdev)) {
+		if (!rdev->bios) {
+			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
+			return -EINVAL;
+		}
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	si_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	/* Initialize clocks */
+	radeon_get_clock_info(rdev->ddev);
+
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+
+	/* initialize memory controller */
+	r = si_mc_init(rdev);
+	if (r)
+		return r;
+	/* Memory manager */
+	r = radeon_bo_init(rdev);
+	if (r)
+		return r;
+
+	r = radeon_irq_kms_init(rdev);
+	if (r)
+		return r;
+
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 1024 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 1024 * 1024);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	ring->ring_obj = NULL;
+	r600_ring_init(rdev, ring, 1024 * 1024);
+
+	rdev->ih.ring_obj = NULL;
+	r600_ih_ring_init(rdev, 64 * 1024);
+
+	r = r600_pcie_gart_init(rdev);
+	if (r)
+		return r;
+
+	r = radeon_ib_pool_init(rdev);
+	rdev->accel_working = true;
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
+		rdev->accel_working = false;
+	}
+	r = radeon_vm_manager_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
+	}
+
+	r = si_startup(rdev);
+	if (r) {
+		dev_err(rdev->dev, "disabling GPU acceleration\n");
+		si_cp_fini(rdev);
+		si_irq_fini(rdev);
+		si_rlc_fini(rdev);
+		radeon_wb_fini(rdev);
+		r100_ib_fini(rdev);
+		radeon_vm_manager_fini(rdev);
+		radeon_irq_kms_fini(rdev);
+		si_pcie_gart_fini(rdev);
+		rdev->accel_working = false;
+	}
+
+	/* Don't start up if the MC ucode is missing.
+	 * The default clocks and voltages before the MC ucode
+	 * is loaded are not suffient for advanced operations.
+	 */
+	if (!rdev->mc_fw) {
+		DRM_ERROR("radeon: MC ucode required for NI+.\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void si_fini(struct radeon_device *rdev)
+{
+#if 0
+	r600_blit_fini(rdev);
+#endif
+	si_cp_fini(rdev);
+	si_irq_fini(rdev);
+	si_rlc_fini(rdev);
+	radeon_wb_fini(rdev);
+	radeon_vm_manager_fini(rdev);
+	r100_ib_fini(rdev);
+	radeon_irq_kms_fini(rdev);
+	si_pcie_gart_fini(rdev);
+	r600_vram_scratch_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_bo_fini(rdev);
+	radeon_atombios_fini(rdev);
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+}
+
diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.c b/drivers/gpu/drm/radeon/si_blit_shaders.c
new file mode 100644
index 0000000..a7124b4
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si_blit_shaders.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     Alex Deucher <alexander.deucher@amd.com>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+const u32 si_default_state[] =
+{
+	0xc0066900,
+	0x00000000,
+	0x00000060, /* DB_RENDER_CONTROL */
+	0x00000000, /* DB_COUNT_CONTROL */
+	0x00000000, /* DB_DEPTH_VIEW */
+	0x0000002a, /* DB_RENDER_OVERRIDE */
+	0x00000000, /* DB_RENDER_OVERRIDE2 */
+	0x00000000, /* DB_HTILE_DATA_BASE */
+
+	0xc0046900,
+	0x00000008,
+	0x00000000, /* DB_DEPTH_BOUNDS_MIN */
+	0x00000000, /* DB_DEPTH_BOUNDS_MAX */
+	0x00000000, /* DB_STENCIL_CLEAR */
+	0x00000000, /* DB_DEPTH_CLEAR */
+
+	0xc0036900,
+	0x0000000f,
+	0x00000000, /* DB_DEPTH_INFO */
+	0x00000000, /* DB_Z_INFO */
+	0x00000000, /* DB_STENCIL_INFO */
+
+	0xc0016900,
+	0x00000080,
+	0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+	0xc00d6900,
+	0x00000083,
+	0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+	0x00000000, /* PA_SC_CLIPRECT_0_TL */
+	0x20002000, /* PA_SC_CLIPRECT_0_BR */
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0xaaaaaaaa, /* PA_SC_EDGERULE */
+	0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+	0x0000000f, /* CB_TARGET_MASK */
+	0x0000000f, /* CB_SHADER_MASK */
+
+	0xc0226900,
+	0x00000094,
+	0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+	0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+	0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+	0xc0026900,
+	0x000000d9,
+	0x00000000, /* CP_RINGID */
+	0x00000000, /* CP_VMID */
+
+	0xc0046900,
+	0x00000100,
+	0xffffffff, /* VGT_MAX_VTX_INDX */
+	0x00000000, /* VGT_MIN_VTX_INDX */
+	0x00000000, /* VGT_INDX_OFFSET */
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+
+	0xc0046900,
+	0x00000105,
+	0x00000000, /* CB_BLEND_RED */
+	0x00000000, /* CB_BLEND_GREEN */
+	0x00000000, /* CB_BLEND_BLUE */
+	0x00000000, /* CB_BLEND_ALPHA */
+
+	0xc0016900,
+	0x000001e0,
+	0x00000000, /* CB_BLEND0_CONTROL */
+
+	0xc00e6900,
+	0x00000200,
+	0x00000000, /* DB_DEPTH_CONTROL */
+	0x00000000, /* DB_EQAA */
+	0x00cc0010, /* CB_COLOR_CONTROL */
+	0x00000210, /* DB_SHADER_CONTROL */
+	0x00010000, /* PA_CL_CLIP_CNTL */
+	0x00000004, /* PA_SU_SC_MODE_CNTL */
+	0x00000100, /* PA_CL_VTE_CNTL */
+	0x00000000, /* PA_CL_VS_OUT_CNTL */
+	0x00000000, /* PA_CL_NANINF_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+	0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+	0x00000000, /*  */
+	0x00000000, /*  */
+
+	0xc0116900,
+	0x00000280,
+	0x00000000, /* PA_SU_POINT_SIZE */
+	0x00000000, /* PA_SU_POINT_MINMAX */
+	0x00000008, /* PA_SU_LINE_CNTL */
+	0x00000000, /* PA_SC_LINE_STIPPLE */
+	0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+	0x00000000, /* VGT_HOS_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000, /* VGT_GS_MODE */
+
+	0xc0026900,
+	0x00000292,
+	0x00000000, /* PA_SC_MODE_CNTL_0 */
+	0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+	0xc0016900,
+	0x000002a1,
+	0x00000000, /* VGT_PRIMITIVEID_EN */
+
+	0xc0016900,
+	0x000002a5,
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+	0xc0026900,
+	0x000002a8,
+	0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+	0x00000000,
+
+	0xc0026900,
+	0x000002ad,
+	0x00000000, /* VGT_REUSE_OFF */
+	0x00000000,
+
+	0xc0016900,
+	0x000002d5,
+	0x00000000, /* VGT_SHADER_STAGES_EN */
+
+	0xc0016900,
+	0x000002dc,
+	0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+	0xc0066900,
+	0x000002de,
+	0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0026900,
+	0x000002e5,
+	0x00000000, /* VGT_STRMOUT_CONFIG */
+	0x00000000,
+
+	0xc01b6900,
+	0x000002f5,
+	0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+	0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+	0x00000000, /* PA_SC_LINE_CNTL */
+	0x00000000, /* PA_SC_AA_CONFIG */
+	0x00000005, /* PA_SU_VTX_CNTL */
+	0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+	0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+	0xffffffff,
+
+	0xc0026900,
+	0x00000316,
+	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	0x00000010, /*  */
+};
+
+const u32 si_default_size = ARRAY_SIZE(si_default_state);
diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.h b/drivers/gpu/drm/radeon/si_blit_shaders.h
new file mode 100644
index 0000000..c739e51
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si_blit_shaders.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SI_BLIT_SHADERS_H
+#define SI_BLIT_SHADERS_H
+
+extern const u32 si_default_state[];
+
+extern const u32 si_default_size;
+
+#endif
diff --git a/drivers/gpu/drm/radeon/si_reg.h b/drivers/gpu/drm/radeon/si_reg.h
new file mode 100644
index 0000000..eda938a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si_reg.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef __SI_REG_H__
+#define __SI_REG_H__
+
+/* SI */
+#define SI_DC_GPIO_HPD_MASK                      0x65b0
+#define SI_DC_GPIO_HPD_A                         0x65b4
+#define SI_DC_GPIO_HPD_EN                        0x65b8
+#define SI_DC_GPIO_HPD_Y                         0x65bc
+
+#endif
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
new file mode 100644
index 0000000..53ea2c4
--- /dev/null
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef SI_H
+#define SI_H
+
+#define	CG_MULT_THERMAL_STATUS					0x714
+#define		ASIC_MAX_TEMP(x)				((x) << 0)
+#define		ASIC_MAX_TEMP_MASK				0x000001ff
+#define		ASIC_MAX_TEMP_SHIFT				0
+#define		CTF_TEMP(x)					((x) << 9)
+#define		CTF_TEMP_MASK					0x0003fe00
+#define		CTF_TEMP_SHIFT					9
+
+#define SI_MAX_SH_GPRS           256
+#define SI_MAX_TEMP_GPRS         16
+#define SI_MAX_SH_THREADS        256
+#define SI_MAX_SH_STACK_ENTRIES  4096
+#define SI_MAX_FRC_EOV_CNT       16384
+#define SI_MAX_BACKENDS          8
+#define SI_MAX_BACKENDS_MASK     0xFF
+#define SI_MAX_BACKENDS_PER_SE_MASK     0x0F
+#define SI_MAX_SIMDS             12
+#define SI_MAX_SIMDS_MASK        0x0FFF
+#define SI_MAX_SIMDS_PER_SE_MASK        0x00FF
+#define SI_MAX_PIPES             8
+#define SI_MAX_PIPES_MASK        0xFF
+#define SI_MAX_PIPES_PER_SIMD_MASK      0x3F
+#define SI_MAX_LDS_NUM           0xFFFF
+#define SI_MAX_TCC               16
+#define SI_MAX_TCC_MASK          0xFFFF
+
+#define VGA_HDP_CONTROL  				0x328
+#define		VGA_MEMORY_DISABLE				(1 << 4)
+
+#define DMIF_ADDR_CONFIG  				0xBD4
+
+#define	SRBM_STATUS				        0xE50
+
+#define	CC_SYS_RB_BACKEND_DISABLE			0xe80
+#define	GC_USER_SYS_RB_BACKEND_DISABLE			0xe84
+
+#define VM_L2_CNTL					0x1400
+#define		ENABLE_L2_CACHE					(1 << 0)
+#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
+#define		L2_CACHE_PTE_ENDIAN_SWAP_MODE(x)		((x) << 2)
+#define		L2_CACHE_PDE_ENDIAN_SWAP_MODE(x)		((x) << 4)
+#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
+#define		ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE	(1 << 10)
+#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 15)
+#define		CONTEXT1_IDENTITY_ACCESS_MODE(x)		(((x) & 3) << 19)
+#define VM_L2_CNTL2					0x1404
+#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
+#define		INVALIDATE_L2_CACHE				(1 << 1)
+#define		INVALIDATE_CACHE_MODE(x)			((x) << 26)
+#define			INVALIDATE_PTE_AND_PDE_CACHES		0
+#define			INVALIDATE_ONLY_PTE_CACHES		1
+#define			INVALIDATE_ONLY_PDE_CACHES		2
+#define VM_L2_CNTL3					0x1408
+#define		BANK_SELECT(x)					((x) << 0)
+#define		L2_CACHE_UPDATE_MODE(x)				((x) << 6)
+#define		L2_CACHE_BIGK_FRAGMENT_SIZE(x)			((x) << 15)
+#define		L2_CACHE_BIGK_ASSOCIATIVITY			(1 << 20)
+#define	VM_L2_STATUS					0x140C
+#define		L2_BUSY						(1 << 0)
+#define VM_CONTEXT0_CNTL				0x1410
+#define		ENABLE_CONTEXT					(1 << 0)
+#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
+#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
+#define VM_CONTEXT1_CNTL				0x1414
+#define VM_CONTEXT0_CNTL2				0x1430
+#define VM_CONTEXT1_CNTL2				0x1434
+#define	VM_CONTEXT8_PAGE_TABLE_BASE_ADDR		0x1438
+#define	VM_CONTEXT9_PAGE_TABLE_BASE_ADDR		0x143c
+#define	VM_CONTEXT10_PAGE_TABLE_BASE_ADDR		0x1440
+#define	VM_CONTEXT11_PAGE_TABLE_BASE_ADDR		0x1444
+#define	VM_CONTEXT12_PAGE_TABLE_BASE_ADDR		0x1448
+#define	VM_CONTEXT13_PAGE_TABLE_BASE_ADDR		0x144c
+#define	VM_CONTEXT14_PAGE_TABLE_BASE_ADDR		0x1450
+#define	VM_CONTEXT15_PAGE_TABLE_BASE_ADDR		0x1454
+
+#define VM_INVALIDATE_REQUEST				0x1478
+#define VM_INVALIDATE_RESPONSE				0x147c
+
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1518
+#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR	0x151c
+
+#define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153c
+#define	VM_CONTEXT1_PAGE_TABLE_BASE_ADDR		0x1540
+#define	VM_CONTEXT2_PAGE_TABLE_BASE_ADDR		0x1544
+#define	VM_CONTEXT3_PAGE_TABLE_BASE_ADDR		0x1548
+#define	VM_CONTEXT4_PAGE_TABLE_BASE_ADDR		0x154c
+#define	VM_CONTEXT5_PAGE_TABLE_BASE_ADDR		0x1550
+#define	VM_CONTEXT6_PAGE_TABLE_BASE_ADDR		0x1554
+#define	VM_CONTEXT7_PAGE_TABLE_BASE_ADDR		0x1558
+#define	VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x155c
+#define	VM_CONTEXT1_PAGE_TABLE_START_ADDR		0x1560
+
+#define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
+#define	VM_CONTEXT1_PAGE_TABLE_END_ADDR			0x1580
+
+#define MC_SHARED_CHMAP						0x2004
+#define		NOOFCHAN_SHIFT					12
+#define		NOOFCHAN_MASK					0x0000f000
+#define MC_SHARED_CHREMAP					0x2008
+
+#define	MC_VM_FB_LOCATION				0x2024
+#define	MC_VM_AGP_TOP					0x2028
+#define	MC_VM_AGP_BOT					0x202C
+#define	MC_VM_AGP_BASE					0x2030
+#define	MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2034
+#define	MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2038
+#define	MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x203C
+
+#define	MC_VM_MX_L1_TLB_CNTL				0x2064
+#define		ENABLE_L1_TLB					(1 << 0)
+#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
+#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 3)
+#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 3)
+#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 3)
+#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 3)
+#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 5)
+#define		ENABLE_ADVANCED_DRIVER_MODEL			(1 << 6)
+
+#define MC_SHARED_BLACKOUT_CNTL           		0x20ac
+
+#define	MC_ARB_RAMCFG					0x2760
+#define		NOOFBANK_SHIFT					0
+#define		NOOFBANK_MASK					0x00000003
+#define		NOOFRANK_SHIFT					2
+#define		NOOFRANK_MASK					0x00000004
+#define		NOOFROWS_SHIFT					3
+#define		NOOFROWS_MASK					0x00000038
+#define		NOOFCOLS_SHIFT					6
+#define		NOOFCOLS_MASK					0x000000C0
+#define		CHANSIZE_SHIFT					8
+#define		CHANSIZE_MASK					0x00000100
+#define		CHANSIZE_OVERRIDE				(1 << 11)
+#define		NOOFGROUPS_SHIFT				12
+#define		NOOFGROUPS_MASK					0x00001000
+
+#define	MC_SEQ_TRAIN_WAKEUP_CNTL			0x2808
+#define		TRAIN_DONE_D0      			(1 << 30)
+#define		TRAIN_DONE_D1      			(1 << 31)
+
+#define MC_SEQ_SUP_CNTL           			0x28c8
+#define		RUN_MASK      				(1 << 0)
+#define MC_SEQ_SUP_PGM           			0x28cc
+
+#define MC_IO_PAD_CNTL_D0           			0x29d0
+#define		MEM_FALL_OUT_CMD      			(1 << 8)
+
+#define MC_SEQ_IO_DEBUG_INDEX           		0x2a44
+#define MC_SEQ_IO_DEBUG_DATA           			0x2a48
+
+#define	HDP_HOST_PATH_CNTL				0x2C00
+#define	HDP_NONSURFACE_BASE				0x2C04
+#define	HDP_NONSURFACE_INFO				0x2C08
+#define	HDP_NONSURFACE_SIZE				0x2C0C
+
+#define HDP_ADDR_CONFIG  				0x2F48
+#define HDP_MISC_CNTL					0x2F4C
+#define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
+
+#define IH_RB_CNTL                                        0x3e00
+#       define IH_RB_ENABLE                               (1 << 0)
+#       define IH_IB_SIZE(x)                              ((x) << 1) /* log2 */
+#       define IH_RB_FULL_DRAIN_ENABLE                    (1 << 6)
+#       define IH_WPTR_WRITEBACK_ENABLE                   (1 << 8)
+#       define IH_WPTR_WRITEBACK_TIMER(x)                 ((x) << 9) /* log2 */
+#       define IH_WPTR_OVERFLOW_ENABLE                    (1 << 16)
+#       define IH_WPTR_OVERFLOW_CLEAR                     (1 << 31)
+#define IH_RB_BASE                                        0x3e04
+#define IH_RB_RPTR                                        0x3e08
+#define IH_RB_WPTR                                        0x3e0c
+#       define RB_OVERFLOW                                (1 << 0)
+#       define WPTR_OFFSET_MASK                           0x3fffc
+#define IH_RB_WPTR_ADDR_HI                                0x3e10
+#define IH_RB_WPTR_ADDR_LO                                0x3e14
+#define IH_CNTL                                           0x3e18
+#       define ENABLE_INTR                                (1 << 0)
+#       define IH_MC_SWAP(x)                              ((x) << 1)
+#       define IH_MC_SWAP_NONE                            0
+#       define IH_MC_SWAP_16BIT                           1
+#       define IH_MC_SWAP_32BIT                           2
+#       define IH_MC_SWAP_64BIT                           3
+#       define RPTR_REARM                                 (1 << 4)
+#       define MC_WRREQ_CREDIT(x)                         ((x) << 15)
+#       define MC_WR_CLEAN_CNT(x)                         ((x) << 20)
+#       define MC_VMID(x)                                 ((x) << 25)
+
+#define	CONFIG_MEMSIZE					0x5428
+
+#define INTERRUPT_CNTL                                    0x5468
+#       define IH_DUMMY_RD_OVERRIDE                       (1 << 0)
+#       define IH_DUMMY_RD_EN                             (1 << 1)
+#       define IH_REQ_NONSNOOP_EN                         (1 << 3)
+#       define GEN_IH_INT_EN                              (1 << 8)
+#define INTERRUPT_CNTL2                                   0x546c
+
+#define HDP_MEM_COHERENCY_FLUSH_CNTL			0x5480
+
+#define	BIF_FB_EN						0x5490
+#define		FB_READ_EN					(1 << 0)
+#define		FB_WRITE_EN					(1 << 1)
+
+#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
+
+#define	DC_LB_MEMORY_SPLIT					0x6b0c
+#define		DC_LB_MEMORY_CONFIG(x)				((x) << 20)
+
+#define	PRIORITY_A_CNT						0x6b18
+#define		PRIORITY_MARK_MASK				0x7fff
+#define		PRIORITY_OFF					(1 << 16)
+#define		PRIORITY_ALWAYS_ON				(1 << 20)
+#define	PRIORITY_B_CNT						0x6b1c
+
+#define	DPG_PIPE_ARBITRATION_CONTROL3				0x6cc8
+#       define LATENCY_WATERMARK_MASK(x)			((x) << 16)
+#define	DPG_PIPE_LATENCY_CONTROL				0x6ccc
+#       define LATENCY_LOW_WATERMARK(x)				((x) << 0)
+#       define LATENCY_HIGH_WATERMARK(x)			((x) << 16)
+
+/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
+#define VLINE_STATUS                                    0x6bb8
+#       define VLINE_OCCURRED                           (1 << 0)
+#       define VLINE_ACK                                (1 << 4)
+#       define VLINE_STAT                               (1 << 12)
+#       define VLINE_INTERRUPT                          (1 << 16)
+#       define VLINE_INTERRUPT_TYPE                     (1 << 17)
+/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
+#define VBLANK_STATUS                                   0x6bbc
+#       define VBLANK_OCCURRED                          (1 << 0)
+#       define VBLANK_ACK                               (1 << 4)
+#       define VBLANK_STAT                              (1 << 12)
+#       define VBLANK_INTERRUPT                         (1 << 16)
+#       define VBLANK_INTERRUPT_TYPE                    (1 << 17)
+
+/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
+#define INT_MASK                                        0x6b40
+#       define VBLANK_INT_MASK                          (1 << 0)
+#       define VLINE_INT_MASK                           (1 << 4)
+
+#define DISP_INTERRUPT_STATUS                           0x60f4
+#       define LB_D1_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D1_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD1_INTERRUPT                        (1 << 17)
+#       define DC_HPD1_RX_INTERRUPT                     (1 << 18)
+#       define DACA_AUTODETECT_INTERRUPT                (1 << 22)
+#       define DACB_AUTODETECT_INTERRUPT                (1 << 23)
+#       define DC_I2C_SW_DONE_INTERRUPT                 (1 << 24)
+#       define DC_I2C_HW_DONE_INTERRUPT                 (1 << 25)
+#define DISP_INTERRUPT_STATUS_CONTINUE                  0x60f8
+#       define LB_D2_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D2_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD2_INTERRUPT                        (1 << 17)
+#       define DC_HPD2_RX_INTERRUPT                     (1 << 18)
+#       define DISP_TIMER_INTERRUPT                     (1 << 24)
+#define DISP_INTERRUPT_STATUS_CONTINUE2                 0x60fc
+#       define LB_D3_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D3_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD3_INTERRUPT                        (1 << 17)
+#       define DC_HPD3_RX_INTERRUPT                     (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE3                 0x6100
+#       define LB_D4_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D4_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD4_INTERRUPT                        (1 << 17)
+#       define DC_HPD4_RX_INTERRUPT                     (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE4                 0x614c
+#       define LB_D5_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D5_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD5_INTERRUPT                        (1 << 17)
+#       define DC_HPD5_RX_INTERRUPT                     (1 << 18)
+#define DISP_INTERRUPT_STATUS_CONTINUE5                 0x6150
+#       define LB_D6_VLINE_INTERRUPT                    (1 << 2)
+#       define LB_D6_VBLANK_INTERRUPT                   (1 << 3)
+#       define DC_HPD6_INTERRUPT                        (1 << 17)
+#       define DC_HPD6_RX_INTERRUPT                     (1 << 18)
+
+/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
+#define GRPH_INT_STATUS                                 0x6858
+#       define GRPH_PFLIP_INT_OCCURRED                  (1 << 0)
+#       define GRPH_PFLIP_INT_CLEAR                     (1 << 8)
+/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
+#define	GRPH_INT_CONTROL			        0x685c
+#       define GRPH_PFLIP_INT_MASK                      (1 << 0)
+#       define GRPH_PFLIP_INT_TYPE                      (1 << 8)
+
+#define	DACA_AUTODETECT_INT_CONTROL			0x66c8
+
+#define DC_HPD1_INT_STATUS                              0x601c
+#define DC_HPD2_INT_STATUS                              0x6028
+#define DC_HPD3_INT_STATUS                              0x6034
+#define DC_HPD4_INT_STATUS                              0x6040
+#define DC_HPD5_INT_STATUS                              0x604c
+#define DC_HPD6_INT_STATUS                              0x6058
+#       define DC_HPDx_INT_STATUS                       (1 << 0)
+#       define DC_HPDx_SENSE                            (1 << 1)
+#       define DC_HPDx_RX_INT_STATUS                    (1 << 8)
+
+#define DC_HPD1_INT_CONTROL                             0x6020
+#define DC_HPD2_INT_CONTROL                             0x602c
+#define DC_HPD3_INT_CONTROL                             0x6038
+#define DC_HPD4_INT_CONTROL                             0x6044
+#define DC_HPD5_INT_CONTROL                             0x6050
+#define DC_HPD6_INT_CONTROL                             0x605c
+#       define DC_HPDx_INT_ACK                          (1 << 0)
+#       define DC_HPDx_INT_POLARITY                     (1 << 8)
+#       define DC_HPDx_INT_EN                           (1 << 16)
+#       define DC_HPDx_RX_INT_ACK                       (1 << 20)
+#       define DC_HPDx_RX_INT_EN                        (1 << 24)
+
+#define DC_HPD1_CONTROL                                   0x6024
+#define DC_HPD2_CONTROL                                   0x6030
+#define DC_HPD3_CONTROL                                   0x603c
+#define DC_HPD4_CONTROL                                   0x6048
+#define DC_HPD5_CONTROL                                   0x6054
+#define DC_HPD6_CONTROL                                   0x6060
+#       define DC_HPDx_CONNECTION_TIMER(x)                ((x) << 0)
+#       define DC_HPDx_RX_INT_TIMER(x)                    ((x) << 16)
+#       define DC_HPDx_EN                                 (1 << 28)
+
+/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
+#define CRTC_STATUS_FRAME_COUNT                         0x6e98
+
+#define	GRBM_CNTL					0x8000
+#define		GRBM_READ_TIMEOUT(x)				((x) << 0)
+
+#define	GRBM_STATUS2					0x8008
+#define		RLC_RQ_PENDING 					(1 << 0)
+#define		RLC_BUSY 					(1 << 8)
+#define		TC_BUSY 					(1 << 9)
+
+#define	GRBM_STATUS					0x8010
+#define		CMDFIFO_AVAIL_MASK				0x0000000F
+#define		RING2_RQ_PENDING				(1 << 4)
+#define		SRBM_RQ_PENDING					(1 << 5)
+#define		RING1_RQ_PENDING				(1 << 6)
+#define		CF_RQ_PENDING					(1 << 7)
+#define		PF_RQ_PENDING					(1 << 8)
+#define		GDS_DMA_RQ_PENDING				(1 << 9)
+#define		GRBM_EE_BUSY					(1 << 10)
+#define		DB_CLEAN					(1 << 12)
+#define		CB_CLEAN					(1 << 13)
+#define		TA_BUSY 					(1 << 14)
+#define		GDS_BUSY 					(1 << 15)
+#define		VGT_BUSY					(1 << 17)
+#define		IA_BUSY_NO_DMA					(1 << 18)
+#define		IA_BUSY						(1 << 19)
+#define		SX_BUSY 					(1 << 20)
+#define		SPI_BUSY					(1 << 22)
+#define		BCI_BUSY					(1 << 23)
+#define		SC_BUSY 					(1 << 24)
+#define		PA_BUSY 					(1 << 25)
+#define		DB_BUSY 					(1 << 26)
+#define		CP_COHERENCY_BUSY      				(1 << 28)
+#define		CP_BUSY 					(1 << 29)
+#define		CB_BUSY 					(1 << 30)
+#define		GUI_ACTIVE					(1 << 31)
+#define	GRBM_STATUS_SE0					0x8014
+#define	GRBM_STATUS_SE1					0x8018
+#define		SE_DB_CLEAN					(1 << 1)
+#define		SE_CB_CLEAN					(1 << 2)
+#define		SE_BCI_BUSY					(1 << 22)
+#define		SE_VGT_BUSY					(1 << 23)
+#define		SE_PA_BUSY					(1 << 24)
+#define		SE_TA_BUSY					(1 << 25)
+#define		SE_SX_BUSY					(1 << 26)
+#define		SE_SPI_BUSY					(1 << 27)
+#define		SE_SC_BUSY					(1 << 29)
+#define		SE_DB_BUSY					(1 << 30)
+#define		SE_CB_BUSY					(1 << 31)
+
+#define	GRBM_SOFT_RESET					0x8020
+#define		SOFT_RESET_CP					(1 << 0)
+#define		SOFT_RESET_CB					(1 << 1)
+#define		SOFT_RESET_RLC					(1 << 2)
+#define		SOFT_RESET_DB					(1 << 3)
+#define		SOFT_RESET_GDS					(1 << 4)
+#define		SOFT_RESET_PA					(1 << 5)
+#define		SOFT_RESET_SC					(1 << 6)
+#define		SOFT_RESET_BCI					(1 << 7)
+#define		SOFT_RESET_SPI					(1 << 8)
+#define		SOFT_RESET_SX					(1 << 10)
+#define		SOFT_RESET_TC					(1 << 11)
+#define		SOFT_RESET_TA					(1 << 12)
+#define		SOFT_RESET_VGT					(1 << 14)
+#define		SOFT_RESET_IA					(1 << 15)
+
+#define GRBM_GFX_INDEX          			0x802C
+
+#define GRBM_INT_CNTL                                   0x8060
+#       define RDERR_INT_ENABLE                         (1 << 0)
+#       define GUI_IDLE_INT_ENABLE                      (1 << 19)
+
+#define	SCRATCH_REG0					0x8500
+#define	SCRATCH_REG1					0x8504
+#define	SCRATCH_REG2					0x8508
+#define	SCRATCH_REG3					0x850C
+#define	SCRATCH_REG4					0x8510
+#define	SCRATCH_REG5					0x8514
+#define	SCRATCH_REG6					0x8518
+#define	SCRATCH_REG7					0x851C
+
+#define	SCRATCH_UMSK					0x8540
+#define	SCRATCH_ADDR					0x8544
+
+#define	CP_SEM_WAIT_TIMER				0x85BC
+
+#define	CP_SEM_INCOMPLETE_TIMER_CNTL			0x85C8
+
+#define CP_ME_CNTL					0x86D8
+#define		CP_CE_HALT					(1 << 24)
+#define		CP_PFP_HALT					(1 << 26)
+#define		CP_ME_HALT					(1 << 28)
+
+#define	CP_COHER_CNTL2					0x85E8
+
+#define	CP_RB2_RPTR					0x86f8
+#define	CP_RB1_RPTR					0x86fc
+#define	CP_RB0_RPTR					0x8700
+#define	CP_RB_WPTR_DELAY				0x8704
+
+#define	CP_QUEUE_THRESHOLDS				0x8760
+#define		ROQ_IB1_START(x)				((x) << 0)
+#define		ROQ_IB2_START(x)				((x) << 8)
+#define CP_MEQ_THRESHOLDS				0x8764
+#define		MEQ1_START(x)				((x) << 0)
+#define		MEQ2_START(x)				((x) << 8)
+
+#define	CP_PERFMON_CNTL					0x87FC
+
+#define	VGT_VTX_VECT_EJECT_REG				0x88B0
+
+#define	VGT_CACHE_INVALIDATION				0x88C4
+#define		CACHE_INVALIDATION(x)				((x) << 0)
+#define			VC_ONLY						0
+#define			TC_ONLY						1
+#define			VC_AND_TC					2
+#define		AUTO_INVLD_EN(x)				((x) << 6)
+#define			NO_AUTO						0
+#define			ES_AUTO						1
+#define			GS_AUTO						2
+#define			ES_AND_GS_AUTO					3
+#define	VGT_ESGS_RING_SIZE				0x88C8
+#define	VGT_GSVS_RING_SIZE				0x88CC
+
+#define	VGT_GS_VERTEX_REUSE				0x88D4
+
+#define	VGT_PRIMITIVE_TYPE				0x8958
+#define	VGT_INDEX_TYPE					0x895C
+
+#define	VGT_NUM_INDICES					0x8970
+#define	VGT_NUM_INSTANCES				0x8974
+
+#define	VGT_TF_RING_SIZE				0x8988
+
+#define	VGT_HS_OFFCHIP_PARAM				0x89B0
+
+#define	VGT_TF_MEMORY_BASE				0x89B8
+
+#define CC_GC_SHADER_ARRAY_CONFIG			0x89bc
+#define GC_USER_SHADER_ARRAY_CONFIG			0x89c0
+
+#define	PA_CL_ENHANCE					0x8A14
+#define		CLIP_VTX_REORDER_ENA				(1 << 0)
+#define		NUM_CLIP_SEQ(x)					((x) << 1)
+
+#define	PA_SU_LINE_STIPPLE_VALUE			0x8A60
+
+#define	PA_SC_LINE_STIPPLE_STATE			0x8B10
+
+#define	PA_SC_FORCE_EOV_MAX_CNTS			0x8B24
+#define		FORCE_EOV_MAX_CLK_CNT(x)			((x) << 0)
+#define		FORCE_EOV_MAX_REZ_CNT(x)			((x) << 16)
+
+#define	PA_SC_FIFO_SIZE					0x8BCC
+#define		SC_FRONTEND_PRIM_FIFO_SIZE(x)			((x) << 0)
+#define		SC_BACKEND_PRIM_FIFO_SIZE(x)			((x) << 6)
+#define		SC_HIZ_TILE_FIFO_SIZE(x)			((x) << 15)
+#define		SC_EARLYZ_TILE_FIFO_SIZE(x)			((x) << 23)
+
+#define	PA_SC_ENHANCE					0x8BF0
+
+#define	SQ_CONFIG					0x8C00
+
+#define	SQC_CACHES					0x8C08
+
+#define	SX_DEBUG_1					0x9060
+
+#define	SPI_STATIC_THREAD_MGMT_1			0x90E0
+#define	SPI_STATIC_THREAD_MGMT_2			0x90E4
+#define	SPI_STATIC_THREAD_MGMT_3			0x90E8
+#define	SPI_PS_MAX_WAVE_ID				0x90EC
+
+#define	SPI_CONFIG_CNTL					0x9100
+
+#define	SPI_CONFIG_CNTL_1				0x913C
+#define		VTX_DONE_DELAY(x)				((x) << 0)
+#define		INTERP_ONE_PRIM_PER_ROW				(1 << 4)
+
+#define	CGTS_TCC_DISABLE				0x9148
+#define	CGTS_USER_TCC_DISABLE				0x914C
+#define		TCC_DISABLE_MASK				0xFFFF0000
+#define		TCC_DISABLE_SHIFT				16
+
+#define	TA_CNTL_AUX					0x9508
+
+#define CC_RB_BACKEND_DISABLE				0x98F4
+#define		BACKEND_DISABLE(x)     			((x) << 16)
+#define GB_ADDR_CONFIG  				0x98F8
+#define		NUM_PIPES(x)				((x) << 0)
+#define		NUM_PIPES_MASK				0x00000007
+#define		NUM_PIPES_SHIFT				0
+#define		PIPE_INTERLEAVE_SIZE(x)			((x) << 4)
+#define		PIPE_INTERLEAVE_SIZE_MASK		0x00000070
+#define		PIPE_INTERLEAVE_SIZE_SHIFT		4
+#define		NUM_SHADER_ENGINES(x)			((x) << 12)
+#define		NUM_SHADER_ENGINES_MASK			0x00003000
+#define		NUM_SHADER_ENGINES_SHIFT		12
+#define		SHADER_ENGINE_TILE_SIZE(x)     		((x) << 16)
+#define		SHADER_ENGINE_TILE_SIZE_MASK		0x00070000
+#define		SHADER_ENGINE_TILE_SIZE_SHIFT		16
+#define		NUM_GPUS(x)     			((x) << 20)
+#define		NUM_GPUS_MASK				0x00700000
+#define		NUM_GPUS_SHIFT				20
+#define		MULTI_GPU_TILE_SIZE(x)     		((x) << 24)
+#define		MULTI_GPU_TILE_SIZE_MASK		0x03000000
+#define		MULTI_GPU_TILE_SIZE_SHIFT		24
+#define		ROW_SIZE(x)             		((x) << 28)
+#define		ROW_SIZE_MASK				0x30000000
+#define		ROW_SIZE_SHIFT				28
+
+#define	GB_TILE_MODE0					0x9910
+#       define MICRO_TILE_MODE(x)				((x) << 0)
+#              define	ADDR_SURF_DISPLAY_MICRO_TILING		0
+#              define	ADDR_SURF_THIN_MICRO_TILING		1
+#              define	ADDR_SURF_DEPTH_MICRO_TILING		2
+#       define ARRAY_MODE(x)					((x) << 2)
+#              define	ARRAY_LINEAR_GENERAL			0
+#              define	ARRAY_LINEAR_ALIGNED			1
+#              define	ARRAY_1D_TILED_THIN1			2
+#              define	ARRAY_2D_TILED_THIN1			4
+#       define PIPE_CONFIG(x)					((x) << 6)
+#              define	ADDR_SURF_P2				0
+#              define	ADDR_SURF_P4_8x16			4
+#              define	ADDR_SURF_P4_16x16			5
+#              define	ADDR_SURF_P4_16x32			6
+#              define	ADDR_SURF_P4_32x32			7
+#              define	ADDR_SURF_P8_16x16_8x16			8
+#              define	ADDR_SURF_P8_16x32_8x16			9
+#              define	ADDR_SURF_P8_32x32_8x16			10
+#              define	ADDR_SURF_P8_16x32_16x16		11
+#              define	ADDR_SURF_P8_32x32_16x16		12
+#              define	ADDR_SURF_P8_32x32_16x32		13
+#              define	ADDR_SURF_P8_32x64_32x32		14
+#       define TILE_SPLIT(x)					((x) << 11)
+#              define	ADDR_SURF_TILE_SPLIT_64B		0
+#              define	ADDR_SURF_TILE_SPLIT_128B		1
+#              define	ADDR_SURF_TILE_SPLIT_256B		2
+#              define	ADDR_SURF_TILE_SPLIT_512B		3
+#              define	ADDR_SURF_TILE_SPLIT_1KB		4
+#              define	ADDR_SURF_TILE_SPLIT_2KB		5
+#              define	ADDR_SURF_TILE_SPLIT_4KB		6
+#       define BANK_WIDTH(x)					((x) << 14)
+#              define	ADDR_SURF_BANK_WIDTH_1			0
+#              define	ADDR_SURF_BANK_WIDTH_2			1
+#              define	ADDR_SURF_BANK_WIDTH_4			2
+#              define	ADDR_SURF_BANK_WIDTH_8			3
+#       define BANK_HEIGHT(x)					((x) << 16)
+#              define	ADDR_SURF_BANK_HEIGHT_1			0
+#              define	ADDR_SURF_BANK_HEIGHT_2			1
+#              define	ADDR_SURF_BANK_HEIGHT_4			2
+#              define	ADDR_SURF_BANK_HEIGHT_8			3
+#       define MACRO_TILE_ASPECT(x)				((x) << 18)
+#              define	ADDR_SURF_MACRO_ASPECT_1		0
+#              define	ADDR_SURF_MACRO_ASPECT_2		1
+#              define	ADDR_SURF_MACRO_ASPECT_4		2
+#              define	ADDR_SURF_MACRO_ASPECT_8		3
+#       define NUM_BANKS(x)					((x) << 20)
+#              define	ADDR_SURF_2_BANK			0
+#              define	ADDR_SURF_4_BANK			1
+#              define	ADDR_SURF_8_BANK			2
+#              define	ADDR_SURF_16_BANK			3
+
+#define	CB_PERFCOUNTER0_SELECT0				0x9a20
+#define	CB_PERFCOUNTER0_SELECT1				0x9a24
+#define	CB_PERFCOUNTER1_SELECT0				0x9a28
+#define	CB_PERFCOUNTER1_SELECT1				0x9a2c
+#define	CB_PERFCOUNTER2_SELECT0				0x9a30
+#define	CB_PERFCOUNTER2_SELECT1				0x9a34
+#define	CB_PERFCOUNTER3_SELECT0				0x9a38
+#define	CB_PERFCOUNTER3_SELECT1				0x9a3c
+
+#define	GC_USER_RB_BACKEND_DISABLE			0x9B7C
+#define		BACKEND_DISABLE_MASK			0x00FF0000
+#define		BACKEND_DISABLE_SHIFT			16
+
+#define	TCP_CHAN_STEER_LO				0xac0c
+#define	TCP_CHAN_STEER_HI				0xac10
+
+#define	CP_RB0_BASE					0xC100
+#define	CP_RB0_CNTL					0xC104
+#define		RB_BUFSZ(x)					((x) << 0)
+#define		RB_BLKSZ(x)					((x) << 8)
+#define		BUF_SWAP_32BIT					(2 << 16)
+#define		RB_NO_UPDATE					(1 << 27)
+#define		RB_RPTR_WR_ENA					(1 << 31)
+
+#define	CP_RB0_RPTR_ADDR				0xC10C
+#define	CP_RB0_RPTR_ADDR_HI				0xC110
+#define	CP_RB0_WPTR					0xC114
+
+#define	CP_PFP_UCODE_ADDR				0xC150
+#define	CP_PFP_UCODE_DATA				0xC154
+#define	CP_ME_RAM_RADDR					0xC158
+#define	CP_ME_RAM_WADDR					0xC15C
+#define	CP_ME_RAM_DATA					0xC160
+
+#define	CP_CE_UCODE_ADDR				0xC168
+#define	CP_CE_UCODE_DATA				0xC16C
+
+#define	CP_RB1_BASE					0xC180
+#define	CP_RB1_CNTL					0xC184
+#define	CP_RB1_RPTR_ADDR				0xC188
+#define	CP_RB1_RPTR_ADDR_HI				0xC18C
+#define	CP_RB1_WPTR					0xC190
+#define	CP_RB2_BASE					0xC194
+#define	CP_RB2_CNTL					0xC198
+#define	CP_RB2_RPTR_ADDR				0xC19C
+#define	CP_RB2_RPTR_ADDR_HI				0xC1A0
+#define	CP_RB2_WPTR					0xC1A4
+#define CP_INT_CNTL_RING0                               0xC1A8
+#define CP_INT_CNTL_RING1                               0xC1AC
+#define CP_INT_CNTL_RING2                               0xC1B0
+#       define CNTX_BUSY_INT_ENABLE                     (1 << 19)
+#       define CNTX_EMPTY_INT_ENABLE                    (1 << 20)
+#       define WAIT_MEM_SEM_INT_ENABLE                  (1 << 21)
+#       define TIME_STAMP_INT_ENABLE                    (1 << 26)
+#       define CP_RINGID2_INT_ENABLE                    (1 << 29)
+#       define CP_RINGID1_INT_ENABLE                    (1 << 30)
+#       define CP_RINGID0_INT_ENABLE                    (1 << 31)
+#define CP_INT_STATUS_RING0                             0xC1B4
+#define CP_INT_STATUS_RING1                             0xC1B8
+#define CP_INT_STATUS_RING2                             0xC1BC
+#       define WAIT_MEM_SEM_INT_STAT                    (1 << 21)
+#       define TIME_STAMP_INT_STAT                      (1 << 26)
+#       define CP_RINGID2_INT_STAT                      (1 << 29)
+#       define CP_RINGID1_INT_STAT                      (1 << 30)
+#       define CP_RINGID0_INT_STAT                      (1 << 31)
+
+#define	CP_DEBUG					0xC1FC
+
+#define RLC_CNTL                                          0xC300
+#       define RLC_ENABLE                                 (1 << 0)
+#define RLC_RL_BASE                                       0xC304
+#define RLC_RL_SIZE                                       0xC308
+#define RLC_LB_CNTL                                       0xC30C
+#define RLC_SAVE_AND_RESTORE_BASE                         0xC310
+#define RLC_LB_CNTR_MAX                                   0xC314
+#define RLC_LB_CNTR_INIT                                  0xC318
+
+#define RLC_CLEAR_STATE_RESTORE_BASE                      0xC320
+
+#define RLC_UCODE_ADDR                                    0xC32C
+#define RLC_UCODE_DATA                                    0xC330
+
+#define RLC_MC_CNTL                                       0xC344
+#define RLC_UCODE_CNTL                                    0xC348
+
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define SAMPLE_STREAMOUTSTATS1                   (1 << 0)
+#       define SAMPLE_STREAMOUTSTATS2                   (2 << 0)
+#       define SAMPLE_STREAMOUTSTATS3                   (3 << 0)
+#       define CACHE_FLUSH_TS                           (4 << 0)
+#       define CACHE_FLUSH                              (6 << 0)
+#       define CS_PARTIAL_FLUSH                         (7 << 0)
+#       define VGT_STREAMOUT_RESET                      (10 << 0)
+#       define END_OF_PIPE_INCR_DE                      (11 << 0)
+#       define END_OF_PIPE_IB_END                       (12 << 0)
+#       define RST_PIX_CNT                              (13 << 0)
+#       define VS_PARTIAL_FLUSH                         (15 << 0)
+#       define PS_PARTIAL_FLUSH                         (16 << 0)
+#       define CACHE_FLUSH_AND_INV_TS_EVENT             (20 << 0)
+#       define ZPASS_DONE                               (21 << 0)
+#       define CACHE_FLUSH_AND_INV_EVENT                (22 << 0)
+#       define PERFCOUNTER_START                        (23 << 0)
+#       define PERFCOUNTER_STOP                         (24 << 0)
+#       define PIPELINESTAT_START                       (25 << 0)
+#       define PIPELINESTAT_STOP                        (26 << 0)
+#       define PERFCOUNTER_SAMPLE                       (27 << 0)
+#       define SAMPLE_PIPELINESTAT                      (30 << 0)
+#       define SAMPLE_STREAMOUTSTATS                    (32 << 0)
+#       define RESET_VTX_CNT                            (33 << 0)
+#       define VGT_FLUSH                                (36 << 0)
+#       define BOTTOM_OF_PIPE_TS                        (40 << 0)
+#       define DB_CACHE_FLUSH_AND_INV                   (42 << 0)
+#       define FLUSH_AND_INV_DB_DATA_TS                 (43 << 0)
+#       define FLUSH_AND_INV_DB_META                    (44 << 0)
+#       define FLUSH_AND_INV_CB_DATA_TS                 (45 << 0)
+#       define FLUSH_AND_INV_CB_META                    (46 << 0)
+#       define CS_DONE                                  (47 << 0)
+#       define PS_DONE                                  (48 << 0)
+#       define FLUSH_AND_INV_CB_PIXEL_DATA              (49 << 0)
+#       define THREAD_TRACE_START                       (51 << 0)
+#       define THREAD_TRACE_STOP                        (52 << 0)
+#       define THREAD_TRACE_FLUSH                       (54 << 0)
+#       define THREAD_TRACE_FINISH                      (55 << 0)
+
+/*
+ * PM4
+ */
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
+			 (((reg) >> 2) & 0xFFFF) |			\
+			 ((n) & 0x3FFF) << 16)
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
+			 (((op) & 0xFF) << 8) |				\
+			 ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define	PACKET3_NOP					0x10
+#define	PACKET3_SET_BASE				0x11
+#define		PACKET3_BASE_INDEX(x)                  ((x) << 0)
+#define			GDS_PARTITION_BASE		2
+#define			CE_PARTITION_BASE		3
+#define	PACKET3_CLEAR_STATE				0x12
+#define	PACKET3_INDEX_BUFFER_SIZE			0x13
+#define	PACKET3_DISPATCH_DIRECT				0x15
+#define	PACKET3_DISPATCH_INDIRECT			0x16
+#define	PACKET3_ALLOC_GDS				0x1B
+#define	PACKET3_WRITE_GDS_RAM				0x1C
+#define	PACKET3_ATOMIC_GDS				0x1D
+#define	PACKET3_ATOMIC					0x1E
+#define	PACKET3_OCCLUSION_QUERY				0x1F
+#define	PACKET3_SET_PREDICATION				0x20
+#define	PACKET3_REG_RMW					0x21
+#define	PACKET3_COND_EXEC				0x22
+#define	PACKET3_PRED_EXEC				0x23
+#define	PACKET3_DRAW_INDIRECT				0x24
+#define	PACKET3_DRAW_INDEX_INDIRECT			0x25
+#define	PACKET3_INDEX_BASE				0x26
+#define	PACKET3_DRAW_INDEX_2				0x27
+#define	PACKET3_CONTEXT_CONTROL				0x28
+#define	PACKET3_INDEX_TYPE				0x2A
+#define	PACKET3_DRAW_INDIRECT_MULTI			0x2C
+#define	PACKET3_DRAW_INDEX_AUTO				0x2D
+#define	PACKET3_DRAW_INDEX_IMMD				0x2E
+#define	PACKET3_NUM_INSTANCES				0x2F
+#define	PACKET3_DRAW_INDEX_MULTI_AUTO			0x30
+#define	PACKET3_INDIRECT_BUFFER_CONST			0x31
+#define	PACKET3_INDIRECT_BUFFER				0x32
+#define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
+#define	PACKET3_DRAW_INDEX_OFFSET_2			0x35
+#define	PACKET3_DRAW_INDEX_MULTI_ELEMENT		0x36
+#define	PACKET3_WRITE_DATA				0x37
+#define	PACKET3_DRAW_INDEX_INDIRECT_MULTI		0x38
+#define	PACKET3_MEM_SEMAPHORE				0x39
+#define	PACKET3_MPEG_INDEX				0x3A
+#define	PACKET3_COPY_DW					0x3B
+#define	PACKET3_WAIT_REG_MEM				0x3C
+#define	PACKET3_MEM_WRITE				0x3D
+#define	PACKET3_COPY_DATA				0x40
+#define	PACKET3_PFP_SYNC_ME				0x42
+#define	PACKET3_SURFACE_SYNC				0x43
+#              define PACKET3_DEST_BASE_0_ENA      (1 << 0)
+#              define PACKET3_DEST_BASE_1_ENA      (1 << 1)
+#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
+#              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
+#              define PACKET3_CB2_DEST_BASE_ENA    (1 << 8)
+#              define PACKET3_CB3_DEST_BASE_ENA    (1 << 9)
+#              define PACKET3_CB4_DEST_BASE_ENA    (1 << 10)
+#              define PACKET3_CB5_DEST_BASE_ENA    (1 << 11)
+#              define PACKET3_CB6_DEST_BASE_ENA    (1 << 12)
+#              define PACKET3_CB7_DEST_BASE_ENA    (1 << 13)
+#              define PACKET3_DB_DEST_BASE_ENA     (1 << 14)
+#              define PACKET3_DEST_BASE_2_ENA      (1 << 19)
+#              define PACKET3_DEST_BASE_3_ENA      (1 << 21)
+#              define PACKET3_TCL1_ACTION_ENA      (1 << 22)
+#              define PACKET3_TC_ACTION_ENA        (1 << 23)
+#              define PACKET3_CB_ACTION_ENA        (1 << 25)
+#              define PACKET3_DB_ACTION_ENA        (1 << 26)
+#              define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+#              define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define	PACKET3_ME_INITIALIZE				0x44
+#define		PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
+#define	PACKET3_COND_WRITE				0x45
+#define	PACKET3_EVENT_WRITE				0x46
+#define		EVENT_TYPE(x)                           ((x) << 0)
+#define		EVENT_INDEX(x)                          ((x) << 8)
+                /* 0 - any non-TS event
+		 * 1 - ZPASS_DONE
+		 * 2 - SAMPLE_PIPELINESTAT
+		 * 3 - SAMPLE_STREAMOUTSTAT*
+		 * 4 - *S_PARTIAL_FLUSH
+		 * 5 - EOP events
+		 * 6 - EOS events
+		 * 7 - CACHE_FLUSH, CACHE_FLUSH_AND_INV_EVENT
+		 */
+#define		INV_L2                                  (1 << 20)
+                /* INV TC L2 cache when EVENT_INDEX = 7 */
+#define	PACKET3_EVENT_WRITE_EOP				0x47
+#define		DATA_SEL(x)                             ((x) << 29)
+                /* 0 - discard
+		 * 1 - send low 32bit data
+		 * 2 - send 64bit data
+		 * 3 - send 64bit counter value
+		 */
+#define		INT_SEL(x)                              ((x) << 24)
+                /* 0 - none
+		 * 1 - interrupt only (DATA_SEL = 0)
+		 * 2 - interrupt when data write is confirmed
+		 */
+#define	PACKET3_EVENT_WRITE_EOS				0x48
+#define	PACKET3_PREAMBLE_CNTL				0x4A
+#              define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE     (2 << 28)
+#              define PACKET3_PREAMBLE_END_CLEAR_STATE       (3 << 28)
+#define	PACKET3_ONE_REG_WRITE				0x57
+#define	PACKET3_LOAD_CONFIG_REG				0x5F
+#define	PACKET3_LOAD_CONTEXT_REG			0x60
+#define	PACKET3_LOAD_SH_REG				0x61
+#define	PACKET3_SET_CONFIG_REG				0x68
+#define		PACKET3_SET_CONFIG_REG_START			0x00008000
+#define		PACKET3_SET_CONFIG_REG_END			0x0000b000
+#define	PACKET3_SET_CONTEXT_REG				0x69
+#define		PACKET3_SET_CONTEXT_REG_START			0x00028000
+#define		PACKET3_SET_CONTEXT_REG_END			0x00029000
+#define	PACKET3_SET_CONTEXT_REG_INDIRECT		0x73
+#define	PACKET3_SET_RESOURCE_INDIRECT			0x74
+#define	PACKET3_SET_SH_REG				0x76
+#define		PACKET3_SET_SH_REG_START			0x0000b000
+#define		PACKET3_SET_SH_REG_END				0x0000c000
+#define	PACKET3_SET_SH_REG_OFFSET			0x77
+#define	PACKET3_ME_WRITE				0x7A
+#define	PACKET3_SCRATCH_RAM_WRITE			0x7D
+#define	PACKET3_SCRATCH_RAM_READ			0x7E
+#define	PACKET3_CE_WRITE				0x7F
+#define	PACKET3_LOAD_CONST_RAM				0x80
+#define	PACKET3_WRITE_CONST_RAM				0x81
+#define	PACKET3_WRITE_CONST_RAM_OFFSET			0x82
+#define	PACKET3_DUMP_CONST_RAM				0x83
+#define	PACKET3_INCREMENT_CE_COUNTER			0x84
+#define	PACKET3_INCREMENT_DE_COUNTER			0x85
+#define	PACKET3_WAIT_ON_CE_COUNTER			0x86
+#define	PACKET3_WAIT_ON_DE_COUNTER			0x87
+#define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
+#define	PACKET3_SET_CE_DE_COUNTERS			0x89
+#define	PACKET3_WAIT_ON_AVAIL_BUFFER			0x8A
+
+#endif
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 14b6cd0..58d0bda 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -202,11 +202,49 @@
 	{0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x677B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6826, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x683B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x683D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x683F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6840, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6841, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6842, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6843, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6849, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x684C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
@@ -512,6 +550,22 @@
 	{0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9904, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9905, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9906, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9908, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0, 0, 0}
 
 #define r128_PCI_IDS \
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index cb2f0c3..7c491b4 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -908,6 +908,7 @@
 #define RADEON_CHUNK_ID_RELOCS	0x01
 #define RADEON_CHUNK_ID_IB	0x02
 #define RADEON_CHUNK_ID_FLAGS	0x03
+#define RADEON_CHUNK_ID_CONST_IB	0x04
 
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
@@ -962,6 +963,8 @@
 #define RADEON_INFO_VA_START		0x0e
 /* maximum size of ib using the virtual memory cs */
 #define RADEON_INFO_IB_VM_MAX_SIZE	0x0f
+/* max pipes - needed for compute shaders */
+#define RADEON_INFO_MAX_PIPES		0x10
 
 struct drm_radeon_info {
 	uint32_t		request;