diff options
| -rw-r--r-- | src/compiler/CompilerIR.h | 32 | ||||
| -rw-r--r-- | src/compiler/CompilerUtility.h | 4 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 306 | ||||
| -rw-r--r-- | src/compiler/Dataflow.h | 6 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 12 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 174 | ||||
| -rw-r--r-- | src/compiler/Utility.cc | 37 | ||||
| -rw-r--r-- | src/compiler/codegen/CodegenFactory.cc | 14 | ||||
| -rw-r--r-- | src/compiler/codegen/CompilerCodegen.h | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 1 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 31 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArchUtility.cc | 50 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmLIR.h | 15 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 185 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/MethodCodegenDriver.cc | 107 |
15 files changed, 554 insertions, 422 deletions
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index dac63cf29e..5be48ab721 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -32,23 +32,30 @@ typedef enum RegLocationType { kLocSpill, } RegLocationType; +typedef struct PromotionMap { + RegLocationType coreLocation:3; + u1 coreReg; + RegLocationType fpLocation:3; + u1 fpReg; + bool firstInPair; +} PromotionMap; + typedef struct RegLocation { - RegLocationType location:2; + RegLocationType location:3; unsigned wide:1; - unsigned fp:1; // Hint for float/double - u1 lowReg:6; // First physical register - u1 highReg:6; // 2nd physical register (if wide) - s2 sRegLow; // SSA name for low Dalvik word - unsigned home:1; // Does this represent the home location? - RegLocationType fpLocation:2; // Used only for non-SSA loc records - u1 fpLowReg:6; // Used only for non-SSA loc records - u1 fpHighReg:6; // Used only for non-SSA loc records - int spOffset:17; + unsigned defined:1; // Do we know the type? + unsigned fp:1; // Floating point? + unsigned core:1; // Non-floating point? + unsigned highWord:1; // High word of pair? + unsigned home:1; // Does this represent the home location? + u1 lowReg; // First physical register + u1 highReg; // 2nd physical register (if wide) + s2 sRegLow; // SSA name for low Dalvik word } RegLocation; #define INVALID_SREG (-1) #define INVALID_VREG (0xFFFFU) -#define INVALID_REG (0x3F) +#define INVALID_REG (0xFF) #define INVALID_OFFSET (-1) typedef enum BBType { @@ -233,6 +240,9 @@ typedef struct CompilationUnit { RegLocation* regLocation; int sequenceNumber; + /* Keep track of Dalvik vReg to physical register mappings */ + PromotionMap* promotionMap; + /* * Set to the Dalvik PC of the switch instruction if it has more than * MAX_CHAINED_SWITCH_CASES cases. 
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h index c0dcaf76c1..0a7d079b50 100644 --- a/src/compiler/CompilerUtility.h +++ b/src/compiler/CompilerUtility.h @@ -19,8 +19,8 @@ #include "Dalvik.h" -/* Each arena page has some overhead, so take a few bytes off 8k */ -#define ARENA_DEFAULT_SIZE 8100 +/* Each arena page has some overhead, so take a few bytes off */ +#define ARENA_DEFAULT_SIZE ((256 * 1024) - 256) /* Allocate the initial memory block for arena-based allocation */ bool oatHeapInit(void); diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index 09a362ef25..e73ccc931b 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -50,13 +50,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE, // 07 OP_MOVE_OBJECT vA, vB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 08 OP_MOVE_OBJECT_FROM16 vAA, vBBBB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 09 OP_MOVE_OBJECT_16 vAAAA, vBBBB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 0A OP_MOVE_RESULT vAA DF_DA, @@ -65,10 +65,10 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE, // 0C OP_MOVE_RESULT_OBJECT vAA - DF_DA, + DF_DA | DF_CORE_A, // 0D OP_MOVE_EXCEPTION vAA - DF_DA, + DF_DA | DF_CORE_A, // 0E OP_RETURN_VOID DF_NOP, @@ -80,7 +80,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA_WIDE, // 11 OP_RETURN_OBJECT vAA - DF_UA, + DF_UA | DF_CORE_A, // 12 OP_CONST_4 vA, #+B DF_DA | DF_SETS_CONST, @@ -107,34 +107,34 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_SETS_CONST, // 1A OP_CONST_STRING vAA, string@BBBB - DF_DA, + DF_DA | DF_CORE_A, // 1B OP_CONST_STRING_JUMBO vAA, string@BBBBBBBB - DF_DA, + DF_DA | DF_CORE_A, // 1C OP_CONST_CLASS vAA, type@BBBB - DF_DA, + 
DF_DA | DF_CORE_A, // 1D OP_MONITOR_ENTER vAA - DF_UA | DF_NULL_CHK_0, + DF_UA | DF_NULL_CHK_0 | DF_CORE_A, // 1E OP_MONITOR_EXIT vAA - DF_UA | DF_NULL_CHK_0, + DF_UA | DF_NULL_CHK_0 | DF_CORE_A, // 1F OP_CHK_CAST vAA, type@BBBB - DF_UA, + DF_UA | DF_CORE_A, // 20 OP_INSTANCE_OF vA, vB, type@CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 21 OP_ARRAY_LENGTH vA, vB - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B, // 22 OP_NEW_INSTANCE vAA, type@BBBB - DF_DA | DF_NON_NULL_DST, + DF_DA | DF_NON_NULL_DST | DF_CORE_A, // 23 OP_NEW_ARRAY vA, vB, type@CCCC - DF_DA | DF_UB | DF_NON_NULL_DST, + DF_DA | DF_UB | DF_NON_NULL_DST | DF_CORE_A | DF_CORE_B, // 24 OP_FILLED_NEW_ARRAY {vD, vE, vF, vG, vA} DF_FORMAT_35C | DF_NON_NULL_RET, @@ -143,10 +143,10 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_FORMAT_3RC | DF_NON_NULL_RET, // 26 OP_FILL_ARRAY_DATA vAA, +BBBBBBBB - DF_UA, + DF_UA | DF_CORE_A, // 27 OP_THROW vAA - DF_UA, + DF_UA | DF_CORE_A, // 28 OP_GOTO DF_NOP, @@ -164,56 +164,56 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA, // 2D OP_CMPL_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A, // 2E OP_CMPG_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A, // 2F OP_CMPL_DOUBLE vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A, // 30 OP_CMPG_DOUBLE vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A, // 31 OP_CMP_LONG vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 32 OP_IF_EQ vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 33 OP_IF_NE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 
34 OP_IF_LT vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 35 OP_IF_GE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 36 OP_IF_GT vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 37 OP_IF_LE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 38 OP_IF_EQZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 39 OP_IF_NEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3A OP_IF_LTZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3B OP_IF_GEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3C OP_IF_GTZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3D OP_IF_LEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3E OP_UNUSED_3E DF_NOP, @@ -234,88 +234,88 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_NOP, // 44 OP_AGET vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 45 OP_AGET_WIDE vAA, vBB, vCC - DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 46 OP_AGET_OBJECT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 47 OP_AGET_BOOLEAN vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 48 OP_AGET_BYTE vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 49 OP_AGET_CHAR vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | 
DF_CORE_B | DF_CORE_C, // 4A OP_AGET_SHORT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 4B OP_APUT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4C OP_APUT_WIDE vAA, vBB, vCC - DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4D OP_APUT_OBJECT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4E OP_APUT_BOOLEAN vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4F OP_APUT_BYTE vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 50 OP_APUT_CHAR vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 51 OP_APUT_SHORT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 52 OP_IGET vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 53 OP_IGET_WIDE vA, vB, field@CCCC - DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 54 OP_IGET_OBJECT vA, vB, field@CCCC - 
DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 55 OP_IGET_BOOLEAN vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 56 OP_IGET_BYTE vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 57 OP_IGET_CHAR vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 58 OP_IGET_SHORT vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 59 OP_IPUT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5A OP_IPUT_WIDE vA, vB, field@CCCC - DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5B OP_IPUT_OBJECT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5C OP_IPUT_BOOLEAN vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5D OP_IPUT_BYTE vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5E OP_IPUT_CHAR vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5F OP_IPUT_SHORT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 60 OP_SGET vAA, field@BBBB DF_DA | DF_IS_GETTER, @@ -324,7 +324,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_IS_GETTER, // 62 OP_SGET_OBJECT vAA, field@BBBB - DF_DA | DF_IS_GETTER, + DF_DA | DF_IS_GETTER | DF_CORE_A, // 63 OP_SGET_BOOLEAN vAA, 
field@BBBB DF_DA | DF_IS_GETTER, @@ -345,7 +345,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA_WIDE | DF_IS_SETTER, // 69 OP_SPUT_OBJECT vAA, field@BBBB - DF_UA | DF_IS_SETTER, + DF_UA | DF_IS_SETTER | DF_CORE_A, // 6A OP_SPUT_BOOLEAN vAA, field@BBBB DF_UA | DF_IS_SETTER, @@ -399,16 +399,16 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_NOP, // 7B OP_NEG_INT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 7C OP_NOT_INT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 7D OP_NEG_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 7E OP_NOT_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 7F OP_NEG_FLOAT vA, vB DF_DA | DF_UB | DF_FP_A | DF_FP_B, @@ -417,115 +417,115 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, // 81 OP_INT_TO_LONG vA, vB - DF_DA_WIDE | DF_UB, + DF_DA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // 82 OP_INT_TO_FLOAT vA, vB - DF_DA | DF_UB | DF_FP_A, + DF_DA | DF_UB | DF_FP_A | DF_CORE_B, // 83 OP_INT_TO_DOUBLE vA, vB - DF_DA_WIDE | DF_UB | DF_FP_A, + DF_DA_WIDE | DF_UB | DF_FP_A | DF_CORE_B, // 84 OP_LONG_TO_INT vA, vB - DF_DA | DF_UB_WIDE, + DF_DA | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 85 OP_LONG_TO_FLOAT vA, vB - DF_DA | DF_UB_WIDE | DF_FP_A, + DF_DA | DF_UB_WIDE | DF_FP_A | DF_CORE_B, // 86 OP_LONG_TO_DOUBLE vA, vB - DF_DA_WIDE | DF_UB_WIDE | DF_FP_A, + DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_CORE_B, // 87 OP_FLOAT_TO_INT vA, vB - DF_DA | DF_UB | DF_FP_B, + DF_DA | DF_UB | DF_FP_B | DF_CORE_A, // 88 OP_FLOAT_TO_LONG vA, vB - DF_DA_WIDE | DF_UB | DF_FP_B, + DF_DA_WIDE | DF_UB | DF_FP_B | DF_CORE_A, // 89 OP_FLOAT_TO_DOUBLE vA, vB DF_DA_WIDE | DF_UB | DF_FP_A | DF_FP_B, // 8A OP_DOUBLE_TO_INT vA, vB - DF_DA | DF_UB_WIDE | DF_FP_B, + DF_DA | DF_UB_WIDE | DF_FP_B | DF_CORE_A, // 8B OP_DOUBLE_TO_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE | DF_FP_B, + DF_DA_WIDE | DF_UB_WIDE | DF_FP_B | 
DF_CORE_A, // 8C OP_DOUBLE_TO_FLOAT vA, vB DF_DA | DF_UB_WIDE | DF_FP_A | DF_FP_B, // 8D OP_INT_TO_BYTE vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 8E OP_INT_TO_CHAR vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 8F OP_INT_TO_SHORT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 90 OP_ADD_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_IS_LINEAR, + DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 91 OP_SUB_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_IS_LINEAR, + DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 92 OP_MUL_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 93 OP_DIV_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 94 OP_REM_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 95 OP_AND_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 96 OP_OR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 97 OP_XOR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 98 OP_SHL_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 99 OP_SHR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9A OP_USHR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9B OP_ADD_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9C OP_SUB_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9D 
OP_MUL_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9E OP_DIV_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9F OP_REM_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A0 OP_AND_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A1 OP_OR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A2 OP_XOR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A3 OP_SHL_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A4 OP_SHR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A5 OP_USHR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A6 OP_ADD_FLOAT vAA, vBB, vCC DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C, @@ -558,70 +558,70 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, // B0 OP_ADD_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B1 OP_SUB_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B2 OP_MUL_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B3 OP_DIV_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B4 OP_REM_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | 
DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B5 OP_AND_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B6 OP_OR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B7 OP_XOR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B8 OP_SHL_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B9 OP_SHR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // BA OP_USHR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // BB OP_ADD_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BC OP_SUB_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BD OP_MUL_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BE OP_DIV_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BF OP_REM_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C0 OP_AND_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C1 OP_OR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C2 OP_XOR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C3 OP_SHL_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C4 OP_SHR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C5 
OP_USHR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C6 OP_ADD_FLOAT_2ADDR vA, vB DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B, @@ -654,67 +654,67 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, // D0 OP_ADD_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D1 OP_RSUB_INT vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D2 OP_MUL_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D3 OP_DIV_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D4 OP_REM_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D5 OP_AND_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D6 OP_OR_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D7 OP_XOR_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D8 OP_ADD_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB | DF_IS_LINEAR, + DF_DA | DF_UB | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B, // D9 OP_RSUB_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DA OP_MUL_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DB OP_DIV_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DC OP_REM_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DD OP_AND_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DE OP_OR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DF OP_XOR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E0 OP_SHL_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E1 OP_SHR_INT_LIT8 vAA, vBB, 
#+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E2 OP_USHR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E3 OP_IGET_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_B, // E4 OP_IPUT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_B, // E5 OP_SGET_VOLATILE DF_DA, @@ -723,13 +723,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA, // E7 OP_IGET_OBJECT_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B, // E8 OP_IGET_WIDE_VOLATILE - DF_DA_WIDE | DF_UB | DF_NULL_CHK_0, + DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_CORE_B, // E9 OP_IPUT_WIDE_VOLATILE - DF_UA_WIDE | DF_UB | DF_NULL_CHK_1, + DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_CORE_B, // EA OP_SGET_WIDE_VOLATILE DF_DA_WIDE, @@ -786,13 +786,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_FORMAT_3RC | DF_NULL_CHK_OUT0, // FC OP_IPUT_OBJECT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_A | DF_CORE_B, // FD OP_SGET_OBJECT_VOLATILE - DF_DA, + DF_DA | DF_CORE_A, // FE OP_SPUT_OBJECT_VOLATILE - DF_UA, + DF_UA | DF_CORE_A, // FF OP_DISPATCH_FF DF_NOP, diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h index e4a37265be..1696e44f3b 100644 --- a/src/compiler/Dataflow.h +++ b/src/compiler/Dataflow.h @@ -47,6 +47,9 @@ typedef enum DataFlowAttributePos { kFPA, kFPB, kFPC, + kCoreA, + kCoreB, + kCoreC, kGetter, kSetter, } DataFlowAttributes; @@ -78,6 +81,9 @@ typedef enum DataFlowAttributePos { #define DF_FP_A (1 << kFPA) #define DF_FP_B (1 << kFPB) #define DF_FP_C (1 << kFPC) +#define DF_CORE_A (1 << kCoreA) +#define DF_CORE_B (1 << kCoreB) +#define DF_CORE_C (1 << kCoreC) #define DF_IS_GETTER (1 << kGetter) #define DF_IS_SETTER (1 << kSetter) diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index 33dbba4ba5..015aee974a 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ 
-23,12 +23,12 @@ /* Default optimizer/debug setting for the compiler. */ uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations - (1 << kLoadStoreElimination) | - (1 << kLoadHoisting) | - (1 << kSuppressLoads) | - (1 << kNullCheckElimination) | - (1 << kPromoteRegs) | - (1 << kTrackLiveTemps) | + //(1 << kLoadStoreElimination) | + //(1 << kLoadHoisting) | + //(1 << kSuppressLoads) | + //(1 << kNullCheckElimination) | + //(1 << kPromoteRegs) | + //(1 << kTrackLiveTemps) | 0; uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index aaf9b97f15..b4cc0b5689 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -21,8 +21,25 @@ STATIC bool setFp(CompilationUnit* cUnit, int index, bool isFP) { bool change = false; + if (cUnit->regLocation[index].highWord) { + return change; + } if (isFP && !cUnit->regLocation[index].fp) { cUnit->regLocation[index].fp = true; + cUnit->regLocation[index].defined = true; + change = true; + } + return change; +} + +STATIC bool setCore(CompilationUnit* cUnit, int index, bool isCore) { + bool change = false; + if (cUnit->regLocation[index].highWord) { + return change; + } + if (isCore && !cUnit->regLocation[index].defined) { + cUnit->regLocation[index].core = true; + cUnit->regLocation[index].defined = true; change = true; } return change; @@ -66,21 +83,60 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) SSARepresentation *ssaRep = mir->ssaRep; if (ssaRep) { int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode]; - int next = 0; - if (attrs & DF_DA_WIDE) { - cUnit->regLocation[ssaRep->defs[0]].wide = true; + + // Handle defs + if (attrs & (DF_DA | DF_DA_WIDE)) { + if (attrs & DF_CORE_A) { + changed |= setCore(cUnit, ssaRep->defs[0], true); + } + if (attrs & DF_DA_WIDE) { + cUnit->regLocation[ssaRep->defs[0]].wide = true; + cUnit->regLocation[ssaRep->defs[1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, 
ssaRep->defs[0])+1, + oatS2VReg(cUnit, ssaRep->defs[1])); + } } - if (attrs & DF_UA_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + + // Handles uses + int next = 0; + if (attrs & (DF_UA | DF_UA_WIDE)) { + if (attrs & DF_CORE_A) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UA_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + next += 2; + } else { + next++; + } } - if (attrs & DF_UB_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + if (attrs & (DF_UB | DF_UB_WIDE)) { + if (attrs & DF_CORE_B) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UB_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + next += 2; + } else { + next++; + } } - if (attrs & DF_UC_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + if (attrs & (DF_UC | DF_UC_WIDE)) { + if (attrs & DF_CORE_C) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UC_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + } } // Special-case handling for format 35c/3rc invokes @@ -97,6 +153,8 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) // If this is a non-static invoke, skip implicit "this" if (((mir->dalvikInsn.opcode != OP_INVOKE_STATIC) && (mir->dalvikInsn.opcode != OP_INVOKE_STATIC_RANGE))) { + cUnit->regLocation[ssaRep->uses[next]].defined = true; + cUnit->regLocation[ssaRep->uses[next]].core = true; next++; } uint32_t 
cpos = 1; @@ -108,16 +166,26 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) ssaRep->fpUse[i] = true; ssaRep->fpUse[i+1] = true; cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].highWord + = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, + oatS2VReg(cUnit, ssaRep->uses[i+1])); i++; break; case 'J': cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].highWord + = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, + oatS2VReg(cUnit, ssaRep->uses[i+1])); + changed |= setCore(cUnit, ssaRep->uses[i],true); i++; break; case 'F': ssaRep->fpUse[i] = true; break; default: + changed |= setCore(cUnit,ssaRep->uses[i], true); break; } i++; @@ -135,13 +203,25 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } // Special-case handling for moves & Phi if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { - bool isFP = cUnit->regLocation[ssaRep->defs[0]].fp; + // If any of our inputs or outputs is defined, set all + bool definedFP = false; + bool definedCore = false; + definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined && + cUnit->regLocation[ssaRep->defs[0]].fp); + definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined && + cUnit->regLocation[ssaRep->defs[0]].core); for (int i = 0; i < ssaRep->numUses; i++) { - isFP |= cUnit->regLocation[ssaRep->uses[i]].fp; + definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined && + cUnit->regLocation[ssaRep->uses[i]].fp); + definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined + && cUnit->regLocation[ssaRep->uses[i]].core); } - changed |= setFp(cUnit, ssaRep->defs[0], isFP); + DCHECK(!(definedFP && definedCore)); + changed |= setFp(cUnit, ssaRep->defs[0], definedFP); + changed |= setCore(cUnit, ssaRep->defs[0], definedCore); for (int i = 0; i < ssaRep->numUses; i++) { - changed |= setFp(cUnit, ssaRep->uses[i], isFP); + changed |= setFp(cUnit, ssaRep->uses[i], definedFP); + changed |= 
setCore(cUnit, ssaRep->uses[i], definedCore); } } } @@ -155,20 +235,19 @@ void oatDumpRegLocTable(RegLocation* table, int count) { for (int i = 0; i < count; i++) { char buf[100]; - snprintf(buf, 100, "Loc[%02d] : %s, %c %c r%d r%d S%d : %s s%d s%d", + snprintf(buf, 100, "Loc[%02d] : %s, %c %c %c %c %c %c%d %c%d S%d", i, storageName[table[i].location], table[i].wide ? 'W' : 'N', - table[i].fp ? 'F' : 'C', table[i].lowReg, table[i].highReg, - table[i].sRegLow, storageName[table[i].fpLocation], - table[i].fpLowReg & FP_REG_MASK, table[i].fpHighReg & - FP_REG_MASK); + table[i].defined ? 'D' : 'U', table[i].fp ? 'F' : 'C', + table[i].highWord ? 'H' : 'L', table[i].home ? 'h' : 't', + FPREG(table[i].lowReg) ? 's' : 'r', table[i].lowReg & FP_REG_MASK, + FPREG(table[i].highReg) ? 's' : 'r', table[i].highReg & FP_REG_MASK, + table[i].sRegLow); LOG(INFO) << buf; } } -static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, INVALID_REG, - INVALID_REG, INVALID_SREG, 0, - kLocDalvikFrame, INVALID_REG, INVALID_REG, - INVALID_OFFSET}; +static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, + INVALID_REG, INVALID_REG, INVALID_SREG}; /* * Simple register allocation. 
Some Dalvik virtual registers may @@ -189,6 +268,10 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) } cUnit->regLocation = loc; + /* Allocation the promotion map */ + cUnit->promotionMap = (PromotionMap*)oatNew( cUnit->method->NumRegisters() + * sizeof(cUnit->promotionMap[0]), true); + /* Add types of incoming arguments based on signature */ int numRegs = cUnit->method->NumRegisters(); int numIns = cUnit->method->NumIns(); @@ -196,16 +279,39 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) int sReg = numRegs - numIns; if (!cUnit->method->IsStatic()) { // Skip past "this" + cUnit->regLocation[sReg].defined = true; + cUnit->regLocation[sReg].core = true; sReg++; } const String* shorty = cUnit->method->GetShorty(); for (int i = 1; i < shorty->GetLength(); i++) { - char arg = shorty->CharAt(i); - // Is it wide? - if ((arg == 'D') || (arg == 'J')) { - cUnit->regLocation[sReg].wide = true; - cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp; - sReg++; // Skip to next + switch(shorty->CharAt(i)) { + case 'D': + cUnit->regLocation[sReg].wide = true; + cUnit->regLocation[sReg+1].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, + oatS2VReg(cUnit, sReg+1)); + cUnit->regLocation[sReg].fp = true; + cUnit->regLocation[sReg].defined = true; + sReg++; + break; + case 'J': + cUnit->regLocation[sReg].wide = true; + cUnit->regLocation[sReg+1].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, + oatS2VReg(cUnit, sReg+1)); + cUnit->regLocation[sReg].core = true; + cUnit->regLocation[sReg].defined = true; + sReg++; + break; + case 'F': + cUnit->regLocation[sReg].fp = true; + cUnit->regLocation[sReg].defined = true; + break; + default: + cUnit->regLocation[sReg].core = true; + cUnit->regLocation[sReg].defined = true; + break; } sReg++; } @@ -254,10 +360,4 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) cUnit->numPadding + 2) * 4; cUnit->insOffset = cUnit->frameSize + 4; cUnit->regsOffset = (cUnit->numOuts + cUnit->numPadding + 1) * 4; - - /* Compute sp-relative 
home location offsets */ - for (i = 0; i < cUnit->numSSARegs; i++) { - int vReg = oatS2VReg(cUnit, cUnit->regLocation[i].sRegLow); - cUnit->regLocation[i].spOffset = oatVRegOffset(cUnit, vReg); - } } diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc index 0fc8a8046d..e3c20ecc3c 100644 --- a/src/compiler/Utility.cc +++ b/src/compiler/Utility.cc @@ -61,6 +61,7 @@ retry: */ if (currentArena->next) { currentArena = currentArena->next; + currentArena->bytesAllocated = 0; goto retry; } @@ -88,12 +89,10 @@ retry: /* Reclaim all the arena blocks allocated so far */ void oatArenaReset(void) { - ArenaMemBlock *block; - - for (block = arenaHead; block; block = block->next) { - block->bytesAllocated = 0; - } currentArena = arenaHead; + if (currentArena) { + currentArena->bytesAllocated = 0; + } } /* Growable List initialization */ @@ -201,6 +200,15 @@ void oatDumpStats(void) oatArchDump(); } +static uint32_t checkMasks[32] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x00000100, 0x00000200, + 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, + 0x00008000, 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, 0x01000000, + 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, + 0x40000000, 0x80000000 }; + /* * Allocate a bit vector with enough space to hold at least the specified * number of bits. 
@@ -231,7 +239,7 @@ bool oatIsBitSet(const ArenaBitVector* pBits, unsigned int num) { DCHECK_LT(num, pBits->storageSize * sizeof(u4) * 8); - unsigned int val = pBits->storage[num >> 5] & (1 << (num & 0x1f)); + unsigned int val = pBits->storage[num >> 5] & checkMasks[num & 0x1f]; return (val != 0); } @@ -270,7 +278,7 @@ bool oatSetBit(ArenaBitVector* pBits, unsigned int num) pBits->storageSize = newSize; } - pBits->storage[num >> 5] |= 1 << (num & 0x1f); + pBits->storage[num >> 5] |= checkMasks[num & 0x1f]; return true; } @@ -288,7 +296,7 @@ bool oatClearBit(ArenaBitVector* pBits, unsigned int num) LOG(FATAL) << "Attempt to clear a bit not set in the vector yet";; } - pBits->storage[num >> 5] &= ~(1 << (num & 0x1f)); + pBits->storage[num >> 5] &= ~checkMasks[num & 0x1f]; return true; } @@ -462,13 +470,20 @@ int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator) DCHECK_EQ(iterator->bitSize, pBits->storageSize * sizeof(u4) * 8); if (bitIndex >= iterator->bitSize) return -1; - for (; bitIndex < iterator->bitSize; bitIndex++) { + for (; bitIndex < iterator->bitSize;) { unsigned int wordIndex = bitIndex >> 5; - unsigned int mask = 1 << (bitIndex & 0x1f); - if (pBits->storage[wordIndex] & mask) { + unsigned int bitPos = bitIndex & 0x1f; + unsigned int word = pBits->storage[wordIndex]; + if (word & checkMasks[bitPos]) { iterator->idx = bitIndex+1; return bitIndex; } + if (word == 0) { + // Helps if this is a sparse vector + bitIndex += (32 - bitPos); + } else { + bitIndex++; + } } /* No more set bits */ return -1; diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc index 55ed8af080..5dbcd9769a 100644 --- a/src/compiler/codegen/CodegenFactory.cc +++ b/src/compiler/codegen/CodegenFactory.cc @@ -58,7 +58,7 @@ STATIC void loadValueDirect(CompilationUnit* cUnit, RegLocation rlSrc, genRegCopy(cUnit, reg1, rlSrc.lowReg); } else { DCHECK(rlSrc.location == kLocDalvikFrame); - loadWordDisp(cUnit, rSP, rlSrc.spOffset, reg1); + 
loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1); } } @@ -88,7 +88,8 @@ STATIC void loadValueDirectWide(CompilationUnit* cUnit, RegLocation rlSrc, genRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg); } else { DCHECK(rlSrc.location == kLocDalvikFrame); - loadBaseDispWide(cUnit, NULL, rSP, rlSrc.spOffset, + loadBaseDispWide(cUnit, NULL, rSP, + oatSRegOffset(cUnit, rlSrc.sRegLow), regLo, regHi, INVALID_SREG); } } @@ -156,7 +157,8 @@ STATIC void storeValue(CompilationUnit* cUnit, RegLocation rlDest, if (oatIsDirty(cUnit, rlDest.lowReg) && oatLiveOut(cUnit, rlDest.sRegLow)) { defStart = (LIR* )cUnit->lastLIRInsn; - storeBaseDisp(cUnit, rSP, rlDest.spOffset, rlDest.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), + rlDest.lowReg, kWord); oatMarkClean(cUnit, rlDest); defEnd = (LIR* )cUnit->lastLIRInsn; oatMarkDef(cUnit, rlDest, defStart, defEnd); @@ -183,10 +185,6 @@ STATIC void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, { LIR* defStart; LIR* defEnd; - if (FPREG(rlSrc.lowReg)!=FPREG(rlSrc.highReg)) { - LOG(WARNING) << "rlSrc.lowreg:" << rlSrc.lowReg << ", rlSrc.highReg:" - << rlSrc.highReg; - } DCHECK_EQ(FPREG(rlSrc.lowReg), FPREG(rlSrc.highReg)); DCHECK(rlDest.wide); DCHECK(rlSrc.wide); @@ -230,7 +228,7 @@ STATIC void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, defStart = (LIR*)cUnit->lastLIRInsn; DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1), oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow))); - storeBaseDispWide(cUnit, rSP, rlDest.spOffset, + storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), rlDest.lowReg, rlDest.highReg); oatMarkClean(cUnit, rlDest); defEnd = (LIR*)cUnit->lastLIRInsn; diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h index 58ab1d3b97..d2e5f0a631 100644 --- a/src/compiler/codegen/CompilerCodegen.h +++ b/src/compiler/codegen/CompilerCodegen.h @@ -27,6 +27,8 @@ void oatAssembleLIR(CompilationUnit* cUnit); 
/* Implemented in the codegen/<target>/ArchUtility.c */ void oatCodegenDump(CompilationUnit* cUnit); +void oatDumpPromotionMap(CompilationUnit* cUnit); +void oatDumpFullPromotionMap(CompilationUnit* cUnit); /* Implemented in codegen/<target>/Ralloc.c */ void oatSimpleRegAlloc(CompilationUnit* cUnit); diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index e343ec5682..fee0e9a913 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -232,6 +232,7 @@ extern void oatFlushRegWideImpl(CompilationUnit* cUnit, int rBase, extern void oatDoPromotion(CompilationUnit* cUnit); extern int oatVRegOffset(CompilationUnit* cUnit, int reg); +extern int oatSRegOffset(CompilationUnit* cUnit, int reg); extern void oatDumpCoreRegPool(CompilationUnit* cUint); extern void oatDumpFPRegPool(CompilationUnit* cUint); extern bool oatCheckCorePoolSanity(CompilationUnit* cUnit); diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 7fd062d5a5..1b0fb90e35 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -186,9 +186,10 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) cUnit->coreSpillMask |= (1 << res); cUnit->coreVmapTable.push_back(sReg); cUnit->numCoreSpills++; - cUnit->regLocation[sReg].location = kLocPhysReg; - cUnit->regLocation[sReg].lowReg = res; - cUnit->regLocation[sReg].home = true; + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + cUnit->promotionMap[sReg].coreLocation = kLocPhysReg; + cUnit->promotionMap[sReg].coreReg = res; break; } } @@ -231,10 +232,11 @@ STATIC int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) ((FPRegs[i].reg & 0x1) == 0) == even) { res = FPRegs[i].reg; FPRegs[i].inUse = true; + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); markPreservedSingle(cUnit, sReg, res); - cUnit->regLocation[sReg].fpLocation = 
kLocPhysReg; - cUnit->regLocation[sReg].fpLowReg = res; - cUnit->regLocation[sReg].home = true; + cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg].fpReg = res; break; } } @@ -252,9 +254,11 @@ STATIC int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) STATIC int allocPreservedDouble(CompilationUnit* cUnit, int sReg) { int res = -1; // Assume failure - if (cUnit->regLocation[sReg+1].fpLocation == kLocPhysReg) { + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) { // Upper reg is already allocated. Can we fit? - int highReg = cUnit->regLocation[sReg+1].fpLowReg; + int highReg = cUnit->promotionMap[sReg+1].fpReg; if ((highReg & 1) == 0) { // High reg is even - fail. return res; @@ -289,12 +293,10 @@ STATIC int allocPreservedDouble(CompilationUnit* cUnit, int sReg) } } if (res != -1) { - cUnit->regLocation[sReg].fpLocation = kLocPhysReg; - cUnit->regLocation[sReg].fpLowReg = res; - cUnit->regLocation[sReg].home = true; - cUnit->regLocation[sReg+1].fpLocation = kLocPhysReg; - cUnit->regLocation[sReg+1].fpLowReg = res + 1; - cUnit->regLocation[sReg+1].home = true; + cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg].fpReg = res; + cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg+1].fpReg = res + 1; } return res; } @@ -312,7 +314,6 @@ extern int oatAllocPreservedFPReg(CompilationUnit* cUnit, int sReg, int res = -1; if (doubleStart) { res = allocPreservedDouble(cUnit, sReg); - } else { } if (res == -1) { res = allocPreservedSingle(cUnit, sReg, false /* try odd # */); diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc index aef98faff5..c4d3b6d780 100644 --- a/src/compiler/codegen/arm/ArchUtility.cc +++ b/src/compiler/codegen/arm/ArchUtility.cc @@ -16,6 +16,7 @@ #include "../../CompilerInternals.h" #include "ArmLIR.h" 
+#include "../Ralloc.h" static const char* coreRegNames[16] = { "r0", @@ -391,6 +392,38 @@ void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr) } } +void oatDumpPromotionMap(CompilationUnit *cUnit) +{ + const Method *method = cUnit->method; + for (int i = 0; i < method->NumRegisters(); i++) { + PromotionMap vRegMap = cUnit->promotionMap[i]; + char buf[100]; + if (vRegMap.fpLocation == kLocPhysReg) { + snprintf(buf, 100, " : s%d", vRegMap.fpReg & FP_REG_MASK); + } else { + buf[0] = 0; + } + char buf2[100]; + snprintf(buf2, 100, "V[%02d] -> %s%d%s", i, + vRegMap.coreLocation == kLocPhysReg ? + "r" : "SP+", vRegMap.coreLocation == kLocPhysReg ? + vRegMap.coreReg : oatSRegOffset(cUnit, i), buf); + LOG(INFO) << buf2; + } +} + +void oatDumpFullPromotionMap(CompilationUnit *cUnit) +{ + const Method *method = cUnit->method; + for (int i = 0; i < method->NumRegisters(); i++) { + PromotionMap vRegMap = cUnit->promotionMap[i]; + LOG(INFO) << i << " -> " << "CL:" << (int)vRegMap.coreLocation << + ", CR:" << (int)vRegMap.coreReg << ", FL:" << + (int)vRegMap.fpLocation << ", FR:" << (int)vRegMap.fpReg << + ", - " << (int)vRegMap.firstInPair; + } +} + /* Dump instructions and constant pool contents */ void oatCodegenDump(CompilationUnit* cUnit) { @@ -414,22 +447,7 @@ void oatCodegenDump(CompilationUnit* cUnit) " bytes, Dalvik size is " << insnsSize * 2; LOG(INFO) << "expansion factor: " << (float)cUnit->totalSize / (float)(insnsSize * 2); - for (int i = 0; i < method->NumRegisters(); i++) { - RegLocation loc = cUnit->regLocation[i]; - char buf[100]; - if (loc.fpLocation == kLocPhysReg) { - snprintf(buf, 100, " : s%d", loc.fpLowReg & FP_REG_MASK); - } else { - buf[0] = 0; - } - char buf2[100]; - snprintf(buf2, 100, "V[%02d] -> %s%d%s", i, - loc.location == kLocPhysReg ? - "r" : "SP+", loc.location == kLocPhysReg ? 
- loc.lowReg : loc.spOffset, buf); - LOG(INFO) << buf2; - - } + oatDumpPromotionMap(cUnit); for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) { oatDumpLIRInsn(cUnit, lirInsn, 0); } diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h index 1e4022ee4b..729e708fad 100644 --- a/src/compiler/codegen/arm/ArmLIR.h +++ b/src/compiler/codegen/arm/ArmLIR.h @@ -123,16 +123,13 @@ #define rNone (-1) /* RegisterLocation templates return values (r0, or r0/r1) */ -#define LOC_C_RETURN {kLocPhysReg, 0, 0, r0, INVALID_REG, INVALID_SREG, \ - 1, kLocPhysReg, r0, INVALID_REG, INVALID_OFFSET} -#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r0, r1, INVALID_SREG, \ - 1, kLocPhysReg, r0, r1, INVALID_OFFSET} +#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, INVALID_SREG} +#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG} /* RegisterLocation templates for interpState->retVal; */ -#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, r0, INVALID_REG, \ - INVALID_SREG, 1, kLocPhysReg, r0, INVALID_REG, \ - INVALID_OFFSET} -#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, r0, r1, \ - INVALID_SREG, 1, kLocPhysReg, r0, r1, INVALID_OFFSET} +#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \ + INVALID_SREG} +#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, \ + INVALID_SREG} /* * Data structure tracking the mapping between a Dalvik register (pair) and a diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index ed8a5b2ab3..4af3d0713a 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -37,7 +37,7 @@ typedef struct RefCounts { /* USE SSA names to count references of base Dalvik vRegs. 
*/ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, - RefCounts* counts, bool fp) + RefCounts* coreCounts, RefCounts* fpCounts) { MIR* mir; if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && @@ -47,59 +47,42 @@ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, for (mir = bb->firstMIRInsn; mir; mir = mir->next) { SSARepresentation *ssaRep = mir->ssaRep; if (ssaRep) { - int i; - int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode]; - if (fp) { - // Mark 1st reg of double pairs - int first = 0; - int sReg; - if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0])); - counts[sReg].doubleStart = true; + for (int i = 0; i < ssaRep->numDefs;) { + RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); + if (loc.defined) { + counts[vReg].count++; } - if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UA_WIDE) { - first += 2; - } - if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UB_WIDE) { - first += 2; - } - if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - } - for (i=0; i< ssaRep->numUses; i++) { - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpUse = ssaRep->fpUse ? 
ssaRep->fpUse[i] : false; - if (fp == fpUse) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } - for (i=0; i< ssaRep->numDefs; i++) { - if (attrs & DF_SETS_CONST) { - // CONST opcodes are untyped - don't pollute the counts - continue; + for (int i = 0; i < ssaRep->numUses;) { + RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); + if (loc.defined) { + counts[vReg].count++; } - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false; - if (fp == fpDef) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } } @@ -159,8 +142,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) BasicBlock* bb; bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator); if (bb == NULL) break; - countRefs(cUnit, bb, coreRegs, false); - countRefs(cUnit, bb, fpRegs, true); + countRefs(cUnit, bb, coreRegs, fpRegs); } /* @@ -178,21 +160,27 @@ extern void oatDoPromotion(CompilationUnit* cUnit) qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts); qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts); + if (cUnit->printMe) { + dumpCounts(coreRegs, numRegs, "Core regs after sort"); + dumpCounts(fpRegs, numRegs, "Fp regs after sort"); + } + if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { // Promote fpRegs for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { - if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) { + if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, fpRegs[i].doubleStart); if (reg < 
0) { - break; // No more left + break; // No more left } } } // Promote core regs for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { - if (cUnit->regLocation[i].location != kLocPhysReg) { + if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != + kLocPhysReg) { int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); if (reg < 0) { break; // No more left @@ -203,58 +191,69 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Now, update SSA names to new home locations for (int i = 0; i < cUnit->numSSARegs; i++) { - int baseSreg = cUnit->regLocation[i].sRegLow; - RegLocation *base = &cUnit->regLocation[baseSreg]; - RegLocation *baseNext = &cUnit->regLocation[baseSreg+1]; RegLocation *curr = &cUnit->regLocation[i]; - if (curr->fp) { - /* Single or double, check fpLocation of base */ - if (base->fpLocation == kLocPhysReg) { - if (curr->wide) { - /* TUNING: consider alignment during allocation */ - if ((base->fpLowReg & 1) || - (baseNext->fpLocation != kLocPhysReg)) { - /* Half-promoted or bad alignment - demote */ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; - } - curr->highReg = baseNext->fpLowReg; + int baseVReg = oatS2VReg(cUnit, curr->sRegLow); + if (!curr->wide) { + if (curr->fp) { + if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; + curr->home = true; + } + } else { + if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->home = true; } - curr->location = kLocPhysReg; - curr->lowReg = base->fpLowReg; - curr->home = true; } + curr->highReg = INVALID_REG; } else { - /* Core or wide */ - if (base->location == kLocPhysReg) { - if (curr->wide) { - /* Make sure upper half is also in reg or skip */ - if (baseNext->location != kLocPhysReg) { - /* Only half promoted; demote to frame 
*/ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; + if (curr->highWord) { + continue; + } + if (curr->fp) { + if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && + (cUnit->promotionMap[baseVReg+1].fpLocation == + kLocPhysReg)) { + int lowReg = cUnit->promotionMap[baseVReg].fpReg; + int highReg = cUnit->promotionMap[baseVReg+1].fpReg; + // Doubles require pair of singles starting at even reg + if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) { + curr->location = kLocPhysReg; + curr->lowReg = lowReg; + curr->highReg = highReg; + curr->home = true; } - curr->highReg = baseNext->lowReg; } - curr->location = kLocPhysReg; - curr->lowReg = base->lowReg; - curr->home = true; + } else { + if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) + && (cUnit->promotionMap[baseVReg+1].coreLocation == + kLocPhysReg)) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg; + curr->home = true; + } } } } } -/* Returns sp-relative offset in bytes */ -extern int oatVRegOffset(CompilationUnit* cUnit, int reg) +/* Returns sp-relative offset in bytes for a VReg */ +extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) { - return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) : - cUnit->insOffset + ((reg - cUnit->numRegs) << 2); + return (vReg < cUnit->numRegs) ? 
cUnit->regsOffset + (vReg << 2) : + cUnit->insOffset + ((vReg - cUnit->numRegs) << 2); } +/* Returns sp-relative offset in bytes for a SReg */ +extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) +{ + return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); +} + + /* Return sp-relative offset in bytes using Method* */ extern int oatVRegOffsetFromMethod(Method* method, int reg) { diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc index 633125443b..4a657718fd 100644 --- a/src/compiler/codegen/arm/MethodCodegenDriver.cc +++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc @@ -17,10 +17,8 @@ #define DISPLAY_MISSING_TARGETS (cUnit->enableDebug & \ (1 << kDebugDisplayMissingTargets)) -STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, INVALID_REG, - INVALID_REG, INVALID_SREG, 0, - kLocDalvikFrame, INVALID_REG, INVALID_REG, - INVALID_OFFSET}; +STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, INVALID_REG, + INVALID_REG, INVALID_SREG}; /* Mark register usage state and return long retloc */ STATIC RegLocation getRetLocWide(CompilationUnit* cUnit) @@ -99,7 +97,8 @@ STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) RegLocation loc = oatUpdateLoc(cUnit, oatGetSrc(cUnit, mir, i)); if (loc.location == kLocPhysReg) { - storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); } } /* @@ -113,7 +112,8 @@ STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) int rVal = rLR; // Using a lot of temps, rLR is known free here // Set up source pointer RegLocation rlFirst = oatGetSrc(cUnit, mir, 0); - opRegRegImm(cUnit, kOpAdd, rSrc, rSP, rlFirst.spOffset); + opRegRegImm(cUnit, kOpAdd, rSrc, rSP, + oatSRegOffset(cUnit, rlFirst.sRegLow)); // Set up the target pointer opRegRegImm(cUnit, kOpAdd, rDst, r0, Array::DataOffset().Int32Value()); @@ -773,7 +773,8 @@ STATIC 
int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir, } else { // r2 & r3 can safely be used here reg = r3; - loadWordDisp(cUnit, rSP, rlArg.spOffset + 4, reg); + loadWordDisp(cUnit, rSP, + oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg); callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback); } @@ -872,20 +873,23 @@ STATIC int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir, if (loc.wide) { loc = oatUpdateLocWide(cUnit, loc); if ((nextArg >= 2) && (loc.location == kLocPhysReg)) { - storeBaseDispWide(cUnit, rSP, loc.spOffset, loc.lowReg, - loc.highReg); + storeBaseDispWide(cUnit, rSP, + oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, loc.highReg); } nextArg += 2; } else { loc = oatUpdateLoc(cUnit, loc); if ((nextArg >= 3) && (loc.location == kLocPhysReg)) { - storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); } nextArg++; } } - int startOffset = cUnit->regLocation[mir->ssaRep->uses[3]].spOffset; + int startOffset = oatSRegOffset(cUnit, + cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow); int outsOffset = 4 /* Method* */ + (3 * 4); if (numArgs >= 20) { // Generate memcpy @@ -1790,63 +1794,44 @@ STATIC void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) } } -/* If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. - * Note: at this pointCopy any ins that are passed in register to their - * home location */ +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform initial + * assignment of promoted arguments. + */ STATIC void flushIns(CompilationUnit* cUnit) { if (cUnit->method->NumIns() == 0) return; - int inRegs = (cUnit->method->NumIns() > 2) ? 
3 - : cUnit->method->NumIns(); - int startReg = r1; - int startLoc = cUnit->method->NumRegisters() - + int firstArgReg = r1; + int lastArgReg = r3; + int startVReg = cUnit->method->NumRegisters() - cUnit->method->NumIns(); - for (int i = 0; i < inRegs; i++) { - RegLocation loc = cUnit->regLocation[startLoc + i]; - //TUNING: be smarter about flushing ins to frame - storeBaseDisp(cUnit, rSP, loc.spOffset, startReg + i, kWord); - if (loc.location == kLocPhysReg) { - genRegCopy(cUnit, loc.lowReg, startReg + i); - } - } - - // Handle special case of wide argument half in regs, half in frame - if (inRegs == 3) { - RegLocation loc = cUnit->regLocation[startLoc + 2]; - if (loc.wide && loc.location == kLocPhysReg) { - // Load the other half of the arg into the promoted pair - loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg); - inRegs++; - } - } - - // Now, do initial assignment of all promoted arguments passed in frame - for (int i = inRegs; i < cUnit->method->NumIns();) { - RegLocation loc = cUnit->regLocation[startLoc + i]; - if (loc.fpLocation == kLocPhysReg) { - loc.location = kLocPhysReg; - loc.fp = true; - loc.lowReg = loc.fpLowReg; - loc.highReg = loc.fpHighReg; - } - if (loc.location == kLocPhysReg) { - if (loc.wide) { - if (loc.fp && (loc.lowReg & 1) != 0) { - // Misaligned - need to load as a pair of singles - loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); - loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg); - } else { - loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset, - loc.lowReg, loc.highReg, INVALID_SREG); - } - i++; - } else { - loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); + for (int i = 0; i < cUnit->method->NumIns(); i++) { + PromotionMap vMap = cUnit->promotionMap[startVReg + i]; + // For arguments only, should have at most one promotion kind + DCHECK(!((vMap.coreLocation == kLocPhysReg) && + (vMap.fpLocation == kLocPhysReg))); + if (i <= (lastArgReg - firstArgReg)) { + // If arriving in register, copy or flush + if 
(vMap.coreLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.coreReg, firstArgReg + i); + } else if (vMap.fpLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.fpReg, firstArgReg + i); + } + // Also put a copy in memory in case we're partially promoted + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + firstArgReg + i, kWord); + } else { + // If arriving in frame, initialize promoted target regs + if (vMap.coreLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.coreReg); + } else if (vMap.fpLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.fpReg); } } - i++; } } |