diff options
| author | 2011-10-11 18:08:40 -0700 | |
|---|---|---|
| committer | 2011-10-14 10:54:03 -0700 | |
| commit | 67bc236a207852d652f6ddeab0a90efc1bd111bb (patch) | |
| tree | eea13fcb90ad8ce5b2b3819fb8caf0281583cd61 /src | |
| parent | 95caa791e560da97363c0c0d22bfda4a7e7377c3 (diff) | |
Register promotion fix
Restructured the type inference mechanism, added lots of DCHECKs,
bumped the default memory allocation size to reflect AOT
compilation, and tweaked the bit vector manipulation routines
to be better at handling large sparse vectors (something the old
trace JIT didn't encounter often enough to care about).
With this CL, optimization is back on by default. We should also see
a significant boost in compilation speed (~2x better for boot.oat).
Change-Id: Ifd134ef337be173a1be756bb9198b24c5b4936b3
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/CompilerIR.h | 32 | ||||
| -rw-r--r-- | src/compiler/CompilerUtility.h | 4 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 306 | ||||
| -rw-r--r-- | src/compiler/Dataflow.h | 6 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 12 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 174 | ||||
| -rw-r--r-- | src/compiler/Utility.cc | 37 | ||||
| -rw-r--r-- | src/compiler/codegen/CodegenFactory.cc | 14 | ||||
| -rw-r--r-- | src/compiler/codegen/CompilerCodegen.h | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 1 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 31 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArchUtility.cc | 50 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmLIR.h | 15 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 185 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/MethodCodegenDriver.cc | 107 |
15 files changed, 554 insertions, 422 deletions
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index dac63cf29e..5be48ab721 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -32,23 +32,30 @@ typedef enum RegLocationType { kLocSpill, } RegLocationType; +typedef struct PromotionMap { + RegLocationType coreLocation:3; + u1 coreReg; + RegLocationType fpLocation:3; + u1 fpReg; + bool firstInPair; +} PromotionMap; + typedef struct RegLocation { - RegLocationType location:2; + RegLocationType location:3; unsigned wide:1; - unsigned fp:1; // Hint for float/double - u1 lowReg:6; // First physical register - u1 highReg:6; // 2nd physical register (if wide) - s2 sRegLow; // SSA name for low Dalvik word - unsigned home:1; // Does this represent the home location? - RegLocationType fpLocation:2; // Used only for non-SSA loc records - u1 fpLowReg:6; // Used only for non-SSA loc records - u1 fpHighReg:6; // Used only for non-SSA loc records - int spOffset:17; + unsigned defined:1; // Do we know the type? + unsigned fp:1; // Floating point? + unsigned core:1; // Non-floating point? + unsigned highWord:1; // High word of pair? + unsigned home:1; // Does this represent the home location? + u1 lowReg; // First physical register + u1 highReg; // 2nd physical register (if wide) + s2 sRegLow; // SSA name for low Dalvik word } RegLocation; #define INVALID_SREG (-1) #define INVALID_VREG (0xFFFFU) -#define INVALID_REG (0x3F) +#define INVALID_REG (0xFF) #define INVALID_OFFSET (-1) typedef enum BBType { @@ -233,6 +240,9 @@ typedef struct CompilationUnit { RegLocation* regLocation; int sequenceNumber; + /* Keep track of Dalvik vReg to physical register mappings */ + PromotionMap* promotionMap; + /* * Set to the Dalvik PC of the switch instruction if it has more than * MAX_CHAINED_SWITCH_CASES cases. 
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h index c0dcaf76c1..0a7d079b50 100644 --- a/src/compiler/CompilerUtility.h +++ b/src/compiler/CompilerUtility.h @@ -19,8 +19,8 @@ #include "Dalvik.h" -/* Each arena page has some overhead, so take a few bytes off 8k */ -#define ARENA_DEFAULT_SIZE 8100 +/* Each arena page has some overhead, so take a few bytes off */ +#define ARENA_DEFAULT_SIZE ((256 * 1024) - 256) /* Allocate the initial memory block for arena-based allocation */ bool oatHeapInit(void); diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index 09a362ef25..e73ccc931b 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -50,13 +50,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_IS_MOVE, // 07 OP_MOVE_OBJECT vA, vB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 08 OP_MOVE_OBJECT_FROM16 vAA, vBBBB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 09 OP_MOVE_OBJECT_16 vAAAA, vBBBB - DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE, + DF_DA | DF_UB | DF_NULL_TRANSFER_0 | DF_IS_MOVE | DF_CORE_A | DF_CORE_B, // 0A OP_MOVE_RESULT vAA DF_DA, @@ -65,10 +65,10 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE, // 0C OP_MOVE_RESULT_OBJECT vAA - DF_DA, + DF_DA | DF_CORE_A, // 0D OP_MOVE_EXCEPTION vAA - DF_DA, + DF_DA | DF_CORE_A, // 0E OP_RETURN_VOID DF_NOP, @@ -80,7 +80,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA_WIDE, // 11 OP_RETURN_OBJECT vAA - DF_UA, + DF_UA | DF_CORE_A, // 12 OP_CONST_4 vA, #+B DF_DA | DF_SETS_CONST, @@ -107,34 +107,34 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_SETS_CONST, // 1A OP_CONST_STRING vAA, string@BBBB - DF_DA, + DF_DA | DF_CORE_A, // 1B OP_CONST_STRING_JUMBO vAA, string@BBBBBBBB - DF_DA, + DF_DA | DF_CORE_A, // 1C OP_CONST_CLASS vAA, type@BBBB - DF_DA, + 
DF_DA | DF_CORE_A, // 1D OP_MONITOR_ENTER vAA - DF_UA | DF_NULL_CHK_0, + DF_UA | DF_NULL_CHK_0 | DF_CORE_A, // 1E OP_MONITOR_EXIT vAA - DF_UA | DF_NULL_CHK_0, + DF_UA | DF_NULL_CHK_0 | DF_CORE_A, // 1F OP_CHK_CAST vAA, type@BBBB - DF_UA, + DF_UA | DF_CORE_A, // 20 OP_INSTANCE_OF vA, vB, type@CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 21 OP_ARRAY_LENGTH vA, vB - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B, // 22 OP_NEW_INSTANCE vAA, type@BBBB - DF_DA | DF_NON_NULL_DST, + DF_DA | DF_NON_NULL_DST | DF_CORE_A, // 23 OP_NEW_ARRAY vA, vB, type@CCCC - DF_DA | DF_UB | DF_NON_NULL_DST, + DF_DA | DF_UB | DF_NON_NULL_DST | DF_CORE_A | DF_CORE_B, // 24 OP_FILLED_NEW_ARRAY {vD, vE, vF, vG, vA} DF_FORMAT_35C | DF_NON_NULL_RET, @@ -143,10 +143,10 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_FORMAT_3RC | DF_NON_NULL_RET, // 26 OP_FILL_ARRAY_DATA vAA, +BBBBBBBB - DF_UA, + DF_UA | DF_CORE_A, // 27 OP_THROW vAA - DF_UA, + DF_UA | DF_CORE_A, // 28 OP_GOTO DF_NOP, @@ -164,56 +164,56 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA, // 2D OP_CMPL_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A, // 2E OP_CMPG_FLOAT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C, + DF_DA | DF_UB | DF_UC | DF_FP_B | DF_FP_C | DF_CORE_A, // 2F OP_CMPL_DOUBLE vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A, // 30 OP_CMPG_DOUBLE vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_FP_B | DF_FP_C | DF_CORE_A, // 31 OP_CMP_LONG vAA, vBB, vCC - DF_DA | DF_UB_WIDE | DF_UC_WIDE, + DF_DA | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 32 OP_IF_EQ vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 33 OP_IF_NE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 
34 OP_IF_LT vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 35 OP_IF_GE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 36 OP_IF_GT vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 37 OP_IF_LE vA, vB, +CCCC - DF_UA | DF_UB, + DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // 38 OP_IF_EQZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 39 OP_IF_NEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3A OP_IF_LTZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3B OP_IF_GEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3C OP_IF_GTZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3D OP_IF_LEZ vAA, +BBBB - DF_UA, + DF_UA | DF_CORE_A, // 3E OP_UNUSED_3E DF_NOP, @@ -234,88 +234,88 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_NOP, // 44 OP_AGET vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 45 OP_AGET_WIDE vAA, vBB, vCC - DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 46 OP_AGET_OBJECT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 47 OP_AGET_BOOLEAN vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 48 OP_AGET_BYTE vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 49 OP_AGET_CHAR vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | 
DF_CORE_B | DF_CORE_C, // 4A OP_AGET_SHORT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_IS_GETTER | DF_CORE_B | DF_CORE_C, // 4B OP_APUT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4C OP_APUT_WIDE vAA, vBB, vCC - DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4D OP_APUT_OBJECT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4E OP_APUT_BOOLEAN vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 4F OP_APUT_BYTE vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 50 OP_APUT_CHAR vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 51 OP_APUT_SHORT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C, // 52 OP_IGET vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 53 OP_IGET_WIDE vA, vB, field@CCCC - DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 54 OP_IGET_OBJECT vA, vB, field@CCCC - 
DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 55 OP_IGET_BOOLEAN vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 56 OP_IGET_BYTE vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 57 OP_IGET_CHAR vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 58 OP_IGET_SHORT vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_IS_GETTER | DF_CORE_B, // 59 OP_IPUT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5A OP_IPUT_WIDE vA, vB, field@CCCC - DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5B OP_IPUT_OBJECT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5C OP_IPUT_BOOLEAN vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5D OP_IPUT_BYTE vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5E OP_IPUT_CHAR vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 5F OP_IPUT_SHORT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B, // 60 OP_SGET vAA, field@BBBB DF_DA | DF_IS_GETTER, @@ -324,7 +324,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_IS_GETTER, // 62 OP_SGET_OBJECT vAA, field@BBBB - DF_DA | DF_IS_GETTER, + DF_DA | DF_IS_GETTER | DF_CORE_A, // 63 OP_SGET_BOOLEAN vAA, 
field@BBBB DF_DA | DF_IS_GETTER, @@ -345,7 +345,7 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA_WIDE | DF_IS_SETTER, // 69 OP_SPUT_OBJECT vAA, field@BBBB - DF_UA | DF_IS_SETTER, + DF_UA | DF_IS_SETTER | DF_CORE_A, // 6A OP_SPUT_BOOLEAN vAA, field@BBBB DF_UA | DF_IS_SETTER, @@ -399,16 +399,16 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_NOP, // 7B OP_NEG_INT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 7C OP_NOT_INT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 7D OP_NEG_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 7E OP_NOT_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 7F OP_NEG_FLOAT vA, vB DF_DA | DF_UB | DF_FP_A | DF_FP_B, @@ -417,115 +417,115 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, // 81 OP_INT_TO_LONG vA, vB - DF_DA_WIDE | DF_UB, + DF_DA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // 82 OP_INT_TO_FLOAT vA, vB - DF_DA | DF_UB | DF_FP_A, + DF_DA | DF_UB | DF_FP_A | DF_CORE_B, // 83 OP_INT_TO_DOUBLE vA, vB - DF_DA_WIDE | DF_UB | DF_FP_A, + DF_DA_WIDE | DF_UB | DF_FP_A | DF_CORE_B, // 84 OP_LONG_TO_INT vA, vB - DF_DA | DF_UB_WIDE, + DF_DA | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // 85 OP_LONG_TO_FLOAT vA, vB - DF_DA | DF_UB_WIDE | DF_FP_A, + DF_DA | DF_UB_WIDE | DF_FP_A | DF_CORE_B, // 86 OP_LONG_TO_DOUBLE vA, vB - DF_DA_WIDE | DF_UB_WIDE | DF_FP_A, + DF_DA_WIDE | DF_UB_WIDE | DF_FP_A | DF_CORE_B, // 87 OP_FLOAT_TO_INT vA, vB - DF_DA | DF_UB | DF_FP_B, + DF_DA | DF_UB | DF_FP_B | DF_CORE_A, // 88 OP_FLOAT_TO_LONG vA, vB - DF_DA_WIDE | DF_UB | DF_FP_B, + DF_DA_WIDE | DF_UB | DF_FP_B | DF_CORE_A, // 89 OP_FLOAT_TO_DOUBLE vA, vB DF_DA_WIDE | DF_UB | DF_FP_A | DF_FP_B, // 8A OP_DOUBLE_TO_INT vA, vB - DF_DA | DF_UB_WIDE | DF_FP_B, + DF_DA | DF_UB_WIDE | DF_FP_B | DF_CORE_A, // 8B OP_DOUBLE_TO_LONG vA, vB - DF_DA_WIDE | DF_UB_WIDE | DF_FP_B, + DF_DA_WIDE | DF_UB_WIDE | DF_FP_B | 
DF_CORE_A, // 8C OP_DOUBLE_TO_FLOAT vA, vB DF_DA | DF_UB_WIDE | DF_FP_A | DF_FP_B, // 8D OP_INT_TO_BYTE vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 8E OP_INT_TO_CHAR vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 8F OP_INT_TO_SHORT vA, vB - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // 90 OP_ADD_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_IS_LINEAR, + DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 91 OP_SUB_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_IS_LINEAR, + DF_DA | DF_UB | DF_UC | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 92 OP_MUL_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 93 OP_DIV_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 94 OP_REM_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 95 OP_AND_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 96 OP_OR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 97 OP_XOR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 98 OP_SHL_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 99 OP_SHR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9A OP_USHR_INT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC, + DF_DA | DF_UB | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9B OP_ADD_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9C OP_SUB_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9D 
OP_MUL_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9E OP_DIV_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 9F OP_REM_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A0 OP_AND_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A1 OP_OR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A2 OP_XOR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE, + DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A3 OP_SHL_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A4 OP_SHR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A5 OP_USHR_LONG vAA, vBB, vCC - DF_DA_WIDE | DF_UB_WIDE | DF_UC, + DF_DA_WIDE | DF_UB_WIDE | DF_UC | DF_CORE_A | DF_CORE_B | DF_CORE_C, // A6 OP_ADD_FLOAT vAA, vBB, vCC DF_DA | DF_UB | DF_UC | DF_FP_A | DF_FP_B | DF_FP_C, @@ -558,70 +558,70 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UB_WIDE | DF_UC_WIDE | DF_FP_A | DF_FP_B | DF_FP_C, // B0 OP_ADD_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B1 OP_SUB_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B2 OP_MUL_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B3 OP_DIV_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B4 OP_REM_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | 
DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B5 OP_AND_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B6 OP_OR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B7 OP_XOR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B8 OP_SHL_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // B9 OP_SHR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // BA OP_USHR_INT_2ADDR vA, vB - DF_DA | DF_UA | DF_UB, + DF_DA | DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, // BB OP_ADD_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BC OP_SUB_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BD OP_MUL_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BE OP_DIV_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // BF OP_REM_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C0 OP_AND_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C1 OP_OR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C2 OP_XOR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE, + DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, // C3 OP_SHL_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C4 OP_SHR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C5 
OP_USHR_LONG_2ADDR vA, vB - DF_DA_WIDE | DF_UA_WIDE | DF_UB, + DF_DA_WIDE | DF_UA_WIDE | DF_UB | DF_CORE_A | DF_CORE_B, // C6 OP_ADD_FLOAT_2ADDR vA, vB DF_DA | DF_UA | DF_UB | DF_FP_A | DF_FP_B, @@ -654,67 +654,67 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_DA_WIDE | DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, // D0 OP_ADD_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D1 OP_RSUB_INT vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D2 OP_MUL_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D3 OP_DIV_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D4 OP_REM_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D5 OP_AND_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D6 OP_OR_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D7 OP_XOR_INT_LIT16 vA, vB, #+CCCC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // D8 OP_ADD_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB | DF_IS_LINEAR, + DF_DA | DF_UB | DF_IS_LINEAR | DF_CORE_A | DF_CORE_B, // D9 OP_RSUB_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DA OP_MUL_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DB OP_DIV_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DC OP_REM_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DD OP_AND_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DE OP_OR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // DF OP_XOR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E0 OP_SHL_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E1 OP_SHR_INT_LIT8 vAA, vBB, 
#+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E2 OP_USHR_INT_LIT8 vAA, vBB, #+CC - DF_DA | DF_UB, + DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E3 OP_IGET_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_B, // E4 OP_IPUT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_B, // E5 OP_SGET_VOLATILE DF_DA, @@ -723,13 +723,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_UA, // E7 OP_IGET_OBJECT_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_CORE_A | DF_CORE_B, // E8 OP_IGET_WIDE_VOLATILE - DF_DA_WIDE | DF_UB | DF_NULL_CHK_0, + DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_CORE_B, // E9 OP_IPUT_WIDE_VOLATILE - DF_UA_WIDE | DF_UB | DF_NULL_CHK_1, + DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_CORE_B, // EA OP_SGET_WIDE_VOLATILE DF_DA_WIDE, @@ -786,13 +786,13 @@ int oatDataFlowAttributes[kMirOpLast] = { DF_FORMAT_3RC | DF_NULL_CHK_OUT0, // FC OP_IPUT_OBJECT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_CORE_A | DF_CORE_B, // FD OP_SGET_OBJECT_VOLATILE - DF_DA, + DF_DA | DF_CORE_A, // FE OP_SPUT_OBJECT_VOLATILE - DF_UA, + DF_UA | DF_CORE_A, // FF OP_DISPATCH_FF DF_NOP, diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h index e4a37265be..1696e44f3b 100644 --- a/src/compiler/Dataflow.h +++ b/src/compiler/Dataflow.h @@ -47,6 +47,9 @@ typedef enum DataFlowAttributePos { kFPA, kFPB, kFPC, + kCoreA, + kCoreB, + kCoreC, kGetter, kSetter, } DataFlowAttributes; @@ -78,6 +81,9 @@ typedef enum DataFlowAttributePos { #define DF_FP_A (1 << kFPA) #define DF_FP_B (1 << kFPB) #define DF_FP_C (1 << kFPC) +#define DF_CORE_A (1 << kCoreA) +#define DF_CORE_B (1 << kCoreB) +#define DF_CORE_C (1 << kCoreC) #define DF_IS_GETTER (1 << kGetter) #define DF_IS_SETTER (1 << kSetter) diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index 33dbba4ba5..015aee974a 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ 
-23,12 +23,12 @@ /* Default optimizer/debug setting for the compiler. */ uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations - (1 << kLoadStoreElimination) | - (1 << kLoadHoisting) | - (1 << kSuppressLoads) | - (1 << kNullCheckElimination) | - (1 << kPromoteRegs) | - (1 << kTrackLiveTemps) | + //(1 << kLoadStoreElimination) | + //(1 << kLoadHoisting) | + //(1 << kSuppressLoads) | + //(1 << kNullCheckElimination) | + //(1 << kPromoteRegs) | + //(1 << kTrackLiveTemps) | 0; uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index aaf9b97f15..b4cc0b5689 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -21,8 +21,25 @@ STATIC bool setFp(CompilationUnit* cUnit, int index, bool isFP) { bool change = false; + if (cUnit->regLocation[index].highWord) { + return change; + } if (isFP && !cUnit->regLocation[index].fp) { cUnit->regLocation[index].fp = true; + cUnit->regLocation[index].defined = true; + change = true; + } + return change; +} + +STATIC bool setCore(CompilationUnit* cUnit, int index, bool isCore) { + bool change = false; + if (cUnit->regLocation[index].highWord) { + return change; + } + if (isCore && !cUnit->regLocation[index].defined) { + cUnit->regLocation[index].core = true; + cUnit->regLocation[index].defined = true; change = true; } return change; @@ -66,21 +83,60 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) SSARepresentation *ssaRep = mir->ssaRep; if (ssaRep) { int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode]; - int next = 0; - if (attrs & DF_DA_WIDE) { - cUnit->regLocation[ssaRep->defs[0]].wide = true; + + // Handle defs + if (attrs & (DF_DA | DF_DA_WIDE)) { + if (attrs & DF_CORE_A) { + changed |= setCore(cUnit, ssaRep->defs[0], true); + } + if (attrs & DF_DA_WIDE) { + cUnit->regLocation[ssaRep->defs[0]].wide = true; + cUnit->regLocation[ssaRep->defs[1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, 
ssaRep->defs[0])+1, + oatS2VReg(cUnit, ssaRep->defs[1])); + } } - if (attrs & DF_UA_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + + // Handles uses + int next = 0; + if (attrs & (DF_UA | DF_UA_WIDE)) { + if (attrs & DF_CORE_A) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UA_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + next += 2; + } else { + next++; + } } - if (attrs & DF_UB_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + if (attrs & (DF_UB | DF_UB_WIDE)) { + if (attrs & DF_CORE_B) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UB_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + next += 2; + } else { + next++; + } } - if (attrs & DF_UC_WIDE) { - cUnit->regLocation[ssaRep->uses[next]].wide = true; - next += 2; + if (attrs & (DF_UC | DF_UC_WIDE)) { + if (attrs & DF_CORE_C) { + changed |= setCore(cUnit, ssaRep->uses[next], true); + } + if (attrs & DF_UC_WIDE) { + cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, + oatS2VReg(cUnit, ssaRep->uses[next + 1])); + } } // Special-case handling for format 35c/3rc invokes @@ -97,6 +153,8 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) // If this is a non-static invoke, skip implicit "this" if (((mir->dalvikInsn.opcode != OP_INVOKE_STATIC) && (mir->dalvikInsn.opcode != OP_INVOKE_STATIC_RANGE))) { + cUnit->regLocation[ssaRep->uses[next]].defined = true; + cUnit->regLocation[ssaRep->uses[next]].core = true; next++; } uint32_t 
cpos = 1; @@ -108,16 +166,26 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) ssaRep->fpUse[i] = true; ssaRep->fpUse[i+1] = true; cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].highWord + = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, + oatS2VReg(cUnit, ssaRep->uses[i+1])); i++; break; case 'J': cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].highWord + = true; + DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, + oatS2VReg(cUnit, ssaRep->uses[i+1])); + changed |= setCore(cUnit, ssaRep->uses[i],true); i++; break; case 'F': ssaRep->fpUse[i] = true; break; default: + changed |= setCore(cUnit,ssaRep->uses[i], true); break; } i++; @@ -135,13 +203,25 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } // Special-case handling for moves & Phi if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { - bool isFP = cUnit->regLocation[ssaRep->defs[0]].fp; + // If any of our inputs or outputs is defined, set all + bool definedFP = false; + bool definedCore = false; + definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined && + cUnit->regLocation[ssaRep->defs[0]].fp); + definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined && + cUnit->regLocation[ssaRep->defs[0]].core); for (int i = 0; i < ssaRep->numUses; i++) { - isFP |= cUnit->regLocation[ssaRep->uses[i]].fp; + definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined && + cUnit->regLocation[ssaRep->uses[i]].fp); + definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined + && cUnit->regLocation[ssaRep->uses[i]].core); } - changed |= setFp(cUnit, ssaRep->defs[0], isFP); + DCHECK(!(definedFP && definedCore)); + changed |= setFp(cUnit, ssaRep->defs[0], definedFP); + changed |= setCore(cUnit, ssaRep->defs[0], definedCore); for (int i = 0; i < ssaRep->numUses; i++) { - changed |= setFp(cUnit, ssaRep->uses[i], isFP); + changed |= setFp(cUnit, ssaRep->uses[i], definedFP); + changed |= 
setCore(cUnit, ssaRep->uses[i], definedCore); } } } @@ -155,20 +235,19 @@ void oatDumpRegLocTable(RegLocation* table, int count) { for (int i = 0; i < count; i++) { char buf[100]; - snprintf(buf, 100, "Loc[%02d] : %s, %c %c r%d r%d S%d : %s s%d s%d", + snprintf(buf, 100, "Loc[%02d] : %s, %c %c %c %c %c %c%d %c%d S%d", i, storageName[table[i].location], table[i].wide ? 'W' : 'N', - table[i].fp ? 'F' : 'C', table[i].lowReg, table[i].highReg, - table[i].sRegLow, storageName[table[i].fpLocation], - table[i].fpLowReg & FP_REG_MASK, table[i].fpHighReg & - FP_REG_MASK); + table[i].defined ? 'D' : 'U', table[i].fp ? 'F' : 'C', + table[i].highWord ? 'H' : 'L', table[i].home ? 'h' : 't', + FPREG(table[i].lowReg) ? 's' : 'r', table[i].lowReg & FP_REG_MASK, + FPREG(table[i].highReg) ? 's' : 'r', table[i].highReg & FP_REG_MASK, + table[i].sRegLow); LOG(INFO) << buf; } } -static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, INVALID_REG, - INVALID_REG, INVALID_SREG, 0, - kLocDalvikFrame, INVALID_REG, INVALID_REG, - INVALID_OFFSET}; +static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, + INVALID_REG, INVALID_REG, INVALID_SREG}; /* * Simple register allocation. 
Some Dalvik virtual registers may @@ -189,6 +268,10 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) } cUnit->regLocation = loc; + /* Allocation the promotion map */ + cUnit->promotionMap = (PromotionMap*)oatNew( cUnit->method->NumRegisters() + * sizeof(cUnit->promotionMap[0]), true); + /* Add types of incoming arguments based on signature */ int numRegs = cUnit->method->NumRegisters(); int numIns = cUnit->method->NumIns(); @@ -196,16 +279,39 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) int sReg = numRegs - numIns; if (!cUnit->method->IsStatic()) { // Skip past "this" + cUnit->regLocation[sReg].defined = true; + cUnit->regLocation[sReg].core = true; sReg++; } const String* shorty = cUnit->method->GetShorty(); for (int i = 1; i < shorty->GetLength(); i++) { - char arg = shorty->CharAt(i); - // Is it wide? - if ((arg == 'D') || (arg == 'J')) { - cUnit->regLocation[sReg].wide = true; - cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp; - sReg++; // Skip to next + switch(shorty->CharAt(i)) { + case 'D': + cUnit->regLocation[sReg].wide = true; + cUnit->regLocation[sReg+1].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, + oatS2VReg(cUnit, sReg+1)); + cUnit->regLocation[sReg].fp = true; + cUnit->regLocation[sReg].defined = true; + sReg++; + break; + case 'J': + cUnit->regLocation[sReg].wide = true; + cUnit->regLocation[sReg+1].highWord = true; + DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, + oatS2VReg(cUnit, sReg+1)); + cUnit->regLocation[sReg].core = true; + cUnit->regLocation[sReg].defined = true; + sReg++; + break; + case 'F': + cUnit->regLocation[sReg].fp = true; + cUnit->regLocation[sReg].defined = true; + break; + default: + cUnit->regLocation[sReg].core = true; + cUnit->regLocation[sReg].defined = true; + break; } sReg++; } @@ -254,10 +360,4 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) cUnit->numPadding + 2) * 4; cUnit->insOffset = cUnit->frameSize + 4; cUnit->regsOffset = (cUnit->numOuts + cUnit->numPadding + 1) * 4; - - /* Compute sp-relative 
home location offsets */ - for (i = 0; i < cUnit->numSSARegs; i++) { - int vReg = oatS2VReg(cUnit, cUnit->regLocation[i].sRegLow); - cUnit->regLocation[i].spOffset = oatVRegOffset(cUnit, vReg); - } } diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc index 0fc8a8046d..e3c20ecc3c 100644 --- a/src/compiler/Utility.cc +++ b/src/compiler/Utility.cc @@ -61,6 +61,7 @@ retry: */ if (currentArena->next) { currentArena = currentArena->next; + currentArena->bytesAllocated = 0; goto retry; } @@ -88,12 +89,10 @@ retry: /* Reclaim all the arena blocks allocated so far */ void oatArenaReset(void) { - ArenaMemBlock *block; - - for (block = arenaHead; block; block = block->next) { - block->bytesAllocated = 0; - } currentArena = arenaHead; + if (currentArena) { + currentArena->bytesAllocated = 0; + } } /* Growable List initialization */ @@ -201,6 +200,15 @@ void oatDumpStats(void) oatArchDump(); } +static uint32_t checkMasks[32] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x00000100, 0x00000200, + 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, + 0x00008000, 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, 0x01000000, + 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, + 0x40000000, 0x80000000 }; + /* * Allocate a bit vector with enough space to hold at least the specified * number of bits. 
@@ -231,7 +239,7 @@ bool oatIsBitSet(const ArenaBitVector* pBits, unsigned int num) { DCHECK_LT(num, pBits->storageSize * sizeof(u4) * 8); - unsigned int val = pBits->storage[num >> 5] & (1 << (num & 0x1f)); + unsigned int val = pBits->storage[num >> 5] & checkMasks[num & 0x1f]; return (val != 0); } @@ -270,7 +278,7 @@ bool oatSetBit(ArenaBitVector* pBits, unsigned int num) pBits->storageSize = newSize; } - pBits->storage[num >> 5] |= 1 << (num & 0x1f); + pBits->storage[num >> 5] |= checkMasks[num & 0x1f]; return true; } @@ -288,7 +296,7 @@ bool oatClearBit(ArenaBitVector* pBits, unsigned int num) LOG(FATAL) << "Attempt to clear a bit not set in the vector yet";; } - pBits->storage[num >> 5] &= ~(1 << (num & 0x1f)); + pBits->storage[num >> 5] &= ~checkMasks[num & 0x1f]; return true; } @@ -462,13 +470,20 @@ int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator) DCHECK_EQ(iterator->bitSize, pBits->storageSize * sizeof(u4) * 8); if (bitIndex >= iterator->bitSize) return -1; - for (; bitIndex < iterator->bitSize; bitIndex++) { + for (; bitIndex < iterator->bitSize;) { unsigned int wordIndex = bitIndex >> 5; - unsigned int mask = 1 << (bitIndex & 0x1f); - if (pBits->storage[wordIndex] & mask) { + unsigned int bitPos = bitIndex & 0x1f; + unsigned int word = pBits->storage[wordIndex]; + if (word & checkMasks[bitPos]) { iterator->idx = bitIndex+1; return bitIndex; } + if (word == 0) { + // Helps if this is a sparse vector + bitIndex += (32 - bitPos); + } else { + bitIndex++; + } } /* No more set bits */ return -1; diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc index 55ed8af080..5dbcd9769a 100644 --- a/src/compiler/codegen/CodegenFactory.cc +++ b/src/compiler/codegen/CodegenFactory.cc @@ -58,7 +58,7 @@ STATIC void loadValueDirect(CompilationUnit* cUnit, RegLocation rlSrc, genRegCopy(cUnit, reg1, rlSrc.lowReg); } else { DCHECK(rlSrc.location == kLocDalvikFrame); - loadWordDisp(cUnit, rSP, rlSrc.spOffset, reg1); + 
loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1); } } @@ -88,7 +88,8 @@ STATIC void loadValueDirectWide(CompilationUnit* cUnit, RegLocation rlSrc, genRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg); } else { DCHECK(rlSrc.location == kLocDalvikFrame); - loadBaseDispWide(cUnit, NULL, rSP, rlSrc.spOffset, + loadBaseDispWide(cUnit, NULL, rSP, + oatSRegOffset(cUnit, rlSrc.sRegLow), regLo, regHi, INVALID_SREG); } } @@ -156,7 +157,8 @@ STATIC void storeValue(CompilationUnit* cUnit, RegLocation rlDest, if (oatIsDirty(cUnit, rlDest.lowReg) && oatLiveOut(cUnit, rlDest.sRegLow)) { defStart = (LIR* )cUnit->lastLIRInsn; - storeBaseDisp(cUnit, rSP, rlDest.spOffset, rlDest.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), + rlDest.lowReg, kWord); oatMarkClean(cUnit, rlDest); defEnd = (LIR* )cUnit->lastLIRInsn; oatMarkDef(cUnit, rlDest, defStart, defEnd); @@ -183,10 +185,6 @@ STATIC void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, { LIR* defStart; LIR* defEnd; - if (FPREG(rlSrc.lowReg)!=FPREG(rlSrc.highReg)) { - LOG(WARNING) << "rlSrc.lowreg:" << rlSrc.lowReg << ", rlSrc.highReg:" - << rlSrc.highReg; - } DCHECK_EQ(FPREG(rlSrc.lowReg), FPREG(rlSrc.highReg)); DCHECK(rlDest.wide); DCHECK(rlSrc.wide); @@ -230,7 +228,7 @@ STATIC void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, defStart = (LIR*)cUnit->lastLIRInsn; DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1), oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow))); - storeBaseDispWide(cUnit, rSP, rlDest.spOffset, + storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), rlDest.lowReg, rlDest.highReg); oatMarkClean(cUnit, rlDest); defEnd = (LIR*)cUnit->lastLIRInsn; diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h index 58ab1d3b97..d2e5f0a631 100644 --- a/src/compiler/codegen/CompilerCodegen.h +++ b/src/compiler/codegen/CompilerCodegen.h @@ -27,6 +27,8 @@ void oatAssembleLIR(CompilationUnit* cUnit); 
/* Implemented in the codegen/<target>/ArchUtility.c */ void oatCodegenDump(CompilationUnit* cUnit); +void oatDumpPromotionMap(CompilationUnit* cUnit); +void oatDumpFullPromotionMap(CompilationUnit* cUnit); /* Implemented in codegen/<target>/Ralloc.c */ void oatSimpleRegAlloc(CompilationUnit* cUnit); diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index e343ec5682..fee0e9a913 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -232,6 +232,7 @@ extern void oatFlushRegWideImpl(CompilationUnit* cUnit, int rBase, extern void oatDoPromotion(CompilationUnit* cUnit); extern int oatVRegOffset(CompilationUnit* cUnit, int reg); +extern int oatSRegOffset(CompilationUnit* cUnit, int reg); extern void oatDumpCoreRegPool(CompilationUnit* cUint); extern void oatDumpFPRegPool(CompilationUnit* cUint); extern bool oatCheckCorePoolSanity(CompilationUnit* cUnit); diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 7fd062d5a5..1b0fb90e35 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -186,9 +186,10 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) cUnit->coreSpillMask |= (1 << res); cUnit->coreVmapTable.push_back(sReg); cUnit->numCoreSpills++; - cUnit->regLocation[sReg].location = kLocPhysReg; - cUnit->regLocation[sReg].lowReg = res; - cUnit->regLocation[sReg].home = true; + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + cUnit->promotionMap[sReg].coreLocation = kLocPhysReg; + cUnit->promotionMap[sReg].coreReg = res; break; } } @@ -231,10 +232,11 @@ STATIC int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) ((FPRegs[i].reg & 0x1) == 0) == even) { res = FPRegs[i].reg; FPRegs[i].inUse = true; + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); markPreservedSingle(cUnit, sReg, res); - cUnit->regLocation[sReg].fpLocation = 
kLocPhysReg; - cUnit->regLocation[sReg].fpLowReg = res; - cUnit->regLocation[sReg].home = true; + cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg].fpReg = res; break; } } @@ -252,9 +254,11 @@ STATIC int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) STATIC int allocPreservedDouble(CompilationUnit* cUnit, int sReg) { int res = -1; // Assume failure - if (cUnit->regLocation[sReg+1].fpLocation == kLocPhysReg) { + // Should be promoting based on initial sReg set + DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) { // Upper reg is already allocated. Can we fit? - int highReg = cUnit->regLocation[sReg+1].fpLowReg; + int highReg = cUnit->promotionMap[sReg+1].fpReg; if ((highReg & 1) == 0) { // High reg is even - fail. return res; @@ -289,12 +293,10 @@ STATIC int allocPreservedDouble(CompilationUnit* cUnit, int sReg) } } if (res != -1) { - cUnit->regLocation[sReg].fpLocation = kLocPhysReg; - cUnit->regLocation[sReg].fpLowReg = res; - cUnit->regLocation[sReg].home = true; - cUnit->regLocation[sReg+1].fpLocation = kLocPhysReg; - cUnit->regLocation[sReg+1].fpLowReg = res + 1; - cUnit->regLocation[sReg+1].home = true; + cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg].fpReg = res; + cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg; + cUnit->promotionMap[sReg+1].fpReg = res + 1; } return res; } @@ -312,7 +314,6 @@ extern int oatAllocPreservedFPReg(CompilationUnit* cUnit, int sReg, int res = -1; if (doubleStart) { res = allocPreservedDouble(cUnit, sReg); - } else { } if (res == -1) { res = allocPreservedSingle(cUnit, sReg, false /* try odd # */); diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc index aef98faff5..c4d3b6d780 100644 --- a/src/compiler/codegen/arm/ArchUtility.cc +++ b/src/compiler/codegen/arm/ArchUtility.cc @@ -16,6 +16,7 @@ #include "../../CompilerInternals.h" #include "ArmLIR.h" 
+#include "../Ralloc.h" static const char* coreRegNames[16] = { "r0", @@ -391,6 +392,38 @@ void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr) } } +void oatDumpPromotionMap(CompilationUnit *cUnit) +{ + const Method *method = cUnit->method; + for (int i = 0; i < method->NumRegisters(); i++) { + PromotionMap vRegMap = cUnit->promotionMap[i]; + char buf[100]; + if (vRegMap.fpLocation == kLocPhysReg) { + snprintf(buf, 100, " : s%d", vRegMap.fpReg & FP_REG_MASK); + } else { + buf[0] = 0; + } + char buf2[100]; + snprintf(buf2, 100, "V[%02d] -> %s%d%s", i, + vRegMap.coreLocation == kLocPhysReg ? + "r" : "SP+", vRegMap.coreLocation == kLocPhysReg ? + vRegMap.coreReg : oatSRegOffset(cUnit, i), buf); + LOG(INFO) << buf2; + } +} + +void oatDumpFullPromotionMap(CompilationUnit *cUnit) +{ + const Method *method = cUnit->method; + for (int i = 0; i < method->NumRegisters(); i++) { + PromotionMap vRegMap = cUnit->promotionMap[i]; + LOG(INFO) << i << " -> " << "CL:" << (int)vRegMap.coreLocation << + ", CR:" << (int)vRegMap.coreReg << ", FL:" << + (int)vRegMap.fpLocation << ", FR:" << (int)vRegMap.fpReg << + ", - " << (int)vRegMap.firstInPair; + } +} + /* Dump instructions and constant pool contents */ void oatCodegenDump(CompilationUnit* cUnit) { @@ -414,22 +447,7 @@ void oatCodegenDump(CompilationUnit* cUnit) " bytes, Dalvik size is " << insnsSize * 2; LOG(INFO) << "expansion factor: " << (float)cUnit->totalSize / (float)(insnsSize * 2); - for (int i = 0; i < method->NumRegisters(); i++) { - RegLocation loc = cUnit->regLocation[i]; - char buf[100]; - if (loc.fpLocation == kLocPhysReg) { - snprintf(buf, 100, " : s%d", loc.fpLowReg & FP_REG_MASK); - } else { - buf[0] = 0; - } - char buf2[100]; - snprintf(buf2, 100, "V[%02d] -> %s%d%s", i, - loc.location == kLocPhysReg ? - "r" : "SP+", loc.location == kLocPhysReg ? 
- loc.lowReg : loc.spOffset, buf); - LOG(INFO) << buf2; - - } + oatDumpPromotionMap(cUnit); for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) { oatDumpLIRInsn(cUnit, lirInsn, 0); } diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h index 1e4022ee4b..729e708fad 100644 --- a/src/compiler/codegen/arm/ArmLIR.h +++ b/src/compiler/codegen/arm/ArmLIR.h @@ -123,16 +123,13 @@ #define rNone (-1) /* RegisterLocation templates return values (r0, or r0/r1) */ -#define LOC_C_RETURN {kLocPhysReg, 0, 0, r0, INVALID_REG, INVALID_SREG, \ - 1, kLocPhysReg, r0, INVALID_REG, INVALID_OFFSET} -#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r0, r1, INVALID_SREG, \ - 1, kLocPhysReg, r0, r1, INVALID_OFFSET} +#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, INVALID_SREG} +#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG} /* RegisterLocation templates for interpState->retVal; */ -#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, r0, INVALID_REG, \ - INVALID_SREG, 1, kLocPhysReg, r0, INVALID_REG, \ - INVALID_OFFSET} -#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, r0, r1, \ - INVALID_SREG, 1, kLocPhysReg, r0, r1, INVALID_OFFSET} +#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \ + INVALID_SREG} +#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, \ + INVALID_SREG} /* * Data structure tracking the mapping between a Dalvik register (pair) and a diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index ed8a5b2ab3..4af3d0713a 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -37,7 +37,7 @@ typedef struct RefCounts { /* USE SSA names to count references of base Dalvik vRegs. 
*/ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, - RefCounts* counts, bool fp) + RefCounts* coreCounts, RefCounts* fpCounts) { MIR* mir; if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && @@ -47,59 +47,42 @@ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, for (mir = bb->firstMIRInsn; mir; mir = mir->next) { SSARepresentation *ssaRep = mir->ssaRep; if (ssaRep) { - int i; - int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode]; - if (fp) { - // Mark 1st reg of double pairs - int first = 0; - int sReg; - if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0])); - counts[sReg].doubleStart = true; + for (int i = 0; i < ssaRep->numDefs;) { + RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); + if (loc.defined) { + counts[vReg].count++; } - if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UA_WIDE) { - first += 2; - } - if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UB_WIDE) { - first += 2; - } - if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - } - for (i=0; i< ssaRep->numUses; i++) { - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpUse = ssaRep->fpUse ? 
ssaRep->fpUse[i] : false; - if (fp == fpUse) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } - for (i=0; i< ssaRep->numDefs; i++) { - if (attrs & DF_SETS_CONST) { - // CONST opcodes are untyped - don't pollute the counts - continue; + for (int i = 0; i < ssaRep->numUses;) { + RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); + if (loc.defined) { + counts[vReg].count++; } - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false; - if (fp == fpDef) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } } @@ -159,8 +142,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) BasicBlock* bb; bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator); if (bb == NULL) break; - countRefs(cUnit, bb, coreRegs, false); - countRefs(cUnit, bb, fpRegs, true); + countRefs(cUnit, bb, coreRegs, fpRegs); } /* @@ -178,21 +160,27 @@ extern void oatDoPromotion(CompilationUnit* cUnit) qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts); qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts); + if (cUnit->printMe) { + dumpCounts(coreRegs, numRegs, "Core regs after sort"); + dumpCounts(fpRegs, numRegs, "Fp regs after sort"); + } + if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { // Promote fpRegs for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { - if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) { + if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, fpRegs[i].doubleStart); if (reg < 
0) { - break; // No more left + break; // No more left } } } // Promote core regs for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { - if (cUnit->regLocation[i].location != kLocPhysReg) { + if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != + kLocPhysReg) { int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); if (reg < 0) { break; // No more left @@ -203,58 +191,69 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Now, update SSA names to new home locations for (int i = 0; i < cUnit->numSSARegs; i++) { - int baseSreg = cUnit->regLocation[i].sRegLow; - RegLocation *base = &cUnit->regLocation[baseSreg]; - RegLocation *baseNext = &cUnit->regLocation[baseSreg+1]; RegLocation *curr = &cUnit->regLocation[i]; - if (curr->fp) { - /* Single or double, check fpLocation of base */ - if (base->fpLocation == kLocPhysReg) { - if (curr->wide) { - /* TUNING: consider alignment during allocation */ - if ((base->fpLowReg & 1) || - (baseNext->fpLocation != kLocPhysReg)) { - /* Half-promoted or bad alignment - demote */ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; - } - curr->highReg = baseNext->fpLowReg; + int baseVReg = oatS2VReg(cUnit, curr->sRegLow); + if (!curr->wide) { + if (curr->fp) { + if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; + curr->home = true; + } + } else { + if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->home = true; } - curr->location = kLocPhysReg; - curr->lowReg = base->fpLowReg; - curr->home = true; } + curr->highReg = INVALID_REG; } else { - /* Core or wide */ - if (base->location == kLocPhysReg) { - if (curr->wide) { - /* Make sure upper half is also in reg or skip */ - if (baseNext->location != kLocPhysReg) { - /* Only half promoted; demote to frame 
*/ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; + if (curr->highWord) { + continue; + } + if (curr->fp) { + if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && + (cUnit->promotionMap[baseVReg+1].fpLocation == + kLocPhysReg)) { + int lowReg = cUnit->promotionMap[baseVReg].fpReg; + int highReg = cUnit->promotionMap[baseVReg+1].fpReg; + // Doubles require pair of singles starting at even reg + if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) { + curr->location = kLocPhysReg; + curr->lowReg = lowReg; + curr->highReg = highReg; + curr->home = true; } - curr->highReg = baseNext->lowReg; } - curr->location = kLocPhysReg; - curr->lowReg = base->lowReg; - curr->home = true; + } else { + if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) + && (cUnit->promotionMap[baseVReg+1].coreLocation == + kLocPhysReg)) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg; + curr->home = true; + } } } } } -/* Returns sp-relative offset in bytes */ -extern int oatVRegOffset(CompilationUnit* cUnit, int reg) +/* Returns sp-relative offset in bytes for a VReg */ +extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) { - return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) : - cUnit->insOffset + ((reg - cUnit->numRegs) << 2); + return (vReg < cUnit->numRegs) ? 
cUnit->regsOffset + (vReg << 2) : + cUnit->insOffset + ((vReg - cUnit->numRegs) << 2); } +/* Returns sp-relative offset in bytes for a SReg */ +extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) +{ + return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); +} + + /* Return sp-relative offset in bytes using Method* */ extern int oatVRegOffsetFromMethod(Method* method, int reg) { diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc index 633125443b..4a657718fd 100644 --- a/src/compiler/codegen/arm/MethodCodegenDriver.cc +++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc @@ -17,10 +17,8 @@ #define DISPLAY_MISSING_TARGETS (cUnit->enableDebug & \ (1 << kDebugDisplayMissingTargets)) -STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, INVALID_REG, - INVALID_REG, INVALID_SREG, 0, - kLocDalvikFrame, INVALID_REG, INVALID_REG, - INVALID_OFFSET}; +STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, INVALID_REG, + INVALID_REG, INVALID_SREG}; /* Mark register usage state and return long retloc */ STATIC RegLocation getRetLocWide(CompilationUnit* cUnit) @@ -99,7 +97,8 @@ STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) RegLocation loc = oatUpdateLoc(cUnit, oatGetSrc(cUnit, mir, i)); if (loc.location == kLocPhysReg) { - storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); } } /* @@ -113,7 +112,8 @@ STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) int rVal = rLR; // Using a lot of temps, rLR is known free here // Set up source pointer RegLocation rlFirst = oatGetSrc(cUnit, mir, 0); - opRegRegImm(cUnit, kOpAdd, rSrc, rSP, rlFirst.spOffset); + opRegRegImm(cUnit, kOpAdd, rSrc, rSP, + oatSRegOffset(cUnit, rlFirst.sRegLow)); // Set up the target pointer opRegRegImm(cUnit, kOpAdd, rDst, r0, Array::DataOffset().Int32Value()); @@ -773,7 +773,8 @@ STATIC 
int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir, } else { // r2 & r3 can safely be used here reg = r3; - loadWordDisp(cUnit, rSP, rlArg.spOffset + 4, reg); + loadWordDisp(cUnit, rSP, + oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg); callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback); } @@ -872,20 +873,23 @@ STATIC int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir, if (loc.wide) { loc = oatUpdateLocWide(cUnit, loc); if ((nextArg >= 2) && (loc.location == kLocPhysReg)) { - storeBaseDispWide(cUnit, rSP, loc.spOffset, loc.lowReg, - loc.highReg); + storeBaseDispWide(cUnit, rSP, + oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, loc.highReg); } nextArg += 2; } else { loc = oatUpdateLoc(cUnit, loc); if ((nextArg >= 3) && (loc.location == kLocPhysReg)) { - storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord); + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); } nextArg++; } } - int startOffset = cUnit->regLocation[mir->ssaRep->uses[3]].spOffset; + int startOffset = oatSRegOffset(cUnit, + cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow); int outsOffset = 4 /* Method* */ + (3 * 4); if (numArgs >= 20) { // Generate memcpy @@ -1790,63 +1794,44 @@ STATIC void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) } } -/* If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. - * Note: at this pointCopy any ins that are passed in register to their - * home location */ +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform intial + * assignment of promoted arguments. + */ STATIC void flushIns(CompilationUnit* cUnit) { if (cUnit->method->NumIns() == 0) return; - int inRegs = (cUnit->method->NumIns() > 2) ? 
3 - : cUnit->method->NumIns(); - int startReg = r1; - int startLoc = cUnit->method->NumRegisters() - + int firstArgReg = r1; + int lastArgReg = r3; + int startVReg = cUnit->method->NumRegisters() - cUnit->method->NumIns(); - for (int i = 0; i < inRegs; i++) { - RegLocation loc = cUnit->regLocation[startLoc + i]; - //TUNING: be smarter about flushing ins to frame - storeBaseDisp(cUnit, rSP, loc.spOffset, startReg + i, kWord); - if (loc.location == kLocPhysReg) { - genRegCopy(cUnit, loc.lowReg, startReg + i); - } - } - - // Handle special case of wide argument half in regs, half in frame - if (inRegs == 3) { - RegLocation loc = cUnit->regLocation[startLoc + 2]; - if (loc.wide && loc.location == kLocPhysReg) { - // Load the other half of the arg into the promoted pair - loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg); - inRegs++; - } - } - - // Now, do initial assignment of all promoted arguments passed in frame - for (int i = inRegs; i < cUnit->method->NumIns();) { - RegLocation loc = cUnit->regLocation[startLoc + i]; - if (loc.fpLocation == kLocPhysReg) { - loc.location = kLocPhysReg; - loc.fp = true; - loc.lowReg = loc.fpLowReg; - loc.highReg = loc.fpHighReg; - } - if (loc.location == kLocPhysReg) { - if (loc.wide) { - if (loc.fp && (loc.lowReg & 1) != 0) { - // Misaligned - need to load as a pair of singles - loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); - loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg); - } else { - loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset, - loc.lowReg, loc.highReg, INVALID_SREG); - } - i++; - } else { - loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); + for (int i = 0; i < cUnit->method->NumIns(); i++) { + PromotionMap vMap = cUnit->promotionMap[startVReg + i]; + // For arguments only, should have at most one promotion kind + DCHECK(!((vMap.coreLocation == kLocPhysReg) && + (vMap.fpLocation == kLocPhysReg))); + if (i <= (lastArgReg - firstArgReg)) { + // If arriving in register, copy or flush + if 
(vMap.coreLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.coreReg, firstArgReg + i); + } else if (vMap.fpLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.fpReg, firstArgReg + i); + } + // Also put a copy in memory in case we're partially promoted + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + firstArgReg + i, kWord); + } else { + // If arriving in frame, initialize promoted target regs + if (vMap.coreLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.coreReg); + } else if (vMap.fpLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.fpReg); } } - i++; } } |