72 files changed, 3001 insertions, 985 deletions
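The bulk of this change replaces the raw 64-bit use_mask/def_mask bit fields and the ENCODE_ARM_* / ENCODE_* macros with a ResourceMask type, added as dex/quick/resource_mask.cc in the compiler/Android.mk hunk below (presumably with a matching resource_mask.h). The new header itself is not among the hunks shown here, so the following is only a minimal sketch of what such a two-word mask could look like, inferred from the call sites in the diff (Bit, TwoBits, RawMask, HasBit, SetBit, SetBits, ClearBits, Intersects, Equals, kEncodeAll, kEncodeNone); the exact bit positions, member names, and the ScopedMemRefType helper in the real class may differ.

// A sketch only, assuming a two-uint64_t layout; not the actual art::ResourceMask.
#include <cstddef>
#include <cstdint>

class ResourceMask {
 public:
  // Abstract resources are assumed to live in the upper word, above the
  // 64 target-register bits (placeholder positions, not the real ones).
  enum : size_t {
    kCCode = 64,         // Condition codes.
    kFPStatus = 65,      // FP status word.
    kDalvikReg = 66,     // Dalvik v_reg memory reference.
    kLiteral = 67,       // Literal pool memory reference.
    kHeapRef = 68,       // Default memory reference type.
    kMustNotAlias = 69,  // Memory reference that must not alias.
  };

  constexpr ResourceMask() : masks_{0u, 0u} {}

  // Named constructors used by the back ends.
  static constexpr ResourceMask Bit(size_t bit) {
    return ResourceMask(bit >= 64u ? UINT64_C(0) : UINT64_C(1) << bit,
                        bit >= 64u ? UINT64_C(1) << (bit - 64u) : UINT64_C(0));
  }
  // Two adjacent bits, e.g. a double register aliasing two single-precision
  // FP registers; assumes the pair does not straddle the 64-bit boundary.
  static constexpr ResourceMask TwoBits(size_t bit) {
    return ResourceMask(bit >= 64u ? UINT64_C(0) : UINT64_C(3) << bit,
                        bit >= 64u ? UINT64_C(3) << (bit - 64u) : UINT64_C(0));
  }
  static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) {
    return ResourceMask(mask1, mask2);
  }
  static constexpr ResourceMask NoBits() {
    return ResourceMask(UINT64_C(0), UINT64_C(0));
  }
  static constexpr ResourceMask AllBits() {
    return ResourceMask(~UINT64_C(0), ~UINT64_C(0));
  }

  // Queries and in-place updates seen throughout the diff.
  constexpr bool HasBit(size_t bit) const {
    return (masks_[bit / 64u] & (UINT64_C(1) << (bit % 64u))) != 0u;
  }
  constexpr bool Intersects(const ResourceMask& other) const {
    return (masks_[0] & other.masks_[0]) != 0u || (masks_[1] & other.masks_[1]) != 0u;
  }
  constexpr bool Equals(const ResourceMask& other) const {
    return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1];
  }
  void SetBit(size_t bit) { masks_[bit / 64u] |= UINT64_C(1) << (bit % 64u); }
  void SetBits(const ResourceMask& other) {
    masks_[0] |= other.masks_[0];
    masks_[1] |= other.masks_[1];
  }
  void ClearBits(const ResourceMask& other) {
    masks_[0] &= ~other.masks_[0];
    masks_[1] &= ~other.masks_[1];
  }

 private:
  constexpr ResourceMask(uint64_t mask1, uint64_t mask2) : masks_{mask1, mask2} {}
  uint64_t masks_[2];
};

constexpr ResourceMask kEncodeNone = ResourceMask::NoBits();
constexpr ResourceMask kEncodeAll = ResourceMask::AllBits();

Call sites then read e.g. def_mask->SetBit(kArmRegSP) or DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)), as in the hunks below. Widening the mask to two words also lets ARM64 use all 64 low bits for registers (kArm64RegEnd = 64), which appears to be why the old ResourceEncodingPos enum in compiler_enums.h, with its abstract resources packed into the top bits of a single 64-bit word, is removed.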
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 1bb1d563d6..4c2cda47f8 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -34,6 +34,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/base/unix_file/string_file_test.cc \ runtime/class_linker_test.cc \ runtime/dex_file_test.cc \ + runtime/dex_file_verifier_test.cc \ runtime/dex_instruction_visitor_test.cc \ runtime/dex_method_iterator_test.cc \ runtime/entrypoints/math_entrypoints_test.cc \ diff --git a/compiler/Android.mk b/compiler/Android.mk index 9a868fcd79..4f9f31251f 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -48,6 +48,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/mips/utility_mips.cc \ dex/quick/mir_to_lir.cc \ dex/quick/ralloc_util.cc \ + dex/quick/resource_mask.cc \ dex/quick/x86/assemble_x86.cc \ dex/quick/x86/call_x86.cc \ dex/quick/x86/fp_x86.cc \ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index f0b47878e6..55a4c7815e 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -82,22 +82,6 @@ enum BBType { kDead, }; -/* - * Def/Use encoding in 64-bit use_mask/def_mask. Low positions used for target-specific - * registers (and typically use the register number as the position). High positions - * reserved for common and abstract resources. - */ - -enum ResourceEncodingPos { - kMustNotAlias = 63, - kHeapRef = 62, // Default memory reference type. - kLiteral = 61, // Literal pool memory reference. - kDalvikReg = 60, // Dalvik v_reg memory reference. - kFPStatus = 59, - kCCode = 58, - kLowestCommonResource = kCCode -}; - // Shared pseudo opcodes - must be < 0. enum LIRPseudoOpcode { kPseudoExportedPC = -16, diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index d5443972c9..d45379712b 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -143,13 +143,25 @@ int arm64_support_list[] = { Instruction::MOVE, Instruction::MOVE_FROM16, Instruction::MOVE_16, + Instruction::MOVE_WIDE, + Instruction::MOVE_WIDE_FROM16, + Instruction::MOVE_WIDE_16, + Instruction::MOVE_OBJECT, + Instruction::MOVE_OBJECT_FROM16, + Instruction::MOVE_OBJECT_16, Instruction::MOVE_EXCEPTION, Instruction::RETURN_VOID, Instruction::RETURN, Instruction::RETURN_WIDE, + Instruction::RETURN_OBJECT, Instruction::CONST_4, Instruction::CONST_16, Instruction::CONST, + Instruction::CONST_HIGH16, + Instruction::CONST_WIDE_16, + Instruction::CONST_WIDE_32, + Instruction::CONST_WIDE, + Instruction::CONST_WIDE_HIGH16, Instruction::CONST_STRING, Instruction::MONITOR_ENTER, Instruction::MONITOR_EXIT, @@ -159,6 +171,11 @@ int arm64_support_list[] = { Instruction::GOTO_32, Instruction::PACKED_SWITCH, Instruction::SPARSE_SWITCH, + Instruction::CMPL_FLOAT, + Instruction::CMPG_FLOAT, + Instruction::CMPL_DOUBLE, + Instruction::CMPG_DOUBLE, + Instruction::CMP_LONG, Instruction::IF_EQ, Instruction::IF_NE, Instruction::IF_LT, @@ -226,6 +243,81 @@ int arm64_support_list[] = { Instruction::SHL_INT_LIT8, Instruction::SHR_INT_LIT8, Instruction::USHR_INT_LIT8, + Instruction::SGET, + Instruction::SGET_BOOLEAN, + Instruction::SGET_BYTE, + Instruction::SGET_CHAR, + Instruction::SGET_SHORT, + Instruction::SGET_OBJECT, + Instruction::SPUT, + Instruction::SPUT_OBJECT, + Instruction::SPUT_BOOLEAN, + Instruction::SPUT_BYTE, + Instruction::SPUT_CHAR, + Instruction::SPUT_SHORT, + Instruction::CMPL_FLOAT, + Instruction::CMPG_FLOAT, + Instruction::IGET, + Instruction::IGET_OBJECT, + Instruction::IGET_BOOLEAN, + Instruction::IGET_BYTE, + Instruction::IGET_CHAR, + 
Instruction::IGET_SHORT, + Instruction::IPUT, + Instruction::IPUT_OBJECT, + Instruction::IPUT_BOOLEAN, + Instruction::IPUT_BYTE, + Instruction::IPUT_CHAR, + Instruction::IPUT_SHORT, + Instruction::NEG_LONG, + Instruction::NOT_LONG, + Instruction::NEG_DOUBLE, + Instruction::INT_TO_LONG, + Instruction::INT_TO_FLOAT, + Instruction::INT_TO_DOUBLE, + Instruction::LONG_TO_INT, + Instruction::LONG_TO_FLOAT, + Instruction::LONG_TO_DOUBLE, + Instruction::FLOAT_TO_INT, + Instruction::FLOAT_TO_LONG, + Instruction::FLOAT_TO_DOUBLE, + Instruction::DOUBLE_TO_INT, + Instruction::DOUBLE_TO_LONG, + Instruction::DOUBLE_TO_FLOAT, + Instruction::ADD_LONG, + Instruction::SUB_LONG, + Instruction::MUL_LONG, + Instruction::DIV_LONG, + Instruction::REM_LONG, + Instruction::AND_LONG, + Instruction::OR_LONG, + Instruction::XOR_LONG, + Instruction::SHL_LONG, + Instruction::SHR_LONG, + Instruction::USHR_LONG, + Instruction::REM_FLOAT, + Instruction::ADD_DOUBLE, + Instruction::SUB_DOUBLE, + Instruction::MUL_DOUBLE, + Instruction::DIV_DOUBLE, + Instruction::REM_DOUBLE, + Instruction::ADD_LONG_2ADDR, + Instruction::SUB_LONG_2ADDR, + Instruction::MUL_LONG_2ADDR, + Instruction::DIV_LONG_2ADDR, + Instruction::REM_LONG_2ADDR, + Instruction::AND_LONG_2ADDR, + Instruction::OR_LONG_2ADDR, + Instruction::XOR_LONG_2ADDR, + Instruction::SHL_LONG_2ADDR, + Instruction::SHR_LONG_2ADDR, + Instruction::USHR_LONG_2ADDR, + Instruction::REM_FLOAT_2ADDR, + Instruction::ADD_DOUBLE_2ADDR, + Instruction::SUB_DOUBLE_2ADDR, + Instruction::MUL_DOUBLE_2ADDR, + Instruction::DIV_DOUBLE_2ADDR, + Instruction::REM_DOUBLE_2ADDR, // TODO(Arm64): Enable compiler pass // ----- ExtendedMIROpcode ----- kMirOpPhi, @@ -244,21 +336,9 @@ int arm64_support_list[] = { kMirOpSelect, #if ARM64_USE_EXPERIMENTAL_OPCODES - Instruction::MOVE_WIDE, - Instruction::MOVE_WIDE_FROM16, - Instruction::MOVE_WIDE_16, - Instruction::MOVE_OBJECT, - Instruction::MOVE_OBJECT_FROM16, - Instruction::MOVE_OBJECT_16, // Instruction::MOVE_RESULT, // Instruction::MOVE_RESULT_WIDE, // Instruction::MOVE_RESULT_OBJECT, - // Instruction::RETURN_OBJECT, - // Instruction::CONST_HIGH16, - // Instruction::CONST_WIDE_16, - // Instruction::CONST_WIDE_32, - // Instruction::CONST_WIDE, - // Instruction::CONST_WIDE_HIGH16, // Instruction::CONST_STRING_JUMBO, // Instruction::CONST_CLASS, // Instruction::CHECK_CAST, @@ -269,11 +349,6 @@ int arm64_support_list[] = { // Instruction::FILLED_NEW_ARRAY, // Instruction::FILLED_NEW_ARRAY_RANGE, // Instruction::FILL_ARRAY_DATA, - Instruction::CMPL_FLOAT, - Instruction::CMPG_FLOAT, - Instruction::CMPL_DOUBLE, - Instruction::CMPG_DOUBLE, - Instruction::CMP_LONG, // Instruction::UNUSED_3E, // Instruction::UNUSED_3F, // Instruction::UNUSED_40, @@ -294,34 +369,10 @@ int arm64_support_list[] = { // Instruction::APUT_BYTE, // Instruction::APUT_CHAR, // Instruction::APUT_SHORT, - // Instruction::IGET, - // Instruction::IGET_WIDE, - // Instruction::IGET_OBJECT, - // Instruction::IGET_BOOLEAN, - // Instruction::IGET_BYTE, - // Instruction::IGET_CHAR, - // Instruction::IGET_SHORT, - // Instruction::IPUT, // Instruction::IPUT_WIDE, - // Instruction::IPUT_OBJECT, - // Instruction::IPUT_BOOLEAN, - // Instruction::IPUT_BYTE, - // Instruction::IPUT_CHAR, - // Instruction::IPUT_SHORT, - Instruction::SGET, + // Instruction::IGET_WIDE, // Instruction::SGET_WIDE, - Instruction::SGET_OBJECT, - // Instruction::SGET_BOOLEAN, - // Instruction::SGET_BYTE, - // Instruction::SGET_CHAR, - // Instruction::SGET_SHORT, - Instruction::SPUT, // Instruction::SPUT_WIDE, - // 
Instruction::SPUT_OBJECT, - // Instruction::SPUT_BOOLEAN, - // Instruction::SPUT_BYTE, - // Instruction::SPUT_CHAR, - // Instruction::SPUT_SHORT, Instruction::INVOKE_VIRTUAL, Instruction::INVOKE_SUPER, Instruction::INVOKE_DIRECT, @@ -335,55 +386,6 @@ int arm64_support_list[] = { // Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, - Instruction::NEG_LONG, - Instruction::NOT_LONG, - Instruction::NEG_DOUBLE, - Instruction::INT_TO_LONG, - Instruction::INT_TO_FLOAT, - Instruction::INT_TO_DOUBLE, - Instruction::LONG_TO_INT, - Instruction::LONG_TO_FLOAT, - Instruction::LONG_TO_DOUBLE, - Instruction::FLOAT_TO_INT, - Instruction::FLOAT_TO_LONG, - Instruction::FLOAT_TO_DOUBLE, - Instruction::DOUBLE_TO_INT, - Instruction::DOUBLE_TO_LONG, - Instruction::DOUBLE_TO_FLOAT, - Instruction::ADD_LONG, - Instruction::SUB_LONG, - Instruction::MUL_LONG, - Instruction::DIV_LONG, - Instruction::REM_LONG, - Instruction::AND_LONG, - Instruction::OR_LONG, - Instruction::XOR_LONG, - Instruction::SHL_LONG, - Instruction::SHR_LONG, - Instruction::USHR_LONG, - // Instruction::REM_FLOAT, - Instruction::ADD_DOUBLE, - Instruction::SUB_DOUBLE, - Instruction::MUL_DOUBLE, - Instruction::DIV_DOUBLE, - // Instruction::REM_DOUBLE, - Instruction::ADD_LONG_2ADDR, - Instruction::SUB_LONG_2ADDR, - Instruction::MUL_LONG_2ADDR, - Instruction::DIV_LONG_2ADDR, - Instruction::REM_LONG_2ADDR, - Instruction::AND_LONG_2ADDR, - Instruction::OR_LONG_2ADDR, - Instruction::XOR_LONG_2ADDR, - Instruction::SHL_LONG_2ADDR, - Instruction::SHR_LONG_2ADDR, - Instruction::USHR_LONG_2ADDR, - // Instruction::REM_FLOAT_2ADDR, - Instruction::ADD_DOUBLE_2ADDR, - Instruction::SUB_DOUBLE_2ADDR, - Instruction::MUL_DOUBLE_2ADDR, - Instruction::DIV_DOUBLE_2ADDR, - // Instruction::REM_DOUBLE_2ADDR, // Instruction::IGET_QUICK, // Instruction::IGET_WIDE_QUICK, // Instruction::IGET_OBJECT_QUICK, diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index e32e7cb74f..6272555983 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -109,12 +109,6 @@ enum ArmResourceEncodingPos { kArmRegEnd = 48, }; -#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N)) -#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) -#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) -#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC) -#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16) - enum ArmNativeRegisterPool { r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1, diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index db0731f1ee..5466abd11d 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -87,7 +87,7 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, tab_rec->anchor = switch_branch; // Needs to use setflags encoding here OpRegRegImm(kOpSub, r_idx, r_idx, 1); // For value == 1, this should set flags. 
- DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); OpCondBranch(kCondNe, target); } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 9c801a520b..8db7d4ee73 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -63,7 +63,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -79,12 +79,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); @@ -204,6 +205,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int value); + LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, + RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg()); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); @@ -217,6 +220,10 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op); bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); + + static constexpr ResourceMask GetRegMaskArm(RegStorage reg); + static constexpr ResourceMask EncodeArmRegList(int reg_list); + static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 4732e52234..916c52838a 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -224,13 +224,13 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { bool cheap_false_val = InexpensiveConstantInt(false_val); if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(true_val == 0 ? 
kCondNe : kCondUge, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(kCondLs, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact @@ -882,14 +882,14 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { } FreeTemp(r_tmp_high); // Now unneeded - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); } else { NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } @@ -907,7 +907,7 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // result := (tmp1 != 0) ? 0 : 1; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. @@ -971,7 +971,7 @@ LIR* ArmMir2Lir::OpTestSuspend(LIR* target) { LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { // Combine sub & test using sub setflags encoding here OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } @@ -1004,7 +1004,7 @@ bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; return ret; #else return false; diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 3b30cde0d4..e1e2d5b749 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -135,30 +135,32 @@ RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. */ -uint64_t ArmMir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id = reg.GetRegNum(); - /* Each double register is equal to a pair of single-precision FP registers */ - if (reg.IsDouble()) { - seed = 0x3; - reg_id = reg_id << 1; - } else { - seed = 1; - } - /* FP register starts at bit position 16 */ - shift = reg.IsFloat() ? 
kArmFPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); +ResourceMask ArmMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + return GetRegMaskArm(reg); +} + +constexpr ResourceMask ArmMir2Lir::GetRegMaskArm(RegStorage reg) { + return reg.IsDouble() + /* Each double register is equal to a pair of single-precision FP registers */ + ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kArmFPReg0) + : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum()); +} + +constexpr ResourceMask ArmMir2Lir::EncodeArmRegList(int reg_list) { + return ResourceMask::RawMask(static_cast<uint64_t >(reg_list), 0u); +} + +constexpr ResourceMask ArmMir2Lir::EncodeArmRegFpcsList(int reg_list) { + return ResourceMask::RawMask(static_cast<uint64_t >(reg_list) << kArmFPReg16, 0u); } -uint64_t ArmMir2Lir::GetPCUseDefEncoding() { - return ENCODE_ARM_REG_PC; +ResourceMask ArmMir2Lir::GetPCUseDefEncoding() const { + return ResourceMask::Bit(kArmRegPC); } // Thumb2 specific setup. TODO: inline?: -void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kThumb2); DCHECK(!lir->flags.use_def_invalid); @@ -169,70 +171,70 @@ void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + def_mask->SetBit(kArmRegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + use_mask->SetBit(kArmRegSP); } if (flags & REG_DEF_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + def_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_DEF_LIST1) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + def_mask->SetBits(EncodeArmRegList(lir->operands[1])); } if (flags & REG_DEF_FPCS_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + def_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_DEF_FPCS_LIST2) { for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); + SetupRegMask(def_mask, lir->operands[1] + i); } } if (flags & REG_USE_PC) { - lir->u.m.use_mask |= ENCODE_ARM_REG_PC; + use_mask->SetBit(kArmRegPC); } /* Conservatively treat the IT block */ if (flags & IS_IT) { - lir->u.m.def_mask = ENCODE_ALL; + *def_mask = kEncodeAll; } if (flags & REG_USE_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + use_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_USE_LIST1) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + use_mask->SetBits(EncodeArmRegList(lir->operands[1])); } if (flags & REG_USE_FPCS_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + use_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_USE_FPCS_LIST2) { for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); + SetupRegMask(use_mask, lir->operands[1] + i); } } /* Fixup for kThumbPush/lr and kThumbPop/pc */ if (opcode == kThumbPush || opcode == kThumbPop) { - uint64_t r8Mask = GetRegMaskCommon(rs_r8); - if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { - lir->u.m.use_mask &= ~r8Mask; - 
lir->u.m.use_mask |= ENCODE_ARM_REG_LR; - } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { - lir->u.m.def_mask &= ~r8Mask; - lir->u.m.def_mask |= ENCODE_ARM_REG_PC; + constexpr ResourceMask r8Mask = GetRegMaskArm(rs_r8); + if ((opcode == kThumbPush) && (use_mask->Intersects(r8Mask))) { + use_mask->ClearBits(r8Mask); + use_mask->SetBit(kArmRegLR); + } else if ((opcode == kThumbPop) && (def_mask->Intersects(r8Mask))) { + def_mask->ClearBits(r8Mask); + def_mask->SetBit(kArmRegPC);; } } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LR; + def_mask->SetBit(kArmRegLR); } } } @@ -486,44 +488,44 @@ std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char return buf; } -void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) { +void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kArmRegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 86d32f4d72..61d3d56036 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -87,9 +87,9 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) { if (data_target == NULL) { data_target = AddWordData(&literal_list_, value); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -670,6 +670,7 @@ LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (data_target == NULL) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); if (r_dest.IsFloat()) { res = RawLIR(current_dalvik_offset_, kThumb2Vldrd, r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target); @@ -678,7 +679,6 @@ LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8, r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target); } - SetMemRefType(res, true, kLiteral); AppendLIR(res); } return res; @@ -819,6 +819,30 @@ LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStor return store; } +// Helper function for LoadBaseDispBody()/StoreBaseDispBody(). 
+LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, + RegStorage r_src_dest, RegStorage r_work) { + DCHECK_EQ(displacement & 3, 0); + int encoded_disp = (displacement & 1020) >> 2; // Within range of the instruction. + RegStorage r_ptr = r_base; + if ((displacement & ~1020) != 0) { + r_ptr = r_work.Valid() ? r_work : AllocTemp(); + // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. + OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); + } + LIR* lir = nullptr; + if (!r_src_dest.IsPair()) { + lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp); + } else { + lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(), + encoded_disp); + } + if ((displacement & ~1020) != 0 && !r_work.Valid()) { + FreeTemp(r_ptr); + } + return lir; +} + /* * Load value from base + displacement. Optionally perform null check * on base (which must have an associated s_reg and MIR). If not @@ -836,40 +860,26 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag switch (size) { case kDouble: // Intentional fall-though. - case k64: { - DCHECK_EQ(displacement & 3, 0); - encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vldrd/kThumb2LdrdI8. - RegStorage r_ptr = r_base; - if ((displacement & ~1020) != 0) { - // For core register load, use the r_dest.GetLow() for the temporary pointer. - r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow(); - // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); - } + case k64: if (r_dest.IsFloat()) { DCHECK(!r_dest.IsPair()); - load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp); + load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest); } else { - load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(), - encoded_disp); - } - if ((displacement & ~1020) != 0 && r_dest.IsFloat()) { - FreeTemp(r_ptr); + DCHECK(r_dest.IsPair()); + // Use the r_dest.GetLow() for the temporary pointer if needed. + load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow()); } already_generated = true; break; - } case kSingle: // Intentional fall-though. case k32: // Intentional fall-though. case kReference: if (r_dest.IsFloat()) { - opcode = kThumb2Vldrs; - if (displacement <= 1020) { - short_form = true; - encoded_disp >>= 2; - } + DCHECK(r_dest.IsSingle()); + load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest); + already_generated = true; break; } if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && @@ -934,19 +944,15 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag } else { RegStorage reg_offset = AllocTemp(); LoadConstant(reg_offset, encoded_disp); - if (r_dest.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. 
- OpRegReg(kOpAdd, reg_offset, r_base); - load = LoadBaseDispBody(reg_offset, 0, r_dest, size); - } else { - load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); - } + DCHECK(!r_dest.IsFloat()); + load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); FreeTemp(reg_offset); } } // TODO: in future may need to differentiate Dalvik accesses w/ spills - if (r_base == rs_rARM_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rARM_SP); AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); } return load; @@ -992,28 +998,16 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora switch (size) { case kDouble: // Intentional fall-though. - case k64: { - DCHECK_EQ(displacement & 3, 0); - encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vstrd/kThumb2StrdI8. - RegStorage r_ptr = r_base; - if ((displacement & ~1020) != 0) { - r_ptr = AllocTemp(); - // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); - } + case k64: if (r_src.IsFloat()) { DCHECK(!r_src.IsPair()); - store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp); + store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src); } else { - store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(), - encoded_disp); - } - if ((displacement & ~1020) != 0) { - FreeTemp(r_ptr); + DCHECK(r_src.IsPair()); + store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src); } already_generated = true; break; - } case kSingle: // Intentional fall-through. case k32: @@ -1021,11 +1015,8 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora case kReference: if (r_src.IsFloat()) { DCHECK(r_src.IsSingle()); - opcode = kThumb2Vstrs; - if (displacement <= 1020) { - short_form = true; - encoded_disp >>= 2; - } + store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src); + already_generated = true; break; } if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { @@ -1073,19 +1064,15 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora } else { RegStorage r_scratch = AllocTemp(); LoadConstant(r_scratch, encoded_disp); - if (r_src.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. 
- OpRegReg(kOpAdd, r_scratch, r_base); - store = StoreBaseDispBody(r_scratch, 0, r_src, size); - } else { - store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); - } + DCHECK(!r_src.IsFloat()); + store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); FreeTemp(r_scratch); } } // TODO: In future, may need to differentiate Dalvik & spill accesses - if (r_base == rs_rARM_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rARM_SP); AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); } return store; diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 6a6b0f6a53..c1ce03deb1 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -102,17 +102,14 @@ namespace art { #define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp) #define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr) -enum ArmResourceEncodingPos { - kArmGPReg0 = 0, - kArmRegLR = 30, - kArmRegSP = 31, - kArmFPReg0 = 32, - kArmRegEnd = 64, +enum Arm64ResourceEncodingPos { + kArm64GPReg0 = 0, + kArm64RegLR = 30, + kArm64RegSP = 31, + kArm64FPReg0 = 32, + kArm64RegEnd = 64, }; -#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) -#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) - #define IS_SIGNED_IMM(size, value) \ ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1))) #define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value) @@ -212,7 +209,7 @@ enum A64RegExtEncodings { }; #define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0)) - +#define ENCODE_NO_EXTEND (EncodeExtend(kA64Uxtx, 0)) /* * The following enum defines the list of supported A64 instructions by the * assembler. Their corresponding EncodingMap positions will be defined in @@ -289,6 +286,7 @@ enum ArmOpcode { kA64Ldur3fXd, // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Ldur3rXd, // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Ldxr2rX, // ldxr[1s00100001011111011111] rn[9-5] rt[4-0]. + kA64Ldaxr2rX, // ldaxr[1s00100001011111111111] rn[9-5] rt[4-0]. kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0]. kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}". kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0]. @@ -328,8 +326,9 @@ enum ArmOpcode { kA64Stur3fXd, // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Stur3rXd, // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Stxr3wrX, // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0]. + kA64Stlxr3wrX, // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0]. kA64Sub4RRdT, // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0]. - kA64Sub4rrro, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. + kA64Sub4rrro, // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0]. kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3". kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. @@ -394,9 +393,6 @@ enum ArmEncodingKind { kFmtSkip, // Unused field, but continue to next. }; -// TODO(Arm64): should we get rid of kFmtExtend? -// Note: the only instructions that use it (cmp, cmn) are not used themselves. - // Struct used to define the snippet positions for each A64 opcode. 
struct ArmEncodingMap { uint32_t wskeleton; diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 4a0c055f4d..2c4f26216f 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -268,7 +268,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0s, !1w", kFixupNone), - ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000), + ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000), kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0S, !1x", kFixupNone), @@ -386,6 +386,10 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldxr", "!0r, [!1X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldaxr", "!0r, [!1X]", kFixupNone), ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000), kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, @@ -443,11 +447,11 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, "ret", "", kFixupNone), ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00), - kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "rev", "!0r, !1r", kFixupNone), - ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0), - kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, + ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "rev16", "!0r, !1r", kFixupNone), ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00), @@ -542,6 +546,10 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE, "stxr", "!0w, !1r, [!2X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00), + kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE, + "stlxr", "!0w, !1r, [!2X]", kFixupNone), ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000), kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1, @@ -803,7 +811,7 @@ void Arm64Mir2Lir::AssembleLIR() { DCHECK(target_lir); CodeOffset pc = lir->offset; CodeOffset target = target_lir->offset + - ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + ((target_lir->flags.generation == lir->flags.generation) ? 
0 : offset_adjustment); int32_t delta = target - pc; if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) { LOG(FATAL) << "Invalid jump range in kFixupLoad"; diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index b85f5694d6..59eec3d486 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -301,12 +301,14 @@ void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { * Mark garbage collection card. Skip if the value we're storing is null. */ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { - RegStorage reg_card_base = AllocTemp(); + RegStorage reg_card_base = AllocTempWide(); RegStorage reg_card_no = AllocTemp(); LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base); OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); - StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte); + // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"? + StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base), + 0, kUnsignedByte); LIR* target = NewLIR0(kPseudoTargetLabel); branch_over->target = target; FreeTemp(reg_card_base); @@ -315,62 +317,133 @@ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* - * On entry, x0, x1, x2 & x3 are live. Let the register allocation + * On entry, x0 to x7 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when - * expanding the frame or flushing. This leaves the utility - * code with a single temp: r12. This should be enough. + * expanding the frame or flushing. + * Reserve x8 & x9 for temporaries. */ LockTemp(rs_x0); LockTemp(rs_x1); LockTemp(rs_x2); LockTemp(rs_x3); + LockTemp(rs_x4); + LockTemp(rs_x5); + LockTemp(rs_x6); + LockTemp(rs_x7); + LockTemp(rs_x8); + LockTemp(rs_x9); /* * We can safely skip the stack overflow check if we're * a leaf *and* our frame size < fudge factor. */ bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && - (static_cast<size_t>(frame_size_) < - Thread::kStackOverflowReservedBytes)); + (static_cast<size_t>(frame_size_) < + Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes); + const int spill_count = num_core_spills_ + num_fp_spills_; + const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf; // SP 16 byte alignment. + const int frame_size_without_spills = frame_size_ - spill_size; + if (!skip_overflow_check) { - LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12); - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); if (Runtime::Current()->ExplicitStackOverflowChecks()) { - /* Load stack limit */ - // TODO(Arm64): fix the line below: - // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow); + if (!large_frame) { + // Load stack limit + LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9); + } } else { + // TODO(Arm64) Implement implicit checks. // Implicit stack overflow check. // Generate a load from [sp, #-framesize]. If this is in the stack // redzone we will get a segmentation fault. - // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR? 
- Load32Disp(rs_rA64_SP, 0, rs_wzr); - MarkPossibleStackOverflowException(); + // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr); + // MarkPossibleStackOverflowException(); + LOG(FATAL) << "Implicit stack overflow checks not implemented."; } - } else if (frame_size_ > 0) { - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); + } + + if (frame_size_ > 0) { + OpRegImm64(kOpSub, rs_rA64_SP, spill_size); } /* Need to spill any FP regs? */ if (fp_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_); + int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_); SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_); } /* Spill core callee saves. */ if (core_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_; + int spill_offset = spill_size - kArm64PointerSize*num_core_spills_; SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_); } + if (!skip_overflow_check) { + if (Runtime::Current()->ExplicitStackOverflowChecks()) { + class StackOverflowSlowPath: public LIRSlowPath { + public: + StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) : + LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), + sp_displace_(sp_displace) { + } + void Compile() OVERRIDE { + m2l_->ResetRegPool(); + m2l_->ResetDefTracking(); + GenerateTargetLabel(kPseudoThrowTarget); + // Unwinds stack. + m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_); + m2l_->ClobberCallerSave(); + ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); + m2l_->LockTemp(rs_x8); + m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8); + m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg()); + m2l_->FreeTemp(rs_x8); + } + + private: + const size_t sp_displace_; + }; + + if (large_frame) { + // Compare Expected SP against bottom of stack. + // Branch to throw target if there is not enough room. + OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills); + LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8); + LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr); + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size)); + OpRegCopy(rs_rA64_SP, rs_x9); // Establish stack after checks. + } else { + /* + * If the frame is small enough we are guaranteed to have enough space that remains to + * handle signals on the user stack. + * Establishes stack before checks. 
+ */ + OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills); + LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr); + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_)); + } + } else { + OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills); + } + } else { + OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills); + } + FlushIns(ArgLocs, rl_method); FreeTemp(rs_x0); FreeTemp(rs_x1); FreeTemp(rs_x2); FreeTemp(rs_x3); + FreeTemp(rs_x4); + FreeTemp(rs_x5); + FreeTemp(rs_x6); + FreeTemp(rs_x7); + FreeTemp(rs_x8); + FreeTemp(rs_x9); } void Arm64Mir2Lir::GenExitSequence() { diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 21db77193b..bf09b8610e 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -63,7 +63,7 @@ class Arm64Mir2Lir : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -78,12 +78,13 @@ class Arm64Mir2Lir : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); @@ -123,6 +124,7 @@ class Arm64Mir2Lir : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + bool GenInlinedAbsLong(CallInfo* info); void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); @@ -184,6 +186,7 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); + LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value); LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); LIR* OpTestSuspend(LIR* target); @@ -201,6 +204,7 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, int shift); LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); + LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); static const ArmEncodingMap EncodingMap[kA64Last]; int EncodeShift(int code, int amount); int EncodeExtend(int extend_type, int amount); diff --git 
a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 0a76b9b295..8112c2ec6d 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -77,10 +77,10 @@ void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, default: LOG(FATAL) << "Unexpected case: " << opcode; } - rl_shift = LoadValueWide(rl_shift, kCoreReg); + rl_shift = LoadValue(rl_shift, kCoreReg); rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg)); StoreValueWide(rl_dest, rl_result); } @@ -361,11 +361,22 @@ RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegS return rl_result; } -bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); +bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) { + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage sign_reg = AllocTempWide(); + // abs(x) = y<=x>>63, (x+y)^y. + OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63); + OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg); + OpRegReg(kOpXor, rl_result.reg, sign_reg); + StoreValueWide(rl_dest, rl_result); + return true; +} - DCHECK_EQ(cu_->instruction_set, kThumb2); +bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { + DCHECK_EQ(cu_->instruction_set, kArm64); RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[1]; rl_src1 = LoadValue(rl_src1, kCoreReg); @@ -373,61 +384,43 @@ bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - // OpIT((is_min) ? kCondGt : kCondLt, "E"); - OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); - OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); - GenBarrier(); + NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), + rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt); StoreValue(rl_dest, rl_result); return true; } bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] + rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ? RegLocation rl_dest = InlineTarget(info); - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); if (size == k64) { - // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. 
- if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { - LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); - LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); - } else { - LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); - LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); - } StoreValueWide(rl_dest, rl_result); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); - // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); StoreValue(rl_dest, rl_result); } return true; } bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + + RegLocation rl_value; if (size == k64) { - // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. - RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32); - StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32); + rl_value = LoadValueWide(rl_src_value, kCoreReg); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); - // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. - RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + rl_value = LoadValue(rl_src_value, kCoreReg); } + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); return true; } @@ -444,71 +437,30 @@ void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { } bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - - DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK_EQ(cu_->instruction_set, kArm64); + ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low - rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] + rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this RegLocation rl_src_expected = info->args[4]; // int, long or Object // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] RegLocation rl_dest = InlineTarget(info); // boolean place for result - // We have only 5 temporary registers available and actually only 4 if the InlineTarget - // above locked one of the temps. For a straightforward CAS64 we need 7 registers: - // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor - // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop - // into the same temps, reducing the number of required temps down to 5. We shall work - // around the potentially locked temp by using LR for r_ptr, unconditionally. 
- // TODO: Pass information about the need for more temps to the stack frame generation - // code so that we can rely on being able to allocate enough temps. - DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp()); - MarkTemp(rs_rA64_LR); - FreeTemp(rs_rA64_LR); - LockTemp(rs_rA64_LR); - bool load_early = true; - if (is_long) { - RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() : - rl_src_expected.reg; - RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() : - rl_src_new_value.reg; - bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat(); - bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat(); - bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg); - bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg); - - if (!expected_is_good_reg && !new_value_is_good_reg) { - // None of expected/new_value is non-temp reg, need to load both late - load_early = false; - // Make sure they are not in the temp regs and the load will not be skipped. - if (expected_is_core_reg) { - FlushRegWide(rl_src_expected.reg); - ClobberSReg(rl_src_expected.s_reg_low); - ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); - rl_src_expected.location = kLocDalvikFrame; - } - if (new_value_is_core_reg) { - FlushRegWide(rl_src_new_value.reg); - ClobberSReg(rl_src_new_value.s_reg_low); - ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); - rl_src_new_value.location = kLocDalvikFrame; - } - } - } - - // Release store semantics, get the barrier out of the way. TODO: revisit - GenMemBarrier(kStoreLoad); - + // Load Object and offset RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); + RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg); + RegLocation rl_new_value; - if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value); - } else if (load_early) { + RegLocation rl_expected; + if (is_long) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); + rl_expected = LoadValueWide(rl_src_expected, kCoreReg); + } else { + rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg); + rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg); } if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { @@ -516,9 +468,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { MarkGCCard(rl_new_value.reg, rl_object.reg); } - RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - - RegStorage r_ptr = rs_rA64_LR; + RegStorage r_ptr = AllocTempRef(); OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg); // Free now unneeded rl_object and rl_offset to give more temps. @@ -527,77 +477,40 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { ClobberSReg(rl_offset.s_reg_low); FreeTemp(rl_offset.reg); - RegLocation rl_expected; - if (!is_long) { - rl_expected = LoadValue(rl_src_expected); - } else if (load_early) { - rl_expected = LoadValueWide(rl_src_expected, kCoreReg); - } else { - // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. 
- int low_reg = AllocTemp().GetReg(); - int high_reg = AllocTemp().GetReg(); - rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg); - rl_expected = rl_new_value; - } - // do { // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); // result = tmp != 0; - RegStorage r_tmp = AllocTemp(); - LIR* target = NewLIR0(kPseudoTargetLabel); - + RegStorage r_tmp; if (is_long) { - RegStorage r_tmp_high = AllocTemp(); - if (!load_early) { - LoadValueDirectWide(rl_src_expected, rl_expected.reg); - } - NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); - OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow()); - OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh()); - if (!load_early) { - LoadValueDirectWide(rl_src_new_value, rl_new_value.reg); - } - - LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL); - LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL); - NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), - rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); - LIR* target2 = NewLIR0(kPseudoTargetLabel); - branch1->target = target2; - branch2->target = target2; - FreeTemp(r_tmp_high); // Now unneeded - + r_tmp = AllocTempWide(); + } else if (is_object) { + r_tmp = AllocTempRef(); } else { - NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0); - OpRegReg(kOpSub, r_tmp, rl_expected.reg); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - // OpIT(kCondEq, "T"); - NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); + r_tmp = AllocTemp(); } - // Still one conditional left from OpIT(kCondEq, "T") from either branch - OpRegImm(kOpCmp /* eq */, r_tmp, 1); - OpCondBranch(kCondEq, target); + LIR* loop = NewLIR0(kPseudoTargetLabel); + NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg()); + OpRegReg(kOpCmp, r_tmp, rl_expected.reg); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); + LIR* early_exit = OpCondBranch(kCondNe, NULL); - if (!load_early) { - FreeTemp(rl_expected.reg); // Now unneeded. - } + NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg()); + NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); + OpCondBranch(kCondNe, loop); - // result := (tmp1 != 0) ? 0 : 1; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - // OpIT(kCondUlt, ""); - LoadConstant(rl_result.reg, 0); /* cc */ + LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); + early_exit->target = exit; + FreeTemp(r_tmp); // Now unneeded. + FreeTemp(r_ptr); // Now unneeded. StoreValue(rl_dest, rl_result); - // Now, restore lr to its non-temp status. - Clobber(rs_rA64_LR); - UnmarkTemp(rs_rA64_LR); return true; } @@ -640,7 +553,7 @@ LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { // Combine sub & test using sub setflags encoding here OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } @@ -673,7 +586,7 @@ bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { // At this point we must have a memory barrier. 
Mark it as a scheduling barrier as well. DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; return ret; #else return false; @@ -1021,7 +934,7 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_de rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegRegImm(op, rl_result.reg, rl_src1.reg, val); + OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val); StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 439dc8c73d..e2846aebc3 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -139,41 +139,43 @@ RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. This routine makes assumptions on the encoding made by RegStorage. */ -uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) { +ResourceMask Arm64Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const { // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor. - int reg_raw = reg.GetRawBits(); // Check if the shape mask is zero (i.e. invalid). if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) { // The zero register is not a true register. It is just an immediate zero. - return 0; + return kEncodeNone; } - return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask); + return ResourceMask::Bit( + // FP register starts at bit position 32. + (reg.IsFloat() ? kArm64FPReg0 : 0) + reg.GetRegNum()); } -uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() { +ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const { LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64"; - return 0ULL; + return kEncodeNone; } // Arm64 specific setup. TODO: inline?: -void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kArm64); DCHECK(!lir->flags.use_def_invalid); // These flags are somewhat uncommon - bypass if we can. 
if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + def_mask->SetBit(kArm64RegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + use_mask->SetBit(kArm64RegSP); } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LR; + def_mask->SetBit(kArm64RegLR); } } } @@ -510,44 +512,44 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch return buf; } -void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) { +void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; - for (i = 0; i < kArmRegEnd; i++) { - if (mask & (1ULL << i)) { + for (i = 0; i < kArm64RegEnd; i++) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } @@ -850,6 +852,8 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { return; } + // Handle dalvik registers. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 4f0d7bc7d1..71e9e95ab0 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -102,9 +102,9 @@ LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) { data_target = AddWordData(&literal_list_, value); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp, r_dest, 0, 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -129,9 +129,9 @@ LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) { } DCHECK(RegStorage::IsFloat(r_dest)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp), r_dest, 0, 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -146,7 +146,7 @@ static int CountTrailingZeros(bool is_wide, uint64_t value) { static int CountSetBits(bool is_wide, uint64_t value) { return ((is_wide) ? 
- __builtin_popcountl(value) : __builtin_popcount((uint32_t)value)); + __builtin_popcountll(value) : __builtin_popcount((uint32_t)value)); } /** @@ -387,11 +387,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r case kOpRev: DCHECK_EQ(shift, 0); // Binary, but rm is encoded twice. - return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); break; case kOpRevsh: // Binary, but rm is encoded twice. - return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + return NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); break; case kOp2Byte: DCHECK_EQ(shift, ENCODE_NO_SHIFT); @@ -426,8 +426,43 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r return NULL; } +LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend) { + ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); + ArmOpcode opcode = kA64Brk1d; + + switch (op) { + case kOpCmn: + opcode = kA64Cmn3Rre; + break; + case kOpCmp: + opcode = kA64Cmp3Rre; + break; + default: + LOG(FATAL) << "Bad Opcode: " << opcode; + break; + } + + DCHECK(!IsPseudoLirOp(opcode)); + if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { + ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + if (kind == kFmtExtend) { + return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend); + } + } + + LOG(FATAL) << "Unexpected encoding operand count"; + return NULL; +} + LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { - return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT); + /* RegReg operations with SP in first parameter need extended register instruction form. + * Only CMN and CMP instructions are implemented. + */ + if (r_dest_src1 == rs_rA64_SP) { + return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND); + } else { + return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT); + } } LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) { @@ -517,8 +552,11 @@ LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT); } -// Should be taking an int64_t value ? LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) { + return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value)); +} + +LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) { LIR* res; bool neg = (value < 0); int64_t abs_value = (neg) ? -value : value; @@ -526,7 +564,6 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, ArmOpcode alt_opcode = kA64Brk1d; int32_t log_imm = -1; bool is_wide = r_dest.Is64Bit(); - CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); ArmOpcode wide = (is_wide) ? 
WIDE(0) : UNWIDE(0); switch (op) { @@ -603,11 +640,17 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm); } else { RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, value); + if (IS_WIDE(wide)) { + r_scratch = AllocTempWide(); + LoadConstantWide(r_scratch, value); + } else { + r_scratch = AllocTemp(); + LoadConstant(r_scratch, value); + } if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) - res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); + res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); else - res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); + res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); FreeTemp(r_scratch); return res; } @@ -632,9 +675,36 @@ LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) // abs_value is a shifted 12-bit immediate. shift = true; abs_value >>= 12; + } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) { + // Note: It is better to use two ADD/SUB instead of loading a number to a temp register. + // This works for both normal registers and SP. + // For a frame size == 0x2468, it will be encoded as: + // sub sp, #0x2000 + // sub sp, #0x468 + if (neg) { + op = (op == kOpAdd) ? kOpSub : kOpAdd; + } + OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff))); + return OpRegImm64(op, r_dest_src1, abs_value & 0xfff); + } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) { + // Note: "sub sp, sp, Xm" is not correct on arm64. + // We need special instructions for SP. + // Also operation on 32-bit SP should be avoided. + DCHECK(IS_WIDE(wide)); + RegStorage r_tmp = AllocTempWide(); + OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0); + OpRegImm64(op, r_tmp, value); + return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0); } else { - RegStorage r_tmp = AllocTemp(); - LIR* res = LoadConstant(r_tmp, value); + RegStorage r_tmp; + LIR* res; + if (IS_WIDE(wide)) { + r_tmp = AllocTempWide(); + res = LoadConstantWide(r_tmp, value); + } else { + r_tmp = AllocTemp(); + res = LoadConstant(r_tmp, value); + } OpRegReg(op, r_dest_src1, r_tmp); FreeTemp(r_tmp); return res; @@ -683,9 +753,9 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), r_dest.GetReg(), 0, 0, 0, 0, data_target); - SetMemRefType(res, true, kLiteral); AppendLIR(res); return res; } @@ -905,7 +975,8 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor } // TODO: in future may need to differentiate Dalvik accesses w/ spills - if (r_base == rs_rA64_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rA64_SP); AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); } return load; @@ -986,7 +1057,8 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto } // TODO: In future, may need to differentiate Dalvik & spill accesses. 
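The OpRegImm64 change above avoids a temporary for add/sub immediates below 1 << 24 by splitting them into a shifted imm12 plus a plain imm12, exactly as the 0x2468 frame-size comment illustrates. A small stand-alone check of that arithmetic (illustrative code, not ART's):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t frame = 0x2468;                   // example from the comment above
  const int64_t high  = frame & ~INT64_C(0xfff);  // 0x2000 -> ADD/SUB imm12, LSL #12
  const int64_t low   = frame & INT64_C(0xfff);   // 0x468  -> plain ADD/SUB imm12
  assert(high + low == frame);                    // two instructions cover the offset
  assert((high >> 12) < 0x1000 && low < 0x1000);  // both pieces fit in 12 bits
  return 0;
}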
- if (r_base == rs_rA64_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rA64_SP); AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); } return store; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 3fbbc4eba7..ec0fb43571 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -74,9 +74,9 @@ bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) { void Mir2Lir::MarkSafepointPC(LIR* inst) { DCHECK(!inst->flags.use_def_invalid); - inst->u.m.def_mask = ENCODE_ALL; + inst->u.m.def_mask = &kEncodeAll; LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC); - DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL); + DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll)); } /* Remove a LIR from the list. */ @@ -108,37 +108,40 @@ void Mir2Lir::NopLIR(LIR* lir) { } void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { - uint64_t *mask_ptr; - uint64_t mask = ENCODE_MEM; DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE)); DCHECK(!lir->flags.use_def_invalid); + // TODO: Avoid the extra Arena allocation! + const ResourceMask** mask_ptr; + ResourceMask mask; if (is_load) { mask_ptr = &lir->u.m.use_mask; } else { mask_ptr = &lir->u.m.def_mask; } + mask = **mask_ptr; /* Clear out the memref flags */ - *mask_ptr &= ~mask; + mask.ClearBits(kEncodeMem); /* ..and then add back the one we need */ switch (mem_type) { - case kLiteral: + case ResourceMask::kLiteral: DCHECK(is_load); - *mask_ptr |= ENCODE_LITERAL; + mask.SetBit(ResourceMask::kLiteral); break; - case kDalvikReg: - *mask_ptr |= ENCODE_DALVIK_REG; + case ResourceMask::kDalvikReg: + mask.SetBit(ResourceMask::kDalvikReg); break; - case kHeapRef: - *mask_ptr |= ENCODE_HEAP_REF; + case ResourceMask::kHeapRef: + mask.SetBit(ResourceMask::kHeapRef); break; - case kMustNotAlias: + case ResourceMask::kMustNotAlias: /* Currently only loads can be marked as kMustNotAlias */ DCHECK(!(GetTargetInstFlags(lir->opcode) & IS_STORE)); - *mask_ptr |= ENCODE_MUST_NOT_ALIAS; + mask.SetBit(ResourceMask::kMustNotAlias); break; default: LOG(FATAL) << "Oat: invalid memref kind - " << mem_type; } + *mask_ptr = mask_cache_.GetMask(mask); } /* @@ -146,7 +149,8 @@ void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { */ void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit) { - SetMemRefType(lir, is_load, kDalvikReg); + DCHECK((is_load ? lir->u.m.use_mask : lir->u.m.def_mask)->Intersection(kEncodeMem).Equals( + kEncodeDalvikReg)); /* * Store the Dalvik register id in alias_info. 
Mark the MSB if it is a 64-bit @@ -241,10 +245,10 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { } if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use")); + DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.use_mask, "use")); } if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def")); + DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.def_mask, "def")); } } @@ -794,7 +798,7 @@ LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { new_label->operands[0] = keyVal; new_label->flags.fixup = kFixupLabel; DCHECK(!new_label->flags.use_def_invalid); - new_label->u.m.def_mask = ENCODE_ALL; + new_label->u.m.def_mask = &kEncodeAll; InsertLIRAfter(boundary_lir, new_label); res = new_label; } @@ -972,7 +976,9 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena fp_spill_mask_(0), first_lir_insn_(NULL), last_lir_insn_(NULL), - slow_paths_(arena, 32, kGrowableArraySlowPaths) { + slow_paths_(arena, 32, kGrowableArraySlowPaths), + mem_ref_type_(ResourceMask::kHeapRef), + mask_cache_(arena) { // Reserve pointer id 0 for NULL. size_t null_idx = WrapPointer(NULL); DCHECK_EQ(null_idx, 0U); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 69ca7154e4..8f6d716ecb 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -44,7 +44,7 @@ void Mir2Lir::GenBarrier() { LIR* barrier = NewLIR0(kPseudoBarrier); /* Mark all resources as being clobbered */ DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; } void Mir2Lir::GenDivZeroException() { @@ -447,6 +447,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { for (int i = 0; i < elems; i++) { RegLocation loc = UpdateLoc(info->args[i]); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); } } @@ -484,7 +485,12 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { // Generate the copy loop. Going backwards for convenience LIR* target = NewLIR0(kPseudoTargetLabel); // Copy next element - LoadBaseIndexed(r_src, r_idx, r_val, 2, k32); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LoadBaseIndexed(r_src, r_idx, r_val, 2, k32); + // NOTE: No dalvik register annotation, local optimizations will be stopped + // by the loop boundaries. + } StoreBaseIndexed(r_dst, r_idx, r_val, 2, k32); FreeTemp(r_val); OpDecAndBranch(kCondGe, r_idx, target); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index b7ea362be1..2af847c7df 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -493,6 +493,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * end up half-promoted. In those cases, we must flush the promoted * half to memory as well. 
*/ + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; RegStorage reg = GetArgMappingToPhysicalReg(i); @@ -901,11 +902,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, } else { // kArg2 & rArg3 can safely be used here reg = TargetReg(kArg3); - Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); + } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } - Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg); + } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); next_use++; @@ -929,12 +936,15 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, vtable_idx, direct_code, direct_method, type); } int outs_offset = (next_use + 1) * 4; - if (rl_arg.wide) { - StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64); - next_use += 2; - } else { - Store32Disp(TargetReg(kSp), outs_offset, arg_reg); - next_use++; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64); + next_use += 2; + } else { + Store32Disp(TargetReg(kSp), outs_offset, arg_reg); + next_use++; + } } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); @@ -998,12 +1008,14 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (loc.wide) { loc = UpdateLocWide(loc); if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); } next_arg += 2; } else { loc = UpdateLoc(loc); if ((next_arg >= 3) && (loc.location == kLocPhysReg)) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); } next_arg++; @@ -1026,24 +1038,32 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset); - LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack); + LIR* ld = nullptr; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack); + } // TUNING: loosen barrier - ld->u.m.def_mask = ENCODE_ALL; - SetMemRefType(ld, true /* is_load */, kDalvikReg); + ld->u.m.def_mask = &kEncodeAll; call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4)); call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); - LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack); - SetMemRefType(st, false /* is_load */, kDalvikReg); - st->u.m.def_mask = ENCODE_ALL; + LIR* st = nullptr; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack); + } + st->u.m.def_mask = &kEncodeAll; 
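The blocks above keep wrapping stack/vreg loads and stores in a ScopedMemRefType guard so that the LIR they generate is tagged kDalvikReg instead of the default kHeapRef. The guard itself is defined later in this change (mir_to_lir.h); the snippet below is a minimal stand-alone sketch of the same save/override/restore pattern, with illustrative names rather than ART's:

#include <cassert>

enum class MemRef { kHeapRef, kDalvikReg, kLiteral };

struct Codegen {
  MemRef mem_ref_type = MemRef::kHeapRef;  // default, like Mir2Lir::mem_ref_type_
};

class ScopedMemRef {
 public:
  ScopedMemRef(Codegen* cg, MemRef new_type) : cg_(cg), old_(cg->mem_ref_type) {
    cg_->mem_ref_type = new_type;  // override for the duration of this scope
  }
  ~ScopedMemRef() { cg_->mem_ref_type = old_; }  // restore on scope exit
 private:
  Codegen* const cg_;
  const MemRef old_;
};

int main() {
  Codegen cg;
  {
    ScopedMemRef scope(&cg, MemRef::kDalvikReg);  // stack/vreg accesses in this scope
    assert(cg.mem_ref_type == MemRef::kDalvikReg);
  }
  assert(cg.mem_ref_type == MemRef::kHeapRef);    // back to the default afterwards
  return 0;
}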
call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { int current_src_offset = start_offset; int current_dest_offset = outs_offset; + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); while (regs_left_to_pass_via_stack > 0) { // This is based on the knowledge that the stack itself is 16-byte aligned. bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; @@ -1110,8 +1130,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); } else { // Set barrier for 128-bit load. - SetMemRefType(ld1, true /* is_load */, kDalvikReg); - ld1->u.m.def_mask = ENCODE_ALL; + ld1->u.m.def_mask = &kEncodeAll; } } if (st1 != nullptr) { @@ -1121,8 +1140,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); } else { // Set barrier for 128-bit store. - SetMemRefType(st1, false /* is_load */, kDalvikReg); - st1->u.m.def_mask = ENCODE_ALL; + st1->u.m.def_mask = &kEncodeAll; } } @@ -1310,6 +1328,9 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { RegStorage t_reg = AllocTemp(); OpRegReg(kOpNeg, t_reg, rl_result.reg); OpRegRegReg(kOpAdc, rl_result.reg, rl_result.reg, t_reg); + } else if (cu_->instruction_set == kArm64) { + OpRegImm(kOpSub, rl_result.reg, 1); + OpRegRegImm(kOpLsr, rl_result.reg, rl_result.reg, 31); } else { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); OpRegImm(kOpSub, rl_result.reg, 1); @@ -1330,6 +1351,11 @@ bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (size == k64) { RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg); + if (cu_->instruction_set == kArm64) { + OpRegReg(kOpRev, rl_result.reg, rl_i.reg); + StoreValueWide(rl_dest, rl_result); + return true; + } RegStorage r_i_low = rl_i.reg.GetLow(); if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) { // First REV shall clobber rl_result.reg.GetReg(), save the value in a temp for the second REV. @@ -1428,8 +1454,15 @@ bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_dest = InlineTargetWide(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegCopyWide(rl_result.reg, rl_src.reg); - OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); + + if (cu_->instruction_set == kArm64) { + // TODO - Can ecode ? UBXF otherwise + // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff); + return false; + } else { + OpRegCopyWide(rl_result.reg, rl_src.reg); + OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); + } StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 6ef793427c..6469d9c4f1 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -65,6 +65,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg); } else { // Lives in the frame, need to store. 
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32); } if (!zero_reg.Valid()) { @@ -90,6 +91,7 @@ void Mir2Lir::LoadValueDirect(RegLocation rl_src, RegStorage r_dest) { } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_src.ref) { LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest); } else { @@ -123,6 +125,7 @@ void Mir2Lir::LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest) { } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64); } } @@ -210,6 +213,7 @@ void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { ResetDefLoc(rl_dest); if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) { def_start = last_lir_insn_; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); MarkClean(rl_dest); def_end = last_lir_insn_; @@ -296,6 +300,7 @@ void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) { def_start = last_lir_insn_; DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); MarkClean(rl_dest); def_end = last_lir_insn_; @@ -323,6 +328,7 @@ void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) { ResetDefLoc(rl_dest); if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) { LIR *def_start = last_lir_insn_; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); MarkClean(rl_dest); LIR *def_end = last_lir_insn_; @@ -358,6 +364,7 @@ void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) { LIR *def_start = last_lir_insn_; DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); MarkClean(rl_dest); LIR *def_end = last_lir_insn_; diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index 4a918a138a..b97ff2a447 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -21,8 +21,8 @@ namespace art { #define DEBUG_OPT(X) /* Check RAW, WAR, and RAW dependency on the register operands */ -#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \ - ((use | def) & check->u.m.def_mask)) +#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \ + (use.Union(def).Intersects(*check->u.m.def_mask)) /* Scheduler heuristics */ #define MAX_HOIST_DISTANCE 20 @@ -109,20 +109,23 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { bool is_this_lir_load = target_flags & IS_LOAD; LIR* check_lir; /* Use the mem mask to determine the rough memory location */ - uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM; + ResourceMask this_mem_mask = kEncodeMem.Intersection( + 
this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask)); /* * Currently only eliminate redundant ld/st for constant and Dalvik * register accesses. */ - if (!(this_mem_mask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) { + if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) { continue; } - uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; - uint64_t stop_use_reg_mask; + ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); + ResourceMask stop_use_reg_mask; if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM; + // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask. + stop_use_reg_mask = ResourceMask::Bit(kIsBranch).Union(*this_lir->u.m.use_mask).Without( + kEncodeMem); } else { /* * Add pc to the resource mask to prevent this instruction @@ -130,7 +133,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * region bits since stop_mask is used to check data/control * dependencies. */ - stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = GetPCUseDefEncoding().Union(*this_lir->u.m.use_mask).Without(kEncodeMem); } for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { @@ -142,8 +145,9 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM; - uint64_t alias_condition = this_mem_mask & check_mem_mask; + ResourceMask check_mem_mask = kEncodeMem.Intersection( + check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask)); + ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask); bool stop_here = false; /* @@ -153,9 +157,9 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { // TUNING: Support instructions with multiple register targets. if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) { stop_here = true; - } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { bool is_check_lir_load = check_flags & IS_LOAD; - if (alias_condition == ENCODE_LITERAL) { + if (alias_condition.Equals(kEncodeLiteral)) { /* * Should only see literal loads in the instruction * stream. @@ -175,7 +179,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { } NopLIR(check_lir); } - } else if (alias_condition == ENCODE_DALVIK_REG) { + } else if (alias_condition.Equals(kEncodeDalvikReg)) { /* Must alias */ if (check_lir->flags.alias_info == this_lir->flags.alias_info) { /* Only optimize compatible registers */ @@ -304,7 +308,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_use_all_mask = this_lir->u.m.use_mask; + ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask; if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { /* @@ -313,14 +317,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * locations are safe to be hoisted. So only mark the heap references * conservatively here. 
*/ - if (stop_use_all_mask & ENCODE_HEAP_REF) { - stop_use_all_mask |= GetPCUseDefEncoding(); + if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) { + stop_use_all_mask.SetBits(GetPCUseDefEncoding()); } } /* Similar as above, but just check for pure register dependency */ - uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM; - uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; + ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem); + ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); int next_slot = 0; bool stop_here = false; @@ -335,22 +339,22 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM; - uint64_t alias_condition = stop_use_all_mask & check_mem_mask; + ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem); + ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask); stop_here = false; /* Potential WAR alias seen - check the exact relation */ - if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { /* We can fully disambiguate Dalvik references */ - if (alias_condition == ENCODE_DALVIK_REG) { - /* Must alias or partually overlap */ + if (alias_condition.Equals(kEncodeDalvikReg)) { + /* Must alias or partially overlap */ if ((check_lir->flags.alias_info == this_lir->flags.alias_info) || IsDalvikRegisterClobbered(this_lir, check_lir)) { stop_here = true; } /* Conservatively treat all heap refs as may-alias */ } else { - DCHECK_EQ(alias_condition, ENCODE_HEAP_REF); + DCHECK(alias_condition.Equals(kEncodeHeapRef)); stop_here = true; } /* Memory content may be updated. Stop looking now. */ @@ -413,7 +417,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { LIR* prev_lir = prev_inst_list[slot+1]; /* Check the highest instruction */ - if (prev_lir->u.m.def_mask == ENCODE_ALL) { + if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) { /* * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. @@ -443,7 +447,8 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { */ bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? 
false : (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); - if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) { + if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) || + (slot < LD_LATENCY)) { break; } } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index ea3c901fa6..62a7f2455c 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -63,7 +63,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -77,12 +77,13 @@ class MipsMir2Lir FINAL : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 381c7ce0aa..76b5243a12 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -120,60 +120,50 @@ RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. */ -uint64_t MipsMir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id = reg.GetRegNum(); - /* Each double register is equal to a pair of single-precision FP registers */ - if (reg.IsDouble()) { - seed = 0x3; - reg_id = reg_id << 1; - } else { - seed = 1; - } - /* FP register starts at bit position 32 */ - shift = reg.IsFloat() ? kMipsFPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); +ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + return reg.IsDouble() + /* Each double register is equal to a pair of single-precision FP registers */ + ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0) + : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kMipsFPReg0 : reg.GetRegNum()); } -uint64_t MipsMir2Lir::GetPCUseDefEncoding() { - return ENCODE_MIPS_REG_PC; +ResourceMask MipsMir2Lir::GetPCUseDefEncoding() const { + return ResourceMask::Bit(kMipsRegPC); } -void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kMips); DCHECK(!lir->flags.use_def_invalid); // Mips-specific resource map setup here. 
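In the MIPS mask above, a double-precision register maps to ResourceMask::TwoBits so that it aliases the pair of single-precision registers it overlaps. A quick stand-alone check of that mapping, assuming kMipsFPReg0 == 32 per the "FP register starts at bit position 32" comment in the removed code (the helper names here are illustrative, not ART's):

#include <cassert>
#include <cstdint>

constexpr int kFpBase = 32;  // stands in for kMipsFPReg0

constexpr uint64_t SingleBit(int f) { return UINT64_C(1) << (kFpBase + f); }
constexpr uint64_t TwoBits(int first) { return UINT64_C(3) << first; }  // pair of adjacent bits

int main() {
  // D1 aliases F2/F3: its two-bit mask covers exactly those two single-precision bits.
  const uint64_t d1 = TwoBits(1 * 2 + kFpBase);
  assert(d1 == (SingleBit(2) | SingleBit(3)));
  return 0;
}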
if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_SP; + def_mask->SetBit(kMipsRegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_SP; + use_mask->SetBit(kMipsRegSP); } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_LR; + def_mask->SetBit(kMipsRegLR); } if (flags & REG_DEF_HI) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_HI; + def_mask->SetBit(kMipsRegHI); } if (flags & REG_DEF_LO) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_LO; + def_mask->SetBit(kMipsRegLO); } if (flags & REG_USE_HI) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_HI; + use_mask->SetBit(kMipsRegHI); } if (flags & REG_USE_LO) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_LO; + use_mask->SetBit(kMipsRegLO); } } @@ -283,43 +273,43 @@ std::string MipsMir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned cha } // FIXME: need to redo resource maps for MIPS - fix this at that time -void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *prefix) { +void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, const ResourceMask& mask, const char *prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kMipsRegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (mips_lir && (mask & ENCODE_DALVIK_REG)) { + if (mips_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 2757b7be08..01b25f9291 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -534,7 +534,8 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora } } - if (r_base == rs_rMIPS_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rMIPS_SP); AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, true /* is_load */, pair /* is64bit */); if (pair) { @@ -634,7 +635,8 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, FreeTemp(r_scratch); } - if (r_base == rs_rMIPS_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rMIPS_SP); AnnotateDalvikRegAccess(store, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, false /* is_load */, pair /* is64bit */); if (pair) { diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 2f37520b59..9912101eb1 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -57,7 +57,7 @@ inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0, (opcode == kPseudoExportedPC)) { // Always make labels scheduling barriers DCHECK(!insn->flags.use_def_invalid); - insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL; + insn->u.m.use_mask = insn->u.m.def_mask = &kEncodeAll; } return insn; } @@ -140,19 +140,20 @@ inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1 /* * Mark the corresponding bit(s). */ -inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) { +inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) { DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0); DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg; - *mask |= reginfo_map_.Get(reg)->DefUseMask(); + *mask = mask->Union(reginfo_map_.Get(reg)->DefUseMask()); } /* * Set up the proper fields in the resource mask */ -inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { +inline void Mir2Lir::SetupResourceMasks(LIR* lir) { int opcode = lir->opcode; if (IsPseudoLirOp(opcode)) { + lir->u.m.use_mask = lir->u.m.def_mask = &kEncodeNone; if (opcode != kPseudoBarrier) { lir->flags.fixup = kFixupLabel; } @@ -166,13 +167,27 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { lir->flags.fixup = kFixupLabel; } - /* Get the starting size of the instruction's template */ + /* Get the starting size of the instruction's template. */ lir->flags.size = GetInsnSize(lir); estimated_native_code_size_ += lir->flags.size; - /* Set up the mask for resources that are updated */ - if (!leave_mem_ref && (flags & (IS_LOAD | IS_STORE))) { - /* Default to heap - will catch specialized classes later */ - SetMemRefType(lir, flags & IS_LOAD, kHeapRef); + + /* Set up the mask for resources. */ + ResourceMask use_mask; + ResourceMask def_mask; + + if (flags & (IS_LOAD | IS_STORE)) { + /* Set memory reference type (defaults to heap, overridden by ScopedMemRefType). */ + if (flags & IS_LOAD) { + use_mask.SetBit(mem_ref_type_); + } else { + /* Currently only loads can be marked as kMustNotAlias. */ + DCHECK(mem_ref_type_ != ResourceMask::kMustNotAlias); + } + if (flags & IS_STORE) { + /* Literals cannot be written to. */ + DCHECK(mem_ref_type_ != ResourceMask::kLiteral); + def_mask.SetBit(mem_ref_type_); + } } /* @@ -180,52 +195,55 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { * turn will trash everything. 
*/ if (flags & IS_BRANCH) { - lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL; + lir->u.m.def_mask = lir->u.m.use_mask = &kEncodeAll; return; } if (flags & REG_DEF0) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[0]); + SetupRegMask(&def_mask, lir->operands[0]); } if (flags & REG_DEF1) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[1]); + SetupRegMask(&def_mask, lir->operands[1]); } if (flags & REG_DEF2) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[2]); + SetupRegMask(&def_mask, lir->operands[2]); } if (flags & REG_USE0) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[0]); + SetupRegMask(&use_mask, lir->operands[0]); } if (flags & REG_USE1) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[1]); + SetupRegMask(&use_mask, lir->operands[1]); } if (flags & REG_USE2) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[2]); + SetupRegMask(&use_mask, lir->operands[2]); } if (flags & REG_USE3) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[3]); + SetupRegMask(&use_mask, lir->operands[3]); } if (flags & REG_USE4) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[4]); + SetupRegMask(&use_mask, lir->operands[4]); } if (flags & SETS_CCODES) { - lir->u.m.def_mask |= ENCODE_CCODE; + def_mask.SetBit(ResourceMask::kCCode); } if (flags & USES_CCODES) { - lir->u.m.use_mask |= ENCODE_CCODE; + use_mask.SetBit(ResourceMask::kCCode); } // Handle target-specific actions - SetupTargetResourceMasks(lir, flags); + SetupTargetResourceMasks(lir, flags, &def_mask, &use_mask); + + lir->u.m.use_mask = mask_cache_.GetMask(use_mask); + lir->u.m.def_mask = mask_cache_.GetMask(def_mask); } inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index a85be5e90c..40205eabd6 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -68,6 +68,7 @@ void Mir2Lir::LockArg(int in_position, bool wide) { // TODO: needs revisit for 64-bit. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { @@ -159,6 +160,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) } void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { /* @@ -1171,7 +1173,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { head_lir = &block_label_list_[bb->id]; // Set the first label as a scheduling barrier. DCHECK(!head_lir->flags.use_def_invalid); - head_lir->u.m.def_mask = ENCODE_ALL; + head_lir->u.m.def_mask = &kEncodeAll; } if (opcode == kMirOpCheck) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 9718acde6c..ca4d0e48bf 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -23,6 +23,7 @@ #include "dex/compiler_ir.h" #include "dex/reg_storage.h" #include "dex/backend.h" +#include "dex/quick/resource_mask.h" #include "driver/compiler_driver.h" #include "leb128.h" #include "safe_map.h" @@ -136,8 +137,8 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, typedef std::vector<uint8_t> CodeBuffer; struct UseDefMasks { - uint64_t use_mask; // Resource mask for use. - uint64_t def_mask; // Resource mask for def. 
+ const ResourceMask* use_mask; // Resource mask for use. + const ResourceMask* def_mask; // Resource mask for def. }; struct AssemblyInfo { @@ -188,20 +189,6 @@ Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_grap #define DECODE_ALIAS_INFO_WIDE(X) ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0) #define ENCODE_ALIAS_INFO(REG, ISWIDE) (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0)) -// Common resource macros. -#define ENCODE_CCODE (1ULL << kCCode) -#define ENCODE_FP_STATUS (1ULL << kFPStatus) - -// Abstract memory locations. -#define ENCODE_DALVIK_REG (1ULL << kDalvikReg) -#define ENCODE_LITERAL (1ULL << kLiteral) -#define ENCODE_HEAP_REF (1ULL << kHeapRef) -#define ENCODE_MUST_NOT_ALIAS (1ULL << kMustNotAlias) - -#define ENCODE_ALL (~0ULL) -#define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ - ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) - #define ENCODE_REG_PAIR(low_reg, high_reg) ((low_reg & 0xff) | ((high_reg & 0xff) << 8)) #define DECODE_REG_PAIR(both_regs, low_reg, high_reg) \ do { \ @@ -327,7 +314,7 @@ class Mir2Lir : public Backend { */ class RegisterInfo { public: - RegisterInfo(RegStorage r, uint64_t mask = ENCODE_ALL); + RegisterInfo(RegStorage r, const ResourceMask& mask = kEncodeAll); ~RegisterInfo() {} static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(size, kArenaAllocRegAlloc); @@ -378,8 +365,8 @@ class Mir2Lir : public Backend { RegStorage Partner() { return partner_; } void SetPartner(RegStorage partner) { partner_ = partner; } int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; } - uint64_t DefUseMask() { return def_use_mask_; } - void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; } + const ResourceMask& DefUseMask() { return def_use_mask_; } + void SetDefUseMask(const ResourceMask& def_use_mask) { def_use_mask_ = def_use_mask; } RegisterInfo* Master() { return master_; } void SetMaster(RegisterInfo* master) { master_ = master; @@ -417,7 +404,7 @@ class Mir2Lir : public Backend { bool aliased_; // Is this the master for other aliased RegisterInfo's? RegStorage partner_; // If wide_value, other reg of pair or self if 64-bit register. int s_reg_; // Name of live value. - uint64_t def_use_mask_; // Resources for this element. + ResourceMask def_use_mask_; // Resources for this element. uint32_t used_storage_; // 1 bit per 4 bytes of storage. Unused by aliases. uint32_t liveness_; // 1 bit per 4 bytes of storage. Unused by aliases. RegisterInfo* master_; // Pointer to controlling storage mask. @@ -539,6 +526,26 @@ class Mir2Lir : public Backend { LIR* const cont_; }; + // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_. 
+ class ScopedMemRefType { + public: + ScopedMemRefType(Mir2Lir* m2l, ResourceMask::ResourceBit new_mem_ref_type) + : m2l_(m2l), + old_mem_ref_type_(m2l->mem_ref_type_) { + m2l_->mem_ref_type_ = new_mem_ref_type; + } + + ~ScopedMemRefType() { + m2l_->mem_ref_type_ = old_mem_ref_type_; + } + + private: + Mir2Lir* const m2l_; + ResourceMask::ResourceBit old_mem_ref_type_; + + DISALLOW_COPY_AND_ASSIGN(ScopedMemRefType); + }; + virtual ~Mir2Lir() {} int32_t s4FromSwitchData(const void* switch_data) { @@ -625,10 +632,10 @@ class Mir2Lir : public Backend { virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); - void SetupResourceMasks(LIR* lir, bool leave_mem_ref = false); + void SetupResourceMasks(LIR* lir); void SetMemRefType(LIR* lir, bool is_load, int mem_type); void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); - void SetupRegMask(uint64_t* mask, int reg); + void SetupRegMask(ResourceMask* mask, int reg); void DumpLIRInsn(LIR* arg, unsigned char* base_addr); void DumpPromotionMap(); void CodegenDump(); @@ -945,7 +952,7 @@ class Mir2Lir : public Backend { bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); bool GenInlinedReverseBytes(CallInfo* info, OpSize size); bool GenInlinedAbsInt(CallInfo* info); - bool GenInlinedAbsLong(CallInfo* info); + virtual bool GenInlinedAbsLong(CallInfo* info); bool GenInlinedAbsFloat(CallInfo* info); bool GenInlinedAbsDouble(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); @@ -1136,7 +1143,7 @@ class Mir2Lir : public Backend { virtual RegLocation LocCReturnDouble() = 0; virtual RegLocation LocCReturnFloat() = 0; virtual RegLocation LocCReturnWide() = 0; - virtual uint64_t GetRegMaskCommon(RegStorage reg) = 0; + virtual ResourceMask GetRegMaskCommon(const RegStorage& reg) const = 0; virtual void AdjustSpillMask() = 0; virtual void ClobberCallerSave() = 0; virtual void FreeCallTemps() = 0; @@ -1147,12 +1154,13 @@ class Mir2Lir : public Backend { // Required for target - miscellaneous. virtual void AssembleLIR() = 0; - virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0; - virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0; + virtual void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) = 0; + virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) = 0; virtual const char* GetTargetInstFmt(int opcode) = 0; virtual const char* GetTargetInstName(int opcode) = 0; virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; - virtual uint64_t GetPCUseDefEncoding() = 0; + virtual ResourceMask GetPCUseDefEncoding() const = 0; virtual uint64_t GetTargetInstFlags(int opcode) = 0; virtual int GetInsnSize(LIR* lir) = 0; virtual bool IsUnconditionalBranch(LIR* lir) = 0; @@ -1576,6 +1584,17 @@ class Mir2Lir : public Backend { LIR* last_lir_insn_; GrowableArray<LIRSlowPath*> slow_paths_; + + // The memory reference type for new LIRs. + // NOTE: Passing this as an explicit parameter by all functions that directly or indirectly + // invoke RawLIR() would clutter the code and reduce the readability. + ResourceMask::ResourceBit mem_ref_type_; + + // Each resource mask now takes 16-bytes, so having both use/def masks directly in a LIR + // would consume 32 bytes per LIR. Instead, the LIR now holds only pointers to the masks + // (i.e. 
8 bytes on 32-bit arch, 16 bytes on 64-bit arch) and we use ResourceMaskCache + // to deduplicate the masks. + ResourceMaskCache mask_cache_; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index bbeef50d73..cae59c88c1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -38,7 +38,7 @@ void Mir2Lir::ResetRegPool() { } } -Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask) +Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, const ResourceMask& mask) : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r), s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr), def_end_(nullptr), alias_chain_(nullptr) { @@ -82,22 +82,22 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, } // Construct the register pool. - for (RegStorage reg : core_regs) { + for (const RegStorage& reg : core_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); core_regs_.Insert(info); } - for (RegStorage reg : core64_regs) { + for (const RegStorage& reg : core64_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); core64_regs_.Insert(info); } - for (RegStorage reg : sp_regs) { + for (const RegStorage& reg : sp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); sp_regs_.Insert(info); } - for (RegStorage reg : dp_regs) { + for (const RegStorage& reg : dp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); dp_regs_.Insert(info); @@ -126,7 +126,7 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, } // Add an entry for InvalidReg with zero'd mask. - RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0); + RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), kEncodeNone); m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg); // Existence of core64 registers implies wide references. 
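As the comment above explains, each mask is now a 128-bit value (16 bytes), so a LIR stores pointers to shared, deduplicated masks instead of two inline copies. The toy type below sketches the two-word representation and set-style queries; it is for illustration only and much smaller than ART's ResourceMask:

#include <cassert>
#include <cstdint>

struct Mask128 {
  uint64_t lo;  // bits 0..63: concrete registers
  uint64_t hi;  // bits 64..127: abstract resources (ccode, heap, literal, ...)

  static constexpr Mask128 Bit(int b) {
    return (b < 64) ? Mask128{UINT64_C(1) << b, UINT64_C(0)}
                    : Mask128{UINT64_C(0), UINT64_C(1) << (b - 64)};
  }
  constexpr Mask128 Union(const Mask128& o) const { return Mask128{lo | o.lo, hi | o.hi}; }
  constexpr bool Intersects(const Mask128& o) const { return ((lo & o.lo) | (hi & o.hi)) != 0u; }
  constexpr bool HasBit(int b) const { return Intersects(Bit(b)); }
};

int main() {
  constexpr Mask128 sp = Mask128::Bit(31);    // a register bit in the low word
  constexpr Mask128 cc = Mask128::Bit(122);   // a high "abstract" bit (position illustrative)
  constexpr Mask128 both = sp.Union(cc);
  assert(both.HasBit(31) && both.HasBit(122) && !both.HasBit(0));
  static_assert(sizeof(Mask128) == 16, "two 64-bit words, as noted above");
  return 0;
}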
@@ -734,6 +734,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { info1 = info2; } int v_reg = mir_graph_->SRegToVReg(info1->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); } } else { @@ -741,6 +742,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { if (info->IsLive() && info->IsDirty()) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); } } @@ -752,6 +754,7 @@ void Mir2Lir::FlushReg(RegStorage reg) { if (info->IsLive() && info->IsDirty()) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord); } } diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc new file mode 100644 index 0000000000..17995fbf79 --- /dev/null +++ b/compiler/dex/quick/resource_mask.cc @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iomanip> + +#include "resource_mask.h" + +#include "utils/arena_allocator.h" + +namespace art { + +namespace { // anonymous namespace + +constexpr ResourceMask kNoRegMasks[] = { + kEncodeNone, + kEncodeHeapRef, + kEncodeLiteral, + kEncodeDalvikReg, + ResourceMask::Bit(ResourceMask::kFPStatus), + ResourceMask::Bit(ResourceMask::kCCode), +}; +// The 127-bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set. +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kHeapRef].Equals( + kEncodeHeapRef), check_kNoRegMasks_heap_ref_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kLiteral].Equals( + kEncodeLiteral), check_kNoRegMasks_literal_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kDalvikReg].Equals( + kEncodeDalvikReg), check_kNoRegMasks_dalvik_reg_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kFPStatus].Equals( + ResourceMask::Bit(ResourceMask::kFPStatus)), check_kNoRegMasks_fp_status_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kCCode].Equals( + ResourceMask::Bit(ResourceMask::kCCode)), check_kNoRegMasks_ccode_index); + +template <size_t special_bit> +constexpr ResourceMask OneRegOneSpecial(size_t reg) { + return ResourceMask::Bit(reg).Union(ResourceMask::Bit(special_bit)); +} + +// NOTE: Working around gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61484 . +// This should be a two-dimensions array, kSingleRegMasks[][32] and each line should be +// enclosed in an extra { }. However, gcc issues a bogus "error: array must be initialized +// with a brace-enclosed initializer" for that, so we flatten this to a one-dimensional array. 
+constexpr ResourceMask kSingleRegMasks[] = { +#define DEFINE_LIST_32(fn) \ + fn(0), fn(1), fn(2), fn(3), fn(4), fn(5), fn(6), fn(7), \ + fn(8), fn(9), fn(10), fn(11), fn(12), fn(13), fn(14), fn(15), \ + fn(16), fn(17), fn(18), fn(19), fn(20), fn(21), fn(22), fn(23), \ + fn(24), fn(25), fn(26), fn(27), fn(28), fn(29), fn(30), fn(31) + // NOTE: Each line is 512B of constant data, 3KiB in total. + DEFINE_LIST_32(ResourceMask::Bit), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kHeapRef>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kLiteral>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kDalvikReg>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kFPStatus>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kCCode>), +#undef DEFINE_LIST_32 +}; + +constexpr size_t SingleRegMaskIndex(size_t main_index, size_t sub_index) { + return main_index * 32u + sub_index; +} + +// The 127-bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set. +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kHeapRef, 0)].Equals( + OneRegOneSpecial<ResourceMask::kHeapRef>(0)), check_kSingleRegMasks_heap_ref_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kLiteral, 0)].Equals( + OneRegOneSpecial<ResourceMask::kLiteral>(0)), check_kSingleRegMasks_literal_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kDalvikReg, 0)].Equals( + OneRegOneSpecial<ResourceMask::kDalvikReg>(0)), check_kSingleRegMasks_dalvik_reg_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kFPStatus, 0)].Equals( + OneRegOneSpecial<ResourceMask::kFPStatus>(0)), check_kSingleRegMasks_fp_status_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kCCode, 0)].Equals( + OneRegOneSpecial<ResourceMask::kCCode>(0)), check_kSingleRegMasks_ccode_index); + +// NOTE: arraysize(kNoRegMasks) multiplied by 32 due to the gcc bug workaround, see above. +COMPILE_ASSERT(arraysize(kSingleRegMasks) == arraysize(kNoRegMasks) * 32, check_arraysizes); + +constexpr ResourceMask kTwoRegsMasks[] = { +#define TWO(a, b) ResourceMask::Bit(a).Union(ResourceMask::Bit(b)) + // NOTE: 16 * 15 / 2 = 120 entries, 16 bytes each, 1920B in total. 
+ TWO(0, 1), + TWO(0, 2), TWO(1, 2), + TWO(0, 3), TWO(1, 3), TWO(2, 3), + TWO(0, 4), TWO(1, 4), TWO(2, 4), TWO(3, 4), + TWO(0, 5), TWO(1, 5), TWO(2, 5), TWO(3, 5), TWO(4, 5), + TWO(0, 6), TWO(1, 6), TWO(2, 6), TWO(3, 6), TWO(4, 6), TWO(5, 6), + TWO(0, 7), TWO(1, 7), TWO(2, 7), TWO(3, 7), TWO(4, 7), TWO(5, 7), TWO(6, 7), + TWO(0, 8), TWO(1, 8), TWO(2, 8), TWO(3, 8), TWO(4, 8), TWO(5, 8), TWO(6, 8), TWO(7, 8), + TWO(0, 9), TWO(1, 9), TWO(2, 9), TWO(3, 9), TWO(4, 9), TWO(5, 9), TWO(6, 9), TWO(7, 9), + TWO(8, 9), + TWO(0, 10), TWO(1, 10), TWO(2, 10), TWO(3, 10), TWO(4, 10), TWO(5, 10), TWO(6, 10), TWO(7, 10), + TWO(8, 10), TWO(9, 10), + TWO(0, 11), TWO(1, 11), TWO(2, 11), TWO(3, 11), TWO(4, 11), TWO(5, 11), TWO(6, 11), TWO(7, 11), + TWO(8, 11), TWO(9, 11), TWO(10, 11), + TWO(0, 12), TWO(1, 12), TWO(2, 12), TWO(3, 12), TWO(4, 12), TWO(5, 12), TWO(6, 12), TWO(7, 12), + TWO(8, 12), TWO(9, 12), TWO(10, 12), TWO(11, 12), + TWO(0, 13), TWO(1, 13), TWO(2, 13), TWO(3, 13), TWO(4, 13), TWO(5, 13), TWO(6, 13), TWO(7, 13), + TWO(8, 13), TWO(9, 13), TWO(10, 13), TWO(11, 13), TWO(12, 13), + TWO(0, 14), TWO(1, 14), TWO(2, 14), TWO(3, 14), TWO(4, 14), TWO(5, 14), TWO(6, 14), TWO(7, 14), + TWO(8, 14), TWO(9, 14), TWO(10, 14), TWO(11, 14), TWO(12, 14), TWO(13, 14), + TWO(0, 15), TWO(1, 15), TWO(2, 15), TWO(3, 15), TWO(4, 15), TWO(5, 15), TWO(6, 15), TWO(7, 15), + TWO(8, 15), TWO(9, 15), TWO(10, 15), TWO(11, 15), TWO(12, 15), TWO(13, 15), TWO(14, 15), +#undef TWO +}; +COMPILE_ASSERT(arraysize(kTwoRegsMasks) == 16 * 15 / 2, check_arraysize_kTwoRegsMasks); + +constexpr size_t TwoRegsIndex(size_t higher, size_t lower) { + return (higher * (higher - 1)) / 2u + lower; +} + +constexpr bool CheckTwoRegsMask(size_t higher, size_t lower) { + return ResourceMask::Bit(lower).Union(ResourceMask::Bit(higher)).Equals( + kTwoRegsMasks[TwoRegsIndex(higher, lower)]); +} + +constexpr bool CheckTwoRegsMaskLine(size_t line, size_t lower = 0u) { + return (lower == line) || + (CheckTwoRegsMask(line, lower) && CheckTwoRegsMaskLine(line, lower + 1u)); +} + +constexpr bool CheckTwoRegsMaskTable(size_t lines) { + return lines == 0 || + (CheckTwoRegsMaskLine(lines - 1) && CheckTwoRegsMaskTable(lines - 1u)); +} + +COMPILE_ASSERT(CheckTwoRegsMaskTable(16), check_two_regs_masks_table); + +} // anonymous namespace + +const ResourceMask* ResourceMaskCache::GetMask(const ResourceMask& mask) { + // Instead of having a deduplication map, we shall just use pre-defined constexpr + // masks for the common cases. At most one of the these special bits is allowed: + constexpr ResourceMask kAllowedSpecialBits = ResourceMask::Bit(ResourceMask::kFPStatus) + .Union(ResourceMask::Bit(ResourceMask::kCCode)) + .Union(kEncodeHeapRef).Union(kEncodeLiteral).Union(kEncodeDalvikReg); + const ResourceMask* res = nullptr; + // Limit to low 32 regs and the kAllowedSpecialBits. + if ((mask.masks_[0] >> 32) == 0u && (mask.masks_[1] & ~kAllowedSpecialBits.masks_[1]) == 0u) { + // Check if it's only up to two registers. + uint32_t low_regs = static_cast<uint32_t>(mask.masks_[0]); + uint32_t low_regs_without_lowest = low_regs & (low_regs - 1u); + if (low_regs_without_lowest == 0u && IsPowerOfTwo(mask.masks_[1])) { + // 0 or 1 register, 0 or 1 bit from kAllowedBits. Use a pre-defined mask. + size_t index = (mask.masks_[1] != 0u) ? CLZ(mask.masks_[1]) : 0u; + DCHECK_LT(index, arraysize(kNoRegMasks)); + res = (low_regs != 0) ? 
&kSingleRegMasks[SingleRegMaskIndex(index, CTZ(low_regs))] + : &kNoRegMasks[index]; + } else if (IsPowerOfTwo(low_regs_without_lowest) && mask.masks_[1] == 0u) { + // 2 registers and no other flags. Use predefined mask if higher reg is < 16. + if (low_regs_without_lowest < (1u << 16)) { + res = &kTwoRegsMasks[TwoRegsIndex(CTZ(low_regs_without_lowest), CTZ(low_regs))]; + } + } + } else if (mask.Equals(kEncodeAll)) { + res = &kEncodeAll; + } + if (res != nullptr) { + DCHECK(res->Equals(mask)) + << "(" << std::hex << std::setw(16) << mask.masks_[0] + << ", "<< std::hex << std::setw(16) << mask.masks_[1] + << ") != (" << std::hex << std::setw(16) << res->masks_[0] + << ", "<< std::hex << std::setw(16) << res->masks_[1] << ")"; + return res; + } + + // TODO: Deduplicate. (At least the most common masks.) + void* mem = allocator_->Alloc(sizeof(ResourceMask), kArenaAllocLIRResourceMask); + return new (mem) ResourceMask(mask); +} + +} // namespace art diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h new file mode 100644 index 0000000000..12ce98adc4 --- /dev/null +++ b/compiler/dex/quick/resource_mask.h @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ +#define ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ + +#include <stdint.h> + +#include "base/logging.h" +#include "dex/reg_storage.h" + +namespace art { + +class ArenaAllocator; + +/** + * @brief Resource mask for LIR insn uses or defs. + * @detail Def/Use mask used for checking dependencies between LIR insns in local + * optimizations such as load hoisting. + */ +class ResourceMask { + private: + constexpr ResourceMask(uint64_t mask1, uint64_t mask2) + : masks_{ mask1, mask2 } { // NOLINT + } + + public: + /* + * Def/Use encoding in 128-bit use_mask/def_mask. Low positions used for target-specific + * registers (and typically use the register number as the position). High positions + * reserved for common and abstract resources. + */ + enum ResourceBit { + kMustNotAlias = 127, + kHeapRef = 126, // Default memory reference type. + kLiteral = 125, // Literal pool memory reference. + kDalvikReg = 124, // Dalvik v_reg memory reference. + kFPStatus = 123, + kCCode = 122, + kLowestCommonResource = kCCode, + kHighestCommonResource = kMustNotAlias + }; + + // Default-constructible. + constexpr ResourceMask() + : masks_ { 0u, 0u } { + } + + // Copy-constructible and copyable. + ResourceMask(const ResourceMask& other) = default; + ResourceMask& operator=(const ResourceMask& other) = default; + + static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) { + return ResourceMask(mask1, mask2); + } + + static constexpr ResourceMask Bit(size_t bit) { + return ResourceMask(bit >= 64u ? 0u : UINT64_C(1) << bit, + bit >= 64u ? UINT64_C(1) << (bit - 64u) : 0u); + } + + // Two consecutive bits. The start_bit must be even. 
+ static constexpr ResourceMask TwoBits(size_t start_bit) { + return + DCHECK_CONSTEXPR((start_bit & 1u) == 0u, << start_bit << " isn't even", Bit(0)) + ResourceMask(start_bit >= 64u ? 0u : UINT64_C(3) << start_bit, + start_bit >= 64u ? UINT64_C(3) << (start_bit - 64u) : 0u); + } + + static constexpr ResourceMask NoBits() { + return ResourceMask(UINT64_C(0), UINT64_C(0)); + } + + static constexpr ResourceMask AllBits() { + return ResourceMask(~UINT64_C(0), ~UINT64_C(0)); + } + + constexpr ResourceMask Union(const ResourceMask& other) const { + return ResourceMask(masks_[0] | other.masks_[0], masks_[1] | other.masks_[1]); + } + + constexpr ResourceMask Intersection(const ResourceMask& other) const { + return ResourceMask(masks_[0] & other.masks_[0], masks_[1] & other.masks_[1]); + } + + constexpr ResourceMask Without(const ResourceMask& other) const { + return ResourceMask(masks_[0] & ~other.masks_[0], masks_[1] & ~other.masks_[1]); + } + + constexpr bool Equals(const ResourceMask& other) const { + return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1]; + } + + constexpr bool Intersects(const ResourceMask& other) const { + return (masks_[0] & other.masks_[0]) != 0u || (masks_[1] & other.masks_[1]) != 0u; + } + + void SetBit(size_t bit) { + DCHECK_LE(bit, kHighestCommonResource); + masks_[bit / 64u] |= UINT64_C(1) << (bit & 63u); + } + + constexpr bool HasBit(size_t bit) const { + return (masks_[bit / 64u] & (UINT64_C(1) << (bit & 63u))) != 0u; + } + + ResourceMask& SetBits(const ResourceMask& other) { + masks_[0] |= other.masks_[0]; + masks_[1] |= other.masks_[1]; + return *this; + } + + ResourceMask& ClearBits(const ResourceMask& other) { + masks_[0] &= ~other.masks_[0]; + masks_[1] &= ~other.masks_[1]; + return *this; + } + + private: + uint64_t masks_[2]; + + friend class ResourceMaskCache; +}; + +constexpr ResourceMask kEncodeNone = ResourceMask::NoBits(); +constexpr ResourceMask kEncodeAll = ResourceMask::AllBits(); +constexpr ResourceMask kEncodeHeapRef = ResourceMask::Bit(ResourceMask::kHeapRef); +constexpr ResourceMask kEncodeLiteral = ResourceMask::Bit(ResourceMask::kLiteral); +constexpr ResourceMask kEncodeDalvikReg = ResourceMask::Bit(ResourceMask::kDalvikReg); +constexpr ResourceMask kEncodeMem = kEncodeLiteral.Union(kEncodeDalvikReg).Union( + kEncodeHeapRef).Union(ResourceMask::Bit(ResourceMask::kMustNotAlias)); + +class ResourceMaskCache { + public: + explicit ResourceMaskCache(ArenaAllocator* allocator) + : allocator_(allocator) { + } + + const ResourceMask* GetMask(const ResourceMask& mask); + + private: + ArenaAllocator* allocator_; +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 0a8193af35..d37ee67647 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -1541,7 +1541,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { << " delta: " << delta << " old delta: " << lir->operands[0]; } lir->opcode = kX86Jcc32; - SetupResourceMasks(lir); + lir->flags.size = GetInsnSize(lir); + DCHECK(lir->u.m.def_mask->Equals(kEncodeAll)); + DCHECK(lir->u.m.use_mask->Equals(kEncodeAll)); res = kRetryAll; } if (kVerbosePcFixup) { @@ -1605,7 +1607,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { LOG(INFO) << "Retry for JMP growth at " << lir->offset; } lir->opcode = kX86Jmp32; - SetupResourceMasks(lir); + lir->flags.size = GetInsnSize(lir); + 
DCHECK(lir->u.m.def_mask->Equals(kEncodeAll)); + DCHECK(lir->u.m.use_mask->Equals(kEncodeAll)); res = kRetryAll; } lir->operands[0] = delta; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 61c9f4f041..6ae553dab3 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -99,7 +99,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -113,12 +113,13 @@ class X86Mir2Lir : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index c3580f76ae..ced64009e6 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -147,6 +147,9 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do // Update the in-register state of source. rl_src = UpdateLocWide(rl_src); + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + // If the source is in physical register, then put it in its location on stack. if (rl_src.location == kLocPhysReg) { RegisterInfo* reg_info = GetRegInfo(rl_src.reg); @@ -191,15 +194,12 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the * correct register class. 
*/ + rl_result = EvalLoc(rl_dest, kFPReg, true); if (is_double) { - rl_result = EvalLocWide(rl_dest, kFPReg, true); - LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); StoreFinalValueWide(rl_dest, rl_result); } else { - rl_result = EvalLoc(rl_dest, kFPReg, true); - Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); StoreFinalValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index a050a05466..4a77df2198 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -794,34 +794,61 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX); LoadValueDirectWideFixed(rl_src_expected, r_tmp1); LoadValueDirectWideFixed(rl_src_new_value, r_tmp2); - NewLIR1(kX86Push32R, rs_rDI.GetReg()); - MarkTemp(rs_rDI); - LockTemp(rs_rDI); - NewLIR1(kX86Push32R, rs_rSI.GetReg()); - MarkTemp(rs_rSI); - LockTemp(rs_rSI); - const int push_offset = 4 /* push edi */ + 4 /* push esi */; - int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0 - : (IsInReg(this, rl_src_obj, rs_rDI) ? 4 - : (SRegOffset(rl_src_obj.s_reg_low) + push_offset)); // FIXME: needs 64-bit update. - LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI); - int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0 - : (IsInReg(this, rl_src_offset, rs_rDI) ? 4 - : (SRegOffset(rl_src_offset.s_reg_low) + push_offset)); - LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI); - NewLIR4(kX86LockCmpxchg64A, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0); + const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI); + const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI); + DCHECK(!obj_in_si || !obj_in_di); + const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI); + const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI); + DCHECK(!off_in_si || !off_in_di); + // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg. + RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI; + RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI; + bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI); + bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI); + if (push_di) { + NewLIR1(kX86Push32R, rs_rDI.GetReg()); + MarkTemp(rs_rDI); + LockTemp(rs_rDI); + } + if (push_si) { + NewLIR1(kX86Push32R, rs_rSI.GetReg()); + MarkTemp(rs_rSI); + LockTemp(rs_rSI); + } + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); + if (!obj_in_si && !obj_in_di) { + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj); + // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. + DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); + int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; + AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); + } + if (!off_in_si && !off_in_di) { + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off); + // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. 
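      // (Why the fix-up: the ScopedMemRefType above makes the load helper tag
      // this access as a Dalvik v-reg, but the helper derives the v-reg id
      // from the displacement it was given, which here includes push_offset
      // for the words pushed earlier. The recorded id is therefore too high by
      // push_offset / 4 v-regs, e.g. by 2 when both EDI and ESI were pushed,
      // and the re-annotation below subtracts exactly that bias.)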
+ DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); + int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; + AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); + } + NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0); // After a store we need to insert barrier in case of potential load. Since the // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated. GenMemBarrier(kStoreLoad); - FreeTemp(rs_rSI); - UnmarkTemp(rs_rSI); - NewLIR1(kX86Pop32R, rs_rSI.GetReg()); - FreeTemp(rs_rDI); - UnmarkTemp(rs_rDI); - NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + + if (push_si) { + FreeTemp(rs_rSI); + UnmarkTemp(rs_rSI); + NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + } + if (push_di) { + FreeTemp(rs_rDI); + UnmarkTemp(rs_rDI); + NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + } FreeCallTemps(); } else { // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. @@ -885,11 +912,11 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256, 0, 0, target); res->target = target; res->flags.fixup = kFixupLoad; - SetMemRefType(res, true, kLiteral); store_method_addr_used_ = true; return res; } @@ -1077,6 +1104,9 @@ void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) { } void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) { + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LIR *m; switch (val) { case 0: @@ -1095,6 +1125,9 @@ void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (Gen64Bit()) { if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); @@ -1346,6 +1379,7 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_src.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); @@ -1379,6 +1413,7 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, Gen64Bit() ? 
rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, @@ -2061,6 +2096,7 @@ bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); X86OpCode x86op = GetOpcode(op, rl_dest, false, val); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, @@ -2091,6 +2127,7 @@ bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo); @@ -2469,6 +2506,9 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, return; } + // If we generate any memory access below, it will reference a dalvik reg. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (unary) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = UpdateLocTyped(rl_dest, kCoreReg); @@ -2620,6 +2660,7 @@ void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); } else { int displacement = SRegOffset(rl_src.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, @@ -2670,6 +2711,7 @@ void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); OpMemReg(op, rl_result, t_reg.GetReg()); } else if (!rl_result.reg.IsFloat()) { // Can do this directly into the result register diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index ec165af865..d1ba2398c5 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -206,77 +206,70 @@ RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { /* * Decode the register id. */ -uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id; - - reg_id = reg.GetRegNum(); - /* Double registers in x86 are just a single FP register */ - seed = 1; - /* FP register starts at bit position 16 */ - shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); -} - -uint64_t X86Mir2Lir::GetPCUseDefEncoding() { +ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + /* Double registers in x86 are just a single FP register. This is always just a single bit. */ + return ResourceMask::Bit( + /* FP register starts at bit position 16 */ + ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum()); +} + +ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const { /* * FIXME: might make sense to use a virtual resource encoding bit for pc. 
Might be * able to clean up some of the x86/Arm_Mips differences */ LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86"; - return 0ULL; + return kEncodeNone; } -void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); DCHECK(!lir->flags.use_def_invalid); // X86-specific resource map setup here. if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_X86_REG_SP; + use_mask->SetBit(kX86RegSP); } if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_X86_REG_SP; + def_mask->SetBit(kX86RegSP); } if (flags & REG_DEFA) { - SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg()); + SetupRegMask(def_mask, rs_rAX.GetReg()); } if (flags & REG_DEFD) { - SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg()); + SetupRegMask(def_mask, rs_rDX.GetReg()); } if (flags & REG_USEA) { - SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg()); + SetupRegMask(use_mask, rs_rAX.GetReg()); } if (flags & REG_USEC) { - SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg()); + SetupRegMask(use_mask, rs_rCX.GetReg()); } if (flags & REG_USED) { - SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg()); + SetupRegMask(use_mask, rs_rDX.GetReg()); } if (flags & REG_USEB) { - SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg()); + SetupRegMask(use_mask, rs_rBX.GetReg()); } // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI. if (lir->opcode == kX86RepneScasw) { - SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg()); - SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg()); - SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg()); - SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg()); + SetupRegMask(use_mask, rs_rAX.GetReg()); + SetupRegMask(use_mask, rs_rCX.GetReg()); + SetupRegMask(use_mask, rs_rDI.GetReg()); + SetupRegMask(def_mask, rs_rDI.GetReg()); } if (flags & USE_FP_STACK) { - lir->u.m.use_mask |= ENCODE_X86_FP_STACK; - lir->u.m.def_mask |= ENCODE_X86_FP_STACK; + use_mask->SetBit(kX86FPStack); + def_mask->SetBit(kX86FPStack); } } @@ -368,40 +361,40 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char return buf; } -void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) { +void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kX86RegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } /* Memory bits */ - if (x86LIR && (mask & ENCODE_DALVIK_REG)) { + if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info), (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } @@ -551,7 +544,7 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { } else { // Mark as a scheduling barrier. 
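    // (u.m.def_mask and u.m.use_mask are now const ResourceMask* rather than
    // uint64_t values, so "touches everything" is expressed by pointing the
    // mask at the shared constexpr kEncodeAll instead of storing ENCODE_ALL:
    //   mem_barrier->u.m.def_mask = &kEncodeAll;  // one pointer store, nothing copied
    // The kEncodeAll DCHECKs added in assemble_x86.cc above rely on the same
    // shared-pointer representation.)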
DCHECK(!mem_barrier->flags.use_def_invalid); - mem_barrier->u.m.def_mask = ENCODE_ALL; + mem_barrier->u.m.def_mask = &kEncodeAll; } return ret; #else @@ -822,6 +815,7 @@ void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) { int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo); AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); @@ -1109,7 +1103,10 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } else { // Load the start index from stack, remembering that we pushed EDI. int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t); - Load32Disp(rs_rX86_SP, displacement, rs_rBX); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Load32Disp(rs_rX86_SP, displacement, rs_rBX); + } OpRegReg(kOpXor, rs_rDI, rs_rDI); OpRegReg(kOpCmp, rs_rBX, rs_rDI); OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI); @@ -1413,10 +1410,10 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */); load->flags.fixup = kFixupLoad; load->target = data_target; - SetMemRefType(load, true, kLiteral); } void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) { @@ -1856,6 +1853,7 @@ void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * end up half-promoted. In those cases, we must flush the promoted * half to memory as well. */ + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; RegStorage reg = RegStorage::InvalidReg(); @@ -1986,12 +1984,14 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (loc.wide) { loc = UpdateLocWide(loc); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); } next_arg += 2; } else { loc = UpdateLoc(loc); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); } next_arg++; @@ -2008,6 +2008,8 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, int current_src_offset = start_offset; int current_dest_offset = outs_offset; + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); while (regs_left_to_pass_via_stack > 0) { // This is based on the knowledge that the stack itself is 16-byte aligned. 
bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; @@ -2045,6 +2047,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (src_is_16b_aligned) { ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP); } else if (src_is_8b_aligned) { @@ -2074,8 +2077,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); } else { // Set barrier for 128-bit load. - SetMemRefType(ld1, true /* is_load */, kDalvikReg); - ld1->u.m.def_mask = ENCODE_ALL; + ld1->u.m.def_mask = &kEncodeAll; } } if (st1 != nullptr) { @@ -2085,8 +2087,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); } else { // Set barrier for 128-bit store. - SetMemRefType(st1, false /* is_load */, kDalvikReg); - st1->u.m.def_mask = ENCODE_ALL; + st1->u.m.def_mask = &kEncodeAll; } } @@ -2123,20 +2124,23 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (!reg.Valid()) { int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); - } - i++; - } else { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + } + i++; } else { - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + } } } call_state = next_call_insn(cu_, info, call_state, target_method, diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index d074d8104d..c72e8cd1d9 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -376,7 +376,8 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int o break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), offset); - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */); } return l; @@ -403,8 +404,10 @@ LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) { break; } LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); - AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit 
*/); + AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); + } return l; } @@ -427,7 +430,9 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) { break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + } return l; } @@ -575,11 +580,11 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), kDouble); res->target = data_target; res->flags.fixup = kFixupLoad; - SetMemRefType(res, true, kLiteral); store_method_addr_used_ = true; } else { if (val_lo == 0) { @@ -684,7 +689,8 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int displacement + HIWORD_OFFSET); } } - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, true /* is_load */, is64bit); if (pair) { @@ -815,7 +821,8 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetLowReg()); store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src.GetHighReg()); } - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, false /* is_load */, is64bit); if (pair) { diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 5022529667..f1b5811a33 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -111,9 +111,6 @@ enum X86ResourceEncodingPos { kX86RegEnd = kX86FPStack, }; -#define ENCODE_X86_REG_SP (1ULL << kX86RegSP) -#define ENCODE_X86_FP_STACK (1ULL << kX86FPStack) - // FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum? enum X86NativeRegisterPool { r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 7e50c311da..3b891f2f20 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_DEX_REG_STORAGE_H_ #define ART_COMPILER_DEX_REG_STORAGE_H_ +#include "base/logging.h" namespace art { @@ -102,17 +103,21 @@ class RegStorage { static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift); // Reg is [F][LLLLL], will override any existing shape and use rs_kind. 
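  // The RegStorage constructors below become constexpr, and a C++11 constexpr
  // constructor cannot contain DCHECK statements, so the checks are folded
  // into the single initializer expression via DCHECK_CONSTEXPR. A standalone
  // sketch of the general C++11 idiom (illustration only, not the actual ART
  // macro):
  //
  //   #include <stdexcept>
  //   struct Reg {
  //     constexpr explicit Reg(int num)
  //         : num_((num >= 0 && num < 32)
  //                    ? num
  //                    : throw std::invalid_argument("reg out of range")) {}
  //     int num_;
  //   };
  //   constexpr Reg ok(7);   // fine, evaluated at compile time
  //   // constexpr Reg bad(40) is rejected during constant evaluation;
  //   // a run-time Reg bad(40) would throw instead.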
- RegStorage(RegStorageKind rs_kind, int reg) { - DCHECK_NE(rs_kind, k64BitPair); - DCHECK_EQ(rs_kind & ~kShapeMask, 0); - reg_ = kValid | rs_kind | (reg & kRegTypeMask); - } - RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) { - DCHECK_EQ(rs_kind, k64BitPair); - DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint); - DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31"; - reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) | - (low_reg & kRegTypeMask); + constexpr RegStorage(RegStorageKind rs_kind, int reg) + : reg_( + DCHECK_CONSTEXPR(rs_kind != k64BitPair, , 0u) + DCHECK_CONSTEXPR((rs_kind & ~kShapeMask) == 0, , 0u) + kValid | rs_kind | (reg & kRegTypeMask)) { + } + constexpr RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) + : reg_( + DCHECK_CONSTEXPR(rs_kind == k64BitPair, << rs_kind, 0u) + DCHECK_CONSTEXPR((low_reg & kFloatingPoint) == (high_reg & kFloatingPoint), + << low_reg << ", " << high_reg, 0u) + DCHECK_CONSTEXPR((high_reg & kRegNumMask) <= kHighRegNumMask, + << "High reg must be in 0..31: " << high_reg, false) + kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) | + (low_reg & kRegTypeMask)) { } constexpr explicit RegStorage(uint16_t val) : reg_(val) {} RegStorage() : reg_(kInvalid) {} @@ -125,50 +130,53 @@ class RegStorage { return (reg_ != rhs.GetRawBits()); } - bool Valid() const { + constexpr bool Valid() const { return ((reg_ & kValidMask) == kValid); } - bool Is32Bit() const { + constexpr bool Is32Bit() const { return ((reg_ & kShapeMask) == k32BitSolo); } - bool Is64Bit() const { + constexpr bool Is64Bit() const { return ((reg_ & k64BitMask) == k64Bits); } - bool Is64BitSolo() const { + constexpr bool Is64BitSolo() const { return ((reg_ & kShapeMask) == k64BitSolo); } - bool IsPair() const { + constexpr bool IsPair() const { return ((reg_ & kShapeMask) == k64BitPair); } - bool IsFloat() const { - DCHECK(Valid()); - return ((reg_ & kFloatingPoint) == kFloatingPoint); + constexpr bool IsFloat() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + ((reg_ & kFloatingPoint) == kFloatingPoint); } - bool IsDouble() const { - DCHECK(Valid()); - return (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); + constexpr bool IsDouble() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } - bool IsSingle() const { - DCHECK(Valid()); - return (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint; + constexpr bool IsSingle() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint; } - static bool IsFloat(uint16_t reg) { + static constexpr bool IsFloat(uint16_t reg) { return ((reg & kFloatingPoint) == kFloatingPoint); } - static bool IsDouble(uint16_t reg) { + static constexpr bool IsDouble(uint16_t reg) { return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } - static bool IsSingle(uint16_t reg) { + static constexpr bool IsSingle(uint16_t reg) { return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint; } @@ -221,17 +229,17 @@ class RegStorage { } // Return the register number of low or solo. - int GetRegNum() const { + constexpr int GetRegNum() const { return reg_ & kRegNumMask; } // Is register number in 0..7? - bool Low8() const { + constexpr bool Low8() const { return GetRegNum() < 8; } // Is register number in 0..3? 
- bool Low4() const { + constexpr bool Low4() const { return GetRegNum() < 4; } @@ -244,11 +252,11 @@ class RegStorage { return RegStorage(k64BitPair, low.GetReg(), high.GetReg()); } - static bool SameRegType(RegStorage reg1, RegStorage reg2) { + static constexpr bool SameRegType(RegStorage reg1, RegStorage reg2) { return (reg1.IsDouble() == reg2.IsDouble()) && (reg1.IsSingle() == reg2.IsSingle()); } - static bool SameRegType(int reg1, int reg2) { + static constexpr bool SameRegType(int reg1, int reg2) { return (IsDouble(reg1) == IsDouble(reg2)) && (IsSingle(reg1) == IsSingle(reg2)); } @@ -258,17 +266,17 @@ class RegStorage { } // Create a floating point 32-bit solo. - static RegStorage FloatSolo32(int reg_num) { + static constexpr RegStorage FloatSolo32(int reg_num) { return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } // Create a 128-bit solo. - static RegStorage Solo128(int reg_num) { + static constexpr RegStorage Solo128(int reg_num) { return RegStorage(k128BitSolo, reg_num & kRegTypeMask); } // Create a 64-bit solo. - static RegStorage Solo64(int reg_num) { + static constexpr RegStorage Solo64(int reg_num) { return RegStorage(k64BitSolo, reg_num & kRegTypeMask); } @@ -277,19 +285,19 @@ class RegStorage { return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } - static RegStorage InvalidReg() { + static constexpr RegStorage InvalidReg() { return RegStorage(kInvalid); } - static uint16_t RegNum(int raw_reg_bits) { + static constexpr uint16_t RegNum(int raw_reg_bits) { return raw_reg_bits & kRegNumMask; } - int GetRawBits() const { + constexpr int GetRawBits() const { return reg_; } - size_t StorageSize() { + size_t StorageSize() const { switch (reg_ & kShapeMask) { case kInvalid: return 0; case k32BitSolo: return 4; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index beafbcc386..f05cb66aba 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -29,30 +29,60 @@ namespace art { -void CodeGenerator::Compile(CodeAllocator* allocator) { +void CodeGenerator::CompileBaseline(CodeAllocator* allocator) { const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); + block_labels_.SetSize(blocks.Size()); + + DCHECK_EQ(frame_size_, kUninitializedFrameSize); + ComputeFrameSize(GetGraph()->GetMaximumNumberOfOutVRegs() + + GetGraph()->GetNumberOfVRegs() + + 1 /* filler */); GenerateFrameEntry(); + for (size_t i = 0, e = blocks.Size(); i < e; ++i) { - CompileBlock(blocks.Get(i)); + HBasicBlock* block = blocks.Get(i); + Bind(GetLabelOf(block)); + HGraphVisitor* location_builder = GetLocationBuilder(); + HGraphVisitor* instruction_visitor = GetInstructionVisitor(); + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + current->Accept(location_builder); + InitLocations(current); + current->Accept(instruction_visitor); + } } + size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); MemoryRegion code(buffer, code_size); GetAssembler()->FinalizeInstructions(code); } -void CodeGenerator::CompileBlock(HBasicBlock* block) { - Bind(GetLabelOf(block)); - HGraphVisitor* location_builder = GetLocationBuilder(); - HGraphVisitor* instruction_visitor = GetInstructionVisitor(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); 
it.Advance()) { - HInstruction* current = it.Current(); - current->Accept(location_builder); - InitLocations(current); - current->Accept(instruction_visitor); +void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { + // The frame size has already been computed during register allocation. + DCHECK_NE(frame_size_, kUninitializedFrameSize); + const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); + DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); + DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); + block_labels_.SetSize(blocks.Size()); + + GenerateFrameEntry(); + for (size_t i = 0, e = blocks.Size(); i < e; ++i) { + HBasicBlock* block = blocks.Get(i); + Bind(GetLabelOf(block)); + HGraphVisitor* instruction_visitor = GetInstructionVisitor(); + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + current->Accept(instruction_visitor); + } } + + size_t code_size = GetAssembler()->CodeSize(); + uint8_t* buffer = allocator->Allocate(code_size); + MemoryRegion code(buffer, code_size); + GetAssembler()->FinalizeInstructions(code); } size_t CodeGenerator::AllocateFreeRegisterInternal( diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index e197ccd517..82fa6393e0 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -28,6 +28,7 @@ namespace art { static size_t constexpr kVRegSize = 4; +static size_t constexpr kUninitializedFrameSize = 0; class DexCompilationUnit; @@ -51,7 +52,8 @@ class CodeGenerator : public ArenaObject { public: // Compiles the graph to executable instructions. Returns whether the compilation // succeeded. - void Compile(CodeAllocator* allocator); + void CompileBaseline(CodeAllocator* allocator); + void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, InstructionSet instruction_set); @@ -61,6 +63,14 @@ class CodeGenerator : public ArenaObject { Label* GetLabelOf(HBasicBlock* block) const; bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; + size_t GetStackSlotOfParameter(HParameterValue* parameter) const { + // Note that this follows the current calling convention. + return GetFrameSize() + + kVRegSize // Art method + + (parameter->GetIndex() - graph_->GetNumberOfVRegs() + graph_->GetNumberOfInVRegs()) + * kVRegSize; + } + virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(Label* label) = 0; @@ -69,6 +79,7 @@ class CodeGenerator : public ArenaObject { virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; + virtual void ComputeFrameSize(size_t number_of_spill_slots) = 0; uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } @@ -95,14 +106,12 @@ class CodeGenerator : public ArenaObject { protected: CodeGenerator(HGraph* graph, size_t number_of_registers) - : frame_size_(0), + : frame_size_(kUninitializedFrameSize), graph_(graph), block_labels_(graph->GetArena(), 0), pc_infos_(graph->GetArena(), 32), - blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) { - block_labels_.SetSize(graph->GetBlocks().Size()); - } - ~CodeGenerator() { } + blocked_registers_(graph->GetArena()->AllocArray<bool>(number_of_registers)) {} + ~CodeGenerator() {} // Register allocation logic. 
void AllocateRegistersLocally(HInstruction* instruction) const; @@ -123,7 +132,6 @@ class CodeGenerator : public ArenaObject { private: void InitLocations(HInstruction* instruction); - void CompileBlock(HBasicBlock* block); HGraph* const graph_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e888cc1d6e..d61df36ca9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -129,16 +129,18 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +void CodeGeneratorARM::ComputeFrameSize(size_t number_of_spill_slots) { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + kVRegSize // Art method + + kNumberOfPushedRegistersAtEntry * kArmWordSize, + kStackAlignment)); +} + void CodeGeneratorARM::GenerateFrameEntry() { core_spill_mask_ |= (1 << LR); __ PushList((1 << LR)); - SetFrameSize(RoundUp( - (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize - + kVRegSize // filler - + kArmWordSize // Art method - + kNumberOfPushedRegistersAtEntry * kArmWordSize, - kStackAlignment)); // The return PC has already been pushed on the stack. __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize)); __ str(R0, Address(SP, 0)); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index c945a06d47..ac5ef212ba 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -104,6 +104,7 @@ class CodeGeneratorARM : public CodeGenerator { explicit CodeGeneratorARM(HGraph* graph); virtual ~CodeGeneratorARM() { } + virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE; virtual void GenerateFrameEntry() OVERRIDE; virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(Label* label) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 72c697ffee..c7dca86dab 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -48,7 +48,16 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) CodeGeneratorX86::CodeGeneratorX86(HGraph* graph) : CodeGenerator(graph, kNumberOfRegIds), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} + +void CodeGeneratorX86::ComputeFrameSize(size_t number_of_spill_slots) { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + kVRegSize // Art method + + kNumberOfPushedRegistersAtEntry * kX86WordSize, + kStackAlignment)); +} static bool* GetBlockedRegisterPairs(bool* blocked_registers) { return blocked_registers + kNumberOfAllocIds; @@ -125,13 +134,6 @@ void CodeGeneratorX86::GenerateFrameEntry() { static const int kFakeReturnRegister = 8; core_spill_mask_ |= (1 << kFakeReturnRegister); - SetFrameSize(RoundUp( - (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize - + kVRegSize // filler - + kX86WordSize // Art method - + kNumberOfPushedRegistersAtEntry * kX86WordSize, - kStackAlignment)); - // The return PC has already been pushed on the stack. 
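  // Frame size recap for the baseline path: CompileBaseline() requests
  // max_out_vregs + num_vregs + 1 (filler) spill slots, and ComputeFrameSize()
  // rounds slots * kVRegSize + kVRegSize (ArtMethod*) + the bytes already
  // pushed at entry up to kStackAlignment. Illustration, assuming one pushed
  // word at entry, 4-byte v-regs and 16-byte stack alignment: 5 v-regs and 2
  // out v-regs give 8 slots, so the frame is RoundUp(8*4 + 4 + 4, 16) == 48,
  // and the adjustment below drops ESP by 48 - 4 == 44 because the pushed
  // return PC already accounts for the other 4 bytes.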
__ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize)); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); @@ -264,8 +266,8 @@ void CodeGeneratorX86::Move32(Location destination, Location source) { __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister()); } else { DCHECK(source.IsStackSlot()); - __ movl(EAX, Address(ESP, source.GetStackIndex())); - __ movl(Address(ESP, destination.GetStackIndex()), EAX); + __ pushl(Address(ESP, source.GetStackIndex())); + __ popl(Address(ESP, destination.GetStackIndex())); } } } @@ -302,8 +304,8 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { DCHECK(source.IsDoubleStackSlot()); __ movl(calling_convention.GetRegisterAt(argument_index), Address(ESP, source.GetStackIndex())); - __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), EAX); + __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); + __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize))); } } else { if (source.IsRegister()) { @@ -315,15 +317,15 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { uint32_t argument_index = source.GetQuickParameterIndex(); __ movl(Address(ESP, destination.GetStackIndex()), calling_convention.GetRegisterAt(argument_index)); - __ movl(EAX, Address(ESP, + __ pushl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize())); - __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX); + __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); } else { DCHECK(source.IsDoubleStackSlot()); - __ movl(EAX, Address(ESP, source.GetStackIndex())); - __ movl(Address(ESP, destination.GetStackIndex()), EAX); - __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX); + __ pushl(Address(ESP, source.GetStackIndex())); + __ popl(Address(ESP, destination.GetStackIndex())); + __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); + __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); } } } @@ -501,7 +503,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { } void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { - // Will be generated at use site. + codegen_->Move(constant, constant->GetLocations()->Out(), nullptr); } void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { @@ -573,7 +575,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); - locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(X86CpuLocation(EAX)); InvokeDexCallingConventionVisitor calling_convention_visitor; for (size_t i = 0; i < invoke->InputCount(); i++) { @@ -802,7 +804,6 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86::VisitParameterValue(HParameterValue* instruction) { - // Nothing to do, the parameter is already at its location. 
} void LocationsBuilderX86::VisitNot(HNot* instruction) { @@ -829,15 +830,100 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) { } void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +X86Assembler* ParallelMoveResolverX86::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) { + ScratchRegisterScope ensure_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister()); + } else { + DCHECK(destination.IsStackSlot()); + __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex())); + } else { + DCHECK(destination.IsStackSlot()); + MoveMemoryToMemory(destination.GetStackIndex(), + source.GetStackIndex()); + } + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + ScratchRegisterScope ensure_scratch(this, reg, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); + __ movl(Address(ESP, mem + stack_offset), reg); + __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +} + + +void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch1( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + ScratchRegisterScope ensure_scratch2( + this, ensure_scratch1.GetRegister(), codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; + stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); + __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); + __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); + __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); +} + +void ParallelMoveResolverX86::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + __ xchgl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister()); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(source.AsX86().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(destination.AsX86().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange(destination.GetStackIndex(), source.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverX86::SpillScratch(int reg) { + __ pushl(static_cast<Register>(reg)); +} + +void ParallelMoveResolverX86::RestoreScratch(int reg) { + __ popl(static_cast<Register>(reg)); } } // namespace x86 diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 4a706363b2..acc670e09b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "nodes.h" +#include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" namespace art { @@ -59,6 +60,28 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; +class ParallelMoveResolverX86 : public ParallelMoveResolver { + public: + ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + X86Assembler* GetAssembler() const; + + private: + void Exchange(Register reg, int mem); + void Exchange(int mem1, int mem2); + void MoveMemoryToMemory(int dst, int src); + + CodeGeneratorX86* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86); +}; + class LocationsBuilderX86 : public HGraphVisitor { public: LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen) @@ -105,6 +128,7 @@ class CodeGeneratorX86 : public CodeGenerator { explicit CodeGeneratorX86(HGraph* graph); virtual ~CodeGeneratorX86() { } + virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE; virtual void GenerateFrameEntry() OVERRIDE; virtual void GenerateFrameExit() OVERRIDE; virtual void Bind(Label* label) OVERRIDE; @@ -145,6 +169,10 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + ParallelMoveResolverX86* GetMoveResolver() { + return &move_resolver_; + } + private: // Helper method to move a 32bits value between two locations. 
void Move32(Location destination, Location source); @@ -153,6 +181,7 @@ class CodeGeneratorX86 : public CodeGenerator { LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; + ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 7684bb189d..8ee775cbe1 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -56,7 +56,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe ASSERT_NE(graph, nullptr); InternalCodeAllocator allocator; CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86); - codegen->Compile(&allocator); + codegen->CompileBaseline(&allocator); typedef int32_t (*fptr)(); #if defined(__i386__) CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); @@ -66,7 +66,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe } #endif codegen = CodeGenerator::Create(&arena, graph, kArm); - codegen->Compile(&allocator); + codegen->CompileBaseline(&allocator); #if defined(__arm__) CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())(); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5c5042e20f..a49ce64a2d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -28,8 +28,15 @@ namespace art { */ class HGraphVisualizerPrinter : public HGraphVisitor { public: - HGraphVisualizerPrinter(HGraph* graph, std::ostream& output, const CodeGenerator& codegen) - : HGraphVisitor(graph), output_(output), codegen_(codegen), indent_(0) {} + HGraphVisualizerPrinter(HGraph* graph, + std::ostream& output, + const char* pass_name, + const CodeGenerator& codegen) + : HGraphVisitor(graph), + output_(output), + pass_name_(pass_name), + codegen_(codegen), + indent_(0) {} void StartTag(const char* name) { AddIndent(); @@ -94,6 +101,33 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_<< std::endl; } + void DumpLocation(Location location, Primitive::Type type) { + if (location.IsRegister()) { + if (type == Primitive::kPrimDouble || type == Primitive::kPrimFloat) { + codegen_.DumpFloatingPointRegister(output_, location.reg().RegId()); + } else { + codegen_.DumpCoreRegister(output_, location.reg().RegId()); + } + } else { + DCHECK(location.IsStackSlot()); + output_ << location.GetStackIndex() << "(sp)"; + } + } + + void VisitParallelMove(HParallelMove* instruction) { + output_ << instruction->DebugName(); + output_ << " ("; + for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { + MoveOperands* move = instruction->MoveOperandsAt(i); + DumpLocation(move->GetSource(), Primitive::kPrimInt); + output_ << " -> "; + DumpLocation(move->GetDestination(), Primitive::kPrimInt); + if (i + 1 != e) { + output_ << ", "; + } + } + output_ << ")"; + } void VisitInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); @@ -104,24 +138,28 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } output_ << "]"; } - if (instruction->GetLifetimePosition() != kNoLifetime) { + if (pass_name_ == kLivenessPassName && instruction->GetLifetimePosition() != kNoLifetime) { output_ << " (liveness: " << instruction->GetLifetimePosition(); if (instruction->HasLiveInterval()) { output_ << " "; const 
LiveInterval& interval = *instruction->GetLiveInterval(); interval.Dump(output_); - if (interval.HasRegister()) { - int reg = interval.GetRegister(); + } + output_ << ")"; + } else if (pass_name_ == kRegisterAllocatorPassName) { + LocationSummary* locations = instruction->GetLocations(); + if (locations != nullptr) { + output_ << " ( "; + for (size_t i = 0; i < instruction->InputCount(); ++i) { + DumpLocation(locations->InAt(i), instruction->InputAt(i)->GetType()); output_ << " "; - if (instruction->GetType() == Primitive::kPrimFloat - || instruction->GetType() == Primitive::kPrimDouble) { - codegen_.DumpFloatingPointRegister(output_, reg); - } else { - codegen_.DumpCoreRegister(output_, reg); - } + } + output_ << ")"; + if (locations->Out().IsValid()) { + output_ << " -> "; + DumpLocation(locations->Out(), instruction->GetType()); } } - output_ << ")"; } } @@ -137,9 +175,9 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } - void Run(const char* pass_name) { + void Run() { StartTag("cfg"); - PrintProperty("name", pass_name); + PrintProperty("name", pass_name_); VisitInsertionOrder(); EndTag("cfg"); } @@ -188,6 +226,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { private: std::ostream& output_; + const char* pass_name_; const CodeGenerator& codegen_; size_t indent_; @@ -209,7 +248,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph, *output_, codegen_); + HGraphVisualizerPrinter printer(graph, *output_, "", codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", pretty_name.c_str()); printer.PrintProperty("method", pretty_name.c_str()); @@ -227,7 +266,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph, *output_, codegen_); + HGraphVisualizerPrinter printer(graph, *output_, "", codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", name); printer.PrintProperty("method", name); @@ -239,8 +278,8 @@ void HGraphVisualizer::DumpGraph(const char* pass_name) { if (!is_enabled_) { return; } - HGraphVisualizerPrinter printer(graph_, *output_, codegen_); - printer.Run(pass_name); + HGraphVisualizerPrinter printer(graph_, *output_, pass_name, codegen_); + printer.Run(); } } // namespace art diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 2638cf504d..7cd74e9b7a 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -25,6 +25,9 @@ class CodeGenerator; class DexCompilationUnit; class HGraph; +static const char* kLivenessPassName = "liveness"; +static const char* kRegisterAllocatorPassName = "register"; + /** * If enabled, emits compilation information suitable for the c1visualizer tool * and IRHydra. 
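The ParallelMoveResolverX86 code above emits a plain move whenever a destination is no longer needed as a source, and falls back to an exchange (xchgl or the push/pop pair) when the remaining moves form a cycle. The following is a minimal standalone sketch of that strategy, using plain integers as registers instead of ART's Location/MoveOperands types; it is illustrative only, not the resolver's actual algorithm.

#include <cstdio>
#include <utility>
#include <vector>

// Apply the parallel move set {src -> dst, ...} to `regs`, using only single
// moves and swaps, and never clobbering a value still needed as a source.
static void ResolveParallelMove(std::vector<std::pair<int, int>> moves, int* regs) {
  while (!moves.empty()) {
    bool progress = false;
    for (size_t i = 0; i < moves.size(); ++i) {
      int dst = moves[i].second;
      bool blocked = false;  // Is `dst` still the source of another pending move?
      for (size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].first == dst) { blocked = true; break; }
      }
      if (!blocked) {
        regs[dst] = regs[moves[i].first];  // Plain move, the EmitMove case.
        moves.erase(moves.begin() + i);
        progress = true;
        break;
      }
    }
    if (!progress) {
      // Every remaining destination is still a pending source: a cycle.
      // Break it with a swap (the EmitSwap case) and redirect sources that
      // referred to the two swapped registers.
      int a = moves[0].first;
      int b = moves[0].second;
      std::swap(regs[a], regs[b]);
      moves.erase(moves.begin());
      for (std::pair<int, int>& m : moves) {
        if (m.first == a) m.first = b;
        else if (m.first == b) m.first = a;
      }
    }
  }
}

int main() {
  int regs[4] = {10, 20, 30, 40};
  // r0 and r1 form a swap cycle; r2 -> r3 is a plain move.
  ResolveParallelMove({{0, 1}, {1, 0}, {2, 3}}, regs);
  std::printf("%d %d %d %d\n", regs[0], regs[1], regs[2], regs[3]);  // 20 10 30 30
  return 0;
}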
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 68848de636..143d5c9e6f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -508,6 +508,7 @@ class HInstruction : public ArenaObject { void ReplaceWith(HInstruction* instruction); #define INSTRUCTION_TYPE_CHECK(type) \ + bool Is##type() { return (As##type() != nullptr); } \ virtual H##type* As##type() { return nullptr; } FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3dc0928d6d..ccacbef401 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -85,6 +85,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. bool shouldCompile = dex_compilation_unit.GetSymbol().find("00024opt_00024") != std::string::npos; + bool shouldOptimize = + dex_compilation_unit.GetSymbol().find("00024reg_00024") != std::string::npos; ArenaPool pool; ArenaAllocator arena(&pool); @@ -116,7 +118,36 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite visualizer.DumpGraph("builder"); CodeVectorAllocator allocator; - codegen->Compile(&allocator); + + if (RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) { + graph->BuildDominatorTree(); + graph->TransformToSSA(); + visualizer.DumpGraph("ssa"); + + graph->FindNaturalLoops(); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); + visualizer.DumpGraph(kLivenessPassName); + + RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); + register_allocator.AllocateRegisters(); + + visualizer.DumpGraph(kRegisterAllocatorPassName); + codegen->CompileOptimized(&allocator); + } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { + LOG(FATAL) << "Could not allocate registers in optimizing compiler"; + } else { + codegen->CompileBaseline(&allocator); + + // Run these phases to get some test coverage. + graph->BuildDominatorTree(); + graph->TransformToSSA(); + visualizer.DumpGraph("ssa"); + graph->FindNaturalLoops(); + SsaLivenessAnalysis liveness(*graph, codegen); + liveness.Analyze(); + visualizer.DumpGraph(kLivenessPassName); + } std::vector<uint8_t> mapping_table; codegen->BuildMappingTable(&mapping_table); @@ -125,19 +156,6 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); - // Run these phases to get some test coverage. 
- graph->BuildDominatorTree(); - graph->TransformToSSA(); - visualizer.DumpGraph("ssa"); - - graph->FindNaturalLoops(); - SsaLivenessAnalysis liveness(*graph, codegen); - liveness.Analyze(); - visualizer.DumpGraph("liveness"); - - RegisterAllocator(graph->GetArena(), *codegen).AllocateRegisters(liveness); - visualizer.DumpGraph("register"); - return new CompiledMethod(GetCompilerDriver(), instruction_set, allocator.GetMemory(), diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 3d2d136ec3..4a1b6ce446 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -147,4 +147,64 @@ void ParallelMoveResolver::PerformMove(size_t index) { } } +bool ParallelMoveResolver::IsScratchLocation(Location loc) { + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->Blocks(loc)) { + return false; + } + } + + for (size_t i = 0; i < moves_.Size(); ++i) { + if (moves_.Get(i)->GetDestination().Equals(loc)) { + return true; + } + } + + return false; +} + +int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_count, bool* spilled) { + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && + IsScratchLocation(Location::RegisterLocation(ManagedRegister(reg)))) { + scratch = reg; + break; + } + } + + if (scratch == -1) { + *spilled = true; + for (int reg = 0; reg < register_count; ++reg) { + if (blocked != reg) { + scratch = reg; + } + } + } else { + *spilled = false; + } + + return scratch; +} + + +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, &spilled_); + + if (spilled_) { + resolver->SpillScratch(reg_); + } +} + + +ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { + if (spilled_) { + resolver_->RestoreScratch(reg_); + } +} + } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index ff20cb0bc6..e1189d8520 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -23,6 +23,7 @@ namespace art { class HParallelMove; +class Location; class MoveOperands; /** @@ -39,15 +40,37 @@ class ParallelMoveResolver : public ValueObject { void EmitNativeCode(HParallelMove* parallel_move); protected: + class ScratchRegisterScope : public ValueObject { + public: + ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int number_of_registers); + ~ScratchRegisterScope(); + + int GetRegister() const { return reg_; } + bool IsSpilled() const { return spilled_; } + + private: + ParallelMoveResolver* resolver_; + int reg_; + bool spilled_; + }; + + bool IsScratchLocation(Location loc); + int AllocateScratchRegister(int blocked, int register_count, bool* spilled); + // Emit a move. virtual void EmitMove(size_t index) = 0; // Execute a move by emitting a swap of two operands. virtual void EmitSwap(size_t index) = 0; + virtual void SpillScratch(int reg) = 0; + virtual void RestoreScratch(int reg) = 0; + // List of moves not yet resolved. GrowableArray<MoveOperands*> moves_; + static constexpr int kNoRegister = -1; + private: // Build the initial list of moves. 
void BuildInitialMoveList(HParallelMove* parallel_move); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 88df24d9ac..093856d497 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -50,6 +50,9 @@ class TestParallelMoveResolver : public ParallelMoveResolver { << ")"; } + virtual void SpillScratch(int reg) {} + virtual void RestoreScratch(int reg) {} + std::string GetMessage() const { return message_.str(); } diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 8c6eb2a174..c2a47697de 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -24,64 +24,151 @@ namespace art { static constexpr size_t kMaxLifetimePosition = -1; static constexpr size_t kDefaultNumberOfSpillSlots = 4; -RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen) +RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& liveness) : allocator_(allocator), codegen_(codegen), + liveness_(liveness), unhandled_(allocator, 0), handled_(allocator, 0), active_(allocator, 0), inactive_(allocator, 0), + physical_register_intervals_(allocator, codegen->GetNumberOfRegisters()), spill_slots_(allocator, kDefaultNumberOfSpillSlots), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), - blocked_registers_(allocator->AllocArray<bool>(codegen.GetNumberOfRegisters())) { - codegen.SetupBlockedRegisters(blocked_registers_); + blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())) { + codegen->SetupBlockedRegisters(blocked_registers_); + physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters()); } -static bool ShouldProcess(bool processing_core_registers, HInstruction* instruction) { - bool is_core_register = (instruction->GetType() != Primitive::kPrimDouble) - && (instruction->GetType() != Primitive::kPrimFloat); +bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, + InstructionSet instruction_set) { + if (!Supports(instruction_set)) { + return false; + } + for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) { + for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions()); + !it.Done(); + it.Advance()) { + HInstruction* current = it.Current(); + if (current->NeedsEnvironment()) return false; + if (current->GetType() == Primitive::kPrimLong) return false; + if (current->GetType() == Primitive::kPrimFloat) return false; + if (current->GetType() == Primitive::kPrimDouble) return false; + } + } + return true; +} + +static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { + bool is_core_register = (interval->GetType() != Primitive::kPrimDouble) + && (interval->GetType() != Primitive::kPrimFloat); return processing_core_registers == is_core_register; } -void RegisterAllocator::AllocateRegistersInternal(const SsaLivenessAnalysis& liveness) { +void RegisterAllocator::AllocateRegisters() { + processing_core_registers_ = true; + AllocateRegistersInternal(); + processing_core_registers_ = false; + AllocateRegistersInternal(); + + Resolve(); + + if (kIsDebugBuild) { + processing_core_registers_ = true; + ValidateInternal(true); + processing_core_registers_ = false; + ValidateInternal(true); + } +} + +void RegisterAllocator::BlockRegister(Location location, + size_t start, + size_t end, + Primitive::Type type) { 
+ int reg = location.reg().RegId(); + LiveInterval* interval = physical_register_intervals_.Get(reg); + if (interval == nullptr) { + interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); + physical_register_intervals_.Put(reg, interval); + inactive_.Add(interval); + } + DCHECK(interval->GetRegister() == reg); + interval->AddRange(start, end); +} + +void RegisterAllocator::AllocateRegistersInternal() { number_of_registers_ = processing_core_registers_ - ? codegen_.GetNumberOfCoreRegisters() - : codegen_.GetNumberOfFloatingPointRegisters(); + ? codegen_->GetNumberOfCoreRegisters() + : codegen_->GetNumberOfFloatingPointRegisters(); registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. - for (size_t i = liveness.GetNumberOfSsaValues(); i > 0; --i) { - HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i - 1); - if (ShouldProcess(processing_core_registers_, instruction)) { - LiveInterval* current = instruction->GetLiveInterval(); + for (size_t i = liveness_.GetNumberOfSsaValues(); i > 0; --i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i - 1); + LiveInterval* current = instruction->GetLiveInterval(); + if (ShouldProcess(processing_core_registers_, current)) { DCHECK(unhandled_.IsEmpty() || current->StartsBefore(unhandled_.Peek())); - unhandled_.Add(current); - } - } - LinearScan(); - if (kIsDebugBuild) { - ValidateInternal(liveness, true); - } -} + LocationSummary* locations = instruction->GetLocations(); + if (locations->GetTempCount() != 0) { + // Note that we already filtered out instructions requiring temporaries in + // RegisterAllocator::CanAllocateRegistersFor. + LOG(FATAL) << "Unimplemented"; + } -bool RegisterAllocator::ValidateInternal(const SsaLivenessAnalysis& liveness, - bool log_fatal_on_failure) const { - // To simplify unit testing, we eagerly create the array of intervals, and - // call the helper method. - GrowableArray<LiveInterval*> intervals(allocator_, 0); - for (size_t i = 0; i < liveness.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness.GetInstructionFromSsaIndex(i); - if (ShouldProcess(processing_core_registers_, instruction)) { - intervals.Add(instruction->GetLiveInterval()); + // Some instructions define their output in fixed register/stack slot. We need + // to ensure we know these locations before doing register allocation. For a + // given register, we create an interval that covers these locations. The register + // will be unavailable at these locations when trying to allocate one for an + // interval. + // + // The backwards walking ensures the ranges are ordered on increasing start positions. + Location output = locations->Out(); + size_t position = instruction->GetLifetimePosition(); + if (output.IsRegister()) { + // Shift the interval's start by one to account for the blocked register. + current->SetFrom(position + 1); + current->SetRegister(output.reg().RegId()); + BlockRegister(output, position, position + 1, instruction->GetType()); + } else if (output.IsStackSlot()) { + current->SetSpillSlot(output.GetStackIndex()); + } + for (size_t i = 0; i < instruction->InputCount(); ++i) { + Location input = locations->InAt(i); + if (input.IsRegister()) { + BlockRegister(input, position, position + 1, instruction->InputAt(i)->GetType()); + } + } + + // Add the interval to the correct list. 
+ if (current->HasRegister()) { + DCHECK(instruction->IsParameterValue()); + inactive_.Add(current); + } else if (current->HasSpillSlot()) { + DCHECK(instruction->IsParameterValue()); + // Split before first register use. + size_t first_register_use = current->FirstRegisterUse(); + if (first_register_use != kNoLifetime) { + LiveInterval* split = Split(current, first_register_use - 1); + // The new interval may start at a later position; keep the unhandled list sorted. + AddToUnhandled(split); + } else { + // Nothing to do, we won't allocate a register for this value. + } + } else { + DCHECK(unhandled_.IsEmpty() || current->StartsBefore(unhandled_.Peek())); + unhandled_.Add(current); + } } } - return ValidateIntervals(intervals, spill_slots_.Size(), codegen_, allocator_, - processing_core_registers_, log_fatal_on_failure); + + LinearScan(); } class AllRangesIterator : public ValueObject { @@ -111,6 +198,28 @@ class AllRangesIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); }; +bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { + // To simplify unit testing, we eagerly create the array of intervals, and + // call the helper method. + GrowableArray<LiveInterval*> intervals(allocator_, 0); + for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { + intervals.Add(instruction->GetLiveInterval()); + } + } + + for (size_t i = 0, e = physical_register_intervals_.Size(); i < e; ++i) { + LiveInterval* fixed = physical_register_intervals_.Get(i); + if (fixed != nullptr && ShouldProcess(processing_core_registers_, fixed)) { + intervals.Add(fixed); + } + } + + return ValidateIntervals(intervals, spill_slots_.Size(), *codegen_, allocator_, + processing_core_registers_, log_fatal_on_failure); +} + bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, size_t number_of_spill_slots, const CodeGenerator& codegen, @@ -132,7 +241,10 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in for (size_t i = 0, e = intervals.Size(); i < e; ++i) { for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) { LiveInterval* current = it.CurrentInterval(); - if (current->GetParent()->HasSpillSlot()) { + HInstruction* defined_by = current->GetParent()->GetDefinedBy(); + if (current->GetParent()->HasSpillSlot() + // Parameters have their own stack slot.
+ && !(defined_by != nullptr && defined_by->IsParameterValue())) { BitVector* liveness_of_spill_slot = liveness_of_values.Get( number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize); for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { @@ -176,14 +288,14 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in return true; } -void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) { +void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interval) const { interval->Dump(stream); stream << ": "; if (interval->HasRegister()) { if (processing_core_registers_) { - codegen_.DumpCoreRegister(stream, interval->GetRegister()); + codegen_->DumpCoreRegister(stream, interval->GetRegister()); } else { - codegen_.DumpFloatingPointRegister(stream, interval->GetRegister()); + codegen_->DumpFloatingPointRegister(stream, interval->GetRegister()); } } else { stream << "spilled"; @@ -196,6 +308,7 @@ void RegisterAllocator::LinearScan() { while (!unhandled_.IsEmpty()) { // (1) Remove interval with the lowest start position from unhandled. LiveInterval* current = unhandled_.Pop(); + DCHECK(!current->IsFixed() && !current->HasRegister() && !current->HasSpillSlot()); size_t position = current->GetStart(); // (2) Remove currently active intervals that are dead at this position. @@ -255,13 +368,6 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { free_until[i] = kMaxLifetimePosition; } - // For each active interval, set its register to not free. - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* interval = active_.Get(i); - DCHECK(interval->HasRegister()); - free_until[interval->GetRegister()] = 0; - } - // For each inactive interval, set its register to be free until // the next intersection with `current`. // Thanks to SSA, this should only be needed for intervals @@ -275,6 +381,13 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } + // For each active interval, set its register to not free. + for (size_t i = 0, e = active_.Size(); i < e; ++i) { + LiveInterval* interval = active_.Get(i); + DCHECK(interval->HasRegister()); + free_until[interval->GetRegister()] = 0; + } + // Pick the register that is free the longest. 
int reg = -1; for (size_t i = 0; i < number_of_registers_; ++i) { @@ -330,9 +443,13 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); DCHECK(active->HasRegister()); - size_t use = active->FirstRegisterUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[active->GetRegister()] = use; + if (active->IsFixed()) { + next_use[active->GetRegister()] = current->GetStart(); + } else { + size_t use = active->FirstRegisterUseAfter(current->GetStart()); + if (use != kNoLifetime) { + next_use[active->GetRegister()] = use; + } } } @@ -343,9 +460,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { LiveInterval* inactive = inactive_.Get(i); DCHECK(inactive->HasRegister()); - size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); - if (use != kNoLifetime) { - next_use[inactive->GetRegister()] = use; + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + next_use[inactive->GetRegister()] = + std::min(next_intersection, next_use[inactive->GetRegister()]); + } else { + size_t use = inactive->FirstRegisterUseAfter(current->GetStart()); + if (use != kNoLifetime) { + next_use[inactive->GetRegister()] = std::min(use, next_use[inactive->GetRegister()]); + } + } } } @@ -374,6 +499,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0, e = active_.Size(); i < e; ++i) { LiveInterval* active = active_.Get(i); if (active->GetRegister() == reg) { + DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); active_.DeleteAt(i); handled_.Add(active); @@ -385,11 +511,19 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { for (size_t i = 0; i < inactive_.Size(); ++i) { LiveInterval* inactive = inactive_.Get(i); if (inactive->GetRegister() == reg) { - LiveInterval* split = Split(inactive, current->GetStart()); - inactive_.DeleteAt(i); - handled_.Add(inactive); - AddToUnhandled(split); - --i; + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + LiveInterval* split = Split(current, next_intersection); + AddToUnhandled(split); + } else { + LiveInterval* split = Split(inactive, current->GetStart()); + inactive_.DeleteAt(i); + handled_.Add(inactive); + AddToUnhandled(split); + --i; + } + } } } @@ -398,13 +532,15 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } void RegisterAllocator::AddToUnhandled(LiveInterval* interval) { + size_t insert_at = 0; for (size_t i = unhandled_.Size(); i > 0; --i) { LiveInterval* current = unhandled_.Get(i - 1); if (current->StartsAfter(interval)) { - unhandled_.InsertAt(i, interval); + insert_at = i; break; } } + unhandled_.InsertAt(insert_at, interval); } LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { @@ -429,7 +565,13 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { return; } - // Find when this instruction dies. + HInstruction* defined_by = parent->GetDefinedBy(); + if (defined_by->IsParameterValue()) { + // Parameters have their own stack slot. 
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); + return; + } + LiveInterval* last_sibling = interval; while (last_sibling->GetNextSibling() != nullptr) { last_sibling = last_sibling->GetNextSibling(); } @@ -451,7 +593,315 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { spill_slots_.Put(slot, end); } - interval->GetParent()->SetSpillSlot(slot * kVRegSize); + parent->SetSpillSlot(slot * kVRegSize); +} + +static Location ConvertToLocation(LiveInterval* interval) { + if (interval->HasRegister()) { + return Location::RegisterLocation(ManagedRegister(interval->GetRegister())); + } else { + DCHECK(interval->GetParent()->HasSpillSlot()); + return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + } +} + +// We create a special marker for input moves to differentiate them from +// moves created during resolution. They must be different instructions +// because the input moves work on the assumption that the interval moves +// have been executed. +static constexpr size_t kInputMoveLifetimePosition = 0; +static bool IsInputMove(HInstruction* instruction) { + return instruction->GetLifetimePosition() == kInputMoveLifetimePosition; +} + +void RegisterAllocator::AddInputMoveFor(HInstruction* instruction, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + DCHECK(instruction->AsPhi() == nullptr); + + HInstruction* previous = instruction->GetPrevious(); + HParallelMove* move = nullptr; + if (previous == nullptr + || previous->AsParallelMove() == nullptr + || !IsInputMove(previous)) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(kInputMoveLifetimePosition); + instruction->GetBlock()->InsertInstructionBefore(move, instruction); + } else { + move = previous->AsParallelMove(); + } + DCHECK(IsInputMove(move)); + move->AddMove(new (allocator_) MoveOperands(source, destination)); +} + +void RegisterAllocator::InsertParallelMoveAt(size_t position, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); + if (at == nullptr) { + // Block boundary, don't do anything; the connection of split siblings will handle it. + return; + } + HParallelMove* move; + if ((position & 1) == 1) { + // Move must happen after the instruction. + DCHECK(!at->IsControlFlow()); + move = at->GetNext()->AsParallelMove(); + if (move == nullptr || IsInputMove(move)) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at->GetNext()); + } + } else { + // Move must happen before the instruction.
+ HInstruction* previous = at->GetPrevious(); + if (previous != nullptr && previous->AsParallelMove() != nullptr) { + if (IsInputMove(previous)) { + previous = previous->GetPrevious(); + } + } + if (previous == nullptr || previous->AsParallelMove() == nullptr) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at); + } else { + move = previous->AsParallelMove(); + } + } + move->AddMove(new (allocator_) MoveOperands(source, destination)); +} + +void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + DCHECK_EQ(block->GetSuccessors().Size(), 1u); + HInstruction* last = block->GetLastInstruction(); + HInstruction* previous = last->GetPrevious(); + HParallelMove* move; + if (previous == nullptr || previous->AsParallelMove() == nullptr) { + move = new (allocator_) HParallelMove(allocator_); + block->InsertInstructionBefore(move, last); + } else { + move = previous->AsParallelMove(); + } + move->AddMove(new (allocator_) MoveOperands(source, destination)); +} + +void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + HInstruction* first = block->GetFirstInstruction(); + HParallelMove* move = first->AsParallelMove(); + if (move == nullptr || IsInputMove(move)) { + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(block->GetLifetimeStart()); + block->InsertInstructionBefore(move, first); + } + move->AddMove(new (allocator_) MoveOperands(source, destination)); +} + +void RegisterAllocator::InsertMoveAfter(HInstruction* instruction, + Location source, + Location destination) const { + if (source.Equals(destination)) return; + + if (instruction->AsPhi() != nullptr) { + InsertParallelMoveAtEntryOf(instruction->GetBlock(), source, destination); + return; + } + + HParallelMove* move = instruction->GetNext()->AsParallelMove(); + if (move == nullptr || IsInputMove(move)) { + move = new (allocator_) HParallelMove(allocator_); + instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext()); + } + move->AddMove(new (allocator_) MoveOperands(source, destination)); +} + +void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { + LiveInterval* current = interval; + if (current->HasSpillSlot() && current->HasRegister()) { + // We spill eagerly, so move must be at definition. + InsertMoveAfter(interval->GetDefinedBy(), + Location::RegisterLocation(ManagedRegister(interval->GetRegister())), + Location::StackSlot(interval->GetParent()->GetSpillSlot())); + } + UsePosition* use = current->GetFirstUse(); + + // Walk over all siblings, updating locations of use positions, and + // connecting them when they are adjacent. + do { + Location source = ConvertToLocation(current); + + // Walk over all uses covered by this interval, and update the location + // information. 
+ while (use != nullptr && use->GetPosition() <= current->GetEnd()) { + if (!use->GetIsEnvironment()) { + LocationSummary* locations = use->GetUser()->GetLocations(); + Location expected_location = locations->InAt(use->GetInputIndex()); + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else { + AddInputMoveFor(use->GetUser(), source, expected_location); + } + } + use = use->GetNext(); + } + + // If the next interval starts just after this one, and has a register, + // insert a move. + LiveInterval* next_sibling = current->GetNextSibling(); + if (next_sibling != nullptr + && next_sibling->HasRegister() + && current->GetEnd() == next_sibling->GetStart()) { + Location destination = ConvertToLocation(next_sibling); + InsertParallelMoveAt(current->GetEnd(), source, destination); + } + current = next_sibling; + } while (current != nullptr); + DCHECK(use == nullptr); +} + +void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, + HBasicBlock* from, + HBasicBlock* to) const { + if (interval->GetNextSibling() == nullptr) { + // Nothing to connect. The whole range was allocated to the same location. + return; + } + + size_t from_position = from->GetLifetimeEnd() - 1; + size_t to_position = to->GetLifetimeStart(); + + LiveInterval* destination = nullptr; + LiveInterval* source = nullptr; + + LiveInterval* current = interval; + + // Check the intervals that cover `from` and `to`. + while ((current != nullptr) && (source == nullptr || destination == nullptr)) { + if (current->Covers(from_position)) { + DCHECK(source == nullptr); + source = current; + } + if (current->Covers(to_position)) { + DCHECK(destination == nullptr); + destination = current; + } + + current = current->GetNextSibling(); + } + + if (destination == source) { + // Interval was not split. + return; + } + + if (!destination->HasRegister()) { + // Values are eagerly spilled. Spill slot already contains appropriate value. + return; + } + + // If `from` has only one successor, we can put the moves at the exit of it. Otherwise + // we need to put the moves at the entry of `to`. + if (from->GetSuccessors().Size() == 1) { + InsertParallelMoveAtExitOf(from, ConvertToLocation(source), ConvertToLocation(destination)); + } else { + DCHECK_EQ(to->GetPredecessors().Size(), 1u); + InsertParallelMoveAtEntryOf(to, ConvertToLocation(source), ConvertToLocation(destination)); + } +} + +// Returns the location of `interval`, or siblings of `interval`, at `position`. +static Location FindLocationAt(LiveInterval* interval, size_t position) { + LiveInterval* current = interval; + while (!current->Covers(position)) { + current = current->GetNextSibling(); + DCHECK(current != nullptr); + } + return ConvertToLocation(current); +} + +void RegisterAllocator::Resolve() { + codegen_->ComputeFrameSize(spill_slots_.Size()); + + // Adjust the Out Location of instructions. + // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + LiveInterval* current = instruction->GetLiveInterval(); + LocationSummary* locations = instruction->GetLocations(); + Location location = locations->Out(); + if (instruction->AsParameterValue() != nullptr) { + // Now that we know the frame size, adjust the parameter's location. 
+ if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->SetOut(location); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + current->SetSpillSlot(location.GetStackIndex()); + locations->SetOut(location); + } else if (current->HasSpillSlot()) { + current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); + } + } + + Location source = ConvertToLocation(current); + + if (location.IsUnallocated()) { + if (location.GetPolicy() == Location::kSameAsFirstInput) { + locations->SetInAt(0, source); + } + locations->SetOut(source); + } else { + DCHECK(source.Equals(location)); + } + } + + // Connect siblings. + for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) { + HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); + ConnectSiblings(instruction->GetLiveInterval()); + } + + // Resolve non-linear control flow across branches. Order does not matter. + for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + BitVector* live = liveness_.GetLiveInSet(*block); + for (uint32_t idx : live->Indexes()) { + HInstruction* current = liveness_.GetInstructionFromSsaIndex(idx); + LiveInterval* interval = current->GetLiveInterval(); + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + ConnectSplitSiblings(interval, block->GetPredecessors().Get(i), block); + } + } + } + + // Resolve phi inputs. Order does not matter. + for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + for (HInstructionIterator it(current->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* phi = it.Current(); + for (size_t i = 0, e = current->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = current->GetPredecessors().Get(i); + DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u); + HInstruction* input = phi->InputAt(i); + Location source = FindLocationAt(input->GetLiveInterval(), + predecessor->GetLastInstruction()->GetLifetimePosition()); + Location destination = ConvertToLocation(phi->GetLiveInterval()); + InsertParallelMoveAtExitOf(predecessor, source, destination); + } + } + } } } // namespace art diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 3393a04d77..1b5585f36c 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -23,7 +23,12 @@ namespace art { class CodeGenerator; +class HBasicBlock; +class HGraph; +class HInstruction; +class HParallelMove; class LiveInterval; +class Location; class SsaLivenessAnalysis; /** @@ -31,26 +36,23 @@ class SsaLivenessAnalysis; */ class RegisterAllocator { public: - RegisterAllocator(ArenaAllocator* allocator, const CodeGenerator& codegen); + RegisterAllocator(ArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis); // Main entry point for the register allocator. Given the liveness analysis, // allocates registers to live intervals. 
- void AllocateRegisters(const SsaLivenessAnalysis& liveness) { - processing_core_registers_ = true; - AllocateRegistersInternal(liveness); - processing_core_registers_ = false; - AllocateRegistersInternal(liveness); - } + void AllocateRegisters(); // Validate that the register allocator did not allocate the same register to // intervals that intersect each other. Returns false if it did not. - bool Validate(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) { + bool Validate(bool log_fatal_on_failure) { processing_core_registers_ = true; - if (!ValidateInternal(liveness, log_fatal_on_failure)) { + if (!ValidateInternal(log_fatal_on_failure)) { return false; } processing_core_registers_ = false; - return ValidateInternal(liveness, log_fatal_on_failure); + return ValidateInternal(log_fatal_on_failure); } // Helper method for validation. Used by unit testing. @@ -61,11 +63,21 @@ class RegisterAllocator { bool processing_core_registers, bool log_fatal_on_failure); + static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); + static bool Supports(InstructionSet instruction_set) { + return instruction_set == kX86; + } + + size_t GetNumberOfSpillSlots() const { + return spill_slots_.Size(); + } + private: // Main methods of the allocator. void LinearScan(); bool TryAllocateFreeReg(LiveInterval* interval); bool AllocateBlockedReg(LiveInterval* interval); + void Resolve(); // Add `interval` in the sorted list of unhandled intervals. void AddToUnhandled(LiveInterval* interval); @@ -76,16 +88,33 @@ class RegisterAllocator { // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; + // Update the interval for the register in `location` to cover [start, end). + void BlockRegister(Location location, size_t start, size_t end, Primitive::Type type); + // Allocate a spill slot for the given interval. void AllocateSpillSlotFor(LiveInterval* interval); + // Connect adjacent siblings within blocks. + void ConnectSiblings(LiveInterval* interval); + + // Connect siblings between block entries and exits. + void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const; + + // Helper methods to insert parallel moves in the graph. + void InsertParallelMoveAtExitOf(HBasicBlock* block, Location source, Location destination) const; + void InsertParallelMoveAtEntryOf(HBasicBlock* block, Location source, Location destination) const; + void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const; + void AddInputMoveFor(HInstruction* instruction, Location source, Location destination) const; + void InsertParallelMoveAt(size_t position, Location source, Location destination) const; + // Helper methods. - void AllocateRegistersInternal(const SsaLivenessAnalysis& liveness); - bool ValidateInternal(const SsaLivenessAnalysis& liveness, bool log_fatal_on_failure) const; - void DumpInterval(std::ostream& stream, LiveInterval* interval); + void AllocateRegistersInternal(); + bool ValidateInternal(bool log_fatal_on_failure) const; + void DumpInterval(std::ostream& stream, LiveInterval* interval) const; ArenaAllocator* const allocator_; - const CodeGenerator& codegen_; + CodeGenerator* const codegen_; + const SsaLivenessAnalysis& liveness_; // List of intervals that must be processed, ordered by start position. Last entry // is the interval that has the lowest start position. 
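The AddToUnhandled change in register_allocator.cc above fixes a corner case of this sorted list: when no interval already in the list starts after the new one, the interval still has to be inserted, at index 0, so the list stays ordered by decreasing start position and popping from the back keeps yielding the interval with the lowest start. A small self-contained sketch of that invariant, with an illustrative Interval struct rather than ART's LiveInterval:

#include <cstdio>
#include <vector>

// Illustrative interval: only the start position matters for ordering.
struct Interval { size_t start; };

// Keep `unhandled` sorted by decreasing start, so the last element always has
// the lowest start. If nothing in the list starts after the new interval,
// insert_at stays 0 and the interval goes to the front.
static void AddToUnhandled(std::vector<Interval>* unhandled, Interval interval) {
  size_t insert_at = 0;
  for (size_t i = unhandled->size(); i > 0; --i) {
    if ((*unhandled)[i - 1].start > interval.start) {
      insert_at = i;
      break;
    }
  }
  unhandled->insert(unhandled->begin() + insert_at, interval);
}

int main() {
  std::vector<Interval> unhandled = {{40}, {30}, {10}};
  AddToUnhandled(&unhandled, {50});  // Nothing starts after 50: front insertion.
  AddToUnhandled(&unhandled, {20});  // Goes between 30 and 10.
  for (const Interval& it : unhandled) std::printf("%zu ", it.start);  // 50 40 30 20 10
  std::printf("\n");
  return 0;
}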
@@ -102,6 +131,10 @@ class RegisterAllocator { // That is, they have a lifetime hole that spans the start of the new interval. GrowableArray<LiveInterval*> inactive_; + // Fixed intervals for physical registers. Such an interval covers the positions + // where an instruction requires a specific register. + GrowableArray<LiveInterval*> physical_register_intervals_; + // The spill slots allocated for live intervals. GrowableArray<size_t> spill_slots_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index ff9b9beefc..bfabc5ad27 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -43,9 +43,9 @@ static bool Check(const uint16_t* data) { CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, *codegen); - register_allocator.AllocateRegisters(liveness); - return register_allocator.Validate(liveness, false); + RegisterAllocator register_allocator(&allocator, codegen, liveness); + register_allocator.AllocateRegisters(); + return register_allocator.Validate(false); } /** @@ -300,9 +300,9 @@ TEST(RegisterAllocatorTest, Loop3) { CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86); SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); - RegisterAllocator register_allocator(&allocator, *codegen); - register_allocator.AllocateRegisters(liveness); - ASSERT_TRUE(register_allocator.Validate(liveness, false)); + RegisterAllocator register_allocator(&allocator, codegen, liveness); + register_allocator.AllocateRegisters(); + ASSERT_TRUE(register_allocator.Validate(false)); HBasicBlock* loop_header = graph->GetBlocks().Get(2); HPhi* phi = loop_header->GetFirstPhi()->AsPhi(); @@ -314,7 +314,7 @@ TEST(RegisterAllocatorTest, Loop3) { ASSERT_NE(phi_interval->GetRegister(), loop_update->GetRegister()); HBasicBlock* return_block = graph->GetBlocks().Get(3); - HReturn* ret = return_block->GetFirstInstruction()->AsReturn(); + HReturn* ret = return_block->GetLastInstruction()->AsReturn(); ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister()); } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 7903ad6cff..fc3eb660d5 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -172,6 +172,7 @@ class LiveInterval : public ArenaObject { // Last use is in the following block. first_range_->start_ = start_block_position; } else { + DCHECK(first_range_->GetStart() > position); // There is a hole in the interval. Create a new range. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } @@ -192,6 +193,7 @@ class LiveInterval : public ArenaObject { // There is a use in the following block. first_range_->start_ = start; } else { + DCHECK(first_range_->GetStart() > end); // There is a hole in the interval. Create a new range. 
first_range_ = new (allocator_) LiveRange(start, end, first_range_); } diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index 925d4a287a..da49524ee2 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -32,10 +32,11 @@ static constexpr size_t kValgrindRedZoneBytes = 8; constexpr size_t Arena::kDefaultSize; template <bool kCount> -const char* ArenaAllocatorStatsImpl<kCount>::kAllocNames[kNumArenaAllocKinds] = { +const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Misc ", "BasicBlock ", "LIR ", + "LIR masks ", "MIR ", "DataFlow ", "GrowList ", @@ -101,6 +102,7 @@ void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first, << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n"; } os << "===== Allocation by kind\n"; + COMPILE_ASSERT(arraysize(kAllocNames) == kNumArenaAllocKinds, check_arraysize_kAllocNames); for (int i = 0; i < kNumArenaAllocKinds; i++) { os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n"; } diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index ac3938ff22..f4bcb1d44d 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -41,6 +41,7 @@ enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocBB, kArenaAllocLIR, + kArenaAllocLIRResourceMask, kArenaAllocMIR, kArenaAllocDFInfo, kArenaAllocGrowableArray, @@ -92,7 +93,7 @@ class ArenaAllocatorStatsImpl { // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL. size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds. - static const char* kAllocNames[kNumArenaAllocKinds]; + static const char* const kAllocNames[]; }; typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 6031e25ebf..dd8e221547 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1650,7 +1650,102 @@ ENTRY art_quick_deoptimize END art_quick_deoptimize -UNIMPLEMENTED art_quick_indexof + /* + * String's indexOf. + * + * TODO: Not very optimized. + * On entry: + * x0: string object (known non-null) + * w1: char to match (known <= 0xFFFF) + * w2: Starting offset in string data + */ +ENTRY art_quick_indexof + ldr w3, [x0, #STRING_COUNT_OFFSET] + ldr w4, [x0, #STRING_OFFSET_OFFSET] + ldr w0, [x0, #STRING_VALUE_OFFSET] // x0 ? + + /* Clamp start to [0..count] */ + cmp w2, #0 + csel w2, wzr, w2, lt + cmp w2, w3 + csel w2, w3, w2, gt + + /* Build a pointer to the start of the string data */ + add x0, x0, #STRING_DATA_OFFSET + add x0, x0, x4, lsl #1 + + /* Save a copy to compute result */ + mov x5, x0 + + /* Build pointer to start of data to compare and pre-bias */ + add x0, x0, x2, lsl #1 + sub x0, x0, #2 + + /* Compute iteration count */ + sub w2, w3, w2 + + /* + * At this point we have: + * x0: start of the data to test + * w1: char to compare + * w2: iteration count + * x5: original start of string data + */ + + subs w2, w2, #4 + b.lt .Lindexof_remainder + +.Lindexof_loop4: + ldrh w6, [x0, #2]! + ldrh w7, [x0, #2]! + ldrh w8, [x0, #2]! + ldrh w9, [x0, #2]! 
+ cmp w6, w1 + b.eq .Lmatch_0 + cmp w7, w1 + b.eq .Lmatch_1 + cmp w8, w1 + b.eq .Lmatch_2 + cmp w9, w1 + b.eq .Lmatch_3 + subs w2, w2, #4 + b.ge .Lindexof_loop4 + +.Lindexof_remainder: + adds w2, w2, #4 + b.eq .Lindexof_nomatch + +.Lindexof_loop1: + ldrh w6, [x0, #2]! + cmp w6, w1 + b.eq .Lmatch_3 + subs w2, w2, #1 + b.ne .Lindexof_loop1 + +.Lindexof_nomatch: + mov x0, #-1 + ret + +.Lmatch_0: + sub x0, x0, #6 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_1: + sub x0, x0, #4 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_2: + sub x0, x0, #2 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_3: + sub x0, x0, x5 + asr x0, x0, #1 + ret +END art_quick_indexof /* * String's compareTo. @@ -1698,6 +1793,7 @@ ENTRY art_quick_string_compareto add x2, x2, #STRING_DATA_OFFSET add x1, x1, #STRING_DATA_OFFSET + // TODO: Tune this value. // Check for long string, do memcmp16 for them. cmp w3, #28 // Constant from arm32. bgt .Ldo_memcmp16 diff --git a/runtime/base/logging.h b/runtime/base/logging.h index 814195c7fa..caeb946ff0 100644 --- a/runtime/base/logging.h +++ b/runtime/base/logging.h @@ -66,6 +66,16 @@ } \ } while (false) +// CHECK that can be used in a constexpr function. For example, +// constexpr int half(int n) { +// return +// DCHECK_CONSTEXPR(n >= 0, , 0) +// CHECK_CONSTEXPR((n & 1) == 0), << "Extra debugging output: n = " << n, 0) +// n / 2; +// } +#define CHECK_CONSTEXPR(x, out, dummy) \ + (UNLIKELY(!(x))) ? (LOG(FATAL) << "Check failed: " << #x out, dummy) : + #ifndef NDEBUG #define DCHECK(x) CHECK(x) @@ -77,6 +87,7 @@ #define DCHECK_GT(x, y) CHECK_GT(x, y) #define DCHECK_STREQ(s1, s2) CHECK_STREQ(s1, s2) #define DCHECK_STRNE(s1, s2) CHECK_STRNE(s1, s2) +#define DCHECK_CONSTEXPR(x, out, dummy) CHECK_CONSTEXPR(x, out, dummy) #else // NDEBUG @@ -116,6 +127,9 @@ while (false) \ CHECK_STRNE(str1, str2) +#define DCHECK_CONSTEXPR(x, out, dummy) \ + (false && (x)) ? (dummy) : + #endif #define LOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, -1).stream() diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index 52cece64c1..61ea87059f 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -66,6 +66,64 @@ static bool IsDataSectionType(uint32_t map_type) { return true; } +const char* DexFileVerifier::CheckLoadStringByIdx(uint32_t idx, const char* error_string) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumStringIds(), error_string))) { + return nullptr; + } + return dex_file_->StringDataByIdx(idx); +} + +const char* DexFileVerifier::CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_string) { + if (UNLIKELY(!CheckIndex(type_idx, dex_file_->NumTypeIds(), error_string))) { + return nullptr; + } + const DexFile::TypeId& type_id = dex_file_->GetTypeId(type_idx); + uint32_t idx = type_id.descriptor_idx_; + return CheckLoadStringByIdx(idx, error_string); +} + +const DexFile::FieldId* DexFileVerifier::CheckLoadFieldId(uint32_t idx, const char* error_string) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumFieldIds(), error_string))) { + return nullptr; + } + return &dex_file_->GetFieldId(idx); +} + +const DexFile::MethodId* DexFileVerifier::CheckLoadMethodId(uint32_t idx, const char* err_string) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumMethodIds(), err_string))) { + return nullptr; + } + return &dex_file_->GetMethodId(idx); +} + +// Helper macro to load string and return false on error. 
+#define LOAD_STRING(var, idx, error) \ + const char* var = CheckLoadStringByIdx(idx, error); \ + if (UNLIKELY(var == nullptr)) { \ + return false; \ + } + +// Helper macro to load string by type idx and return false on error. +#define LOAD_STRING_BY_TYPE(var, type_idx, error) \ + const char* var = CheckLoadStringByTypeIdx(type_idx, error); \ + if (UNLIKELY(var == nullptr)) { \ + return false; \ + } + +// Helper macro to load method id. Return last parameter on error. +#define LOAD_METHOD(var, idx, error_string, error_val) \ + const DexFile::MethodId* var = CheckLoadMethodId(idx, error_string); \ + if (UNLIKELY(var == nullptr)) { \ + return error_val; \ + } + +// Helper macro to load method id. Return last parameter on error. +#define LOAD_FIELD(var, idx, fmt, error_val) \ + const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \ + if (UNLIKELY(var == nullptr)) { \ + return error_val; \ + } + bool DexFileVerifier::Verify(const DexFile* dex_file, const byte* begin, size_t size, const char* location, std::string* error_msg) { std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location)); @@ -1320,41 +1378,43 @@ bool DexFileVerifier::CheckOffsetToTypeMap(size_t offset, uint16_t type) { return true; } -uint16_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr) const { +uint32_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr) { ClassDataItemIterator it(*dex_file_, ptr); if (it.HasNextStaticField() || it.HasNextInstanceField()) { - const DexFile::FieldId& field = dex_file_->GetFieldId(it.GetMemberIndex()); - return field.class_idx_; + LOAD_FIELD(field, it.GetMemberIndex(), "first_class_data_definer field_id", 0x10000U) + return field->class_idx_; } if (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) { - const DexFile::MethodId& method = dex_file_->GetMethodId(it.GetMemberIndex()); - return method.class_idx_; + LOAD_METHOD(method, it.GetMemberIndex(), "first_class_data_definer method_id", 0x10000U) + return method->class_idx_; } return DexFile::kDexNoIndex16; } -uint16_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const { +uint32_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr) { const DexFile::AnnotationsDirectoryItem* item = reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr); if (item->fields_size_ != 0) { DexFile::FieldAnnotationsItem* field_items = (DexFile::FieldAnnotationsItem*) (item + 1); - const DexFile::FieldId& field = dex_file_->GetFieldId(field_items[0].field_idx_); - return field.class_idx_; + LOAD_FIELD(field, field_items[0].field_idx_, "first_annotations_dir_definer field_id", 0x10000U) + return field->class_idx_; } if (item->methods_size_ != 0) { DexFile::MethodAnnotationsItem* method_items = (DexFile::MethodAnnotationsItem*) (item + 1); - const DexFile::MethodId& method = dex_file_->GetMethodId(method_items[0].method_idx_); - return method.class_idx_; + LOAD_METHOD(method, method_items[0].method_idx_, "first_annotations_dir_definer method id", + 0x10000U) + return method->class_idx_; } if (item->parameters_size_ != 0) { DexFile::ParameterAnnotationsItem* parameter_items = (DexFile::ParameterAnnotationsItem*) (item + 1); - const DexFile::MethodId& method = dex_file_->GetMethodId(parameter_items[0].method_idx_); - return method.class_idx_; + LOAD_METHOD(method, parameter_items[0].method_idx_, "first_annotations_dir_definer method id", + 0x10000U) + return method->class_idx_; } return DexFile::kDexNoIndex16; @@ -1385,7 +1445,8 @@ bool 
DexFileVerifier::CheckInterStringIdItem() { bool DexFileVerifier::CheckInterTypeIdItem() { const DexFile::TypeId* item = reinterpret_cast<const DexFile::TypeId*>(ptr_); - const char* descriptor = dex_file_->StringDataByIdx(item->descriptor_idx_); + + LOAD_STRING(descriptor, item->descriptor_idx_, "inter_type_id_item descriptor_idx") // Check that the descriptor is a valid type. if (UNLIKELY(!IsValidDescriptor(descriptor))) { @@ -1409,14 +1470,17 @@ bool DexFileVerifier::CheckInterTypeIdItem() { bool DexFileVerifier::CheckInterProtoIdItem() { const DexFile::ProtoId* item = reinterpret_cast<const DexFile::ProtoId*>(ptr_); - const char* shorty = dex_file_->StringDataByIdx(item->shorty_idx_); + + LOAD_STRING(shorty, item->shorty_idx_, "inter_proto_id_item shorty_idx") + if (item->parameters_off_ != 0 && !CheckOffsetToTypeMap(item->parameters_off_, DexFile::kDexTypeTypeList)) { return false; } // Check the return type and advance the shorty. - if (!CheckShortyDescriptorMatch(*shorty, dex_file_->StringByTypeIdx(item->return_type_idx_), true)) { + LOAD_STRING_BY_TYPE(return_type, item->return_type_idx_, "inter_proto_id_item return_type_idx") + if (!CheckShortyDescriptorMatch(*shorty, return_type, true)) { return false; } shorty++; @@ -1477,21 +1541,21 @@ bool DexFileVerifier::CheckInterFieldIdItem() { const DexFile::FieldId* item = reinterpret_cast<const DexFile::FieldId*>(ptr_); // Check that the class descriptor is valid. - const char* descriptor = dex_file_->StringByTypeIdx(item->class_idx_); - if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) { - ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", descriptor); + LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_field_id_item class_idx") + if (UNLIKELY(!IsValidDescriptor(class_descriptor) || class_descriptor[0] != 'L')) { + ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", class_descriptor); return false; } // Check that the type descriptor is a valid field name. - descriptor = dex_file_->StringByTypeIdx(item->type_idx_); - if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] == 'V')) { - ErrorStringPrintf("Invalid descriptor for type_idx: '%s'", descriptor); + LOAD_STRING_BY_TYPE(type_descriptor, item->type_idx_, "inter_field_id_item type_idx") + if (UNLIKELY(!IsValidDescriptor(type_descriptor) || type_descriptor[0] == 'V')) { + ErrorStringPrintf("Invalid descriptor for type_idx: '%s'", type_descriptor); return false; } // Check that the name is valid. - descriptor = dex_file_->StringDataByIdx(item->name_idx_); + LOAD_STRING(descriptor, item->name_idx_, "inter_field_id_item name_idx") if (UNLIKELY(!IsValidMemberName(descriptor))) { ErrorStringPrintf("Invalid field name: '%s'", descriptor); return false; @@ -1524,19 +1588,26 @@ bool DexFileVerifier::CheckInterMethodIdItem() { const DexFile::MethodId* item = reinterpret_cast<const DexFile::MethodId*>(ptr_); // Check that the class descriptor is a valid reference name. 
- const char* descriptor = dex_file_->StringByTypeIdx(item->class_idx_); - if (UNLIKELY(!IsValidDescriptor(descriptor) || (descriptor[0] != 'L' && descriptor[0] != '['))) { - ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", descriptor); + LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_method_id_item class_idx") + if (UNLIKELY(!IsValidDescriptor(class_descriptor) || (class_descriptor[0] != 'L' && + class_descriptor[0] != '['))) { + ErrorStringPrintf("Invalid descriptor for class_idx: '%s'", class_descriptor); return false; } // Check that the name is valid. - descriptor = dex_file_->StringDataByIdx(item->name_idx_); + LOAD_STRING(descriptor, item->name_idx_, "inter_method_id_item name_idx") if (UNLIKELY(!IsValidMemberName(descriptor))) { ErrorStringPrintf("Invalid method name: '%s'", descriptor); return false; } + // Check that the proto id is valid. + if (UNLIKELY(!CheckIndex(item->proto_idx_, dex_file_->NumProtoIds(), + "inter_method_id_item proto_idx"))) { + return false; + } + // Check ordering between items. This relies on the other sections being in order. if (previous_item_ != NULL) { const DexFile::MethodId* prev_item = reinterpret_cast<const DexFile::MethodId*>(previous_item_); @@ -1562,11 +1633,10 @@ bool DexFileVerifier::CheckInterMethodIdItem() { bool DexFileVerifier::CheckInterClassDefItem() { const DexFile::ClassDef* item = reinterpret_cast<const DexFile::ClassDef*>(ptr_); - uint32_t class_idx = item->class_idx_; - const char* descriptor = dex_file_->StringByTypeIdx(class_idx); - if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) { - ErrorStringPrintf("Invalid class descriptor: '%s'", descriptor); + LOAD_STRING_BY_TYPE(class_descriptor, item->class_idx_, "inter_class_def_item class_idx") + if (UNLIKELY(!IsValidDescriptor(class_descriptor) || class_descriptor[0] != 'L')) { + ErrorStringPrintf("Invalid class descriptor: '%s'", class_descriptor); return false; } @@ -1588,9 +1658,10 @@ bool DexFileVerifier::CheckInterClassDefItem() { } if (item->superclass_idx_ != DexFile::kDexNoIndex16) { - descriptor = dex_file_->StringByTypeIdx(item->superclass_idx_); - if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) { - ErrorStringPrintf("Invalid superclass: '%s'", descriptor); + LOAD_STRING_BY_TYPE(superclass_descriptor, item->superclass_idx_, + "inter_class_def_item superclass_idx") + if (UNLIKELY(!IsValidDescriptor(superclass_descriptor) || superclass_descriptor[0] != 'L')) { + ErrorStringPrintf("Invalid superclass: '%s'", superclass_descriptor); return false; } } @@ -1601,9 +1672,10 @@ bool DexFileVerifier::CheckInterClassDefItem() { // Ensure that all interfaces refer to classes (not arrays or primitives). for (uint32_t i = 0; i < size; i++) { - descriptor = dex_file_->StringByTypeIdx(interfaces->GetTypeItem(i).type_idx_); - if (UNLIKELY(!IsValidDescriptor(descriptor) || descriptor[0] != 'L')) { - ErrorStringPrintf("Invalid interface: '%s'", descriptor); + LOAD_STRING_BY_TYPE(inf_descriptor, interfaces->GetTypeItem(i).type_idx_, + "inter_class_def_item interface type_idx") + if (UNLIKELY(!IsValidDescriptor(inf_descriptor) || inf_descriptor[0] != 'L')) { + ErrorStringPrintf("Invalid interface: '%s'", inf_descriptor); return false; } } @@ -1627,7 +1699,10 @@ bool DexFileVerifier::CheckInterClassDefItem() { // Check that references in class_data_item are to the right class. 
if (item->class_data_off_ != 0) { const byte* data = begin_ + item->class_data_off_; - uint16_t data_definer = FindFirstClassDataDefiner(data); + uint32_t data_definer = FindFirstClassDataDefiner(data); + if (data_definer >= 0x10000U) { + return false; + } if (UNLIKELY((data_definer != item->class_idx_) && (data_definer != DexFile::kDexNoIndex16))) { ErrorStringPrintf("Invalid class_data_item"); return false; @@ -1637,7 +1712,10 @@ bool DexFileVerifier::CheckInterClassDefItem() { // Check that references in annotations_directory_item are to right class. if (item->annotations_off_ != 0) { const byte* data = begin_ + item->annotations_off_; - uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data); + uint32_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data); + if (annotations_definer >= 0x10000U) { + return false; + } if (UNLIKELY((annotations_definer != item->class_idx_) && (annotations_definer != DexFile::kDexNoIndex16))) { ErrorStringPrintf("Invalid annotations_directory_item"); @@ -1699,11 +1777,14 @@ bool DexFileVerifier::CheckInterAnnotationSetItem() { bool DexFileVerifier::CheckInterClassDataItem() { ClassDataItemIterator it(*dex_file_, ptr_); - uint16_t defining_class = FindFirstClassDataDefiner(ptr_); + uint32_t defining_class = FindFirstClassDataDefiner(ptr_); + if (defining_class >= 0x10000U) { + return false; + } for (; it.HasNextStaticField() || it.HasNextInstanceField(); it.Next()) { - const DexFile::FieldId& field = dex_file_->GetFieldId(it.GetMemberIndex()); - if (UNLIKELY(field.class_idx_ != defining_class)) { + LOAD_FIELD(field, it.GetMemberIndex(), "inter_class_data_item field_id", false) + if (UNLIKELY(field->class_idx_ != defining_class)) { ErrorStringPrintf("Mismatched defining class for class_data_item field"); return false; } @@ -1713,8 +1794,8 @@ bool DexFileVerifier::CheckInterClassDataItem() { if (code_off != 0 && !CheckOffsetToTypeMap(code_off, DexFile::kDexTypeCodeItem)) { return false; } - const DexFile::MethodId& method = dex_file_->GetMethodId(it.GetMemberIndex()); - if (UNLIKELY(method.class_idx_ != defining_class)) { + LOAD_METHOD(method, it.GetMemberIndex(), "inter_class_data_item method_id", false) + if (UNLIKELY(method->class_idx_ != defining_class)) { ErrorStringPrintf("Mismatched defining class for class_data_item method"); return false; } @@ -1727,7 +1808,10 @@ bool DexFileVerifier::CheckInterClassDataItem() { bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() { const DexFile::AnnotationsDirectoryItem* item = reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr_); - uint16_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_); + uint32_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_); + if (defining_class >= 0x10000U) { + return false; + } if (item->class_annotations_off_ != 0 && !CheckOffsetToTypeMap(item->class_annotations_off_, DexFile::kDexTypeAnnotationSetItem)) { @@ -1739,8 +1823,8 @@ bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() { reinterpret_cast<const DexFile::FieldAnnotationsItem*>(item + 1); uint32_t field_count = item->fields_size_; for (uint32_t i = 0; i < field_count; i++) { - const DexFile::FieldId& field = dex_file_->GetFieldId(field_item->field_idx_); - if (UNLIKELY(field.class_idx_ != defining_class)) { + LOAD_FIELD(field, field_item->field_idx_, "inter_annotations_directory_item field_id", false) + if (UNLIKELY(field->class_idx_ != defining_class)) { ErrorStringPrintf("Mismatched defining class for field_annotation"); return false; } @@ -1755,8 
+1839,9 @@ bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() { reinterpret_cast<const DexFile::MethodAnnotationsItem*>(field_item); uint32_t method_count = item->methods_size_; for (uint32_t i = 0; i < method_count; i++) { - const DexFile::MethodId& method = dex_file_->GetMethodId(method_item->method_idx_); - if (UNLIKELY(method.class_idx_ != defining_class)) { + LOAD_METHOD(method, method_item->method_idx_, "inter_annotations_directory_item method_id", + false) + if (UNLIKELY(method->class_idx_ != defining_class)) { ErrorStringPrintf("Mismatched defining class for method_annotation"); return false; } @@ -1771,8 +1856,9 @@ bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() { reinterpret_cast<const DexFile::ParameterAnnotationsItem*>(method_item); uint32_t parameter_count = item->parameters_size_; for (uint32_t i = 0; i < parameter_count; i++) { - const DexFile::MethodId& parameter_method = dex_file_->GetMethodId(parameter_item->method_idx_); - if (UNLIKELY(parameter_method.class_idx_ != defining_class)) { + LOAD_METHOD(parameter_method, parameter_item->method_idx_, + "inter_annotations_directory_item parameter method_id", false) + if (UNLIKELY(parameter_method->class_idx_ != defining_class)) { ErrorStringPrintf("Mismatched defining class for parameter_annotation"); return false; } diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h index 3337785428..7489dcde86 100644 --- a/runtime/dex_file_verifier.h +++ b/runtime/dex_file_verifier.h @@ -71,8 +71,11 @@ class DexFileVerifier { bool CheckIntraSection(); bool CheckOffsetToTypeMap(size_t offset, uint16_t type); - uint16_t FindFirstClassDataDefiner(const byte* ptr) const; - uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const; + + // Note: the result type of the following methods is wider than that of the underlying index + // (16b vs 32b). This is so that we can define an error value (anything >= 2^16). + uint32_t FindFirstClassDataDefiner(const byte* ptr); + uint32_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr); bool CheckInterStringIdItem(); bool CheckInterTypeIdItem(); @@ -88,6 +91,16 @@ class DexFileVerifier { bool CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type); bool CheckInterSection(); + // Load a string by (type) index. Checks whether the index is in bounds, printing the error if + // not. If there is an error, nullptr is returned. + const char* CheckLoadStringByIdx(uint32_t idx, const char* error_fmt); + const char* CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_fmt); + + // Load a field/method Id by index. Checks whether the index is in bounds, printing the error if + // not. If there is an error, nullptr is returned. + const DexFile::FieldId* CheckLoadFieldId(uint32_t idx, const char* error_fmt); + const DexFile::MethodId* CheckLoadMethodId(uint32_t idx, const char* error_fmt); + void ErrorStringPrintf(const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR; diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc new file mode 100644 index 0000000000..d0ce00fc66 --- /dev/null +++ b/runtime/dex_file_verifier_test.cc @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_file_verifier.h" + +#include <memory> +#include "zlib.h" + +#include "common_runtime_test.h" +#include "base/macros.h" + +namespace art { + +class DexFileVerifierTest : public CommonRuntimeTest {}; + +static const byte kBase64Map[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 254, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, // NOLINT + 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, // NOLINT + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, // NOLINT + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, // NOLINT + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255 +}; + +static inline byte* DecodeBase64(const char* src, size_t* dst_size) { + std::vector<byte> tmp; + uint32_t t = 0, y = 0; + int g = 3; + for (size_t i = 0; src[i] != '\0'; ++i) { + byte c = kBase64Map[src[i] & 0xFF]; + if (c == 255) continue; + // the final = symbols are read and used to trim the remaining bytes + if (c == 254) { + c = 0; + // prevent g < 0 which would potentially allow an overflow later + if (--g < 0) { + *dst_size = 0; + return nullptr; + } + } else if (g != 3) { + // we only allow = to be at the end + *dst_size = 0; + return nullptr; + } + t = (t << 6) | c; + if (++y == 4) { + tmp.push_back((t >> 16) & 255); + if (g > 1) { + tmp.push_back((t >> 8) & 255); + } + if (g > 2) { + tmp.push_back(t & 255); + } + y = t = 0; + } + } + if (y != 0) { + *dst_size = 0; + return nullptr; + } + std::unique_ptr<byte[]> dst(new byte[tmp.size()]); + if (dst_size != nullptr) { + *dst_size = tmp.size(); + } else { + *dst_size = 0; + } + std::copy(tmp.begin(), tmp.end(), dst.get()); + return dst.release(); +} + +static const DexFile* OpenDexFileBase64(const char* base64, const char* location, + std::string* error_msg) { + // decode base64 + CHECK(base64 != NULL); + size_t length; + std::unique_ptr<byte[]> dex_bytes(DecodeBase64(base64, &length)); + CHECK(dex_bytes.get() != NULL); + + // write to provided file + std::unique_ptr<File> file(OS::CreateEmptyFile(location)); + CHECK(file.get() != NULL); + if (!file->WriteFully(dex_bytes.get(), length)) { + PLOG(FATAL) << "Failed to write base64 as dex file"; + } + file.reset(); + + // read dex file + 
ScopedObjectAccess soa(Thread::Current()); + return DexFile::Open(location, location, error_msg); +} + + +// For reference. +static const char kGoodTestDex[] = + "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN" + "AAAAcAAAAAYAAACkAAAAAgAAALwAAAABAAAA1AAAAAQAAADcAAAAAQAAAPwAAACIAQAAHAEAAFoB" + "AABiAQAAagEAAIEBAACVAQAAqQEAAL0BAADDAQAAzgEAANEBAADVAQAA2gEAAN8BAAABAAAAAgAA" + "AAMAAAAEAAAABQAAAAgAAAAIAAAABQAAAAAAAAAJAAAABQAAAFQBAAAEAAEACwAAAAAAAAAAAAAA" + "AAAAAAoAAAABAAEADAAAAAIAAAAAAAAAAAAAAAEAAAACAAAAAAAAAAcAAAAAAAAA8wEAAAAAAAAB" + "AAEAAQAAAOgBAAAEAAAAcBADAAAADgACAAAAAgAAAO0BAAAIAAAAYgAAABoBBgBuIAIAEAAOAAEA" + "AAADAAY8aW5pdD4ABkxUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABJMamF2YS9sYW5nL09i" + "amVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwASTGphdmEvbGFuZy9TeXN0ZW07AARUZXN0AAlUZXN0" + "LmphdmEAAVYAAlZMAANmb28AA291dAAHcHJpbnRsbgABAAcOAAMABw54AAAAAgAAgYAEnAIBCbQC" + "AAAADQAAAAAAAAABAAAAAAAAAAEAAAANAAAAcAAAAAIAAAAGAAAApAAAAAMAAAACAAAAvAAAAAQA" + "AAABAAAA1AAAAAUAAAAEAAAA3AAAAAYAAAABAAAA/AAAAAEgAAACAAAAHAEAAAEQAAABAAAAVAEA" + "AAIgAAANAAAAWgEAAAMgAAACAAAA6AEAAAAgAAABAAAA8wEAAAAQAAABAAAABAIAAA=="; + +TEST_F(DexFileVerifierTest, GoodDex) { + ScratchFile tmp; + std::string error_msg; + std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kGoodTestDex, tmp.GetFilename().c_str(), + &error_msg)); + ASSERT_TRUE(raw.get() != nullptr) << error_msg; +} + +static void FixUpChecksum(byte* dex_file) { + DexFile::Header* header = reinterpret_cast<DexFile::Header*>(dex_file); + uint32_t expected_size = header->file_size_; + uint32_t adler_checksum = adler32(0L, Z_NULL, 0); + const uint32_t non_sum = sizeof(DexFile::Header::magic_) + sizeof(DexFile::Header::checksum_); + const byte* non_sum_ptr = dex_file + non_sum; + adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum); + header->checksum_ = adler_checksum; +} + +static const DexFile* FixChecksumAndOpen(byte* bytes, size_t length, const char* location, + std::string* error_msg) { + // Check data. + CHECK(bytes != nullptr); + + // Fixup of checksum. + FixUpChecksum(bytes); + + // write to provided file + std::unique_ptr<File> file(OS::CreateEmptyFile(location)); + CHECK(file.get() != NULL); + if (!file->WriteFully(bytes, length)) { + PLOG(FATAL) << "Failed to write base64 as dex file"; + } + file.reset(); + + // read dex file + ScopedObjectAccess soa(Thread::Current()); + return DexFile::Open(location, location, error_msg); +} + +static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val, + std::string* error_msg) { + // Decode base64. + size_t length; + std::unique_ptr<byte[]> dex_bytes(DecodeBase64(kGoodTestDex, &length)); + CHECK(dex_bytes.get() != NULL); + + // Make modifications. + dex_bytes.get()[offset] = new_val; + + // Fixup and load. + std::unique_ptr<const DexFile> file(FixChecksumAndOpen(dex_bytes.get(), length, location, + error_msg)); + return file.get() != nullptr; +} + +TEST_F(DexFileVerifierTest, MethodId) { + { + // Class error. + ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg; + } + + { + // Proto error. + ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg; + } + + { + // Name error. 
+ ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg; + } +} + +} // namespace art diff --git a/runtime/reflection.cc b/runtime/reflection.cc index 89cdb4dc7e..fe5e1043a9 100644 --- a/runtime/reflection.cc +++ b/runtime/reflection.cc @@ -815,6 +815,10 @@ bool UnboxPrimitiveForResult(const ThrowLocation& throw_location, mirror::Object bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) { NthCallerVisitor visitor(Thread::Current(), 2); visitor.WalkStack(); + if (UNLIKELY(visitor.caller == nullptr)) { + // The caller is an attached native thread. + return (access_flags & kAccPublic) != 0; + } mirror::Class* caller_class = visitor.caller->GetDeclaringClass(); if (((access_flags & kAccPublic) != 0) || (caller_class == declaring_class)) { diff --git a/runtime/utils.h b/runtime/utils.h index 6a4198fcfc..6d52459ec8 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -140,9 +140,8 @@ struct TypeIdentity { template<typename T> static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) { return - // DCHECK(IsPowerOfTwo(n)) in a form acceptable in a constexpr function: - (kIsDebugBuild && !IsPowerOfTwo(n)) ? (LOG(FATAL) << n << " isn't a power of 2", T(0)) - : (x & -n); + DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0)) + (x & -n); } template<typename T> diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index e5dcbb0ac4..9d1f6f4de1 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -1334,6 +1334,31 @@ bool MethodVerifier::CodeFlowVerifyMethod() { insn_flags_[insn_idx].ClearChanged(); } + // When we're in compiler mode, do not accept quickened instructions. + // We explicitly iterate over *all* instructions to check code that may be unreachable and + // missed by the loop above. + if (Runtime::Current() != nullptr && Runtime::Current()->IsCompiler()) { + uint32_t insn_idx = 0; + for (; insn_idx < insns_size; insn_idx += insn_flags_[insn_idx].GetLengthInCodeUnits()) { + const Instruction* inst = Instruction::At(insns + insn_idx); + switch (inst->Opcode()) { + case Instruction::IGET_QUICK: + case Instruction::IGET_WIDE_QUICK: + case Instruction::IGET_OBJECT_QUICK: + case Instruction::IPUT_QUICK: + case Instruction::IPUT_WIDE_QUICK: + case Instruction::IPUT_OBJECT_QUICK: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Quickened instructions not allowed. "; + return false; + + default: + break; + } + } + } + if (gDebugVerify) { /* * Scan for dead code. There's nothing "evil" about dead code diff --git a/test/404-optimizing-allocator/expected.txt b/test/404-optimizing-allocator/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/404-optimizing-allocator/expected.txt diff --git a/test/404-optimizing-allocator/info.txt b/test/404-optimizing-allocator/info.txt new file mode 100644 index 0000000000..930d42f3f1 --- /dev/null +++ b/test/404-optimizing-allocator/info.txt @@ -0,0 +1 @@ +Initial tests for testing the optimizing compiler's register allocator. 
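
A note on the byte offsets used in the MethodId verifier test above: decoding the kGoodTestDex header appears to give method_ids_off = 0xDC (220), so offsets 220, 222 and 224 land on the three fields of the first method_id_item. A minimal C++ sketch of that layout, illustrative only and not code from this change:

#include <cstddef>
#include <cstdint>

// Dex method_id_item layout (8 bytes); comments map each field to the patched
// file offset under the assumption that method_ids_off is 0xDC = 220.
struct MethodIdItemSketch {
  uint16_t class_idx_;   // file offset 220 -> "Class error" case
  uint16_t proto_idx_;   // file offset 222 -> "Proto error" case
  uint32_t name_idx_;    // file offset 224 -> "Name error" case
};

static_assert(offsetof(MethodIdItemSketch, class_idx_) == 0, "class_idx_ first");
static_assert(offsetof(MethodIdItemSketch, proto_idx_) == 2, "proto_idx_ at +2");
static_assert(offsetof(MethodIdItemSketch, name_idx_) == 4, "name_idx_ at +4");

// Writing 0xFF into any of these fields yields an index well past the small
// string/type/proto tables of the test dex, which the new bounds checks in
// CheckInterMethodIdItem report instead of reading out of bounds.
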
diff --git a/test/404-optimizing-allocator/src/Main.java b/test/404-optimizing-allocator/src/Main.java new file mode 100644 index 0000000000..60477f9d8e --- /dev/null +++ b/test/404-optimizing-allocator/src/Main.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Note that $opt$reg$ is a marker for the optimizing compiler to ensure +// it does use its register allocator. + +public class Main { + public static void main(String[] args) { + + expectEquals(4, $opt$reg$TestLostCopy()); + expectEquals(-10, $opt$reg$TestTwoLive()); + expectEquals(-20, $opt$reg$TestThreeLive()); + expectEquals(5, $opt$reg$TestFourLive()); + expectEquals(10, $opt$reg$TestMultipleLive()); + expectEquals(1, $opt$reg$TestWithBreakAndContinue()); + expectEquals(-15, $opt$reg$testSpillInIf(5, 6, 7)); + expectEquals(-567, $opt$reg$TestAgressiveLive(1, 2, 3, 4, 5, 6, 7)); + } + + public static int $opt$reg$TestLostCopy() { + int a = 0; + int b = 0; + do { + b = a; + a++; + } while (a != 5); + return b; + } + + public static int $opt$reg$TestTwoLive() { + int a = 0; + int b = 0; + do { + a++; + b += 3; + } while (a != 5); + return a - b; + } + + public static int $opt$reg$TestThreeLive() { + int a = 0; + int b = 0; + int c = 0; + do { + a++; + b += 3; + c += 2; + } while (a != 5); + return a - b - c; + } + + public static int $opt$reg$TestFourLive() { + int a = 0; + int b = 0; + int c = 0; + int d = 0; + do { + a++; + b += 3; + c += 2; + d++; + } while (a != 5); + return d; + } + + public static int $opt$reg$TestMultipleLive() { + int a = 0; + int b = 0; + int c = 0; + int d = 0; + int e = 0; + int f = 0; + int g = 0; + do { + a++; + b++; + c++; + d++; + e += 3; + f += 2; + g += 2; + } while (a != 5); + return f; + } + + public static int $opt$reg$TestWithBreakAndContinue() { + int a = 0; + int b = 0; + do { + a++; + if (a == 2) { + continue; + } + b++; + if (a == 5) { + break; + } + } while (true); + return a - b; + } + + public static int $opt$reg$testSpillInIf(int a, int b, int c) { + int d = 0; + int e = 0; + if (a == 5) { + b++; + c++; + d += 2; + e += 3; + } + + return a - b - c - d - e; + } + + public static int $opt$reg$TestAgressiveLive(int a, int b, int c, int d, int e, int f, int g) { + int h = a - b; + int i = c - d; + int j = e - f; + int k = 42 + g - a; + do { + b++; + while (k != 1) { + --k; + ++i; + if (i == 9) { + ++i; + } + j += 5; + } + k = 9; + h++; + } while (h != 5); + return a - b - c - d - e - f - g - h - i - j - k; + } + + public static void expectEquals(int expected, int value) { + if (expected != value) { + throw new Error("Expected: " + expected + ", got: " + value); + } + } +} diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java index 3c4ed3505f..33418a98b2 100644 --- a/test/JniTest/JniTest.java +++ b/test/JniTest/JniTest.java @@ -21,6 +21,7 @@ class JniTest { System.loadLibrary("arttest"); testFindClassOnAttachedNativeThread(); testFindFieldOnAttachedNativeThread(); + 
testReflectFieldGetFromAttachedNativeThreadNative(); testCallStaticVoidMethodOnSubClass(); testGetMirandaMethod(); testZeroLengthByteBuffers(); @@ -34,6 +35,10 @@ class JniTest { private static boolean testFindFieldOnAttachedNativeThreadField; + private static native void testReflectFieldGetFromAttachedNativeThreadNative(); + + public static boolean testReflectFieldGetFromAttachedNativeThreadField; + private static void testFindFieldOnAttachedNativeThread() { testFindFieldOnAttachedNativeThreadNative(); if (!testFindFieldOnAttachedNativeThreadField) { diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc index 024ba53708..36cad72ace 100644 --- a/test/JniTest/jni_test.cc +++ b/test/JniTest/jni_test.cc @@ -103,6 +103,66 @@ extern "C" JNIEXPORT void JNICALL Java_JniTest_testFindFieldOnAttachedNativeThre assert(pthread_join_result == 0); } +static void* testReflectFieldGetFromAttachedNativeThread(void*) { + assert(jvm != NULL); + + JNIEnv* env = NULL; + JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL }; + int attach_result = jvm->AttachCurrentThread(&env, &args); + assert(attach_result == 0); + + jclass clazz = env->FindClass("JniTest"); + assert(clazz != NULL); + assert(!env->ExceptionCheck()); + + jclass class_clazz = env->FindClass("java/lang/Class"); + assert(class_clazz != NULL); + assert(!env->ExceptionCheck()); + + jmethodID getFieldMetodId = env->GetMethodID(class_clazz, "getField", + "(Ljava/lang/String;)Ljava/lang/reflect/Field;"); + assert(getFieldMetodId != NULL); + assert(!env->ExceptionCheck()); + + jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField"); + assert(field_name != NULL); + assert(!env->ExceptionCheck()); + + jobject field = env->CallObjectMethod(clazz, getFieldMetodId, field_name); + assert(field != NULL); + assert(!env->ExceptionCheck()); + + jclass field_clazz = env->FindClass("java/lang/reflect/Field"); + assert(field_clazz != NULL); + assert(!env->ExceptionCheck()); + + jmethodID getBooleanMetodId = env->GetMethodID(field_clazz, "getBoolean", + "(Ljava/lang/Object;)Z"); + assert(getBooleanMetodId != NULL); + assert(!env->ExceptionCheck()); + + jboolean value = env->CallBooleanMethod(field, getBooleanMetodId, /* ignored */ clazz); + assert(value == false); + assert(!env->ExceptionCheck()); + + int detach_result = jvm->DetachCurrentThread(); + assert(detach_result == 0); + return NULL; +} + +// http://b/15539150 +extern "C" JNIEXPORT void JNICALL Java_JniTest_testReflectFieldGetFromAttachedNativeThreadNative( + JNIEnv*, jclass) { + pthread_t pthread; + int pthread_create_result = pthread_create(&pthread, + NULL, + testReflectFieldGetFromAttachedNativeThread, + NULL); + assert(pthread_create_result == 0); + int pthread_join_result = pthread_join(pthread, NULL); + assert(pthread_join_result == 0); +} + // http://b/11243757 extern "C" JNIEXPORT void JNICALL Java_JniTest_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env, |
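
For readers tracing the art_quick_indexof routine added earlier in this change, a rough C++ sketch of what the assembly computes; the names are illustrative, and the 4-way unrolled loop, the pre-biased data pointer and the per-.Lmatch_* subtractions in the hand-written version are pointer-arithmetic details of this same scan:

#include <cstdint>

// Sketch only: chars/count stand in for the String's value array and count
// field that the assembly loads via the STRING_*_OFFSET constants.
static int32_t IndexOfSketch(const uint16_t* chars, int32_t count,
                             uint16_t ch, int32_t start) {
  if (start < 0) {
    start = 0;           // csel w2, wzr, w2, lt
  }
  if (start > count) {
    start = count;       // csel w2, w3, w2, gt
  }
  for (int32_t i = start; i < count; ++i) {
    if (chars[i] == ch) {
      // The .Lmatch_* labels recover this index as (data_ptr - base) >> 1,
      // first backing up over the characters loaded ahead of the match
      // within the unrolled group (hence the -6/-4/-2 adjustments).
      return i;
    }
  }
  return -1;             // .Lindexof_nomatch
}
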