51 files changed, 1766 insertions, 792 deletions
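Reviewer note, not part of the commit: the theme running through most of these hunks is the replacement of the raw 64-bit use_mask/def_mask fields and their ENCODE_* macros (ENCODE_CCODE, ENCODE_ALL, ENCODE_ARM_REG_PC, ...) with a ResourceMask class, backed by the new dex/quick/resource_mask.cc added to compiler/Android.mk below. The stand-in that follows is written purely for illustration: only the method names (Bit, TwoBits, RawMask, SetBit, SetBits, ClearBits, HasBit, Intersects, Equals) and the fact that RawMask takes two 64-bit words are taken from calls visible in this diff; the layout, the bit positions of the abstract resources, and the kEncodeAll/kEncodeNone definitions here are guesses and will not match the real header.

#include <cstdint>
#include <cstdio>

// Simplified, hypothetical stand-in for art::ResourceMask (illustration only).
class ResourceMask {
 public:
  // Abstract resources; these positions are invented for this sketch only.
  static constexpr size_t kCCode = 64;
  static constexpr size_t kFPStatus = 65;
  static constexpr size_t kDalvikReg = 66;
  static constexpr size_t kLiteral = 67;
  static constexpr size_t kHeapRef = 68;
  static constexpr size_t kMustNotAlias = 69;

  constexpr ResourceMask() : lo_(0u), hi_(0u) {}
  constexpr ResourceMask(uint64_t lo, uint64_t hi) : lo_(lo), hi_(hi) {}

  static constexpr ResourceMask RawMask(uint64_t lo, uint64_t hi) { return ResourceMask(lo, hi); }
  static constexpr ResourceMask Bit(size_t b) {
    return b < 64 ? ResourceMask(UINT64_C(1) << b, 0u)
                  : ResourceMask(0u, UINT64_C(1) << (b - 64));
  }
  // Two adjacent bits, as used for a double-precision register that aliases a
  // pair of single-precision registers (see ArmMir2Lir::GetRegMaskArm).
  // This sketch assumes both bits land in the low word.
  static constexpr ResourceMask TwoBits(size_t b) { return ResourceMask(UINT64_C(3) << b, 0u); }

  void SetBit(size_t b) { SetBits(Bit(b)); }
  void SetBits(const ResourceMask& o) { lo_ |= o.lo_; hi_ |= o.hi_; }
  void ClearBits(const ResourceMask& o) { lo_ &= ~o.lo_; hi_ &= ~o.hi_; }
  bool HasBit(size_t b) const { return Intersects(Bit(b)); }
  bool Intersects(const ResourceMask& o) const { return ((lo_ & o.lo_) | (hi_ & o.hi_)) != 0u; }
  bool Equals(const ResourceMask& o) const { return lo_ == o.lo_ && hi_ == o.hi_; }

 private:
  uint64_t lo_;
  uint64_t hi_;
};

static const ResourceMask kEncodeAll = ResourceMask::RawMask(~UINT64_C(0), ~UINT64_C(0));
static const ResourceMask kEncodeNone = ResourceMask::RawMask(0u, 0u);

int main() {
  // Old style (removed by this commit):
  //   lir->u.m.def_mask |= ENCODE_CCODE;
  //   DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  // New style, as rewritten throughout the diff:
  ResourceMask def_mask = kEncodeNone;
  def_mask.SetBit(ResourceMask::kCCode);
  std::printf("cc defined: %d, is 'all': %d\n",
              def_mask.HasBit(ResourceMask::kCCode) ? 1 : 0,
              def_mask.Equals(kEncodeAll) ? 1 : 0);
  return 0;
}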
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 1bb1d563d6..4c2cda47f8 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -34,6 +34,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/base/unix_file/string_file_test.cc \ runtime/class_linker_test.cc \ runtime/dex_file_test.cc \ + runtime/dex_file_verifier_test.cc \ runtime/dex_instruction_visitor_test.cc \ runtime/dex_method_iterator_test.cc \ runtime/entrypoints/math_entrypoints_test.cc \ diff --git a/compiler/Android.mk b/compiler/Android.mk index 9a868fcd79..4f9f31251f 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -48,6 +48,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/mips/utility_mips.cc \ dex/quick/mir_to_lir.cc \ dex/quick/ralloc_util.cc \ + dex/quick/resource_mask.cc \ dex/quick/x86/assemble_x86.cc \ dex/quick/x86/call_x86.cc \ dex/quick/x86/fp_x86.cc \ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index f0b47878e6..55a4c7815e 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -82,22 +82,6 @@ enum BBType { kDead, }; -/* - * Def/Use encoding in 64-bit use_mask/def_mask. Low positions used for target-specific - * registers (and typically use the register number as the position). High positions - * reserved for common and abstract resources. - */ - -enum ResourceEncodingPos { - kMustNotAlias = 63, - kHeapRef = 62, // Default memory reference type. - kLiteral = 61, // Literal pool memory reference. - kDalvikReg = 60, // Dalvik v_reg memory reference. - kFPStatus = 59, - kCCode = 58, - kLowestCommonResource = kCCode -}; - // Shared pseudo opcodes - must be < 0. enum LIRPseudoOpcode { kPseudoExportedPC = -16, diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index d5443972c9..d45379712b 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -143,13 +143,25 @@ int arm64_support_list[] = { Instruction::MOVE, Instruction::MOVE_FROM16, Instruction::MOVE_16, + Instruction::MOVE_WIDE, + Instruction::MOVE_WIDE_FROM16, + Instruction::MOVE_WIDE_16, + Instruction::MOVE_OBJECT, + Instruction::MOVE_OBJECT_FROM16, + Instruction::MOVE_OBJECT_16, Instruction::MOVE_EXCEPTION, Instruction::RETURN_VOID, Instruction::RETURN, Instruction::RETURN_WIDE, + Instruction::RETURN_OBJECT, Instruction::CONST_4, Instruction::CONST_16, Instruction::CONST, + Instruction::CONST_HIGH16, + Instruction::CONST_WIDE_16, + Instruction::CONST_WIDE_32, + Instruction::CONST_WIDE, + Instruction::CONST_WIDE_HIGH16, Instruction::CONST_STRING, Instruction::MONITOR_ENTER, Instruction::MONITOR_EXIT, @@ -159,6 +171,11 @@ int arm64_support_list[] = { Instruction::GOTO_32, Instruction::PACKED_SWITCH, Instruction::SPARSE_SWITCH, + Instruction::CMPL_FLOAT, + Instruction::CMPG_FLOAT, + Instruction::CMPL_DOUBLE, + Instruction::CMPG_DOUBLE, + Instruction::CMP_LONG, Instruction::IF_EQ, Instruction::IF_NE, Instruction::IF_LT, @@ -226,6 +243,81 @@ int arm64_support_list[] = { Instruction::SHL_INT_LIT8, Instruction::SHR_INT_LIT8, Instruction::USHR_INT_LIT8, + Instruction::SGET, + Instruction::SGET_BOOLEAN, + Instruction::SGET_BYTE, + Instruction::SGET_CHAR, + Instruction::SGET_SHORT, + Instruction::SGET_OBJECT, + Instruction::SPUT, + Instruction::SPUT_OBJECT, + Instruction::SPUT_BOOLEAN, + Instruction::SPUT_BYTE, + Instruction::SPUT_CHAR, + Instruction::SPUT_SHORT, + Instruction::CMPL_FLOAT, + Instruction::CMPG_FLOAT, + Instruction::IGET, + Instruction::IGET_OBJECT, + Instruction::IGET_BOOLEAN, + Instruction::IGET_BYTE, + Instruction::IGET_CHAR, + 
Instruction::IGET_SHORT, + Instruction::IPUT, + Instruction::IPUT_OBJECT, + Instruction::IPUT_BOOLEAN, + Instruction::IPUT_BYTE, + Instruction::IPUT_CHAR, + Instruction::IPUT_SHORT, + Instruction::NEG_LONG, + Instruction::NOT_LONG, + Instruction::NEG_DOUBLE, + Instruction::INT_TO_LONG, + Instruction::INT_TO_FLOAT, + Instruction::INT_TO_DOUBLE, + Instruction::LONG_TO_INT, + Instruction::LONG_TO_FLOAT, + Instruction::LONG_TO_DOUBLE, + Instruction::FLOAT_TO_INT, + Instruction::FLOAT_TO_LONG, + Instruction::FLOAT_TO_DOUBLE, + Instruction::DOUBLE_TO_INT, + Instruction::DOUBLE_TO_LONG, + Instruction::DOUBLE_TO_FLOAT, + Instruction::ADD_LONG, + Instruction::SUB_LONG, + Instruction::MUL_LONG, + Instruction::DIV_LONG, + Instruction::REM_LONG, + Instruction::AND_LONG, + Instruction::OR_LONG, + Instruction::XOR_LONG, + Instruction::SHL_LONG, + Instruction::SHR_LONG, + Instruction::USHR_LONG, + Instruction::REM_FLOAT, + Instruction::ADD_DOUBLE, + Instruction::SUB_DOUBLE, + Instruction::MUL_DOUBLE, + Instruction::DIV_DOUBLE, + Instruction::REM_DOUBLE, + Instruction::ADD_LONG_2ADDR, + Instruction::SUB_LONG_2ADDR, + Instruction::MUL_LONG_2ADDR, + Instruction::DIV_LONG_2ADDR, + Instruction::REM_LONG_2ADDR, + Instruction::AND_LONG_2ADDR, + Instruction::OR_LONG_2ADDR, + Instruction::XOR_LONG_2ADDR, + Instruction::SHL_LONG_2ADDR, + Instruction::SHR_LONG_2ADDR, + Instruction::USHR_LONG_2ADDR, + Instruction::REM_FLOAT_2ADDR, + Instruction::ADD_DOUBLE_2ADDR, + Instruction::SUB_DOUBLE_2ADDR, + Instruction::MUL_DOUBLE_2ADDR, + Instruction::DIV_DOUBLE_2ADDR, + Instruction::REM_DOUBLE_2ADDR, // TODO(Arm64): Enable compiler pass // ----- ExtendedMIROpcode ----- kMirOpPhi, @@ -244,21 +336,9 @@ int arm64_support_list[] = { kMirOpSelect, #if ARM64_USE_EXPERIMENTAL_OPCODES - Instruction::MOVE_WIDE, - Instruction::MOVE_WIDE_FROM16, - Instruction::MOVE_WIDE_16, - Instruction::MOVE_OBJECT, - Instruction::MOVE_OBJECT_FROM16, - Instruction::MOVE_OBJECT_16, // Instruction::MOVE_RESULT, // Instruction::MOVE_RESULT_WIDE, // Instruction::MOVE_RESULT_OBJECT, - // Instruction::RETURN_OBJECT, - // Instruction::CONST_HIGH16, - // Instruction::CONST_WIDE_16, - // Instruction::CONST_WIDE_32, - // Instruction::CONST_WIDE, - // Instruction::CONST_WIDE_HIGH16, // Instruction::CONST_STRING_JUMBO, // Instruction::CONST_CLASS, // Instruction::CHECK_CAST, @@ -269,11 +349,6 @@ int arm64_support_list[] = { // Instruction::FILLED_NEW_ARRAY, // Instruction::FILLED_NEW_ARRAY_RANGE, // Instruction::FILL_ARRAY_DATA, - Instruction::CMPL_FLOAT, - Instruction::CMPG_FLOAT, - Instruction::CMPL_DOUBLE, - Instruction::CMPG_DOUBLE, - Instruction::CMP_LONG, // Instruction::UNUSED_3E, // Instruction::UNUSED_3F, // Instruction::UNUSED_40, @@ -294,34 +369,10 @@ int arm64_support_list[] = { // Instruction::APUT_BYTE, // Instruction::APUT_CHAR, // Instruction::APUT_SHORT, - // Instruction::IGET, - // Instruction::IGET_WIDE, - // Instruction::IGET_OBJECT, - // Instruction::IGET_BOOLEAN, - // Instruction::IGET_BYTE, - // Instruction::IGET_CHAR, - // Instruction::IGET_SHORT, - // Instruction::IPUT, // Instruction::IPUT_WIDE, - // Instruction::IPUT_OBJECT, - // Instruction::IPUT_BOOLEAN, - // Instruction::IPUT_BYTE, - // Instruction::IPUT_CHAR, - // Instruction::IPUT_SHORT, - Instruction::SGET, + // Instruction::IGET_WIDE, // Instruction::SGET_WIDE, - Instruction::SGET_OBJECT, - // Instruction::SGET_BOOLEAN, - // Instruction::SGET_BYTE, - // Instruction::SGET_CHAR, - // Instruction::SGET_SHORT, - Instruction::SPUT, // Instruction::SPUT_WIDE, - // 
Instruction::SPUT_OBJECT, - // Instruction::SPUT_BOOLEAN, - // Instruction::SPUT_BYTE, - // Instruction::SPUT_CHAR, - // Instruction::SPUT_SHORT, Instruction::INVOKE_VIRTUAL, Instruction::INVOKE_SUPER, Instruction::INVOKE_DIRECT, @@ -335,55 +386,6 @@ int arm64_support_list[] = { // Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, - Instruction::NEG_LONG, - Instruction::NOT_LONG, - Instruction::NEG_DOUBLE, - Instruction::INT_TO_LONG, - Instruction::INT_TO_FLOAT, - Instruction::INT_TO_DOUBLE, - Instruction::LONG_TO_INT, - Instruction::LONG_TO_FLOAT, - Instruction::LONG_TO_DOUBLE, - Instruction::FLOAT_TO_INT, - Instruction::FLOAT_TO_LONG, - Instruction::FLOAT_TO_DOUBLE, - Instruction::DOUBLE_TO_INT, - Instruction::DOUBLE_TO_LONG, - Instruction::DOUBLE_TO_FLOAT, - Instruction::ADD_LONG, - Instruction::SUB_LONG, - Instruction::MUL_LONG, - Instruction::DIV_LONG, - Instruction::REM_LONG, - Instruction::AND_LONG, - Instruction::OR_LONG, - Instruction::XOR_LONG, - Instruction::SHL_LONG, - Instruction::SHR_LONG, - Instruction::USHR_LONG, - // Instruction::REM_FLOAT, - Instruction::ADD_DOUBLE, - Instruction::SUB_DOUBLE, - Instruction::MUL_DOUBLE, - Instruction::DIV_DOUBLE, - // Instruction::REM_DOUBLE, - Instruction::ADD_LONG_2ADDR, - Instruction::SUB_LONG_2ADDR, - Instruction::MUL_LONG_2ADDR, - Instruction::DIV_LONG_2ADDR, - Instruction::REM_LONG_2ADDR, - Instruction::AND_LONG_2ADDR, - Instruction::OR_LONG_2ADDR, - Instruction::XOR_LONG_2ADDR, - Instruction::SHL_LONG_2ADDR, - Instruction::SHR_LONG_2ADDR, - Instruction::USHR_LONG_2ADDR, - // Instruction::REM_FLOAT_2ADDR, - Instruction::ADD_DOUBLE_2ADDR, - Instruction::SUB_DOUBLE_2ADDR, - Instruction::MUL_DOUBLE_2ADDR, - Instruction::DIV_DOUBLE_2ADDR, - // Instruction::REM_DOUBLE_2ADDR, // Instruction::IGET_QUICK, // Instruction::IGET_WIDE_QUICK, // Instruction::IGET_OBJECT_QUICK, diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index e32e7cb74f..6272555983 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -109,12 +109,6 @@ enum ArmResourceEncodingPos { kArmRegEnd = 48, }; -#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N)) -#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) -#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) -#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC) -#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16) - enum ArmNativeRegisterPool { r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1, diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index db0731f1ee..5466abd11d 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -87,7 +87,7 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, tab_rec->anchor = switch_branch; // Needs to use setflags encoding here OpRegRegImm(kOpSub, r_idx, r_idx, 1); // For value == 1, this should set flags. 
- DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); OpCondBranch(kCondNe, target); } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 9c801a520b..8db7d4ee73 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -63,7 +63,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -79,12 +79,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); @@ -204,6 +205,8 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int value); + LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, + RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg()); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); @@ -217,6 +220,10 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op); bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); + + static constexpr ResourceMask GetRegMaskArm(RegStorage reg); + static constexpr ResourceMask EncodeArmRegList(int reg_list); + static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 4732e52234..916c52838a 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -224,13 +224,13 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { bool cheap_false_val = InexpensiveConstantInt(false_val); if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(true_val == 0 ? 
kCondNe : kCondUge, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* it = OpIT(kCondLs, ""); LoadConstant(rl_result.reg, false_val); OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact @@ -882,14 +882,14 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { } FreeTemp(r_tmp_high); // Now unneeded - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); } else { NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondEq, "T"); NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } @@ -907,7 +907,7 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // result := (tmp1 != 0) ? 0 : 1; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); it = OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. @@ -971,7 +971,7 @@ LIR* ArmMir2Lir::OpTestSuspend(LIR* target) { LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { // Combine sub & test using sub setflags encoding here OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } @@ -1004,7 +1004,7 @@ bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; return ret; #else return false; diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 3b30cde0d4..e1e2d5b749 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -135,30 +135,32 @@ RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. */ -uint64_t ArmMir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id = reg.GetRegNum(); - /* Each double register is equal to a pair of single-precision FP registers */ - if (reg.IsDouble()) { - seed = 0x3; - reg_id = reg_id << 1; - } else { - seed = 1; - } - /* FP register starts at bit position 16 */ - shift = reg.IsFloat() ? 
kArmFPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); +ResourceMask ArmMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + return GetRegMaskArm(reg); +} + +constexpr ResourceMask ArmMir2Lir::GetRegMaskArm(RegStorage reg) { + return reg.IsDouble() + /* Each double register is equal to a pair of single-precision FP registers */ + ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kArmFPReg0) + : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kArmFPReg0 : reg.GetRegNum()); +} + +constexpr ResourceMask ArmMir2Lir::EncodeArmRegList(int reg_list) { + return ResourceMask::RawMask(static_cast<uint64_t >(reg_list), 0u); +} + +constexpr ResourceMask ArmMir2Lir::EncodeArmRegFpcsList(int reg_list) { + return ResourceMask::RawMask(static_cast<uint64_t >(reg_list) << kArmFPReg16, 0u); } -uint64_t ArmMir2Lir::GetPCUseDefEncoding() { - return ENCODE_ARM_REG_PC; +ResourceMask ArmMir2Lir::GetPCUseDefEncoding() const { + return ResourceMask::Bit(kArmRegPC); } // Thumb2 specific setup. TODO: inline?: -void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kThumb2); DCHECK(!lir->flags.use_def_invalid); @@ -169,70 +171,70 @@ void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + def_mask->SetBit(kArmRegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + use_mask->SetBit(kArmRegSP); } if (flags & REG_DEF_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + def_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_DEF_LIST1) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + def_mask->SetBits(EncodeArmRegList(lir->operands[1])); } if (flags & REG_DEF_FPCS_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + def_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_DEF_FPCS_LIST2) { for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); + SetupRegMask(def_mask, lir->operands[1] + i); } } if (flags & REG_USE_PC) { - lir->u.m.use_mask |= ENCODE_ARM_REG_PC; + use_mask->SetBit(kArmRegPC); } /* Conservatively treat the IT block */ if (flags & IS_IT) { - lir->u.m.def_mask = ENCODE_ALL; + *def_mask = kEncodeAll; } if (flags & REG_USE_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + use_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_USE_LIST1) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + use_mask->SetBits(EncodeArmRegList(lir->operands[1])); } if (flags & REG_USE_FPCS_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + use_mask->SetBits(EncodeArmRegList(lir->operands[0])); } if (flags & REG_USE_FPCS_LIST2) { for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); + SetupRegMask(use_mask, lir->operands[1] + i); } } /* Fixup for kThumbPush/lr and kThumbPop/pc */ if (opcode == kThumbPush || opcode == kThumbPop) { - uint64_t r8Mask = GetRegMaskCommon(rs_r8); - if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { - lir->u.m.use_mask &= ~r8Mask; - 
lir->u.m.use_mask |= ENCODE_ARM_REG_LR; - } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { - lir->u.m.def_mask &= ~r8Mask; - lir->u.m.def_mask |= ENCODE_ARM_REG_PC; + constexpr ResourceMask r8Mask = GetRegMaskArm(rs_r8); + if ((opcode == kThumbPush) && (use_mask->Intersects(r8Mask))) { + use_mask->ClearBits(r8Mask); + use_mask->SetBit(kArmRegLR); + } else if ((opcode == kThumbPop) && (def_mask->Intersects(r8Mask))) { + def_mask->ClearBits(r8Mask); + def_mask->SetBit(kArmRegPC);; } } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LR; + def_mask->SetBit(kArmRegLR); } } } @@ -486,44 +488,44 @@ std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char return buf; } -void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) { +void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kArmRegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 86d32f4d72..61d3d56036 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -87,9 +87,9 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) { if (data_target == NULL) { data_target = AddWordData(&literal_list_, value); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -670,6 +670,7 @@ LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (data_target == NULL) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); if (r_dest.IsFloat()) { res = RawLIR(current_dalvik_offset_, kThumb2Vldrd, r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target); @@ -678,7 +679,6 @@ LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8, r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target); } - SetMemRefType(res, true, kLiteral); AppendLIR(res); } return res; @@ -819,6 +819,30 @@ LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStor return store; } +// Helper function for LoadBaseDispBody()/StoreBaseDispBody(). 
+LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement, + RegStorage r_src_dest, RegStorage r_work) { + DCHECK_EQ(displacement & 3, 0); + int encoded_disp = (displacement & 1020) >> 2; // Within range of the instruction. + RegStorage r_ptr = r_base; + if ((displacement & ~1020) != 0) { + r_ptr = r_work.Valid() ? r_work : AllocTemp(); + // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. + OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); + } + LIR* lir = nullptr; + if (!r_src_dest.IsPair()) { + lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp); + } else { + lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(), + encoded_disp); + } + if ((displacement & ~1020) != 0 && !r_work.Valid()) { + FreeTemp(r_ptr); + } + return lir; +} + /* * Load value from base + displacement. Optionally perform null check * on base (which must have an associated s_reg and MIR). If not @@ -836,40 +860,26 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag switch (size) { case kDouble: // Intentional fall-though. - case k64: { - DCHECK_EQ(displacement & 3, 0); - encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vldrd/kThumb2LdrdI8. - RegStorage r_ptr = r_base; - if ((displacement & ~1020) != 0) { - // For core register load, use the r_dest.GetLow() for the temporary pointer. - r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow(); - // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); - } + case k64: if (r_dest.IsFloat()) { DCHECK(!r_dest.IsPair()); - load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp); + load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest); } else { - load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(), - encoded_disp); - } - if ((displacement & ~1020) != 0 && r_dest.IsFloat()) { - FreeTemp(r_ptr); + DCHECK(r_dest.IsPair()); + // Use the r_dest.GetLow() for the temporary pointer if needed. + load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow()); } already_generated = true; break; - } case kSingle: // Intentional fall-though. case k32: // Intentional fall-though. case kReference: if (r_dest.IsFloat()) { - opcode = kThumb2Vldrs; - if (displacement <= 1020) { - short_form = true; - encoded_disp >>= 2; - } + DCHECK(r_dest.IsSingle()); + load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest); + already_generated = true; break; } if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && @@ -934,19 +944,15 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag } else { RegStorage reg_offset = AllocTemp(); LoadConstant(reg_offset, encoded_disp); - if (r_dest.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. 
- OpRegReg(kOpAdd, reg_offset, r_base); - load = LoadBaseDispBody(reg_offset, 0, r_dest, size); - } else { - load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); - } + DCHECK(!r_dest.IsFloat()); + load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); FreeTemp(reg_offset); } } // TODO: in future may need to differentiate Dalvik accesses w/ spills - if (r_base == rs_rARM_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rARM_SP); AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); } return load; @@ -992,28 +998,16 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora switch (size) { case kDouble: // Intentional fall-though. - case k64: { - DCHECK_EQ(displacement & 3, 0); - encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vstrd/kThumb2StrdI8. - RegStorage r_ptr = r_base; - if ((displacement & ~1020) != 0) { - r_ptr = AllocTemp(); - // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB. - OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020); - } + case k64: if (r_src.IsFloat()) { DCHECK(!r_src.IsPair()); - store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp); + store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src); } else { - store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(), - encoded_disp); - } - if ((displacement & ~1020) != 0) { - FreeTemp(r_ptr); + DCHECK(r_src.IsPair()); + store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src); } already_generated = true; break; - } case kSingle: // Intentional fall-through. case k32: @@ -1021,11 +1015,8 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora case kReference: if (r_src.IsFloat()) { DCHECK(r_src.IsSingle()); - opcode = kThumb2Vstrs; - if (displacement <= 1020) { - short_form = true; - encoded_disp >>= 2; - } + store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src); + already_generated = true; break; } if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { @@ -1073,19 +1064,15 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora } else { RegStorage r_scratch = AllocTemp(); LoadConstant(r_scratch, encoded_disp); - if (r_src.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. 
- OpRegReg(kOpAdd, r_scratch, r_base); - store = StoreBaseDispBody(r_scratch, 0, r_src, size); - } else { - store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); - } + DCHECK(!r_src.IsFloat()); + store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); FreeTemp(r_scratch); } } // TODO: In future, may need to differentiate Dalvik & spill accesses - if (r_base == rs_rARM_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rARM_SP); AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); } return store; diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index 6a6b0f6a53..c1ce03deb1 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -102,17 +102,14 @@ namespace art { #define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp) #define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr) -enum ArmResourceEncodingPos { - kArmGPReg0 = 0, - kArmRegLR = 30, - kArmRegSP = 31, - kArmFPReg0 = 32, - kArmRegEnd = 64, +enum Arm64ResourceEncodingPos { + kArm64GPReg0 = 0, + kArm64RegLR = 30, + kArm64RegSP = 31, + kArm64FPReg0 = 32, + kArm64RegEnd = 64, }; -#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) -#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) - #define IS_SIGNED_IMM(size, value) \ ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1))) #define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value) @@ -212,7 +209,7 @@ enum A64RegExtEncodings { }; #define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0)) - +#define ENCODE_NO_EXTEND (EncodeExtend(kA64Uxtx, 0)) /* * The following enum defines the list of supported A64 instructions by the * assembler. Their corresponding EncodingMap positions will be defined in @@ -289,6 +286,7 @@ enum ArmOpcode { kA64Ldur3fXd, // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Ldur3rXd, // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Ldxr2rX, // ldxr[1s00100001011111011111] rn[9-5] rt[4-0]. + kA64Ldaxr2rX, // ldaxr[1s00100001011111111111] rn[9-5] rt[4-0]. kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0]. kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}". kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0]. @@ -328,8 +326,9 @@ enum ArmOpcode { kA64Stur3fXd, // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Stur3rXd, // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0]. kA64Stxr3wrX, // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0]. + kA64Stlxr3wrX, // stlxr[11001000000] rs[20-16] [111111] rn[9-5] rt[4-0]. kA64Sub4RRdT, // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0]. - kA64Sub4rrro, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. + kA64Sub4rrro, // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0]. kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3". kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. @@ -394,9 +393,6 @@ enum ArmEncodingKind { kFmtSkip, // Unused field, but continue to next. }; -// TODO(Arm64): should we get rid of kFmtExtend? -// Note: the only instructions that use it (cmp, cmn) are not used themselves. - // Struct used to define the snippet positions for each A64 opcode. 
struct ArmEncodingMap { uint32_t wskeleton; diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 4a0c055f4d..2c4f26216f 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -268,7 +268,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0s, !1w", kFixupNone), - ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000), + ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000), kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0S, !1x", kFixupNone), @@ -386,6 +386,10 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldxr", "!0r, [!1X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldaxr2rX), SIZE_VARIANTS(0x885ffc00), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldaxr", "!0r, [!1X]", kFixupNone), ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000), kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, @@ -443,11 +447,11 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, "ret", "", kFixupNone), ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00), - kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "rev", "!0r, !1r", kFixupNone), - ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0), - kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, + ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "rev16", "!0r, !1r", kFixupNone), ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00), @@ -542,6 +546,10 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE, "stxr", "!0w, !1r, [!2X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Stlxr3wrX), SIZE_VARIANTS(0x8800fc00), + kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE, + "stlxr", "!0w, !1r, [!2X]", kFixupNone), ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000), kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1, @@ -803,7 +811,7 @@ void Arm64Mir2Lir::AssembleLIR() { DCHECK(target_lir); CodeOffset pc = lir->offset; CodeOffset target = target_lir->offset + - ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + ((target_lir->flags.generation == lir->flags.generation) ? 
0 : offset_adjustment); int32_t delta = target - pc; if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) { LOG(FATAL) << "Invalid jump range in kFixupLoad"; diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index b85f5694d6..59eec3d486 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -301,12 +301,14 @@ void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { * Mark garbage collection card. Skip if the value we're storing is null. */ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { - RegStorage reg_card_base = AllocTemp(); + RegStorage reg_card_base = AllocTempWide(); RegStorage reg_card_no = AllocTemp(); LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base); OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); - StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte); + // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"? + StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base), + 0, kUnsignedByte); LIR* target = NewLIR0(kPseudoTargetLabel); branch_over->target = target; FreeTemp(reg_card_base); @@ -315,62 +317,133 @@ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* - * On entry, x0, x1, x2 & x3 are live. Let the register allocation + * On entry, x0 to x7 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when - * expanding the frame or flushing. This leaves the utility - * code with a single temp: r12. This should be enough. + * expanding the frame or flushing. + * Reserve x8 & x9 for temporaries. */ LockTemp(rs_x0); LockTemp(rs_x1); LockTemp(rs_x2); LockTemp(rs_x3); + LockTemp(rs_x4); + LockTemp(rs_x5); + LockTemp(rs_x6); + LockTemp(rs_x7); + LockTemp(rs_x8); + LockTemp(rs_x9); /* * We can safely skip the stack overflow check if we're * a leaf *and* our frame size < fudge factor. */ bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && - (static_cast<size_t>(frame_size_) < - Thread::kStackOverflowReservedBytes)); + (static_cast<size_t>(frame_size_) < + Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes); + const int spill_count = num_core_spills_ + num_fp_spills_; + const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf; // SP 16 byte alignment. + const int frame_size_without_spills = frame_size_ - spill_size; + if (!skip_overflow_check) { - LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12); - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); if (Runtime::Current()->ExplicitStackOverflowChecks()) { - /* Load stack limit */ - // TODO(Arm64): fix the line below: - // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow); + if (!large_frame) { + // Load stack limit + LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9); + } } else { + // TODO(Arm64) Implement implicit checks. // Implicit stack overflow check. // Generate a load from [sp, #-framesize]. If this is in the stack // redzone we will get a segmentation fault. - // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR? 
- Load32Disp(rs_rA64_SP, 0, rs_wzr); - MarkPossibleStackOverflowException(); + // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr); + // MarkPossibleStackOverflowException(); + LOG(FATAL) << "Implicit stack overflow checks not implemented."; } - } else if (frame_size_ > 0) { - OpRegImm64(kOpSub, rs_rA64_SP, frame_size_); + } + + if (frame_size_ > 0) { + OpRegImm64(kOpSub, rs_rA64_SP, spill_size); } /* Need to spill any FP regs? */ if (fp_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_); + int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_); SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_); } /* Spill core callee saves. */ if (core_spill_mask_) { - int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_; + int spill_offset = spill_size - kArm64PointerSize*num_core_spills_; SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_); } + if (!skip_overflow_check) { + if (Runtime::Current()->ExplicitStackOverflowChecks()) { + class StackOverflowSlowPath: public LIRSlowPath { + public: + StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) : + LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), + sp_displace_(sp_displace) { + } + void Compile() OVERRIDE { + m2l_->ResetRegPool(); + m2l_->ResetDefTracking(); + GenerateTargetLabel(kPseudoThrowTarget); + // Unwinds stack. + m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_); + m2l_->ClobberCallerSave(); + ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); + m2l_->LockTemp(rs_x8); + m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8); + m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg()); + m2l_->FreeTemp(rs_x8); + } + + private: + const size_t sp_displace_; + }; + + if (large_frame) { + // Compare Expected SP against bottom of stack. + // Branch to throw target if there is not enough room. + OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills); + LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8); + LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr); + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size)); + OpRegCopy(rs_rA64_SP, rs_x9); // Establish stack after checks. + } else { + /* + * If the frame is small enough we are guaranteed to have enough space that remains to + * handle signals on the user stack. + * Establishes stack before checks. 
+ */ + OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills); + LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr); + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_)); + } + } else { + OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills); + } + } else { + OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills); + } + FlushIns(ArgLocs, rl_method); FreeTemp(rs_x0); FreeTemp(rs_x1); FreeTemp(rs_x2); FreeTemp(rs_x3); + FreeTemp(rs_x4); + FreeTemp(rs_x5); + FreeTemp(rs_x6); + FreeTemp(rs_x7); + FreeTemp(rs_x8); + FreeTemp(rs_x9); } void Arm64Mir2Lir::GenExitSequence() { diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 21db77193b..bf09b8610e 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -63,7 +63,7 @@ class Arm64Mir2Lir : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -78,12 +78,13 @@ class Arm64Mir2Lir : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); @@ -123,6 +124,7 @@ class Arm64Mir2Lir : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + bool GenInlinedAbsLong(CallInfo* info); void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); void GenNotLong(RegLocation rl_dest, RegLocation rl_src); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); @@ -184,6 +186,7 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); + LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value); LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); LIR* OpTestSuspend(LIR* target); @@ -201,6 +204,7 @@ class Arm64Mir2Lir : public Mir2Lir { LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, int shift); LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); + LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); static const ArmEncodingMap EncodingMap[kA64Last]; int EncodeShift(int code, int amount); int EncodeExtend(int extend_type, int amount); diff --git 
a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 0a76b9b295..8112c2ec6d 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -77,10 +77,10 @@ void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, default: LOG(FATAL) << "Unexpected case: " << opcode; } - rl_shift = LoadValueWide(rl_shift, kCoreReg); + rl_shift = LoadValue(rl_shift, kCoreReg); rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg)); StoreValueWide(rl_dest, rl_result); } @@ -361,11 +361,22 @@ RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegS return rl_result; } -bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); +bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) { + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage sign_reg = AllocTempWide(); + // abs(x) = y<=x>>63, (x+y)^y. + OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63); + OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg); + OpRegReg(kOpXor, rl_result.reg, sign_reg); + StoreValueWide(rl_dest, rl_result); + return true; +} - DCHECK_EQ(cu_->instruction_set, kThumb2); +bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { + DCHECK_EQ(cu_->instruction_set, kArm64); RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[1]; rl_src1 = LoadValue(rl_src1, kCoreReg); @@ -373,61 +384,43 @@ bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - // OpIT((is_min) ? kCondGt : kCondLt, "E"); - OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); - OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); - GenBarrier(); + NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), + rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt); StoreValue(rl_dest, rl_result); return true; } bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] + rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ? RegLocation rl_dest = InlineTarget(info); - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); if (size == k64) { - // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. 
- if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { - LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); - LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); - } else { - LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); - LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); - } StoreValueWide(rl_dest, rl_result); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); - // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); StoreValue(rl_dest, rl_result); } return true; } bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + + RegLocation rl_value; if (size == k64) { - // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. - RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32); - StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32); + rl_value = LoadValueWide(rl_src_value, kCoreReg); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); - // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. - RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); - StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + rl_value = LoadValue(rl_src_value, kCoreReg); } + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); return true; } @@ -444,71 +437,30 @@ void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { } bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(WARNING); - - DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK_EQ(cu_->instruction_set, kArm64); + ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low - rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] + rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this RegLocation rl_src_expected = info->args[4]; // int, long or Object // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] RegLocation rl_dest = InlineTarget(info); // boolean place for result - // We have only 5 temporary registers available and actually only 4 if the InlineTarget - // above locked one of the temps. For a straightforward CAS64 we need 7 registers: - // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor - // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop - // into the same temps, reducing the number of required temps down to 5. We shall work - // around the potentially locked temp by using LR for r_ptr, unconditionally. 
- // TODO: Pass information about the need for more temps to the stack frame generation - // code so that we can rely on being able to allocate enough temps. - DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp()); - MarkTemp(rs_rA64_LR); - FreeTemp(rs_rA64_LR); - LockTemp(rs_rA64_LR); - bool load_early = true; - if (is_long) { - RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() : - rl_src_expected.reg; - RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() : - rl_src_new_value.reg; - bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat(); - bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat(); - bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg); - bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg); - - if (!expected_is_good_reg && !new_value_is_good_reg) { - // None of expected/new_value is non-temp reg, need to load both late - load_early = false; - // Make sure they are not in the temp regs and the load will not be skipped. - if (expected_is_core_reg) { - FlushRegWide(rl_src_expected.reg); - ClobberSReg(rl_src_expected.s_reg_low); - ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); - rl_src_expected.location = kLocDalvikFrame; - } - if (new_value_is_core_reg) { - FlushRegWide(rl_src_new_value.reg); - ClobberSReg(rl_src_new_value.s_reg_low); - ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); - rl_src_new_value.location = kLocDalvikFrame; - } - } - } - - // Release store semantics, get the barrier out of the way. TODO: revisit - GenMemBarrier(kStoreLoad); - + // Load Object and offset RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); + RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg); + RegLocation rl_new_value; - if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value); - } else if (load_early) { + RegLocation rl_expected; + if (is_long) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); + rl_expected = LoadValueWide(rl_src_expected, kCoreReg); + } else { + rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg); + rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg); } if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { @@ -516,9 +468,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { MarkGCCard(rl_new_value.reg, rl_object.reg); } - RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - - RegStorage r_ptr = rs_rA64_LR; + RegStorage r_ptr = AllocTempRef(); OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg); // Free now unneeded rl_object and rl_offset to give more temps. @@ -527,77 +477,40 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { ClobberSReg(rl_offset.s_reg_low); FreeTemp(rl_offset.reg); - RegLocation rl_expected; - if (!is_long) { - rl_expected = LoadValue(rl_src_expected); - } else if (load_early) { - rl_expected = LoadValueWide(rl_src_expected, kCoreReg); - } else { - // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. 
- int low_reg = AllocTemp().GetReg(); - int high_reg = AllocTemp().GetReg(); - rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg); - rl_expected = rl_new_value; - } - // do { // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); // result = tmp != 0; - RegStorage r_tmp = AllocTemp(); - LIR* target = NewLIR0(kPseudoTargetLabel); - + RegStorage r_tmp; if (is_long) { - RegStorage r_tmp_high = AllocTemp(); - if (!load_early) { - LoadValueDirectWide(rl_src_expected, rl_expected.reg); - } - NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); - OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow()); - OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh()); - if (!load_early) { - LoadValueDirectWide(rl_src_new_value, rl_new_value.reg); - } - - LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL); - LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL); - NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), - rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); - LIR* target2 = NewLIR0(kPseudoTargetLabel); - branch1->target = target2; - branch2->target = target2; - FreeTemp(r_tmp_high); // Now unneeded - + r_tmp = AllocTempWide(); + } else if (is_object) { + r_tmp = AllocTempRef(); } else { - NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0); - OpRegReg(kOpSub, r_tmp, rl_expected.reg); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - // OpIT(kCondEq, "T"); - NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); + r_tmp = AllocTemp(); } - // Still one conditional left from OpIT(kCondEq, "T") from either branch - OpRegImm(kOpCmp /* eq */, r_tmp, 1); - OpCondBranch(kCondEq, target); + LIR* loop = NewLIR0(kPseudoTargetLabel); + NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg()); + OpRegReg(kOpCmp, r_tmp, rl_expected.reg); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); + LIR* early_exit = OpCondBranch(kCondNe, NULL); - if (!load_early) { - FreeTemp(rl_expected.reg); // Now unneeded. - } + NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg()); + NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); + OpCondBranch(kCondNe, loop); - // result := (tmp1 != 0) ? 0 : 1; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - // OpIT(kCondUlt, ""); - LoadConstant(rl_result.reg, 0); /* cc */ + LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); + early_exit->target = exit; + FreeTemp(r_tmp); // Now unneeded. + FreeTemp(r_ptr); // Now unneeded. StoreValue(rl_dest, rl_result); - // Now, restore lr to its non-temp status. - Clobber(rs_rA64_LR); - UnmarkTemp(rs_rA64_LR); return true; } @@ -640,7 +553,7 @@ LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { // Combine sub & test using sub setflags encoding here OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); return OpCondBranch(c_code, target); } @@ -673,7 +586,7 @@ bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { // At this point we must have a memory barrier. 
Mark it as a scheduling barrier as well. DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; return ret; #else return false; @@ -1021,7 +934,7 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_de rl_src1 = LoadValueWide(rl_src1, kCoreReg); RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - OpRegRegImm(op, rl_result.reg, rl_src1.reg, val); + OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val); StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 439dc8c73d..e2846aebc3 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -139,41 +139,43 @@ RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. This routine makes assumptions on the encoding made by RegStorage. */ -uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) { +ResourceMask Arm64Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const { // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor. - int reg_raw = reg.GetRawBits(); // Check if the shape mask is zero (i.e. invalid). if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) { // The zero register is not a true register. It is just an immediate zero. - return 0; + return kEncodeNone; } - return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask); + return ResourceMask::Bit( + // FP register starts at bit position 32. + (reg.IsFloat() ? kArm64FPReg0 : 0) + reg.GetRegNum()); } -uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() { +ResourceMask Arm64Mir2Lir::GetPCUseDefEncoding() const { LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64"; - return 0ULL; + return kEncodeNone; } // Arm64 specific setup. TODO: inline?: -void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kArm64); DCHECK(!lir->flags.use_def_invalid); // These flags are somewhat uncommon - bypass if we can. 
if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + def_mask->SetBit(kArm64RegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + use_mask->SetBit(kArm64RegSP); } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LR; + def_mask->SetBit(kArm64RegLR); } } } @@ -510,44 +512,44 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch return buf; } -void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) { +void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, const ResourceMask& mask, const char* prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; - for (i = 0; i < kArmRegEnd; i++) { - if (mask & (1ULL << i)) { + for (i = 0; i < kArm64RegEnd; i++) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + if (arm_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } @@ -850,6 +852,8 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { return; } + // Handle dalvik registers. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 4f0d7bc7d1..71e9e95ab0 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -102,9 +102,9 @@ LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) { data_target = AddWordData(&literal_list_, value); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp, r_dest, 0, 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -129,9 +129,9 @@ LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) { } DCHECK(RegStorage::IsFloat(r_dest)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp), r_dest, 0, 0, 0, 0, data_target); - SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } @@ -146,7 +146,7 @@ static int CountTrailingZeros(bool is_wide, uint64_t value) { static int CountSetBits(bool is_wide, uint64_t value) { return ((is_wide) ? 
- __builtin_popcountl(value) : __builtin_popcount((uint32_t)value)); + __builtin_popcountll(value) : __builtin_popcount((uint32_t)value)); } /** @@ -387,11 +387,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r case kOpRev: DCHECK_EQ(shift, 0); // Binary, but rm is encoded twice. - return NewLIR3(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); break; case kOpRevsh: // Binary, but rm is encoded twice. - return NewLIR3(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + return NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); break; case kOp2Byte: DCHECK_EQ(shift, ENCODE_NO_SHIFT); @@ -426,8 +426,43 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r return NULL; } +LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend) { + ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0); + ArmOpcode opcode = kA64Brk1d; + + switch (op) { + case kOpCmn: + opcode = kA64Cmn3Rre; + break; + case kOpCmp: + opcode = kA64Cmp3Rre; + break; + default: + LOG(FATAL) << "Bad Opcode: " << opcode; + break; + } + + DCHECK(!IsPseudoLirOp(opcode)); + if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { + ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + if (kind == kFmtExtend) { + return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend); + } + } + + LOG(FATAL) << "Unexpected encoding operand count"; + return NULL; +} + LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { - return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT); + /* RegReg operations with SP in first parameter need extended register instruction form. + * Only CMN and CMP instructions are implemented. + */ + if (r_dest_src1 == rs_rA64_SP) { + return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND); + } else { + return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT); + } } LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) { @@ -517,8 +552,11 @@ LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT); } -// Should be taking an int64_t value ? LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) { + return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value)); +} + +LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) { LIR* res; bool neg = (value < 0); int64_t abs_value = (neg) ? -value : value; @@ -526,7 +564,6 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, ArmOpcode alt_opcode = kA64Brk1d; int32_t log_imm = -1; bool is_wide = r_dest.Is64Bit(); - CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); ArmOpcode wide = (is_wide) ? 
WIDE(0) : UNWIDE(0); switch (op) { @@ -603,11 +640,17 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm); } else { RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, value); + if (IS_WIDE(wide)) { + r_scratch = AllocTempWide(); + LoadConstantWide(r_scratch, value); + } else { + r_scratch = AllocTemp(); + LoadConstant(r_scratch, value); + } if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) - res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); + res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); else - res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); + res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); FreeTemp(r_scratch); return res; } @@ -632,9 +675,36 @@ LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) // abs_value is a shifted 12-bit immediate. shift = true; abs_value >>= 12; + } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) { + // Note: It is better to use two ADD/SUB instead of loading a number to a temp register. + // This works for both normal registers and SP. + // For a frame size == 0x2468, it will be encoded as: + // sub sp, #0x2000 + // sub sp, #0x468 + if (neg) { + op = (op == kOpAdd) ? kOpSub : kOpAdd; + } + OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff))); + return OpRegImm64(op, r_dest_src1, abs_value & 0xfff); + } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) { + // Note: "sub sp, sp, Xm" is not correct on arm64. + // We need special instructions for SP. + // Also operation on 32-bit SP should be avoided. + DCHECK(IS_WIDE(wide)); + RegStorage r_tmp = AllocTempWide(); + OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0); + OpRegImm64(op, r_tmp, value); + return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0); } else { - RegStorage r_tmp = AllocTemp(); - LIR* res = LoadConstant(r_tmp, value); + RegStorage r_tmp; + LIR* res; + if (IS_WIDE(wide)) { + r_tmp = AllocTempWide(); + res = LoadConstantWide(r_tmp, value); + } else { + r_tmp = AllocTemp(); + res = LoadConstant(r_tmp, value); + } OpRegReg(op, r_dest_src1, r_tmp); FreeTemp(r_tmp); return res; @@ -683,9 +753,9 @@ LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), r_dest.GetReg(), 0, 0, 0, 0, data_target); - SetMemRefType(res, true, kLiteral); AppendLIR(res); return res; } @@ -905,7 +975,8 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor } // TODO: in future may need to differentiate Dalvik accesses w/ spills - if (r_base == rs_rA64_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rA64_SP); AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); } return load; @@ -986,7 +1057,8 @@ LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegSto } // TODO: In future, may need to differentiate Dalvik & spill accesses. 
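The new 24-bit fast path in OpRegImm64() above splits a large ADD/SUB immediate into a part that encodes as imm12 with LSL #12 plus a plain 12-bit part, matching the sub sp, #0x2000 / sub sp, #0x468 example in its comment. A minimal sketch of that split (hypothetical helper, not the backend code):

#include <cassert>
#include <cstdint>

void SplitAddSubImmediate(int64_t value, int64_t* hi, int64_t* lo) {
  assert(value >= 0 && value < 0x1000000);  // must fit in 24 bits, as the fast path checks
  *hi = value & ~INT64_C(0xfff);            // encodable as (value >> 12) with LSL #12
  *lo = value & INT64_C(0xfff);             // encodable as an unshifted 12-bit immediate
}
// Example: 0x2468 -> hi = 0x2000 (2 << 12), lo = 0x468.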
- if (r_base == rs_rA64_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rA64_SP); AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); } return store; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 3fbbc4eba7..ec0fb43571 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -74,9 +74,9 @@ bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) { void Mir2Lir::MarkSafepointPC(LIR* inst) { DCHECK(!inst->flags.use_def_invalid); - inst->u.m.def_mask = ENCODE_ALL; + inst->u.m.def_mask = &kEncodeAll; LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC); - DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL); + DCHECK(safepoint_pc->u.m.def_mask->Equals(kEncodeAll)); } /* Remove a LIR from the list. */ @@ -108,37 +108,40 @@ void Mir2Lir::NopLIR(LIR* lir) { } void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { - uint64_t *mask_ptr; - uint64_t mask = ENCODE_MEM; DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE)); DCHECK(!lir->flags.use_def_invalid); + // TODO: Avoid the extra Arena allocation! + const ResourceMask** mask_ptr; + ResourceMask mask; if (is_load) { mask_ptr = &lir->u.m.use_mask; } else { mask_ptr = &lir->u.m.def_mask; } + mask = **mask_ptr; /* Clear out the memref flags */ - *mask_ptr &= ~mask; + mask.ClearBits(kEncodeMem); /* ..and then add back the one we need */ switch (mem_type) { - case kLiteral: + case ResourceMask::kLiteral: DCHECK(is_load); - *mask_ptr |= ENCODE_LITERAL; + mask.SetBit(ResourceMask::kLiteral); break; - case kDalvikReg: - *mask_ptr |= ENCODE_DALVIK_REG; + case ResourceMask::kDalvikReg: + mask.SetBit(ResourceMask::kDalvikReg); break; - case kHeapRef: - *mask_ptr |= ENCODE_HEAP_REF; + case ResourceMask::kHeapRef: + mask.SetBit(ResourceMask::kHeapRef); break; - case kMustNotAlias: + case ResourceMask::kMustNotAlias: /* Currently only loads can be marked as kMustNotAlias */ DCHECK(!(GetTargetInstFlags(lir->opcode) & IS_STORE)); - *mask_ptr |= ENCODE_MUST_NOT_ALIAS; + mask.SetBit(ResourceMask::kMustNotAlias); break; default: LOG(FATAL) << "Oat: invalid memref kind - " << mem_type; } + *mask_ptr = mask_cache_.GetMask(mask); } /* @@ -146,7 +149,8 @@ void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { */ void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit) { - SetMemRefType(lir, is_load, kDalvikReg); + DCHECK((is_load ? lir->u.m.use_mask : lir->u.m.def_mask)->Intersection(kEncodeMem).Equals( + kEncodeDalvikReg)); /* * Store the Dalvik register id in alias_info. 
Mark the MSB if it is a 64-bit @@ -241,10 +245,10 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { } if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use")); + DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.use_mask, "use")); } if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def")); + DUMP_RESOURCE_MASK(DumpResourceMask(lir, *lir->u.m.def_mask, "def")); } } @@ -794,7 +798,7 @@ LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { new_label->operands[0] = keyVal; new_label->flags.fixup = kFixupLabel; DCHECK(!new_label->flags.use_def_invalid); - new_label->u.m.def_mask = ENCODE_ALL; + new_label->u.m.def_mask = &kEncodeAll; InsertLIRAfter(boundary_lir, new_label); res = new_label; } @@ -972,7 +976,9 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena fp_spill_mask_(0), first_lir_insn_(NULL), last_lir_insn_(NULL), - slow_paths_(arena, 32, kGrowableArraySlowPaths) { + slow_paths_(arena, 32, kGrowableArraySlowPaths), + mem_ref_type_(ResourceMask::kHeapRef), + mask_cache_(arena) { // Reserve pointer id 0 for NULL. size_t null_idx = WrapPointer(NULL); DCHECK_EQ(null_idx, 0U); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 69ca7154e4..8f6d716ecb 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -44,7 +44,7 @@ void Mir2Lir::GenBarrier() { LIR* barrier = NewLIR0(kPseudoBarrier); /* Mark all resources as being clobbered */ DCHECK(!barrier->flags.use_def_invalid); - barrier->u.m.def_mask = ENCODE_ALL; + barrier->u.m.def_mask = &kEncodeAll; } void Mir2Lir::GenDivZeroException() { @@ -447,6 +447,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { for (int i = 0; i < elems; i++) { RegLocation loc = UpdateLoc(info->args[i]); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); } } @@ -484,7 +485,12 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { // Generate the copy loop. Going backwards for convenience LIR* target = NewLIR0(kPseudoTargetLabel); // Copy next element - LoadBaseIndexed(r_src, r_idx, r_val, 2, k32); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LoadBaseIndexed(r_src, r_idx, r_val, 2, k32); + // NOTE: No dalvik register annotation, local optimizations will be stopped + // by the loop boundaries. + } StoreBaseIndexed(r_dst, r_idx, r_val, 2, k32); FreeTemp(r_val); OpDecAndBranch(kCondGe, r_idx, target); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index b7ea362be1..2af847c7df 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -493,6 +493,7 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * end up half-promoted. In those cases, we must flush the promoted * half to memory as well. 
*/ + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; RegStorage reg = GetArgMappingToPhysicalReg(i); @@ -901,11 +902,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, } else { // kArg2 & rArg3 can safely be used here reg = TargetReg(kArg3); - Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); + } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } - Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg); + } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); next_use++; @@ -929,12 +936,15 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, vtable_idx, direct_code, direct_method, type); } int outs_offset = (next_use + 1) * 4; - if (rl_arg.wide) { - StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64); - next_use += 2; - } else { - Store32Disp(TargetReg(kSp), outs_offset, arg_reg); - next_use++; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64); + next_use += 2; + } else { + Store32Disp(TargetReg(kSp), outs_offset, arg_reg); + next_use++; + } } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); @@ -998,12 +1008,14 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (loc.wide) { loc = UpdateLocWide(loc); if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); } next_arg += 2; } else { loc = UpdateLoc(loc); if ((next_arg >= 3) && (loc.location == kLocPhysReg)) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); } next_arg++; @@ -1026,24 +1038,32 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset); - LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack); + LIR* ld = nullptr; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack); + } // TUNING: loosen barrier - ld->u.m.def_mask = ENCODE_ALL; - SetMemRefType(ld, true /* is_load */, kDalvikReg); + ld->u.m.def_mask = &kEncodeAll; call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4)); call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); - LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack); - SetMemRefType(st, false /* is_load */, kDalvikReg); - st->u.m.def_mask = ENCODE_ALL; + LIR* st = nullptr; + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack); + } + st->u.m.def_mask = &kEncodeAll; 
call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { int current_src_offset = start_offset; int current_dest_offset = outs_offset; + // Only dalvik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); while (regs_left_to_pass_via_stack > 0) { // This is based on the knowledge that the stack itself is 16-byte aligned. bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; @@ -1110,8 +1130,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); } else { // Set barrier for 128-bit load. - SetMemRefType(ld1, true /* is_load */, kDalvikReg); - ld1->u.m.def_mask = ENCODE_ALL; + ld1->u.m.def_mask = &kEncodeAll; } } if (st1 != nullptr) { @@ -1121,8 +1140,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); } else { // Set barrier for 128-bit store. - SetMemRefType(st1, false /* is_load */, kDalvikReg); - st1->u.m.def_mask = ENCODE_ALL; + st1->u.m.def_mask = &kEncodeAll; } } @@ -1310,6 +1328,9 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { RegStorage t_reg = AllocTemp(); OpRegReg(kOpNeg, t_reg, rl_result.reg); OpRegRegReg(kOpAdc, rl_result.reg, rl_result.reg, t_reg); + } else if (cu_->instruction_set == kArm64) { + OpRegImm(kOpSub, rl_result.reg, 1); + OpRegRegImm(kOpLsr, rl_result.reg, rl_result.reg, 31); } else { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); OpRegImm(kOpSub, rl_result.reg, 1); @@ -1330,6 +1351,11 @@ bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (size == k64) { RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg); + if (cu_->instruction_set == kArm64) { + OpRegReg(kOpRev, rl_result.reg, rl_i.reg); + StoreValueWide(rl_dest, rl_result); + return true; + } RegStorage r_i_low = rl_i.reg.GetLow(); if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) { // First REV shall clobber rl_result.reg.GetReg(), save the value in a temp for the second REV. @@ -1428,8 +1454,15 @@ bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { rl_src = LoadValueWide(rl_src, kCoreReg); RegLocation rl_dest = InlineTargetWide(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegCopyWide(rl_result.reg, rl_src.reg); - OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); + + if (cu_->instruction_set == kArm64) { + // TODO - Can encode? UBFX otherwise + // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff); + return false; + } else { + OpRegCopyWide(rl_result.reg, rl_src.reg); + OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); + } StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 6ef793427c..6469d9c4f1 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -65,6 +65,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg); } else { // Lives in the frame, need to store.
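The ARM64 special case added to GenInlinedStringIsEmptyOrLength() above computes is-empty without a branch: subtracting 1 from the length and logically shifting right by 31 yields 1 only when the length was 0. A self-contained check of that identity (illustrative only; IsZeroBit is not an ART name):

#include <cstdint>

constexpr uint32_t IsZeroBit(uint32_t len) {
  // 0 - 1 wraps to 0xFFFFFFFF, whose bit 31 is set; any valid string length
  // (1 <= len <= 0x7FFFFFFF) minus 1 keeps bit 31 clear.
  return (len - 1u) >> 31;
}
static_assert(IsZeroBit(0u) == 1u, "empty string maps to 1");
static_assert(IsZeroBit(1u) == 0u, "length 1 maps to 0");
static_assert(IsZeroBit(0x7fffffffu) == 0u, "maximum length maps to 0");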
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32); } if (!zero_reg.Valid()) { @@ -90,6 +91,7 @@ void Mir2Lir::LoadValueDirect(RegLocation rl_src, RegStorage r_dest) { } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (rl_src.ref) { LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest); } else { @@ -123,6 +125,7 @@ void Mir2Lir::LoadValueDirectWide(RegLocation rl_src, RegStorage r_dest) { } else { DCHECK((rl_src.location == kLocDalvikFrame) || (rl_src.location == kLocCompilerTemp)); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64); } } @@ -210,6 +213,7 @@ void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { ResetDefLoc(rl_dest); if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) { def_start = last_lir_insn_; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); MarkClean(rl_dest); def_end = last_lir_insn_; @@ -296,6 +300,7 @@ void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) { def_start = last_lir_insn_; DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); MarkClean(rl_dest); def_end = last_lir_insn_; @@ -323,6 +328,7 @@ void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) { ResetDefLoc(rl_dest); if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) { LIR *def_start = last_lir_insn_; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg); MarkClean(rl_dest); LIR *def_end = last_lir_insn_; @@ -358,6 +364,7 @@ void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) { LIR *def_start = last_lir_insn_; DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64); MarkClean(rl_dest); LIR *def_end = last_lir_insn_; diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index 4a918a138a..b97ff2a447 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -21,8 +21,8 @@ namespace art { #define DEBUG_OPT(X) /* Check RAW, WAR, and RAW dependency on the register operands */ -#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \ - ((use | def) & check->u.m.def_mask)) +#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \ + (use.Union(def).Intersects(*check->u.m.def_mask)) /* Scheduler heuristics */ #define MAX_HOIST_DISTANCE 20 @@ -109,20 +109,23 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { bool is_this_lir_load = target_flags & IS_LOAD; LIR* check_lir; /* Use the mem mask to determine the rough memory location */ - uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM; + ResourceMask this_mem_mask = kEncodeMem.Intersection( + 
this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask)); /* * Currently only eliminate redundant ld/st for constant and Dalvik * register accesses. */ - if (!(this_mem_mask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) { + if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) { continue; } - uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; - uint64_t stop_use_reg_mask; + ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); + ResourceMask stop_use_reg_mask; if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM; + // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask. + stop_use_reg_mask = ResourceMask::Bit(kIsBranch).Union(*this_lir->u.m.use_mask).Without( + kEncodeMem); } else { /* * Add pc to the resource mask to prevent this instruction @@ -130,7 +133,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * region bits since stop_mask is used to check data/control * dependencies. */ - stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = GetPCUseDefEncoding().Union(*this_lir->u.m.use_mask).Without(kEncodeMem); } for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { @@ -142,8 +145,9 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM; - uint64_t alias_condition = this_mem_mask & check_mem_mask; + ResourceMask check_mem_mask = kEncodeMem.Intersection( + check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask)); + ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask); bool stop_here = false; /* @@ -153,9 +157,9 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { // TUNING: Support instructions with multiple register targets. if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) { stop_here = true; - } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { bool is_check_lir_load = check_flags & IS_LOAD; - if (alias_condition == ENCODE_LITERAL) { + if (alias_condition.Equals(kEncodeLiteral)) { /* * Should only see literal loads in the instruction * stream. @@ -175,7 +179,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { } NopLIR(check_lir); } - } else if (alias_condition == ENCODE_DALVIK_REG) { + } else if (alias_condition.Equals(kEncodeDalvikReg)) { /* Must alias */ if (check_lir->flags.alias_info == this_lir->flags.alias_info) { /* Only optimize compatible registers */ @@ -304,7 +308,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_use_all_mask = this_lir->u.m.use_mask; + ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask; if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { /* @@ -313,14 +317,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * locations are safe to be hoisted. So only mark the heap references * conservatively here. 
*/ - if (stop_use_all_mask & ENCODE_HEAP_REF) { - stop_use_all_mask |= GetPCUseDefEncoding(); + if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) { + stop_use_all_mask.SetBits(GetPCUseDefEncoding()); } } /* Similar as above, but just check for pure register dependency */ - uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM; - uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; + ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem); + ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); int next_slot = 0; bool stop_here = false; @@ -335,22 +339,22 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM; - uint64_t alias_condition = stop_use_all_mask & check_mem_mask; + ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem); + ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask); stop_here = false; /* Potential WAR alias seen - check the exact relation */ - if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { /* We can fully disambiguate Dalvik references */ - if (alias_condition == ENCODE_DALVIK_REG) { - /* Must alias or partually overlap */ + if (alias_condition.Equals(kEncodeDalvikReg)) { + /* Must alias or partially overlap */ if ((check_lir->flags.alias_info == this_lir->flags.alias_info) || IsDalvikRegisterClobbered(this_lir, check_lir)) { stop_here = true; } /* Conservatively treat all heap refs as may-alias */ } else { - DCHECK_EQ(alias_condition, ENCODE_HEAP_REF); + DCHECK(alias_condition.Equals(kEncodeHeapRef)); stop_here = true; } /* Memory content may be updated. Stop looking now. */ @@ -413,7 +417,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { LIR* prev_lir = prev_inst_list[slot+1]; /* Check the highest instruction */ - if (prev_lir->u.m.def_mask == ENCODE_ALL) { + if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) { /* * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. @@ -443,7 +447,8 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { */ bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? 
false : (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); - if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) { + if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) || + (slot < LD_LATENCY)) { break; } } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index ea3c901fa6..62a7f2455c 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -63,7 +63,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -77,12 +77,13 @@ class MipsMir2Lir FINAL : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 381c7ce0aa..76b5243a12 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -120,60 +120,50 @@ RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { /* * Decode the register id. */ -uint64_t MipsMir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id = reg.GetRegNum(); - /* Each double register is equal to a pair of single-precision FP registers */ - if (reg.IsDouble()) { - seed = 0x3; - reg_id = reg_id << 1; - } else { - seed = 1; - } - /* FP register starts at bit position 32 */ - shift = reg.IsFloat() ? kMipsFPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); +ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + return reg.IsDouble() + /* Each double register is equal to a pair of single-precision FP registers */ + ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0) + : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kMipsFPReg0 : reg.GetRegNum()); } -uint64_t MipsMir2Lir::GetPCUseDefEncoding() { - return ENCODE_MIPS_REG_PC; +ResourceMask MipsMir2Lir::GetPCUseDefEncoding() const { + return ResourceMask::Bit(kMipsRegPC); } -void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK_EQ(cu_->instruction_set, kMips); DCHECK(!lir->flags.use_def_invalid); // Mips-specific resource map setup here. 
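The rewritten MipsMir2Lir::GetRegMaskCommon() above keeps the old encoding in a more compact form: a core register maps to its own bit, a single-precision FP register to one bit in the FP range, and a double-precision register to the two bits of its single-precision halves. A small self-contained check of that layout (illustrative; kMipsFPReg0 == 32 is assumed from the deleted "FP register starts at bit position 32" comment):

#include <cstddef>

constexpr size_t kFpReg0 = 32;  // assumed value of kMipsFPReg0
constexpr size_t SingleFpBit(size_t reg_num) { return kFpReg0 + reg_num; }
constexpr size_t DoubleFpFirstBit(size_t reg_num) { return kFpReg0 + reg_num * 2; }
static_assert(SingleFpBit(5) == 37, "$f5 occupies bit 37");
static_assert(DoubleFpFirstBit(3) == 38, "$d3 occupies bits 38 and 39, i.e. $f6/$f7");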
if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_SP; + def_mask->SetBit(kMipsRegSP); } if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_SP; + use_mask->SetBit(kMipsRegSP); } if (flags & REG_DEF_LR) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_LR; + def_mask->SetBit(kMipsRegLR); } if (flags & REG_DEF_HI) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_HI; + def_mask->SetBit(kMipsRegHI); } if (flags & REG_DEF_LO) { - lir->u.m.def_mask |= ENCODE_MIPS_REG_LO; + def_mask->SetBit(kMipsRegLO); } if (flags & REG_USE_HI) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_HI; + use_mask->SetBit(kMipsRegHI); } if (flags & REG_USE_LO) { - lir->u.m.use_mask |= ENCODE_MIPS_REG_LO; + use_mask->SetBit(kMipsRegLO); } } @@ -283,43 +273,43 @@ std::string MipsMir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned cha } // FIXME: need to redo resource maps for MIPS - fix this at that time -void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *prefix) { +void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, const ResourceMask& mask, const char *prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kMipsRegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } - if (mask & ENCODE_FP_STATUS) { + if (mask.HasBit(ResourceMask::kFPStatus)) { strcat(buf, "fpcc "); } /* Memory bits */ - if (mips_lir && (mask & ENCODE_DALVIK_REG)) { + if (mips_lir && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info), DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 2757b7be08..01b25f9291 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -534,7 +534,8 @@ LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStora } } - if (r_base == rs_rMIPS_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rMIPS_SP); AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, true /* is_load */, pair /* is64bit */); if (pair) { @@ -634,7 +635,8 @@ LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, FreeTemp(r_scratch); } - if (r_base == rs_rMIPS_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rMIPS_SP); AnnotateDalvikRegAccess(store, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, false /* is_load */, pair /* is64bit */); if (pair) { diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 2f37520b59..9912101eb1 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -57,7 +57,7 @@ inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0, (opcode == kPseudoExportedPC)) { // Always make labels scheduling barriers DCHECK(!insn->flags.use_def_invalid); - insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL; + insn->u.m.use_mask = insn->u.m.def_mask = &kEncodeAll; } return insn; } @@ -140,19 +140,20 @@ inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1 /* * Mark the corresponding bit(s). */ -inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) { +inline void Mir2Lir::SetupRegMask(ResourceMask* mask, int reg) { DCHECK_EQ((reg & ~RegStorage::kRegValMask), 0); DCHECK(reginfo_map_.Get(reg) != nullptr) << "No info for 0x" << reg; - *mask |= reginfo_map_.Get(reg)->DefUseMask(); + *mask = mask->Union(reginfo_map_.Get(reg)->DefUseMask()); } /* * Set up the proper fields in the resource mask */ -inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { +inline void Mir2Lir::SetupResourceMasks(LIR* lir) { int opcode = lir->opcode; if (IsPseudoLirOp(opcode)) { + lir->u.m.use_mask = lir->u.m.def_mask = &kEncodeNone; if (opcode != kPseudoBarrier) { lir->flags.fixup = kFixupLabel; } @@ -166,13 +167,27 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { lir->flags.fixup = kFixupLabel; } - /* Get the starting size of the instruction's template */ + /* Get the starting size of the instruction's template. */ lir->flags.size = GetInsnSize(lir); estimated_native_code_size_ += lir->flags.size; - /* Set up the mask for resources that are updated */ - if (!leave_mem_ref && (flags & (IS_LOAD | IS_STORE))) { - /* Default to heap - will catch specialized classes later */ - SetMemRefType(lir, flags & IS_LOAD, kHeapRef); + + /* Set up the mask for resources. */ + ResourceMask use_mask; + ResourceMask def_mask; + + if (flags & (IS_LOAD | IS_STORE)) { + /* Set memory reference type (defaults to heap, overridden by ScopedMemRefType). */ + if (flags & IS_LOAD) { + use_mask.SetBit(mem_ref_type_); + } else { + /* Currently only loads can be marked as kMustNotAlias. */ + DCHECK(mem_ref_type_ != ResourceMask::kMustNotAlias); + } + if (flags & IS_STORE) { + /* Literals cannot be written to. */ + DCHECK(mem_ref_type_ != ResourceMask::kLiteral); + def_mask.SetBit(mem_ref_type_); + } } /* @@ -180,52 +195,55 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir, bool leave_mem_ref) { * turn will trash everything. 
*/ if (flags & IS_BRANCH) { - lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL; + lir->u.m.def_mask = lir->u.m.use_mask = &kEncodeAll; return; } if (flags & REG_DEF0) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[0]); + SetupRegMask(&def_mask, lir->operands[0]); } if (flags & REG_DEF1) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[1]); + SetupRegMask(&def_mask, lir->operands[1]); } if (flags & REG_DEF2) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[2]); + SetupRegMask(&def_mask, lir->operands[2]); } if (flags & REG_USE0) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[0]); + SetupRegMask(&use_mask, lir->operands[0]); } if (flags & REG_USE1) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[1]); + SetupRegMask(&use_mask, lir->operands[1]); } if (flags & REG_USE2) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[2]); + SetupRegMask(&use_mask, lir->operands[2]); } if (flags & REG_USE3) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[3]); + SetupRegMask(&use_mask, lir->operands[3]); } if (flags & REG_USE4) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[4]); + SetupRegMask(&use_mask, lir->operands[4]); } if (flags & SETS_CCODES) { - lir->u.m.def_mask |= ENCODE_CCODE; + def_mask.SetBit(ResourceMask::kCCode); } if (flags & USES_CCODES) { - lir->u.m.use_mask |= ENCODE_CCODE; + use_mask.SetBit(ResourceMask::kCCode); } // Handle target-specific actions - SetupTargetResourceMasks(lir, flags); + SetupTargetResourceMasks(lir, flags, &def_mask, &use_mask); + + lir->u.m.use_mask = mask_cache_.GetMask(use_mask); + lir->u.m.def_mask = mask_cache_.GetMask(def_mask); } inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index a85be5e90c..40205eabd6 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -68,6 +68,7 @@ void Mir2Lir::LockArg(int in_position, bool wide) { // TODO: needs revisit for 64-bit. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { @@ -159,6 +160,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) } void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { /* @@ -1171,7 +1173,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { head_lir = &block_label_list_[bb->id]; // Set the first label as a scheduling barrier. DCHECK(!head_lir->flags.use_def_invalid); - head_lir->u.m.def_mask = ENCODE_ALL; + head_lir->u.m.def_mask = &kEncodeAll; } if (opcode == kMirOpCheck) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 9718acde6c..ca4d0e48bf 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -23,6 +23,7 @@ #include "dex/compiler_ir.h" #include "dex/reg_storage.h" #include "dex/backend.h" +#include "dex/quick/resource_mask.h" #include "driver/compiler_driver.h" #include "leb128.h" #include "safe_map.h" @@ -136,8 +137,8 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, typedef std::vector<uint8_t> CodeBuffer; struct UseDefMasks { - uint64_t use_mask; // Resource mask for use. - uint64_t def_mask; // Resource mask for def. 
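The mask operations used throughout these hunks (SetBit, HasBit, Union, Intersection, Without, Intersects, Equals) come from the new dex/quick/resource_mask.h header, which is not itself shown in this diff. A minimal sketch of the two-word bit set they imply, assuming the uint64_t masks_[2] layout visible in the cache code near the end of this patch (names and details are illustrative, not the real class):

#include <cstddef>
#include <cstdint>

class ResourceMaskSketch {
 public:
  static constexpr ResourceMaskSketch Bit(size_t bit) {
    return ResourceMaskSketch(bit < 64 ? UINT64_C(1) << bit : 0u,
                              bit < 64 ? 0u : UINT64_C(1) << (bit - 64));
  }
  constexpr ResourceMaskSketch Union(const ResourceMaskSketch& o) const {
    return ResourceMaskSketch(lo_ | o.lo_, hi_ | o.hi_);
  }
  constexpr ResourceMaskSketch Intersection(const ResourceMaskSketch& o) const {
    return ResourceMaskSketch(lo_ & o.lo_, hi_ & o.hi_);
  }
  constexpr ResourceMaskSketch Without(const ResourceMaskSketch& o) const {
    return ResourceMaskSketch(lo_ & ~o.lo_, hi_ & ~o.hi_);
  }
  constexpr bool Intersects(const ResourceMaskSketch& o) const {
    return ((lo_ & o.lo_) | (hi_ & o.hi_)) != 0u;
  }
  constexpr bool Equals(const ResourceMaskSketch& o) const {
    return lo_ == o.lo_ && hi_ == o.hi_;
  }
  constexpr bool HasBit(size_t bit) const {
    return ((bit < 64 ? lo_ >> bit : hi_ >> (bit - 64)) & 1u) != 0u;
  }

 private:
  constexpr ResourceMaskSketch(uint64_t lo, uint64_t hi) : lo_(lo), hi_(hi) {}
  uint64_t lo_;  // target registers and other low resources
  uint64_t hi_;  // abstract resources (heap ref, literal, Dalvik reg, cc, ...)
};

static_assert(ResourceMaskSketch::Bit(2).Union(ResourceMaskSketch::Bit(127)).HasBit(127),
              "special bits live in the upper word");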
+ const ResourceMask* use_mask; // Resource mask for use. + const ResourceMask* def_mask; // Resource mask for def. }; struct AssemblyInfo { @@ -188,20 +189,6 @@ Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_grap #define DECODE_ALIAS_INFO_WIDE(X) ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0) #define ENCODE_ALIAS_INFO(REG, ISWIDE) (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0)) -// Common resource macros. -#define ENCODE_CCODE (1ULL << kCCode) -#define ENCODE_FP_STATUS (1ULL << kFPStatus) - -// Abstract memory locations. -#define ENCODE_DALVIK_REG (1ULL << kDalvikReg) -#define ENCODE_LITERAL (1ULL << kLiteral) -#define ENCODE_HEAP_REF (1ULL << kHeapRef) -#define ENCODE_MUST_NOT_ALIAS (1ULL << kMustNotAlias) - -#define ENCODE_ALL (~0ULL) -#define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ - ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) - #define ENCODE_REG_PAIR(low_reg, high_reg) ((low_reg & 0xff) | ((high_reg & 0xff) << 8)) #define DECODE_REG_PAIR(both_regs, low_reg, high_reg) \ do { \ @@ -327,7 +314,7 @@ class Mir2Lir : public Backend { */ class RegisterInfo { public: - RegisterInfo(RegStorage r, uint64_t mask = ENCODE_ALL); + RegisterInfo(RegStorage r, const ResourceMask& mask = kEncodeAll); ~RegisterInfo() {} static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(size, kArenaAllocRegAlloc); @@ -378,8 +365,8 @@ class Mir2Lir : public Backend { RegStorage Partner() { return partner_; } void SetPartner(RegStorage partner) { partner_ = partner; } int SReg() { return (!IsTemp() || IsLive()) ? s_reg_ : INVALID_SREG; } - uint64_t DefUseMask() { return def_use_mask_; } - void SetDefUseMask(uint64_t def_use_mask) { def_use_mask_ = def_use_mask; } + const ResourceMask& DefUseMask() { return def_use_mask_; } + void SetDefUseMask(const ResourceMask& def_use_mask) { def_use_mask_ = def_use_mask; } RegisterInfo* Master() { return master_; } void SetMaster(RegisterInfo* master) { master_ = master; @@ -417,7 +404,7 @@ class Mir2Lir : public Backend { bool aliased_; // Is this the master for other aliased RegisterInfo's? RegStorage partner_; // If wide_value, other reg of pair or self if 64-bit register. int s_reg_; // Name of live value. - uint64_t def_use_mask_; // Resources for this element. + ResourceMask def_use_mask_; // Resources for this element. uint32_t used_storage_; // 1 bit per 4 bytes of storage. Unused by aliases. uint32_t liveness_; // 1 bit per 4 bytes of storage. Unused by aliases. RegisterInfo* master_; // Pointer to controlling storage mask. @@ -539,6 +526,26 @@ class Mir2Lir : public Backend { LIR* const cont_; }; + // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_. 
+ class ScopedMemRefType { + public: + ScopedMemRefType(Mir2Lir* m2l, ResourceMask::ResourceBit new_mem_ref_type) + : m2l_(m2l), + old_mem_ref_type_(m2l->mem_ref_type_) { + m2l_->mem_ref_type_ = new_mem_ref_type; + } + + ~ScopedMemRefType() { + m2l_->mem_ref_type_ = old_mem_ref_type_; + } + + private: + Mir2Lir* const m2l_; + ResourceMask::ResourceBit old_mem_ref_type_; + + DISALLOW_COPY_AND_ASSIGN(ScopedMemRefType); + }; + virtual ~Mir2Lir() {} int32_t s4FromSwitchData(const void* switch_data) { @@ -625,10 +632,10 @@ class Mir2Lir : public Backend { virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); - void SetupResourceMasks(LIR* lir, bool leave_mem_ref = false); + void SetupResourceMasks(LIR* lir); void SetMemRefType(LIR* lir, bool is_load, int mem_type); void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); - void SetupRegMask(uint64_t* mask, int reg); + void SetupRegMask(ResourceMask* mask, int reg); void DumpLIRInsn(LIR* arg, unsigned char* base_addr); void DumpPromotionMap(); void CodegenDump(); @@ -945,7 +952,7 @@ class Mir2Lir : public Backend { bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); bool GenInlinedReverseBytes(CallInfo* info, OpSize size); bool GenInlinedAbsInt(CallInfo* info); - bool GenInlinedAbsLong(CallInfo* info); + virtual bool GenInlinedAbsLong(CallInfo* info); bool GenInlinedAbsFloat(CallInfo* info); bool GenInlinedAbsDouble(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); @@ -1136,7 +1143,7 @@ class Mir2Lir : public Backend { virtual RegLocation LocCReturnDouble() = 0; virtual RegLocation LocCReturnFloat() = 0; virtual RegLocation LocCReturnWide() = 0; - virtual uint64_t GetRegMaskCommon(RegStorage reg) = 0; + virtual ResourceMask GetRegMaskCommon(const RegStorage& reg) const = 0; virtual void AdjustSpillMask() = 0; virtual void ClobberCallerSave() = 0; virtual void FreeCallTemps() = 0; @@ -1147,12 +1154,13 @@ class Mir2Lir : public Backend { // Required for target - miscellaneous. virtual void AssembleLIR() = 0; - virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0; - virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0; + virtual void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) = 0; + virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) = 0; virtual const char* GetTargetInstFmt(int opcode) = 0; virtual const char* GetTargetInstName(int opcode) = 0; virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; - virtual uint64_t GetPCUseDefEncoding() = 0; + virtual ResourceMask GetPCUseDefEncoding() const = 0; virtual uint64_t GetTargetInstFlags(int opcode) = 0; virtual int GetInsnSize(LIR* lir) = 0; virtual bool IsUnconditionalBranch(LIR* lir) = 0; @@ -1576,6 +1584,17 @@ class Mir2Lir : public Backend { LIR* last_lir_insn_; GrowableArray<LIRSlowPath*> slow_paths_; + + // The memory reference type for new LIRs. + // NOTE: Passing this as an explicit parameter by all functions that directly or indirectly + // invoke RawLIR() would clutter the code and reduce the readability. + ResourceMask::ResourceBit mem_ref_type_; + + // Each resource mask now takes 16-bytes, so having both use/def masks directly in a LIR + // would consume 32 bytes per LIR. Instead, the LIR now holds only pointers to the masks + // (i.e. 
8 bytes on 32-bit arch, 16 bytes on 64-bit arch) and we use ResourceMaskCache + // to deduplicate the masks. + ResourceMaskCache mask_cache_; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index bbeef50d73..cae59c88c1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -38,7 +38,7 @@ void Mir2Lir::ResetRegPool() { } } -Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask) +Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, const ResourceMask& mask) : reg_(r), is_temp_(false), wide_value_(false), dirty_(false), aliased_(false), partner_(r), s_reg_(INVALID_SREG), def_use_mask_(mask), master_(this), def_start_(nullptr), def_end_(nullptr), alias_chain_(nullptr) { @@ -82,22 +82,22 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, } // Construct the register pool. - for (RegStorage reg : core_regs) { + for (const RegStorage& reg : core_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); core_regs_.Insert(info); } - for (RegStorage reg : core64_regs) { + for (const RegStorage& reg : core64_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); core64_regs_.Insert(info); } - for (RegStorage reg : sp_regs) { + for (const RegStorage& reg : sp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); sp_regs_.Insert(info); } - for (RegStorage reg : dp_regs) { + for (const RegStorage& reg : dp_regs) { RegisterInfo* info = new (arena) RegisterInfo(reg, m2l_->GetRegMaskCommon(reg)); m2l_->reginfo_map_.Put(reg.GetReg(), info); dp_regs_.Insert(info); @@ -126,7 +126,7 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, } // Add an entry for InvalidReg with zero'd mask. - RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0); + RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), kEncodeNone); m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg); // Existence of core64 registers implies wide references. 
@@ -734,6 +734,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { info1 = info2; } int v_reg = mir_graph_->SRegToVReg(info1->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); } } else { @@ -741,6 +742,7 @@ void Mir2Lir::FlushRegWide(RegStorage reg) { if (info->IsLive() && info->IsDirty()) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64); } } @@ -752,6 +754,7 @@ void Mir2Lir::FlushReg(RegStorage reg) { if (info->IsLive() && info->IsDirty()) { info->SetIsDirty(false); int v_reg = mir_graph_->SRegToVReg(info->SReg()); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord); } } diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc new file mode 100644 index 0000000000..17995fbf79 --- /dev/null +++ b/compiler/dex/quick/resource_mask.cc @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iomanip> + +#include "resource_mask.h" + +#include "utils/arena_allocator.h" + +namespace art { + +namespace { // anonymous namespace + +constexpr ResourceMask kNoRegMasks[] = { + kEncodeNone, + kEncodeHeapRef, + kEncodeLiteral, + kEncodeDalvikReg, + ResourceMask::Bit(ResourceMask::kFPStatus), + ResourceMask::Bit(ResourceMask::kCCode), +}; +// The 127-bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set. +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kHeapRef].Equals( + kEncodeHeapRef), check_kNoRegMasks_heap_ref_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kLiteral].Equals( + kEncodeLiteral), check_kNoRegMasks_literal_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kDalvikReg].Equals( + kEncodeDalvikReg), check_kNoRegMasks_dalvik_reg_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kFPStatus].Equals( + ResourceMask::Bit(ResourceMask::kFPStatus)), check_kNoRegMasks_fp_status_index); +COMPILE_ASSERT(kNoRegMasks[127-ResourceMask::kCCode].Equals( + ResourceMask::Bit(ResourceMask::kCCode)), check_kNoRegMasks_ccode_index); + +template <size_t special_bit> +constexpr ResourceMask OneRegOneSpecial(size_t reg) { + return ResourceMask::Bit(reg).Union(ResourceMask::Bit(special_bit)); +} + +// NOTE: Working around gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61484 . +// This should be a two-dimensions array, kSingleRegMasks[][32] and each line should be +// enclosed in an extra { }. However, gcc issues a bogus "error: array must be initialized +// with a brace-enclosed initializer" for that, so we flatten this to a one-dimensional array. 
+constexpr ResourceMask kSingleRegMasks[] = { +#define DEFINE_LIST_32(fn) \ + fn(0), fn(1), fn(2), fn(3), fn(4), fn(5), fn(6), fn(7), \ + fn(8), fn(9), fn(10), fn(11), fn(12), fn(13), fn(14), fn(15), \ + fn(16), fn(17), fn(18), fn(19), fn(20), fn(21), fn(22), fn(23), \ + fn(24), fn(25), fn(26), fn(27), fn(28), fn(29), fn(30), fn(31) + // NOTE: Each line is 512B of constant data, 3KiB in total. + DEFINE_LIST_32(ResourceMask::Bit), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kHeapRef>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kLiteral>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kDalvikReg>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kFPStatus>), + DEFINE_LIST_32(OneRegOneSpecial<ResourceMask::kCCode>), +#undef DEFINE_LIST_32 +}; + +constexpr size_t SingleRegMaskIndex(size_t main_index, size_t sub_index) { + return main_index * 32u + sub_index; +} + +// The 127-bit is the same as CLZ(masks_[1]) for a ResourceMask with only that bit set. +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kHeapRef, 0)].Equals( + OneRegOneSpecial<ResourceMask::kHeapRef>(0)), check_kSingleRegMasks_heap_ref_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kLiteral, 0)].Equals( + OneRegOneSpecial<ResourceMask::kLiteral>(0)), check_kSingleRegMasks_literal_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kDalvikReg, 0)].Equals( + OneRegOneSpecial<ResourceMask::kDalvikReg>(0)), check_kSingleRegMasks_dalvik_reg_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kFPStatus, 0)].Equals( + OneRegOneSpecial<ResourceMask::kFPStatus>(0)), check_kSingleRegMasks_fp_status_index); +COMPILE_ASSERT(kSingleRegMasks[SingleRegMaskIndex(127-ResourceMask::kCCode, 0)].Equals( + OneRegOneSpecial<ResourceMask::kCCode>(0)), check_kSingleRegMasks_ccode_index); + +// NOTE: arraysize(kNoRegMasks) multiplied by 32 due to the gcc bug workaround, see above. +COMPILE_ASSERT(arraysize(kSingleRegMasks) == arraysize(kNoRegMasks) * 32, check_arraysizes); + +constexpr ResourceMask kTwoRegsMasks[] = { +#define TWO(a, b) ResourceMask::Bit(a).Union(ResourceMask::Bit(b)) + // NOTE: 16 * 15 / 2 = 120 entries, 16 bytes each, 1920B in total. 
+ TWO(0, 1), + TWO(0, 2), TWO(1, 2), + TWO(0, 3), TWO(1, 3), TWO(2, 3), + TWO(0, 4), TWO(1, 4), TWO(2, 4), TWO(3, 4), + TWO(0, 5), TWO(1, 5), TWO(2, 5), TWO(3, 5), TWO(4, 5), + TWO(0, 6), TWO(1, 6), TWO(2, 6), TWO(3, 6), TWO(4, 6), TWO(5, 6), + TWO(0, 7), TWO(1, 7), TWO(2, 7), TWO(3, 7), TWO(4, 7), TWO(5, 7), TWO(6, 7), + TWO(0, 8), TWO(1, 8), TWO(2, 8), TWO(3, 8), TWO(4, 8), TWO(5, 8), TWO(6, 8), TWO(7, 8), + TWO(0, 9), TWO(1, 9), TWO(2, 9), TWO(3, 9), TWO(4, 9), TWO(5, 9), TWO(6, 9), TWO(7, 9), + TWO(8, 9), + TWO(0, 10), TWO(1, 10), TWO(2, 10), TWO(3, 10), TWO(4, 10), TWO(5, 10), TWO(6, 10), TWO(7, 10), + TWO(8, 10), TWO(9, 10), + TWO(0, 11), TWO(1, 11), TWO(2, 11), TWO(3, 11), TWO(4, 11), TWO(5, 11), TWO(6, 11), TWO(7, 11), + TWO(8, 11), TWO(9, 11), TWO(10, 11), + TWO(0, 12), TWO(1, 12), TWO(2, 12), TWO(3, 12), TWO(4, 12), TWO(5, 12), TWO(6, 12), TWO(7, 12), + TWO(8, 12), TWO(9, 12), TWO(10, 12), TWO(11, 12), + TWO(0, 13), TWO(1, 13), TWO(2, 13), TWO(3, 13), TWO(4, 13), TWO(5, 13), TWO(6, 13), TWO(7, 13), + TWO(8, 13), TWO(9, 13), TWO(10, 13), TWO(11, 13), TWO(12, 13), + TWO(0, 14), TWO(1, 14), TWO(2, 14), TWO(3, 14), TWO(4, 14), TWO(5, 14), TWO(6, 14), TWO(7, 14), + TWO(8, 14), TWO(9, 14), TWO(10, 14), TWO(11, 14), TWO(12, 14), TWO(13, 14), + TWO(0, 15), TWO(1, 15), TWO(2, 15), TWO(3, 15), TWO(4, 15), TWO(5, 15), TWO(6, 15), TWO(7, 15), + TWO(8, 15), TWO(9, 15), TWO(10, 15), TWO(11, 15), TWO(12, 15), TWO(13, 15), TWO(14, 15), +#undef TWO +}; +COMPILE_ASSERT(arraysize(kTwoRegsMasks) == 16 * 15 / 2, check_arraysize_kTwoRegsMasks); + +constexpr size_t TwoRegsIndex(size_t higher, size_t lower) { + return (higher * (higher - 1)) / 2u + lower; +} + +constexpr bool CheckTwoRegsMask(size_t higher, size_t lower) { + return ResourceMask::Bit(lower).Union(ResourceMask::Bit(higher)).Equals( + kTwoRegsMasks[TwoRegsIndex(higher, lower)]); +} + +constexpr bool CheckTwoRegsMaskLine(size_t line, size_t lower = 0u) { + return (lower == line) || + (CheckTwoRegsMask(line, lower) && CheckTwoRegsMaskLine(line, lower + 1u)); +} + +constexpr bool CheckTwoRegsMaskTable(size_t lines) { + return lines == 0 || + (CheckTwoRegsMaskLine(lines - 1) && CheckTwoRegsMaskTable(lines - 1u)); +} + +COMPILE_ASSERT(CheckTwoRegsMaskTable(16), check_two_regs_masks_table); + +} // anonymous namespace + +const ResourceMask* ResourceMaskCache::GetMask(const ResourceMask& mask) { + // Instead of having a deduplication map, we shall just use pre-defined constexpr + // masks for the common cases. At most one of the these special bits is allowed: + constexpr ResourceMask kAllowedSpecialBits = ResourceMask::Bit(ResourceMask::kFPStatus) + .Union(ResourceMask::Bit(ResourceMask::kCCode)) + .Union(kEncodeHeapRef).Union(kEncodeLiteral).Union(kEncodeDalvikReg); + const ResourceMask* res = nullptr; + // Limit to low 32 regs and the kAllowedSpecialBits. + if ((mask.masks_[0] >> 32) == 0u && (mask.masks_[1] & ~kAllowedSpecialBits.masks_[1]) == 0u) { + // Check if it's only up to two registers. + uint32_t low_regs = static_cast<uint32_t>(mask.masks_[0]); + uint32_t low_regs_without_lowest = low_regs & (low_regs - 1u); + if (low_regs_without_lowest == 0u && IsPowerOfTwo(mask.masks_[1])) { + // 0 or 1 register, 0 or 1 bit from kAllowedBits. Use a pre-defined mask. + size_t index = (mask.masks_[1] != 0u) ? CLZ(mask.masks_[1]) : 0u; + DCHECK_LT(index, arraysize(kNoRegMasks)); + res = (low_regs != 0) ? 
&kSingleRegMasks[SingleRegMaskIndex(index, CTZ(low_regs))] + : &kNoRegMasks[index]; + } else if (IsPowerOfTwo(low_regs_without_lowest) && mask.masks_[1] == 0u) { + // 2 registers and no other flags. Use predefined mask if higher reg is < 16. + if (low_regs_without_lowest < (1u << 16)) { + res = &kTwoRegsMasks[TwoRegsIndex(CTZ(low_regs_without_lowest), CTZ(low_regs))]; + } + } + } else if (mask.Equals(kEncodeAll)) { + res = &kEncodeAll; + } + if (res != nullptr) { + DCHECK(res->Equals(mask)) + << "(" << std::hex << std::setw(16) << mask.masks_[0] + << ", "<< std::hex << std::setw(16) << mask.masks_[1] + << ") != (" << std::hex << std::setw(16) << res->masks_[0] + << ", "<< std::hex << std::setw(16) << res->masks_[1] << ")"; + return res; + } + + // TODO: Deduplicate. (At least the most common masks.) + void* mem = allocator_->Alloc(sizeof(ResourceMask), kArenaAllocLIRResourceMask); + return new (mem) ResourceMask(mask); +} + +} // namespace art diff --git a/compiler/dex/quick/resource_mask.h b/compiler/dex/quick/resource_mask.h new file mode 100644 index 0000000000..12ce98adc4 --- /dev/null +++ b/compiler/dex/quick/resource_mask.h @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ +#define ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ + +#include <stdint.h> + +#include "base/logging.h" +#include "dex/reg_storage.h" + +namespace art { + +class ArenaAllocator; + +/** + * @brief Resource mask for LIR insn uses or defs. + * @detail Def/Use mask used for checking dependencies between LIR insns in local + * optimizations such as load hoisting. + */ +class ResourceMask { + private: + constexpr ResourceMask(uint64_t mask1, uint64_t mask2) + : masks_{ mask1, mask2 } { // NOLINT + } + + public: + /* + * Def/Use encoding in 128-bit use_mask/def_mask. Low positions used for target-specific + * registers (and typically use the register number as the position). High positions + * reserved for common and abstract resources. + */ + enum ResourceBit { + kMustNotAlias = 127, + kHeapRef = 126, // Default memory reference type. + kLiteral = 125, // Literal pool memory reference. + kDalvikReg = 124, // Dalvik v_reg memory reference. + kFPStatus = 123, + kCCode = 122, + kLowestCommonResource = kCCode, + kHighestCommonResource = kMustNotAlias + }; + + // Default-constructible. + constexpr ResourceMask() + : masks_ { 0u, 0u } { + } + + // Copy-constructible and copyable. + ResourceMask(const ResourceMask& other) = default; + ResourceMask& operator=(const ResourceMask& other) = default; + + static constexpr ResourceMask RawMask(uint64_t mask1, uint64_t mask2) { + return ResourceMask(mask1, mask2); + } + + static constexpr ResourceMask Bit(size_t bit) { + return ResourceMask(bit >= 64u ? 0u : UINT64_C(1) << bit, + bit >= 64u ? UINT64_C(1) << (bit - 64u) : 0u); + } + + // Two consecutive bits. The start_bit must be even. 
+ static constexpr ResourceMask TwoBits(size_t start_bit) { + return + DCHECK_CONSTEXPR((start_bit & 1u) == 0u, << start_bit << " isn't even", Bit(0)) + ResourceMask(start_bit >= 64u ? 0u : UINT64_C(3) << start_bit, + start_bit >= 64u ? UINT64_C(3) << (start_bit - 64u) : 0u); + } + + static constexpr ResourceMask NoBits() { + return ResourceMask(UINT64_C(0), UINT64_C(0)); + } + + static constexpr ResourceMask AllBits() { + return ResourceMask(~UINT64_C(0), ~UINT64_C(0)); + } + + constexpr ResourceMask Union(const ResourceMask& other) const { + return ResourceMask(masks_[0] | other.masks_[0], masks_[1] | other.masks_[1]); + } + + constexpr ResourceMask Intersection(const ResourceMask& other) const { + return ResourceMask(masks_[0] & other.masks_[0], masks_[1] & other.masks_[1]); + } + + constexpr ResourceMask Without(const ResourceMask& other) const { + return ResourceMask(masks_[0] & ~other.masks_[0], masks_[1] & ~other.masks_[1]); + } + + constexpr bool Equals(const ResourceMask& other) const { + return masks_[0] == other.masks_[0] && masks_[1] == other.masks_[1]; + } + + constexpr bool Intersects(const ResourceMask& other) const { + return (masks_[0] & other.masks_[0]) != 0u || (masks_[1] & other.masks_[1]) != 0u; + } + + void SetBit(size_t bit) { + DCHECK_LE(bit, kHighestCommonResource); + masks_[bit / 64u] |= UINT64_C(1) << (bit & 63u); + } + + constexpr bool HasBit(size_t bit) const { + return (masks_[bit / 64u] & (UINT64_C(1) << (bit & 63u))) != 0u; + } + + ResourceMask& SetBits(const ResourceMask& other) { + masks_[0] |= other.masks_[0]; + masks_[1] |= other.masks_[1]; + return *this; + } + + ResourceMask& ClearBits(const ResourceMask& other) { + masks_[0] &= ~other.masks_[0]; + masks_[1] &= ~other.masks_[1]; + return *this; + } + + private: + uint64_t masks_[2]; + + friend class ResourceMaskCache; +}; + +constexpr ResourceMask kEncodeNone = ResourceMask::NoBits(); +constexpr ResourceMask kEncodeAll = ResourceMask::AllBits(); +constexpr ResourceMask kEncodeHeapRef = ResourceMask::Bit(ResourceMask::kHeapRef); +constexpr ResourceMask kEncodeLiteral = ResourceMask::Bit(ResourceMask::kLiteral); +constexpr ResourceMask kEncodeDalvikReg = ResourceMask::Bit(ResourceMask::kDalvikReg); +constexpr ResourceMask kEncodeMem = kEncodeLiteral.Union(kEncodeDalvikReg).Union( + kEncodeHeapRef).Union(ResourceMask::Bit(ResourceMask::kMustNotAlias)); + +class ResourceMaskCache { + public: + explicit ResourceMaskCache(ArenaAllocator* allocator) + : allocator_(allocator) { + } + + const ResourceMask* GetMask(const ResourceMask& mask); + + private: + ArenaAllocator* allocator_; +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_RESOURCE_MASK_H_ diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 0a8193af35..d37ee67647 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -1541,7 +1541,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { << " delta: " << delta << " old delta: " << lir->operands[0]; } lir->opcode = kX86Jcc32; - SetupResourceMasks(lir); + lir->flags.size = GetInsnSize(lir); + DCHECK(lir->u.m.def_mask->Equals(kEncodeAll)); + DCHECK(lir->u.m.use_mask->Equals(kEncodeAll)); res = kRetryAll; } if (kVerbosePcFixup) { @@ -1605,7 +1607,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { LOG(INFO) << "Retry for JMP growth at " << lir->offset; } lir->opcode = kX86Jmp32; - SetupResourceMasks(lir); + lir->flags.size = GetInsnSize(lir); + 
DCHECK(lir->u.m.def_mask->Equals(kEncodeAll)); + DCHECK(lir->u.m.use_mask->Equals(kEncodeAll)); res = kRetryAll; } lir->operands[0] = delta; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 61c9f4f041..6ae553dab3 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -99,7 +99,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); - uint64_t GetRegMaskCommon(RegStorage reg); + ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE; void AdjustSpillMask(); void ClobberCallerSave(); void FreeCallTemps(); @@ -113,12 +113,13 @@ class X86Mir2Lir : public Mir2Lir { int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); - void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + void DumpResourceMask(LIR* lir, const ResourceMask& mask, const char* prefix) OVERRIDE; + void SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) OVERRIDE; const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); - uint64_t GetPCUseDefEncoding(); + ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); int GetInsnSize(LIR* lir); bool IsUnconditionalBranch(LIR* lir); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index c3580f76ae..ced64009e6 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -147,6 +147,9 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do // Update the in-register state of source. rl_src = UpdateLocWide(rl_src); + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + // If the source is in physical register, then put it in its location on stack. if (rl_src.location == kLocPhysReg) { RegisterInfo* reg_info = GetRegInfo(rl_src.reg); @@ -191,15 +194,12 @@ void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_do * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the * correct register class. 
*/ + rl_result = EvalLoc(rl_dest, kFPReg, true); if (is_double) { - rl_result = EvalLocWide(rl_dest, kFPReg, true); - LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); StoreFinalValueWide(rl_dest, rl_result); } else { - rl_result = EvalLoc(rl_dest, kFPReg, true); - Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); StoreFinalValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index a050a05466..4a77df2198 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -794,34 +794,61 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX); LoadValueDirectWideFixed(rl_src_expected, r_tmp1); LoadValueDirectWideFixed(rl_src_new_value, r_tmp2); - NewLIR1(kX86Push32R, rs_rDI.GetReg()); - MarkTemp(rs_rDI); - LockTemp(rs_rDI); - NewLIR1(kX86Push32R, rs_rSI.GetReg()); - MarkTemp(rs_rSI); - LockTemp(rs_rSI); - const int push_offset = 4 /* push edi */ + 4 /* push esi */; - int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0 - : (IsInReg(this, rl_src_obj, rs_rDI) ? 4 - : (SRegOffset(rl_src_obj.s_reg_low) + push_offset)); // FIXME: needs 64-bit update. - LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI); - int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0 - : (IsInReg(this, rl_src_offset, rs_rDI) ? 4 - : (SRegOffset(rl_src_offset.s_reg_low) + push_offset)); - LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI); - NewLIR4(kX86LockCmpxchg64A, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0); + const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI); + const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI); + DCHECK(!obj_in_si || !obj_in_di); + const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI); + const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI); + DCHECK(!off_in_si || !off_in_di); + // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg. + RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI; + RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI; + bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI); + bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI); + if (push_di) { + NewLIR1(kX86Push32R, rs_rDI.GetReg()); + MarkTemp(rs_rDI); + LockTemp(rs_rDI); + } + if (push_si) { + NewLIR1(kX86Push32R, rs_rSI.GetReg()); + MarkTemp(rs_rSI); + LockTemp(rs_rSI); + } + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); + if (!obj_in_si && !obj_in_di) { + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj); + // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. + DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); + int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; + AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); + } + if (!off_in_si && !off_in_di) { + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off); + // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. 
+ DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); + int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; + AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); + } + NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0); // After a store we need to insert barrier in case of potential load. Since the // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated. GenMemBarrier(kStoreLoad); - FreeTemp(rs_rSI); - UnmarkTemp(rs_rSI); - NewLIR1(kX86Pop32R, rs_rSI.GetReg()); - FreeTemp(rs_rDI); - UnmarkTemp(rs_rDI); - NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + + if (push_si) { + FreeTemp(rs_rSI); + UnmarkTemp(rs_rSI); + NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + } + if (push_di) { + FreeTemp(rs_rDI); + UnmarkTemp(rs_rDI); + NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + } FreeCallTemps(); } else { // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. @@ -885,11 +912,11 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256, 0, 0, target); res->target = target; res->flags.fixup = kFixupLoad; - SetMemRefType(res, true, kLiteral); store_method_addr_used_ = true; return res; } @@ -1077,6 +1104,9 @@ void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) { } void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) { + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + LIR *m; switch (val) { case 0: @@ -1095,6 +1125,9 @@ void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (Gen64Bit()) { if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); @@ -1346,6 +1379,7 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_src.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); @@ -1379,6 +1413,7 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, Gen64Bit() ? 
rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, @@ -2061,6 +2096,7 @@ bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); X86OpCode x86op = GetOpcode(op, rl_dest, false, val); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, @@ -2091,6 +2127,7 @@ bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (!IsNoOp(op, val_lo)) { X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo); @@ -2469,6 +2506,9 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, return; } + // If we generate any memory access below, it will reference a dalvik reg. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (unary) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = UpdateLocTyped(rl_dest, kCoreReg); @@ -2620,6 +2660,7 @@ void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); } else { int displacement = SRegOffset(rl_src.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, @@ -2670,6 +2711,7 @@ void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); if (rl_result.location != kLocPhysReg) { // Okay, we can do this into memory + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); OpMemReg(op, rl_result, t_reg.GetReg()); } else if (!rl_result.reg.IsFloat()) { // Can do this directly into the result register diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index ec165af865..d1ba2398c5 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -206,77 +206,70 @@ RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { /* * Decode the register id. */ -uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id; - - reg_id = reg.GetRegNum(); - /* Double registers in x86 are just a single FP register */ - seed = 1; - /* FP register starts at bit position 16 */ - shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); -} - -uint64_t X86Mir2Lir::GetPCUseDefEncoding() { +ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const { + /* Double registers in x86 are just a single FP register. This is always just a single bit. */ + return ResourceMask::Bit( + /* FP register starts at bit position 16 */ + ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum()); +} + +ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const { /* * FIXME: might make sense to use a virtual resource encoding bit for pc. 
Might be * able to clean up some of the x86/Arm_Mips differences */ LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86"; - return 0ULL; + return kEncodeNone; } -void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { +void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags, + ResourceMask* use_mask, ResourceMask* def_mask) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); DCHECK(!lir->flags.use_def_invalid); // X86-specific resource map setup here. if (flags & REG_USE_SP) { - lir->u.m.use_mask |= ENCODE_X86_REG_SP; + use_mask->SetBit(kX86RegSP); } if (flags & REG_DEF_SP) { - lir->u.m.def_mask |= ENCODE_X86_REG_SP; + def_mask->SetBit(kX86RegSP); } if (flags & REG_DEFA) { - SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg()); + SetupRegMask(def_mask, rs_rAX.GetReg()); } if (flags & REG_DEFD) { - SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg()); + SetupRegMask(def_mask, rs_rDX.GetReg()); } if (flags & REG_USEA) { - SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg()); + SetupRegMask(use_mask, rs_rAX.GetReg()); } if (flags & REG_USEC) { - SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg()); + SetupRegMask(use_mask, rs_rCX.GetReg()); } if (flags & REG_USED) { - SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg()); + SetupRegMask(use_mask, rs_rDX.GetReg()); } if (flags & REG_USEB) { - SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg()); + SetupRegMask(use_mask, rs_rBX.GetReg()); } // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI. if (lir->opcode == kX86RepneScasw) { - SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg()); - SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg()); - SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg()); - SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg()); + SetupRegMask(use_mask, rs_rAX.GetReg()); + SetupRegMask(use_mask, rs_rCX.GetReg()); + SetupRegMask(use_mask, rs_rDI.GetReg()); + SetupRegMask(def_mask, rs_rDI.GetReg()); } if (flags & USE_FP_STACK) { - lir->u.m.use_mask |= ENCODE_X86_FP_STACK; - lir->u.m.def_mask |= ENCODE_X86_FP_STACK; + use_mask->SetBit(kX86FPStack); + def_mask->SetBit(kX86FPStack); } } @@ -368,40 +361,40 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char return buf; } -void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) { +void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) { char buf[256]; buf[0] = 0; - if (mask == ENCODE_ALL) { + if (mask.Equals(kEncodeAll)) { strcpy(buf, "all"); } else { char num[8]; int i; for (i = 0; i < kX86RegEnd; i++) { - if (mask & (1ULL << i)) { + if (mask.HasBit(i)) { snprintf(num, arraysize(num), "%d ", i); strcat(buf, num); } } - if (mask & ENCODE_CCODE) { + if (mask.HasBit(ResourceMask::kCCode)) { strcat(buf, "cc "); } /* Memory bits */ - if (x86LIR && (mask & ENCODE_DALVIK_REG)) { + if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) { snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info), (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : ""); } - if (mask & ENCODE_LITERAL) { + if (mask.HasBit(ResourceMask::kLiteral)) { strcat(buf, "lit "); } - if (mask & ENCODE_HEAP_REF) { + if (mask.HasBit(ResourceMask::kHeapRef)) { strcat(buf, "heap "); } - if (mask & ENCODE_MUST_NOT_ALIAS) { + if (mask.HasBit(ResourceMask::kMustNotAlias)) { strcat(buf, "noalias "); } } @@ -551,7 +544,7 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { } else { // Mark as a scheduling barrier. 
DCHECK(!mem_barrier->flags.use_def_invalid); - mem_barrier->u.m.def_mask = ENCODE_ALL; + mem_barrier->u.m.def_mask = &kEncodeAll; } return ret; #else @@ -822,6 +815,7 @@ void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) { int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo); AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); @@ -1109,7 +1103,10 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } else { // Load the start index from stack, remembering that we pushed EDI. int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t); - Load32Disp(rs_rX86_SP, displacement, rs_rBX); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + Load32Disp(rs_rX86_SP, displacement, rs_rBX); + } OpRegReg(kOpXor, rs_rDI, rs_rDI); OpRegReg(kOpCmp, rs_rBX, rs_rDI); OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI); @@ -1413,10 +1410,10 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */); load->flags.fixup = kFixupLoad; load->target = data_target; - SetMemRefType(load, true, kLiteral); } void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) { @@ -1856,6 +1853,7 @@ void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * end up half-promoted. In those cases, we must flush the promoted * half to memory as well. */ + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); for (int i = 0; i < cu_->num_ins; i++) { PromotionMap* v_map = &promotion_map_[start_vreg + i]; RegStorage reg = RegStorage::InvalidReg(); @@ -1986,12 +1984,14 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (loc.wide) { loc = UpdateLocWide(loc); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); } next_arg += 2; } else { loc = UpdateLoc(loc); if (loc.location == kLocPhysReg) { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); } next_arg++; @@ -2008,6 +2008,8 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, int current_src_offset = start_offset; int current_dest_offset = outs_offset; + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); while (regs_left_to_pass_via_stack > 0) { // This is based on the knowledge that the stack itself is 16-byte aligned. 
bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; @@ -2045,6 +2047,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); if (src_is_16b_aligned) { ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP); } else if (src_is_8b_aligned) { @@ -2074,8 +2077,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); } else { // Set barrier for 128-bit load. - SetMemRefType(ld1, true /* is_load */, kDalvikReg); - ld1->u.m.def_mask = ENCODE_ALL; + ld1->u.m.def_mask = &kEncodeAll; } } if (st1 != nullptr) { @@ -2085,8 +2087,7 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); } else { // Set barrier for 128-bit store. - SetMemRefType(st1, false /* is_load */, kDalvikReg); - st1->u.m.def_mask = ENCODE_ALL; + st1->u.m.def_mask = &kEncodeAll; } } @@ -2123,20 +2124,23 @@ int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (!reg.Valid()) { int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); - } - i++; - } else { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + } + i++; } else { - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + } } } call_state = next_call_insn(cu_, info, call_state, target_method, diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index d074d8104d..c72e8cd1d9 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -376,7 +376,8 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int o break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), offset); - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */); } return l; @@ -403,8 +404,10 @@ LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) { break; } LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); - AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit 
*/); + AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); + } return l; } @@ -427,7 +430,9 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) { break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + } return l; } @@ -575,11 +580,11 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. + ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), kDouble); res->target = data_target; res->flags.fixup = kFixupLoad; - SetMemRefType(res, true, kLiteral); store_method_addr_used_ = true; } else { if (val_lo == 0) { @@ -684,7 +689,8 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int displacement + HIWORD_OFFSET); } } - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, true /* is_load */, is64bit); if (pair) { @@ -815,7 +821,8 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetLowReg()); store2 = NewLIR3(opcode, r_base.GetReg(), displacement + HIWORD_OFFSET, r_src.GetHighReg()); } - if (r_base == rs_rX86_SP) { + if (mem_ref_type_ == ResourceMask::kDalvikReg) { + DCHECK(r_base == rs_rX86_SP); AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, false /* is_load */, is64bit); if (pair) { diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 5022529667..f1b5811a33 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -111,9 +111,6 @@ enum X86ResourceEncodingPos { kX86RegEnd = kX86FPStack, }; -#define ENCODE_X86_REG_SP (1ULL << kX86RegSP) -#define ENCODE_X86_FP_STACK (1ULL << kX86FPStack) - // FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum? enum X86NativeRegisterPool { r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 7e50c311da..3b891f2f20 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_DEX_REG_STORAGE_H_ #define ART_COMPILER_DEX_REG_STORAGE_H_ +#include "base/logging.h" namespace art { @@ -102,17 +103,21 @@ class RegStorage { static const uint16_t kHighRegMask = (kHighRegNumMask << kHighRegShift); // Reg is [F][LLLLL], will override any existing shape and use rs_kind. 
- RegStorage(RegStorageKind rs_kind, int reg) { - DCHECK_NE(rs_kind, k64BitPair); - DCHECK_EQ(rs_kind & ~kShapeMask, 0); - reg_ = kValid | rs_kind | (reg & kRegTypeMask); - } - RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) { - DCHECK_EQ(rs_kind, k64BitPair); - DCHECK_EQ(low_reg & kFloatingPoint, high_reg & kFloatingPoint); - DCHECK_LE(high_reg & kRegNumMask, kHighRegNumMask) << "High reg must be in 0..31"; - reg_ = kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) | - (low_reg & kRegTypeMask); + constexpr RegStorage(RegStorageKind rs_kind, int reg) + : reg_( + DCHECK_CONSTEXPR(rs_kind != k64BitPair, , 0u) + DCHECK_CONSTEXPR((rs_kind & ~kShapeMask) == 0, , 0u) + kValid | rs_kind | (reg & kRegTypeMask)) { + } + constexpr RegStorage(RegStorageKind rs_kind, int low_reg, int high_reg) + : reg_( + DCHECK_CONSTEXPR(rs_kind == k64BitPair, << rs_kind, 0u) + DCHECK_CONSTEXPR((low_reg & kFloatingPoint) == (high_reg & kFloatingPoint), + << low_reg << ", " << high_reg, 0u) + DCHECK_CONSTEXPR((high_reg & kRegNumMask) <= kHighRegNumMask, + << "High reg must be in 0..31: " << high_reg, false) + kValid | rs_kind | ((high_reg & kHighRegNumMask) << kHighRegShift) | + (low_reg & kRegTypeMask)) { } constexpr explicit RegStorage(uint16_t val) : reg_(val) {} RegStorage() : reg_(kInvalid) {} @@ -125,50 +130,53 @@ class RegStorage { return (reg_ != rhs.GetRawBits()); } - bool Valid() const { + constexpr bool Valid() const { return ((reg_ & kValidMask) == kValid); } - bool Is32Bit() const { + constexpr bool Is32Bit() const { return ((reg_ & kShapeMask) == k32BitSolo); } - bool Is64Bit() const { + constexpr bool Is64Bit() const { return ((reg_ & k64BitMask) == k64Bits); } - bool Is64BitSolo() const { + constexpr bool Is64BitSolo() const { return ((reg_ & kShapeMask) == k64BitSolo); } - bool IsPair() const { + constexpr bool IsPair() const { return ((reg_ & kShapeMask) == k64BitPair); } - bool IsFloat() const { - DCHECK(Valid()); - return ((reg_ & kFloatingPoint) == kFloatingPoint); + constexpr bool IsFloat() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + ((reg_ & kFloatingPoint) == kFloatingPoint); } - bool IsDouble() const { - DCHECK(Valid()); - return (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); + constexpr bool IsDouble() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + (reg_ & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } - bool IsSingle() const { - DCHECK(Valid()); - return (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint; + constexpr bool IsSingle() const { + return + DCHECK_CONSTEXPR(Valid(), , false) + (reg_ & (kFloatingPoint | k64BitMask)) == kFloatingPoint; } - static bool IsFloat(uint16_t reg) { + static constexpr bool IsFloat(uint16_t reg) { return ((reg & kFloatingPoint) == kFloatingPoint); } - static bool IsDouble(uint16_t reg) { + static constexpr bool IsDouble(uint16_t reg) { return (reg & (kFloatingPoint | k64BitMask)) == (kFloatingPoint | k64Bits); } - static bool IsSingle(uint16_t reg) { + static constexpr bool IsSingle(uint16_t reg) { return (reg & (kFloatingPoint | k64BitMask)) == kFloatingPoint; } @@ -221,17 +229,17 @@ class RegStorage { } // Return the register number of low or solo. - int GetRegNum() const { + constexpr int GetRegNum() const { return reg_ & kRegNumMask; } // Is register number in 0..7? - bool Low8() const { + constexpr bool Low8() const { return GetRegNum() < 8; } // Is register number in 0..3? 
- bool Low4() const { + constexpr bool Low4() const { return GetRegNum() < 4; } @@ -244,11 +252,11 @@ class RegStorage { return RegStorage(k64BitPair, low.GetReg(), high.GetReg()); } - static bool SameRegType(RegStorage reg1, RegStorage reg2) { + static constexpr bool SameRegType(RegStorage reg1, RegStorage reg2) { return (reg1.IsDouble() == reg2.IsDouble()) && (reg1.IsSingle() == reg2.IsSingle()); } - static bool SameRegType(int reg1, int reg2) { + static constexpr bool SameRegType(int reg1, int reg2) { return (IsDouble(reg1) == IsDouble(reg2)) && (IsSingle(reg1) == IsSingle(reg2)); } @@ -258,17 +266,17 @@ class RegStorage { } // Create a floating point 32-bit solo. - static RegStorage FloatSolo32(int reg_num) { + static constexpr RegStorage FloatSolo32(int reg_num) { return RegStorage(k32BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } // Create a 128-bit solo. - static RegStorage Solo128(int reg_num) { + static constexpr RegStorage Solo128(int reg_num) { return RegStorage(k128BitSolo, reg_num & kRegTypeMask); } // Create a 64-bit solo. - static RegStorage Solo64(int reg_num) { + static constexpr RegStorage Solo64(int reg_num) { return RegStorage(k64BitSolo, reg_num & kRegTypeMask); } @@ -277,19 +285,19 @@ class RegStorage { return RegStorage(k64BitSolo, (reg_num & kRegNumMask) | kFloatingPoint); } - static RegStorage InvalidReg() { + static constexpr RegStorage InvalidReg() { return RegStorage(kInvalid); } - static uint16_t RegNum(int raw_reg_bits) { + static constexpr uint16_t RegNum(int raw_reg_bits) { return raw_reg_bits & kRegNumMask; } - int GetRawBits() const { + constexpr int GetRawBits() const { return reg_; } - size_t StorageSize() { + size_t StorageSize() const { switch (reg_ & kShapeMask) { case kInvalid: return 0; case k32BitSolo: return 4; diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc index 925d4a287a..da49524ee2 100644 --- a/compiler/utils/arena_allocator.cc +++ b/compiler/utils/arena_allocator.cc @@ -32,10 +32,11 @@ static constexpr size_t kValgrindRedZoneBytes = 8; constexpr size_t Arena::kDefaultSize; template <bool kCount> -const char* ArenaAllocatorStatsImpl<kCount>::kAllocNames[kNumArenaAllocKinds] = { +const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Misc ", "BasicBlock ", "LIR ", + "LIR masks ", "MIR ", "DataFlow ", "GrowList ", @@ -101,6 +102,7 @@ void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first, << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n"; } os << "===== Allocation by kind\n"; + COMPILE_ASSERT(arraysize(kAllocNames) == kNumArenaAllocKinds, check_arraysize_kAllocNames); for (int i = 0; i < kNumArenaAllocKinds; i++) { os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n"; } diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h index ac3938ff22..f4bcb1d44d 100644 --- a/compiler/utils/arena_allocator.h +++ b/compiler/utils/arena_allocator.h @@ -41,6 +41,7 @@ enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocBB, kArenaAllocLIR, + kArenaAllocLIRResourceMask, kArenaAllocMIR, kArenaAllocDFInfo, kArenaAllocGrowableArray, @@ -92,7 +93,7 @@ class ArenaAllocatorStatsImpl { // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL. size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds. 
- static const char* kAllocNames[kNumArenaAllocKinds]; + static const char* const kAllocNames[]; }; typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 6031e25ebf..dd8e221547 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1650,7 +1650,102 @@ ENTRY art_quick_deoptimize END art_quick_deoptimize -UNIMPLEMENTED art_quick_indexof + /* + * String's indexOf. + * + * TODO: Not very optimized. + * On entry: + * x0: string object (known non-null) + * w1: char to match (known <= 0xFFFF) + * w2: Starting offset in string data + */ +ENTRY art_quick_indexof + ldr w3, [x0, #STRING_COUNT_OFFSET] + ldr w4, [x0, #STRING_OFFSET_OFFSET] + ldr w0, [x0, #STRING_VALUE_OFFSET] // x0 ? + + /* Clamp start to [0..count] */ + cmp w2, #0 + csel w2, wzr, w2, lt + cmp w2, w3 + csel w2, w3, w2, gt + + /* Build a pointer to the start of the string data */ + add x0, x0, #STRING_DATA_OFFSET + add x0, x0, x4, lsl #1 + + /* Save a copy to compute result */ + mov x5, x0 + + /* Build pointer to start of data to compare and pre-bias */ + add x0, x0, x2, lsl #1 + sub x0, x0, #2 + + /* Compute iteration count */ + sub w2, w3, w2 + + /* + * At this point we have: + * x0: start of the data to test + * w1: char to compare + * w2: iteration count + * x5: original start of string data + */ + + subs w2, w2, #4 + b.lt .Lindexof_remainder + +.Lindexof_loop4: + ldrh w6, [x0, #2]! + ldrh w7, [x0, #2]! + ldrh w8, [x0, #2]! + ldrh w9, [x0, #2]! + cmp w6, w1 + b.eq .Lmatch_0 + cmp w7, w1 + b.eq .Lmatch_1 + cmp w8, w1 + b.eq .Lmatch_2 + cmp w9, w1 + b.eq .Lmatch_3 + subs w2, w2, #4 + b.ge .Lindexof_loop4 + +.Lindexof_remainder: + adds w2, w2, #4 + b.eq .Lindexof_nomatch + +.Lindexof_loop1: + ldrh w6, [x0, #2]! + cmp w6, w1 + b.eq .Lmatch_3 + subs w2, w2, #1 + b.ne .Lindexof_loop1 + +.Lindexof_nomatch: + mov x0, #-1 + ret + +.Lmatch_0: + sub x0, x0, #6 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_1: + sub x0, x0, #4 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_2: + sub x0, x0, #2 + sub x0, x0, x5 + asr x0, x0, #1 + ret +.Lmatch_3: + sub x0, x0, x5 + asr x0, x0, #1 + ret +END art_quick_indexof /* * String's compareTo. @@ -1698,6 +1793,7 @@ ENTRY art_quick_string_compareto add x2, x2, #STRING_DATA_OFFSET add x1, x1, #STRING_DATA_OFFSET + // TODO: Tune this value. // Check for long string, do memcmp16 for them. cmp w3, #28 // Constant from arm32. bgt .Ldo_memcmp16 diff --git a/runtime/base/logging.h b/runtime/base/logging.h index 814195c7fa..caeb946ff0 100644 --- a/runtime/base/logging.h +++ b/runtime/base/logging.h @@ -66,6 +66,16 @@ } \ } while (false) +// CHECK that can be used in a constexpr function. For example, +// constexpr int half(int n) { +// return +// DCHECK_CONSTEXPR(n >= 0, , 0) +// CHECK_CONSTEXPR((n & 1) == 0), << "Extra debugging output: n = " << n, 0) +// n / 2; +// } +#define CHECK_CONSTEXPR(x, out, dummy) \ + (UNLIKELY(!(x))) ? (LOG(FATAL) << "Check failed: " << #x out, dummy) : + #ifndef NDEBUG #define DCHECK(x) CHECK(x) @@ -77,6 +87,7 @@ #define DCHECK_GT(x, y) CHECK_GT(x, y) #define DCHECK_STREQ(s1, s2) CHECK_STREQ(s1, s2) #define DCHECK_STRNE(s1, s2) CHECK_STRNE(s1, s2) +#define DCHECK_CONSTEXPR(x, out, dummy) CHECK_CONSTEXPR(x, out, dummy) #else // NDEBUG @@ -116,6 +127,9 @@ while (false) \ CHECK_STRNE(str1, str2) +#define DCHECK_CONSTEXPR(x, out, dummy) \ + (false && (x)) ? 
(dummy) : + #endif #define LOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, -1).stream() diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index a27dfadf50..61ea87059f 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -67,14 +67,14 @@ static bool IsDataSectionType(uint32_t map_type) { } const char* DexFileVerifier::CheckLoadStringByIdx(uint32_t idx, const char* error_string) { - if (!CheckIndex(idx, dex_file_->NumStringIds(), error_string)) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumStringIds(), error_string))) { return nullptr; } return dex_file_->StringDataByIdx(idx); } const char* DexFileVerifier::CheckLoadStringByTypeIdx(uint32_t type_idx, const char* error_string) { - if (!CheckIndex(type_idx, dex_file_->NumTypeIds(), error_string)) { + if (UNLIKELY(!CheckIndex(type_idx, dex_file_->NumTypeIds(), error_string))) { return nullptr; } const DexFile::TypeId& type_id = dex_file_->GetTypeId(type_idx); @@ -83,14 +83,14 @@ const char* DexFileVerifier::CheckLoadStringByTypeIdx(uint32_t type_idx, const c } const DexFile::FieldId* DexFileVerifier::CheckLoadFieldId(uint32_t idx, const char* error_string) { - if (!CheckIndex(idx, dex_file_->NumFieldIds(), error_string)) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumFieldIds(), error_string))) { return nullptr; } return &dex_file_->GetFieldId(idx); } const DexFile::MethodId* DexFileVerifier::CheckLoadMethodId(uint32_t idx, const char* err_string) { - if (!CheckIndex(idx, dex_file_->NumMethodIds(), err_string)) { + if (UNLIKELY(!CheckIndex(idx, dex_file_->NumMethodIds(), err_string))) { return nullptr; } return &dex_file_->GetMethodId(idx); @@ -99,28 +99,28 @@ const DexFile::MethodId* DexFileVerifier::CheckLoadMethodId(uint32_t idx, const // Helper macro to load string and return false on error. #define LOAD_STRING(var, idx, error) \ const char* var = CheckLoadStringByIdx(idx, error); \ - if (var == nullptr) { \ + if (UNLIKELY(var == nullptr)) { \ return false; \ } // Helper macro to load string by type idx and return false on error. #define LOAD_STRING_BY_TYPE(var, type_idx, error) \ const char* var = CheckLoadStringByTypeIdx(type_idx, error); \ - if (var == nullptr) { \ + if (UNLIKELY(var == nullptr)) { \ return false; \ } // Helper macro to load method id. Return last parameter on error. #define LOAD_METHOD(var, idx, error_string, error_val) \ const DexFile::MethodId* var = CheckLoadMethodId(idx, error_string); \ - if (var == nullptr) { \ + if (UNLIKELY(var == nullptr)) { \ return error_val; \ } // Helper macro to load method id. Return last parameter on error. #define LOAD_FIELD(var, idx, fmt, error_val) \ const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \ - if (var == nullptr) { \ + if (UNLIKELY(var == nullptr)) { \ return error_val; \ } @@ -1596,12 +1596,18 @@ bool DexFileVerifier::CheckInterMethodIdItem() { } // Check that the name is valid. - LOAD_STRING(descriptor, item->name_idx_, "inter_method_id_item class_idx") + LOAD_STRING(descriptor, item->name_idx_, "inter_method_id_item name_idx") if (UNLIKELY(!IsValidMemberName(descriptor))) { ErrorStringPrintf("Invalid method name: '%s'", descriptor); return false; } + // Check that the proto id is valid. + if (UNLIKELY(!CheckIndex(item->proto_idx_, dex_file_->NumProtoIds(), + "inter_method_id_item proto_idx"))) { + return false; + } + // Check ordering between items. This relies on the other sections being in order. 
if (previous_item_ != NULL) { const DexFile::MethodId* prev_item = reinterpret_cast<const DexFile::MethodId*>(previous_item_); diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc new file mode 100644 index 0000000000..d0ce00fc66 --- /dev/null +++ b/runtime/dex_file_verifier_test.cc @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_file_verifier.h" + +#include <memory> +#include "zlib.h" + +#include "common_runtime_test.h" +#include "base/macros.h" + +namespace art { + +class DexFileVerifierTest : public CommonRuntimeTest {}; + +static const byte kBase64Map[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, + 255, 254, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, // NOLINT + 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255, // NOLINT + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, // NOLINT + 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, // NOLINT + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255 +}; + +static inline byte* DecodeBase64(const char* src, size_t* dst_size) { + std::vector<byte> tmp; + uint32_t t = 0, y = 0; + int g = 3; + for (size_t i = 0; src[i] != '\0'; ++i) { + byte c = kBase64Map[src[i] & 0xFF]; + if (c == 255) continue; + // the final = symbols are read and used to trim the remaining bytes + if (c == 254) { + c = 0; + // prevent g < 0 which would potentially allow an overflow later + if (--g < 0) { + *dst_size = 0; + return nullptr; + } + } else if (g != 3) { + // we only allow = to be at the end + *dst_size = 0; + return nullptr; + } + t = (t << 6) | c; + if (++y == 4) { + tmp.push_back((t >> 16) & 255); + if (g > 1) { + tmp.push_back((t >> 8) & 255); + } + if (g > 2) { + tmp.push_back(t & 255); + } + y = t = 0; + } + } + if (y != 0) { + *dst_size = 0; + return nullptr; + } + std::unique_ptr<byte[]> dst(new byte[tmp.size()]); + if (dst_size != nullptr) { + *dst_size = tmp.size(); + } else { + *dst_size = 0; + } + std::copy(tmp.begin(), tmp.end(), dst.get()); + return dst.release(); +} + +static const 
DexFile* OpenDexFileBase64(const char* base64, const char* location, + std::string* error_msg) { + // decode base64 + CHECK(base64 != NULL); + size_t length; + std::unique_ptr<byte[]> dex_bytes(DecodeBase64(base64, &length)); + CHECK(dex_bytes.get() != NULL); + + // write to provided file + std::unique_ptr<File> file(OS::CreateEmptyFile(location)); + CHECK(file.get() != NULL); + if (!file->WriteFully(dex_bytes.get(), length)) { + PLOG(FATAL) << "Failed to write base64 as dex file"; + } + file.reset(); + + // read dex file + ScopedObjectAccess soa(Thread::Current()); + return DexFile::Open(location, location, error_msg); +} + + +// For reference. +static const char kGoodTestDex[] = + "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN" + "AAAAcAAAAAYAAACkAAAAAgAAALwAAAABAAAA1AAAAAQAAADcAAAAAQAAAPwAAACIAQAAHAEAAFoB" + "AABiAQAAagEAAIEBAACVAQAAqQEAAL0BAADDAQAAzgEAANEBAADVAQAA2gEAAN8BAAABAAAAAgAA" + "AAMAAAAEAAAABQAAAAgAAAAIAAAABQAAAAAAAAAJAAAABQAAAFQBAAAEAAEACwAAAAAAAAAAAAAA" + "AAAAAAoAAAABAAEADAAAAAIAAAAAAAAAAAAAAAEAAAACAAAAAAAAAAcAAAAAAAAA8wEAAAAAAAAB" + "AAEAAQAAAOgBAAAEAAAAcBADAAAADgACAAAAAgAAAO0BAAAIAAAAYgAAABoBBgBuIAIAEAAOAAEA" + "AAADAAY8aW5pdD4ABkxUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABJMamF2YS9sYW5nL09i" + "amVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwASTGphdmEvbGFuZy9TeXN0ZW07AARUZXN0AAlUZXN0" + "LmphdmEAAVYAAlZMAANmb28AA291dAAHcHJpbnRsbgABAAcOAAMABw54AAAAAgAAgYAEnAIBCbQC" + "AAAADQAAAAAAAAABAAAAAAAAAAEAAAANAAAAcAAAAAIAAAAGAAAApAAAAAMAAAACAAAAvAAAAAQA" + "AAABAAAA1AAAAAUAAAAEAAAA3AAAAAYAAAABAAAA/AAAAAEgAAACAAAAHAEAAAEQAAABAAAAVAEA" + "AAIgAAANAAAAWgEAAAMgAAACAAAA6AEAAAAgAAABAAAA8wEAAAAQAAABAAAABAIAAA=="; + +TEST_F(DexFileVerifierTest, GoodDex) { + ScratchFile tmp; + std::string error_msg; + std::unique_ptr<const DexFile> raw(OpenDexFileBase64(kGoodTestDex, tmp.GetFilename().c_str(), + &error_msg)); + ASSERT_TRUE(raw.get() != nullptr) << error_msg; +} + +static void FixUpChecksum(byte* dex_file) { + DexFile::Header* header = reinterpret_cast<DexFile::Header*>(dex_file); + uint32_t expected_size = header->file_size_; + uint32_t adler_checksum = adler32(0L, Z_NULL, 0); + const uint32_t non_sum = sizeof(DexFile::Header::magic_) + sizeof(DexFile::Header::checksum_); + const byte* non_sum_ptr = dex_file + non_sum; + adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum); + header->checksum_ = adler_checksum; +} + +static const DexFile* FixChecksumAndOpen(byte* bytes, size_t length, const char* location, + std::string* error_msg) { + // Check data. + CHECK(bytes != nullptr); + + // Fixup of checksum. + FixUpChecksum(bytes); + + // write to provided file + std::unique_ptr<File> file(OS::CreateEmptyFile(location)); + CHECK(file.get() != NULL); + if (!file->WriteFully(bytes, length)) { + PLOG(FATAL) << "Failed to write base64 as dex file"; + } + file.reset(); + + // read dex file + ScopedObjectAccess soa(Thread::Current()); + return DexFile::Open(location, location, error_msg); +} + +static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val, + std::string* error_msg) { + // Decode base64. + size_t length; + std::unique_ptr<byte[]> dex_bytes(DecodeBase64(kGoodTestDex, &length)); + CHECK(dex_bytes.get() != NULL); + + // Make modifications. + dex_bytes.get()[offset] = new_val; + + // Fixup and load. + std::unique_ptr<const DexFile> file(FixChecksumAndOpen(dex_bytes.get(), length, location, + error_msg)); + return file.get() != nullptr; +} + +TEST_F(DexFileVerifierTest, MethodId) { + { + // Class error. 
+ ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg; + } + + { + // Proto error. + ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg; + } + + { + // Name error. + ScratchFile tmp; + std::string error_msg; + bool success = !ModifyAndLoad(tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg); + ASSERT_TRUE(success); + ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg; + } +} + +} // namespace art diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 7e3810cd18..9512a5a7f5 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -340,16 +340,7 @@ static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (Runtime::Current()->GetProfilerOptions().IsEnabled() && (pkgname != nullptr)) { const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */) + std::string("/") + pkgname; - const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache", - false /* create_if_absent */); - - // Make the profile cache if it doesn't exist. - mkdir(profile_cache_dir.c_str(), 0700); - - // The previous profile file (a copy of the profile the last time this was run) is - // in the dalvik-cache directory because this is owned by system. The profiles - // directory is owned by install so system cannot write files in there. - std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname; + const std::string prev_profile_file = profile_file + std::string("@old"); struct stat profstat, prevstat; int e1 = stat(profile_file.c_str(), &profstat); diff --git a/runtime/reflection.cc b/runtime/reflection.cc index 89cdb4dc7e..fe5e1043a9 100644 --- a/runtime/reflection.cc +++ b/runtime/reflection.cc @@ -815,6 +815,10 @@ bool UnboxPrimitiveForResult(const ThrowLocation& throw_location, mirror::Object bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) { NthCallerVisitor visitor(Thread::Current(), 2); visitor.WalkStack(); + if (UNLIKELY(visitor.caller == nullptr)) { + // The caller is an attached native thread. + return (access_flags & kAccPublic) != 0; + } mirror::Class* caller_class = visitor.caller->GetDeclaringClass(); if (((access_flags & kAccPublic) != 0) || (caller_class == declaring_class)) { diff --git a/runtime/utils.h b/runtime/utils.h index 6a4198fcfc..6d52459ec8 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -140,9 +140,8 @@ struct TypeIdentity { template<typename T> static constexpr T RoundDown(T x, typename TypeIdentity<T>::type n) { return - // DCHECK(IsPowerOfTwo(n)) in a form acceptable in a constexpr function: - (kIsDebugBuild && !IsPowerOfTwo(n)) ? 
(LOG(FATAL) << n << " isn't a power of 2", T(0)) - : (x & -n); + DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0)) + (x & -n); } template<typename T> diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index e5dcbb0ac4..63a1fe5f3b 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -1334,6 +1334,31 @@ bool MethodVerifier::CodeFlowVerifyMethod() { insn_flags_[insn_idx].ClearChanged(); } + // When we're in compiler mode, do not accept quickened instructions. + // We explicitly iterate over *all* instructions to check code that may be unreachable and + // missed by the loop above. + if (Runtime::Current() != nullptr && Runtime::Current()->IsCompiler()) { + uint32_t insn_idx = 0; + for (; insn_idx < insns_size; insn_idx += insn_flags_[insn_idx].GetLengthInCodeUnits()) { + const Instruction* inst = Instruction::At(insns + insn_idx); + switch (inst->Opcode()) { + case Instruction::IGET_QUICK: + case Instruction::IGET_WIDE_QUICK: + case Instruction::IGET_OBJECT_QUICK: + case Instruction::IPUT_QUICK: + case Instruction::IPUT_WIDE_QUICK: + case Instruction::IPUT_OBJECT_QUICK: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Quickened instructions not allowed. "; + return false; + + default: + break; + } + } + } + if (gDebugVerify) { /* * Scan for dead code. There's nothing "evil" about dead code @@ -2131,6 +2156,9 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { case Instruction::INVOKE_VIRTUAL_RANGE: case Instruction::INVOKE_SUPER: case Instruction::INVOKE_SUPER_RANGE: { + if (inst->VRegA() == 0) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke_virtual/super needs at least receiver"; + } bool is_range = (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE || inst->Opcode() == Instruction::INVOKE_SUPER_RANGE); bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER || diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java index 3c4ed3505f..33418a98b2 100644 --- a/test/JniTest/JniTest.java +++ b/test/JniTest/JniTest.java @@ -21,6 +21,7 @@ class JniTest { System.loadLibrary("arttest"); testFindClassOnAttachedNativeThread(); testFindFieldOnAttachedNativeThread(); + testReflectFieldGetFromAttachedNativeThreadNative(); testCallStaticVoidMethodOnSubClass(); testGetMirandaMethod(); testZeroLengthByteBuffers(); @@ -34,6 +35,10 @@ class JniTest { private static boolean testFindFieldOnAttachedNativeThreadField; + private static native void testReflectFieldGetFromAttachedNativeThreadNative(); + + public static boolean testReflectFieldGetFromAttachedNativeThreadField; + private static void testFindFieldOnAttachedNativeThread() { testFindFieldOnAttachedNativeThreadNative(); if (!testFindFieldOnAttachedNativeThreadField) { diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc index 024ba53708..36cad72ace 100644 --- a/test/JniTest/jni_test.cc +++ b/test/JniTest/jni_test.cc @@ -103,6 +103,66 @@ extern "C" JNIEXPORT void JNICALL Java_JniTest_testFindFieldOnAttachedNativeThre assert(pthread_join_result == 0); } +static void* testReflectFieldGetFromAttachedNativeThread(void*) { + assert(jvm != NULL); + + JNIEnv* env = NULL; + JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL }; + int attach_result = jvm->AttachCurrentThread(&env, &args); + assert(attach_result == 0); + + jclass clazz = env->FindClass("JniTest"); + assert(clazz != NULL); + assert(!env->ExceptionCheck()); + + jclass class_clazz = 
env->FindClass("java/lang/Class"); + assert(class_clazz != NULL); + assert(!env->ExceptionCheck()); + + jmethodID getFieldMetodId = env->GetMethodID(class_clazz, "getField", + "(Ljava/lang/String;)Ljava/lang/reflect/Field;"); + assert(getFieldMetodId != NULL); + assert(!env->ExceptionCheck()); + + jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField"); + assert(field_name != NULL); + assert(!env->ExceptionCheck()); + + jobject field = env->CallObjectMethod(clazz, getFieldMetodId, field_name); + assert(field != NULL); + assert(!env->ExceptionCheck()); + + jclass field_clazz = env->FindClass("java/lang/reflect/Field"); + assert(field_clazz != NULL); + assert(!env->ExceptionCheck()); + + jmethodID getBooleanMetodId = env->GetMethodID(field_clazz, "getBoolean", + "(Ljava/lang/Object;)Z"); + assert(getBooleanMetodId != NULL); + assert(!env->ExceptionCheck()); + + jboolean value = env->CallBooleanMethod(field, getBooleanMetodId, /* ignored */ clazz); + assert(value == false); + assert(!env->ExceptionCheck()); + + int detach_result = jvm->DetachCurrentThread(); + assert(detach_result == 0); + return NULL; +} + +// http://b/15539150 +extern "C" JNIEXPORT void JNICALL Java_JniTest_testReflectFieldGetFromAttachedNativeThreadNative( + JNIEnv*, jclass) { + pthread_t pthread; + int pthread_create_result = pthread_create(&pthread, + NULL, + testReflectFieldGetFromAttachedNativeThread, + NULL); + assert(pthread_create_result == 0); + int pthread_join_result = pthread_join(pthread, NULL); + assert(pthread_join_result == 0); +} + // http://b/11243757 extern "C" JNIEXPORT void JNICALL Java_JniTest_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env, |