Diffstat (limited to 'compiler')
26 files changed, 724 insertions, 218 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index eb4a3367bb..964a222c05 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -417,7 +417,7 @@ std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind); enum FixupKind { kFixupNone, kFixupLabel, // For labels we just adjust the offset. - kFixupLoad, // Mostly for imediates. + kFixupLoad, // Mostly for immediates. kFixupVLoad, // FP load which *may* be pc-relative. kFixupCBxZ, // Cbz, Cbnz. kFixupPushPop, // Not really pc relative, but changes size based on args. diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 64fa685c19..3f122de623 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -287,9 +287,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } cu.EndTiming(); - driver.GetTimingsLogger()->Start(); driver.GetTimingsLogger()->AddLogger(cu.timings); - driver.GetTimingsLogger()->End(); return result; } diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index caf84d9bba..7955d6cd3f 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1062,134 +1062,142 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { */ #define PADDING_MOV_R5_R5 0x1C2D -void ArmMir2Lir::EncodeLIR(LIR* lir) { - int opcode = lir->opcode; - if (IsPseudoLirOp(opcode)) { - if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { - // Note: size for this opcode will be either 0 or 2 depending on final alignment. - lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff); - lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); - lir->flags.size = (lir->offset & 0x2); - } - } else if (LIKELY(!lir->flags.is_nop)) { - const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; - uint32_t bits = encoder->skeleton; - for (int i = 0; i < 4; i++) { - uint32_t operand; - uint32_t value; - operand = lir->operands[i]; - ArmEncodingKind kind = encoder->field_loc[i].kind; - if (LIKELY(kind == kFmtBitBlt)) { - value = (operand << encoder->field_loc[i].start) & - ((1 << (encoder->field_loc[i].end + 1)) - 1); - bits |= value; - } else { - switch (encoder->field_loc[i].kind) { - case kFmtSkip: - break; // Nothing to do, but continue to next. - case kFmtUnused: - i = 4; // Done, break out of the enclosing loop. 
- break; - case kFmtFPImm: - value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; - value |= (operand & 0x0F) << encoder->field_loc[i].start; - bits |= value; - break; - case kFmtBrOffset: - value = ((operand & 0x80000) >> 19) << 26; - value |= ((operand & 0x40000) >> 18) << 11; - value |= ((operand & 0x20000) >> 17) << 13; - value |= ((operand & 0x1f800) >> 11) << 16; - value |= (operand & 0x007ff); - bits |= value; - break; - case kFmtShift5: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtShift: - value = ((operand & 0x70) >> 4) << 12; - value |= (operand & 0x0f) << 4; - bits |= value; - break; - case kFmtBWidth: - value = operand - 1; - bits |= value; - break; - case kFmtLsb: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtImm6: - value = ((operand & 0x20) >> 5) << 9; - value |= (operand & 0x1f) << 3; - bits |= value; - break; - case kFmtDfp: { - DCHECK(ARM_DOUBLEREG(operand)); - DCHECK_EQ((operand & 0x1), 0U); - uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1; - /* Snag the 1-bit slice and position it */ - value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= (reg_name & 0x0f) << encoder->field_loc[i].start; - bits |= value; - break; +uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + for (; lir != NULL; lir = NEXT_LIR(lir)) { + if (!lir->flags.is_nop) { + int opcode = lir->opcode; + if (IsPseudoLirOp(opcode)) { + if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { + // Note: size for this opcode will be either 0 or 2 depending on final alignment. + if (lir->offset & 0x2) { + write_pos[0] = (PADDING_MOV_R5_R5 & 0xff); + write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); + write_pos += 2; } - case kFmtSfp: - DCHECK(ARM_SINGLEREG(operand)); - /* Snag the 1-bit slice and position it */ - value = (operand & 0x1) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; - bits |= value; - break; - case kFmtImm12: - case kFmtModImm: - value = ((operand & 0x800) >> 11) << 26; - value |= ((operand & 0x700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtImm16: - value = ((operand & 0x0800) >> 11) << 26; - value |= ((operand & 0xf000) >> 12) << 16; - value |= ((operand & 0x0700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtOff24: { - uint32_t signbit = (operand >> 31) & 0x1; - uint32_t i1 = (operand >> 22) & 0x1; - uint32_t i2 = (operand >> 21) & 0x1; - uint32_t imm10 = (operand >> 11) & 0x03ff; - uint32_t imm11 = operand & 0x07ff; - uint32_t j1 = (i1 ^ signbit) ? 0 : 1; - uint32_t j2 = (i2 ^ signbit) ? 0 : 1; - value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | - imm11; + } + } else if (LIKELY(!lir->flags.is_nop)) { + const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + for (int i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + ArmEncodingKind kind = encoder->field_loc[i].kind; + if (LIKELY(kind == kFmtBitBlt)) { + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); bits |= value; + } else { + switch (encoder->field_loc[i].kind) { + case kFmtSkip: + break; // Nothing to do, but continue to next. + case kFmtUnused: + i = 4; // Done, break out of the enclosing loop. 
+ break; + case kFmtFPImm: + value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; + value |= (operand & 0x0F) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtBrOffset: + value = ((operand & 0x80000) >> 19) << 26; + value |= ((operand & 0x40000) >> 18) << 11; + value |= ((operand & 0x20000) >> 17) << 13; + value |= ((operand & 0x1f800) >> 11) << 16; + value |= (operand & 0x007ff); + bits |= value; + break; + case kFmtShift5: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtShift: + value = ((operand & 0x70) >> 4) << 12; + value |= (operand & 0x0f) << 4; + bits |= value; + break; + case kFmtBWidth: + value = operand - 1; + bits |= value; + break; + case kFmtLsb: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtImm6: + value = ((operand & 0x20) >> 5) << 9; + value |= (operand & 0x1f) << 3; + bits |= value; + break; + case kFmtDfp: { + DCHECK(ARM_DOUBLEREG(operand)); + DCHECK_EQ((operand & 0x1), 0U); + uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1; + /* Snag the 1-bit slice and position it */ + value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= (reg_name & 0x0f) << encoder->field_loc[i].start; + bits |= value; + break; + } + case kFmtSfp: + DCHECK(ARM_SINGLEREG(operand)); + /* Snag the 1-bit slice and position it */ + value = (operand & 0x1) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtImm12: + case kFmtModImm: + value = ((operand & 0x800) >> 11) << 26; + value |= ((operand & 0x700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtImm16: + value = ((operand & 0x0800) >> 11) << 26; + value |= ((operand & 0xf000) >> 12) << 16; + value |= ((operand & 0x0700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtOff24: { + uint32_t signbit = (operand >> 31) & 0x1; + uint32_t i1 = (operand >> 22) & 0x1; + uint32_t i2 = (operand >> 21) & 0x1; + uint32_t imm10 = (operand >> 11) & 0x03ff; + uint32_t imm11 = operand & 0x07ff; + uint32_t j1 = (i1 ^ signbit) ? 0 : 1; + uint32_t j2 = (i2 ^ signbit) ? 0 : 1; + value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | + imm11; + bits |= value; + } + break; + default: + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; } - break; - default: - LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } + } + if (encoder->size == 4) { + write_pos[0] = ((bits >> 16) & 0xff); + write_pos[1] = ((bits >> 24) & 0xff); + write_pos[2] = (bits & 0xff); + write_pos[3] = ((bits >> 8) & 0xff); + write_pos += 4; + } else { + DCHECK_EQ(encoder->size, 2); + write_pos[0] = (bits & 0xff); + write_pos[1] = ((bits >> 8) & 0xff); + write_pos += 2; } } } - if (encoder->size == 4) { - lir->u.a.bytes[0] = ((bits >> 16) & 0xff); - lir->u.a.bytes[1] = ((bits >> 24) & 0xff); - lir->u.a.bytes[2] = (bits & 0xff); - lir->u.a.bytes[3] = ((bits >> 8) & 0xff); - } else { - DCHECK_EQ(encoder->size, 2); - lir->u.a.bytes[0] = (bits & 0xff); - lir->u.a.bytes[1] = ((bits >> 8) & 0xff); - } - lir->flags.size = encoder->size; } + return write_pos; } // Assemble the LIR into binary instruction format. 
@@ -1198,7 +1206,7 @@ void ArmMir2Lir::AssembleLIR() { LIR* prev_lir; cu_->NewTimingSplit("Assemble"); int assembler_retries = 0; - CodeOffset starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0); + CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0); data_offset_ = (starting_offset + 0x3) & ~0x3; int32_t offset_adjustment; AssignDataOffsets(); @@ -1304,8 +1312,6 @@ void ArmMir2Lir::AssembleLIR() { lir->operands[2] = 0; lir->operands[1] = base_reg; } - // Must redo encoding here - won't ever revisit this node. - EncodeLIR(lir); prev_lir = new_adr; // Continue scan with new_adr; lir = new_adr->u.a.pcrel_next; res = kRetryAll; @@ -1346,9 +1352,8 @@ void ArmMir2Lir::AssembleLIR() { /* operand[0] is src1 in both cb[n]z & CmpRI8 */ lir->operands[1] = 0; lir->target = 0; - EncodeLIR(lir); // NOTE: sets flags.size. + lir->flags.size = EncodingMap[lir->opcode].size; // Add back the new size. - DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size)); offset_adjustment += lir->flags.size; // Set up the new following inst. new_inst->offset = lir->offset + lir->flags.size; @@ -1570,20 +1575,6 @@ void ArmMir2Lir::AssembleLIR() { default: LOG(FATAL) << "Unexpected case " << lir->flags.fixup; } - /* - * If one of the pc-relative instructions expanded we'll have - * to make another pass. Don't bother to fully assemble the - * instruction. - */ - if (res == kSuccess) { - EncodeLIR(lir); - if (assembler_retries == 0) { - // Go ahead and fix up the code buffer image. - for (int i = 0; i < lir->flags.size; i++) { - code_buffer_[lir->offset + i] = lir->u.a.bytes[i]; - } - } - } prev_lir = lir; lir = lir->u.a.pcrel_next; } @@ -1602,21 +1593,15 @@ void ArmMir2Lir::AssembleLIR() { } } - // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is. - if (assembler_retries != 0) { - code_buffer_.clear(); - for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - if (lir->flags.is_nop) { - continue; - } else { - for (int i = 0; i < lir->flags.size; i++) { - code_buffer_.push_back(lir->u.a.bytes[i]); - } - } - } - } + // Build the CodeBuffer. + DCHECK_LE(data_offset_, total_size_); + code_buffer_.reserve(total_size_); + code_buffer_.resize(starting_offset); + uint8_t* write_pos = &code_buffer_[0]; + write_pos = EncodeLIRs(write_pos, first_lir_insn_); + DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset); - data_offset_ = (code_buffer_.size() + 0x3) & ~0x3; + DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3); // Install literals InstallLiteralPools(); @@ -1641,19 +1626,11 @@ int ArmMir2Lir::GetInsnSize(LIR* lir) { } // Encode instruction bit pattern and assign offsets. -uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) { +uint32_t ArmMir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) { LIR* end_lir = tail_lir->next; - /* - * A significant percentage of methods can be assembled in a single pass. We'll - * go ahead and build the code image here, leaving holes for pc-relative fixup - * codes. If the code size changes during that pass, we'll have to throw away - * this work - but if not, we're ready to go. - */ - code_buffer_.reserve(estimated_native_code_size_ + 256); // Add a little slop. 
LIR* last_fixup = NULL; for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { - lir->offset = offset; if (!lir->flags.is_nop) { if (lir->flags.fixup != kFixupNone) { if (!IsPseudoLirOp(lir->opcode)) { @@ -1675,11 +1652,7 @@ uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) last_fixup->u.a.pcrel_next = lir; } last_fixup = lir; - } else { - EncodeLIR(lir); - } - for (int i = 0; i < lir->flags.size; i++) { - code_buffer_.push_back(lir->u.a.bytes[i]); + lir->offset = offset; } offset += lir->flags.size; } diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 175fc06efb..d6724f1382 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -183,15 +183,18 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { LockCallTemps(); // Prepare for explicit register usage constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. if (kArchVariantHasGoodBranchPredictor) { - LIR* null_check_branch; + LIR* null_check_branch = nullptr; if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { null_check_branch = nullptr; // No null check. } else { // If the null-check fails its handled by the slow-path to reduce exception related meta-data. - null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + if (Runtime::Current()->ExplicitNullChecks()) { + null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + } } LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2); NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + MarkPossibleNullPointerException(opt_flags); LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL); NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL); @@ -216,8 +219,8 @@ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { // Explicit null-check as slow-path is entered using an IT. GenNullCheck(rs_r0, opt_flags); LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2); - MarkPossibleNullPointerException(opt_flags); NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + MarkPossibleNullPointerException(opt_flags); OpRegImm(kOpCmp, rs_r1, 0); OpIT(kCondEq, ""); NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); @@ -241,7 +244,7 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { FlushAllRegs(); LoadValueDirectFixed(rl_src, rs_r0); // Get obj LockCallTemps(); // Prepare for explicit register usage - LIR* null_check_branch; + LIR* null_check_branch = nullptr; LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2); constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. if (kArchVariantHasGoodBranchPredictor) { @@ -249,9 +252,12 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { null_check_branch = nullptr; // No null check. } else { // If the null-check fails its handled by the slow-path to reduce exception related meta-data. 
- null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + if (Runtime::Current()->ExplicitNullChecks()) { + null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + } } LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + MarkPossibleNullPointerException(opt_flags); LoadConstantNoClobber(rs_r3, 0); LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL); StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); @@ -404,11 +410,17 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } else { // Implicit stack overflow check. - // Generate a load from [sp, #-framesize]. If this is in the stack + // Generate a load from [sp, #-overflowsize]. If this is in the stack // redzone we will get a segmentation fault. - OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); - LoadWordDisp(rs_rARM_SP, 0, rs_rARM_LR); + // + // Caveat coder: if someone changes the kStackOverflowReservedBytes value + // we need to make sure that it's loadable in an immediate field of + // a sub instruction. Otherwise we will get a temp allocation and the + // code size will increase. + OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes); + LoadWordDisp(rs_r12, 0, rs_r12); MarkPossibleStackOverflowException(); + OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 0f1e17141c..8bfdb6af2c 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -77,10 +77,10 @@ class ArmMir2Lir : public Mir2Lir { // Required for target - miscellaneous. void AssembleLIR(); - uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset); + uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset); int AssignInsnOffsets(); void AssignOffsets(); - void EncodeLIR(LIR* lir); + static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index 1ec0a2c65d..8df5b25ebd 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -828,6 +828,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag int encoded_disp = displacement; bool already_generated = false; int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg(); + bool null_pointer_safepoint = false; switch (size) { case kDouble: case kLong: @@ -848,6 +849,7 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag displacement >> 2); } else { load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg); + null_pointer_safepoint = true; LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG); } already_generated = true; @@ -939,6 +941,11 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag // TODO: in future may need to differentiate Dalvik accesses w/ spills if (r_base == rs_rARM_SP) { AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); + } else { + // We might need to generate a safepoint if we have two store instructions (wide or double). 
+ if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { + MarkSafepointPC(load); + } } return load; } @@ -965,6 +972,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora int encoded_disp = displacement; bool already_generated = false; int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg(); + bool null_pointer_safepoint = false; switch (size) { case kLong: case kDouble: @@ -974,6 +982,7 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora displacement >> 2); } else { store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord); + null_pointer_safepoint = true; StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord); } already_generated = true; @@ -1061,6 +1070,11 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora // TODO: In future, may need to differentiate Dalvik & spill accesses if (r_base == rs_rARM_SP) { AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); + } else { + // We might need to generate a safepoint if we have two store instructions (wide or double). + if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { + MarkSafepointPC(store); + } } return store; } diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 717ad86a75..4c6c7a45b4 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -702,7 +702,8 @@ void Mir2Lir::CreateNativeGcMap() { uint32_t native_offset = it.NativePcOffset(); uint32_t dex_pc = it.DexPc(); const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); - CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc; + CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc << + ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file); native_gc_map_builder.AddEntry(native_offset, references); } } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 2afa5ca815..866ce5f397 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -74,14 +74,19 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, T /* Perform null-check on a register. */ LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) { if (Runtime::Current()->ExplicitNullChecks()) { - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { - return NULL; - } - return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer); + return GenExplicitNullCheck(m_reg, opt_flags); } return nullptr; } +/* Perform an explicit null-check on a register. 
*/ +LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) { + if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { + return NULL; + } + return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer); +} + void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) { if (!Runtime::Current()->ExplicitNullChecks()) { if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { @@ -732,6 +737,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value()); rl_result = EvalLoc(rl_dest, reg_class, true); LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG); + MarkPossibleNullPointerException(opt_flags); if (field_info.IsVolatile()) { // Without context sensitive analysis, we must issue the most conservative barriers. // In this case, either a load or store may follow so we issue both barriers. diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index a0242d514d..7689b51f96 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -781,7 +781,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, type, skip_this); if (pcrLabel) { - *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags); + if (Runtime::Current()->ExplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. + RegStorage tmp = AllocTemp(); + LoadWordDisp(TargetReg(kArg1), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); + } } return call_state; } @@ -987,7 +997,17 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); if (pcrLabel) { - *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags); + if (Runtime::Current()->ExplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. + RegStorage tmp = AllocTemp(); + LoadWordDisp(TargetReg(kArg1), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); + } } return call_state; } @@ -1299,7 +1319,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { LoadValueDirectFixed(rl_start, reg_start); } RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)); - GenNullCheck(reg_ptr, info->opt_flags); + GenExplicitNullCheck(reg_ptr, info->opt_flags); LIR* high_code_point_branch = rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr); // NOTE: not a safepoint @@ -1337,7 +1357,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { LoadValueDirectFixed(rl_cmp, reg_cmp); RegStorage r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : RegStorage::InvalidReg(); - GenNullCheck(reg_this, info->opt_flags); + GenExplicitNullCheck(reg_this, info->opt_flags); info->opt_flags |= MIR_IGNORE_NULL_CHECK; // Record that we've null checked. 
// TUNING: check if rl_cmp.s_reg_low is already null checked LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr); diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index e81a037c7b..cd3dadbc74 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -446,6 +446,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list GenNullCheck(rl_src[0].reg, opt_flags); rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg); + MarkPossibleNullPointerException(opt_flags); StoreValue(rl_dest, rl_result); break; diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index bac35aad6e..10f431f938 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -131,7 +131,6 @@ struct UseDefMasks { struct AssemblyInfo { LIR* pcrel_next; // Chain of LIR nodes needing pc relative fixups. - uint8_t bytes[16]; // Encoded instruction bytes. }; struct LIR { @@ -151,7 +150,7 @@ struct LIR { } flags; union { UseDefMasks m; // Use & Def masks used during optimization. - AssemblyInfo a; // Instruction encoding used during assembly phase. + AssemblyInfo a; // Instruction info used during assembly phase. } u; int32_t operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. }; @@ -340,7 +339,7 @@ class Mir2Lir : public Backend { return code_buffer_.size() / sizeof(code_buffer_[0]); } - bool IsPseudoLirOp(int opcode) { + static bool IsPseudoLirOp(int opcode) { return (opcode < 0); } @@ -565,6 +564,7 @@ class Mir2Lir : public Backend { void ForceImplicitNullCheck(RegStorage reg, int opt_flags); LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind); LIR* GenNullCheck(RegStorage m_reg, int opt_flags); + LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags); LIR* GenRegRegCheck(ConditionCode c_code, RegStorage reg1, RegStorage reg2, ThrowKind kind); void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2, LIR* taken, LIR* fall_through); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 39535e953b..64ecdb5c24 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -16,10 +16,12 @@ */ #include "dex_file.h" +#include "dex_file-inl.h" #include "dex_instruction.h" #include "dex_instruction-inl.h" #include "builder.h" #include "nodes.h" +#include "primitive.h" namespace art { @@ -192,6 +194,55 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ break; } + case Instruction::INVOKE_STATIC: { + uint32_t method_idx = instruction.VRegB_35c(); + const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx); + uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_; + const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx); + const size_t number_of_arguments = instruction.VRegA_35c(); + if (number_of_arguments != 0) { + return false; + } + if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) { + return false; + } + current_block_->AddInstruction(new (arena_) HInvokeStatic( + arena_, number_of_arguments, dex_offset, method_idx)); + break; + } + + case Instruction::ADD_INT: { + HInstruction* first = LoadLocal(instruction.VRegB()); + HInstruction* second = LoadLocal(instruction.VRegC()); + current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second)); + UpdateLocal(instruction.VRegA(), 
current_block_->GetLastInstruction()); + break; + } + + case Instruction::ADD_INT_2ADDR: { + HInstruction* first = LoadLocal(instruction.VRegA()); + HInstruction* second = LoadLocal(instruction.VRegB()); + current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + break; + } + + case Instruction::ADD_INT_LIT16: { + HInstruction* first = LoadLocal(instruction.VRegB()); + HInstruction* second = GetConstant(instruction.VRegC_22s()); + current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + break; + } + + case Instruction::ADD_INT_LIT8: { + HInstruction* first = LoadLocal(instruction.VRegB()); + HInstruction* second = GetConstant(instruction.VRegC_22b()); + current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + break; + } + case Instruction::NOP: break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index fff83a1205..46ca9aabd7 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ #include "dex_file.h" +#include "driver/dex_compilation_unit.h" #include "utils/allocation.h" #include "utils/growable_array.h" @@ -33,7 +34,9 @@ class HLocal; class HGraphBuilder : public ValueObject { public: - explicit HGraphBuilder(ArenaAllocator* arena) + HGraphBuilder(ArenaAllocator* arena, + const DexCompilationUnit* dex_compilation_unit = nullptr, + const DexFile* dex_file = nullptr) : arena_(arena), branch_targets_(arena, 0), locals_(arena, 0), @@ -42,7 +45,9 @@ class HGraphBuilder : public ValueObject { current_block_(nullptr), graph_(nullptr), constant0_(nullptr), - constant1_(nullptr) { } + constant1_(nullptr), + dex_file_(dex_file), + dex_compilation_unit_(dex_compilation_unit) { } HGraph* BuildGraph(const DexFile::CodeItem& code); @@ -83,6 +88,9 @@ class HGraphBuilder : public ValueObject { HIntConstant* constant0_; HIntConstant* constant1_; + const DexFile* const dex_file_; + const DexCompilationUnit* const dex_compilation_unit_; + DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bb6ac84a9f..b86665b9ee 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -21,8 +21,11 @@ #include "dex/verified_method.h" #include "driver/dex_compilation_unit.h" #include "gc_map_builder.h" +#include "leb128.h" +#include "mapping_table.h" #include "utils/assembler.h" #include "verifier/dex_gc_map.h" +#include "vmap_table.h" namespace art { @@ -120,8 +123,95 @@ void CodeGenerator::BuildNativeGCMap( dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); - GcMapBuilder builder(data, 0, 0, dex_gc_map.RegWidth()); + uint32_t max_native_offset = 0; + for (size_t i = 0; i < pc_infos_.Size(); i++) { + uint32_t native_offset = pc_infos_.Get(i).native_pc; + if (native_offset > max_native_offset) { + max_native_offset = native_offset; + } + } + + GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth()); + for (size_t i = 0; i < pc_infos_.Size(); i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + uint32_t native_offset = pc_info.native_pc; + uint32_t dex_pc = pc_info.dex_pc; + const 
uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); + CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc; + builder.AddEntry(native_offset, references); + } } +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { + uint32_t pc2dex_data_size = 0u; + uint32_t pc2dex_entries = pc_infos_.Size(); + uint32_t pc2dex_offset = 0u; + int32_t pc2dex_dalvik_offset = 0; + uint32_t dex2pc_data_size = 0u; + uint32_t dex2pc_entries = 0u; + + // We currently only have pc2dex entries. + for (size_t i = 0; i < pc2dex_entries; i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); + pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = pc_info.native_pc; + pc2dex_dalvik_offset = pc_info.dex_pc; + } + + uint32_t total_entries = pc2dex_entries + dex2pc_entries; + uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries); + uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size; + data->resize(data_size); + + uint8_t* data_ptr = &(*data)[0]; + uint8_t* write_pos = data_ptr; + write_pos = EncodeUnsignedLeb128(write_pos, total_entries); + write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries); + DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size); + uint8_t* write_pos2 = write_pos + pc2dex_data_size; + + pc2dex_offset = 0u; + pc2dex_dalvik_offset = 0u; + for (size_t i = 0; i < pc2dex_entries; i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + DCHECK(pc2dex_offset <= pc_info.native_pc); + write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset); + write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = pc_info.native_pc; + pc2dex_dalvik_offset = pc_info.dex_pc; + } + DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size); + DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size); + + if (kIsDebugBuild) { + // Verify the encoded table holds the expected data. + MappingTable table(data_ptr); + CHECK_EQ(table.TotalSize(), total_entries); + CHECK_EQ(table.PcToDexSize(), pc2dex_entries); + auto it = table.PcToDexBegin(); + auto it2 = table.DexToPcBegin(); + for (size_t i = 0; i < pc2dex_entries; i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + CHECK_EQ(pc_info.native_pc, it.NativePcOffset()); + CHECK_EQ(pc_info.dex_pc, it.DexPc()); + ++it; + } + CHECK(it == table.PcToDexEnd()); + CHECK(it2 == table.DexToPcEnd()); + } +} + +void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const { + Leb128EncodingVector vmap_encoder; + size_t size = 1 + 1 /* marker */ + 0; + vmap_encoder.Reserve(size + 1u); // All values are likely to be one byte in ULEB128 (<128). + vmap_encoder.PushBackUnsigned(size); + // We're currently always saving the frame pointer, so set it in the table as a temporary. 
+ vmap_encoder.PushBackUnsigned(kVRegTempBaseReg + VmapTable::kEntryAdjustment); + vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); + + *data = vmap_encoder.GetData(); +} } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 63f8cbf429..24dcab6131 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -38,6 +38,11 @@ class CodeAllocator { DISALLOW_COPY_AND_ASSIGN(CodeAllocator); }; +struct PcInfo { + uint32_t dex_pc; + uintptr_t native_pc; +}; + /** * A Location is an abstraction over the potential location * of an instruction. It could be in register or stack. @@ -81,7 +86,8 @@ class Location : public ValueObject { class LocationSummary : public ArenaObject { public: explicit LocationSummary(HInstruction* instruction) - : inputs(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()) { + : inputs(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), + temps(instruction->GetBlock()->GetGraph()->GetArena(), 0) { inputs.SetSize(instruction->InputCount()); for (int i = 0; i < instruction->InputCount(); i++) { inputs.Put(i, Location()); @@ -100,10 +106,19 @@ class LocationSummary : public ArenaObject { output = Location(location); } + void AddTemp(Location location) { + temps.Add(location); + } + + Location GetTemp(uint32_t at) const { + return temps.Get(at); + } + Location Out() const { return output; } private: GrowableArray<Location> inputs; + GrowableArray<Location> temps; Location output; DISALLOW_COPY_AND_ASSIGN(LocationSummary); @@ -134,9 +149,17 @@ class CodeGenerator : public ArenaObject { uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } + uint32_t GetCoreSpillMask() const { return core_spill_mask_; } + + void RecordPcInfo(uint32_t dex_pc) { + struct PcInfo pc_info; + pc_info.dex_pc = dex_pc; + pc_info.native_pc = GetAssembler()->CodeSize(); + pc_infos_.Add(pc_info); + } - void BuildMappingTable(std::vector<uint8_t>* vector) const { } - void BuildVMapTable(std::vector<uint8_t>* vector) const { } + void BuildMappingTable(std::vector<uint8_t>* vector) const; + void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -144,23 +167,26 @@ class CodeGenerator : public ArenaObject { explicit CodeGenerator(HGraph* graph) : frame_size_(0), graph_(graph), - block_labels_(graph->GetArena(), 0) { + block_labels_(graph->GetArena(), 0), + pc_infos_(graph->GetArena(), 32) { block_labels_.SetSize(graph->GetBlocks()->Size()); } ~CodeGenerator() { } + // Frame size required for this method. + uint32_t frame_size_; + uint32_t core_spill_mask_; + private: void InitLocations(HInstruction* instruction); void CompileBlock(HBasicBlock* block); void CompileEntryBlock(); - // Frame size required for this method. - uint32_t frame_size_; - HGraph* const graph_; // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; + GrowableArray<PcInfo> pc_infos_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 04bdc34de7..68c997bf5f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -18,17 +18,27 @@ #include "utils/assembler.h" #include "utils/arm/assembler_arm.h" +#include "mirror/array.h" +#include "mirror/art_method.h" + #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())-> namespace art { namespace arm { void CodeGeneratorARM::GenerateFrameEntry() { + core_spill_mask_ |= (1 << LR); + // We're currently always using FP, which is callee-saved in Quick. + core_spill_mask_ |= (1 << FP); + __ PushList((1 << FP) | (1 << LR)); __ mov(FP, ShifterOperand(SP)); - if (GetFrameSize() != 0) { - __ AddConstant(SP, -GetFrameSize()); - } + + // Add the current ART method to the frame size, the return pc, and FP. + SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment)); + // PC and FP have already been pushed on the stack. + __ AddConstant(SP, -(GetFrameSize() - 2 * kWordSize)); + __ str(R0, Address(SP, 0)); } void CodeGeneratorARM::GenerateFrameExit() { @@ -173,5 +183,71 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { codegen_->GenerateFrameExit(); } +void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); + CHECK_EQ(invoke->InputCount(), 0); + locations->AddTemp(Location(R0)); + invoke->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) { + __ ldr(reg, Address(SP, 0)); +} + +void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { + Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>(); + size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + invoke->GetIndexInDexCache() * kWordSize; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ ldr(temp, Address(temp, index_in_cache)); + // LR = temp[offset_of_quick_compiled_code] + __ ldr(LR, Address(temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); + // LR() + __ blx(LR); + + codegen_->RecordPcInfo(invoke->GetDexPc()); +} + +void LocationsBuilderARM::VisitAdd(HAdd* add) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add); + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location(R0)); + locations->SetInAt(1, Location(R1)); + locations->SetOut(Location(R0)); + break; + } + default: + LOG(FATAL) << "Unimplemented"; + } + add->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { + LocationSummary* locations = add->GetLocations(); + switch (add->GetResultType()) { + case Primitive::kPrimInt: + __ add(locations->Out().reg<Register>(), + locations->InAt(0).reg<Register>(), + ShifterOperand(locations->InAt(1).reg<Register>())); + break; + default: + LOG(FATAL) << "Unimplemented"; + } +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 52a7bf45c7..7a2835d026 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -58,6 +58,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION Assembler* GetAssembler() const { return assembler_; } + void LoadCurrentMethod(Register reg); private: Assembler* const assembler_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c4bda5645e..1764486e57 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -18,18 +18,28 @@ #include "utils/assembler.h" #include "utils/x86/assembler_x86.h" +#include "mirror/array.h" +#include "mirror/art_method.h" + #define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> namespace art { namespace x86 { void CodeGeneratorX86::GenerateFrameEntry() { + // Create a fake register to mimic Quick. + static const int kFakeReturnRegister = 8; + core_spill_mask_ |= (1 << kFakeReturnRegister); + // We're currently always using EBP, which is callee-saved in Quick. + core_spill_mask_ |= (1 << EBP); + __ pushl(EBP); __ movl(EBP, ESP); - - if (GetFrameSize() != 0) { - __ subl(ESP, Immediate(GetFrameSize())); - } + // Add the current ART method to the frame size, the return pc, and EBP. + SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment)); + // The PC and EBP have already been pushed on the stack. 
+ __ subl(ESP, Immediate(GetFrameSize() - 2 * kWordSize)); + __ movl(Address(ESP, 0), EAX); } void CodeGeneratorX86::GenerateFrameExit() { @@ -45,6 +55,10 @@ void CodeGeneratorX86::Push(HInstruction* instruction, Location location) { __ pushl(location.reg<Register>()); } +void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) { + __ movl(reg, Address(ESP, 0)); +} + void CodeGeneratorX86::Move(HInstruction* instruction, Location location) { HIntConstant* constant = instruction->AsIntConstant(); if (constant != nullptr) { @@ -110,7 +124,8 @@ void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) { static int32_t GetStackSlot(HLocal* local) { // We are currently using EBP to access locals, so the offset must be negative. - return (local->GetRegNumber() + 1) * -kWordSize; + // +1 for going backwards, +1 for the method pointer. + return (local->GetRegNumber() + 2) * -kWordSize; } void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load) { @@ -172,5 +187,63 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { __ ret(); } +void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); + CHECK_EQ(invoke->InputCount(), 0); + locations->AddTemp(Location(EAX)); + invoke->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { + Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>(); + size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + invoke->GetIndexInDexCache() * kWordSize; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+ + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, index_in_cache)); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())); + + codegen_->RecordPcInfo(invoke->GetDexPc()); +} + +void LocationsBuilderX86::VisitAdd(HAdd* add) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add); + switch (add->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location(EAX)); + locations->SetInAt(1, Location(ECX)); + locations->SetOut(Location(EAX)); + break; + } + default: + LOG(FATAL) << "Unimplemented"; + } + add->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { + LocationSummary* locations = add->GetLocations(); + switch (add->GetResultType()) { + case Primitive::kPrimInt: + DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>()); + __ addl(locations->InAt(0).reg<Register>(), locations->InAt(1).reg<Register>()); + break; + default: + LOG(FATAL) << "Unimplemented"; + } +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index ad2a06143a..505237bd55 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -57,6 +57,8 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION + void LoadCurrentMethod(Register reg); + Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index ff743d8ed3..d40990e86b 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -196,4 +196,42 @@ TEST(CodegenTest, ReturnIf2) { TestCode(data, true, 0); } +TEST(CodegenTest, ReturnAdd1) { + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 3 << 12 | 0, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::ADD_INT, 1 << 8 | 0, + Instruction::RETURN); + + TestCode(data, true, 7); +} + +TEST(CodegenTest, ReturnAdd2) { + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 3 << 12 | 0, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::ADD_INT_2ADDR | 1 << 12, + Instruction::RETURN); + + TestCode(data, true, 7); +} + +TEST(CodegenTest, ReturnAdd3) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 4 << 12 | 0 << 8, + Instruction::ADD_INT_LIT8, 3 << 8 | 0, + Instruction::RETURN); + + TestCode(data, true, 7); +} + +TEST(CodegenTest, ReturnAdd4) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 4 << 12 | 0 << 8, + Instruction::ADD_INT_LIT16, 3, + Instruction::RETURN); + + TestCode(data, true, 7); +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e74ed827ec..fc67486267 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -180,11 +180,13 @@ class HBasicBlock : public ArenaObject { }; #define FOR_EACH_INSTRUCTION(M) \ + M(Add) \ M(Equal) \ M(Exit) \ M(Goto) \ M(If) \ M(IntConstant) \ + M(InvokeStatic) \ M(LoadLocal) \ M(Local) \ M(Return) \ @@ -476,14 +478,36 @@ class HIf : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HIf); }; -// Instruction to 
check if two inputs are equal to each other. -class HEqual : public HTemplateInstruction<2> { +class HBinaryOperation : public HTemplateInstruction<2> { public: - HEqual(HInstruction* first, HInstruction* second) { - SetRawInputAt(0, first); - SetRawInputAt(1, second); + HBinaryOperation(Primitive::Type result_type, + HInstruction* left, + HInstruction* right) : result_type_(result_type) { + SetRawInputAt(0, left); + SetRawInputAt(1, right); } + HInstruction* GetLeft() const { return InputAt(0); } + HInstruction* GetRight() const { return InputAt(1); } + Primitive::Type GetResultType() const { return result_type_; } + + virtual bool IsCommutative() { return false; } + + private: + const Primitive::Type result_type_; + + DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); +}; + + +// Instruction to check if two inputs are equal to each other. +class HEqual : public HBinaryOperation { + public: + HEqual(HInstruction* first, HInstruction* second) + : HBinaryOperation(Primitive::kPrimBoolean, first, second) {} + + virtual bool IsCommutative() { return true; } + DECLARE_INSTRUCTION(Equal) private: @@ -554,6 +578,55 @@ class HIntConstant : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; +class HInvoke : public HInstruction { + public: + HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc) + : inputs_(arena, number_of_arguments), + dex_pc_(dex_pc) { + inputs_.SetSize(number_of_arguments); + } + + virtual intptr_t InputCount() const { return inputs_.Size(); } + virtual HInstruction* InputAt(intptr_t i) const { return inputs_.Get(i); } + + int32_t GetDexPc() const { return dex_pc_; } + + protected: + GrowableArray<HInstruction*> inputs_; + const int32_t dex_pc_; + + private: + DISALLOW_COPY_AND_ASSIGN(HInvoke); +}; + +class HInvokeStatic : public HInvoke { + public: + HInvokeStatic(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc, int32_t index_in_dex_cache) + : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) { } + + uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; } + + DECLARE_INSTRUCTION(InvokeStatic) + + private: + uint32_t index_in_dex_cache_; + + DISALLOW_COPY_AND_ASSIGN(HInvokeStatic); +}; + +class HAdd : public HBinaryOperation { + public: + HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right) + : HBinaryOperation(result_type, left, right) {} + + virtual bool IsCommutative() { return true; } + + DECLARE_INSTRUCTION(Add); + + private: + DISALLOW_COPY_AND_ASSIGN(HAdd); +}; + class HGraphVisitor : public ValueObject { public: explicit HGraphVisitor(HGraph* graph) : graph_(graph) { } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 334b185b44..d19c40c291 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -62,17 +62,31 @@ CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver, nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item, class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx)); + // For testing purposes, we put a special marker on method names that should be compiled + // with this compiler. This makes sure we're not regressing. 
+ bool shouldCompile = dex_compilation_unit.GetSymbol().find("00024opt_00024") != std::string::npos; + ArenaPool pool; ArenaAllocator arena(&pool); - HGraphBuilder builder(&arena); + HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file); HGraph* graph = builder.BuildGraph(*code_item); if (graph == nullptr) { + if (shouldCompile) { + LOG(FATAL) << "Could not build graph in optimizing compiler"; + } return nullptr; } InstructionSet instruction_set = driver.GetInstructionSet(); + // The optimizing compiler currently does not have a Thumb2 assembler. + if (instruction_set == kThumb2) { + instruction_set = kArm; + } CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); if (codegen == nullptr) { + if (shouldCompile) { + LOG(FATAL) << "Could not find code generator for optimizing compiler"; + } return nullptr; } @@ -90,7 +104,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(CompilerDriver& driver, instruction_set, allocator.GetMemory(), codegen->GetFrameSize(), - 0, /* GPR spill mask, unused */ + codegen->GetCoreSpillMask(), 0, /* FPR spill mask, unused */ mapping_table, vmap_table, diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 72ebdd3741..c23fd440dc 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -52,6 +52,23 @@ namespace x86_64 { class X86_64Assembler; } +class ExternalLabel { + public: + ExternalLabel(const char* name, uword address) + : name_(name), address_(address) { + DCHECK(name != nullptr); + } + + const char* name() const { return name_; } + uword address() const { + return address_; + } + + private: + const char* name_; + const uword address_; +}; + class Label { public: Label() : position_(0) {} diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc index a78d28792b..bd78eaef0d 100644 --- a/compiler/utils/scoped_arena_allocator.cc +++ b/compiler/utils/scoped_arena_allocator.cc @@ -99,6 +99,7 @@ void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) { } CurrentStats()->RecordAlloc(bytes, kind); top_ptr_ = ptr + rounded_bytes; + VALGRIND_MAKE_MEM_UNDEFINED(ptr, bytes); VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes); return ptr; } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index d242c17c8f..ebbb43a442 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -54,6 +54,16 @@ void X86Assembler::call(Label* label) { } +void X86Assembler::call(const ExternalLabel& label) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + intptr_t call_start = buffer_.GetPosition(); + EmitUint8(0xE8); + EmitInt32(label.address()); + static const intptr_t kCallExternalLabelSize = 5; + DCHECK_EQ((buffer_.GetPosition() - call_start), kCallExternalLabelSize); +} + + void X86Assembler::pushl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x50 + reg); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 879f4ec795..f906a6f7c7 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -227,6 +227,7 @@ class X86Assembler FINAL : public Assembler { void call(Register reg); void call(const Address& address); void call(Label* label); + void call(const ExternalLabel& label); void pushl(Register reg); void pushl(const Address& address); |