43 files changed, 715 insertions, 137 deletions
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 7082bedc5e..d5ac34186b 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1126,7 +1126,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); @@ -1141,7 +1141,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); } } } else { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index e19e74f37a..7ae405ab3a 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1693,8 +1693,14 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } else { // Chained cmp-and-branch, starting from starting_key. for (size_t i = 1; i <= num_entries; i++) { - BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, - starting_key + i - 1, table.GetEntryAt(i), dex_pc); + BuildSwitchCaseHelper(instruction, + i, + i == num_entries, + table, + value, + starting_key + i - 1, + table.GetEntryAt(i), + dex_pc); } } } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d05c514912..2c6c3b726a 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -374,6 +374,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << instance_of->MustDoNullCheck() << std::noboolalpha; } + void VisitArraySet(HArraySet* array_set) OVERRIDE { + StartAttributeStream("value_can_be_null") << std::boolalpha + << array_set->GetValueCanBeNull() << std::noboolalpha; + } + void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); StartAttributeStream("method_name") << PrettyMethod( diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0ac26de674..30dc9b303a 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -71,7 +71,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) OVERRIDE; - bool IsDominatedByInputNullCheck(HInstruction* instr); + + bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -187,14 +188,18 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { } } -bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) { - HInstruction* input = instr->InputAt(0); +bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const { + if (!input->CanBeNull()) { + return true; + } + for (HUseIterator<HInstruction*> 
it(input->GetUses()); !it.Done(); it.Advance()) { HInstruction* use = it.Current()->GetUser(); - if (use->IsNullCheck() && use->StrictlyDominates(instr)) { + if (use->IsNullCheck() && use->StrictlyDominates(at)) { return true; } } + return false; } @@ -231,7 +236,7 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + if (CanEnsureNotNullAt(object, check_cast)) { check_cast->ClearMustDoNullCheck(); } @@ -267,7 +272,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); bool can_be_null = true; - if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + if (CanEnsureNotNullAt(object, instruction)) { can_be_null = false; instruction->ClearMustDoNullCheck(); } @@ -305,14 +310,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } @@ -437,7 +442,7 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { instruction->ClearNeedsTypeCheck(); } - if (!value->CanBeNull()) { + if (CanEnsureNotNullAt(value, instruction)) { instruction->ClearValueCanBeNull(); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b19726d3ba..486968cf9e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2408,7 +2408,9 @@ class HCurrentMethod : public HExpression<0> { // will be the block containing the next Dex opcode. class HPackedSwitch : public HTemplateInstruction<1> { public: - HPackedSwitch(int32_t start_value, uint32_t num_entries, HInstruction* input, + HPackedSwitch(int32_t start_value, + uint32_t num_entries, + HInstruction* input, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc), start_value_(start_value), @@ -2429,8 +2431,8 @@ class HPackedSwitch : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(PackedSwitch); private: - int32_t start_value_; - uint32_t num_entries_; + const int32_t start_value_; + const uint32_t num_entries_; DISALLOW_COPY_AND_ASSIGN(HPackedSwitch); }; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index ad8c682b3a..fb11d76320 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -145,8 +145,14 @@ void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi, to guarantee convergence of the algorithm. - phi->SetType(phi->InputAt(0)->GetType()); + // Give a type to the loop phi to guarantee convergence of the algorithm. 
+ // Note that the dead phi may already have a type if it is an equivalent + // generated for a typed LoadLocal. In that case we do not change the + // type because it could lead to an unsupported PrimNot/Float/Double -> + // PrimInt/Long transition and create same type equivalents. + if (phi->GetType() == Primitive::kPrimVoid) { + phi->SetType(phi->InputAt(0)->GetType()); + } AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 403d348752..8f6b1ff0a5 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -30,9 +30,11 @@ void ArmContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[PC] = &pc_; + gprs_[R0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = ArmContext::kBadGprBase + SP; pc_ = ArmContext::kBadGprBase + PC; + arg0_ = 0; } void ArmContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 77bb5c8399..ea31055e9d 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -45,6 +45,10 @@ class ArmContext : public Context { SetGPR(PC, new_pc); } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(R0, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg] != nullptr; @@ -84,7 +88,7 @@ class ArmContext : public Context { uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfSRegisters]; // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + uintptr_t sp_, pc_, arg0_; }; } // namespace arm diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index e45d828584..dc1cf8ab51 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -437,8 +437,8 @@ ARM_ENTRY art_quick_do_long_jump ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ - mov r0, #0 @ clear result registers r0 and r1 - mov r1, #0 + ldr r0, [r0, #-12] @ load r0 value + mov r1, #0 @ clear result register r1 bx r2 @ do long jump END art_quick_do_long_jump @@ -1142,7 +1142,7 @@ END art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. */ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc index 60becc6aea..4477631c67 100644 --- a/runtime/arch/arm64/context_arm64.cc +++ b/runtime/arch/arm64/context_arm64.cc @@ -31,10 +31,12 @@ void Arm64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[LR] = &pc_; + gprs_[kPC] = &pc_; + gprs_[X0] = &arg0_; // Initialize registers with easy to spot debug values. 
sp_ = Arm64Context::kBadGprBase + SP; - pc_ = Arm64Context::kBadGprBase + LR; + pc_ = Arm64Context::kBadGprBase + kPC; + arg0_ = 0; } void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { @@ -58,8 +60,8 @@ void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { } void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); - DCHECK_NE(reg, static_cast<uint32_t>(XZR)); + DCHECK_LT(reg, arraysize(gprs_)); + // Note: we use kPC == XZR, so do not ensure that reg != XZR. DCHECK(IsAccessibleGPR(reg)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. *gprs_[reg] = value; @@ -124,13 +126,13 @@ void Arm64Context::SmashCallerSaves() { extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*); void Arm64Context::DoLongJump() { - uint64_t gprs[kNumberOfXRegisters]; + uint64_t gprs[arraysize(gprs_)]; uint64_t fprs[kNumberOfDRegisters]; // The long jump routine called below expects to find the value for SP at index 31. DCHECK_EQ(SP, 31); - for (size_t i = 0; i < kNumberOfXRegisters; ++i) { + for (size_t i = 0; i < arraysize(gprs_); ++i) { gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfDRegisters; ++i) { diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h index 1c99f3c42d..11314e08ed 100644 --- a/runtime/arch/arm64/context_arm64.h +++ b/runtime/arch/arm64/context_arm64.h @@ -42,20 +42,25 @@ class Arm64Context : public Context { } void SetPC(uintptr_t new_lr) OVERRIDE { - SetGPR(LR, new_lr); + SetGPR(kPC, new_lr); + } + + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(X0, new_arg0_value); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg] != nullptr; } uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg]; } uintptr_t GetGPR(uint32_t reg) OVERRIDE { + // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value. DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); DCHECK(IsAccessibleGPR(reg)); return *gprs_[reg]; @@ -79,12 +84,15 @@ class Arm64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + static constexpr size_t kPC = kNumberOfXRegisters; + private: - // Pointers to register locations, initialized to null or the specific registers below. - uintptr_t* gprs_[kNumberOfXRegisters]; + // Pointers to register locations, initialized to null or the specific registers below. We need + // an additional one for the PC. + uintptr_t* gprs_[kNumberOfXRegisters + 1]; uint64_t * fprs_[kNumberOfDRegisters]; - // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + // Hold values for sp, pc and arg0 if they are not located within a stack frame. + uintptr_t sp_, pc_, arg0_; }; } // namespace arm64 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 169bc384a8..68121781ca 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -941,7 +941,7 @@ ENTRY art_quick_do_long_jump // Load GPRs // TODO: lots of those are smashed, could optimize. 
add x0, x0, #30*8 - ldp x30, x1, [x0], #-16 + ldp x30, x1, [x0], #-16 // LR & SP ldp x28, x29, [x0], #-16 ldp x26, x27, [x0], #-16 ldp x24, x25, [x0], #-16 @@ -958,10 +958,12 @@ ENTRY art_quick_do_long_jump ldp x2, x3, [x0], #-16 mov sp, x1 - // TODO: Is it really OK to use LR for the target PC? - mov x0, #0 - mov x1, #0 - br xLR + // Need to load PC, it's at the end (after the space for the unused XZR). Use x1. + ldr x1, [x0, #33*8] + // And the value of x0. + ldr x0, [x0] + + br x1 END art_quick_do_long_jump /* diff --git a/runtime/arch/context.h b/runtime/arch/context.h index 9ef761e981..9af7c04f5c 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -50,6 +50,9 @@ class Context { // Sets the program counter value. virtual void SetPC(uintptr_t new_pc) = 0; + // Sets the first argument register. + virtual void SetArg0(uintptr_t new_arg0_value) = 0; + // Returns whether the given GPR is accessible (read or write). virtual bool IsAccessibleGPR(uint32_t reg) = 0; diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index bc2bf68993..08ab356855 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -30,9 +30,11 @@ void MipsContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[RA] = &ra_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = MipsContext::kBadGprBase + SP; ra_ = MipsContext::kBadGprBase + RA; + arg0_ = 0; } void MipsContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h index 38cf29a6aa..0affe5397a 100644 --- a/runtime/arch/mips/context_mips.h +++ b/runtime/arch/mips/context_mips.h @@ -78,12 +78,17 @@ class MipsContext : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfFRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and ra (return address) if they are not located within a stack frame, as + // well as the first argument. + uintptr_t sp_, ra_, arg0_; }; } // namespace mips } // namespace art diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc index cc6dc7e17c..2c17f1c118 100644 --- a/runtime/arch/mips64/context_mips64.cc +++ b/runtime/arch/mips64/context_mips64.cc @@ -30,9 +30,11 @@ void Mips64Context::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[T9] = &t9_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. 
sp_ = Mips64Context::kBadGprBase + SP; t9_ = Mips64Context::kBadGprBase + T9; + arg0_ = 0; } void Mips64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h index 26fbcfe9d4..84b1c9bad4 100644 --- a/runtime/arch/mips64/context_mips64.h +++ b/runtime/arch/mips64/context_mips64.h @@ -78,14 +78,20 @@ class Mips64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfGpuRegisters]; uint64_t* fprs_[kNumberOfFpuRegisters]; // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the - // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). - uintptr_t sp_, t9_; + // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We + // also need the first argument for single-frame deopt. + uintptr_t sp_, t9_, arg0_; }; + } // namespace mips64 } // namespace art diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 7096c82aad..987ad60fd8 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -29,9 +29,11 @@ void X86Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[ESP] = &esp_; + gprs_[EAX] = &arg0_; // Initialize registers with easy to spot debug values. esp_ = X86Context::kBadGprBase + ESP; eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index c4a11d8a88..59beb12ffa 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -44,6 +44,10 @@ class X86Context : public Context { eip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(EAX, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -95,10 +99,10 @@ class X86Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint32_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat + // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t esp_, eip_; + uintptr_t esp_, eip_, arg0_; }; } // namespace x86 } // namespace art diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 029a296e5a..f3b15c9ab2 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1695,7 +1695,7 @@ END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 1fe2ef8fd8..3dc7d71df4 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -29,9 +29,11 @@ void X86_64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[RSP] = &rsp_; + gprs_[RDI] = &arg0_; // Initialize registers with easy to spot debug values. rsp_ = X86_64Context::kBadGprBase + RSP; rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86_64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 30bb9ec362..f05b7f093f 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -44,6 +44,10 @@ class X86_64Context : public Context { rip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(RDI, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -82,10 +86,10 @@ class X86_64Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint64_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat + // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t rsp_, rip_; + uintptr_t rsp_, rip_, arg0_; }; } // namespace x86_64 } // namespace art diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 861f8025a5..2f438a3c8f 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1724,18 +1724,18 @@ END_FUNCTION art_quick_instrumentation_exit * will long jump to the upcall with a special exception of -1. */ DEFINE_FUNCTION art_quick_deoptimize - pushq %rsi // Entry point for a jump. Fake that we were called. - // Use hidden arg. + pushq %rsi // Entry point for a jump. Fake that we were called. + // Use hidden arg. SETUP_SAVE_ALL_CALLEE_SAVE_FRAME - // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + // Stack should be aligned now. + movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. + call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index d749664d12..dfd9fcddb8 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -22,13 +22,16 @@ #include "mirror/class-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "quick_exception_handler.h" #include "stack.h" #include "thread.h" #include "verifier/method_verifier.h" namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + if (VLOG_IS_ON(deopt)) { LOG(INFO) << "Deopting:"; self->Dump(LOG(INFO)); @@ -39,19 +42,26 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mut self->QuickDeliverException(); } -extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { - ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self); -} - extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); + + // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the + // specialized visitor that will show whether a method is Quick or Shadow. + // Before deoptimizing to interpreter, we must push the deoptimization context. JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. self->PushDeoptimizationContext(return_value, false, self->GetException()); - artDeoptimizeImpl(self); + + QuickExceptionHandler exception_handler(self, true); + exception_handler.DeoptimizeSingleFrame(); + exception_handler.UpdateInstrumentationStack(); + exception_handler.DeoptimizeSingleFrameArchDependentFixup(); + // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would + // be caller-saved. This has the downside that we cannot track incorrect register usage down the + // line. 
+ exception_handler.DoLongJump(false); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 1302c5f17b..1e9e4fb3c6 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -29,8 +29,10 @@ #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "quick_exception_handler.h" #include "runtime.h" #include "scoped_thread_state_change.h" +#include "stack.h" #include "debugger.h" namespace art { @@ -646,27 +648,86 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, if (method->IsAbstract()) { ThrowAbstractMethodError(method); return 0; + } + + JValue tmp_value; + ShadowFrame* deopt_frame = self->PopStackedShadowFrame( + StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false); + const DexFile::CodeItem* code_item = method->GetCodeItem(); + DCHECK(code_item != nullptr) << PrettyMethod(method); + ManagedStack fragment; + + DCHECK(!method->IsNative()) << PrettyMethod(method); + uint32_t shorty_len = 0; + auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); + const char* shorty = non_proxy_method->GetShorty(&shorty_len); + + JValue result; + + if (deopt_frame != nullptr) { + // Coming from single-frame deopt. + + if (kIsDebugBuild) { + // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom + // of the call-stack) corresponds to the called method. + ShadowFrame* linked = deopt_frame; + while (linked->GetLink() != nullptr) { + linked = linked->GetLink(); + } + CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " " + << PrettyMethod(linked->GetMethod()); + } + + if (VLOG_IS_ON(deopt)) { + // Print out the stack to verify that it was a single-frame deopt. + LOG(INFO) << "Continue-ing from deopt. Stack is:"; + QuickExceptionHandler::DumpFramesWithType(self, true); + } + + mirror::Throwable* pending_exception = nullptr; + self->PopDeoptimizationContext(&result, &pending_exception); + + // Push a transition back into managed code onto the linked list in thread. + self->PushManagedStackFragment(&fragment); + + // Ensure that the stack is still in order. + if (kIsDebugBuild) { + class DummyStackVisitor : public StackVisitor { + public: + explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking + // logic. Just always say we want to continue. + return true; + } + }; + DummyStackVisitor dsv(self); + dsv.WalkStack(); + } + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. 
+ if (pending_exception != nullptr) { + self->SetException(pending_exception); + } + interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result); } else { - DCHECK(!method->IsNative()) << PrettyMethod(method); const char* old_cause = self->StartAssertNoThreadSuspension( "Building interpreter shadow frame"); - const DexFile::CodeItem* code_item = method->GetCodeItem(); - DCHECK(code_item != nullptr) << PrettyMethod(method); uint16_t num_regs = code_item->registers_size_; - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); // No last shadow coming from quick. - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_; - uint32_t shorty_len = 0; - auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); - const char* shorty = non_proxy_method->GetShorty(&shorty_len); BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len, shadow_frame, first_arg_reg); shadow_frame_builder.VisitArguments(); const bool needs_initialization = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); // Push a transition back into managed code onto the linked list in thread. - ManagedStack fragment; self->PushManagedStackFragment(&fragment); self->PushShadowFrame(shadow_frame); self->EndAssertNoThreadSuspension(old_cause); @@ -681,24 +742,26 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, return 0; } } - JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); - // Pop transition. - self->PopManagedStackFragment(fragment); - - // Request a stack deoptimization if needed - ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); - if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { - // Push the context of the deoptimization stack so we can restore the return value and the - // exception before executing the deoptimized frames. - self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); - - // Set special exception to cause deoptimization. - self->SetException(Thread::GetDeoptimizationException()); - } - // No need to restore the args since the method has already been run by the interpreter. - return result.GetJ(); + result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); } + + // Pop transition. + self->PopManagedStackFragment(fragment); + + // Request a stack deoptimization if needed + ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); + if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { + // Push the context of the deoptimization stack so we can restore the return value and the + // exception before executing the deoptimized frames. + self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); + + // Set special exception to cause deoptimization. + self->SetException(Thread::GetDeoptimizationException()); + } + + // No need to restore the args since the method has already been run by the interpreter. 
+ return result.GetJ(); } // Visits arguments on the stack placing them into the args vector, Object* arguments are converted diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 3ac80c6642..f783b04b95 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -21,6 +21,7 @@ #include "mirror/string-inl.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" +#include "stack.h" #include "unstarted_runtime.h" namespace art { @@ -330,8 +331,9 @@ void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receive } // Set up shadow frame with matching number of reference slots to vregs. ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame(); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); self->PushShadowFrame(shadow_frame); size_t cur_reg = num_regs - num_ins; diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 2de12dcec2..ad34c9ad9e 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -21,6 +21,7 @@ #include "debugger.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "mirror/array-inl.h" +#include "stack.h" #include "unstarted_runtime.h" #include "verifier/method_verifier.h" @@ -584,9 +585,9 @@ static inline bool DoCallCommon(ArtMethod* called_method, // Allocate shadow frame on the stack. const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon"); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0, - memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0); + ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get(); // Initialize new shadow frame by copying the registers from the callee shadow frame. if (do_assignability_check) { diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index ab70d02349..b5e28e9314 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -265,7 +265,11 @@ class Libraries { for (auto it = libraries_.begin(); it != libraries_.end(); ) { SharedLibrary* const library = it->second; // If class loader is null then it was unloaded, call JNI_OnUnload. - if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) == nullptr) { + const jweak class_loader = library->GetClassLoader(); + // If class_loader is a null jobject then it is the boot class loader. We should not unload + // the native libraries of the boot class loader. 
+ if (class_loader != nullptr && + soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) { void* const sym = library->FindSymbol("JNI_OnUnload", nullptr); if (sym == nullptr) { VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]"; diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 5c13e13f90..63f43cf3b2 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -20,6 +20,7 @@ #include "art_method-inl.h" #include "dex_instruction.h" #include "entrypoints/entrypoint_utils.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" @@ -36,8 +37,9 @@ QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimizatio : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization), method_tracing_active_(is_deoptimization || Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()), - handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr), - handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) { + handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0), + handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false), + handler_frame_depth_(kInvalidFrameDepth) { } // Finds catch handler. @@ -260,19 +262,25 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* // Prepares deoptimization. class DeoptimizeStackVisitor FINAL : public StackVisitor { public: - DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler) + DeoptimizeStackVisitor(Thread* self, + Context* context, + QuickExceptionHandler* exception_handler, + bool single_frame) SHARED_REQUIRES(Locks::mutator_lock_) : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), exception_handler_(exception_handler), prev_shadow_frame_(nullptr), - stacked_shadow_frame_pushed_(false) { + stacked_shadow_frame_pushed_(false), + single_frame_deopt_(single_frame), + single_frame_done_(false) { } bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { exception_handler_->SetHandlerFrameDepth(GetFrameDepth()); ArtMethod* method = GetMethod(); - if (method == nullptr) { - // This is the upcall, we remember the frame and last pc so that we may long jump to them. + if (method == nullptr || single_frame_done_) { + // This is the upcall (or the next full frame in single-frame deopt), we remember the frame + // and last pc so that we may long jump to them. exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc()); exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame()); if (!stacked_shadow_frame_pushed_) { @@ -295,7 +303,13 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { CHECK_EQ(GetFrameDepth(), 1U); return true; } else { - return HandleDeoptimization(method); + HandleDeoptimization(method); + if (single_frame_deopt_ && !IsInInlinedFrame()) { + // Single-frame deopt ends at the first non-inlined frame and needs to store that method. 
+ exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method)); + single_frame_done_ = true; + } + return true; } } @@ -304,7 +318,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { return static_cast<VRegKind>(kinds.at(reg * 2)); } - bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { + void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { const DexFile::CodeItem* code_item = m->GetCodeItem(); CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m); uint16_t num_regs = code_item->registers_size_; @@ -448,16 +462,20 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { // Will be popped after the long jump after DeoptimizeStack(), // right before interpreter::EnterInterpreterFromDeoptimize(). stacked_shadow_frame_pushed_ = true; - GetThread()->PushStackedShadowFrame(new_frame, - StackedShadowFrameType::kDeoptimizationShadowFrame); + GetThread()->PushStackedShadowFrame( + new_frame, + single_frame_deopt_ + ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame + : StackedShadowFrameType::kDeoptimizationShadowFrame); } prev_shadow_frame_ = new_frame; - return true; } QuickExceptionHandler* const exception_handler_; ShadowFrame* prev_shadow_frame_; bool stacked_shadow_frame_pushed_; + const bool single_frame_deopt_; + bool single_frame_done_; DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor); }; @@ -468,13 +486,46 @@ void QuickExceptionHandler::DeoptimizeStack() { self_->DumpStack(LOG(INFO) << "Deoptimizing: "); } - DeoptimizeStackVisitor visitor(self_, context_, this); + DeoptimizeStackVisitor visitor(self_, context_, this, false); visitor.WalkStack(true); // Restore deoptimization exception self_->SetException(Thread::GetDeoptimizationException()); } +void QuickExceptionHandler::DeoptimizeSingleFrame() { + DCHECK(is_deoptimization_); + + if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { + LOG(INFO) << "Single-frame deopting:"; + DumpFramesWithType(self_, true); + } + + DeoptimizeStackVisitor visitor(self_, context_, this, true); + visitor.WalkStack(true); + + // PC needs to be of the quick-to-interpreter bridge. + int32_t offset; + #ifdef __LP64__ + offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value(); + #else + offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value(); + #endif + handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(self_) + offset); +} + +void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() { + // Architecture-dependent work. This is to get the LR right for x86 and x86-64. + + if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) { + // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to + // change how longjump works. + handler_quick_frame_ = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*)); + } +} + // Unwinds all instrumentation stack frame prior to catch handler or upcall. class InstrumentationStackVisitor : public StackVisitor { public: @@ -529,15 +580,67 @@ void QuickExceptionHandler::UpdateInstrumentationStack() { } } -void QuickExceptionHandler::DoLongJump() { +void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) { // Place context back on thread so it will be available when we continue. 
self_->ReleaseLongJumpContext(context_); context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_)); CHECK_NE(handler_quick_frame_pc_, 0u); context_->SetPC(handler_quick_frame_pc_); - context_->SmashCallerSaves(); + context_->SetArg0(handler_quick_arg0_); + if (smash_caller_saves) { + context_->SmashCallerSaves(); + } context_->DoLongJump(); UNREACHABLE(); } +// Prints out methods with their type of frame. +class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor { + public: + DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + show_details_(show_details) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* method = GetMethod(); + if (show_details_) { + LOG(INFO) << "|> pc = " << std::hex << GetCurrentQuickFramePc(); + LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame()); + if (GetCurrentQuickFrame() != nullptr && method != nullptr) { + LOG(INFO) << "|> ret = " << std::hex << GetReturnPc(); + } + } + if (method == nullptr) { + // Transition, do go on, we want to unwind over bridges, all the way. + if (show_details_) { + LOG(INFO) << "N <transition>"; + } + return true; + } else if (method->IsRuntimeMethod()) { + if (show_details_) { + LOG(INFO) << "R " << PrettyMethod(method, true); + } + return true; + } else { + bool is_shadow = GetCurrentShadowFrame() != nullptr; + LOG(INFO) << (is_shadow ? "S" : "Q") + << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ") + << " " + << PrettyMethod(method, true); + return true; // Go on. + } + } + + private: + bool show_details_; + + DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor); +}; + +void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) { + DumpFramesWithTypeStackVisitor visitor(self, details); + visitor.WalkStack(true); +} + } // namespace art diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 2e05c7e1e5..89d6a25128 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -49,6 +49,9 @@ class QuickExceptionHandler { // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy" // shadow frame that will be executed with the interpreter. void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_); + // Update the instrumentation stack by removing all methods that will be unwound // by the exception being thrown. void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_); @@ -58,7 +61,7 @@ class QuickExceptionHandler { SHARED_REQUIRES(Locks::mutator_lock_); // Long jump either to a catch handler or to the upcall. 
- NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_); + NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_); void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) { handler_quick_frame_ = handler_quick_frame; @@ -68,6 +71,10 @@ class QuickExceptionHandler { handler_quick_frame_pc_ = handler_quick_frame_pc; } + void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) { + handler_quick_arg0_ = handler_quick_arg0; + } + ArtMethod* GetHandlerMethod() const { return handler_method_; } @@ -92,6 +99,11 @@ class QuickExceptionHandler { handler_frame_depth_ = frame_depth; } + // Walk the stack frames of the given thread, printing out non-runtime methods with their types + // of frames. Helps to verify that single-frame deopt really only deopted one frame. + static void DumpFramesWithType(Thread* self, bool details = false) + SHARED_REQUIRES(Locks::mutator_lock_); + private: Thread* const self_; Context* const context_; @@ -103,6 +115,8 @@ class QuickExceptionHandler { ArtMethod** handler_quick_frame_; // PC to branch to for the handler. uintptr_t handler_quick_frame_pc_; + // The value for argument 0. + uintptr_t handler_quick_arg0_; // The handler method to report to the debugger. ArtMethod* handler_method_; // The handler's dex PC, zero implies an uncaught exception. diff --git a/runtime/stack.h b/runtime/stack.h index b805239836..292c745090 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -62,6 +62,10 @@ template<class MirrorType> class MANAGED StackReference : public mirror::CompressedReference<MirrorType> { }; +// Forward declaration. Just calls the destructor. +struct ShadowFrameDeleter; +using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>; + // ShadowFrame has 2 possible layouts: // - interpreter - separate VRegs and reference arrays. References are in the reference array. // - JNI - just VRegs, but where every VReg holds a reference. @@ -77,21 +81,26 @@ class ShadowFrame { static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc) { uint8_t* memory = new uint8_t[ComputeSize(num_vregs)]; - return Create(num_vregs, link, method, dex_pc, memory); + return CreateShadowFrameImpl(num_vregs, link, method, dex_pc, memory); } // Delete a ShadowFrame allocated on the heap for deoptimization. static void DeleteDeoptimizedFrame(ShadowFrame* sf) { + sf->~ShadowFrame(); // Explicitly destruct. uint8_t* memory = reinterpret_cast<uint8_t*>(sf); delete[] memory; } - // Create ShadowFrame for interpreter using provided memory. - static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link, - ArtMethod* method, uint32_t dex_pc, void* memory) { - ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); - return sf; - } + // Create a shadow frame in a fresh alloca. This needs to be in the context of the caller. + // Inlining doesn't work, the compiler will still undo the alloca. So this needs to be a macro. 
+#define CREATE_SHADOW_FRAME(num_vregs, link, method, dex_pc) ({ \ + size_t frame_size = ShadowFrame::ComputeSize(num_vregs); \ + void* alloca_mem = alloca(frame_size); \ + ShadowFrameAllocaUniquePtr( \ + ShadowFrame::CreateShadowFrameImpl((num_vregs), (link), (method), (dex_pc), \ + (alloca_mem))); \ + }) + ~ShadowFrame() {} // TODO(iam): Clean references array up since they're always there, @@ -283,6 +292,15 @@ class ShadowFrame { return OFFSETOF_MEMBER(ShadowFrame, vregs_); } + // Create ShadowFrame for interpreter using provided memory. + static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs, + ShadowFrame* link, + ArtMethod* method, + uint32_t dex_pc, + void* memory) { + return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); + } + private: ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc, bool has_reference_array) @@ -326,6 +344,14 @@ class ShadowFrame { DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame); }; +struct ShadowFrameDeleter { + inline void operator()(ShadowFrame* frame) { + if (frame != nullptr) { + frame->~ShadowFrame(); + } + } +}; + class JavaFrameRootInfo : public RootInfo { public: JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg) diff --git a/runtime/thread.cc b/runtime/thread.cc index 5bf895ef80..82e6fb0f00 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -250,10 +250,16 @@ void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type tlsPtr_.stacked_shadow_frame_record = record; } -ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) { +ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) { StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record; - DCHECK(record != nullptr); - DCHECK_EQ(record->GetType(), type); + if (must_be_present) { + DCHECK(record != nullptr); + DCHECK_EQ(record->GetType(), type); + } else { + if (record == nullptr || record->GetType() != type) { + return nullptr; + } + } tlsPtr_.stacked_shadow_frame_record = record->GetLink(); ShadowFrame* shadow_frame = record->GetShadowFrame(); delete record; diff --git a/runtime/thread.h b/runtime/thread.h index 11f2e285a1..d21644d179 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -108,7 +108,8 @@ enum ThreadFlag { enum class StackedShadowFrameType { kShadowFrameUnderConstruction, - kDeoptimizationShadowFrame + kDeoptimizationShadowFrame, + kSingleFrameDeoptimizationShadowFrame }; static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34; @@ -843,7 +844,7 @@ class Thread { void AssertHasDeoptimizationContext() SHARED_REQUIRES(Locks::mutator_lock_); void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); - ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); + ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true); // For debugger, find the shadow frame that corresponds to a frame id. // Or return null if there is none. 
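An aside on the stack.h and thread.h hunks above, before the test and script changes that follow: the CREATE_SHADOW_FRAME macro combines a GNU statement expression, alloca, placement new, and a destructor-only deleter so that the frame's storage lives in the caller's stack frame. Below is a minimal, standalone sketch of that pattern; Frame, FrameDeleter, FrameAllocaUniquePtr and CREATE_FRAME are hypothetical stand-ins for ART's ShadowFrame types, not the real ones.

#include <alloca.h>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <new>

// Hypothetical stand-in for ShadowFrame: constructed into caller-provided memory.
struct Frame {
  explicit Frame(int vregs) : vregs_(vregs) {}
  ~Frame() { std::printf("~Frame(%d)\n", vregs_); }
  static std::size_t ComputeSize(int vregs) {
    return sizeof(Frame) + vregs * sizeof(int);
  }
  int vregs_;
};

// The deleter only runs the destructor: the storage came from alloca and is
// released automatically when the allocating function returns.
struct FrameDeleter {
  void operator()(Frame* frame) const {
    if (frame != nullptr) {
      frame->~Frame();
    }
  }
};
using FrameAllocaUniquePtr = std::unique_ptr<Frame, FrameDeleter>;

// Must be a macro (GNU statement expression): if this were a helper function,
// its alloca would be undone as soon as the helper returned, so the expansion
// has to happen directly in the caller.
#define CREATE_FRAME(vregs) ({                                   \
    void* alloca_mem = alloca(Frame::ComputeSize(vregs));        \
    FrameAllocaUniquePtr(new (alloca_mem) Frame(vregs));         \
  })

int main() {
  FrameAllocaUniquePtr frame = CREATE_FRAME(4);
  std::printf("vregs = %d\n", frame->vregs_);
  return 0;  // ~Frame runs via the deleter; the alloca storage dies with main's frame.
}

The same split shows up in the hunk above: ShadowFrameDeleter only calls the destructor (alloca-backed storage is reclaimed implicitly), while heap-allocated deoptimized frames are torn down explicitly in DeleteDeoptimizedFrame with a destructor call followed by delete[].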
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index a746664160..f06c250dc7 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -249,6 +249,25 @@ public class Main { array[Integer.MAX_VALUE - 998] = 1; } + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArraySet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after) + /// CHECK: Deoptimize + + static void constantIndexing6(int[] array) { + array[3] = 1; + array[4] = 1; + } + + // A helper into which the actual throwing function should be inlined. + static void constantIndexingForward6(int[] array) { + constantIndexing6(array); + } + /// CHECK-START: void Main.loopPattern1(int[]) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -602,7 +621,12 @@ public class Main { // This will cause AIOOBE. constantIndexing2(new int[3]); } catch (ArrayIndexOutOfBoundsException e) { - return 99; + try { + // This will cause AIOOBE. + constantIndexingForward6(new int[3]); + } catch (ArrayIndexOutOfBoundsException e2) { + return 99; + } } return 0; } diff --git a/test/485-checker-dce-switch/expected.txt b/test/485-checker-dce-switch/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/485-checker-dce-switch/expected.txt diff --git a/test/485-checker-dce-switch/info.txt b/test/485-checker-dce-switch/info.txt new file mode 100644 index 0000000000..6653526827 --- /dev/null +++ b/test/485-checker-dce-switch/info.txt @@ -0,0 +1 @@ +Tests that DCE can remove a packed switch. diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java new file mode 100644 index 0000000000..019d876ec8 --- /dev/null +++ b/test/485-checker-dce-switch/src/Main.java @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + + public static int $inline$method() { + return 5; + } + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-DAG: <<Const100:i\d+>> IntConstant 100 + /// CHECK-DAG: Return [<<Const100>>] + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int wholeSwitchDead(int j) { + int i = $inline$method(); + int l = 100; + if (i > 100) { + switch(j) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + l += i; + } + + return l; + } + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: Return [<<Const7>>] + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_InRange() { + int i = $inline$method(); + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: Return [<<Const15>>] + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_AboveRange() { + int i = $inline$method() + 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<ConstM5:i\d+>> IntConstant -5 + /// CHECK-DAG: Return [<<ConstM5>>] + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_BelowRange() { + int i = $inline$method() - 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + public static void main(String[] args) throws Exception { + int ret_val = wholeSwitchDead(10); + if (ret_val != 100) { + throw new Error("Incorrect return value from wholeSwitchDead:" + ret_val); + } + + ret_val = constantSwitch_InRange(); + if (ret_val != 7) { + throw new Error("Incorrect return value from constantSwitch_InRange:" + ret_val); + } + + ret_val = constantSwitch_AboveRange(); + if (ret_val != 15) { + throw new Error("Incorrect return value from constantSwitch_AboveRange:" + ret_val); + } + + ret_val = constantSwitch_BelowRange(); + if (ret_val != -5) 
{ + throw new Error("Incorrect return value from constantSwitch_BelowRange:" + ret_val); + } + } +} diff --git a/test/526-checker-caller-callee-regs/src/Main.java b/test/526-checker-caller-callee-regs/src/Main.java index a1f33014ef..f402c2cd48 100644 --- a/test/526-checker-caller-callee-regs/src/Main.java +++ b/test/526-checker-caller-callee-regs/src/Main.java @@ -36,6 +36,8 @@ public class Main { // ------------------------------|------------------------|----------------- // ARM64 callee-saved registers | [x20-x29] | x2[0-9] // ARM callee-saved registers | [r5-r8,r10,r11] | r([5-8]|10|11) + // X86 callee-saved registers | [ebp,esi,edi] | e(bp|si|di) + // X86_64 callee-saved registers | [rbx,rbp,r12-15] | r(bx|bp|1[2-5]) /** * Check that a value live across a function call is allocated in a callee @@ -58,7 +60,21 @@ public class Main { /// CHECK: Sub [<<t1>>,<<t2>>] /// CHECK: Return - // TODO: Add tests for other architectures. + /// CHECK-START-X86: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->e(bp|si|di)}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return + + /// CHECK-START-X86_64: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->r(bx|bp|1[2-5])}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return public static int $opt$LiveInCall(int arg) { int t1 = arg + 1; diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/expected.txt diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt new file mode 100644 index 0000000000..e1578c8f14 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/info.txt @@ -0,0 +1 @@ +Test that we optimize ArraySet when the value is not null. diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java new file mode 100644 index 0000000000..7d8fff46ba --- /dev/null +++ b/test/532-checker-nonnull-arrayset/src/Main.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + // Check that we don't put a null check in the card marking code. 
+ + /// CHECK-START: void Main.test() instruction_simplifier (before) + /// CHECK: ArraySet value_can_be_null:true + + /// CHECK-START: void Main.test() instruction_simplifier (after) + /// CHECK: ArraySet value_can_be_null:false + + /// CHECK-START-X86: void Main.test() disassembly (after) + /// CHECK: ArraySet value_can_be_null:false + /// CHECK-NOT: test + /// CHECK: ReturnVoid + public static void test() { + Object[] array = new Object[1]; + Object nonNull = array[0]; + nonNull.getClass(); // Ensure nonNull has an implicit null check. + array[0] = nonNull; + } + + public static void main(String[] args) {} +} diff --git a/test/run-test b/test/run-test index 828939d247..a5b6e92869 100755 --- a/test/run-test +++ b/test/run-test @@ -392,7 +392,7 @@ fi # Most interesting target architecture variables are Makefile variables, not environment variables. # Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name. -function guess_arch_name() { +function guess_target_arch_name() { grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'` grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64|mips64)$'` if [ "x${suffix64}" = "x64" ]; then @@ -402,6 +402,14 @@ function guess_arch_name() { fi } +function guess_host_arch_name() { + if [ "x${suffix64}" = "x64" ]; then + host_arch_name="x86_64" + else + host_arch_name="x86" + fi +} + if [ "$target_mode" = "no" ]; then if [ "$runtime" = "jvm" ]; then if [ "$prebuild_mode" = "yes" ]; then @@ -437,10 +445,11 @@ elif [ "$runtime" = "art" ]; then if [ -z "$ANDROID_HOST_OUT" ]; then export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86 fi + guess_host_arch_name run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art" run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}" else - guess_arch_name + guess_target_arch_name run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}" run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art" fi @@ -635,7 +644,7 @@ if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then run_checker="yes" if [ "$target_mode" = "no" ]; then cfg_output_dir="$tmp_dir" - checker_arch_option= + checker_arch_option="--arch=${host_arch_name^^}" else cfg_output_dir="$DEX_LOCATION" checker_arch_option="--arch=${target_arch_name^^}" diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index a670fc7738..972e827667 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -68,20 +68,14 @@ if [[ $mode == "host" ]]; then echo "Executing $make_command" $make_command elif [[ $mode == "target" ]]; then - # We need to provide our own linker in case the linker on the device - # is out of date. - env="TARGET_GLOBAL_LDFLAGS=-Wl,-dynamic-linker=$android_root/bin/$linker" - # gcc gives a linker error, so compile with clang. - # TODO: investigate and fix? - if [[ $TARGET_PRODUCT == "mips32r2_fp" ]]; then - env="$env USE_CLANG_PLATFORM_BUILD=true" - fi - # Disable NINJA for building on target, it does not support the -e option to Makefile. + # Disable NINJA for building on target, it does not support setting environment variables + # within the make command. env="$env USE_NINJA=false" - # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS. 
- # Also, we build extra tools that will be used by tests, so that + # Build extra tools that will be used by tests, so that # they are compiled with our own linker. - make_command="make -e $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb" + # We need to provide our own linker in case the linker on the device + # is out of date. + make_command="make TARGET_LINKER=$android_root/bin/$linker $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb" echo "Executing env $env $make_command" env $env $make_command fi |
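One more aside, on the very first hunk in this diff (compiler/dex/quick/codegen_util.cc): the mask there changes from ~(-1 << VREG_NUM_WIDTH) to ~(~0u << VREG_NUM_WIDTH) because left-shifting a negative signed integer is undefined behavior in C++, while shifting the unsigned ~0u is well defined and yields the same low-bit mask. A minimal sketch of the well-defined form follows; the width of 16 is only an assumed value for illustration, not a claim about ART's VREG_NUM_WIDTH.

#include <cstdint>
#include <cstdio>

// Assumed width for illustration; ART defines VREG_NUM_WIDTH elsewhere.
constexpr uint32_t kWidth = 16;

// Well defined for 0 < width < 32: shift an unsigned all-ones value, then invert.
constexpr uint32_t LowBitsMask(uint32_t width) {
  return ~(~0u << width);  // width == 16 -> 0x0000ffff
}

int main() {
  // ~(-1 << width) produces the same bits on common two's-complement targets,
  // but the signed left shift is undefined behavior, so the unsigned form is used.
  std::printf("mask = 0x%08x\n", static_cast<unsigned>(LowBitsMask(kWidth)));
  return 0;
}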